Commit 92bfc6e7c4eabbbd15e7d6d49123b296d05dcfd1

Authored by Christoph Hellwig
Committed by Niv Sardi
1 parent 94e1b69d1a

[XFS] embed struct xfs_imap into xfs_inode

Most uses of struct xfs_imap are to map an inode to a buffer.  To avoid
copying around the inode location information we should just embed a
struct xfs_imap into the xfs_inode.  To make sure it doesn't bloat an
inode the im_len is changed to a ushort, which is fine as that's what
the users expect anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Niv Sardi <xaiki@sgi.com>

Showing 7 changed files with 33 additions and 71 deletions Inline Diff

1 /* 1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h" 31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h" 32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h" 33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h" 34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h" 35 #include "xfs_dinode.h"
36 #include "xfs_inode.h" 36 #include "xfs_inode.h"
37 #include "xfs_btree.h" 37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h" 38 #include "xfs_ialloc.h"
39 #include "xfs_alloc.h" 39 #include "xfs_alloc.h"
40 #include "xfs_rtalloc.h" 40 #include "xfs_rtalloc.h"
41 #include "xfs_error.h" 41 #include "xfs_error.h"
42 #include "xfs_bmap.h" 42 #include "xfs_bmap.h"
43 #include "xfs_imap.h"
44 43
45 44
46 /* 45 /*
47 * Allocation group level functions. 46 * Allocation group level functions.
48 */ 47 */
49 static inline int 48 static inline int
50 xfs_ialloc_cluster_alignment( 49 xfs_ialloc_cluster_alignment(
51 xfs_alloc_arg_t *args) 50 xfs_alloc_arg_t *args)
52 { 51 {
53 if (xfs_sb_version_hasalign(&args->mp->m_sb) && 52 if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
54 args->mp->m_sb.sb_inoalignmt >= 53 args->mp->m_sb.sb_inoalignmt >=
55 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) 54 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
56 return args->mp->m_sb.sb_inoalignmt; 55 return args->mp->m_sb.sb_inoalignmt;
57 return 1; 56 return 1;
58 } 57 }
59 58
60 /* 59 /*
61 * Lookup the record equal to ino in the btree given by cur. 60 * Lookup the record equal to ino in the btree given by cur.
62 */ 61 */
63 STATIC int /* error */ 62 STATIC int /* error */
64 xfs_inobt_lookup_eq( 63 xfs_inobt_lookup_eq(
65 struct xfs_btree_cur *cur, /* btree cursor */ 64 struct xfs_btree_cur *cur, /* btree cursor */
66 xfs_agino_t ino, /* starting inode of chunk */ 65 xfs_agino_t ino, /* starting inode of chunk */
67 __int32_t fcnt, /* free inode count */ 66 __int32_t fcnt, /* free inode count */
68 xfs_inofree_t free, /* free inode mask */ 67 xfs_inofree_t free, /* free inode mask */
69 int *stat) /* success/failure */ 68 int *stat) /* success/failure */
70 { 69 {
71 cur->bc_rec.i.ir_startino = ino; 70 cur->bc_rec.i.ir_startino = ino;
72 cur->bc_rec.i.ir_freecount = fcnt; 71 cur->bc_rec.i.ir_freecount = fcnt;
73 cur->bc_rec.i.ir_free = free; 72 cur->bc_rec.i.ir_free = free;
74 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 73 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
75 } 74 }
76 75
77 /* 76 /*
78 * Lookup the first record greater than or equal to ino 77 * Lookup the first record greater than or equal to ino
79 * in the btree given by cur. 78 * in the btree given by cur.
80 */ 79 */
81 int /* error */ 80 int /* error */
82 xfs_inobt_lookup_ge( 81 xfs_inobt_lookup_ge(
83 struct xfs_btree_cur *cur, /* btree cursor */ 82 struct xfs_btree_cur *cur, /* btree cursor */
84 xfs_agino_t ino, /* starting inode of chunk */ 83 xfs_agino_t ino, /* starting inode of chunk */
85 __int32_t fcnt, /* free inode count */ 84 __int32_t fcnt, /* free inode count */
86 xfs_inofree_t free, /* free inode mask */ 85 xfs_inofree_t free, /* free inode mask */
87 int *stat) /* success/failure */ 86 int *stat) /* success/failure */
88 { 87 {
89 cur->bc_rec.i.ir_startino = ino; 88 cur->bc_rec.i.ir_startino = ino;
90 cur->bc_rec.i.ir_freecount = fcnt; 89 cur->bc_rec.i.ir_freecount = fcnt;
91 cur->bc_rec.i.ir_free = free; 90 cur->bc_rec.i.ir_free = free;
92 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 91 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
93 } 92 }
94 93
95 /* 94 /*
96 * Lookup the first record less than or equal to ino 95 * Lookup the first record less than or equal to ino
97 * in the btree given by cur. 96 * in the btree given by cur.
98 */ 97 */
99 int /* error */ 98 int /* error */
100 xfs_inobt_lookup_le( 99 xfs_inobt_lookup_le(
101 struct xfs_btree_cur *cur, /* btree cursor */ 100 struct xfs_btree_cur *cur, /* btree cursor */
102 xfs_agino_t ino, /* starting inode of chunk */ 101 xfs_agino_t ino, /* starting inode of chunk */
103 __int32_t fcnt, /* free inode count */ 102 __int32_t fcnt, /* free inode count */
104 xfs_inofree_t free, /* free inode mask */ 103 xfs_inofree_t free, /* free inode mask */
105 int *stat) /* success/failure */ 104 int *stat) /* success/failure */
106 { 105 {
107 cur->bc_rec.i.ir_startino = ino; 106 cur->bc_rec.i.ir_startino = ino;
108 cur->bc_rec.i.ir_freecount = fcnt; 107 cur->bc_rec.i.ir_freecount = fcnt;
109 cur->bc_rec.i.ir_free = free; 108 cur->bc_rec.i.ir_free = free;
110 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 109 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
111 } 110 }
112 111
113 /* 112 /*
114 * Update the record referred to by cur to the value given 113 * Update the record referred to by cur to the value given
115 * by [ino, fcnt, free]. 114 * by [ino, fcnt, free].
116 * This either works (return 0) or gets an EFSCORRUPTED error. 115 * This either works (return 0) or gets an EFSCORRUPTED error.
117 */ 116 */
118 STATIC int /* error */ 117 STATIC int /* error */
119 xfs_inobt_update( 118 xfs_inobt_update(
120 struct xfs_btree_cur *cur, /* btree cursor */ 119 struct xfs_btree_cur *cur, /* btree cursor */
121 xfs_agino_t ino, /* starting inode of chunk */ 120 xfs_agino_t ino, /* starting inode of chunk */
122 __int32_t fcnt, /* free inode count */ 121 __int32_t fcnt, /* free inode count */
123 xfs_inofree_t free) /* free inode mask */ 122 xfs_inofree_t free) /* free inode mask */
124 { 123 {
125 union xfs_btree_rec rec; 124 union xfs_btree_rec rec;
126 125
127 rec.inobt.ir_startino = cpu_to_be32(ino); 126 rec.inobt.ir_startino = cpu_to_be32(ino);
128 rec.inobt.ir_freecount = cpu_to_be32(fcnt); 127 rec.inobt.ir_freecount = cpu_to_be32(fcnt);
129 rec.inobt.ir_free = cpu_to_be64(free); 128 rec.inobt.ir_free = cpu_to_be64(free);
130 return xfs_btree_update(cur, &rec); 129 return xfs_btree_update(cur, &rec);
131 } 130 }
132 131
133 /* 132 /*
134 * Get the data from the pointed-to record. 133 * Get the data from the pointed-to record.
135 */ 134 */
136 int /* error */ 135 int /* error */
137 xfs_inobt_get_rec( 136 xfs_inobt_get_rec(
138 struct xfs_btree_cur *cur, /* btree cursor */ 137 struct xfs_btree_cur *cur, /* btree cursor */
139 xfs_agino_t *ino, /* output: starting inode of chunk */ 138 xfs_agino_t *ino, /* output: starting inode of chunk */
140 __int32_t *fcnt, /* output: number of free inodes */ 139 __int32_t *fcnt, /* output: number of free inodes */
141 xfs_inofree_t *free, /* output: free inode mask */ 140 xfs_inofree_t *free, /* output: free inode mask */
142 int *stat) /* output: success/failure */ 141 int *stat) /* output: success/failure */
143 { 142 {
144 union xfs_btree_rec *rec; 143 union xfs_btree_rec *rec;
145 int error; 144 int error;
146 145
147 error = xfs_btree_get_rec(cur, &rec, stat); 146 error = xfs_btree_get_rec(cur, &rec, stat);
148 if (!error && *stat == 1) { 147 if (!error && *stat == 1) {
149 *ino = be32_to_cpu(rec->inobt.ir_startino); 148 *ino = be32_to_cpu(rec->inobt.ir_startino);
150 *fcnt = be32_to_cpu(rec->inobt.ir_freecount); 149 *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
151 *free = be64_to_cpu(rec->inobt.ir_free); 150 *free = be64_to_cpu(rec->inobt.ir_free);
152 } 151 }
153 return error; 152 return error;
154 } 153 }
155 154
156 /* 155 /*
157 * Allocate new inodes in the allocation group specified by agbp. 156 * Allocate new inodes in the allocation group specified by agbp.
158 * Return 0 for success, else error code. 157 * Return 0 for success, else error code.
159 */ 158 */
STATIC int				/* error code or 0 */
xfs_ialloc_ag_alloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_buf_t	*agbp,		/* alloc group buffer */
	int		*alloc)		/* output: 1 if inodes allocated, 0 if not */
{
	xfs_agi_t	*agi;		/* allocation group header */
	xfs_alloc_arg_t	args;		/* allocation argument structure */
	int		blks_per_cluster;  /* fs blocks per inode cluster */
	xfs_btree_cur_t	*cur;		/* inode btree cursor */
	xfs_daddr_t	d;		/* disk addr of buffer */
	xfs_agnumber_t	agno;
	int		error;
	xfs_buf_t	*fbuf;		/* new free inodes' buffer */
	xfs_dinode_t	*free;		/* new free inode structure */
	int		i;		/* inode counter */
	int		j;		/* block counter */
	int		nbufs;		/* num bufs of new inodes */
	xfs_agino_t	newino;		/* new first inode's number */
	xfs_agino_t	newlen;		/* new number of inodes */
	int		ninodes;	/* num inodes per buf */
	xfs_agino_t	thisino;	/* current inode number, for loop */
	int		version;	/* inode version number to use */
	int		isaligned = 0;	/* inode allocation at stripe unit */
					/* boundary */
	unsigned int	gen;

	args.tp = tp;
	args.mp = tp->t_mountp;

	/*
	 * Locking will ensure that we don't have two callers in here
	 * at one time.
	 */
	newlen = XFS_IALLOC_INODES(args.mp);
	/* Enforce the mount-wide maximum inode count, if one is set. */
	if (args.mp->m_maxicount &&
	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
		return XFS_ERROR(ENOSPC);
	args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
	/*
	 * First try to allocate inodes contiguous with the last-allocated
	 * chunk of inodes.  If the filesystem is striped, this will fill
	 * an entire stripe unit with inodes.
	 */
	agi = XFS_BUF_TO_AGI(agbp);
	newino = be32_to_cpu(agi->agi_newino);
	args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
			XFS_IALLOC_BLOCKS(args.mp);
	if (likely(newino != NULLAGINO &&
		  (args.agbno < be32_to_cpu(agi->agi_length)))) {
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		args.type = XFS_ALLOCTYPE_THIS_BNO;
		args.mod = args.total = args.wasdel = args.isfl =
			args.userdata = args.minalignslop = 0;
		args.prod = 1;

		/*
		 * We need to take into account alignment here to ensure that
		 * we don't modify the free list if we fail to have an exact
		 * block.  If we don't have an exact match, and every other
		 * allocation attempt fails, we'll end up cancelling
		 * a dirty transaction and shutting down.
		 *
		 * For an exact allocation, alignment must be 1,
		 * however we need to take cluster alignment into account when
		 * fixing up the freelist.  Use the minalignslop field to
		 * indicate that extra blocks might be required for alignment,
		 * but not to use them in the actual exact allocation.
		 */
		args.alignment = 1;
		args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;

		/* Allow space for the inode btree to split. */
		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	} else
		args.fsbno = NULLFSBLOCK;

	if (unlikely(args.fsbno == NULLFSBLOCK)) {
		/*
		 * Set the alignment for the allocation.
		 * If stripe alignment is turned on then align at stripe unit
		 * boundary.
		 * If the cluster size is smaller than a filesystem block
		 * then we're doing I/O for inodes in filesystem block size
		 * pieces, so don't need alignment anyway.
		 */
		isaligned = 0;
		if (args.mp->m_sinoalign) {
			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
			args.alignment = args.mp->m_dalign;
			isaligned = 1;
		} else
			args.alignment = xfs_ialloc_cluster_alignment(&args);
		/*
		 * Need to figure out where to allocate the inode blocks.
		 * Ideally they should be spaced out through the a.g.
		 * For now, just allocate blocks up front.
		 */
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		/*
		 * Allocate a fixed-size extent of inodes.
		 */
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.mod = args.total = args.wasdel = args.isfl =
			args.userdata = args.minalignslop = 0;
		args.prod = 1;
		/*
		 * Allow space for the inode btree to split.
		 */
		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	/*
	 * If stripe alignment is turned on, then try again with cluster
	 * alignment.
	 */
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		args.alignment = xfs_ialloc_cluster_alignment(&args);
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	/* All allocation attempts failed: not an error, just no inodes. */
	if (args.fsbno == NULLFSBLOCK) {
		*alloc = 0;
		return 0;
	}
	ASSERT(args.len == args.minlen);
	/*
	 * Convert the results.
	 */
	newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
	/*
	 * Loop over the new block(s), filling in the inodes.
	 * For small block sizes, manipulate the inodes in buffers
	 * which are multiples of the blocks size.
	 */
	if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
		blks_per_cluster = 1;
		nbufs = (int)args.len;
		ninodes = args.mp->m_sb.sb_inopblock;
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
				   args.mp->m_sb.sb_blocksize;
		nbufs = (int)args.len / blks_per_cluster;
		ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
	}
	/*
	 * Figure out what version number to use in the inodes we create.
	 * If the superblock version has caught up to the one that supports
	 * the new inode format, then use the new inode version.  Otherwise
	 * use the old version so that old kernels will continue to be
	 * able to use the file system.
	 */
	if (xfs_sb_version_hasnlink(&args.mp->m_sb))
		version = 2;
	else
		version = 1;

	/*
	 * Seed the new inode cluster with a random generation number. This
	 * prevents short-term reuse of generation numbers if a chunk is
	 * freed and then immediately reallocated. We use random numbers
	 * rather than a linear progression to prevent the next generation
	 * number from being easily guessable.
	 */
	gen = random32();
	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
				     args.agbno + (j * blks_per_cluster));
		fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
					 args.mp->m_bsize * blks_per_cluster,
					 XFS_BUF_LOCK);
		ASSERT(fbuf);
		ASSERT(!XFS_BUF_GETERROR(fbuf));

		/*
		 * Initialize all inodes in this buffer and then log them.
		 *
		 * XXX: It would be much better if we had just one transaction
		 * to log a whole cluster of inodes instead of all the
		 * individual transactions causing a lot of log traffic.
		 */
		xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
		for (i = 0; i < ninodes; i++) {
			int	ioffset = i << args.mp->m_sb.sb_inodelog;
			uint	isize = sizeof(struct xfs_dinode);

			/* Stamp just the fields a free on-disk inode needs. */
			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			free->di_version = version;
			free->di_gen = cpu_to_be32(gen);
			free->di_next_unlinked = cpu_to_be32(NULLAGINO);
			xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
		}
		xfs_trans_inode_alloc_buf(tp, fbuf);
	}
	/* Account for the new chunk in the AGI and the per-ag counters. */
	be32_add_cpu(&agi->agi_count, newlen);
	be32_add_cpu(&agi->agi_freecount, newlen);
	agno = be32_to_cpu(agi->agi_seqno);
	down_read(&args.mp->m_peraglock);
	args.mp->m_perag[agno].pagi_freecount += newlen;
	up_read(&args.mp->m_peraglock);
	agi->agi_newino = cpu_to_be32(newino);
	/*
	 * Insert records describing the new inode chunk into the btree.
	 */
	cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
	for (thisino = newino;
	     thisino < newino + newlen;
	     thisino += XFS_INODES_PER_CHUNK) {
		if ((error = xfs_inobt_lookup_eq(cur, thisino,
				XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		/* The chunk must not already be in the btree. */
		ASSERT(i == 0);
		if ((error = xfs_btree_insert(cur, &i))) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		ASSERT(i == 1);
	}
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	/*
	 * Log allocation group header fields
	 */
	xfs_ialloc_log_agi(tp, agbp,
		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
	/*
	 * Modify/log superblock values for inode count and inode free count.
	 */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
	*alloc = 1;
	return 0;
}
410 409
411 STATIC_INLINE xfs_agnumber_t 410 STATIC_INLINE xfs_agnumber_t
412 xfs_ialloc_next_ag( 411 xfs_ialloc_next_ag(
413 xfs_mount_t *mp) 412 xfs_mount_t *mp)
414 { 413 {
415 xfs_agnumber_t agno; 414 xfs_agnumber_t agno;
416 415
417 spin_lock(&mp->m_agirotor_lock); 416 spin_lock(&mp->m_agirotor_lock);
418 agno = mp->m_agirotor; 417 agno = mp->m_agirotor;
419 if (++mp->m_agirotor == mp->m_maxagi) 418 if (++mp->m_agirotor == mp->m_maxagi)
420 mp->m_agirotor = 0; 419 mp->m_agirotor = 0;
421 spin_unlock(&mp->m_agirotor_lock); 420 spin_unlock(&mp->m_agirotor_lock);
422 421
423 return agno; 422 return agno;
424 } 423 }
425 424
426 /* 425 /*
427 * Select an allocation group to look for a free inode in, based on the parent 426 * Select an allocation group to look for a free inode in, based on the parent
428 * inode and then mode. Return the allocation group buffer. 427 * inode and then mode. Return the allocation group buffer.
429 */ 428 */
430 STATIC xfs_buf_t * /* allocation group buffer */ 429 STATIC xfs_buf_t * /* allocation group buffer */
431 xfs_ialloc_ag_select( 430 xfs_ialloc_ag_select(
432 xfs_trans_t *tp, /* transaction pointer */ 431 xfs_trans_t *tp, /* transaction pointer */
433 xfs_ino_t parent, /* parent directory inode number */ 432 xfs_ino_t parent, /* parent directory inode number */
434 mode_t mode, /* bits set to indicate file type */ 433 mode_t mode, /* bits set to indicate file type */
435 int okalloc) /* ok to allocate more space */ 434 int okalloc) /* ok to allocate more space */
436 { 435 {
437 xfs_buf_t *agbp; /* allocation group header buffer */ 436 xfs_buf_t *agbp; /* allocation group header buffer */
438 xfs_agnumber_t agcount; /* number of ag's in the filesystem */ 437 xfs_agnumber_t agcount; /* number of ag's in the filesystem */
439 xfs_agnumber_t agno; /* current ag number */ 438 xfs_agnumber_t agno; /* current ag number */
440 int flags; /* alloc buffer locking flags */ 439 int flags; /* alloc buffer locking flags */
441 xfs_extlen_t ineed; /* blocks needed for inode allocation */ 440 xfs_extlen_t ineed; /* blocks needed for inode allocation */
442 xfs_extlen_t longest = 0; /* longest extent available */ 441 xfs_extlen_t longest = 0; /* longest extent available */
443 xfs_mount_t *mp; /* mount point structure */ 442 xfs_mount_t *mp; /* mount point structure */
444 int needspace; /* file mode implies space allocated */ 443 int needspace; /* file mode implies space allocated */
445 xfs_perag_t *pag; /* per allocation group data */ 444 xfs_perag_t *pag; /* per allocation group data */
446 xfs_agnumber_t pagno; /* parent (starting) ag number */ 445 xfs_agnumber_t pagno; /* parent (starting) ag number */
447 446
448 /* 447 /*
449 * Files of these types need at least one block if length > 0 448 * Files of these types need at least one block if length > 0
450 * (and they won't fit in the inode, but that's hard to figure out). 449 * (and they won't fit in the inode, but that's hard to figure out).
451 */ 450 */
452 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); 451 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
453 mp = tp->t_mountp; 452 mp = tp->t_mountp;
454 agcount = mp->m_maxagi; 453 agcount = mp->m_maxagi;
455 if (S_ISDIR(mode)) 454 if (S_ISDIR(mode))
456 pagno = xfs_ialloc_next_ag(mp); 455 pagno = xfs_ialloc_next_ag(mp);
457 else { 456 else {
458 pagno = XFS_INO_TO_AGNO(mp, parent); 457 pagno = XFS_INO_TO_AGNO(mp, parent);
459 if (pagno >= agcount) 458 if (pagno >= agcount)
460 pagno = 0; 459 pagno = 0;
461 } 460 }
462 ASSERT(pagno < agcount); 461 ASSERT(pagno < agcount);
463 /* 462 /*
464 * Loop through allocation groups, looking for one with a little 463 * Loop through allocation groups, looking for one with a little
465 * free space in it. Note we don't look for free inodes, exactly. 464 * free space in it. Note we don't look for free inodes, exactly.
466 * Instead, we include whether there is a need to allocate inodes 465 * Instead, we include whether there is a need to allocate inodes
467 * to mean that blocks must be allocated for them, 466 * to mean that blocks must be allocated for them,
468 * if none are currently free. 467 * if none are currently free.
469 */ 468 */
470 agno = pagno; 469 agno = pagno;
471 flags = XFS_ALLOC_FLAG_TRYLOCK; 470 flags = XFS_ALLOC_FLAG_TRYLOCK;
472 down_read(&mp->m_peraglock); 471 down_read(&mp->m_peraglock);
473 for (;;) { 472 for (;;) {
474 pag = &mp->m_perag[agno]; 473 pag = &mp->m_perag[agno];
475 if (!pag->pagi_init) { 474 if (!pag->pagi_init) {
476 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 475 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
477 agbp = NULL; 476 agbp = NULL;
478 goto nextag; 477 goto nextag;
479 } 478 }
480 } else 479 } else
481 agbp = NULL; 480 agbp = NULL;
482 481
483 if (!pag->pagi_inodeok) { 482 if (!pag->pagi_inodeok) {
484 xfs_ialloc_next_ag(mp); 483 xfs_ialloc_next_ag(mp);
485 goto unlock_nextag; 484 goto unlock_nextag;
486 } 485 }
487 486
488 /* 487 /*
489 * Is there enough free space for the file plus a block 488 * Is there enough free space for the file plus a block
490 * of inodes (if we need to allocate some)? 489 * of inodes (if we need to allocate some)?
491 */ 490 */
492 ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp); 491 ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
493 if (ineed && !pag->pagf_init) { 492 if (ineed && !pag->pagf_init) {
494 if (agbp == NULL && 493 if (agbp == NULL &&
495 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 494 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
496 agbp = NULL; 495 agbp = NULL;
497 goto nextag; 496 goto nextag;
498 } 497 }
499 (void)xfs_alloc_pagf_init(mp, tp, agno, flags); 498 (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
500 } 499 }
501 if (!ineed || pag->pagf_init) { 500 if (!ineed || pag->pagf_init) {
502 if (ineed && !(longest = pag->pagf_longest)) 501 if (ineed && !(longest = pag->pagf_longest))
503 longest = pag->pagf_flcount > 0; 502 longest = pag->pagf_flcount > 0;
504 if (!ineed || 503 if (!ineed ||
505 (pag->pagf_freeblks >= needspace + ineed && 504 (pag->pagf_freeblks >= needspace + ineed &&
506 longest >= ineed && 505 longest >= ineed &&
507 okalloc)) { 506 okalloc)) {
508 if (agbp == NULL && 507 if (agbp == NULL &&
509 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 508 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
510 agbp = NULL; 509 agbp = NULL;
511 goto nextag; 510 goto nextag;
512 } 511 }
513 up_read(&mp->m_peraglock); 512 up_read(&mp->m_peraglock);
514 return agbp; 513 return agbp;
515 } 514 }
516 } 515 }
517 unlock_nextag: 516 unlock_nextag:
518 if (agbp) 517 if (agbp)
519 xfs_trans_brelse(tp, agbp); 518 xfs_trans_brelse(tp, agbp);
520 nextag: 519 nextag:
521 /* 520 /*
522 * No point in iterating over the rest, if we're shutting 521 * No point in iterating over the rest, if we're shutting
523 * down. 522 * down.
524 */ 523 */
525 if (XFS_FORCED_SHUTDOWN(mp)) { 524 if (XFS_FORCED_SHUTDOWN(mp)) {
526 up_read(&mp->m_peraglock); 525 up_read(&mp->m_peraglock);
527 return NULL; 526 return NULL;
528 } 527 }
529 agno++; 528 agno++;
530 if (agno >= agcount) 529 if (agno >= agcount)
531 agno = 0; 530 agno = 0;
532 if (agno == pagno) { 531 if (agno == pagno) {
533 if (flags == 0) { 532 if (flags == 0) {
534 up_read(&mp->m_peraglock); 533 up_read(&mp->m_peraglock);
535 return NULL; 534 return NULL;
536 } 535 }
537 flags = 0; 536 flags = 0;
538 } 537 }
539 } 538 }
540 } 539 }
541 540
/*
 * Visible inode allocation functions.
 */

/*
 * Allocate an inode on disk.
 * Mode is used to tell whether the new inode will need space, and whether
 * it is a directory.
 *
 * The arguments IO_agbp and alloc_done are defined to work within
 * the constraint of one allocation per transaction.
 * xfs_dialloc() is designed to be called twice if it has to do an
 * allocation to make more free inodes.  On the first call,
 * IO_agbp should be set to NULL.  If an inode is available,
 * i.e., xfs_dialloc() did not need to do an allocation, an inode
 * number is returned.  In this case, IO_agbp would be set to the
 * current ag_buf and alloc_done set to false.
 * If an allocation needed to be done, xfs_dialloc would return
 * the current ag_buf in IO_agbp and set alloc_done to true.
 * The caller should then commit the current transaction, allocate a new
 * transaction, and call xfs_dialloc() again, passing in the previous
 * value of IO_agbp.  IO_agbp should be held across the transactions.
 * Since the agbp is locked across the two calls, the second call is
 * guaranteed to have a free inode available.
 *
 * Once we successfully pick an inode its number is returned and the
 * on-disk data structures are updated.  The inode itself is not read
 * in, since doing so would break ordering constraints with xfs_reclaim.
 *
 * Returns 0 on success (*inop set to the new inode number, or NULLFSINO
 * when no inode could be found/allocated), or a positive errno-style
 * error code on failure.
 */
int
xfs_dialloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ino_t	parent,		/* parent inode (directory) */
	mode_t		mode,		/* mode bits for new inode */
	int		okalloc,	/* ok to allocate more space */
	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */
	boolean_t	*alloc_done,	/* true if we needed to replenish
					   inode freelist */
	xfs_ino_t	*inop)		/* inode number allocated */
{
	xfs_agnumber_t	agcount;	/* number of allocation groups */
	xfs_buf_t	*agbp;		/* allocation group header's buffer */
	xfs_agnumber_t	agno;		/* allocation group number */
	xfs_agi_t	*agi;		/* allocation group header structure */
	xfs_btree_cur_t	*cur;		/* inode allocation btree cursor */
	int		error;		/* error return value */
	int		i;		/* result code */
	int		ialloced;	/* inode allocation status */
	int		noroom = 0;	/* no space for inode blk allocation */
	xfs_ino_t	ino;		/* fs-relative inode to be returned */
	/* REFERENCED */
	int		j;		/* result code */
	xfs_mount_t	*mp;		/* file system mount structure */
	int		offset;		/* index of inode in chunk */
	xfs_agino_t	pagino;		/* parent's a.g. relative inode # */
	xfs_agnumber_t	pagno;		/* parent's allocation group number */
	xfs_inobt_rec_incore_t rec;	/* inode allocation record */
	xfs_agnumber_t	tagno;		/* testing allocation group number */
	xfs_btree_cur_t	*tcur;		/* temp cursor */
	xfs_inobt_rec_incore_t trec;	/* temp inode allocation record */


	if (*IO_agbp == NULL) {
		/*
		 * We do not have an agbp, so select an initial allocation
		 * group for inode allocation.
		 */
		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
		/*
		 * Couldn't find an allocation group satisfying the
		 * criteria, give up.
		 */
		if (!agbp) {
			*inop = NULLFSINO;
			return 0;
		}
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	} else {
		/*
		 * Continue where we left off before.  In this case, we
		 * know that the allocation group has free inodes.
		 */
		agbp = *IO_agbp;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
	}
	mp = tp->t_mountp;
	agcount = mp->m_sb.sb_agcount;
	agno = be32_to_cpu(agi->agi_seqno);
	tagno = agno;
	pagno = XFS_INO_TO_AGNO(mp, parent);
	pagino = XFS_INO_TO_AGINO(mp, parent);

	/*
	 * If we have already hit the ceiling of inode blocks then clear
	 * okalloc so we scan all available agi structures for a free
	 * inode.
	 */

	if (mp->m_maxicount &&
	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
		noroom = 1;
		okalloc = 0;
	}

	/*
	 * Loop until we find an allocation group that either has free inodes
	 * or in which we can allocate some inodes.  Iterate through the
	 * allocation groups upward, wrapping at the end.
	 * (agi_freecount is on-disk endian, but testing it against zero
	 * without conversion is fine - zero is zero in any byte order.)
	 */
	*alloc_done = B_FALSE;
	while (!agi->agi_freecount) {
		/*
		 * Don't do anything if we're not supposed to allocate
		 * any blocks, just go on to the next ag.
		 */
		if (okalloc) {
			/*
			 * Try to allocate some new inodes in the allocation
			 * group.
			 */
			if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
				xfs_trans_brelse(tp, agbp);
				if (error == ENOSPC) {
					*inop = NULLFSINO;
					return 0;
				} else
					return error;
			}
			if (ialloced) {
				/*
				 * We successfully allocated some inodes, return
				 * the current context to the caller so that it
				 * can commit the current transaction and call
				 * us again where we left off.
				 */
				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
				*alloc_done = B_TRUE;
				*IO_agbp = agbp;
				*inop = NULLFSINO;
				return 0;
			}
		}
		/*
		 * If it failed, give up on this ag.
		 */
		xfs_trans_brelse(tp, agbp);
		/*
		 * Go on to the next ag: get its ag header.
		 */
nextag:
		if (++tagno == agcount)
			tagno = 0;
		if (tagno == agno) {
			/* Wrapped all the way around without success. */
			*inop = NULLFSINO;
			return noroom ? ENOSPC : 0;
		}
		down_read(&mp->m_peraglock);
		if (mp->m_perag[tagno].pagi_inodeok == 0) {
			up_read(&mp->m_peraglock);
			goto nextag;
		}
		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
		up_read(&mp->m_peraglock);
		if (error)
			goto nextag;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	}
	/*
	 * Here with an allocation group that has a free inode.
	 * Reset agno since we may have chosen a new ag in the
	 * loop above.
	 */
	agno = tagno;
	*IO_agbp = NULL;
	cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
	/*
	 * If pagino is 0 (this is the root inode allocation) use newino.
	 * This must work because we've just allocated some.
	 */
	if (!pagino)
		pagino = be32_to_cpu(agi->agi_newino);
#ifdef DEBUG
	/*
	 * Sanity check: for a single-level btree, walk all records and
	 * verify the summed per-record free counts match the AGI header.
	 */
	if (cur->bc_nlevels == 1) {
		int freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_btree_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);

		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	/*
	 * If in the same a.g. as the parent, try to get near the parent.
	 */
	if (pagno == agno) {
		if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
			goto error0;
		if (i != 0 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * Found a free inode in the same chunk
			 * as parent, done.
			 */
		}
		/*
		 * In the same a.g. as parent, but parent's chunk is full.
		 */
		else {
			int	doneleft;	/* done, to the left */
			int	doneright;	/* done, to the right */

			if (error)
				goto error0;
			ASSERT(i == 1);
			ASSERT(j == 1);
			/*
			 * Duplicate the cursor, search left & right
			 * simultaneously.
			 */
			if ((error = xfs_btree_dup_cursor(cur, &tcur)))
				goto error0;
			/*
			 * Search left with tcur, back up 1 record.
			 */
			if ((error = xfs_btree_decrement(tcur, 0, &i)))
				goto error1;
			doneleft = !i;
			if (!doneleft) {
				if ((error = xfs_inobt_get_rec(tcur,
						&trec.ir_startino,
						&trec.ir_freecount,
						&trec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Search right with cur, go forward 1 record.
			 */
			if ((error = xfs_btree_increment(cur, 0, &i)))
				goto error1;
			doneright = !i;
			if (!doneright) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount,
						&rec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Loop until we find the closest inode chunk
			 * with a free one.
			 */
			while (!doneleft || !doneright) {
				int	useleft;  /* using left inode
						     chunk this time */

				/*
				 * Figure out which block is closer,
				 * if both are valid.  Distance is measured
				 * from pagino to the nearest edge of each
				 * candidate chunk.
				 */
				if (!doneleft && !doneright)
					useleft =
						pagino -
						(trec.ir_startino +
						 XFS_INODES_PER_CHUNK - 1) <
						 rec.ir_startino - pagino;
				else
					useleft = !doneleft;
				/*
				 * If checking the left, does it have
				 * free inodes?
				 */
				if (useleft && trec.ir_freecount) {
					/*
					 * Yes, set it up as the chunk to use.
					 */
					rec = trec;
					xfs_btree_del_cursor(cur,
						XFS_BTREE_NOERROR);
					cur = tcur;
					break;
				}
				/*
				 * If checking the right, does it have
				 * free inodes?
				 */
				if (!useleft && rec.ir_freecount) {
					/*
					 * Yes, it's already set up.
					 */
					xfs_btree_del_cursor(tcur,
						XFS_BTREE_NOERROR);
					break;
				}
				/*
				 * If used the left, get another one
				 * further left.
				 */
				if (useleft) {
					if ((error = xfs_btree_decrement(tcur, 0,
							&i)))
						goto error1;
					doneleft = !i;
					if (!doneleft) {
						if ((error = xfs_inobt_get_rec(
							    tcur,
							    &trec.ir_startino,
							    &trec.ir_freecount,
							    &trec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
				/*
				 * If used the right, get another one
				 * further right.
				 */
				else {
					if ((error = xfs_btree_increment(cur, 0,
							&i)))
						goto error1;
					doneright = !i;
					if (!doneright) {
						if ((error = xfs_inobt_get_rec(
							    cur,
							    &rec.ir_startino,
							    &rec.ir_freecount,
							    &rec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
			}
			ASSERT(!doneleft || !doneright);
		}
	}
	/*
	 * In a different a.g. from the parent.
	 * See if the most recently allocated block has any free.
	 */
	else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
		if ((error = xfs_inobt_lookup_eq(cur,
				be32_to_cpu(agi->agi_newino), 0, 0, &i)))
			goto error0;
		if (i == 1 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * The last chunk allocated in the group still has
			 * a free inode.
			 */
		}
		/*
		 * None left in the last group, search the whole a.g.
		 */
		else {
			if (error)
				goto error0;
			if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
				goto error0;
			ASSERT(i == 1);
			for (;;) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount, &rec.ir_free,
						&i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
				if (rec.ir_freecount > 0)
					break;
				if ((error = xfs_btree_increment(cur, 0, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			}
		}
	}
	/*
	 * 'rec' now holds a chunk with at least one free inode: claim it
	 * and update the btree record, AGI header, per-ag counters and
	 * superblock free-inode count.
	 */
	offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
	ASSERT(offset >= 0);
	ASSERT(offset < XFS_INODES_PER_CHUNK);
	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
				   XFS_INODES_PER_CHUNK) == 0);
	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
	XFS_INOBT_CLR_FREE(&rec, offset);
	rec.ir_freecount--;
	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
			rec.ir_free)))
		goto error0;
	be32_add_cpu(&agi->agi_freecount, -1);
	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
	down_read(&mp->m_peraglock);
	mp->m_perag[tagno].pagi_freecount--;
	up_read(&mp->m_peraglock);
#ifdef DEBUG
	/* Re-verify freecount consistency after the update (see above). */
	if (cur->bc_nlevels == 1) {
		int freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_btree_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);
		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
	*inop = ino;
	return 0;
error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
986 985
987 /* 986 /*
988 * Free disk inode. Carefully avoids touching the incore inode, all 987 * Free disk inode. Carefully avoids touching the incore inode, all
989 * manipulations incore are the caller's responsibility. 988 * manipulations incore are the caller's responsibility.
990 * The on-disk inode is not changed by this operation, only the 989 * The on-disk inode is not changed by this operation, only the
991 * btree (free inode mask) is changed. 990 * btree (free inode mask) is changed.
992 */ 991 */
993 int 992 int
994 xfs_difree( 993 xfs_difree(
995 xfs_trans_t *tp, /* transaction pointer */ 994 xfs_trans_t *tp, /* transaction pointer */
996 xfs_ino_t inode, /* inode to be freed */ 995 xfs_ino_t inode, /* inode to be freed */
997 xfs_bmap_free_t *flist, /* extents to free */ 996 xfs_bmap_free_t *flist, /* extents to free */
998 int *delete, /* set if inode cluster was deleted */ 997 int *delete, /* set if inode cluster was deleted */
999 xfs_ino_t *first_ino) /* first inode in deleted cluster */ 998 xfs_ino_t *first_ino) /* first inode in deleted cluster */
1000 { 999 {
1001 /* REFERENCED */ 1000 /* REFERENCED */
1002 xfs_agblock_t agbno; /* block number containing inode */ 1001 xfs_agblock_t agbno; /* block number containing inode */
1003 xfs_buf_t *agbp; /* buffer containing allocation group header */ 1002 xfs_buf_t *agbp; /* buffer containing allocation group header */
1004 xfs_agino_t agino; /* inode number relative to allocation group */ 1003 xfs_agino_t agino; /* inode number relative to allocation group */
1005 xfs_agnumber_t agno; /* allocation group number */ 1004 xfs_agnumber_t agno; /* allocation group number */
1006 xfs_agi_t *agi; /* allocation group header */ 1005 xfs_agi_t *agi; /* allocation group header */
1007 xfs_btree_cur_t *cur; /* inode btree cursor */ 1006 xfs_btree_cur_t *cur; /* inode btree cursor */
1008 int error; /* error return value */ 1007 int error; /* error return value */
1009 int i; /* result code */ 1008 int i; /* result code */
1010 int ilen; /* inodes in an inode cluster */ 1009 int ilen; /* inodes in an inode cluster */
1011 xfs_mount_t *mp; /* mount structure for filesystem */ 1010 xfs_mount_t *mp; /* mount structure for filesystem */
1012 int off; /* offset of inode in inode chunk */ 1011 int off; /* offset of inode in inode chunk */
1013 xfs_inobt_rec_incore_t rec; /* btree record */ 1012 xfs_inobt_rec_incore_t rec; /* btree record */
1014 1013
1015 mp = tp->t_mountp; 1014 mp = tp->t_mountp;
1016 1015
1017 /* 1016 /*
1018 * Break up inode number into its components. 1017 * Break up inode number into its components.
1019 */ 1018 */
1020 agno = XFS_INO_TO_AGNO(mp, inode); 1019 agno = XFS_INO_TO_AGNO(mp, inode);
1021 if (agno >= mp->m_sb.sb_agcount) { 1020 if (agno >= mp->m_sb.sb_agcount) {
1022 cmn_err(CE_WARN, 1021 cmn_err(CE_WARN,
1023 "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", 1022 "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.",
1024 agno, mp->m_sb.sb_agcount, mp->m_fsname); 1023 agno, mp->m_sb.sb_agcount, mp->m_fsname);
1025 ASSERT(0); 1024 ASSERT(0);
1026 return XFS_ERROR(EINVAL); 1025 return XFS_ERROR(EINVAL);
1027 } 1026 }
1028 agino = XFS_INO_TO_AGINO(mp, inode); 1027 agino = XFS_INO_TO_AGINO(mp, inode);
1029 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 1028 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1030 cmn_err(CE_WARN, 1029 cmn_err(CE_WARN,
1031 "xfs_difree: inode != XFS_AGINO_TO_INO() " 1030 "xfs_difree: inode != XFS_AGINO_TO_INO() "
1032 "(%llu != %llu) on %s. Returning EINVAL.", 1031 "(%llu != %llu) on %s. Returning EINVAL.",
1033 (unsigned long long)inode, 1032 (unsigned long long)inode,
1034 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino), 1033 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
1035 mp->m_fsname); 1034 mp->m_fsname);
1036 ASSERT(0); 1035 ASSERT(0);
1037 return XFS_ERROR(EINVAL); 1036 return XFS_ERROR(EINVAL);
1038 } 1037 }
1039 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1038 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1040 if (agbno >= mp->m_sb.sb_agblocks) { 1039 if (agbno >= mp->m_sb.sb_agblocks) {
1041 cmn_err(CE_WARN, 1040 cmn_err(CE_WARN,
1042 "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", 1041 "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.",
1043 agbno, mp->m_sb.sb_agblocks, mp->m_fsname); 1042 agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
1044 ASSERT(0); 1043 ASSERT(0);
1045 return XFS_ERROR(EINVAL); 1044 return XFS_ERROR(EINVAL);
1046 } 1045 }
1047 /* 1046 /*
1048 * Get the allocation group header. 1047 * Get the allocation group header.
1049 */ 1048 */
1050 down_read(&mp->m_peraglock); 1049 down_read(&mp->m_peraglock);
1051 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1050 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1052 up_read(&mp->m_peraglock); 1051 up_read(&mp->m_peraglock);
1053 if (error) { 1052 if (error) {
1054 cmn_err(CE_WARN, 1053 cmn_err(CE_WARN,
1055 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 1054 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
1056 error, mp->m_fsname); 1055 error, mp->m_fsname);
1057 return error; 1056 return error;
1058 } 1057 }
1059 agi = XFS_BUF_TO_AGI(agbp); 1058 agi = XFS_BUF_TO_AGI(agbp);
1060 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 1059 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
1061 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1060 ASSERT(agbno < be32_to_cpu(agi->agi_length));
1062 /* 1061 /*
1063 * Initialize the cursor. 1062 * Initialize the cursor.
1064 */ 1063 */
1065 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1064 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1066 #ifdef DEBUG 1065 #ifdef DEBUG
1067 if (cur->bc_nlevels == 1) { 1066 if (cur->bc_nlevels == 1) {
1068 int freecount = 0; 1067 int freecount = 0;
1069 1068
1070 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1069 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1071 goto error0; 1070 goto error0;
1072 do { 1071 do {
1073 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, 1072 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
1074 &rec.ir_freecount, &rec.ir_free, &i))) 1073 &rec.ir_freecount, &rec.ir_free, &i)))
1075 goto error0; 1074 goto error0;
1076 if (i) { 1075 if (i) {
1077 freecount += rec.ir_freecount; 1076 freecount += rec.ir_freecount;
1078 if ((error = xfs_btree_increment(cur, 0, &i))) 1077 if ((error = xfs_btree_increment(cur, 0, &i)))
1079 goto error0; 1078 goto error0;
1080 } 1079 }
1081 } while (i == 1); 1080 } while (i == 1);
1082 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || 1081 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1083 XFS_FORCED_SHUTDOWN(mp)); 1082 XFS_FORCED_SHUTDOWN(mp));
1084 } 1083 }
1085 #endif 1084 #endif
1086 /* 1085 /*
1087 * Look for the entry describing this inode. 1086 * Look for the entry describing this inode.
1088 */ 1087 */
1089 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { 1088 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
1090 cmn_err(CE_WARN, 1089 cmn_err(CE_WARN,
1091 "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", 1090 "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.",
1092 error, mp->m_fsname); 1091 error, mp->m_fsname);
1093 goto error0; 1092 goto error0;
1094 } 1093 }
1095 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1094 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1096 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, 1095 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount,
1097 &rec.ir_free, &i))) { 1096 &rec.ir_free, &i))) {
1098 cmn_err(CE_WARN, 1097 cmn_err(CE_WARN,
1099 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", 1098 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
1100 error, mp->m_fsname); 1099 error, mp->m_fsname);
1101 goto error0; 1100 goto error0;
1102 } 1101 }
1103 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1102 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1104 /* 1103 /*
1105 * Get the offset in the inode chunk. 1104 * Get the offset in the inode chunk.
1106 */ 1105 */
1107 off = agino - rec.ir_startino; 1106 off = agino - rec.ir_startino;
1108 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); 1107 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
1109 ASSERT(!XFS_INOBT_IS_FREE(&rec, off)); 1108 ASSERT(!XFS_INOBT_IS_FREE(&rec, off));
1110 /* 1109 /*
1111 * Mark the inode free & increment the count. 1110 * Mark the inode free & increment the count.
1112 */ 1111 */
1113 XFS_INOBT_SET_FREE(&rec, off); 1112 XFS_INOBT_SET_FREE(&rec, off);
1114 rec.ir_freecount++; 1113 rec.ir_freecount++;
1115 1114
1116 /* 1115 /*
1117 * When an inode cluster is free, it becomes eligible for removal 1116 * When an inode cluster is free, it becomes eligible for removal
1118 */ 1117 */
1119 if (!(mp->m_flags & XFS_MOUNT_IKEEP) && 1118 if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
1120 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { 1119 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
1121 1120
1122 *delete = 1; 1121 *delete = 1;
1123 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); 1122 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
1124 1123
1125 /* 1124 /*
1126 * Remove the inode cluster from the AGI B+Tree, adjust the 1125 * Remove the inode cluster from the AGI B+Tree, adjust the
1127 * AGI and Superblock inode counts, and mark the disk space 1126 * AGI and Superblock inode counts, and mark the disk space
1128 * to be freed when the transaction is committed. 1127 * to be freed when the transaction is committed.
1129 */ 1128 */
1130 ilen = XFS_IALLOC_INODES(mp); 1129 ilen = XFS_IALLOC_INODES(mp);
1131 be32_add_cpu(&agi->agi_count, -ilen); 1130 be32_add_cpu(&agi->agi_count, -ilen);
1132 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1131 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1133 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1132 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1134 down_read(&mp->m_peraglock); 1133 down_read(&mp->m_peraglock);
1135 mp->m_perag[agno].pagi_freecount -= ilen - 1; 1134 mp->m_perag[agno].pagi_freecount -= ilen - 1;
1136 up_read(&mp->m_peraglock); 1135 up_read(&mp->m_peraglock);
1137 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1136 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1138 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1137 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1139 1138
1140 if ((error = xfs_btree_delete(cur, &i))) { 1139 if ((error = xfs_btree_delete(cur, &i))) {
1141 cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", 1140 cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
1142 error, mp->m_fsname); 1141 error, mp->m_fsname);
1143 goto error0; 1142 goto error0;
1144 } 1143 }
1145 1144
1146 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, 1145 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
1147 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), 1146 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
1148 XFS_IALLOC_BLOCKS(mp), flist, mp); 1147 XFS_IALLOC_BLOCKS(mp), flist, mp);
1149 } else { 1148 } else {
1150 *delete = 0; 1149 *delete = 0;
1151 1150
1152 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { 1151 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
1153 cmn_err(CE_WARN, 1152 cmn_err(CE_WARN,
1154 "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", 1153 "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
1155 error, mp->m_fsname); 1154 error, mp->m_fsname);
1156 goto error0; 1155 goto error0;
1157 } 1156 }
1158 /* 1157 /*
1159 * Change the inode free counts and log the ag/sb changes. 1158 * Change the inode free counts and log the ag/sb changes.
1160 */ 1159 */
1161 be32_add_cpu(&agi->agi_freecount, 1); 1160 be32_add_cpu(&agi->agi_freecount, 1);
1162 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1161 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1163 down_read(&mp->m_peraglock); 1162 down_read(&mp->m_peraglock);
1164 mp->m_perag[agno].pagi_freecount++; 1163 mp->m_perag[agno].pagi_freecount++;
1165 up_read(&mp->m_peraglock); 1164 up_read(&mp->m_peraglock);
1166 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1165 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1167 } 1166 }
1168 1167
1169 #ifdef DEBUG 1168 #ifdef DEBUG
1170 if (cur->bc_nlevels == 1) { 1169 if (cur->bc_nlevels == 1) {
1171 int freecount = 0; 1170 int freecount = 0;
1172 1171
1173 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1172 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1174 goto error0; 1173 goto error0;
1175 do { 1174 do {
1176 if ((error = xfs_inobt_get_rec(cur, 1175 if ((error = xfs_inobt_get_rec(cur,
1177 &rec.ir_startino, 1176 &rec.ir_startino,
1178 &rec.ir_freecount, 1177 &rec.ir_freecount,
1179 &rec.ir_free, &i))) 1178 &rec.ir_free, &i)))
1180 goto error0; 1179 goto error0;
1181 if (i) { 1180 if (i) {
1182 freecount += rec.ir_freecount; 1181 freecount += rec.ir_freecount;
1183 if ((error = xfs_btree_increment(cur, 0, &i))) 1182 if ((error = xfs_btree_increment(cur, 0, &i)))
1184 goto error0; 1183 goto error0;
1185 } 1184 }
1186 } while (i == 1); 1185 } while (i == 1);
1187 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || 1186 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1188 XFS_FORCED_SHUTDOWN(mp)); 1187 XFS_FORCED_SHUTDOWN(mp));
1189 } 1188 }
1190 #endif 1189 #endif
1191 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1190 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1192 return 0; 1191 return 0;
1193 1192
1194 error0: 1193 error0:
1195 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1194 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1196 return error; 1195 return error;
1197 } 1196 }
1198 1197
1199 /* 1198 /*
1200 * Return the location of the inode in imap, for mapping it into a buffer. 1199 * Return the location of the inode in imap, for mapping it into a buffer.
1201 */ 1200 */
1202 int 1201 int
1203 xfs_imap( 1202 xfs_imap(
1204 xfs_mount_t *mp, /* file system mount structure */ 1203 xfs_mount_t *mp, /* file system mount structure */
1205 xfs_trans_t *tp, /* transaction pointer */ 1204 xfs_trans_t *tp, /* transaction pointer */
1206 xfs_ino_t ino, /* inode to locate */ 1205 xfs_ino_t ino, /* inode to locate */
1207 struct xfs_imap *imap, /* location map structure */ 1206 struct xfs_imap *imap, /* location map structure */
1208 uint flags) /* flags for inode btree lookup */ 1207 uint flags) /* flags for inode btree lookup */
1209 { 1208 {
1210 xfs_agblock_t agbno; /* block number of inode in the alloc group */ 1209 xfs_agblock_t agbno; /* block number of inode in the alloc group */
1211 xfs_agino_t agino; /* inode number within alloc group */ 1210 xfs_agino_t agino; /* inode number within alloc group */
1212 xfs_agnumber_t agno; /* allocation group number */ 1211 xfs_agnumber_t agno; /* allocation group number */
1213 int blks_per_cluster; /* num blocks per inode cluster */ 1212 int blks_per_cluster; /* num blocks per inode cluster */
1214 xfs_agblock_t chunk_agbno; /* first block in inode chunk */ 1213 xfs_agblock_t chunk_agbno; /* first block in inode chunk */
1215 xfs_agblock_t cluster_agbno; /* first block in inode cluster */ 1214 xfs_agblock_t cluster_agbno; /* first block in inode cluster */
1216 int error; /* error code */ 1215 int error; /* error code */
1217 int offset; /* index of inode in its buffer */ 1216 int offset; /* index of inode in its buffer */
1218 int offset_agbno; /* blks from chunk start to inode */ 1217 int offset_agbno; /* blks from chunk start to inode */
1219 1218
1220 ASSERT(ino != NULLFSINO); 1219 ASSERT(ino != NULLFSINO);
1221 1220
1222 /* 1221 /*
1223 * Split up the inode number into its parts. 1222 * Split up the inode number into its parts.
1224 */ 1223 */
1225 agno = XFS_INO_TO_AGNO(mp, ino); 1224 agno = XFS_INO_TO_AGNO(mp, ino);
1226 agino = XFS_INO_TO_AGINO(mp, ino); 1225 agino = XFS_INO_TO_AGINO(mp, ino);
1227 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1226 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1228 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1227 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1229 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1228 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1230 #ifdef DEBUG 1229 #ifdef DEBUG
1231 /* no diagnostics for bulkstat, ino comes from userspace */ 1230 /* no diagnostics for bulkstat, ino comes from userspace */
1232 if (flags & XFS_IMAP_BULKSTAT) 1231 if (flags & XFS_IMAP_BULKSTAT)
1233 return XFS_ERROR(EINVAL); 1232 return XFS_ERROR(EINVAL);
1234 if (agno >= mp->m_sb.sb_agcount) { 1233 if (agno >= mp->m_sb.sb_agcount) {
1235 xfs_fs_cmn_err(CE_ALERT, mp, 1234 xfs_fs_cmn_err(CE_ALERT, mp,
1236 "xfs_imap: agno (%d) >= " 1235 "xfs_imap: agno (%d) >= "
1237 "mp->m_sb.sb_agcount (%d)", 1236 "mp->m_sb.sb_agcount (%d)",
1238 agno, mp->m_sb.sb_agcount); 1237 agno, mp->m_sb.sb_agcount);
1239 } 1238 }
1240 if (agbno >= mp->m_sb.sb_agblocks) { 1239 if (agbno >= mp->m_sb.sb_agblocks) {
1241 xfs_fs_cmn_err(CE_ALERT, mp, 1240 xfs_fs_cmn_err(CE_ALERT, mp,
1242 "xfs_imap: agbno (0x%llx) >= " 1241 "xfs_imap: agbno (0x%llx) >= "
1243 "mp->m_sb.sb_agblocks (0x%lx)", 1242 "mp->m_sb.sb_agblocks (0x%lx)",
1244 (unsigned long long) agbno, 1243 (unsigned long long) agbno,
1245 (unsigned long) mp->m_sb.sb_agblocks); 1244 (unsigned long) mp->m_sb.sb_agblocks);
1246 } 1245 }
1247 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1246 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1248 xfs_fs_cmn_err(CE_ALERT, mp, 1247 xfs_fs_cmn_err(CE_ALERT, mp,
1249 "xfs_imap: ino (0x%llx) != " 1248 "xfs_imap: ino (0x%llx) != "
1250 "XFS_AGINO_TO_INO(mp, agno, agino) " 1249 "XFS_AGINO_TO_INO(mp, agno, agino) "
1251 "(0x%llx)", 1250 "(0x%llx)",
1252 ino, XFS_AGINO_TO_INO(mp, agno, agino)); 1251 ino, XFS_AGINO_TO_INO(mp, agno, agino));
1253 } 1252 }
1254 xfs_stack_trace(); 1253 xfs_stack_trace();
1255 #endif /* DEBUG */ 1254 #endif /* DEBUG */
1256 return XFS_ERROR(EINVAL); 1255 return XFS_ERROR(EINVAL);
1257 } 1256 }
1258 1257
1259 /* 1258 /*
1260 * If the inode cluster size is the same as the blocksize or 1259 * If the inode cluster size is the same as the blocksize or
1261 * smaller we get to the buffer by simple arithmetics. 1260 * smaller we get to the buffer by simple arithmetics.
1262 */ 1261 */
1263 if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { 1262 if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
1264 offset = XFS_INO_TO_OFFSET(mp, ino); 1263 offset = XFS_INO_TO_OFFSET(mp, ino);
1265 ASSERT(offset < mp->m_sb.sb_inopblock); 1264 ASSERT(offset < mp->m_sb.sb_inopblock);
1266 1265
1267 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 1266 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
1268 imap->im_len = XFS_FSB_TO_BB(mp, 1); 1267 imap->im_len = XFS_FSB_TO_BB(mp, 1);
1269 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1268 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1270 return 0; 1269 return 0;
1271 } 1270 }
1272 1271
1273 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; 1272 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1274 1273
1275 /* 1274 /*
1276 * If we get a block number passed from bulkstat we can use it to 1275 * If we get a block number passed from bulkstat we can use it to
1277 * find the buffer easily. 1276 * find the buffer easily.
1278 */ 1277 */
1279 if (imap->im_blkno) { 1278 if (imap->im_blkno) {
1280 offset = XFS_INO_TO_OFFSET(mp, ino); 1279 offset = XFS_INO_TO_OFFSET(mp, ino);
1281 ASSERT(offset < mp->m_sb.sb_inopblock); 1280 ASSERT(offset < mp->m_sb.sb_inopblock);
1282 1281
1283 cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno); 1282 cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno);
1284 offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; 1283 offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
1285 1284
1286 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); 1285 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1287 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1286 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1288 return 0; 1287 return 0;
1289 } 1288 }
1290 1289
1291 /* 1290 /*
1292 * If the inode chunks are aligned then use simple maths to 1291 * If the inode chunks are aligned then use simple maths to
1293 * find the location. Otherwise we have to do a btree 1292 * find the location. Otherwise we have to do a btree
1294 * lookup to find the location. 1293 * lookup to find the location.
1295 */ 1294 */
1296 if (mp->m_inoalign_mask) { 1295 if (mp->m_inoalign_mask) {
1297 offset_agbno = agbno & mp->m_inoalign_mask; 1296 offset_agbno = agbno & mp->m_inoalign_mask;
1298 chunk_agbno = agbno - offset_agbno; 1297 chunk_agbno = agbno - offset_agbno;
1299 } else { 1298 } else {
1300 xfs_btree_cur_t *cur; /* inode btree cursor */ 1299 xfs_btree_cur_t *cur; /* inode btree cursor */
1301 xfs_agino_t chunk_agino; /* first agino in inode chunk */ 1300 xfs_agino_t chunk_agino; /* first agino in inode chunk */
1302 __int32_t chunk_cnt; /* count of free inodes in chunk */ 1301 __int32_t chunk_cnt; /* count of free inodes in chunk */
1303 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ 1302 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1304 xfs_buf_t *agbp; /* agi buffer */ 1303 xfs_buf_t *agbp; /* agi buffer */
1305 int i; /* temp state */ 1304 int i; /* temp state */
1306 1305
1307 down_read(&mp->m_peraglock); 1306 down_read(&mp->m_peraglock);
1308 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1307 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1309 up_read(&mp->m_peraglock); 1308 up_read(&mp->m_peraglock);
1310 if (error) { 1309 if (error) {
1311 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1310 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1312 "xfs_ialloc_read_agi() returned " 1311 "xfs_ialloc_read_agi() returned "
1313 "error %d, agno %d", 1312 "error %d, agno %d",
1314 error, agno); 1313 error, agno);
1315 return error; 1314 return error;
1316 } 1315 }
1317 1316
1318 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1317 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1319 error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); 1318 error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i);
1320 if (error) { 1319 if (error) {
1321 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1320 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1322 "xfs_inobt_lookup_le() failed"); 1321 "xfs_inobt_lookup_le() failed");
1323 goto error0; 1322 goto error0;
1324 } 1323 }
1325 1324
1326 error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, 1325 error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
1327 &chunk_free, &i); 1326 &chunk_free, &i);
1328 if (error) { 1327 if (error) {
1329 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1328 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1330 "xfs_inobt_get_rec() failed"); 1329 "xfs_inobt_get_rec() failed");
1331 goto error0; 1330 goto error0;
1332 } 1331 }
1333 if (i == 0) { 1332 if (i == 0) {
1334 #ifdef DEBUG 1333 #ifdef DEBUG
1335 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1334 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1336 "xfs_inobt_get_rec() failed"); 1335 "xfs_inobt_get_rec() failed");
1337 #endif /* DEBUG */ 1336 #endif /* DEBUG */
1338 error = XFS_ERROR(EINVAL); 1337 error = XFS_ERROR(EINVAL);
1339 } 1338 }
1340 error0: 1339 error0:
1341 xfs_trans_brelse(tp, agbp); 1340 xfs_trans_brelse(tp, agbp);
1342 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1341 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1343 if (error) 1342 if (error)
1344 return error; 1343 return error;
1345 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); 1344 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
1346 offset_agbno = agbno - chunk_agbno; 1345 offset_agbno = agbno - chunk_agbno;
1347 } 1346 }
1348 1347
1349 ASSERT(agbno >= chunk_agbno); 1348 ASSERT(agbno >= chunk_agbno);
1350 cluster_agbno = chunk_agbno + 1349 cluster_agbno = chunk_agbno +
1351 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1350 ((offset_agbno / blks_per_cluster) * blks_per_cluster);
1352 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1351 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
1353 XFS_INO_TO_OFFSET(mp, ino); 1352 XFS_INO_TO_OFFSET(mp, ino);
1354 1353
1355 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 1354 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
1356 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); 1355 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1357 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); 1356 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1358 1357
1359 /* 1358 /*
1360 * If the inode number maps to a block outside the bounds 1359 * If the inode number maps to a block outside the bounds
1361 * of the file system then return NULL rather than calling 1360 * of the file system then return NULL rather than calling
1362 * read_buf and panicing when we get an error from the 1361 * read_buf and panicing when we get an error from the
1363 * driver. 1362 * driver.
1364 */ 1363 */
1365 if ((imap->im_blkno + imap->im_len) > 1364 if ((imap->im_blkno + imap->im_len) >
1366 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 1365 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
1367 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1366 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1368 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " 1367 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
1369 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", 1368 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
1370 (unsigned long long) imap->im_blkno, 1369 (unsigned long long) imap->im_blkno,
1371 (unsigned long long) imap->im_len, 1370 (unsigned long long) imap->im_len,
1372 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1371 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1373 return XFS_ERROR(EINVAL); 1372 return XFS_ERROR(EINVAL);
1374 } 1373 }
1375 1374
1376 return 0; 1375 return 0;
1377 } 1376 }
1378 1377
1379 /* 1378 /*
1380 * Compute and fill in value of m_in_maxlevels. 1379 * Compute and fill in value of m_in_maxlevels.
1381 */ 1380 */
1382 void 1381 void
1383 xfs_ialloc_compute_maxlevels( 1382 xfs_ialloc_compute_maxlevels(
1384 xfs_mount_t *mp) /* file system mount structure */ 1383 xfs_mount_t *mp) /* file system mount structure */
1385 { 1384 {
1386 int level; 1385 int level;
1387 uint maxblocks; 1386 uint maxblocks;
1388 uint maxleafents; 1387 uint maxleafents;
1389 int minleafrecs; 1388 int minleafrecs;
1390 int minnoderecs; 1389 int minnoderecs;
1391 1390
1392 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> 1391 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
1393 XFS_INODES_PER_CHUNK_LOG; 1392 XFS_INODES_PER_CHUNK_LOG;
1394 minleafrecs = mp->m_alloc_mnr[0]; 1393 minleafrecs = mp->m_alloc_mnr[0];
1395 minnoderecs = mp->m_alloc_mnr[1]; 1394 minnoderecs = mp->m_alloc_mnr[1];
1396 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1395 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1397 for (level = 1; maxblocks > 1; level++) 1396 for (level = 1; maxblocks > 1; level++)
1398 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1397 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1399 mp->m_in_maxlevels = level; 1398 mp->m_in_maxlevels = level;
1400 } 1399 }
1401 1400
1402 /* 1401 /*
1403 * Log specified fields for the ag hdr (inode section) 1402 * Log specified fields for the ag hdr (inode section)
1404 */ 1403 */
1405 void 1404 void
1406 xfs_ialloc_log_agi( 1405 xfs_ialloc_log_agi(
1407 xfs_trans_t *tp, /* transaction pointer */ 1406 xfs_trans_t *tp, /* transaction pointer */
1408 xfs_buf_t *bp, /* allocation group header buffer */ 1407 xfs_buf_t *bp, /* allocation group header buffer */
1409 int fields) /* bitmask of fields to log */ 1408 int fields) /* bitmask of fields to log */
1410 { 1409 {
1411 int first; /* first byte number */ 1410 int first; /* first byte number */
1412 int last; /* last byte number */ 1411 int last; /* last byte number */
1413 static const short offsets[] = { /* field starting offsets */ 1412 static const short offsets[] = { /* field starting offsets */
1414 /* keep in sync with bit definitions */ 1413 /* keep in sync with bit definitions */
1415 offsetof(xfs_agi_t, agi_magicnum), 1414 offsetof(xfs_agi_t, agi_magicnum),
1416 offsetof(xfs_agi_t, agi_versionnum), 1415 offsetof(xfs_agi_t, agi_versionnum),
1417 offsetof(xfs_agi_t, agi_seqno), 1416 offsetof(xfs_agi_t, agi_seqno),
1418 offsetof(xfs_agi_t, agi_length), 1417 offsetof(xfs_agi_t, agi_length),
1419 offsetof(xfs_agi_t, agi_count), 1418 offsetof(xfs_agi_t, agi_count),
1420 offsetof(xfs_agi_t, agi_root), 1419 offsetof(xfs_agi_t, agi_root),
1421 offsetof(xfs_agi_t, agi_level), 1420 offsetof(xfs_agi_t, agi_level),
1422 offsetof(xfs_agi_t, agi_freecount), 1421 offsetof(xfs_agi_t, agi_freecount),
1423 offsetof(xfs_agi_t, agi_newino), 1422 offsetof(xfs_agi_t, agi_newino),
1424 offsetof(xfs_agi_t, agi_dirino), 1423 offsetof(xfs_agi_t, agi_dirino),
1425 offsetof(xfs_agi_t, agi_unlinked), 1424 offsetof(xfs_agi_t, agi_unlinked),
1426 sizeof(xfs_agi_t) 1425 sizeof(xfs_agi_t)
1427 }; 1426 };
1428 #ifdef DEBUG 1427 #ifdef DEBUG
1429 xfs_agi_t *agi; /* allocation group header */ 1428 xfs_agi_t *agi; /* allocation group header */
1430 1429
1431 agi = XFS_BUF_TO_AGI(bp); 1430 agi = XFS_BUF_TO_AGI(bp);
1432 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 1431 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
1433 #endif 1432 #endif
1434 /* 1433 /*
1435 * Compute byte offsets for the first and last fields. 1434 * Compute byte offsets for the first and last fields.
1436 */ 1435 */
1437 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 1436 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
1438 /* 1437 /*
1439 * Log the allocation group inode header buffer. 1438 * Log the allocation group inode header buffer.
1440 */ 1439 */
1441 xfs_trans_log_buf(tp, bp, first, last); 1440 xfs_trans_log_buf(tp, bp, first, last);
1442 } 1441 }
1443 1442
1444 #ifdef DEBUG 1443 #ifdef DEBUG
1445 STATIC void 1444 STATIC void
1446 xfs_check_agi_unlinked( 1445 xfs_check_agi_unlinked(
1447 struct xfs_agi *agi) 1446 struct xfs_agi *agi)
1448 { 1447 {
1449 int i; 1448 int i;
1450 1449
1451 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) 1450 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
1452 ASSERT(agi->agi_unlinked[i]); 1451 ASSERT(agi->agi_unlinked[i]);
1453 } 1452 }
1454 #else 1453 #else
1455 #define xfs_check_agi_unlinked(agi) 1454 #define xfs_check_agi_unlinked(agi)
1456 #endif 1455 #endif
1457 1456
1458 /* 1457 /*
1459 * Read in the allocation group header (inode allocation section) 1458 * Read in the allocation group header (inode allocation section)
1460 */ 1459 */
1461 int 1460 int
1462 xfs_read_agi( 1461 xfs_read_agi(
1463 struct xfs_mount *mp, /* file system mount structure */ 1462 struct xfs_mount *mp, /* file system mount structure */
1464 struct xfs_trans *tp, /* transaction pointer */ 1463 struct xfs_trans *tp, /* transaction pointer */
1465 xfs_agnumber_t agno, /* allocation group number */ 1464 xfs_agnumber_t agno, /* allocation group number */
1466 struct xfs_buf **bpp) /* allocation group hdr buf */ 1465 struct xfs_buf **bpp) /* allocation group hdr buf */
1467 { 1466 {
1468 struct xfs_agi *agi; /* allocation group header */ 1467 struct xfs_agi *agi; /* allocation group header */
1469 int agi_ok; /* agi is consistent */ 1468 int agi_ok; /* agi is consistent */
1470 int error; 1469 int error;
1471 1470
1472 ASSERT(agno != NULLAGNUMBER); 1471 ASSERT(agno != NULLAGNUMBER);
1473 1472
1474 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 1473 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
1475 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 1474 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
1476 XFS_FSS_TO_BB(mp, 1), 0, bpp); 1475 XFS_FSS_TO_BB(mp, 1), 0, bpp);
1477 if (error) 1476 if (error)
1478 return error; 1477 return error;
1479 1478
1480 ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); 1479 ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
1481 agi = XFS_BUF_TO_AGI(*bpp); 1480 agi = XFS_BUF_TO_AGI(*bpp);
1482 1481
1483 /* 1482 /*
1484 * Validate the magic number of the agi block. 1483 * Validate the magic number of the agi block.
1485 */ 1484 */
1486 agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && 1485 agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1487 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && 1486 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
1488 be32_to_cpu(agi->agi_seqno) == agno; 1487 be32_to_cpu(agi->agi_seqno) == agno;
1489 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1488 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1490 XFS_RANDOM_IALLOC_READ_AGI))) { 1489 XFS_RANDOM_IALLOC_READ_AGI))) {
1491 XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, 1490 XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
1492 mp, agi); 1491 mp, agi);
1493 xfs_trans_brelse(tp, *bpp); 1492 xfs_trans_brelse(tp, *bpp);
1494 return XFS_ERROR(EFSCORRUPTED); 1493 return XFS_ERROR(EFSCORRUPTED);
1495 } 1494 }
1496 1495
1497 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF); 1496 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
1498 1497
1499 xfs_check_agi_unlinked(agi); 1498 xfs_check_agi_unlinked(agi);
1500 return 0; 1499 return 0;
1501 } 1500 }
1502 1501
1503 int 1502 int
1504 xfs_ialloc_read_agi( 1503 xfs_ialloc_read_agi(
1505 struct xfs_mount *mp, /* file system mount structure */ 1504 struct xfs_mount *mp, /* file system mount structure */
1506 struct xfs_trans *tp, /* transaction pointer */ 1505 struct xfs_trans *tp, /* transaction pointer */
1507 xfs_agnumber_t agno, /* allocation group number */ 1506 xfs_agnumber_t agno, /* allocation group number */
1508 struct xfs_buf **bpp) /* allocation group hdr buf */ 1507 struct xfs_buf **bpp) /* allocation group hdr buf */
1509 { 1508 {
1510 struct xfs_agi *agi; /* allocation group header */ 1509 struct xfs_agi *agi; /* allocation group header */
1511 struct xfs_perag *pag; /* per allocation group data */ 1510 struct xfs_perag *pag; /* per allocation group data */
1512 int error; 1511 int error;
1513 1512
1514 error = xfs_read_agi(mp, tp, agno, bpp); 1513 error = xfs_read_agi(mp, tp, agno, bpp);
1515 if (error) 1514 if (error)
1516 return error; 1515 return error;
1517 1516
1518 agi = XFS_BUF_TO_AGI(*bpp); 1517 agi = XFS_BUF_TO_AGI(*bpp);
1519 pag = &mp->m_perag[agno]; 1518 pag = &mp->m_perag[agno];
1520 1519
1521 if (!pag->pagi_init) { 1520 if (!pag->pagi_init) {
1522 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1521 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1523 pag->pagi_count = be32_to_cpu(agi->agi_count); 1522 pag->pagi_count = be32_to_cpu(agi->agi_count);
1524 pag->pagi_init = 1; 1523 pag->pagi_init = 1;
1525 } 1524 }
1526 1525
1527 /* 1526 /*
1528 * It's possible for these to be out of sync if 1527 * It's possible for these to be out of sync if
1529 * we are in the middle of a forced shutdown. 1528 * we are in the middle of a forced shutdown.
1530 */ 1529 */
1531 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 1530 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1532 XFS_FORCED_SHUTDOWN(mp)); 1531 XFS_FORCED_SHUTDOWN(mp));
1533 return 0; 1532 return 0;
1534 } 1533 }
1535 1534
1536 /* 1535 /*
1537 * Read in the agi to initialise the per-ag data in the mount structure 1536 * Read in the agi to initialise the per-ag data in the mount structure
1538 */ 1537 */
1539 int 1538 int
1540 xfs_ialloc_pagi_init( 1539 xfs_ialloc_pagi_init(
1541 xfs_mount_t *mp, /* file system mount structure */ 1540 xfs_mount_t *mp, /* file system mount structure */
1542 xfs_trans_t *tp, /* transaction pointer */ 1541 xfs_trans_t *tp, /* transaction pointer */
1543 xfs_agnumber_t agno) /* allocation group number */ 1542 xfs_agnumber_t agno) /* allocation group number */
1544 { 1543 {
1545 xfs_buf_t *bp = NULL; 1544 xfs_buf_t *bp = NULL;
1546 int error; 1545 int error;
1547 1546
1548 error = xfs_ialloc_read_agi(mp, tp, agno, &bp); 1547 error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
1549 if (error) 1548 if (error)
1550 return error; 1549 return error;
1551 if (bp) 1550 if (bp)
1552 xfs_trans_brelse(tp, bp); 1551 xfs_trans_brelse(tp, bp);
1553 return 0; 1552 return 0;
1554 } 1553 }
1555 1554
fs/xfs/xfs_imap.h
1 /* File was deleted
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #ifndef __XFS_IMAP_H__
19 #define __XFS_IMAP_H__
20
21 /*
22 * This is the structure passed to xfs_imap() to map
23 * an inode number to its on disk location.
24 */
25 typedef struct xfs_imap {
26 xfs_daddr_t im_blkno; /* starting BB of inode chunk */
27 uint im_len; /* length in BBs of inode chunk */
28 ushort im_boffset; /* inode offset in block in bytes */
29 } xfs_imap_t;
30
31 #endif /* __XFS_IMAP_H__ */
32 1 /*
1 /* 1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include <linux/log2.h> 18 #include <linux/log2.h>
19 19
20 #include "xfs.h" 20 #include "xfs.h"
21 #include "xfs_fs.h" 21 #include "xfs_fs.h"
22 #include "xfs_types.h" 22 #include "xfs_types.h"
23 #include "xfs_bit.h" 23 #include "xfs_bit.h"
24 #include "xfs_log.h" 24 #include "xfs_log.h"
25 #include "xfs_inum.h" 25 #include "xfs_inum.h"
26 #include "xfs_imap.h"
27 #include "xfs_trans.h" 26 #include "xfs_trans.h"
28 #include "xfs_trans_priv.h" 27 #include "xfs_trans_priv.h"
29 #include "xfs_sb.h" 28 #include "xfs_sb.h"
30 #include "xfs_ag.h" 29 #include "xfs_ag.h"
31 #include "xfs_dir2.h" 30 #include "xfs_dir2.h"
32 #include "xfs_dmapi.h" 31 #include "xfs_dmapi.h"
33 #include "xfs_mount.h" 32 #include "xfs_mount.h"
34 #include "xfs_bmap_btree.h" 33 #include "xfs_bmap_btree.h"
35 #include "xfs_alloc_btree.h" 34 #include "xfs_alloc_btree.h"
36 #include "xfs_ialloc_btree.h" 35 #include "xfs_ialloc_btree.h"
37 #include "xfs_dir2_sf.h" 36 #include "xfs_dir2_sf.h"
38 #include "xfs_attr_sf.h" 37 #include "xfs_attr_sf.h"
39 #include "xfs_dinode.h" 38 #include "xfs_dinode.h"
40 #include "xfs_inode.h" 39 #include "xfs_inode.h"
41 #include "xfs_buf_item.h" 40 #include "xfs_buf_item.h"
42 #include "xfs_inode_item.h" 41 #include "xfs_inode_item.h"
43 #include "xfs_btree.h" 42 #include "xfs_btree.h"
44 #include "xfs_btree_trace.h" 43 #include "xfs_btree_trace.h"
45 #include "xfs_alloc.h" 44 #include "xfs_alloc.h"
46 #include "xfs_ialloc.h" 45 #include "xfs_ialloc.h"
47 #include "xfs_bmap.h" 46 #include "xfs_bmap.h"
48 #include "xfs_rw.h" 47 #include "xfs_rw.h"
49 #include "xfs_error.h" 48 #include "xfs_error.h"
50 #include "xfs_utils.h" 49 #include "xfs_utils.h"
51 #include "xfs_dir2_trace.h" 50 #include "xfs_dir2_trace.h"
52 #include "xfs_quota.h" 51 #include "xfs_quota.h"
53 #include "xfs_acl.h" 52 #include "xfs_acl.h"
54 #include "xfs_filestream.h" 53 #include "xfs_filestream.h"
55 #include "xfs_vnodeops.h" 54 #include "xfs_vnodeops.h"
56 55
57 kmem_zone_t *xfs_ifork_zone; 56 kmem_zone_t *xfs_ifork_zone;
58 kmem_zone_t *xfs_inode_zone; 57 kmem_zone_t *xfs_inode_zone;
59 58
60 /* 59 /*
61 * Used in xfs_itruncate(). This is the maximum number of extents 60 * Used in xfs_itruncate(). This is the maximum number of extents
62 * freed from a file in a single transaction. 61 * freed from a file in a single transaction.
63 */ 62 */
64 #define XFS_ITRUNC_MAX_EXTENTS 2 63 #define XFS_ITRUNC_MAX_EXTENTS 2
65 64
66 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 65 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
67 STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 66 STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
68 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 67 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
69 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 68 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
70 69
71 #ifdef DEBUG 70 #ifdef DEBUG
72 /* 71 /*
73 * Make sure that the extents in the given memory buffer 72 * Make sure that the extents in the given memory buffer
74 * are valid. 73 * are valid.
75 */ 74 */
76 STATIC void 75 STATIC void
77 xfs_validate_extents( 76 xfs_validate_extents(
78 xfs_ifork_t *ifp, 77 xfs_ifork_t *ifp,
79 int nrecs, 78 int nrecs,
80 xfs_exntfmt_t fmt) 79 xfs_exntfmt_t fmt)
81 { 80 {
82 xfs_bmbt_irec_t irec; 81 xfs_bmbt_irec_t irec;
83 xfs_bmbt_rec_host_t rec; 82 xfs_bmbt_rec_host_t rec;
84 int i; 83 int i;
85 84
86 for (i = 0; i < nrecs; i++) { 85 for (i = 0; i < nrecs; i++) {
87 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 86 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
88 rec.l0 = get_unaligned(&ep->l0); 87 rec.l0 = get_unaligned(&ep->l0);
89 rec.l1 = get_unaligned(&ep->l1); 88 rec.l1 = get_unaligned(&ep->l1);
90 xfs_bmbt_get_all(&rec, &irec); 89 xfs_bmbt_get_all(&rec, &irec);
91 if (fmt == XFS_EXTFMT_NOSTATE) 90 if (fmt == XFS_EXTFMT_NOSTATE)
92 ASSERT(irec.br_state == XFS_EXT_NORM); 91 ASSERT(irec.br_state == XFS_EXT_NORM);
93 } 92 }
94 } 93 }
95 #else /* DEBUG */ 94 #else /* DEBUG */
96 #define xfs_validate_extents(ifp, nrecs, fmt) 95 #define xfs_validate_extents(ifp, nrecs, fmt)
97 #endif /* DEBUG */ 96 #endif /* DEBUG */
98 97
99 /* 98 /*
100 * Check that none of the inode's in the buffer have a next 99 * Check that none of the inode's in the buffer have a next
101 * unlinked field of 0. 100 * unlinked field of 0.
102 */ 101 */
103 #if defined(DEBUG) 102 #if defined(DEBUG)
104 void 103 void
105 xfs_inobp_check( 104 xfs_inobp_check(
106 xfs_mount_t *mp, 105 xfs_mount_t *mp,
107 xfs_buf_t *bp) 106 xfs_buf_t *bp)
108 { 107 {
109 int i; 108 int i;
110 int j; 109 int j;
111 xfs_dinode_t *dip; 110 xfs_dinode_t *dip;
112 111
113 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 112 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
114 113
115 for (i = 0; i < j; i++) { 114 for (i = 0; i < j; i++) {
116 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 115 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
117 i * mp->m_sb.sb_inodesize); 116 i * mp->m_sb.sb_inodesize);
118 if (!dip->di_next_unlinked) { 117 if (!dip->di_next_unlinked) {
119 xfs_fs_cmn_err(CE_ALERT, mp, 118 xfs_fs_cmn_err(CE_ALERT, mp,
120 "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", 119 "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.",
121 bp); 120 bp);
122 ASSERT(dip->di_next_unlinked); 121 ASSERT(dip->di_next_unlinked);
123 } 122 }
124 } 123 }
125 } 124 }
126 #endif 125 #endif
127 126
128 /* 127 /*
129 * Find the buffer associated with the given inode map 128 * Find the buffer associated with the given inode map
130 * We do basic validation checks on the buffer once it has been 129 * We do basic validation checks on the buffer once it has been
131 * retrieved from disk. 130 * retrieved from disk.
132 */ 131 */
133 STATIC int 132 STATIC int
134 xfs_imap_to_bp( 133 xfs_imap_to_bp(
135 xfs_mount_t *mp, 134 xfs_mount_t *mp,
136 xfs_trans_t *tp, 135 xfs_trans_t *tp,
137 xfs_imap_t *imap, 136 struct xfs_imap *imap,
138 xfs_buf_t **bpp, 137 xfs_buf_t **bpp,
139 uint buf_flags, 138 uint buf_flags,
140 uint imap_flags) 139 uint imap_flags)
141 { 140 {
142 int error; 141 int error;
143 int i; 142 int i;
144 int ni; 143 int ni;
145 xfs_buf_t *bp; 144 xfs_buf_t *bp;
146 145
147 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 146 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
148 (int)imap->im_len, buf_flags, &bp); 147 (int)imap->im_len, buf_flags, &bp);
149 if (error) { 148 if (error) {
150 if (error != EAGAIN) { 149 if (error != EAGAIN) {
151 cmn_err(CE_WARN, 150 cmn_err(CE_WARN,
152 "xfs_imap_to_bp: xfs_trans_read_buf()returned " 151 "xfs_imap_to_bp: xfs_trans_read_buf()returned "
153 "an error %d on %s. Returning error.", 152 "an error %d on %s. Returning error.",
154 error, mp->m_fsname); 153 error, mp->m_fsname);
155 } else { 154 } else {
156 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 155 ASSERT(buf_flags & XFS_BUF_TRYLOCK);
157 } 156 }
158 return error; 157 return error;
159 } 158 }
160 159
161 /* 160 /*
162 * Validate the magic number and version of every inode in the buffer 161 * Validate the magic number and version of every inode in the buffer
163 * (if DEBUG kernel) or the first inode in the buffer, otherwise. 162 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
164 */ 163 */
165 #ifdef DEBUG 164 #ifdef DEBUG
166 ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; 165 ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
167 #else /* usual case */ 166 #else /* usual case */
168 ni = 1; 167 ni = 1;
169 #endif 168 #endif
170 169
171 for (i = 0; i < ni; i++) { 170 for (i = 0; i < ni; i++) {
172 int di_ok; 171 int di_ok;
173 xfs_dinode_t *dip; 172 xfs_dinode_t *dip;
174 173
175 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 174 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
176 (i << mp->m_sb.sb_inodelog)); 175 (i << mp->m_sb.sb_inodelog));
177 di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && 176 di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
178 XFS_DINODE_GOOD_VERSION(dip->di_version); 177 XFS_DINODE_GOOD_VERSION(dip->di_version);
179 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 178 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
180 XFS_ERRTAG_ITOBP_INOTOBP, 179 XFS_ERRTAG_ITOBP_INOTOBP,
181 XFS_RANDOM_ITOBP_INOTOBP))) { 180 XFS_RANDOM_ITOBP_INOTOBP))) {
182 if (imap_flags & XFS_IMAP_BULKSTAT) { 181 if (imap_flags & XFS_IMAP_BULKSTAT) {
183 xfs_trans_brelse(tp, bp); 182 xfs_trans_brelse(tp, bp);
184 return XFS_ERROR(EINVAL); 183 return XFS_ERROR(EINVAL);
185 } 184 }
186 XFS_CORRUPTION_ERROR("xfs_imap_to_bp", 185 XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
187 XFS_ERRLEVEL_HIGH, mp, dip); 186 XFS_ERRLEVEL_HIGH, mp, dip);
188 #ifdef DEBUG 187 #ifdef DEBUG
189 cmn_err(CE_PANIC, 188 cmn_err(CE_PANIC,
190 "Device %s - bad inode magic/vsn " 189 "Device %s - bad inode magic/vsn "
191 "daddr %lld #%d (magic=%x)", 190 "daddr %lld #%d (magic=%x)",
192 XFS_BUFTARG_NAME(mp->m_ddev_targp), 191 XFS_BUFTARG_NAME(mp->m_ddev_targp),
193 (unsigned long long)imap->im_blkno, i, 192 (unsigned long long)imap->im_blkno, i,
194 be16_to_cpu(dip->di_magic)); 193 be16_to_cpu(dip->di_magic));
195 #endif 194 #endif
196 xfs_trans_brelse(tp, bp); 195 xfs_trans_brelse(tp, bp);
197 return XFS_ERROR(EFSCORRUPTED); 196 return XFS_ERROR(EFSCORRUPTED);
198 } 197 }
199 } 198 }
200 199
201 xfs_inobp_check(mp, bp); 200 xfs_inobp_check(mp, bp);
202 201
203 /* 202 /*
204 * Mark the buffer as an inode buffer now that it looks good 203 * Mark the buffer as an inode buffer now that it looks good
205 */ 204 */
206 XFS_BUF_SET_VTYPE(bp, B_FS_INO); 205 XFS_BUF_SET_VTYPE(bp, B_FS_INO);
207 206
208 *bpp = bp; 207 *bpp = bp;
209 return 0; 208 return 0;
210 } 209 }
211 210
212 /* 211 /*
213 * This routine is called to map an inode number within a file 212 * This routine is called to map an inode number within a file
214 * system to the buffer containing the on-disk version of the 213 * system to the buffer containing the on-disk version of the
215 * inode. It returns a pointer to the buffer containing the 214 * inode. It returns a pointer to the buffer containing the
216 * on-disk inode in the bpp parameter, and in the dip parameter 215 * on-disk inode in the bpp parameter, and in the dip parameter
217 * it returns a pointer to the on-disk inode within that buffer. 216 * it returns a pointer to the on-disk inode within that buffer.
218 * 217 *
219 * If a non-zero error is returned, then the contents of bpp and 218 * If a non-zero error is returned, then the contents of bpp and
220 * dipp are undefined. 219 * dipp are undefined.
221 * 220 *
222 * Use xfs_imap() to determine the size and location of the 221 * Use xfs_imap() to determine the size and location of the
223 * buffer to read from disk. 222 * buffer to read from disk.
224 */ 223 */
225 int 224 int
226 xfs_inotobp( 225 xfs_inotobp(
227 xfs_mount_t *mp, 226 xfs_mount_t *mp,
228 xfs_trans_t *tp, 227 xfs_trans_t *tp,
229 xfs_ino_t ino, 228 xfs_ino_t ino,
230 xfs_dinode_t **dipp, 229 xfs_dinode_t **dipp,
231 xfs_buf_t **bpp, 230 xfs_buf_t **bpp,
232 int *offset, 231 int *offset,
233 uint imap_flags) 232 uint imap_flags)
234 { 233 {
235 xfs_imap_t imap; 234 struct xfs_imap imap;
236 xfs_buf_t *bp; 235 xfs_buf_t *bp;
237 int error; 236 int error;
238 237
239 imap.im_blkno = 0; 238 imap.im_blkno = 0;
240 error = xfs_imap(mp, tp, ino, &imap, imap_flags); 239 error = xfs_imap(mp, tp, ino, &imap, imap_flags);
241 if (error) 240 if (error)
242 return error; 241 return error;
243 242
244 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); 243 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
245 if (error) 244 if (error)
246 return error; 245 return error;
247 246
248 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 247 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
249 *bpp = bp; 248 *bpp = bp;
250 *offset = imap.im_boffset; 249 *offset = imap.im_boffset;
251 return 0; 250 return 0;
252 } 251 }
253 252
254 253
255 /* 254 /*
256 * This routine is called to map an inode to the buffer containing 255 * This routine is called to map an inode to the buffer containing
257 * the on-disk version of the inode. It returns a pointer to the 256 * the on-disk version of the inode. It returns a pointer to the
258 * buffer containing the on-disk inode in the bpp parameter, and in 257 * buffer containing the on-disk inode in the bpp parameter, and in
259 * the dip parameter it returns a pointer to the on-disk inode within 258 * the dip parameter it returns a pointer to the on-disk inode within
260 * that buffer. 259 * that buffer.
261 * 260 *
262 * If a non-zero error is returned, then the contents of bpp and 261 * If a non-zero error is returned, then the contents of bpp and
263 * dipp are undefined. 262 * dipp are undefined.
264 * 263 *
265 * The inode is expected to already been mapped to its buffer and read 264 * The inode is expected to already been mapped to its buffer and read
266 * in once, thus we can use the mapping information stored in the inode 265 * in once, thus we can use the mapping information stored in the inode
267 * rather than calling xfs_imap(). This allows us to avoid the overhead 266 * rather than calling xfs_imap(). This allows us to avoid the overhead
268 * of looking at the inode btree for small block file systems 267 * of looking at the inode btree for small block file systems
269 * (see xfs_imap()). 268 * (see xfs_imap()).
270 */ 269 */
271 int 270 int
272 xfs_itobp( 271 xfs_itobp(
273 xfs_mount_t *mp, 272 xfs_mount_t *mp,
274 xfs_trans_t *tp, 273 xfs_trans_t *tp,
275 xfs_inode_t *ip, 274 xfs_inode_t *ip,
276 xfs_dinode_t **dipp, 275 xfs_dinode_t **dipp,
277 xfs_buf_t **bpp, 276 xfs_buf_t **bpp,
278 uint buf_flags) 277 uint buf_flags)
279 { 278 {
280 xfs_imap_t imap;
281 xfs_buf_t *bp; 279 xfs_buf_t *bp;
282 int error; 280 int error;
283 281
284 ASSERT(ip->i_blkno != 0); 282 ASSERT(ip->i_imap.im_blkno != 0);
285 283
286 imap.im_blkno = ip->i_blkno; 284 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
287 imap.im_len = ip->i_len;
288 imap.im_boffset = ip->i_boffset;
289
290 error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, 0);
291 if (error) 285 if (error)
292 return error; 286 return error;
293 287
294 if (!bp) { 288 if (!bp) {
295 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 289 ASSERT(buf_flags & XFS_BUF_TRYLOCK);
296 ASSERT(tp == NULL); 290 ASSERT(tp == NULL);
297 *bpp = NULL; 291 *bpp = NULL;
298 return EAGAIN; 292 return EAGAIN;
299 } 293 }
300 294
301 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 295 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
302 *bpp = bp; 296 *bpp = bp;
303 return 0; 297 return 0;
304 } 298 }
305 299
306 /* 300 /*
307 * Move inode type and inode format specific information from the 301 * Move inode type and inode format specific information from the
308 * on-disk inode to the in-core inode. For fifos, devs, and sockets 302 * on-disk inode to the in-core inode. For fifos, devs, and sockets
309 * this means set if_rdev to the proper value. For files, directories, 303 * this means set if_rdev to the proper value. For files, directories,
310 * and symlinks this means to bring in the in-line data or extent 304 * and symlinks this means to bring in the in-line data or extent
311 * pointers. For a file in B-tree format, only the root is immediately 305 * pointers. For a file in B-tree format, only the root is immediately
312 * brought in-core. The rest will be in-lined in if_extents when it 306 * brought in-core. The rest will be in-lined in if_extents when it
313 * is first referenced (see xfs_iread_extents()). 307 * is first referenced (see xfs_iread_extents()).
314 */ 308 */
315 STATIC int 309 STATIC int
316 xfs_iformat( 310 xfs_iformat(
317 xfs_inode_t *ip, 311 xfs_inode_t *ip,
318 xfs_dinode_t *dip) 312 xfs_dinode_t *dip)
319 { 313 {
320 xfs_attr_shortform_t *atp; 314 xfs_attr_shortform_t *atp;
321 int size; 315 int size;
322 int error; 316 int error;
323 xfs_fsize_t di_size; 317 xfs_fsize_t di_size;
324 ip->i_df.if_ext_max = 318 ip->i_df.if_ext_max =
325 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 319 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
326 error = 0; 320 error = 0;
327 321
328 if (unlikely(be32_to_cpu(dip->di_nextents) + 322 if (unlikely(be32_to_cpu(dip->di_nextents) +
329 be16_to_cpu(dip->di_anextents) > 323 be16_to_cpu(dip->di_anextents) >
330 be64_to_cpu(dip->di_nblocks))) { 324 be64_to_cpu(dip->di_nblocks))) {
331 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 325 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
332 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 326 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
333 (unsigned long long)ip->i_ino, 327 (unsigned long long)ip->i_ino,
334 (int)(be32_to_cpu(dip->di_nextents) + 328 (int)(be32_to_cpu(dip->di_nextents) +
335 be16_to_cpu(dip->di_anextents)), 329 be16_to_cpu(dip->di_anextents)),
336 (unsigned long long) 330 (unsigned long long)
337 be64_to_cpu(dip->di_nblocks)); 331 be64_to_cpu(dip->di_nblocks));
338 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 332 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
339 ip->i_mount, dip); 333 ip->i_mount, dip);
340 return XFS_ERROR(EFSCORRUPTED); 334 return XFS_ERROR(EFSCORRUPTED);
341 } 335 }
342 336
343 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 337 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
344 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 338 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
345 "corrupt dinode %Lu, forkoff = 0x%x.", 339 "corrupt dinode %Lu, forkoff = 0x%x.",
346 (unsigned long long)ip->i_ino, 340 (unsigned long long)ip->i_ino,
347 dip->di_forkoff); 341 dip->di_forkoff);
348 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 342 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
349 ip->i_mount, dip); 343 ip->i_mount, dip);
350 return XFS_ERROR(EFSCORRUPTED); 344 return XFS_ERROR(EFSCORRUPTED);
351 } 345 }
352 346
353 switch (ip->i_d.di_mode & S_IFMT) { 347 switch (ip->i_d.di_mode & S_IFMT) {
354 case S_IFIFO: 348 case S_IFIFO:
355 case S_IFCHR: 349 case S_IFCHR:
356 case S_IFBLK: 350 case S_IFBLK:
357 case S_IFSOCK: 351 case S_IFSOCK:
358 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { 352 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
359 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 353 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
360 ip->i_mount, dip); 354 ip->i_mount, dip);
361 return XFS_ERROR(EFSCORRUPTED); 355 return XFS_ERROR(EFSCORRUPTED);
362 } 356 }
363 ip->i_d.di_size = 0; 357 ip->i_d.di_size = 0;
364 ip->i_size = 0; 358 ip->i_size = 0;
365 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 359 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
366 break; 360 break;
367 361
368 case S_IFREG: 362 case S_IFREG:
369 case S_IFLNK: 363 case S_IFLNK:
370 case S_IFDIR: 364 case S_IFDIR:
371 switch (dip->di_format) { 365 switch (dip->di_format) {
372 case XFS_DINODE_FMT_LOCAL: 366 case XFS_DINODE_FMT_LOCAL:
373 /* 367 /*
374 * no local regular files yet 368 * no local regular files yet
375 */ 369 */
376 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { 370 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
377 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 371 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
378 "corrupt inode %Lu " 372 "corrupt inode %Lu "
379 "(local format for regular file).", 373 "(local format for regular file).",
380 (unsigned long long) ip->i_ino); 374 (unsigned long long) ip->i_ino);
381 XFS_CORRUPTION_ERROR("xfs_iformat(4)", 375 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
382 XFS_ERRLEVEL_LOW, 376 XFS_ERRLEVEL_LOW,
383 ip->i_mount, dip); 377 ip->i_mount, dip);
384 return XFS_ERROR(EFSCORRUPTED); 378 return XFS_ERROR(EFSCORRUPTED);
385 } 379 }
386 380
387 di_size = be64_to_cpu(dip->di_size); 381 di_size = be64_to_cpu(dip->di_size);
388 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 382 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
389 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 383 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
390 "corrupt inode %Lu " 384 "corrupt inode %Lu "
391 "(bad size %Ld for local inode).", 385 "(bad size %Ld for local inode).",
392 (unsigned long long) ip->i_ino, 386 (unsigned long long) ip->i_ino,
393 (long long) di_size); 387 (long long) di_size);
394 XFS_CORRUPTION_ERROR("xfs_iformat(5)", 388 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
395 XFS_ERRLEVEL_LOW, 389 XFS_ERRLEVEL_LOW,
396 ip->i_mount, dip); 390 ip->i_mount, dip);
397 return XFS_ERROR(EFSCORRUPTED); 391 return XFS_ERROR(EFSCORRUPTED);
398 } 392 }
399 393
400 size = (int)di_size; 394 size = (int)di_size;
401 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 395 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
402 break; 396 break;
403 case XFS_DINODE_FMT_EXTENTS: 397 case XFS_DINODE_FMT_EXTENTS:
404 error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 398 error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
405 break; 399 break;
406 case XFS_DINODE_FMT_BTREE: 400 case XFS_DINODE_FMT_BTREE:
407 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 401 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
408 break; 402 break;
409 default: 403 default:
410 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 404 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
411 ip->i_mount); 405 ip->i_mount);
412 return XFS_ERROR(EFSCORRUPTED); 406 return XFS_ERROR(EFSCORRUPTED);
413 } 407 }
414 break; 408 break;
415 409
416 default: 410 default:
417 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 411 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
418 return XFS_ERROR(EFSCORRUPTED); 412 return XFS_ERROR(EFSCORRUPTED);
419 } 413 }
420 if (error) { 414 if (error) {
421 return error; 415 return error;
422 } 416 }
423 if (!XFS_DFORK_Q(dip)) 417 if (!XFS_DFORK_Q(dip))
424 return 0; 418 return 0;
425 ASSERT(ip->i_afp == NULL); 419 ASSERT(ip->i_afp == NULL);
426 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 420 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
427 ip->i_afp->if_ext_max = 421 ip->i_afp->if_ext_max =
428 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 422 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
429 switch (dip->di_aformat) { 423 switch (dip->di_aformat) {
430 case XFS_DINODE_FMT_LOCAL: 424 case XFS_DINODE_FMT_LOCAL:
431 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 425 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
432 size = be16_to_cpu(atp->hdr.totsize); 426 size = be16_to_cpu(atp->hdr.totsize);
433 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 427 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
434 break; 428 break;
435 case XFS_DINODE_FMT_EXTENTS: 429 case XFS_DINODE_FMT_EXTENTS:
436 error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 430 error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
437 break; 431 break;
438 case XFS_DINODE_FMT_BTREE: 432 case XFS_DINODE_FMT_BTREE:
439 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 433 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
440 break; 434 break;
441 default: 435 default:
442 error = XFS_ERROR(EFSCORRUPTED); 436 error = XFS_ERROR(EFSCORRUPTED);
443 break; 437 break;
444 } 438 }
445 if (error) { 439 if (error) {
446 kmem_zone_free(xfs_ifork_zone, ip->i_afp); 440 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
447 ip->i_afp = NULL; 441 ip->i_afp = NULL;
448 xfs_idestroy_fork(ip, XFS_DATA_FORK); 442 xfs_idestroy_fork(ip, XFS_DATA_FORK);
449 } 443 }
450 return error; 444 return error;
451 } 445 }
452 446
453 /* 447 /*
454 * The file is in-lined in the on-disk inode. 448 * The file is in-lined in the on-disk inode.
455 * If it fits into if_inline_data, then copy 449 * If it fits into if_inline_data, then copy
456 * it there, otherwise allocate a buffer for it 450 * it there, otherwise allocate a buffer for it
457 * and copy the data there. Either way, set 451 * and copy the data there. Either way, set
458 * if_data to point at the data. 452 * if_data to point at the data.
459 * If we allocate a buffer for the data, make 453 * If we allocate a buffer for the data, make
460 * sure that its size is a multiple of 4 and 454 * sure that its size is a multiple of 4 and
461 * record the real size in i_real_bytes. 455 * record the real size in i_real_bytes.
462 */ 456 */
463 STATIC int 457 STATIC int
464 xfs_iformat_local( 458 xfs_iformat_local(
465 xfs_inode_t *ip, 459 xfs_inode_t *ip,
466 xfs_dinode_t *dip, 460 xfs_dinode_t *dip,
467 int whichfork, 461 int whichfork,
468 int size) 462 int size)
469 { 463 {
470 xfs_ifork_t *ifp; 464 xfs_ifork_t *ifp;
471 int real_size; 465 int real_size;
472 466
473 /* 467 /*
474 * If the size is unreasonable, then something 468 * If the size is unreasonable, then something
475 * is wrong and we just bail out rather than crash in 469 * is wrong and we just bail out rather than crash in
476 * kmem_alloc() or memcpy() below. 470 * kmem_alloc() or memcpy() below.
477 */ 471 */
478 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 472 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
479 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 473 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
480 "corrupt inode %Lu " 474 "corrupt inode %Lu "
481 "(bad size %d for local fork, size = %d).", 475 "(bad size %d for local fork, size = %d).",
482 (unsigned long long) ip->i_ino, size, 476 (unsigned long long) ip->i_ino, size,
483 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 477 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
484 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 478 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
485 ip->i_mount, dip); 479 ip->i_mount, dip);
486 return XFS_ERROR(EFSCORRUPTED); 480 return XFS_ERROR(EFSCORRUPTED);
487 } 481 }
488 ifp = XFS_IFORK_PTR(ip, whichfork); 482 ifp = XFS_IFORK_PTR(ip, whichfork);
489 real_size = 0; 483 real_size = 0;
490 if (size == 0) 484 if (size == 0)
491 ifp->if_u1.if_data = NULL; 485 ifp->if_u1.if_data = NULL;
492 else if (size <= sizeof(ifp->if_u2.if_inline_data)) 486 else if (size <= sizeof(ifp->if_u2.if_inline_data))
493 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 487 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
494 else { 488 else {
495 real_size = roundup(size, 4); 489 real_size = roundup(size, 4);
496 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 490 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
497 } 491 }
498 ifp->if_bytes = size; 492 ifp->if_bytes = size;
499 ifp->if_real_bytes = real_size; 493 ifp->if_real_bytes = real_size;
500 if (size) 494 if (size)
501 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); 495 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
502 ifp->if_flags &= ~XFS_IFEXTENTS; 496 ifp->if_flags &= ~XFS_IFEXTENTS;
503 ifp->if_flags |= XFS_IFINLINE; 497 ifp->if_flags |= XFS_IFINLINE;
504 return 0; 498 return 0;
505 } 499 }
506 500
/*
 * The file consists of a set of extents all of which fit into the on-disk
 * inode.  If there are few enough extents to fit into the if_inline_ext
 * array, then copy them there.  Otherwise allocate a buffer for them and
 * copy them into it.  Either way, set if_extents to point at the extents.
 */
STATIC int
xfs_iformat_extents(
	xfs_inode_t	*ip,		/* in-core inode */
	xfs_dinode_t	*dip,		/* on-disk inode holding the fork data */
	int		whichfork)	/* XFS_DATA_FORK or XFS_ATTR_FORK */
{
	xfs_bmbt_rec_t	*dp;		/* cursor over on-disk extent records */
	xfs_ifork_t	*ifp;		/* fork being filled in */
	int		nex;		/* number of extents in the fork */
	int		size;		/* bytes of extent records */
	int		i;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
	size = nex * (uint)sizeof(xfs_bmbt_rec_t);

	/*
	 * If the number of extents is unreasonable, then something
	 * is wrong and we just bail out rather than crash in
	 * kmem_alloc() or memcpy() below.  size < 0 catches a nex
	 * large enough to overflow the multiplication above.
	 */
	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
			"corrupt inode %Lu ((a)extents = %d).",
			(unsigned long long) ip->i_ino, nex);
		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	ifp->if_real_bytes = 0;
	if (nex == 0)
		ifp->if_u1.if_extents = NULL;
	else if (nex <= XFS_INLINE_EXTS)
		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
	else
		xfs_iext_add(ifp, 0, nex);	/* allocates the extent buffer */

	ifp->if_bytes = size;
	if (size) {
		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
		/*
		 * Copy each record in, converting from big-endian disk
		 * format; the on-disk records may be unaligned.
		 */
		for (i = 0; i < nex; i++, dp++) {
			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
			ep->l0 = get_unaligned_be64(&dp->l0);
			ep->l1 = get_unaligned_be64(&dp->l1);
		}
		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
		/*
		 * Attr-fork extents (and data forks in the NOSTATE extent
		 * format) must not carry unwritten-state bits.
		 */
		if (whichfork != XFS_DATA_FORK ||
			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
				if (unlikely(xfs_check_nostate_extents(
				    ifp, 0, nex))) {
					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
							 XFS_ERRLEVEL_LOW,
							 ip->i_mount);
					return XFS_ERROR(EFSCORRUPTED);
				}
	}
	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;
}
577 571
/*
 * The file has too many extents to fit into the inode, so they are in
 * B-tree format.  Allocate a buffer for the root of the B-tree and copy
 * the root into it.  The i_extents field will remain NULL until all of
 * the extents are read in (when they are needed).
 */
STATIC int
xfs_iformat_btree(
	xfs_inode_t	*ip,		/* in-core inode */
	xfs_dinode_t	*dip,		/* on-disk inode holding the root */
	int		whichfork)	/* XFS_DATA_FORK or XFS_ATTR_FORK */
{
	xfs_bmdr_block_t *dfp;		/* on-disk btree root block */
	xfs_ifork_t	*ifp;
	/* REFERENCED */
	int		nrecs;		/* records in the on-disk root */
	int		size;		/* in-core broot size in bytes */

	ifp = XFS_IFORK_PTR(ip, whichfork);
	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
	size = XFS_BMAP_BROOT_SPACE(dfp);
	nrecs = be16_to_cpu(dfp->bb_numrecs);

	/*
	 * blow out if -- fork has less extents than can fit in
	 * fork (fork shouldn't be a btree format), root btree
	 * block has more records than can fit into the fork,
	 * or the number of extents is greater than the number of
	 * blocks.
	 */
	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
	    || XFS_BMDR_SPACE_CALC(nrecs) >
			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
	    || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
			"corrupt inode %Lu (btree).",
			(unsigned long long) ip->i_ino);
		XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
				 ip->i_mount);
		return XFS_ERROR(EFSCORRUPTED);
	}

	ifp->if_broot_bytes = size;
	ifp->if_broot = kmem_alloc(size, KM_SLEEP);
	ASSERT(ifp->if_broot != NULL);
	/*
	 * Copy and convert from the on-disk structure
	 * to the in-memory structure.
	 */
	xfs_bmdr_to_bmbt(ip->i_mount, dfp,
			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
			 ifp->if_broot, size);
	ifp->if_flags &= ~XFS_IFEXTENTS;	/* extents not yet read in */
	ifp->if_flags |= XFS_IFBROOT;

	return 0;
}
637 631
/*
 * Copy an on-disk inode core (big-endian) into the in-core, host-endian
 * xfs_icdinode.  Single-byte fields and the pad area are copied verbatim;
 * everything else is byte-swapped as needed.
 */
void
xfs_dinode_from_disk(
	xfs_icdinode_t	*to,	/* in-core inode core (host endian) */
	xfs_dinode_t	*from)	/* on-disk inode core (big endian) */
{
	to->di_magic = be16_to_cpu(from->di_magic);
	to->di_mode = be16_to_cpu(from->di_mode);
	to->di_version = from->di_version;	/* single byte, no swap */
	to->di_format = from->di_format;	/* single byte, no swap */
	to->di_onlink = be16_to_cpu(from->di_onlink);
	to->di_uid = be32_to_cpu(from->di_uid);
	to->di_gid = be32_to_cpu(from->di_gid);
	to->di_nlink = be32_to_cpu(from->di_nlink);
	to->di_projid = be16_to_cpu(from->di_projid);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
	to->di_flushiter = be16_to_cpu(from->di_flushiter);
	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
	to->di_size = be64_to_cpu(from->di_size);
	to->di_nblocks = be64_to_cpu(from->di_nblocks);
	to->di_extsize = be32_to_cpu(from->di_extsize);
	to->di_nextents = be32_to_cpu(from->di_nextents);
	to->di_anextents = be16_to_cpu(from->di_anextents);
	to->di_forkoff = from->di_forkoff;	/* single byte, no swap */
	to->di_aformat = from->di_aformat;	/* single byte, no swap */
	to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
	to->di_dmstate = be16_to_cpu(from->di_dmstate);
	to->di_flags = be16_to_cpu(from->di_flags);
	to->di_gen = be32_to_cpu(from->di_gen);
}
672 666
/*
 * Copy an in-core, host-endian inode core back into the on-disk
 * (big-endian) format.  Exact inverse of xfs_dinode_from_disk():
 * single-byte fields and the pad area are copied verbatim.
 */
void
xfs_dinode_to_disk(
	xfs_dinode_t	*to,	/* on-disk inode core (big endian) */
	xfs_icdinode_t	*from)	/* in-core inode core (host endian) */
{
	to->di_magic = cpu_to_be16(from->di_magic);
	to->di_mode = cpu_to_be16(from->di_mode);
	to->di_version = from->di_version;	/* single byte, no swap */
	to->di_format = from->di_format;	/* single byte, no swap */
	to->di_onlink = cpu_to_be16(from->di_onlink);
	to->di_uid = cpu_to_be32(from->di_uid);
	to->di_gid = cpu_to_be32(from->di_gid);
	to->di_nlink = cpu_to_be32(from->di_nlink);
	to->di_projid = cpu_to_be16(from->di_projid);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
	to->di_flushiter = cpu_to_be16(from->di_flushiter);
	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
	to->di_size = cpu_to_be64(from->di_size);
	to->di_nblocks = cpu_to_be64(from->di_nblocks);
	to->di_extsize = cpu_to_be32(from->di_extsize);
	to->di_nextents = cpu_to_be32(from->di_nextents);
	to->di_anextents = cpu_to_be16(from->di_anextents);
	to->di_forkoff = from->di_forkoff;	/* single byte, no swap */
	to->di_aformat = from->di_aformat;	/* single byte, no swap */
	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
	to->di_dmstate = cpu_to_be16(from->di_dmstate);
	to->di_flags = cpu_to_be16(from->di_flags);
	to->di_gen = cpu_to_be32(from->di_gen);
}
707 701
708 STATIC uint 702 STATIC uint
709 _xfs_dic2xflags( 703 _xfs_dic2xflags(
710 __uint16_t di_flags) 704 __uint16_t di_flags)
711 { 705 {
712 uint flags = 0; 706 uint flags = 0;
713 707
714 if (di_flags & XFS_DIFLAG_ANY) { 708 if (di_flags & XFS_DIFLAG_ANY) {
715 if (di_flags & XFS_DIFLAG_REALTIME) 709 if (di_flags & XFS_DIFLAG_REALTIME)
716 flags |= XFS_XFLAG_REALTIME; 710 flags |= XFS_XFLAG_REALTIME;
717 if (di_flags & XFS_DIFLAG_PREALLOC) 711 if (di_flags & XFS_DIFLAG_PREALLOC)
718 flags |= XFS_XFLAG_PREALLOC; 712 flags |= XFS_XFLAG_PREALLOC;
719 if (di_flags & XFS_DIFLAG_IMMUTABLE) 713 if (di_flags & XFS_DIFLAG_IMMUTABLE)
720 flags |= XFS_XFLAG_IMMUTABLE; 714 flags |= XFS_XFLAG_IMMUTABLE;
721 if (di_flags & XFS_DIFLAG_APPEND) 715 if (di_flags & XFS_DIFLAG_APPEND)
722 flags |= XFS_XFLAG_APPEND; 716 flags |= XFS_XFLAG_APPEND;
723 if (di_flags & XFS_DIFLAG_SYNC) 717 if (di_flags & XFS_DIFLAG_SYNC)
724 flags |= XFS_XFLAG_SYNC; 718 flags |= XFS_XFLAG_SYNC;
725 if (di_flags & XFS_DIFLAG_NOATIME) 719 if (di_flags & XFS_DIFLAG_NOATIME)
726 flags |= XFS_XFLAG_NOATIME; 720 flags |= XFS_XFLAG_NOATIME;
727 if (di_flags & XFS_DIFLAG_NODUMP) 721 if (di_flags & XFS_DIFLAG_NODUMP)
728 flags |= XFS_XFLAG_NODUMP; 722 flags |= XFS_XFLAG_NODUMP;
729 if (di_flags & XFS_DIFLAG_RTINHERIT) 723 if (di_flags & XFS_DIFLAG_RTINHERIT)
730 flags |= XFS_XFLAG_RTINHERIT; 724 flags |= XFS_XFLAG_RTINHERIT;
731 if (di_flags & XFS_DIFLAG_PROJINHERIT) 725 if (di_flags & XFS_DIFLAG_PROJINHERIT)
732 flags |= XFS_XFLAG_PROJINHERIT; 726 flags |= XFS_XFLAG_PROJINHERIT;
733 if (di_flags & XFS_DIFLAG_NOSYMLINKS) 727 if (di_flags & XFS_DIFLAG_NOSYMLINKS)
734 flags |= XFS_XFLAG_NOSYMLINKS; 728 flags |= XFS_XFLAG_NOSYMLINKS;
735 if (di_flags & XFS_DIFLAG_EXTSIZE) 729 if (di_flags & XFS_DIFLAG_EXTSIZE)
736 flags |= XFS_XFLAG_EXTSIZE; 730 flags |= XFS_XFLAG_EXTSIZE;
737 if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 731 if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
738 flags |= XFS_XFLAG_EXTSZINHERIT; 732 flags |= XFS_XFLAG_EXTSZINHERIT;
739 if (di_flags & XFS_DIFLAG_NODEFRAG) 733 if (di_flags & XFS_DIFLAG_NODEFRAG)
740 flags |= XFS_XFLAG_NODEFRAG; 734 flags |= XFS_XFLAG_NODEFRAG;
741 if (di_flags & XFS_DIFLAG_FILESTREAM) 735 if (di_flags & XFS_DIFLAG_FILESTREAM)
742 flags |= XFS_XFLAG_FILESTREAM; 736 flags |= XFS_XFLAG_FILESTREAM;
743 } 737 }
744 738
745 return flags; 739 return flags;
746 } 740 }
747 741
748 uint 742 uint
749 xfs_ip2xflags( 743 xfs_ip2xflags(
750 xfs_inode_t *ip) 744 xfs_inode_t *ip)
751 { 745 {
752 xfs_icdinode_t *dic = &ip->i_d; 746 xfs_icdinode_t *dic = &ip->i_d;
753 747
754 return _xfs_dic2xflags(dic->di_flags) | 748 return _xfs_dic2xflags(dic->di_flags) |
755 (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); 749 (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
756 } 750 }
757 751
758 uint 752 uint
759 xfs_dic2xflags( 753 xfs_dic2xflags(
760 xfs_dinode_t *dip) 754 xfs_dinode_t *dip)
761 { 755 {
762 return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | 756 return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
763 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 757 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
764 } 758 }
765 759
/*
 * Allocate and initialise an xfs_inode.  Returns NULL only when the VFS
 * inode initialisation fails (see below); the zone allocation itself
 * sleeps until it succeeds.
 */
STATIC struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,	/* mount the inode belongs to */
	xfs_ino_t		ino)	/* inode number */
{
	struct xfs_inode	*ip;

	/*
	 * if this didn't occur in transactions, we could use
	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
	 * code up to do this anyway.
	 */
	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;

	/* the zone constructor/destructor must have left these clean */
	ASSERT(atomic_read(&ip->i_iocount) == 0);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(completion_done(&ip->i_flush));

	/*
	 * initialise the VFS inode here to get failures
	 * out of the way early.
	 */
	if (!inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_zone_free(xfs_inode_zone, ip);
		return NULL;
	}

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	/* location-on-disk info; filled in later by xfs_imap() */
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
	ip->i_flags = 0;
	ip->i_update_core = 0;
	ip->i_update_size = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
	ip->i_size = 0;
	ip->i_new_size = 0;

	/*
	 * Initialize inode's trace buffers.
	 */
#ifdef XFS_INODE_TRACE
	ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_BMAP_TRACE
	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_BTREE_TRACE
	ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_RW_TRACE
	ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_ILOCK_TRACE
	ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_DIR2_TRACE
	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
#endif

	return ip;
}
839 831
/*
 * Given a mount structure and an inode number, return a pointer
 * to a newly allocated in-core inode corresponding to the given
 * inode number.
 *
 * Initialize the inode's attributes and extent pointers if it
 * already has them (it will not if the inode has no links).
 *
 * Returns 0 on success, a positive errno on failure; on failure the
 * partially-built inode is destroyed before returning.
 */
int
xfs_iread(
	xfs_mount_t	*mp,		/* mount the inode lives on */
	xfs_trans_t	*tp,		/* transaction, may be NULL */
	xfs_ino_t	ino,		/* inode number to read */
	xfs_inode_t	**ipp,		/* OUT: the new in-core inode */
	xfs_daddr_t	bno,		/* expected disk block, or 0 */
	uint		imap_flags)	/* flags passed through to xfs_imap */
{
	xfs_buf_t	*bp;
	xfs_dinode_t	*dip;
	xfs_inode_t	*ip;
	int		error;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;

	/*
	 * Fill in the location information in the in-core inode.
	 */
	ip->i_imap.im_blkno = bno;
	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, imap_flags);
	if (error)
		goto out_destroy_inode;
	/* a non-zero caller-supplied bno must agree with the mapping */
	ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);

	/*
	 * Get pointers to the on-disk inode and the buffer containing it.
	 */
	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
			       XFS_BUF_LOCK, imap_flags);
	if (error)
		goto out_destroy_inode;
	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);

	/*
	 * If we got something that isn't an inode it means someone
	 * (nfs or dmi) has a stale handle.
	 */
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
#ifdef DEBUG
		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
				"dip->di_magic (0x%x) != "
				"XFS_DINODE_MAGIC (0x%x)",
				be16_to_cpu(dip->di_magic),
				XFS_DINODE_MAGIC);
#endif /* DEBUG */
		error = XFS_ERROR(EINVAL);
		goto out_brelse;
	}

	/*
	 * If the on-disk inode is already linked to a directory
	 * entry, copy all of the inode into the in-core inode.
	 * xfs_iformat() handles copying in the inode format
	 * specific information.
	 * Otherwise, just get the truly permanent information.
	 */
	if (dip->di_mode) {
		xfs_dinode_from_disk(&ip->i_d, dip);
		error = xfs_iformat(ip, dip);
		if (error)  {
#ifdef DEBUG
			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
					"xfs_iformat() returned error %d",
					error);
#endif /* DEBUG */
			goto out_brelse;
		}
	} else {
		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
		ip->i_d.di_version = dip->di_version;
		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
		/*
		 * Make sure to pull in the mode here as well in
		 * case the inode is released without being used.
		 * This ensures that xfs_inactive() will see that
		 * the inode is already free and not try to mess
		 * with the uninitialized part of it.
		 */
		ip->i_d.di_mode = 0;
		/*
		 * Initialize the per-fork minima and maxima for a new
		 * inode here.  xfs_iformat will do it for old inodes.
		 */
		ip->i_df.if_ext_max =
			XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	}

	/*
	 * The inode format changed when we moved the link count and
	 * made it 32 bits long.  If this is an old format inode,
	 * convert it in memory to look like a new one.  If it gets
	 * flushed to disk we will convert back before flushing or
	 * logging it.  We zero out the new projid field and the old link
	 * count field.  We'll handle clearing the pad field (the remains
	 * of the old uuid field) when we actually convert the inode to
	 * the new format. We don't change the version number so that we
	 * can distinguish this from a real new format inode.
	 */
	if (ip->i_d.di_version == 1) {
		ip->i_d.di_nlink = ip->i_d.di_onlink;
		ip->i_d.di_onlink = 0;
		ip->i_d.di_projid = 0;
	}

	ip->i_delayed_blks = 0;
	ip->i_size = ip->i_d.di_size;

	/*
	 * Mark the buffer containing the inode as something to keep
	 * around for a while.  This helps to keep recently accessed
	 * meta-data in-core longer.
	 */
	XFS_BUF_SET_REF(bp, XFS_INO_REF);

	/*
	 * Use xfs_trans_brelse() to release the buffer containing the
	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
	 * in xfs_itobp() above.  If tp is NULL, this is just a normal
	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
	 * will only release the buffer if it is not dirty within the
	 * transaction.  It will be OK to release the buffer in this case,
	 * because inodes on disk are never destroyed and we will be
	 * locking the new in-core inode before putting it in the hash
	 * table where other processes can find it.  Thus we don't have
	 * to worry about the inode being changed just because we released
	 * the buffer.
	 */
	xfs_trans_brelse(tp, bp);
	*ipp = ip;
	return 0;

 out_brelse:
	xfs_trans_brelse(tp, bp);
 out_destroy_inode:
	xfs_destroy_inode(ip);
	return error;
}
994 981
/*
 * Read in extents from a btree-format inode.
 * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
 *
 * Returns 0 on success, EFSCORRUPTED if the fork is not in btree
 * format, or the error from xfs_bmap_read_extents(); on error the
 * fork is restored to its "extents not read in" state.
 */
int
xfs_iread_extents(
	xfs_trans_t	*tp,		/* transaction, may be NULL */
	xfs_inode_t	*ip,		/* inode whose fork to read */
	int		whichfork)	/* XFS_DATA_FORK or XFS_ATTR_FORK */
{
	int		error;
	xfs_ifork_t	*ifp;
	xfs_extnum_t	nextents;
	size_t		size;

	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
				 ip->i_mount);
		return XFS_ERROR(EFSCORRUPTED);
	}
	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
	size = nextents * sizeof(xfs_bmbt_rec_t);
	ifp = XFS_IFORK_PTR(ip, whichfork);

	/*
	 * We know that the size is valid (it's checked in iformat_btree)
	 */
	ifp->if_lastex = NULLEXTNUM;
	ifp->if_bytes = ifp->if_real_bytes = 0;
	/* set XFS_IFEXTENTS before the read; cleared again on failure */
	ifp->if_flags |= XFS_IFEXTENTS;
	xfs_iext_add(ifp, 0, nextents);
	error = xfs_bmap_read_extents(tp, ip, whichfork);
	if (error) {
		xfs_iext_destroy(ifp);
		ifp->if_flags &= ~XFS_IFEXTENTS;
		return error;
	}
	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
	return 0;
}
1035 1022
1036 /* 1023 /*
1037 * Allocate an inode on disk and return a copy of its in-core version. 1024 * Allocate an inode on disk and return a copy of its in-core version.
1038 * The in-core inode is locked exclusively. Set mode, nlink, and rdev 1025 * The in-core inode is locked exclusively. Set mode, nlink, and rdev
1039 * appropriately within the inode. The uid and gid for the inode are 1026 * appropriately within the inode. The uid and gid for the inode are
1040 * set according to the contents of the given cred structure. 1027 * set according to the contents of the given cred structure.
1041 * 1028 *
1042 * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 1029 * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
1043 * has a free inode available, call xfs_iget() 1030 * has a free inode available, call xfs_iget()
1044 * to obtain the in-core version of the allocated inode. Finally, 1031 * to obtain the in-core version of the allocated inode. Finally,
1045 * fill in the inode and log its initial contents. In this case, 1032 * fill in the inode and log its initial contents. In this case,
1046 * ialloc_context would be set to NULL and call_again set to false. 1033 * ialloc_context would be set to NULL and call_again set to false.
1047 * 1034 *
1048 * If xfs_dialloc() does not have an available inode, 1035 * If xfs_dialloc() does not have an available inode,
1049 * it will replenish its supply by doing an allocation. Since we can 1036 * it will replenish its supply by doing an allocation. Since we can
1050 * only do one allocation within a transaction without deadlocks, we 1037 * only do one allocation within a transaction without deadlocks, we
1051 * must commit the current transaction before returning the inode itself. 1038 * must commit the current transaction before returning the inode itself.
1052 * In this case, therefore, we will set call_again to true and return. 1039 * In this case, therefore, we will set call_again to true and return.
1053 * The caller should then commit the current transaction, start a new 1040 * The caller should then commit the current transaction, start a new
1054 * transaction, and call xfs_ialloc() again to actually get the inode. 1041 * transaction, and call xfs_ialloc() again to actually get the inode.
1055 * 1042 *
1056 * To ensure that some other process does not grab the inode that 1043 * To ensure that some other process does not grab the inode that
1057 * was allocated during the first call to xfs_ialloc(), this routine 1044 * was allocated during the first call to xfs_ialloc(), this routine
1058 * also returns the [locked] bp pointing to the head of the freelist 1045 * also returns the [locked] bp pointing to the head of the freelist
1059 * as ialloc_context. The caller should hold this buffer across 1046 * as ialloc_context. The caller should hold this buffer across
1060 * the commit and pass it back into this routine on the second call. 1047 * the commit and pass it back into this routine on the second call.
1061 * 1048 *
1062 * If we are allocating quota inodes, we do not have a parent inode 1049 * If we are allocating quota inodes, we do not have a parent inode
1063 * to attach to or associate with (i.e. pip == NULL) because they 1050 * to attach to or associate with (i.e. pip == NULL) because they
1064 * are not linked into the directory structure - they are attached 1051 * are not linked into the directory structure - they are attached
1065 * directly to the superblock - and so have no parent. 1052 * directly to the superblock - and so have no parent.
1066 */ 1053 */
int
xfs_ialloc(
	xfs_trans_t	*tp,
	xfs_inode_t	*pip,
	mode_t		mode,
	xfs_nlink_t	nlink,
	xfs_dev_t	rdev,
	cred_t		*cr,
	xfs_prid_t	prid,
	int		okalloc,
	xfs_buf_t	**ialloc_context,
	boolean_t	*call_again,
	xfs_inode_t	**ipp)
{
	xfs_ino_t	ino;
	xfs_inode_t	*ip;
	uint		flags;
	int		error;
	timespec_t	tv;
	int		filestreams = 0;

	/* NOTE(review): "cr" is not referenced anywhere in this body. */

	/*
	 * Call the space management code to pick
	 * the on-disk inode to be allocated.
	 */
	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
			    ialloc_context, call_again, &ino);
	if (error)
		return error;
	/*
	 * No inode yet: either the caller must commit and retry
	 * (*call_again set, see the comment above this function) or
	 * allocation genuinely failed (NULLFSINO).  Either way return
	 * success with *ipp == NULL.
	 */
	if (*call_again || ino == NULLFSINO) {
		*ipp = NULL;
		return 0;
	}
	ASSERT(*ialloc_context == NULL);

	/*
	 * Get the in-core inode with the lock held exclusively.
	 * This is because we're setting fields here we need
	 * to prevent others from looking at until we're done.
	 */
	error = xfs_trans_iget(tp->t_mountp, tp, ino,
				XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
	if (error)
		return error;
	ASSERT(ip != NULL);

	ip->i_d.di_mode = (__uint16_t)mode;
	ip->i_d.di_onlink = 0;
	ip->i_d.di_nlink = nlink;
	ASSERT(ip->i_d.di_nlink == nlink);
	ip->i_d.di_uid = current_fsuid();
	ip->i_d.di_gid = current_fsgid();
	ip->i_d.di_projid = prid;
	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));

	/*
	 * If the superblock version is up to where we support new format
	 * inodes and this is currently an old format inode, then change
	 * the inode version number now.  This way we only do the conversion
	 * here rather than here and in the flush/logging code.
	 */
	if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
	    ip->i_d.di_version == 1) {
		ip->i_d.di_version = 2;
		/*
		 * We've already zeroed the old link count, the projid field,
		 * and the pad field.
		 */
	}

	/*
	 * Project ids won't be stored on disk if we are using a version 1 inode.
	 */
	if ((prid != 0) && (ip->i_d.di_version == 1))
		xfs_bump_ino_vers2(tp, ip);

	/* Inherit group id (and setgid on new directories) from the parent. */
	if (pip && XFS_INHERIT_GID(pip)) {
		ip->i_d.di_gid = pip->i_d.di_gid;
		if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
			ip->i_d.di_mode |= S_ISGID;
		}
	}

	/*
	 * If the group ID of the new file does not match the effective group
	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
	 * (and only if the irix_sgid_inherit compatibility variable is set).
	 */
	if ((irix_sgid_inherit) &&
	    (ip->i_d.di_mode & S_ISGID) &&
	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
		ip->i_d.di_mode &= ~S_ISGID;
	}

	ip->i_d.di_size = 0;
	ip->i_size = 0;
	ip->i_d.di_nextents = 0;
	ASSERT(ip->i_d.di_nblocks == 0);

	/* Stamp mtime/atime/ctime with the current time. */
	nanotime(&tv);
	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
	ip->i_d.di_atime = ip->i_d.di_mtime;
	ip->i_d.di_ctime = ip->i_d.di_mtime;

	/*
	 * di_gen will have been taken care of in xfs_iread.
	 */
	ip->i_d.di_extsize = 0;
	ip->i_d.di_dmevmask = 0;
	ip->i_d.di_dmstate = 0;
	ip->i_d.di_flags = 0;
	flags = XFS_ILOG_CORE;
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		/* Special files store the device number in the data fork. */
		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
		ip->i_df.if_u2.if_rdev = rdev;
		ip->i_df.if_flags = 0;
		flags |= XFS_ILOG_DEV;
		break;
	case S_IFREG:
		/*
		 * we can't set up filestreams until after the VFS inode
		 * is set up properly.
		 */
		if (pip && xfs_inode_is_filestream(pip))
			filestreams = 1;
		/* fall through */
	case S_IFDIR:
		/* Propagate inheritable flags from the parent directory. */
		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
			uint	di_flags = 0;

			if ((mode & S_IFMT) == S_IFDIR) {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_RTINHERIT;
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			} else if ((mode & S_IFMT) == S_IFREG) {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_REALTIME;
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSIZE;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			}
			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
			    xfs_inherit_noatime)
				di_flags |= XFS_DIFLAG_NOATIME;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
			    xfs_inherit_nodump)
				di_flags |= XFS_DIFLAG_NODUMP;
			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
			    xfs_inherit_sync)
				di_flags |= XFS_DIFLAG_SYNC;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
			    xfs_inherit_nosymlinks)
				di_flags |= XFS_DIFLAG_NOSYMLINKS;
			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				di_flags |= XFS_DIFLAG_PROJINHERIT;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
			    xfs_inherit_nodefrag)
				di_flags |= XFS_DIFLAG_NODEFRAG;
			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
				di_flags |= XFS_DIFLAG_FILESTREAM;
			ip->i_d.di_flags |= di_flags;
		}
		/* FALLTHROUGH */
	case S_IFLNK:
		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
		ip->i_df.if_flags = XFS_IFEXTENTS;
		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
		ip->i_df.if_u1.if_extents = NULL;
		break;
	default:
		/* All valid S_IFMT values are handled above. */
		ASSERT(0);
	}
	/*
	 * Attribute fork settings for new inode.
	 */
	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	ip->i_d.di_anextents = 0;

	/*
	 * Log the new values stuffed into the inode.
	 */
	xfs_trans_log_inode(tp, ip, flags);

	/* now that we have an i_mode we can setup inode ops and unlock */
	xfs_setup_inode(ip);

	/* now we have set up the vfs inode we can associate the filestream */
	if (filestreams) {
		error = xfs_filestream_associate(pip, ip);
		if (error < 0)
			return -error;
		if (!error)
			xfs_iflags_set(ip, XFS_IFILESTREAM);
	}

	*ipp = ip;
	return 0;
}
1274 1261
/*
 * Debug-only sanity check: a plain regular file must not have any
 * blocks mapped beyond its size - everything past EOF should map to
 * a hole.  Files with realtime or fixed-extent-size semantics are
 * not checked.
 */
#ifdef DEBUG
void
xfs_isize_check(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	xfs_fsize_t	isize)
{
	xfs_fileoff_t	first_fsb;
	xfs_bmbt_irec_t	maps[2];
	int		nmaps = 2;

	/* Skip anything that is not an ordinary regular file. */
	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
		return;
	if (XFS_IS_REALTIME_INODE(ip))
		return;
	if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
		return;

	first_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);

	/*
	 * The filesystem could be shutting down, so bmapi may return
	 * an error; in that case just skip the check.
	 */
	if (xfs_bmapi(NULL, ip, first_fsb,
		      (XFS_B_TO_FSB(mp,
				    (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
		       first_fsb),
		      XFS_BMAPI_ENTIRE, NULL, 0, maps, &nmaps,
		      NULL, NULL))
		return;

	/* The whole post-EOF range must come back as a single hole. */
	ASSERT(nmaps == 1);
	ASSERT(maps[0].br_startblock == HOLESTARTBLOCK);
}
#endif	/* DEBUG */
1318 1305
1319 /* 1306 /*
1320 * Calculate the last possible buffered byte in a file. This must 1307 * Calculate the last possible buffered byte in a file. This must
1321 * include data that was buffered beyond the EOF by the write code. 1308 * include data that was buffered beyond the EOF by the write code.
1322 * This also needs to deal with overflowing the xfs_fsize_t type 1309 * This also needs to deal with overflowing the xfs_fsize_t type
1323 * which can happen for sizes near the limit. 1310 * which can happen for sizes near the limit.
1324 * 1311 *
1325 * We also need to take into account any blocks beyond the EOF. It 1312 * We also need to take into account any blocks beyond the EOF. It
1326 * may be the case that they were buffered by a write which failed. 1313 * may be the case that they were buffered by a write which failed.
1327 * In that case the pages will still be in memory, but the inode size 1314 * In that case the pages will still be in memory, but the inode size
1328 * will never have been updated. 1315 * will never have been updated.
1329 */ 1316 */
1330 xfs_fsize_t 1317 xfs_fsize_t
1331 xfs_file_last_byte( 1318 xfs_file_last_byte(
1332 xfs_inode_t *ip) 1319 xfs_inode_t *ip)
1333 { 1320 {
1334 xfs_mount_t *mp; 1321 xfs_mount_t *mp;
1335 xfs_fsize_t last_byte; 1322 xfs_fsize_t last_byte;
1336 xfs_fileoff_t last_block; 1323 xfs_fileoff_t last_block;
1337 xfs_fileoff_t size_last_block; 1324 xfs_fileoff_t size_last_block;
1338 int error; 1325 int error;
1339 1326
1340 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); 1327 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
1341 1328
1342 mp = ip->i_mount; 1329 mp = ip->i_mount;
1343 /* 1330 /*
1344 * Only check for blocks beyond the EOF if the extents have 1331 * Only check for blocks beyond the EOF if the extents have
1345 * been read in. This eliminates the need for the inode lock, 1332 * been read in. This eliminates the need for the inode lock,
1346 * and it also saves us from looking when it really isn't 1333 * and it also saves us from looking when it really isn't
1347 * necessary. 1334 * necessary.
1348 */ 1335 */
1349 if (ip->i_df.if_flags & XFS_IFEXTENTS) { 1336 if (ip->i_df.if_flags & XFS_IFEXTENTS) {
1350 error = xfs_bmap_last_offset(NULL, ip, &last_block, 1337 error = xfs_bmap_last_offset(NULL, ip, &last_block,
1351 XFS_DATA_FORK); 1338 XFS_DATA_FORK);
1352 if (error) { 1339 if (error) {
1353 last_block = 0; 1340 last_block = 0;
1354 } 1341 }
1355 } else { 1342 } else {
1356 last_block = 0; 1343 last_block = 0;
1357 } 1344 }
1358 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); 1345 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
1359 last_block = XFS_FILEOFF_MAX(last_block, size_last_block); 1346 last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
1360 1347
1361 last_byte = XFS_FSB_TO_B(mp, last_block); 1348 last_byte = XFS_FSB_TO_B(mp, last_block);
1362 if (last_byte < 0) { 1349 if (last_byte < 0) {
1363 return XFS_MAXIOFFSET(mp); 1350 return XFS_MAXIOFFSET(mp);
1364 } 1351 }
1365 last_byte += (1 << mp->m_writeio_log); 1352 last_byte += (1 << mp->m_writeio_log);
1366 if (last_byte < 0) { 1353 if (last_byte < 0) {
1367 return XFS_MAXIOFFSET(mp); 1354 return XFS_MAXIOFFSET(mp);
1368 } 1355 }
1369 return last_byte; 1356 return last_byte;
1370 } 1357 }
1371 1358
#if defined(XFS_RW_TRACE)
/*
 * Record a truncate event in the inode's per-inode ktrace buffer.
 * The 64-bit size/offset arguments are split into high/low 32-bit
 * halves because each ktrace slot holds a single pointer-sized value.
 * No-op (and compiled out entirely, see #else below) when tracing is
 * disabled or the inode has no trace buffer.
 */
STATIC void
xfs_itrunc_trace(
	int		tag,
	xfs_inode_t	*ip,
	int		flag,
	xfs_fsize_t	new_size,
	xfs_off_t	toss_start,
	xfs_off_t	toss_finish)
{
	/* Nothing to do if no trace buffer was allocated for this inode. */
	if (ip->i_rwtrace == NULL) {
		return;
	}

	ktrace_enter(ip->i_rwtrace,
		     (void*)((long)tag),
		     (void*)ip,
		     (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff),
		     (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff),
		     (void*)((long)flag),
		     (void*)(unsigned long)((new_size >> 32) & 0xffffffff),
		     (void*)(unsigned long)(new_size & 0xffffffff),
		     (void*)(unsigned long)((toss_start >> 32) & 0xffffffff),
		     (void*)(unsigned long)(toss_start & 0xffffffff),
		     (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff),
		     (void*)(unsigned long)(toss_finish & 0xffffffff),
		     (void*)(unsigned long)current_cpu(),
		     (void*)(unsigned long)current_pid(),
		     (void*)NULL,
		     (void*)NULL,
		     (void*)NULL);
}
#else
#define	xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish)
#endif
1407 1394
1408 /* 1395 /*
1409 * Start the truncation of the file to new_size. The new size 1396 * Start the truncation of the file to new_size. The new size
1410 * must be smaller than the current size. This routine will 1397 * must be smaller than the current size. This routine will
1411 * clear the buffer and page caches of file data in the removed 1398 * clear the buffer and page caches of file data in the removed
1412 * range, and xfs_itruncate_finish() will remove the underlying 1399 * range, and xfs_itruncate_finish() will remove the underlying
1413 * disk blocks. 1400 * disk blocks.
1414 * 1401 *
1415 * The inode must have its I/O lock locked EXCLUSIVELY, and it 1402 * The inode must have its I/O lock locked EXCLUSIVELY, and it
1416 * must NOT have the inode lock held at all. This is because we're 1403 * must NOT have the inode lock held at all. This is because we're
1417 * calling into the buffer/page cache code and we can't hold the 1404 * calling into the buffer/page cache code and we can't hold the
1418 * inode lock when we do so. 1405 * inode lock when we do so.
1419 * 1406 *
1420 * We need to wait for any direct I/Os in flight to complete before we 1407 * We need to wait for any direct I/Os in flight to complete before we
1421 * proceed with the truncate. This is needed to prevent the extents 1408 * proceed with the truncate. This is needed to prevent the extents
1422 * being read or written by the direct I/Os from being removed while the 1409 * being read or written by the direct I/Os from being removed while the
1423 * I/O is in flight as there is no other method of synchronising 1410 * I/O is in flight as there is no other method of synchronising
1424 * direct I/O with the truncate operation. Also, because we hold 1411 * direct I/O with the truncate operation. Also, because we hold
1425 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being 1412 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
1426 * started until the truncate completes and drops the lock. Essentially, 1413 * started until the truncate completes and drops the lock. Essentially,
1427 * the vn_iowait() call forms an I/O barrier that provides strict ordering 1414 * the vn_iowait() call forms an I/O barrier that provides strict ordering
1428 * between direct I/Os and the truncate operation. 1415 * between direct I/Os and the truncate operation.
1429 * 1416 *
1430 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE 1417 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
1431 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used 1418 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
1432 * in the case that the caller is locking things out of order and 1419 * in the case that the caller is locking things out of order and
1433 * may not be able to call xfs_itruncate_finish() with the inode lock 1420 * may not be able to call xfs_itruncate_finish() with the inode lock
1434 * held without dropping the I/O lock. If the caller must drop the 1421 * held without dropping the I/O lock. If the caller must drop the
1435 * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() 1422 * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
1436 * must be called again with all the same restrictions as the initial 1423 * must be called again with all the same restrictions as the initial
1437 * call. 1424 * call.
1438 */ 1425 */
int
xfs_itruncate_start(
	xfs_inode_t	*ip,
	uint		flags,
	xfs_fsize_t	new_size)
{
	xfs_fsize_t	last_byte;
	xfs_off_t	toss_start;
	xfs_mount_t	*mp;
	int		error = 0;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT((new_size == 0) || (new_size <= ip->i_size));
	ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
	       (flags == XFS_ITRUNC_MAYBE));

	mp = ip->i_mount;

	/* wait for the completion of any pending DIOs */
	if (new_size == 0 || new_size < ip->i_size)
		vn_iowait(ip);

	/*
	 * Call toss_pages or flushinval_pages to get rid of pages
	 * overlapping the region being removed.  We have to use
	 * the less efficient flushinval_pages in the case that the
	 * caller may not be able to finish the truncate without
	 * dropping the inode's I/O lock.  Make sure
	 * to catch any pages brought in by buffers overlapping
	 * the EOF by searching out beyond the isize by our
	 * block size.  We round new_size up to a block boundary
	 * so that we don't toss things on the same block as
	 * new_size but before it.
	 *
	 * Before calling toss_page or flushinval_pages, make sure to
	 * call remapf() over the same region if the file is mapped.
	 * This frees up mapped file references to the pages in the
	 * given range and for the flushinval_pages case it ensures
	 * that we get the latest mapped changes flushed out.
	 */
	toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
	toss_start = XFS_FSB_TO_B(mp, toss_start);
	if (toss_start < 0) {
		/*
		 * The place to start tossing is beyond our maximum
		 * file size, so there is no way that the data extended
		 * out there.
		 */
		return 0;
	}
	last_byte = xfs_file_last_byte(ip);
	xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start,
			 last_byte);
	if (last_byte > toss_start) {
		if (flags & XFS_ITRUNC_DEFINITE) {
			/* Caller keeps the I/O lock: tossing is safe. */
			xfs_tosspages(ip, toss_start,
					-1, FI_REMAPF_LOCKED);
		} else {
			/* Caller may drop the I/O lock: flush+invalidate. */
			error = xfs_flushinval_pages(ip, toss_start,
					-1, FI_REMAPF_LOCKED);
		}
	}

#ifdef DEBUG
	/* Truncating to zero must leave no cached pages behind. */
	if (new_size == 0) {
		ASSERT(VN_CACHED(VFS_I(ip)) == 0);
	}
#endif
	return error;
}
1509 1496
1510 /* 1497 /*
1511 * Shrink the file to the given new_size. The new size must be smaller than 1498 * Shrink the file to the given new_size. The new size must be smaller than
1512 * the current size. This will free up the underlying blocks in the removed 1499 * the current size. This will free up the underlying blocks in the removed
1513 * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). 1500 * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
1514 * 1501 *
1515 * The transaction passed to this routine must have made a permanent log 1502 * The transaction passed to this routine must have made a permanent log
1516 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1503 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
1517 * given transaction and start new ones, so make sure everything involved in 1504 * given transaction and start new ones, so make sure everything involved in
1518 * the transaction is tidy before calling here. Some transaction will be 1505 * the transaction is tidy before calling here. Some transaction will be
1519 * returned to the caller to be committed. The incoming transaction must 1506 * returned to the caller to be committed. The incoming transaction must
1520 * already include the inode, and both inode locks must be held exclusively. 1507 * already include the inode, and both inode locks must be held exclusively.
1521 * The inode must also be "held" within the transaction. On return the inode 1508 * The inode must also be "held" within the transaction. On return the inode
1522 * will be "held" within the returned transaction. This routine does NOT 1509 * will be "held" within the returned transaction. This routine does NOT
1523 * require any disk space to be reserved for it within the transaction. 1510 * require any disk space to be reserved for it within the transaction.
1524 * 1511 *
1525 * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it 1512 * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
1526 * indicates the fork which is to be truncated. For the attribute fork we only 1513 * indicates the fork which is to be truncated. For the attribute fork we only
1527 * support truncation to size 0. 1514 * support truncation to size 0.
1528 * 1515 *
1529 * We use the sync parameter to indicate whether or not the first transaction 1516 * We use the sync parameter to indicate whether or not the first transaction
1530 * we perform might have to be synchronous. For the attr fork, it needs to be 1517 * we perform might have to be synchronous. For the attr fork, it needs to be
1531 * so if the unlink of the inode is not yet known to be permanent in the log. 1518 * so if the unlink of the inode is not yet known to be permanent in the log.
1532 * This keeps us from freeing and reusing the blocks of the attribute fork 1519 * This keeps us from freeing and reusing the blocks of the attribute fork
1533 * before the unlink of the inode becomes permanent. 1520 * before the unlink of the inode becomes permanent.
1534 * 1521 *
1535 * For the data fork, we normally have to run synchronously if we're being 1522 * For the data fork, we normally have to run synchronously if we're being
1536 * called out of the inactive path or we're being called out of the create path 1523 * called out of the inactive path or we're being called out of the create path
1537 * where we're truncating an existing file. Either way, the truncate needs to 1524 * where we're truncating an existing file. Either way, the truncate needs to
1538 * be sync so blocks don't reappear in the file with altered data in case of a 1525 * be sync so blocks don't reappear in the file with altered data in case of a
1539 * crash. wsync filesystems can run the first case async because anything that 1526 * crash. wsync filesystems can run the first case async because anything that
1540 * shrinks the inode has to run sync so by the time we're called here from 1527 * shrinks the inode has to run sync so by the time we're called here from
1541 * inactive, the inode size is permanently set to 0. 1528 * inactive, the inode size is permanently set to 0.
1542 * 1529 *
1543 * Calls from the truncate path always need to be sync unless we're in a wsync 1530 * Calls from the truncate path always need to be sync unless we're in a wsync
1544 * filesystem and the file has already been unlinked. 1531 * filesystem and the file has already been unlinked.
1545 * 1532 *
1546 * The caller is responsible for correctly setting the sync parameter. It gets 1533 * The caller is responsible for correctly setting the sync parameter. It gets
1547 * too hard for us to guess here which path we're being called out of just 1534 * too hard for us to guess here which path we're being called out of just
1548 * based on inode state. 1535 * based on inode state.
1549 * 1536 *
1550 * If we get an error, we must return with the inode locked and linked into the 1537 * If we get an error, we must return with the inode locked and linked into the
1551 * current transaction. This keeps things simple for the higher level code, 1538 * current transaction. This keeps things simple for the higher level code,
1552 * because it always knows that the inode is locked and held in the transaction 1539 * because it always knows that the inode is locked and held in the transaction
1553 * that returns to it whether errors occur or not. We don't mark the inode 1540 * that returns to it whether errors occur or not. We don't mark the inode
1554 * dirty on error so that transactions can be easily aborted if possible. 1541 * dirty on error so that transactions can be easily aborted if possible.
1555 */ 1542 */
1556 int 1543 int
1557 xfs_itruncate_finish( 1544 xfs_itruncate_finish(
1558 xfs_trans_t **tp, 1545 xfs_trans_t **tp,
1559 xfs_inode_t *ip, 1546 xfs_inode_t *ip,
1560 xfs_fsize_t new_size, 1547 xfs_fsize_t new_size,
1561 int fork, 1548 int fork,
1562 int sync) 1549 int sync)
1563 { 1550 {
1564 xfs_fsblock_t first_block; 1551 xfs_fsblock_t first_block;
1565 xfs_fileoff_t first_unmap_block; 1552 xfs_fileoff_t first_unmap_block;
1566 xfs_fileoff_t last_block; 1553 xfs_fileoff_t last_block;
1567 xfs_filblks_t unmap_len=0; 1554 xfs_filblks_t unmap_len=0;
1568 xfs_mount_t *mp; 1555 xfs_mount_t *mp;
1569 xfs_trans_t *ntp; 1556 xfs_trans_t *ntp;
1570 int done; 1557 int done;
1571 int committed; 1558 int committed;
1572 xfs_bmap_free_t free_list; 1559 xfs_bmap_free_t free_list;
1573 int error; 1560 int error;
1574 1561
1575 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 1562 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1576 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1563 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1577 ASSERT(*tp != NULL); 1564 ASSERT(*tp != NULL);
1578 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1565 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1579 ASSERT(ip->i_transp == *tp); 1566 ASSERT(ip->i_transp == *tp);
1580 ASSERT(ip->i_itemp != NULL); 1567 ASSERT(ip->i_itemp != NULL);
1581 ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 1568 ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD);
1582 1569
1583 1570
1584 ntp = *tp; 1571 ntp = *tp;
1585 mp = (ntp)->t_mountp; 1572 mp = (ntp)->t_mountp;
1586 ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); 1573 ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
1587 1574
1588 /* 1575 /*
1589 * We only support truncating the entire attribute fork. 1576 * We only support truncating the entire attribute fork.
1590 */ 1577 */
1591 if (fork == XFS_ATTR_FORK) { 1578 if (fork == XFS_ATTR_FORK) {
1592 new_size = 0LL; 1579 new_size = 0LL;
1593 } 1580 }
1594 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1581 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1595 xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 1582 xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0);
1596 /* 1583 /*
1597 * The first thing we do is set the size to new_size permanently 1584 * The first thing we do is set the size to new_size permanently
1598 * on disk. This way we don't have to worry about anyone ever 1585 * on disk. This way we don't have to worry about anyone ever
1599 * being able to look at the data being freed even in the face 1586 * being able to look at the data being freed even in the face
1600 * of a crash. What we're getting around here is the case where 1587 * of a crash. What we're getting around here is the case where
1601 * we free a block, it is allocated to another file, it is written 1588 * we free a block, it is allocated to another file, it is written
1602 * to, and then we crash. If the new data gets written to the 1589 * to, and then we crash. If the new data gets written to the
1603 * file but the log buffers containing the free and reallocation 1590 * file but the log buffers containing the free and reallocation
1604 * don't, then we'd end up with garbage in the blocks being freed. 1591 * don't, then we'd end up with garbage in the blocks being freed.
1605 * As long as we make the new_size permanent before actually 1592 * As long as we make the new_size permanent before actually
1606 * freeing any blocks it doesn't matter if they get writtten to. 1593 * freeing any blocks it doesn't matter if they get writtten to.
1607 * 1594 *
1608 * The callers must signal into us whether or not the size 1595 * The callers must signal into us whether or not the size
1609 * setting here must be synchronous. There are a few cases 1596 * setting here must be synchronous. There are a few cases
1610 * where it doesn't have to be synchronous. Those cases 1597 * where it doesn't have to be synchronous. Those cases
1611 * occur if the file is unlinked and we know the unlink is 1598 * occur if the file is unlinked and we know the unlink is
1612 * permanent or if the blocks being truncated are guaranteed 1599 * permanent or if the blocks being truncated are guaranteed
1613 * to be beyond the inode eof (regardless of the link count) 1600 * to be beyond the inode eof (regardless of the link count)
1614 * and the eof value is permanent. Both of these cases occur 1601 * and the eof value is permanent. Both of these cases occur
1615 * only on wsync-mounted filesystems. In those cases, we're 1602 * only on wsync-mounted filesystems. In those cases, we're
1616 * guaranteed that no user will ever see the data in the blocks 1603 * guaranteed that no user will ever see the data in the blocks
1617 * that are being truncated so the truncate can run async. 1604 * that are being truncated so the truncate can run async.
1618 * In the free beyond eof case, the file may wind up with 1605 * In the free beyond eof case, the file may wind up with
1619 * more blocks allocated to it than it needs if we crash 1606 * more blocks allocated to it than it needs if we crash
1620 * and that won't get fixed until the next time the file 1607 * and that won't get fixed until the next time the file
1621 * is re-opened and closed but that's ok as that shouldn't 1608 * is re-opened and closed but that's ok as that shouldn't
1622 * be too many blocks. 1609 * be too many blocks.
1623 * 1610 *
1624 * However, we can't just make all wsync xactions run async 1611 * However, we can't just make all wsync xactions run async
1625 * because there's one call out of the create path that needs 1612 * because there's one call out of the create path that needs
1626 * to run sync where it's truncating an existing file to size 1613 * to run sync where it's truncating an existing file to size
1627 * 0 whose size is > 0. 1614 * 0 whose size is > 0.
1628 * 1615 *
1629 * It's probably possible to come up with a test in this 1616 * It's probably possible to come up with a test in this
1630 * routine that would correctly distinguish all the above 1617 * routine that would correctly distinguish all the above
1631 * cases from the values of the function parameters and the 1618 * cases from the values of the function parameters and the
1632 * inode state but for sanity's sake, I've decided to let the 1619 * inode state but for sanity's sake, I've decided to let the
1633 * layers above just tell us. It's simpler to correctly figure 1620 * layers above just tell us. It's simpler to correctly figure
1634 * out in the layer above exactly under what conditions we 1621 * out in the layer above exactly under what conditions we
1635 * can run async and I think it's easier for others read and 1622 * can run async and I think it's easier for others read and
1636 * follow the logic in case something has to be changed. 1623 * follow the logic in case something has to be changed.
1637 * cscope is your friend -- rcc. 1624 * cscope is your friend -- rcc.
1638 * 1625 *
1639 * The attribute fork is much simpler. 1626 * The attribute fork is much simpler.
1640 * 1627 *
1641 * For the attribute fork we allow the caller to tell us whether 1628 * For the attribute fork we allow the caller to tell us whether
1642 * the unlink of the inode that led to this call is yet permanent 1629 * the unlink of the inode that led to this call is yet permanent
1643 * in the on disk log. If it is not and we will be freeing extents 1630 * in the on disk log. If it is not and we will be freeing extents
1644 * in this inode then we make the first transaction synchronous 1631 * in this inode then we make the first transaction synchronous
1645 * to make sure that the unlink is permanent by the time we free 1632 * to make sure that the unlink is permanent by the time we free
1646 * the blocks. 1633 * the blocks.
1647 */ 1634 */
1648 if (fork == XFS_DATA_FORK) { 1635 if (fork == XFS_DATA_FORK) {
1649 if (ip->i_d.di_nextents > 0) { 1636 if (ip->i_d.di_nextents > 0) {
1650 /* 1637 /*
1651 * If we are not changing the file size then do 1638 * If we are not changing the file size then do
1652 * not update the on-disk file size - we may be 1639 * not update the on-disk file size - we may be
1653 * called from xfs_inactive_free_eofblocks(). If we 1640 * called from xfs_inactive_free_eofblocks(). If we
1654 * update the on-disk file size and then the system 1641 * update the on-disk file size and then the system
1655 * crashes before the contents of the file are 1642 * crashes before the contents of the file are
1656 * flushed to disk then the files may be full of 1643 * flushed to disk then the files may be full of
1657 * holes (ie NULL files bug). 1644 * holes (ie NULL files bug).
1658 */ 1645 */
1659 if (ip->i_size != new_size) { 1646 if (ip->i_size != new_size) {
1660 ip->i_d.di_size = new_size; 1647 ip->i_d.di_size = new_size;
1661 ip->i_size = new_size; 1648 ip->i_size = new_size;
1662 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1649 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1663 } 1650 }
1664 } 1651 }
1665 } else if (sync) { 1652 } else if (sync) {
1666 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); 1653 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
1667 if (ip->i_d.di_anextents > 0) 1654 if (ip->i_d.di_anextents > 0)
1668 xfs_trans_set_sync(ntp); 1655 xfs_trans_set_sync(ntp);
1669 } 1656 }
1670 ASSERT(fork == XFS_DATA_FORK || 1657 ASSERT(fork == XFS_DATA_FORK ||
1671 (fork == XFS_ATTR_FORK && 1658 (fork == XFS_ATTR_FORK &&
1672 ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || 1659 ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
1673 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); 1660 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
1674 1661
1675 /* 1662 /*
1676 * Since it is possible for space to become allocated beyond 1663 * Since it is possible for space to become allocated beyond
1677 * the end of the file (in a crash where the space is allocated 1664 * the end of the file (in a crash where the space is allocated
1678 * but the inode size is not yet updated), simply remove any 1665 * but the inode size is not yet updated), simply remove any
1679 * blocks which show up between the new EOF and the maximum 1666 * blocks which show up between the new EOF and the maximum
1680 * possible file size. If the first block to be removed is 1667 * possible file size. If the first block to be removed is
1681 * beyond the maximum file size (ie it is the same as last_block), 1668 * beyond the maximum file size (ie it is the same as last_block),
1682 * then there is nothing to do. 1669 * then there is nothing to do.
1683 */ 1670 */
1684 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1671 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1685 ASSERT(first_unmap_block <= last_block); 1672 ASSERT(first_unmap_block <= last_block);
1686 done = 0; 1673 done = 0;
1687 if (last_block == first_unmap_block) { 1674 if (last_block == first_unmap_block) {
1688 done = 1; 1675 done = 1;
1689 } else { 1676 } else {
1690 unmap_len = last_block - first_unmap_block + 1; 1677 unmap_len = last_block - first_unmap_block + 1;
1691 } 1678 }
1692 while (!done) { 1679 while (!done) {
1693 /* 1680 /*
1694 * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() 1681 * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi()
1695 * will tell us whether it freed the entire range or 1682 * will tell us whether it freed the entire range or
1696 * not. If this is a synchronous mount (wsync), 1683 * not. If this is a synchronous mount (wsync),
1697 * then we can tell bunmapi to keep all the 1684 * then we can tell bunmapi to keep all the
1698 * transactions asynchronous since the unlink 1685 * transactions asynchronous since the unlink
1699 * transaction that made this inode inactive has 1686 * transaction that made this inode inactive has
1700 * already hit the disk. There's no danger of 1687 * already hit the disk. There's no danger of
1701 * the freed blocks being reused, there being a 1688 * the freed blocks being reused, there being a
1702 * crash, and the reused blocks suddenly reappearing 1689 * crash, and the reused blocks suddenly reappearing
1703 * in this file with garbage in them once recovery 1690 * in this file with garbage in them once recovery
1704 * runs. 1691 * runs.
1705 */ 1692 */
1706 XFS_BMAP_INIT(&free_list, &first_block); 1693 XFS_BMAP_INIT(&free_list, &first_block);
1707 error = xfs_bunmapi(ntp, ip, 1694 error = xfs_bunmapi(ntp, ip,
1708 first_unmap_block, unmap_len, 1695 first_unmap_block, unmap_len,
1709 XFS_BMAPI_AFLAG(fork) | 1696 XFS_BMAPI_AFLAG(fork) |
1710 (sync ? 0 : XFS_BMAPI_ASYNC), 1697 (sync ? 0 : XFS_BMAPI_ASYNC),
1711 XFS_ITRUNC_MAX_EXTENTS, 1698 XFS_ITRUNC_MAX_EXTENTS,
1712 &first_block, &free_list, 1699 &first_block, &free_list,
1713 NULL, &done); 1700 NULL, &done);
1714 if (error) { 1701 if (error) {
1715 /* 1702 /*
1716 * If the bunmapi call encounters an error, 1703 * If the bunmapi call encounters an error,
1717 * return to the caller where the transaction 1704 * return to the caller where the transaction
1718 * can be properly aborted. We just need to 1705 * can be properly aborted. We just need to
1719 * make sure we're not holding any resources 1706 * make sure we're not holding any resources
1720 * that we were not when we came in. 1707 * that we were not when we came in.
1721 */ 1708 */
1722 xfs_bmap_cancel(&free_list); 1709 xfs_bmap_cancel(&free_list);
1723 return error; 1710 return error;
1724 } 1711 }
1725 1712
1726 /* 1713 /*
1727 * Duplicate the transaction that has the permanent 1714 * Duplicate the transaction that has the permanent
1728 * reservation and commit the old transaction. 1715 * reservation and commit the old transaction.
1729 */ 1716 */
1730 error = xfs_bmap_finish(tp, &free_list, &committed); 1717 error = xfs_bmap_finish(tp, &free_list, &committed);
1731 ntp = *tp; 1718 ntp = *tp;
1732 if (committed) { 1719 if (committed) {
1733 /* link the inode into the next xact in the chain */ 1720 /* link the inode into the next xact in the chain */
1734 xfs_trans_ijoin(ntp, ip, 1721 xfs_trans_ijoin(ntp, ip,
1735 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1722 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1736 xfs_trans_ihold(ntp, ip); 1723 xfs_trans_ihold(ntp, ip);
1737 } 1724 }
1738 1725
1739 if (error) { 1726 if (error) {
1740 /* 1727 /*
1741 * If the bmap finish call encounters an error, return 1728 * If the bmap finish call encounters an error, return
1742 * to the caller where the transaction can be properly 1729 * to the caller where the transaction can be properly
1743 * aborted. We just need to make sure we're not 1730 * aborted. We just need to make sure we're not
1744 * holding any resources that we were not when we came 1731 * holding any resources that we were not when we came
1745 * in. 1732 * in.
1746 * 1733 *
1747 * Aborting from this point might lose some blocks in 1734 * Aborting from this point might lose some blocks in
1748 * the file system, but oh well. 1735 * the file system, but oh well.
1749 */ 1736 */
1750 xfs_bmap_cancel(&free_list); 1737 xfs_bmap_cancel(&free_list);
1751 return error; 1738 return error;
1752 } 1739 }
1753 1740
1754 if (committed) { 1741 if (committed) {
1755 /* 1742 /*
1756 * Mark the inode dirty so it will be logged and 1743 * Mark the inode dirty so it will be logged and
1757 * moved forward in the log as part of every commit. 1744 * moved forward in the log as part of every commit.
1758 */ 1745 */
1759 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1746 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1760 } 1747 }
1761 1748
1762 ntp = xfs_trans_dup(ntp); 1749 ntp = xfs_trans_dup(ntp);
1763 error = xfs_trans_commit(*tp, 0); 1750 error = xfs_trans_commit(*tp, 0);
1764 *tp = ntp; 1751 *tp = ntp;
1765 1752
1766 /* link the inode into the next transaction in the chain */ 1753 /* link the inode into the next transaction in the chain */
1767 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1754 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1768 xfs_trans_ihold(ntp, ip); 1755 xfs_trans_ihold(ntp, ip);
1769 1756
1770 if (error) 1757 if (error)
1771 return error; 1758 return error;
1772 /* 1759 /*
1773 * transaction commit worked ok so we can drop the extra ticket 1760 * transaction commit worked ok so we can drop the extra ticket
1774 * reference that we gained in xfs_trans_dup() 1761 * reference that we gained in xfs_trans_dup()
1775 */ 1762 */
1776 xfs_log_ticket_put(ntp->t_ticket); 1763 xfs_log_ticket_put(ntp->t_ticket);
1777 error = xfs_trans_reserve(ntp, 0, 1764 error = xfs_trans_reserve(ntp, 0,
1778 XFS_ITRUNCATE_LOG_RES(mp), 0, 1765 XFS_ITRUNCATE_LOG_RES(mp), 0,
1779 XFS_TRANS_PERM_LOG_RES, 1766 XFS_TRANS_PERM_LOG_RES,
1780 XFS_ITRUNCATE_LOG_COUNT); 1767 XFS_ITRUNCATE_LOG_COUNT);
1781 if (error) 1768 if (error)
1782 return error; 1769 return error;
1783 } 1770 }
1784 /* 1771 /*
1785 * Only update the size in the case of the data fork, but 1772 * Only update the size in the case of the data fork, but
1786 * always re-log the inode so that our permanent transaction 1773 * always re-log the inode so that our permanent transaction
1787 * can keep on rolling it forward in the log. 1774 * can keep on rolling it forward in the log.
1788 */ 1775 */
1789 if (fork == XFS_DATA_FORK) { 1776 if (fork == XFS_DATA_FORK) {
1790 xfs_isize_check(mp, ip, new_size); 1777 xfs_isize_check(mp, ip, new_size);
1791 /* 1778 /*
1792 * If we are not changing the file size then do 1779 * If we are not changing the file size then do
1793 * not update the on-disk file size - we may be 1780 * not update the on-disk file size - we may be
1794 * called from xfs_inactive_free_eofblocks(). If we 1781 * called from xfs_inactive_free_eofblocks(). If we
1795 * update the on-disk file size and then the system 1782 * update the on-disk file size and then the system
1796 * crashes before the contents of the file are 1783 * crashes before the contents of the file are
1797 * flushed to disk then the files may be full of 1784 * flushed to disk then the files may be full of
1798 * holes (ie NULL files bug). 1785 * holes (ie NULL files bug).
1799 */ 1786 */
1800 if (ip->i_size != new_size) { 1787 if (ip->i_size != new_size) {
1801 ip->i_d.di_size = new_size; 1788 ip->i_d.di_size = new_size;
1802 ip->i_size = new_size; 1789 ip->i_size = new_size;
1803 } 1790 }
1804 } 1791 }
1805 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1792 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1806 ASSERT((new_size != 0) || 1793 ASSERT((new_size != 0) ||
1807 (fork == XFS_ATTR_FORK) || 1794 (fork == XFS_ATTR_FORK) ||
1808 (ip->i_delayed_blks == 0)); 1795 (ip->i_delayed_blks == 0));
1809 ASSERT((new_size != 0) || 1796 ASSERT((new_size != 0) ||
1810 (fork == XFS_ATTR_FORK) || 1797 (fork == XFS_ATTR_FORK) ||
1811 (ip->i_d.di_nextents == 0)); 1798 (ip->i_d.di_nextents == 0));
1812 xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 1799 xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0);
1813 return 0; 1800 return 0;
1814 } 1801 }
1815 1802
1816 /* 1803 /*
1817 * This is called when the inode's link count goes to 0. 1804 * This is called when the inode's link count goes to 0.
1818 * We place the on-disk inode on a list in the AGI. It 1805 * We place the on-disk inode on a list in the AGI. It
1819 * will be pulled from this list when the inode is freed. 1806 * will be pulled from this list when the inode is freed.
1820 */ 1807 */
1821 int 1808 int
1822 xfs_iunlink( 1809 xfs_iunlink(
1823 xfs_trans_t *tp, 1810 xfs_trans_t *tp,
1824 xfs_inode_t *ip) 1811 xfs_inode_t *ip)
1825 { 1812 {
1826 xfs_mount_t *mp; 1813 xfs_mount_t *mp;
1827 xfs_agi_t *agi; 1814 xfs_agi_t *agi;
1828 xfs_dinode_t *dip; 1815 xfs_dinode_t *dip;
1829 xfs_buf_t *agibp; 1816 xfs_buf_t *agibp;
1830 xfs_buf_t *ibp; 1817 xfs_buf_t *ibp;
1831 xfs_agino_t agino; 1818 xfs_agino_t agino;
1832 short bucket_index; 1819 short bucket_index;
1833 int offset; 1820 int offset;
1834 int error; 1821 int error;
1835 1822
1836 ASSERT(ip->i_d.di_nlink == 0); 1823 ASSERT(ip->i_d.di_nlink == 0);
1837 ASSERT(ip->i_d.di_mode != 0); 1824 ASSERT(ip->i_d.di_mode != 0);
1838 ASSERT(ip->i_transp == tp); 1825 ASSERT(ip->i_transp == tp);
1839 1826
1840 mp = tp->t_mountp; 1827 mp = tp->t_mountp;
1841 1828
1842 /* 1829 /*
1843 * Get the agi buffer first. It ensures lock ordering 1830 * Get the agi buffer first. It ensures lock ordering
1844 * on the list. 1831 * on the list.
1845 */ 1832 */
1846 error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); 1833 error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
1847 if (error) 1834 if (error)
1848 return error; 1835 return error;
1849 agi = XFS_BUF_TO_AGI(agibp); 1836 agi = XFS_BUF_TO_AGI(agibp);
1850 1837
1851 /* 1838 /*
1852 * Get the index into the agi hash table for the 1839 * Get the index into the agi hash table for the
1853 * list this inode will go on. 1840 * list this inode will go on.
1854 */ 1841 */
1855 agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 1842 agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
1856 ASSERT(agino != 0); 1843 ASSERT(agino != 0);
1857 bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 1844 bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
1858 ASSERT(agi->agi_unlinked[bucket_index]); 1845 ASSERT(agi->agi_unlinked[bucket_index]);
1859 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 1846 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
1860 1847
1861 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { 1848 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
1862 /* 1849 /*
1863 * There is already another inode in the bucket we need 1850 * There is already another inode in the bucket we need
1864 * to add ourselves to. Add us at the front of the list. 1851 * to add ourselves to. Add us at the front of the list.
1865 * Here we put the head pointer into our next pointer, 1852 * Here we put the head pointer into our next pointer,
1866 * and then we fall through to point the head at us. 1853 * and then we fall through to point the head at us.
1867 */ 1854 */
1868 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1855 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
1869 if (error) 1856 if (error)
1870 return error; 1857 return error;
1871 1858
1872 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); 1859 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
1873 /* both on-disk, don't endian flip twice */ 1860 /* both on-disk, don't endian flip twice */
1874 dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 1861 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
1875 offset = ip->i_boffset + 1862 offset = ip->i_imap.im_boffset +
1876 offsetof(xfs_dinode_t, di_next_unlinked); 1863 offsetof(xfs_dinode_t, di_next_unlinked);
1877 xfs_trans_inode_buf(tp, ibp); 1864 xfs_trans_inode_buf(tp, ibp);
1878 xfs_trans_log_buf(tp, ibp, offset, 1865 xfs_trans_log_buf(tp, ibp, offset,
1879 (offset + sizeof(xfs_agino_t) - 1)); 1866 (offset + sizeof(xfs_agino_t) - 1));
1880 xfs_inobp_check(mp, ibp); 1867 xfs_inobp_check(mp, ibp);
1881 } 1868 }
1882 1869
1883 /* 1870 /*
1884 * Point the bucket head pointer at the inode being inserted. 1871 * Point the bucket head pointer at the inode being inserted.
1885 */ 1872 */
1886 ASSERT(agino != 0); 1873 ASSERT(agino != 0);
1887 agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 1874 agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
1888 offset = offsetof(xfs_agi_t, agi_unlinked) + 1875 offset = offsetof(xfs_agi_t, agi_unlinked) +
1889 (sizeof(xfs_agino_t) * bucket_index); 1876 (sizeof(xfs_agino_t) * bucket_index);
1890 xfs_trans_log_buf(tp, agibp, offset, 1877 xfs_trans_log_buf(tp, agibp, offset,
1891 (offset + sizeof(xfs_agino_t) - 1)); 1878 (offset + sizeof(xfs_agino_t) - 1));
1892 return 0; 1879 return 0;
1893 } 1880 }
1894 1881
1895 /* 1882 /*
1896 * Pull the on-disk inode from the AGI unlinked list. 1883 * Pull the on-disk inode from the AGI unlinked list.
1897 */ 1884 */
STATIC int
xfs_iunlink_remove(
	xfs_trans_t	*tp,	/* transaction all modifications are logged to */
	xfs_inode_t	*ip)	/* inode to remove from its AGI unlinked list */
{
	xfs_ino_t	next_ino;
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agnumber_t	agno;
	xfs_agino_t	agino;
	xfs_agino_t	next_agino;
	xfs_buf_t	*last_ibp;
	xfs_dinode_t	*last_dip = NULL;
	short		bucket_index;
	int		offset, last_offset = 0;
	int		error;

	mp = tp->t_mountp;
	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		return error;

	agi = XFS_BUF_TO_AGI(agibp);

	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	/* The inode must currently be on a non-empty unlinked list. */
	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
	ASSERT(agi->agi_unlinked[bucket_index]);

	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
		/*
		 * We're at the head of the list.  Get the inode's
		 * on-disk buffer to see if there is anyone after us
		 * on the list.  Only modify our next pointer if it
		 * is not already NULLAGINO.  This saves us the overhead
		 * of dealing with the buffer when there is no need to
		 * change it.
		 */
		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
		if (error) {
			cmn_err(CE_WARN,
				"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s.  Returning error.",
				error, mp->m_fsname);
			return error;
		}
		next_agino = be32_to_cpu(dip->di_next_unlinked);
		ASSERT(next_agino != 0);
		if (next_agino != NULLAGINO) {
			/*
			 * Terminate our on-disk forward link and log only
			 * the di_next_unlinked bytes of the inode buffer.
			 * im_boffset locates this inode within its cluster
			 * buffer.
			 */
			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
			offset = ip->i_imap.im_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);
			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			/* Already the list terminator; nothing to log. */
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the bucket head pointer at the next inode.
		 */
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
		offset = offsetof(xfs_agi_t, agi_unlinked) +
			(sizeof(xfs_agino_t) * bucket_index);
		xfs_trans_log_buf(tp, agibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
	} else {
		/*
		 * We need to search the list for the inode being freed.
		 */
		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
		last_ibp = NULL;
		while (next_agino != agino) {
			/*
			 * If the last inode wasn't the one pointing to
			 * us, then release its buffer since we're not
			 * going to do anything with it.
			 */
			if (last_ibp != NULL) {
				xfs_trans_brelse(tp, last_ibp);
			}
			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
			error = xfs_inotobp(mp, tp, next_ino, &last_dip,
					    &last_ibp, &last_offset, 0);
			if (error) {
				cmn_err(CE_WARN,
			"xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s.  Returning error.",
					error, mp->m_fsname);
				return error;
			}
			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
			/*
			 * The target inode is known to be on this list, so
			 * we must reach it before running off the end.
			 */
			ASSERT(next_agino != NULLAGINO);
			ASSERT(next_agino != 0);
		}
		/*
		 * Now last_ibp points to the buffer previous to us on
		 * the unlinked list.  Pull us from the list.
		 */
		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
		if (error) {
			cmn_err(CE_WARN,
				"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s.  Returning error.",
				error, mp->m_fsname);
			return error;
		}
		next_agino = be32_to_cpu(dip->di_next_unlinked);
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		if (next_agino != NULLAGINO) {
			/*
			 * Clear our forward pointer and log just that
			 * field, as in the head-of-list case above.
			 */
			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
			offset = ip->i_imap.im_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);
			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the previous inode on the list to the next inode.
		 */
		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
		ASSERT(next_agino != 0);
		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
		xfs_trans_inode_buf(tp, last_ibp);
		xfs_trans_log_buf(tp, last_ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, last_ibp);
	}
	return 0;
}
2046 2033
/*
 * Mark every in-core inode of the cluster(s) being freed as stale so
 * that their dirty state is never flushed back over the (about to be
 * invalidated) inode buffers.  free_ip is the inode that triggered the
 * free and is already locked by the caller; inum is the first inode
 * number of the chunk being freed.
 */
STATIC void
xfs_ifree_cluster(
	xfs_inode_t	*free_ip,
	xfs_trans_t	*tp,
	xfs_ino_t	inum)
{
	xfs_mount_t		*mp = free_ip->i_mount;
	int			blks_per_cluster;
	int			nbufs;
	int			ninodes;
	int			i, j, found, pre_flushed;
	xfs_daddr_t		blkno;
	xfs_buf_t		*bp;
	xfs_inode_t		*ip, **ip_found;
	xfs_inode_log_item_t	*iip;
	xfs_log_item_t		*lip;
	xfs_perag_t		*pag = xfs_get_perag(mp, inum);

	/*
	 * Work out how the inode chunk maps onto cluster buffers: either
	 * one block holds a whole cluster, or a cluster spans several
	 * blocks.
	 */
	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
		blks_per_cluster = 1;
		ninodes = mp->m_sb.sb_inopblock;
		nbufs = XFS_IALLOC_BLOCKS(mp);
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
					mp->m_sb.sb_blocksize;
		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
	}

	/* Scratch array of in-core inodes we manage to lock, per buffer. */
	ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);

	for (j = 0; j < nbufs; j++, inum += ninodes) {
		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
					 XFS_INO_TO_AGBNO(mp, inum));


		/*
		 * Look for each inode in memory and attempt to lock it,
		 * we can be racing with flush and tail pushing here.
		 * any inode we get the locks on, add to an array of
		 * inode items to process later.
		 *
		 * The get the buffer lock, we could beat a flush
		 * or tail pushing thread to the lock here, in which
		 * case they will go looking for the inode buffer
		 * and fail, we need some other form of interlock
		 * here.
		 */
		found = 0;
		for (i = 0; i < ninodes; i++) {
			read_lock(&pag->pag_ici_lock);
			ip = radix_tree_lookup(&pag->pag_ici_root,
					XFS_INO_TO_AGINO(mp, (inum + i)));

			/* Inode not in memory or we found it already,
			 * nothing to do
			 */
			if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
				read_unlock(&pag->pag_ici_lock);
				continue;
			}

			if (xfs_inode_clean(ip)) {
				read_unlock(&pag->pag_ici_lock);
				continue;
			}

			/* If we can get the locks then add it to the
			 * list, otherwise by the time we get the bp lock
			 * below it will already be attached to the
			 * inode buffer.
			 */

			/* This inode will already be locked - by us, lets
			 * keep it that way.
			 */

			if (ip == free_ip) {
				if (xfs_iflock_nowait(ip)) {
					xfs_iflags_set(ip, XFS_ISTALE);
					if (xfs_inode_clean(ip)) {
						xfs_ifunlock(ip);
					} else {
						ip_found[found++] = ip;
					}
				}
				read_unlock(&pag->pag_ici_lock);
				continue;
			}

			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				if (xfs_iflock_nowait(ip)) {
					xfs_iflags_set(ip, XFS_ISTALE);

					if (xfs_inode_clean(ip)) {
						xfs_ifunlock(ip);
						xfs_iunlock(ip, XFS_ILOCK_EXCL);
					} else {
						ip_found[found++] = ip;
					}
				} else {
					xfs_iunlock(ip, XFS_ILOCK_EXCL);
				}
			}
			read_unlock(&pag->pag_ici_lock);
		}

		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 
					mp->m_bsize * blks_per_cluster,
					XFS_BUF_LOCK);

		/*
		 * Walk the log items already attached to the buffer:
		 * any inode items found there were flushed before we got
		 * the buffer lock, so mark those inodes stale and retarget
		 * their completion callback too.
		 */
		pre_flushed = 0;
		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
		while (lip) {
			if (lip->li_type == XFS_LI_INODE) {
				iip = (xfs_inode_log_item_t *)lip;
				ASSERT(iip->ili_logged == 1);
				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
				xfs_trans_ail_copy_lsn(mp->m_ail,
							&iip->ili_flush_lsn,
							&iip->ili_item.li_lsn);
				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
				pre_flushed++;
			}
			lip = lip->li_bio_list;
		}

		/*
		 * Now handle the inodes we locked ourselves: mark their
		 * log items flushed-as-stale and attach the stale-done
		 * callback to the buffer.
		 */
		for (i = 0; i < found; i++) {
			ip = ip_found[i];
			iip = ip->i_itemp;

			if (!iip) {
				/* Never logged: just drop the flush lock. */
				ip->i_update_core = 0;
				xfs_ifunlock(ip);
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
				continue;
			}

			iip->ili_last_fields = iip->ili_format.ilf_fields;
			iip->ili_format.ilf_fields = 0;
			iip->ili_logged = 1;
			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
						&iip->ili_item.li_lsn);

			xfs_buf_attach_iodone(bp,
				(void(*)(xfs_buf_t*,xfs_log_item_t*))
				xfs_istale_done, (xfs_log_item_t *)iip);
			if (ip != free_ip) {
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
			}
		}

		/* Invalidate the buffer - its contents are being freed. */
		if (found || pre_flushed)
			xfs_trans_stale_inode_buf(tp, bp);
		xfs_trans_binval(tp, bp);
	}

	kmem_free(ip_found);
	xfs_put_perag(mp, pag);
}
2207 2194
2208 /* 2195 /*
2209 * This is called to return an inode to the inode free list. 2196 * This is called to return an inode to the inode free list.
2210 * The inode should already be truncated to 0 length and have 2197 * The inode should already be truncated to 0 length and have
2211 * no pages associated with it. This routine also assumes that 2198 * no pages associated with it. This routine also assumes that
2212 * the inode is already a part of the transaction. 2199 * the inode is already a part of the transaction.
2213 * 2200 *
2214 * The on-disk copy of the inode will have been added to the list 2201 * The on-disk copy of the inode will have been added to the list
2215 * of unlinked inodes in the AGI. We need to remove the inode from 2202 * of unlinked inodes in the AGI. We need to remove the inode from
2216 * that list atomically with respect to freeing it here. 2203 * that list atomically with respect to freeing it here.
2217 */ 2204 */
int
xfs_ifree(
	xfs_trans_t	*tp,	/* transaction the free is performed in */
	xfs_inode_t	*ip,	/* inode to free; ILOCK_EXCL held by caller */
	xfs_bmap_free_t	*flist)	/* list to defer freeing of inode chunk blocks */
{
	int			error;
	int			delete;	/* set if whole inode chunk was freed */
	xfs_ino_t		first_ino;
	xfs_dinode_t    	*dip;
	xfs_buf_t       	*ibp;

	/* Caller guarantees a fully truncated, unlinked, in-transaction inode. */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(ip->i_transp == tp);
	ASSERT(ip->i_d.di_nlink == 0);
	ASSERT(ip->i_d.di_nextents == 0);
	ASSERT(ip->i_d.di_anextents == 0);
	ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
	       ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
	ASSERT(ip->i_d.di_nblocks == 0);

	/*
	 * Pull the on-disk inode from the AGI unlinked list.
	 */
	error = xfs_iunlink_remove(tp, ip);
	if (error != 0) {
		return error;
	}

	/* Return the inode to the inode btree free space. */
	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
	if (error != 0) {
		return error;
	}
	ip->i_d.di_mode = 0;		/* mark incore inode as free */
	ip->i_d.di_flags = 0;
	ip->i_d.di_dmevmask = 0;
	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
	ip->i_df.if_ext_max =
		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	/*
	 * Bump the generation count so no one will be confused
	 * by reincarnations of this inode.
	 */
	ip->i_d.di_gen++;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
	if (error)
		return error;

        /*
	* Clear the on-disk di_mode. This is to prevent xfs_bulkstat
	* from picking up this inode when it is reclaimed (its incore state
	* initialzed but not flushed to disk yet). The in-core di_mode is
	* already cleared  and a corresponding transaction logged.
	* The hack here just synchronizes the in-core to on-disk
	* di_mode value in advance before the actual inode sync to disk.
	* This is OK because the inode is already unlinked and would never
	* change its di_mode again for this inode generation.
	* This is a temporary hack that would require a proper fix
	* in the future.
	*/
	dip->di_mode = 0;

	if (delete) {
		/* Whole chunk freed: stale out all in-core siblings. */
		xfs_ifree_cluster(ip, tp, first_ino);
	}

	return 0;
}
2291 2278
2292 /* 2279 /*
2293 * Reallocate the space for if_broot based on the number of records 2280 * Reallocate the space for if_broot based on the number of records
2294 * being added or deleted as indicated in rec_diff. Move the records 2281 * being added or deleted as indicated in rec_diff. Move the records
2295 * and pointers in if_broot to fit the new size. When shrinking this 2282 * and pointers in if_broot to fit the new size. When shrinking this
2296 * will eliminate holes between the records and pointers created by 2283 * will eliminate holes between the records and pointers created by
2297 * the caller. When growing this will create holes to be filled in 2284 * the caller. When growing this will create holes to be filled in
2298 * by the caller. 2285 * by the caller.
2299 * 2286 *
2300 * The caller must not request to add more records than would fit in 2287 * The caller must not request to add more records than would fit in
2301 * the on-disk inode root. If the if_broot is currently NULL, then 2288 * the on-disk inode root. If the if_broot is currently NULL, then
2302 * if we adding records one will be allocated. The caller must also 2289 * if we adding records one will be allocated. The caller must also
2303 * not request that the number of records go below zero, although 2290 * not request that the number of records go below zero, although
2304 * it can go to zero. 2291 * it can go to zero.
2305 * 2292 *
2306 * ip -- the inode whose if_broot area is changing 2293 * ip -- the inode whose if_broot area is changing
2307 * ext_diff -- the change in the number of records, positive or negative, 2294 * ext_diff -- the change in the number of records, positive or negative,
2308 * requested for the if_broot array. 2295 * requested for the if_broot array.
2309 */ 2296 */
void
xfs_iroot_realloc(
	xfs_inode_t		*ip,
	int			rec_diff,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			cur_max;
	xfs_ifork_t		*ifp;
	struct xfs_btree_block	*new_broot;
	int			new_max;
	size_t			new_size;
	char			*np;	/* new location of pointer array */
	char			*op;	/* old location of pointer array */

	/*
	 * Handle the degenerate case quietly.
	 */
	if (rec_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (rec_diff > 0) {
		/*
		 * If there wasn't any memory allocated before, just
		 * allocate it now and get out.
		 */
		if (ifp->if_broot_bytes == 0) {
			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
			ifp->if_broot_bytes = (int)new_size;
			return;
		}

		/*
		 * If there is already an existing if_broot, then we need
		 * to realloc() it and shift the pointers to their new
		 * location.  The records don't change location because
		 * they are kept butted up against the btree block header.
		 */
		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
		new_max = cur_max + rec_diff;
		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
				KM_SLEEP);
		/*
		 * Compute the old pointer-array address from the old byte
		 * count (if_broot_bytes not yet updated) and the new one
		 * from new_size, then move the pointers after updating
		 * if_broot_bytes.  memmove is required: the regions can
		 * overlap within the reallocated block.
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     (int)new_size);
		ifp->if_broot_bytes = (int)new_size;
		ASSERT(ifp->if_broot_bytes <=
			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
		return;
	}

	/*
	 * rec_diff is less than 0.  In this case, we are shrinking the
	 * if_broot buffer.  It must already exist.  If we go to zero
	 * records, just get rid of the root and clear the status bit.
	 */
	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
	new_max = cur_max + rec_diff;
	ASSERT(new_max >= 0);
	if (new_max > 0)
		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
	else
		new_size = 0;
	if (new_size > 0) {
		new_broot = kmem_alloc(new_size, KM_SLEEP);
		/*
		 * First copy over the btree block header.
		 */
		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
	} else {
		new_broot = NULL;
		ifp->if_flags &= ~XFS_IFBROOT;
	}

	/*
	 * Only copy the records and pointers if there are any.
	 */
	if (new_max > 0) {
		/*
		 * First copy the records.
		 */
		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));

		/*
		 * Then copy the pointers.
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
						     (int)new_size);
		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
	}
	/* Swap in the (possibly NULL) shrunken root. */
	kmem_free(ifp->if_broot);
	ifp->if_broot = new_broot;
	ifp->if_broot_bytes = (int)new_size;
	ASSERT(ifp->if_broot_bytes <=
		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
	return;
}
2419 2406
2420 2407
2421 /* 2408 /*
2422 * This is called when the amount of space needed for if_data 2409 * This is called when the amount of space needed for if_data
2423 * is increased or decreased. The change in size is indicated by 2410 * is increased or decreased. The change in size is indicated by
2424 * the number of bytes that need to be added or deleted in the 2411 * the number of bytes that need to be added or deleted in the
2425 * byte_diff parameter. 2412 * byte_diff parameter.
2426 * 2413 *
2427 * If the amount of space needed has decreased below the size of the 2414 * If the amount of space needed has decreased below the size of the
2428 * inline buffer, then switch to using the inline buffer. Otherwise, 2415 * inline buffer, then switch to using the inline buffer. Otherwise,
2429 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 2416 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
2430 * to what is needed. 2417 * to what is needed.
2431 * 2418 *
2432 * ip -- the inode whose if_data area is changing 2419 * ip -- the inode whose if_data area is changing
2433 * byte_diff -- the change in the number of bytes, positive or negative, 2420 * byte_diff -- the change in the number of bytes, positive or negative,
2434 * requested for the if_data array. 2421 * requested for the if_data array.
2435 */ 2422 */
2436 void 2423 void
2437 xfs_idata_realloc( 2424 xfs_idata_realloc(
2438 xfs_inode_t *ip, 2425 xfs_inode_t *ip,
2439 int byte_diff, 2426 int byte_diff,
2440 int whichfork) 2427 int whichfork)
2441 { 2428 {
2442 xfs_ifork_t *ifp; 2429 xfs_ifork_t *ifp;
2443 int new_size; 2430 int new_size;
2444 int real_size; 2431 int real_size;
2445 2432
2446 if (byte_diff == 0) { 2433 if (byte_diff == 0) {
2447 return; 2434 return;
2448 } 2435 }
2449 2436
2450 ifp = XFS_IFORK_PTR(ip, whichfork); 2437 ifp = XFS_IFORK_PTR(ip, whichfork);
2451 new_size = (int)ifp->if_bytes + byte_diff; 2438 new_size = (int)ifp->if_bytes + byte_diff;
2452 ASSERT(new_size >= 0); 2439 ASSERT(new_size >= 0);
2453 2440
2454 if (new_size == 0) { 2441 if (new_size == 0) {
2455 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2442 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2456 kmem_free(ifp->if_u1.if_data); 2443 kmem_free(ifp->if_u1.if_data);
2457 } 2444 }
2458 ifp->if_u1.if_data = NULL; 2445 ifp->if_u1.if_data = NULL;
2459 real_size = 0; 2446 real_size = 0;
2460 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { 2447 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
2461 /* 2448 /*
2462 * If the valid extents/data can fit in if_inline_ext/data, 2449 * If the valid extents/data can fit in if_inline_ext/data,
2463 * copy them from the malloc'd vector and free it. 2450 * copy them from the malloc'd vector and free it.
2464 */ 2451 */
2465 if (ifp->if_u1.if_data == NULL) { 2452 if (ifp->if_u1.if_data == NULL) {
2466 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 2453 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
2467 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2454 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2468 ASSERT(ifp->if_real_bytes != 0); 2455 ASSERT(ifp->if_real_bytes != 0);
2469 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 2456 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
2470 new_size); 2457 new_size);
2471 kmem_free(ifp->if_u1.if_data); 2458 kmem_free(ifp->if_u1.if_data);
2472 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 2459 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
2473 } 2460 }
2474 real_size = 0; 2461 real_size = 0;
2475 } else { 2462 } else {
2476 /* 2463 /*
2477 * Stuck with malloc/realloc. 2464 * Stuck with malloc/realloc.
2478 * For inline data, the underlying buffer must be 2465 * For inline data, the underlying buffer must be
2479 * a multiple of 4 bytes in size so that it can be 2466 * a multiple of 4 bytes in size so that it can be
2480 * logged and stay on word boundaries. We enforce 2467 * logged and stay on word boundaries. We enforce
2481 * that here. 2468 * that here.
2482 */ 2469 */
2483 real_size = roundup(new_size, 4); 2470 real_size = roundup(new_size, 4);
2484 if (ifp->if_u1.if_data == NULL) { 2471 if (ifp->if_u1.if_data == NULL) {
2485 ASSERT(ifp->if_real_bytes == 0); 2472 ASSERT(ifp->if_real_bytes == 0);
2486 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2473 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
2487 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2474 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2488 /* 2475 /*
2489 * Only do the realloc if the underlying size 2476 * Only do the realloc if the underlying size
2490 * is really changing. 2477 * is really changing.
2491 */ 2478 */
2492 if (ifp->if_real_bytes != real_size) { 2479 if (ifp->if_real_bytes != real_size) {
2493 ifp->if_u1.if_data = 2480 ifp->if_u1.if_data =
2494 kmem_realloc(ifp->if_u1.if_data, 2481 kmem_realloc(ifp->if_u1.if_data,
2495 real_size, 2482 real_size,
2496 ifp->if_real_bytes, 2483 ifp->if_real_bytes,
2497 KM_SLEEP); 2484 KM_SLEEP);
2498 } 2485 }
2499 } else { 2486 } else {
2500 ASSERT(ifp->if_real_bytes == 0); 2487 ASSERT(ifp->if_real_bytes == 0);
2501 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2488 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
2502 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 2489 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
2503 ifp->if_bytes); 2490 ifp->if_bytes);
2504 } 2491 }
2505 } 2492 }
2506 ifp->if_real_bytes = real_size; 2493 ifp->if_real_bytes = real_size;
2507 ifp->if_bytes = new_size; 2494 ifp->if_bytes = new_size;
2508 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2495 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
2509 } 2496 }
2510 2497
2511 void 2498 void
2512 xfs_idestroy_fork( 2499 xfs_idestroy_fork(
2513 xfs_inode_t *ip, 2500 xfs_inode_t *ip,
2514 int whichfork) 2501 int whichfork)
2515 { 2502 {
2516 xfs_ifork_t *ifp; 2503 xfs_ifork_t *ifp;
2517 2504
2518 ifp = XFS_IFORK_PTR(ip, whichfork); 2505 ifp = XFS_IFORK_PTR(ip, whichfork);
2519 if (ifp->if_broot != NULL) { 2506 if (ifp->if_broot != NULL) {
2520 kmem_free(ifp->if_broot); 2507 kmem_free(ifp->if_broot);
2521 ifp->if_broot = NULL; 2508 ifp->if_broot = NULL;
2522 } 2509 }
2523 2510
2524 /* 2511 /*
2525 * If the format is local, then we can't have an extents 2512 * If the format is local, then we can't have an extents
2526 * array so just look for an inline data array. If we're 2513 * array so just look for an inline data array. If we're
2527 * not local then we may or may not have an extents list, 2514 * not local then we may or may not have an extents list,
2528 * so check and free it up if we do. 2515 * so check and free it up if we do.
2529 */ 2516 */
2530 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 2517 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
2531 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 2518 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
2532 (ifp->if_u1.if_data != NULL)) { 2519 (ifp->if_u1.if_data != NULL)) {
2533 ASSERT(ifp->if_real_bytes != 0); 2520 ASSERT(ifp->if_real_bytes != 0);
2534 kmem_free(ifp->if_u1.if_data); 2521 kmem_free(ifp->if_u1.if_data);
2535 ifp->if_u1.if_data = NULL; 2522 ifp->if_u1.if_data = NULL;
2536 ifp->if_real_bytes = 0; 2523 ifp->if_real_bytes = 0;
2537 } 2524 }
2538 } else if ((ifp->if_flags & XFS_IFEXTENTS) && 2525 } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
2539 ((ifp->if_flags & XFS_IFEXTIREC) || 2526 ((ifp->if_flags & XFS_IFEXTIREC) ||
2540 ((ifp->if_u1.if_extents != NULL) && 2527 ((ifp->if_u1.if_extents != NULL) &&
2541 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { 2528 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
2542 ASSERT(ifp->if_real_bytes != 0); 2529 ASSERT(ifp->if_real_bytes != 0);
2543 xfs_iext_destroy(ifp); 2530 xfs_iext_destroy(ifp);
2544 } 2531 }
2545 ASSERT(ifp->if_u1.if_extents == NULL || 2532 ASSERT(ifp->if_u1.if_extents == NULL ||
2546 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 2533 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
2547 ASSERT(ifp->if_real_bytes == 0); 2534 ASSERT(ifp->if_real_bytes == 0);
2548 if (whichfork == XFS_ATTR_FORK) { 2535 if (whichfork == XFS_ATTR_FORK) {
2549 kmem_zone_free(xfs_ifork_zone, ip->i_afp); 2536 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
2550 ip->i_afp = NULL; 2537 ip->i_afp = NULL;
2551 } 2538 }
2552 } 2539 }
2553 2540
2554 /* 2541 /*
2555 * This is called free all the memory associated with an inode. 2542 * This is called free all the memory associated with an inode.
2556 * It must free the inode itself and any buffers allocated for 2543 * It must free the inode itself and any buffers allocated for
2557 * if_extents/if_data and if_broot. It must also free the lock 2544 * if_extents/if_data and if_broot. It must also free the lock
2558 * associated with the inode. 2545 * associated with the inode.
2559 * 2546 *
2560 * Note: because we don't initialise everything on reallocation out 2547 * Note: because we don't initialise everything on reallocation out
2561 * of the zone, we must ensure we nullify everything correctly before 2548 * of the zone, we must ensure we nullify everything correctly before
2562 * freeing the structure. 2549 * freeing the structure.
2563 */ 2550 */
2564 void 2551 void
2565 xfs_idestroy( 2552 xfs_idestroy(
2566 xfs_inode_t *ip) 2553 xfs_inode_t *ip)
2567 { 2554 {
2568 switch (ip->i_d.di_mode & S_IFMT) { 2555 switch (ip->i_d.di_mode & S_IFMT) {
2569 case S_IFREG: 2556 case S_IFREG:
2570 case S_IFDIR: 2557 case S_IFDIR:
2571 case S_IFLNK: 2558 case S_IFLNK:
2572 xfs_idestroy_fork(ip, XFS_DATA_FORK); 2559 xfs_idestroy_fork(ip, XFS_DATA_FORK);
2573 break; 2560 break;
2574 } 2561 }
2575 if (ip->i_afp) 2562 if (ip->i_afp)
2576 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 2563 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
2577 2564
2578 #ifdef XFS_INODE_TRACE 2565 #ifdef XFS_INODE_TRACE
2579 ktrace_free(ip->i_trace); 2566 ktrace_free(ip->i_trace);
2580 #endif 2567 #endif
2581 #ifdef XFS_BMAP_TRACE 2568 #ifdef XFS_BMAP_TRACE
2582 ktrace_free(ip->i_xtrace); 2569 ktrace_free(ip->i_xtrace);
2583 #endif 2570 #endif
2584 #ifdef XFS_BTREE_TRACE 2571 #ifdef XFS_BTREE_TRACE
2585 ktrace_free(ip->i_btrace); 2572 ktrace_free(ip->i_btrace);
2586 #endif 2573 #endif
2587 #ifdef XFS_RW_TRACE 2574 #ifdef XFS_RW_TRACE
2588 ktrace_free(ip->i_rwtrace); 2575 ktrace_free(ip->i_rwtrace);
2589 #endif 2576 #endif
2590 #ifdef XFS_ILOCK_TRACE 2577 #ifdef XFS_ILOCK_TRACE
2591 ktrace_free(ip->i_lock_trace); 2578 ktrace_free(ip->i_lock_trace);
2592 #endif 2579 #endif
2593 #ifdef XFS_DIR2_TRACE 2580 #ifdef XFS_DIR2_TRACE
2594 ktrace_free(ip->i_dir_trace); 2581 ktrace_free(ip->i_dir_trace);
2595 #endif 2582 #endif
2596 if (ip->i_itemp) { 2583 if (ip->i_itemp) {
2597 /* 2584 /*
2598 * Only if we are shutting down the fs will we see an 2585 * Only if we are shutting down the fs will we see an
2599 * inode still in the AIL. If it is there, we should remove 2586 * inode still in the AIL. If it is there, we should remove
2600 * it to prevent a use-after-free from occurring. 2587 * it to prevent a use-after-free from occurring.
2601 */ 2588 */
2602 xfs_log_item_t *lip = &ip->i_itemp->ili_item; 2589 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
2603 struct xfs_ail *ailp = lip->li_ailp; 2590 struct xfs_ail *ailp = lip->li_ailp;
2604 2591
2605 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || 2592 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
2606 XFS_FORCED_SHUTDOWN(ip->i_mount)); 2593 XFS_FORCED_SHUTDOWN(ip->i_mount));
2607 if (lip->li_flags & XFS_LI_IN_AIL) { 2594 if (lip->li_flags & XFS_LI_IN_AIL) {
2608 spin_lock(&ailp->xa_lock); 2595 spin_lock(&ailp->xa_lock);
2609 if (lip->li_flags & XFS_LI_IN_AIL) 2596 if (lip->li_flags & XFS_LI_IN_AIL)
2610 xfs_trans_ail_delete(ailp, lip); 2597 xfs_trans_ail_delete(ailp, lip);
2611 else 2598 else
2612 spin_unlock(&ailp->xa_lock); 2599 spin_unlock(&ailp->xa_lock);
2613 } 2600 }
2614 xfs_inode_item_destroy(ip); 2601 xfs_inode_item_destroy(ip);
2615 ip->i_itemp = NULL; 2602 ip->i_itemp = NULL;
2616 } 2603 }
2617 /* asserts to verify all state is correct here */ 2604 /* asserts to verify all state is correct here */
2618 ASSERT(atomic_read(&ip->i_iocount) == 0); 2605 ASSERT(atomic_read(&ip->i_iocount) == 0);
2619 ASSERT(atomic_read(&ip->i_pincount) == 0); 2606 ASSERT(atomic_read(&ip->i_pincount) == 0);
2620 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 2607 ASSERT(!spin_is_locked(&ip->i_flags_lock));
2621 ASSERT(completion_done(&ip->i_flush)); 2608 ASSERT(completion_done(&ip->i_flush));
2622 kmem_zone_free(xfs_inode_zone, ip); 2609 kmem_zone_free(xfs_inode_zone, ip);
2623 } 2610 }
2624 2611
2625 2612
2626 /* 2613 /*
2627 * Increment the pin count of the given buffer. 2614 * Increment the pin count of the given buffer.
2628 * This value is protected by ipinlock spinlock in the mount structure. 2615 * This value is protected by ipinlock spinlock in the mount structure.
2629 */ 2616 */
2630 void 2617 void
2631 xfs_ipin( 2618 xfs_ipin(
2632 xfs_inode_t *ip) 2619 xfs_inode_t *ip)
2633 { 2620 {
2634 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2621 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2635 2622
2636 atomic_inc(&ip->i_pincount); 2623 atomic_inc(&ip->i_pincount);
2637 } 2624 }
2638 2625
2639 /* 2626 /*
2640 * Decrement the pin count of the given inode, and wake up 2627 * Decrement the pin count of the given inode, and wake up
2641 * anyone in xfs_iwait_unpin() if the count goes to 0. The 2628 * anyone in xfs_iwait_unpin() if the count goes to 0. The
2642 * inode must have been previously pinned with a call to xfs_ipin(). 2629 * inode must have been previously pinned with a call to xfs_ipin().
2643 */ 2630 */
2644 void 2631 void
2645 xfs_iunpin( 2632 xfs_iunpin(
2646 xfs_inode_t *ip) 2633 xfs_inode_t *ip)
2647 { 2634 {
2648 ASSERT(atomic_read(&ip->i_pincount) > 0); 2635 ASSERT(atomic_read(&ip->i_pincount) > 0);
2649 2636
2650 if (atomic_dec_and_test(&ip->i_pincount)) 2637 if (atomic_dec_and_test(&ip->i_pincount))
2651 wake_up(&ip->i_ipin_wait); 2638 wake_up(&ip->i_ipin_wait);
2652 } 2639 }
2653 2640
2654 /* 2641 /*
2655 * This is called to unpin an inode. It can be directed to wait or to return 2642 * This is called to unpin an inode. It can be directed to wait or to return
2656 * immediately without waiting for the inode to be unpinned. The caller must 2643 * immediately without waiting for the inode to be unpinned. The caller must
2657 * have the inode locked in at least shared mode so that the buffer cannot be 2644 * have the inode locked in at least shared mode so that the buffer cannot be
2658 * subsequently pinned once someone is waiting for it to be unpinned. 2645 * subsequently pinned once someone is waiting for it to be unpinned.
2659 */ 2646 */
2660 STATIC void 2647 STATIC void
2661 __xfs_iunpin_wait( 2648 __xfs_iunpin_wait(
2662 xfs_inode_t *ip, 2649 xfs_inode_t *ip,
2663 int wait) 2650 int wait)
2664 { 2651 {
2665 xfs_inode_log_item_t *iip = ip->i_itemp; 2652 xfs_inode_log_item_t *iip = ip->i_itemp;
2666 2653
2667 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2654 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2668 if (atomic_read(&ip->i_pincount) == 0) 2655 if (atomic_read(&ip->i_pincount) == 0)
2669 return; 2656 return;
2670 2657
2671 /* Give the log a push to start the unpinning I/O */ 2658 /* Give the log a push to start the unpinning I/O */
2672 xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 2659 xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
2673 iip->ili_last_lsn : 0, XFS_LOG_FORCE); 2660 iip->ili_last_lsn : 0, XFS_LOG_FORCE);
2674 if (wait) 2661 if (wait)
2675 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2662 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
2676 } 2663 }
2677 2664
2678 static inline void 2665 static inline void
2679 xfs_iunpin_wait( 2666 xfs_iunpin_wait(
2680 xfs_inode_t *ip) 2667 xfs_inode_t *ip)
2681 { 2668 {
2682 __xfs_iunpin_wait(ip, 1); 2669 __xfs_iunpin_wait(ip, 1);
2683 } 2670 }
2684 2671
2685 static inline void 2672 static inline void
2686 xfs_iunpin_nowait( 2673 xfs_iunpin_nowait(
2687 xfs_inode_t *ip) 2674 xfs_inode_t *ip)
2688 { 2675 {
2689 __xfs_iunpin_wait(ip, 0); 2676 __xfs_iunpin_wait(ip, 0);
2690 } 2677 }
2691 2678
2692 2679
2693 /* 2680 /*
2694 * xfs_iextents_copy() 2681 * xfs_iextents_copy()
2695 * 2682 *
2696 * This is called to copy the REAL extents (as opposed to the delayed 2683 * This is called to copy the REAL extents (as opposed to the delayed
2697 * allocation extents) from the inode into the given buffer. It 2684 * allocation extents) from the inode into the given buffer. It
2698 * returns the number of bytes copied into the buffer. 2685 * returns the number of bytes copied into the buffer.
2699 * 2686 *
2700 * If there are no delayed allocation extents, then we can just 2687 * If there are no delayed allocation extents, then we can just
2701 * memcpy() the extents into the buffer. Otherwise, we need to 2688 * memcpy() the extents into the buffer. Otherwise, we need to
2702 * examine each extent in turn and skip those which are delayed. 2689 * examine each extent in turn and skip those which are delayed.
2703 */ 2690 */
2704 int 2691 int
2705 xfs_iextents_copy( 2692 xfs_iextents_copy(
2706 xfs_inode_t *ip, 2693 xfs_inode_t *ip,
2707 xfs_bmbt_rec_t *dp, 2694 xfs_bmbt_rec_t *dp,
2708 int whichfork) 2695 int whichfork)
2709 { 2696 {
2710 int copied; 2697 int copied;
2711 int i; 2698 int i;
2712 xfs_ifork_t *ifp; 2699 xfs_ifork_t *ifp;
2713 int nrecs; 2700 int nrecs;
2714 xfs_fsblock_t start_block; 2701 xfs_fsblock_t start_block;
2715 2702
2716 ifp = XFS_IFORK_PTR(ip, whichfork); 2703 ifp = XFS_IFORK_PTR(ip, whichfork);
2717 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2704 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2718 ASSERT(ifp->if_bytes > 0); 2705 ASSERT(ifp->if_bytes > 0);
2719 2706
2720 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2707 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
2721 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 2708 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
2722 ASSERT(nrecs > 0); 2709 ASSERT(nrecs > 0);
2723 2710
2724 /* 2711 /*
2725 * There are some delayed allocation extents in the 2712 * There are some delayed allocation extents in the
2726 * inode, so copy the extents one at a time and skip 2713 * inode, so copy the extents one at a time and skip
2727 * the delayed ones. There must be at least one 2714 * the delayed ones. There must be at least one
2728 * non-delayed extent. 2715 * non-delayed extent.
2729 */ 2716 */
2730 copied = 0; 2717 copied = 0;
2731 for (i = 0; i < nrecs; i++) { 2718 for (i = 0; i < nrecs; i++) {
2732 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 2719 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
2733 start_block = xfs_bmbt_get_startblock(ep); 2720 start_block = xfs_bmbt_get_startblock(ep);
2734 if (ISNULLSTARTBLOCK(start_block)) { 2721 if (ISNULLSTARTBLOCK(start_block)) {
2735 /* 2722 /*
2736 * It's a delayed allocation extent, so skip it. 2723 * It's a delayed allocation extent, so skip it.
2737 */ 2724 */
2738 continue; 2725 continue;
2739 } 2726 }
2740 2727
2741 /* Translate to on disk format */ 2728 /* Translate to on disk format */
2742 put_unaligned(cpu_to_be64(ep->l0), &dp->l0); 2729 put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
2743 put_unaligned(cpu_to_be64(ep->l1), &dp->l1); 2730 put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
2744 dp++; 2731 dp++;
2745 copied++; 2732 copied++;
2746 } 2733 }
2747 ASSERT(copied != 0); 2734 ASSERT(copied != 0);
2748 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); 2735 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
2749 2736
2750 return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 2737 return (copied * (uint)sizeof(xfs_bmbt_rec_t));
2751 } 2738 }
2752 2739
2753 /* 2740 /*
2754 * Each of the following cases stores data into the same region 2741 * Each of the following cases stores data into the same region
2755 * of the on-disk inode, so only one of them can be valid at 2742 * of the on-disk inode, so only one of them can be valid at
2756 * any given time. While it is possible to have conflicting formats 2743 * any given time. While it is possible to have conflicting formats
2757 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is 2744 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
2758 * in EXTENTS format, this can only happen when the fork has 2745 * in EXTENTS format, this can only happen when the fork has
2759 * changed formats after being modified but before being flushed. 2746 * changed formats after being modified but before being flushed.
2760 * In these cases, the format always takes precedence, because the 2747 * In these cases, the format always takes precedence, because the
2761 * format indicates the current state of the fork. 2748 * format indicates the current state of the fork.
2762 */ 2749 */
2763 /*ARGSUSED*/ 2750 /*ARGSUSED*/
2764 STATIC void 2751 STATIC void
2765 xfs_iflush_fork( 2752 xfs_iflush_fork(
2766 xfs_inode_t *ip, 2753 xfs_inode_t *ip,
2767 xfs_dinode_t *dip, 2754 xfs_dinode_t *dip,
2768 xfs_inode_log_item_t *iip, 2755 xfs_inode_log_item_t *iip,
2769 int whichfork, 2756 int whichfork,
2770 xfs_buf_t *bp) 2757 xfs_buf_t *bp)
2771 { 2758 {
2772 char *cp; 2759 char *cp;
2773 xfs_ifork_t *ifp; 2760 xfs_ifork_t *ifp;
2774 xfs_mount_t *mp; 2761 xfs_mount_t *mp;
2775 #ifdef XFS_TRANS_DEBUG 2762 #ifdef XFS_TRANS_DEBUG
2776 int first; 2763 int first;
2777 #endif 2764 #endif
2778 static const short brootflag[2] = 2765 static const short brootflag[2] =
2779 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 2766 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
2780 static const short dataflag[2] = 2767 static const short dataflag[2] =
2781 { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 2768 { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
2782 static const short extflag[2] = 2769 static const short extflag[2] =
2783 { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 2770 { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
2784 2771
2785 if (!iip) 2772 if (!iip)
2786 return; 2773 return;
2787 ifp = XFS_IFORK_PTR(ip, whichfork); 2774 ifp = XFS_IFORK_PTR(ip, whichfork);
2788 /* 2775 /*
2789 * This can happen if we gave up in iformat in an error path, 2776 * This can happen if we gave up in iformat in an error path,
2790 * for the attribute fork. 2777 * for the attribute fork.
2791 */ 2778 */
2792 if (!ifp) { 2779 if (!ifp) {
2793 ASSERT(whichfork == XFS_ATTR_FORK); 2780 ASSERT(whichfork == XFS_ATTR_FORK);
2794 return; 2781 return;
2795 } 2782 }
2796 cp = XFS_DFORK_PTR(dip, whichfork); 2783 cp = XFS_DFORK_PTR(dip, whichfork);
2797 mp = ip->i_mount; 2784 mp = ip->i_mount;
2798 switch (XFS_IFORK_FORMAT(ip, whichfork)) { 2785 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
2799 case XFS_DINODE_FMT_LOCAL: 2786 case XFS_DINODE_FMT_LOCAL:
2800 if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 2787 if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
2801 (ifp->if_bytes > 0)) { 2788 (ifp->if_bytes > 0)) {
2802 ASSERT(ifp->if_u1.if_data != NULL); 2789 ASSERT(ifp->if_u1.if_data != NULL);
2803 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2790 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
2804 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 2791 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
2805 } 2792 }
2806 break; 2793 break;
2807 2794
2808 case XFS_DINODE_FMT_EXTENTS: 2795 case XFS_DINODE_FMT_EXTENTS:
2809 ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2796 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
2810 !(iip->ili_format.ilf_fields & extflag[whichfork])); 2797 !(iip->ili_format.ilf_fields & extflag[whichfork]));
2811 ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || 2798 ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) ||
2812 (ifp->if_bytes == 0)); 2799 (ifp->if_bytes == 0));
2813 ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || 2800 ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) ||
2814 (ifp->if_bytes > 0)); 2801 (ifp->if_bytes > 0));
2815 if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 2802 if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
2816 (ifp->if_bytes > 0)) { 2803 (ifp->if_bytes > 0)) {
2817 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 2804 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
2818 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 2805 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
2819 whichfork); 2806 whichfork);
2820 } 2807 }
2821 break; 2808 break;
2822 2809
2823 case XFS_DINODE_FMT_BTREE: 2810 case XFS_DINODE_FMT_BTREE:
2824 if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 2811 if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
2825 (ifp->if_broot_bytes > 0)) { 2812 (ifp->if_broot_bytes > 0)) {
2826 ASSERT(ifp->if_broot != NULL); 2813 ASSERT(ifp->if_broot != NULL);
2827 ASSERT(ifp->if_broot_bytes <= 2814 ASSERT(ifp->if_broot_bytes <=
2828 (XFS_IFORK_SIZE(ip, whichfork) + 2815 (XFS_IFORK_SIZE(ip, whichfork) +
2829 XFS_BROOT_SIZE_ADJ)); 2816 XFS_BROOT_SIZE_ADJ));
2830 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 2817 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
2831 (xfs_bmdr_block_t *)cp, 2818 (xfs_bmdr_block_t *)cp,
2832 XFS_DFORK_SIZE(dip, mp, whichfork)); 2819 XFS_DFORK_SIZE(dip, mp, whichfork));
2833 } 2820 }
2834 break; 2821 break;
2835 2822
2836 case XFS_DINODE_FMT_DEV: 2823 case XFS_DINODE_FMT_DEV:
2837 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 2824 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
2838 ASSERT(whichfork == XFS_DATA_FORK); 2825 ASSERT(whichfork == XFS_DATA_FORK);
2839 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 2826 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
2840 } 2827 }
2841 break; 2828 break;
2842 2829
2843 case XFS_DINODE_FMT_UUID: 2830 case XFS_DINODE_FMT_UUID:
2844 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 2831 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
2845 ASSERT(whichfork == XFS_DATA_FORK); 2832 ASSERT(whichfork == XFS_DATA_FORK);
2846 memcpy(XFS_DFORK_DPTR(dip), 2833 memcpy(XFS_DFORK_DPTR(dip),
2847 &ip->i_df.if_u2.if_uuid, 2834 &ip->i_df.if_u2.if_uuid,
2848 sizeof(uuid_t)); 2835 sizeof(uuid_t));
2849 } 2836 }
2850 break; 2837 break;
2851 2838
2852 default: 2839 default:
2853 ASSERT(0); 2840 ASSERT(0);
2854 break; 2841 break;
2855 } 2842 }
2856 } 2843 }
2857 2844
2858 STATIC int 2845 STATIC int
2859 xfs_iflush_cluster( 2846 xfs_iflush_cluster(
2860 xfs_inode_t *ip, 2847 xfs_inode_t *ip,
2861 xfs_buf_t *bp) 2848 xfs_buf_t *bp)
2862 { 2849 {
2863 xfs_mount_t *mp = ip->i_mount; 2850 xfs_mount_t *mp = ip->i_mount;
2864 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 2851 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
2865 unsigned long first_index, mask; 2852 unsigned long first_index, mask;
2866 unsigned long inodes_per_cluster; 2853 unsigned long inodes_per_cluster;
2867 int ilist_size; 2854 int ilist_size;
2868 xfs_inode_t **ilist; 2855 xfs_inode_t **ilist;
2869 xfs_inode_t *iq; 2856 xfs_inode_t *iq;
2870 int nr_found; 2857 int nr_found;
2871 int clcount = 0; 2858 int clcount = 0;
2872 int bufwasdelwri; 2859 int bufwasdelwri;
2873 int i; 2860 int i;
2874 2861
2875 ASSERT(pag->pagi_inodeok); 2862 ASSERT(pag->pagi_inodeok);
2876 ASSERT(pag->pag_ici_init); 2863 ASSERT(pag->pag_ici_init);
2877 2864
2878 inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2865 inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2879 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 2866 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
2880 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2867 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2881 if (!ilist) 2868 if (!ilist)
2882 return 0; 2869 return 0;
2883 2870
2884 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2871 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2885 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2872 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
2886 read_lock(&pag->pag_ici_lock); 2873 read_lock(&pag->pag_ici_lock);
2887 /* really need a gang lookup range call here */ 2874 /* really need a gang lookup range call here */
2888 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2875 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2889 first_index, inodes_per_cluster); 2876 first_index, inodes_per_cluster);
2890 if (nr_found == 0) 2877 if (nr_found == 0)
2891 goto out_free; 2878 goto out_free;
2892 2879
2893 for (i = 0; i < nr_found; i++) { 2880 for (i = 0; i < nr_found; i++) {
2894 iq = ilist[i]; 2881 iq = ilist[i];
2895 if (iq == ip) 2882 if (iq == ip)
2896 continue; 2883 continue;
2897 /* if the inode lies outside this cluster, we're done. */ 2884 /* if the inode lies outside this cluster, we're done. */
2898 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) 2885 if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
2899 break; 2886 break;
2900 /* 2887 /*
2901 * Do an un-protected check to see if the inode is dirty and 2888 * Do an un-protected check to see if the inode is dirty and
2902 * is a candidate for flushing. These checks will be repeated 2889 * is a candidate for flushing. These checks will be repeated
2903 * later after the appropriate locks are acquired. 2890 * later after the appropriate locks are acquired.
2904 */ 2891 */
2905 if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2892 if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
2906 continue; 2893 continue;
2907 2894
2908 /* 2895 /*
2909 * Try to get locks. If any are unavailable or it is pinned, 2896 * Try to get locks. If any are unavailable or it is pinned,
2910 * then this inode cannot be flushed and is skipped. 2897 * then this inode cannot be flushed and is skipped.
2911 */ 2898 */
2912 2899
2913 if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2900 if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
2914 continue; 2901 continue;
2915 if (!xfs_iflock_nowait(iq)) { 2902 if (!xfs_iflock_nowait(iq)) {
2916 xfs_iunlock(iq, XFS_ILOCK_SHARED); 2903 xfs_iunlock(iq, XFS_ILOCK_SHARED);
2917 continue; 2904 continue;
2918 } 2905 }
2919 if (xfs_ipincount(iq)) { 2906 if (xfs_ipincount(iq)) {
2920 xfs_ifunlock(iq); 2907 xfs_ifunlock(iq);
2921 xfs_iunlock(iq, XFS_ILOCK_SHARED); 2908 xfs_iunlock(iq, XFS_ILOCK_SHARED);
2922 continue; 2909 continue;
2923 } 2910 }
2924 2911
2925 /* 2912 /*
2926 * arriving here means that this inode can be flushed. First 2913 * arriving here means that this inode can be flushed. First
2927 * re-check that it's dirty before flushing. 2914 * re-check that it's dirty before flushing.
2928 */ 2915 */
2929 if (!xfs_inode_clean(iq)) { 2916 if (!xfs_inode_clean(iq)) {
2930 int error; 2917 int error;
2931 error = xfs_iflush_int(iq, bp); 2918 error = xfs_iflush_int(iq, bp);
2932 if (error) { 2919 if (error) {
2933 xfs_iunlock(iq, XFS_ILOCK_SHARED); 2920 xfs_iunlock(iq, XFS_ILOCK_SHARED);
2934 goto cluster_corrupt_out; 2921 goto cluster_corrupt_out;
2935 } 2922 }
2936 clcount++; 2923 clcount++;
2937 } else { 2924 } else {
2938 xfs_ifunlock(iq); 2925 xfs_ifunlock(iq);
2939 } 2926 }
2940 xfs_iunlock(iq, XFS_ILOCK_SHARED); 2927 xfs_iunlock(iq, XFS_ILOCK_SHARED);
2941 } 2928 }
2942 2929
2943 if (clcount) { 2930 if (clcount) {
2944 XFS_STATS_INC(xs_icluster_flushcnt); 2931 XFS_STATS_INC(xs_icluster_flushcnt);
2945 XFS_STATS_ADD(xs_icluster_flushinode, clcount); 2932 XFS_STATS_ADD(xs_icluster_flushinode, clcount);
2946 } 2933 }
2947 2934
2948 out_free: 2935 out_free:
2949 read_unlock(&pag->pag_ici_lock); 2936 read_unlock(&pag->pag_ici_lock);
2950 kmem_free(ilist); 2937 kmem_free(ilist);
2951 return 0; 2938 return 0;
2952 2939
2953 2940
2954 cluster_corrupt_out: 2941 cluster_corrupt_out:
2955 /* 2942 /*
2956 * Corruption detected in the clustering loop. Invalidate the 2943 * Corruption detected in the clustering loop. Invalidate the
2957 * inode buffer and shut down the filesystem. 2944 * inode buffer and shut down the filesystem.
2958 */ 2945 */
2959 read_unlock(&pag->pag_ici_lock); 2946 read_unlock(&pag->pag_ici_lock);
2960 /* 2947 /*
2961 * Clean up the buffer. If it was B_DELWRI, just release it -- 2948 * Clean up the buffer. If it was B_DELWRI, just release it --
2962 * brelse can handle it with no problems. If not, shut down the 2949 * brelse can handle it with no problems. If not, shut down the
2963 * filesystem before releasing the buffer. 2950 * filesystem before releasing the buffer.
2964 */ 2951 */
2965 bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); 2952 bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
2966 if (bufwasdelwri) 2953 if (bufwasdelwri)
2967 xfs_buf_relse(bp); 2954 xfs_buf_relse(bp);
2968 2955
2969 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2956 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2970 2957
2971 if (!bufwasdelwri) { 2958 if (!bufwasdelwri) {
2972 /* 2959 /*
2973 * Just like incore_relse: if we have b_iodone functions, 2960 * Just like incore_relse: if we have b_iodone functions,
2974 * mark the buffer as an error and call them. Otherwise 2961 * mark the buffer as an error and call them. Otherwise
2975 * mark it as stale and brelse. 2962 * mark it as stale and brelse.
2976 */ 2963 */
2977 if (XFS_BUF_IODONE_FUNC(bp)) { 2964 if (XFS_BUF_IODONE_FUNC(bp)) {
2978 XFS_BUF_CLR_BDSTRAT_FUNC(bp); 2965 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
2979 XFS_BUF_UNDONE(bp); 2966 XFS_BUF_UNDONE(bp);
2980 XFS_BUF_STALE(bp); 2967 XFS_BUF_STALE(bp);
2981 XFS_BUF_SHUT(bp); 2968 XFS_BUF_SHUT(bp);
2982 XFS_BUF_ERROR(bp,EIO); 2969 XFS_BUF_ERROR(bp,EIO);
2983 xfs_biodone(bp); 2970 xfs_biodone(bp);
2984 } else { 2971 } else {
2985 XFS_BUF_STALE(bp); 2972 XFS_BUF_STALE(bp);
2986 xfs_buf_relse(bp); 2973 xfs_buf_relse(bp);
2987 } 2974 }
2988 } 2975 }
2989 2976
2990 /* 2977 /*
2991 * Unlocks the flush lock 2978 * Unlocks the flush lock
2992 */ 2979 */
2993 xfs_iflush_abort(iq); 2980 xfs_iflush_abort(iq);
2994 kmem_free(ilist); 2981 kmem_free(ilist);
2995 return XFS_ERROR(EFSCORRUPTED); 2982 return XFS_ERROR(EFSCORRUPTED);
2996 } 2983 }
2997 2984
2998 /* 2985 /*
2999 * xfs_iflush() will write a modified inode's changes out to the 2986 * xfs_iflush() will write a modified inode's changes out to the
3000 * inode's on disk home. The caller must have the inode lock held 2987 * inode's on disk home. The caller must have the inode lock held
3001 * in at least shared mode and the inode flush completion must be 2988 * in at least shared mode and the inode flush completion must be
3002 * active as well. The inode lock will still be held upon return from 2989 * active as well. The inode lock will still be held upon return from
3003 * the call and the caller is free to unlock it. 2990 * the call and the caller is free to unlock it.
3004 * The inode flush will be completed when the inode reaches the disk. 2991 * The inode flush will be completed when the inode reaches the disk.
3005 * The flags indicate how the inode's buffer should be written out. 2992 * The flags indicate how the inode's buffer should be written out.
3006 */ 2993 */
3007 int 2994 int
3008 xfs_iflush( 2995 xfs_iflush(
3009 xfs_inode_t *ip, 2996 xfs_inode_t *ip,
3010 uint flags) 2997 uint flags)
3011 { 2998 {
3012 xfs_inode_log_item_t *iip; 2999 xfs_inode_log_item_t *iip;
3013 xfs_buf_t *bp; 3000 xfs_buf_t *bp;
3014 xfs_dinode_t *dip; 3001 xfs_dinode_t *dip;
3015 xfs_mount_t *mp; 3002 xfs_mount_t *mp;
3016 int error; 3003 int error;
3017 int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); 3004 int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
3018 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; 3005 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
3019 3006
3020 XFS_STATS_INC(xs_iflush_count); 3007 XFS_STATS_INC(xs_iflush_count);
3021 3008
3022 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3009 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3023 ASSERT(!completion_done(&ip->i_flush)); 3010 ASSERT(!completion_done(&ip->i_flush));
3024 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3011 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3025 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3012 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3026 3013
3027 iip = ip->i_itemp; 3014 iip = ip->i_itemp;
3028 mp = ip->i_mount; 3015 mp = ip->i_mount;
3029 3016
3030 /* 3017 /*
3031 * If the inode isn't dirty, then just release the inode 3018 * If the inode isn't dirty, then just release the inode
3032 * flush lock and do nothing. 3019 * flush lock and do nothing.
3033 */ 3020 */
3034 if (xfs_inode_clean(ip)) { 3021 if (xfs_inode_clean(ip)) {
3035 xfs_ifunlock(ip); 3022 xfs_ifunlock(ip);
3036 return 0; 3023 return 0;
3037 } 3024 }
3038 3025
3039 /* 3026 /*
3040 * We can't flush the inode until it is unpinned, so wait for it if we 3027 * We can't flush the inode until it is unpinned, so wait for it if we
3041 * are allowed to block. We know noone new can pin it, because we are 3028 * are allowed to block. We know noone new can pin it, because we are
3042 * holding the inode lock shared and you need to hold it exclusively to 3029 * holding the inode lock shared and you need to hold it exclusively to
3043 * pin the inode. 3030 * pin the inode.
3044 * 3031 *
3045 * If we are not allowed to block, force the log out asynchronously so 3032 * If we are not allowed to block, force the log out asynchronously so
3046 * that when we come back the inode will be unpinned. If other inodes 3033 * that when we come back the inode will be unpinned. If other inodes
3047 * in the same cluster are dirty, they will probably write the inode 3034 * in the same cluster are dirty, they will probably write the inode
3048 * out for us if they occur after the log force completes. 3035 * out for us if they occur after the log force completes.
3049 */ 3036 */
3050 if (noblock && xfs_ipincount(ip)) { 3037 if (noblock && xfs_ipincount(ip)) {
3051 xfs_iunpin_nowait(ip); 3038 xfs_iunpin_nowait(ip);
3052 xfs_ifunlock(ip); 3039 xfs_ifunlock(ip);
3053 return EAGAIN; 3040 return EAGAIN;
3054 } 3041 }
3055 xfs_iunpin_wait(ip); 3042 xfs_iunpin_wait(ip);
3056 3043
3057 /* 3044 /*
3058 * This may have been unpinned because the filesystem is shutting 3045 * This may have been unpinned because the filesystem is shutting
3059 * down forcibly. If that's the case we must not write this inode 3046 * down forcibly. If that's the case we must not write this inode
3060 * to disk, because the log record didn't make it to disk! 3047 * to disk, because the log record didn't make it to disk!
3061 */ 3048 */
3062 if (XFS_FORCED_SHUTDOWN(mp)) { 3049 if (XFS_FORCED_SHUTDOWN(mp)) {
3063 ip->i_update_core = 0; 3050 ip->i_update_core = 0;
3064 if (iip) 3051 if (iip)
3065 iip->ili_format.ilf_fields = 0; 3052 iip->ili_format.ilf_fields = 0;
3066 xfs_ifunlock(ip); 3053 xfs_ifunlock(ip);
3067 return XFS_ERROR(EIO); 3054 return XFS_ERROR(EIO);
3068 } 3055 }
3069 3056
3070 /* 3057 /*
3071 * Decide how buffer will be flushed out. This is done before 3058 * Decide how buffer will be flushed out. This is done before
3072 * the call to xfs_iflush_int because this field is zeroed by it. 3059 * the call to xfs_iflush_int because this field is zeroed by it.
3073 */ 3060 */
3074 if (iip != NULL && iip->ili_format.ilf_fields != 0) { 3061 if (iip != NULL && iip->ili_format.ilf_fields != 0) {
3075 /* 3062 /*
3076 * Flush out the inode buffer according to the directions 3063 * Flush out the inode buffer according to the directions
3077 * of the caller. In the cases where the caller has given 3064 * of the caller. In the cases where the caller has given
3078 * us a choice choose the non-delwri case. This is because 3065 * us a choice choose the non-delwri case. This is because
3079 * the inode is in the AIL and we need to get it out soon. 3066 * the inode is in the AIL and we need to get it out soon.
3080 */ 3067 */
3081 switch (flags) { 3068 switch (flags) {
3082 case XFS_IFLUSH_SYNC: 3069 case XFS_IFLUSH_SYNC:
3083 case XFS_IFLUSH_DELWRI_ELSE_SYNC: 3070 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
3084 flags = 0; 3071 flags = 0;
3085 break; 3072 break;
3086 case XFS_IFLUSH_ASYNC_NOBLOCK: 3073 case XFS_IFLUSH_ASYNC_NOBLOCK:
3087 case XFS_IFLUSH_ASYNC: 3074 case XFS_IFLUSH_ASYNC:
3088 case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 3075 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
3089 flags = INT_ASYNC; 3076 flags = INT_ASYNC;
3090 break; 3077 break;
3091 case XFS_IFLUSH_DELWRI: 3078 case XFS_IFLUSH_DELWRI:
3092 flags = INT_DELWRI; 3079 flags = INT_DELWRI;
3093 break; 3080 break;
3094 default: 3081 default:
3095 ASSERT(0); 3082 ASSERT(0);
3096 flags = 0; 3083 flags = 0;
3097 break; 3084 break;
3098 } 3085 }
3099 } else { 3086 } else {
3100 switch (flags) { 3087 switch (flags) {
3101 case XFS_IFLUSH_DELWRI_ELSE_SYNC: 3088 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
3102 case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 3089 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
3103 case XFS_IFLUSH_DELWRI: 3090 case XFS_IFLUSH_DELWRI:
3104 flags = INT_DELWRI; 3091 flags = INT_DELWRI;
3105 break; 3092 break;
3106 case XFS_IFLUSH_ASYNC_NOBLOCK: 3093 case XFS_IFLUSH_ASYNC_NOBLOCK:
3107 case XFS_IFLUSH_ASYNC: 3094 case XFS_IFLUSH_ASYNC:
3108 flags = INT_ASYNC; 3095 flags = INT_ASYNC;
3109 break; 3096 break;
3110 case XFS_IFLUSH_SYNC: 3097 case XFS_IFLUSH_SYNC:
3111 flags = 0; 3098 flags = 0;
3112 break; 3099 break;
3113 default: 3100 default:
3114 ASSERT(0); 3101 ASSERT(0);
3115 flags = 0; 3102 flags = 0;
3116 break; 3103 break;
3117 } 3104 }
3118 } 3105 }
3119 3106
3120 /* 3107 /*
3121 * Get the buffer containing the on-disk inode. 3108 * Get the buffer containing the on-disk inode.
3122 */ 3109 */
3123 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 3110 error = xfs_itobp(mp, NULL, ip, &dip, &bp,
3124 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); 3111 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
3125 if (error || !bp) { 3112 if (error || !bp) {
3126 xfs_ifunlock(ip); 3113 xfs_ifunlock(ip);
3127 return error; 3114 return error;
3128 } 3115 }
3129 3116
3130 /* 3117 /*
3131 * First flush out the inode that xfs_iflush was called with. 3118 * First flush out the inode that xfs_iflush was called with.
3132 */ 3119 */
3133 error = xfs_iflush_int(ip, bp); 3120 error = xfs_iflush_int(ip, bp);
3134 if (error) 3121 if (error)
3135 goto corrupt_out; 3122 goto corrupt_out;
3136 3123
3137 /* 3124 /*
3138 * If the buffer is pinned then push on the log now so we won't 3125 * If the buffer is pinned then push on the log now so we won't
3139 * get stuck waiting in the write for too long. 3126 * get stuck waiting in the write for too long.
3140 */ 3127 */
3141 if (XFS_BUF_ISPINNED(bp)) 3128 if (XFS_BUF_ISPINNED(bp))
3142 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 3129 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
3143 3130
3144 /* 3131 /*
3145 * inode clustering: 3132 * inode clustering:
3146 * see if other inodes can be gathered into this write 3133 * see if other inodes can be gathered into this write
3147 */ 3134 */
3148 error = xfs_iflush_cluster(ip, bp); 3135 error = xfs_iflush_cluster(ip, bp);
3149 if (error) 3136 if (error)
3150 goto cluster_corrupt_out; 3137 goto cluster_corrupt_out;
3151 3138
3152 if (flags & INT_DELWRI) { 3139 if (flags & INT_DELWRI) {
3153 xfs_bdwrite(mp, bp); 3140 xfs_bdwrite(mp, bp);
3154 } else if (flags & INT_ASYNC) { 3141 } else if (flags & INT_ASYNC) {
3155 error = xfs_bawrite(mp, bp); 3142 error = xfs_bawrite(mp, bp);
3156 } else { 3143 } else {
3157 error = xfs_bwrite(mp, bp); 3144 error = xfs_bwrite(mp, bp);
3158 } 3145 }
3159 return error; 3146 return error;
3160 3147
3161 corrupt_out: 3148 corrupt_out:
3162 xfs_buf_relse(bp); 3149 xfs_buf_relse(bp);
3163 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3150 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3164 cluster_corrupt_out: 3151 cluster_corrupt_out:
3165 /* 3152 /*
3166 * Unlocks the flush lock 3153 * Unlocks the flush lock
3167 */ 3154 */
3168 xfs_iflush_abort(ip); 3155 xfs_iflush_abort(ip);
3169 return XFS_ERROR(EFSCORRUPTED); 3156 return XFS_ERROR(EFSCORRUPTED);
3170 } 3157 }
3171 3158
3172 3159
3173 STATIC int 3160 STATIC int
3174 xfs_iflush_int( 3161 xfs_iflush_int(
3175 xfs_inode_t *ip, 3162 xfs_inode_t *ip,
3176 xfs_buf_t *bp) 3163 xfs_buf_t *bp)
3177 { 3164 {
3178 xfs_inode_log_item_t *iip; 3165 xfs_inode_log_item_t *iip;
3179 xfs_dinode_t *dip; 3166 xfs_dinode_t *dip;
3180 xfs_mount_t *mp; 3167 xfs_mount_t *mp;
3181 #ifdef XFS_TRANS_DEBUG 3168 #ifdef XFS_TRANS_DEBUG
3182 int first; 3169 int first;
3183 #endif 3170 #endif
3184 3171
3185 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3172 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3186 ASSERT(!completion_done(&ip->i_flush)); 3173 ASSERT(!completion_done(&ip->i_flush));
3187 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3174 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3188 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3175 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3189 3176
3190 iip = ip->i_itemp; 3177 iip = ip->i_itemp;
3191 mp = ip->i_mount; 3178 mp = ip->i_mount;
3192 3179
3193 3180
3194 /* 3181 /*
3195 * If the inode isn't dirty, then just release the inode 3182 * If the inode isn't dirty, then just release the inode
3196 * flush lock and do nothing. 3183 * flush lock and do nothing.
3197 */ 3184 */
3198 if (xfs_inode_clean(ip)) { 3185 if (xfs_inode_clean(ip)) {
3199 xfs_ifunlock(ip); 3186 xfs_ifunlock(ip);
3200 return 0; 3187 return 0;
3201 } 3188 }
3202 3189
3203 /* set *dip = inode's place in the buffer */ 3190 /* set *dip = inode's place in the buffer */
3204 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); 3191 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
3205 3192
3206 /* 3193 /*
3207 * Clear i_update_core before copying out the data. 3194 * Clear i_update_core before copying out the data.
3208 * This is for coordination with our timestamp updates 3195 * This is for coordination with our timestamp updates
3209 * that don't hold the inode lock. They will always 3196 * that don't hold the inode lock. They will always
3210 * update the timestamps BEFORE setting i_update_core, 3197 * update the timestamps BEFORE setting i_update_core,
3211 * so if we clear i_update_core after they set it we 3198 * so if we clear i_update_core after they set it we
3212 * are guaranteed to see their updates to the timestamps. 3199 * are guaranteed to see their updates to the timestamps.
3213 * I believe that this depends on strongly ordered memory 3200 * I believe that this depends on strongly ordered memory
3214 * semantics, but we have that. We use the SYNCHRONIZE 3201 * semantics, but we have that. We use the SYNCHRONIZE
3215 * macro to make sure that the compiler does not reorder 3202 * macro to make sure that the compiler does not reorder
3216 * the i_update_core access below the data copy below. 3203 * the i_update_core access below the data copy below.
3217 */ 3204 */
3218 ip->i_update_core = 0; 3205 ip->i_update_core = 0;
3219 SYNCHRONIZE(); 3206 SYNCHRONIZE();
3220 3207
3221 /* 3208 /*
3222 * Make sure to get the latest atime from the Linux inode. 3209 * Make sure to get the latest atime from the Linux inode.
3223 */ 3210 */
3224 xfs_synchronize_atime(ip); 3211 xfs_synchronize_atime(ip);
3225 3212
3226 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, 3213 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
3227 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 3214 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
3228 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3215 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3229 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3216 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p",
3230 ip->i_ino, be16_to_cpu(dip->di_magic), dip); 3217 ip->i_ino, be16_to_cpu(dip->di_magic), dip);
3231 goto corrupt_out; 3218 goto corrupt_out;
3232 } 3219 }
3233 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 3220 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
3234 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 3221 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
3235 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3222 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3236 "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 3223 "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
3237 ip->i_ino, ip, ip->i_d.di_magic); 3224 ip->i_ino, ip, ip->i_d.di_magic);
3238 goto corrupt_out; 3225 goto corrupt_out;
3239 } 3226 }
3240 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 3227 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
3241 if (XFS_TEST_ERROR( 3228 if (XFS_TEST_ERROR(
3242 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3229 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
3243 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 3230 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
3244 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 3231 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
3245 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3232 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3246 "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", 3233 "xfs_iflush: Bad regular inode %Lu, ptr 0x%p",
3247 ip->i_ino, ip); 3234 ip->i_ino, ip);
3248 goto corrupt_out; 3235 goto corrupt_out;
3249 } 3236 }
3250 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 3237 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
3251 if (XFS_TEST_ERROR( 3238 if (XFS_TEST_ERROR(
3252 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3239 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
3253 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 3240 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
3254 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 3241 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
3255 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 3242 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
3256 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3243 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3257 "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", 3244 "xfs_iflush: Bad directory inode %Lu, ptr 0x%p",
3258 ip->i_ino, ip); 3245 ip->i_ino, ip);
3259 goto corrupt_out; 3246 goto corrupt_out;
3260 } 3247 }
3261 } 3248 }
3262 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 3249 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
3263 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 3250 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
3264 XFS_RANDOM_IFLUSH_5)) { 3251 XFS_RANDOM_IFLUSH_5)) {
3265 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3252 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3266 "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", 3253 "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p",
3267 ip->i_ino, 3254 ip->i_ino,
3268 ip->i_d.di_nextents + ip->i_d.di_anextents, 3255 ip->i_d.di_nextents + ip->i_d.di_anextents,
3269 ip->i_d.di_nblocks, 3256 ip->i_d.di_nblocks,
3270 ip); 3257 ip);
3271 goto corrupt_out; 3258 goto corrupt_out;
3272 } 3259 }
3273 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 3260 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
3274 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 3261 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
3275 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3262 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3276 "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 3263 "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
3277 ip->i_ino, ip->i_d.di_forkoff, ip); 3264 ip->i_ino, ip->i_d.di_forkoff, ip);
3278 goto corrupt_out; 3265 goto corrupt_out;
3279 } 3266 }
3280 /* 3267 /*
3281 * bump the flush iteration count, used to detect flushes which 3268 * bump the flush iteration count, used to detect flushes which
3282 * postdate a log record during recovery. 3269 * postdate a log record during recovery.
3283 */ 3270 */
3284 3271
3285 ip->i_d.di_flushiter++; 3272 ip->i_d.di_flushiter++;
3286 3273
3287 /* 3274 /*
3288 * Copy the dirty parts of the inode into the on-disk 3275 * Copy the dirty parts of the inode into the on-disk
3289 * inode. We always copy out the core of the inode, 3276 * inode. We always copy out the core of the inode,
3290 * because if the inode is dirty at all the core must 3277 * because if the inode is dirty at all the core must
3291 * be. 3278 * be.
3292 */ 3279 */
3293 xfs_dinode_to_disk(dip, &ip->i_d); 3280 xfs_dinode_to_disk(dip, &ip->i_d);
3294 3281
3295 /* Wrap, we never let the log put out DI_MAX_FLUSH */ 3282 /* Wrap, we never let the log put out DI_MAX_FLUSH */
3296 if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 3283 if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
3297 ip->i_d.di_flushiter = 0; 3284 ip->i_d.di_flushiter = 0;
3298 3285
3299 /* 3286 /*
3300 * If this is really an old format inode and the superblock version 3287 * If this is really an old format inode and the superblock version
3301 * has not been updated to support only new format inodes, then 3288 * has not been updated to support only new format inodes, then
3302 * convert back to the old inode format. If the superblock version 3289 * convert back to the old inode format. If the superblock version
3303 * has been updated, then make the conversion permanent. 3290 * has been updated, then make the conversion permanent.
3304 */ 3291 */
3305 ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); 3292 ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
3306 if (ip->i_d.di_version == 1) { 3293 if (ip->i_d.di_version == 1) {
3307 if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 3294 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
3308 /* 3295 /*
3309 * Convert it back. 3296 * Convert it back.
3310 */ 3297 */
3311 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 3298 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
3312 dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); 3299 dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
3313 } else { 3300 } else {
3314 /* 3301 /*
3315 * The superblock version has already been bumped, 3302 * The superblock version has already been bumped,
3316 * so just make the conversion to the new inode 3303 * so just make the conversion to the new inode
3317 * format permanent. 3304 * format permanent.
3318 */ 3305 */
3319 ip->i_d.di_version = 2; 3306 ip->i_d.di_version = 2;
3320 dip->di_version = 2; 3307 dip->di_version = 2;
3321 ip->i_d.di_onlink = 0; 3308 ip->i_d.di_onlink = 0;
3322 dip->di_onlink = 0; 3309 dip->di_onlink = 0;
3323 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 3310 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
3324 memset(&(dip->di_pad[0]), 0, 3311 memset(&(dip->di_pad[0]), 0,
3325 sizeof(dip->di_pad)); 3312 sizeof(dip->di_pad));
3326 ASSERT(ip->i_d.di_projid == 0); 3313 ASSERT(ip->i_d.di_projid == 0);
3327 } 3314 }
3328 } 3315 }
3329 3316
3330 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 3317 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
3331 if (XFS_IFORK_Q(ip)) 3318 if (XFS_IFORK_Q(ip))
3332 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 3319 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
3333 xfs_inobp_check(mp, bp); 3320 xfs_inobp_check(mp, bp);
3334 3321
3335 /* 3322 /*
3336 * We've recorded everything logged in the inode, so we'd 3323 * We've recorded everything logged in the inode, so we'd
3337 * like to clear the ilf_fields bits so we don't log and 3324 * like to clear the ilf_fields bits so we don't log and
3338 * flush things unnecessarily. However, we can't stop 3325 * flush things unnecessarily. However, we can't stop
3339 * logging all this information until the data we've copied 3326 * logging all this information until the data we've copied
3340 * into the disk buffer is written to disk. If we did we might 3327 * into the disk buffer is written to disk. If we did we might
3341 * overwrite the copy of the inode in the log with all the 3328 * overwrite the copy of the inode in the log with all the
3342 * data after re-logging only part of it, and in the face of 3329 * data after re-logging only part of it, and in the face of
3343 * a crash we wouldn't have all the data we need to recover. 3330 * a crash we wouldn't have all the data we need to recover.
3344 * 3331 *
3345 * What we do is move the bits to the ili_last_fields field. 3332 * What we do is move the bits to the ili_last_fields field.
3346 * When logging the inode, these bits are moved back to the 3333 * When logging the inode, these bits are moved back to the
3347 * ilf_fields field. In the xfs_iflush_done() routine we 3334 * ilf_fields field. In the xfs_iflush_done() routine we
3348 * clear ili_last_fields, since we know that the information 3335 * clear ili_last_fields, since we know that the information
3349 * those bits represent is permanently on disk. As long as 3336 * those bits represent is permanently on disk. As long as
3350 * the flush completes before the inode is logged again, then 3337 * the flush completes before the inode is logged again, then
3351 * both ilf_fields and ili_last_fields will be cleared. 3338 * both ilf_fields and ili_last_fields will be cleared.
3352 * 3339 *
3353 * We can play with the ilf_fields bits here, because the inode 3340 * We can play with the ilf_fields bits here, because the inode
3354 * lock must be held exclusively in order to set bits there 3341 * lock must be held exclusively in order to set bits there
3355 * and the flush lock protects the ili_last_fields bits. 3342 * and the flush lock protects the ili_last_fields bits.
3356 * Set ili_logged so the flush done 3343 * Set ili_logged so the flush done
3357 * routine can tell whether or not to look in the AIL. 3344 * routine can tell whether or not to look in the AIL.
3358 * Also, store the current LSN of the inode so that we can tell 3345 * Also, store the current LSN of the inode so that we can tell
3359 * whether the item has moved in the AIL from xfs_iflush_done(). 3346 * whether the item has moved in the AIL from xfs_iflush_done().
3360 * In order to read the lsn we need the AIL lock, because 3347 * In order to read the lsn we need the AIL lock, because
3361 * it is a 64 bit value that cannot be read atomically. 3348 * it is a 64 bit value that cannot be read atomically.
3362 */ 3349 */
3363 if (iip != NULL && iip->ili_format.ilf_fields != 0) { 3350 if (iip != NULL && iip->ili_format.ilf_fields != 0) {
3364 iip->ili_last_fields = iip->ili_format.ilf_fields; 3351 iip->ili_last_fields = iip->ili_format.ilf_fields;
3365 iip->ili_format.ilf_fields = 0; 3352 iip->ili_format.ilf_fields = 0;
3366 iip->ili_logged = 1; 3353 iip->ili_logged = 1;
3367 3354
3368 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 3355 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
3369 &iip->ili_item.li_lsn); 3356 &iip->ili_item.li_lsn);
3370 3357
3371 /* 3358 /*
3372 * Attach the function xfs_iflush_done to the inode's 3359 * Attach the function xfs_iflush_done to the inode's
3373 * buffer. This will remove the inode from the AIL 3360 * buffer. This will remove the inode from the AIL
3374 * and unlock the inode's flush lock when the inode is 3361 * and unlock the inode's flush lock when the inode is
3375 * completely written to disk. 3362 * completely written to disk.
3376 */ 3363 */
3377 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) 3364 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*))
3378 xfs_iflush_done, (xfs_log_item_t *)iip); 3365 xfs_iflush_done, (xfs_log_item_t *)iip);
3379 3366
3380 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 3367 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
3381 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 3368 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
3382 } else { 3369 } else {
3383 /* 3370 /*
3384 * We're flushing an inode which is not in the AIL and has 3371 * We're flushing an inode which is not in the AIL and has
3385 * not been logged but has i_update_core set. For this 3372 * not been logged but has i_update_core set. For this
3386 * case we can use a B_DELWRI flush and immediately drop 3373 * case we can use a B_DELWRI flush and immediately drop
3387 * the inode flush lock because we can avoid the whole 3374 * the inode flush lock because we can avoid the whole
3388 * AIL state thing. It's OK to drop the flush lock now, 3375 * AIL state thing. It's OK to drop the flush lock now,
3389 * because we've already locked the buffer and to do anything 3376 * because we've already locked the buffer and to do anything
3390 * you really need both. 3377 * you really need both.
3391 */ 3378 */
3392 if (iip != NULL) { 3379 if (iip != NULL) {
3393 ASSERT(iip->ili_logged == 0); 3380 ASSERT(iip->ili_logged == 0);
3394 ASSERT(iip->ili_last_fields == 0); 3381 ASSERT(iip->ili_last_fields == 0);
3395 ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); 3382 ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
3396 } 3383 }
3397 xfs_ifunlock(ip); 3384 xfs_ifunlock(ip);
3398 } 3385 }
3399 3386
3400 return 0; 3387 return 0;
3401 3388
3402 corrupt_out: 3389 corrupt_out:
3403 return XFS_ERROR(EFSCORRUPTED); 3390 return XFS_ERROR(EFSCORRUPTED);
3404 } 3391 }
3405 3392
3406 3393
3407 3394
#ifdef XFS_ILOCK_TRACE
/* Global ktrace buffer for inode lock tracing (debug builds only). */
ktrace_t	*xfs_ilock_trace_buf;

/*
 * Record one inode lock/unlock event in the inode's trace buffer.
 */
void
xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
{
	ktrace_enter(ip->i_lock_trace,
		     (void *)ip,
		     (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */
		     (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */
		     (void *)ra,		/* caller of ilock */
		     (void *)(unsigned long)current_cpu(),
		     (void *)(unsigned long)current_pid(),
		     NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
}
#endif
3424 3411
3425 /* 3412 /*
3426 * Return a pointer to the extent record at file index idx. 3413 * Return a pointer to the extent record at file index idx.
3427 */ 3414 */
3428 xfs_bmbt_rec_host_t * 3415 xfs_bmbt_rec_host_t *
3429 xfs_iext_get_ext( 3416 xfs_iext_get_ext(
3430 xfs_ifork_t *ifp, /* inode fork pointer */ 3417 xfs_ifork_t *ifp, /* inode fork pointer */
3431 xfs_extnum_t idx) /* index of target extent */ 3418 xfs_extnum_t idx) /* index of target extent */
3432 { 3419 {
3433 ASSERT(idx >= 0); 3420 ASSERT(idx >= 0);
3434 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 3421 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
3435 return ifp->if_u1.if_ext_irec->er_extbuf; 3422 return ifp->if_u1.if_ext_irec->er_extbuf;
3436 } else if (ifp->if_flags & XFS_IFEXTIREC) { 3423 } else if (ifp->if_flags & XFS_IFEXTIREC) {
3437 xfs_ext_irec_t *erp; /* irec pointer */ 3424 xfs_ext_irec_t *erp; /* irec pointer */
3438 int erp_idx = 0; /* irec index */ 3425 int erp_idx = 0; /* irec index */
3439 xfs_extnum_t page_idx = idx; /* ext index in target list */ 3426 xfs_extnum_t page_idx = idx; /* ext index in target list */
3440 3427
3441 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 3428 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
3442 return &erp->er_extbuf[page_idx]; 3429 return &erp->er_extbuf[page_idx];
3443 } else if (ifp->if_bytes) { 3430 } else if (ifp->if_bytes) {
3444 return &ifp->if_u1.if_extents[idx]; 3431 return &ifp->if_u1.if_extents[idx];
3445 } else { 3432 } else {
3446 return NULL; 3433 return NULL;
3447 } 3434 }
3448 } 3435 }
3449 3436
3450 /* 3437 /*
3451 * Insert new item(s) into the extent records for incore inode 3438 * Insert new item(s) into the extent records for incore inode
3452 * fork 'ifp'. 'count' new items are inserted at index 'idx'. 3439 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
3453 */ 3440 */
3454 void 3441 void
3455 xfs_iext_insert( 3442 xfs_iext_insert(
3456 xfs_ifork_t *ifp, /* inode fork pointer */ 3443 xfs_ifork_t *ifp, /* inode fork pointer */
3457 xfs_extnum_t idx, /* starting index of new items */ 3444 xfs_extnum_t idx, /* starting index of new items */
3458 xfs_extnum_t count, /* number of inserted items */ 3445 xfs_extnum_t count, /* number of inserted items */
3459 xfs_bmbt_irec_t *new) /* items to insert */ 3446 xfs_bmbt_irec_t *new) /* items to insert */
3460 { 3447 {
3461 xfs_extnum_t i; /* extent record index */ 3448 xfs_extnum_t i; /* extent record index */
3462 3449
3463 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3450 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3464 xfs_iext_add(ifp, idx, count); 3451 xfs_iext_add(ifp, idx, count);
3465 for (i = idx; i < idx + count; i++, new++) 3452 for (i = idx; i < idx + count; i++, new++)
3466 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); 3453 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
3467 } 3454 }
3468 3455
3469 /* 3456 /*
3470 * This is called when the amount of space required for incore file 3457 * This is called when the amount of space required for incore file
3471 * extents needs to be increased. The ext_diff parameter stores the 3458 * extents needs to be increased. The ext_diff parameter stores the
3472 * number of new extents being added and the idx parameter contains 3459 * number of new extents being added and the idx parameter contains
3473 * the extent index where the new extents will be added. If the new 3460 * the extent index where the new extents will be added. If the new
3474 * extents are being appended, then we just need to (re)allocate and 3461 * extents are being appended, then we just need to (re)allocate and
3475 * initialize the space. Otherwise, if the new extents are being 3462 * initialize the space. Otherwise, if the new extents are being
3476 * inserted into the middle of the existing entries, a bit more work 3463 * inserted into the middle of the existing entries, a bit more work
3477 * is required to make room for the new extents to be inserted. The 3464 * is required to make room for the new extents to be inserted. The
3478 * caller is responsible for filling in the new extent entries upon 3465 * caller is responsible for filling in the new extent entries upon
3479 * return. 3466 * return.
3480 */ 3467 */
3481 void 3468 void
3482 xfs_iext_add( 3469 xfs_iext_add(
3483 xfs_ifork_t *ifp, /* inode fork pointer */ 3470 xfs_ifork_t *ifp, /* inode fork pointer */
3484 xfs_extnum_t idx, /* index to begin adding exts */ 3471 xfs_extnum_t idx, /* index to begin adding exts */
3485 int ext_diff) /* number of extents to add */ 3472 int ext_diff) /* number of extents to add */
3486 { 3473 {
3487 int byte_diff; /* new bytes being added */ 3474 int byte_diff; /* new bytes being added */
3488 int new_size; /* size of extents after adding */ 3475 int new_size; /* size of extents after adding */
3489 xfs_extnum_t nextents; /* number of extents in file */ 3476 xfs_extnum_t nextents; /* number of extents in file */
3490 3477
3491 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3478 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3492 ASSERT((idx >= 0) && (idx <= nextents)); 3479 ASSERT((idx >= 0) && (idx <= nextents));
3493 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 3480 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
3494 new_size = ifp->if_bytes + byte_diff; 3481 new_size = ifp->if_bytes + byte_diff;
3495 /* 3482 /*
3496 * If the new number of extents (nextents + ext_diff) 3483 * If the new number of extents (nextents + ext_diff)
3497 * fits inside the inode, then continue to use the inline 3484 * fits inside the inode, then continue to use the inline
3498 * extent buffer. 3485 * extent buffer.
3499 */ 3486 */
3500 if (nextents + ext_diff <= XFS_INLINE_EXTS) { 3487 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
3501 if (idx < nextents) { 3488 if (idx < nextents) {
3502 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 3489 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
3503 &ifp->if_u2.if_inline_ext[idx], 3490 &ifp->if_u2.if_inline_ext[idx],
3504 (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 3491 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
3505 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 3492 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
3506 } 3493 }
3507 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 3494 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
3508 ifp->if_real_bytes = 0; 3495 ifp->if_real_bytes = 0;
3509 ifp->if_lastex = nextents + ext_diff; 3496 ifp->if_lastex = nextents + ext_diff;
3510 } 3497 }
3511 /* 3498 /*
3512 * Otherwise use a linear (direct) extent list. 3499 * Otherwise use a linear (direct) extent list.
3513 * If the extents are currently inside the inode, 3500 * If the extents are currently inside the inode,
3514 * xfs_iext_realloc_direct will switch us from 3501 * xfs_iext_realloc_direct will switch us from
3515 * inline to direct extent allocation mode. 3502 * inline to direct extent allocation mode.
3516 */ 3503 */
3517 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 3504 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
3518 xfs_iext_realloc_direct(ifp, new_size); 3505 xfs_iext_realloc_direct(ifp, new_size);
3519 if (idx < nextents) { 3506 if (idx < nextents) {
3520 memmove(&ifp->if_u1.if_extents[idx + ext_diff], 3507 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
3521 &ifp->if_u1.if_extents[idx], 3508 &ifp->if_u1.if_extents[idx],
3522 (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 3509 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
3523 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 3510 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
3524 } 3511 }
3525 } 3512 }
3526 /* Indirection array */ 3513 /* Indirection array */
3527 else { 3514 else {
3528 xfs_ext_irec_t *erp; 3515 xfs_ext_irec_t *erp;
3529 int erp_idx = 0; 3516 int erp_idx = 0;
3530 int page_idx = idx; 3517 int page_idx = idx;
3531 3518
3532 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 3519 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
3533 if (ifp->if_flags & XFS_IFEXTIREC) { 3520 if (ifp->if_flags & XFS_IFEXTIREC) {
3534 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 3521 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
3535 } else { 3522 } else {
3536 xfs_iext_irec_init(ifp); 3523 xfs_iext_irec_init(ifp);
3537 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3524 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3538 erp = ifp->if_u1.if_ext_irec; 3525 erp = ifp->if_u1.if_ext_irec;
3539 } 3526 }
3540 /* Extents fit in target extent page */ 3527 /* Extents fit in target extent page */
3541 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 3528 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
3542 if (page_idx < erp->er_extcount) { 3529 if (page_idx < erp->er_extcount) {
3543 memmove(&erp->er_extbuf[page_idx + ext_diff], 3530 memmove(&erp->er_extbuf[page_idx + ext_diff],
3544 &erp->er_extbuf[page_idx], 3531 &erp->er_extbuf[page_idx],
3545 (erp->er_extcount - page_idx) * 3532 (erp->er_extcount - page_idx) *
3546 sizeof(xfs_bmbt_rec_t)); 3533 sizeof(xfs_bmbt_rec_t));
3547 memset(&erp->er_extbuf[page_idx], 0, byte_diff); 3534 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
3548 } 3535 }
3549 erp->er_extcount += ext_diff; 3536 erp->er_extcount += ext_diff;
3550 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 3537 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
3551 } 3538 }
3552 /* Insert a new extent page */ 3539 /* Insert a new extent page */
3553 else if (erp) { 3540 else if (erp) {
3554 xfs_iext_add_indirect_multi(ifp, 3541 xfs_iext_add_indirect_multi(ifp,
3555 erp_idx, page_idx, ext_diff); 3542 erp_idx, page_idx, ext_diff);
3556 } 3543 }
3557 /* 3544 /*
3558 * If extent(s) are being appended to the last page in 3545 * If extent(s) are being appended to the last page in
3559 * the indirection array and the new extent(s) don't fit 3546 * the indirection array and the new extent(s) don't fit
3560 * in the page, then erp is NULL and erp_idx is set to 3547 * in the page, then erp is NULL and erp_idx is set to
3561 * the next index needed in the indirection array. 3548 * the next index needed in the indirection array.
3562 */ 3549 */
3563 else { 3550 else {
3564 int count = ext_diff; 3551 int count = ext_diff;
3565 3552
3566 while (count) { 3553 while (count) {
3567 erp = xfs_iext_irec_new(ifp, erp_idx); 3554 erp = xfs_iext_irec_new(ifp, erp_idx);
3568 erp->er_extcount = count; 3555 erp->er_extcount = count;
3569 count -= MIN(count, (int)XFS_LINEAR_EXTS); 3556 count -= MIN(count, (int)XFS_LINEAR_EXTS);
3570 if (count) { 3557 if (count) {
3571 erp_idx++; 3558 erp_idx++;
3572 } 3559 }
3573 } 3560 }
3574 } 3561 }
3575 } 3562 }
3576 ifp->if_bytes = new_size; 3563 ifp->if_bytes = new_size;
3577 } 3564 }
3578 3565
3579 /* 3566 /*
3580 * This is called when incore extents are being added to the indirection 3567 * This is called when incore extents are being added to the indirection
3581 * array and the new extents do not fit in the target extent list. The 3568 * array and the new extents do not fit in the target extent list. The
3582 * erp_idx parameter contains the irec index for the target extent list 3569 * erp_idx parameter contains the irec index for the target extent list
3583 * in the indirection array, and the idx parameter contains the extent 3570 * in the indirection array, and the idx parameter contains the extent
3584 * index within the list. The number of extents being added is stored 3571 * index within the list. The number of extents being added is stored
3585 * in the count parameter. 3572 * in the count parameter.
3586 * 3573 *
3587 * |-------| |-------| 3574 * |-------| |-------|
3588 * | | | | idx - number of extents before idx 3575 * | | | | idx - number of extents before idx
3589 * | idx | | count | 3576 * | idx | | count |
3590 * | | | | count - number of extents being inserted at idx 3577 * | | | | count - number of extents being inserted at idx
3591 * |-------| |-------| 3578 * |-------| |-------|
3592 * | count | | nex2 | nex2 - number of extents after idx + count 3579 * | count | | nex2 | nex2 - number of extents after idx + count
3593 * |-------| |-------| 3580 * |-------| |-------|
3594 */ 3581 */
3595 void 3582 void
3596 xfs_iext_add_indirect_multi( 3583 xfs_iext_add_indirect_multi(
3597 xfs_ifork_t *ifp, /* inode fork pointer */ 3584 xfs_ifork_t *ifp, /* inode fork pointer */
3598 int erp_idx, /* target extent irec index */ 3585 int erp_idx, /* target extent irec index */
3599 xfs_extnum_t idx, /* index within target list */ 3586 xfs_extnum_t idx, /* index within target list */
3600 int count) /* new extents being added */ 3587 int count) /* new extents being added */
3601 { 3588 {
3602 int byte_diff; /* new bytes being added */ 3589 int byte_diff; /* new bytes being added */
3603 xfs_ext_irec_t *erp; /* pointer to irec entry */ 3590 xfs_ext_irec_t *erp; /* pointer to irec entry */
3604 xfs_extnum_t ext_diff; /* number of extents to add */ 3591 xfs_extnum_t ext_diff; /* number of extents to add */
3605 xfs_extnum_t ext_cnt; /* new extents still needed */ 3592 xfs_extnum_t ext_cnt; /* new extents still needed */
3606 xfs_extnum_t nex2; /* extents after idx + count */ 3593 xfs_extnum_t nex2; /* extents after idx + count */
3607 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ 3594 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */
3608 int nlists; /* number of irec's (lists) */ 3595 int nlists; /* number of irec's (lists) */
3609 3596
3610 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3597 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3611 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 3598 erp = &ifp->if_u1.if_ext_irec[erp_idx];
3612 nex2 = erp->er_extcount - idx; 3599 nex2 = erp->er_extcount - idx;
3613 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 3600 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
3614 3601
3615 /* 3602 /*
3616 * Save second part of target extent list 3603 * Save second part of target extent list
3617 * (all extents past */ 3604 * (all extents past */
3618 if (nex2) { 3605 if (nex2) {
3619 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 3606 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
3620 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); 3607 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
3621 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 3608 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
3622 erp->er_extcount -= nex2; 3609 erp->er_extcount -= nex2;
3623 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 3610 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
3624 memset(&erp->er_extbuf[idx], 0, byte_diff); 3611 memset(&erp->er_extbuf[idx], 0, byte_diff);
3625 } 3612 }
3626 3613
3627 /* 3614 /*
3628 * Add the new extents to the end of the target 3615 * Add the new extents to the end of the target
3629 * list, then allocate new irec record(s) and 3616 * list, then allocate new irec record(s) and
3630 * extent buffer(s) as needed to store the rest 3617 * extent buffer(s) as needed to store the rest
3631 * of the new extents. 3618 * of the new extents.
3632 */ 3619 */
3633 ext_cnt = count; 3620 ext_cnt = count;
3634 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); 3621 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
3635 if (ext_diff) { 3622 if (ext_diff) {
3636 erp->er_extcount += ext_diff; 3623 erp->er_extcount += ext_diff;
3637 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 3624 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
3638 ext_cnt -= ext_diff; 3625 ext_cnt -= ext_diff;
3639 } 3626 }
3640 while (ext_cnt) { 3627 while (ext_cnt) {
3641 erp_idx++; 3628 erp_idx++;
3642 erp = xfs_iext_irec_new(ifp, erp_idx); 3629 erp = xfs_iext_irec_new(ifp, erp_idx);
3643 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); 3630 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
3644 erp->er_extcount = ext_diff; 3631 erp->er_extcount = ext_diff;
3645 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 3632 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
3646 ext_cnt -= ext_diff; 3633 ext_cnt -= ext_diff;
3647 } 3634 }
3648 3635
3649 /* Add nex2 extents back to indirection array */ 3636 /* Add nex2 extents back to indirection array */
3650 if (nex2) { 3637 if (nex2) {
3651 xfs_extnum_t ext_avail; 3638 xfs_extnum_t ext_avail;
3652 int i; 3639 int i;
3653 3640
3654 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 3641 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
3655 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 3642 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
3656 i = 0; 3643 i = 0;
3657 /* 3644 /*
3658 * If nex2 extents fit in the current page, append 3645 * If nex2 extents fit in the current page, append
3659 * nex2_ep after the new extents. 3646 * nex2_ep after the new extents.
3660 */ 3647 */
3661 if (nex2 <= ext_avail) { 3648 if (nex2 <= ext_avail) {
3662 i = erp->er_extcount; 3649 i = erp->er_extcount;
3663 } 3650 }
3664 /* 3651 /*
3665 * Otherwise, check if space is available in the 3652 * Otherwise, check if space is available in the
3666 * next page. 3653 * next page.
3667 */ 3654 */
3668 else if ((erp_idx < nlists - 1) && 3655 else if ((erp_idx < nlists - 1) &&
3669 (nex2 <= (ext_avail = XFS_LINEAR_EXTS - 3656 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
3670 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { 3657 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
3671 erp_idx++; 3658 erp_idx++;
3672 erp++; 3659 erp++;
3673 /* Create a hole for nex2 extents */ 3660 /* Create a hole for nex2 extents */
3674 memmove(&erp->er_extbuf[nex2], erp->er_extbuf, 3661 memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
3675 erp->er_extcount * sizeof(xfs_bmbt_rec_t)); 3662 erp->er_extcount * sizeof(xfs_bmbt_rec_t));
3676 } 3663 }
3677 /* 3664 /*
3678 * Final choice, create a new extent page for 3665 * Final choice, create a new extent page for
3679 * nex2 extents. 3666 * nex2 extents.
3680 */ 3667 */
3681 else { 3668 else {
3682 erp_idx++; 3669 erp_idx++;
3683 erp = xfs_iext_irec_new(ifp, erp_idx); 3670 erp = xfs_iext_irec_new(ifp, erp_idx);
3684 } 3671 }
3685 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3672 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3686 kmem_free(nex2_ep); 3673 kmem_free(nex2_ep);
3687 erp->er_extcount += nex2; 3674 erp->er_extcount += nex2;
3688 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 3675 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
3689 } 3676 }
3690 } 3677 }
3691 3678
3692 /* 3679 /*
3693 * This is called when the amount of space required for incore file 3680 * This is called when the amount of space required for incore file
3694 * extents needs to be decreased. The ext_diff parameter stores the 3681 * extents needs to be decreased. The ext_diff parameter stores the
3695 * number of extents to be removed and the idx parameter contains 3682 * number of extents to be removed and the idx parameter contains
3696 * the extent index where the extents will be removed from. 3683 * the extent index where the extents will be removed from.
3697 * 3684 *
3698 * If the amount of space needed has decreased below the linear 3685 * If the amount of space needed has decreased below the linear
3699 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 3686 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
3700 * extent array. Otherwise, use kmem_realloc() to adjust the 3687 * extent array. Otherwise, use kmem_realloc() to adjust the
3701 * size to what is needed. 3688 * size to what is needed.
3702 */ 3689 */
3703 void 3690 void
3704 xfs_iext_remove( 3691 xfs_iext_remove(
3705 xfs_ifork_t *ifp, /* inode fork pointer */ 3692 xfs_ifork_t *ifp, /* inode fork pointer */
3706 xfs_extnum_t idx, /* index to begin removing exts */ 3693 xfs_extnum_t idx, /* index to begin removing exts */
3707 int ext_diff) /* number of extents to remove */ 3694 int ext_diff) /* number of extents to remove */
3708 { 3695 {
3709 xfs_extnum_t nextents; /* number of extents in file */ 3696 xfs_extnum_t nextents; /* number of extents in file */
3710 int new_size; /* size of extents after removal */ 3697 int new_size; /* size of extents after removal */
3711 3698
3712 ASSERT(ext_diff > 0); 3699 ASSERT(ext_diff > 0);
3713 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3700 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3714 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 3701 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
3715 3702
3716 if (new_size == 0) { 3703 if (new_size == 0) {
3717 xfs_iext_destroy(ifp); 3704 xfs_iext_destroy(ifp);
3718 } else if (ifp->if_flags & XFS_IFEXTIREC) { 3705 } else if (ifp->if_flags & XFS_IFEXTIREC) {
3719 xfs_iext_remove_indirect(ifp, idx, ext_diff); 3706 xfs_iext_remove_indirect(ifp, idx, ext_diff);
3720 } else if (ifp->if_real_bytes) { 3707 } else if (ifp->if_real_bytes) {
3721 xfs_iext_remove_direct(ifp, idx, ext_diff); 3708 xfs_iext_remove_direct(ifp, idx, ext_diff);
3722 } else { 3709 } else {
3723 xfs_iext_remove_inline(ifp, idx, ext_diff); 3710 xfs_iext_remove_inline(ifp, idx, ext_diff);
3724 } 3711 }
3725 ifp->if_bytes = new_size; 3712 ifp->if_bytes = new_size;
3726 } 3713 }
3727 3714
3728 /* 3715 /*
3729 * This removes ext_diff extents from the inline buffer, beginning 3716 * This removes ext_diff extents from the inline buffer, beginning
3730 * at extent index idx. 3717 * at extent index idx.
3731 */ 3718 */
3732 void 3719 void
3733 xfs_iext_remove_inline( 3720 xfs_iext_remove_inline(
3734 xfs_ifork_t *ifp, /* inode fork pointer */ 3721 xfs_ifork_t *ifp, /* inode fork pointer */
3735 xfs_extnum_t idx, /* index to begin removing exts */ 3722 xfs_extnum_t idx, /* index to begin removing exts */
3736 int ext_diff) /* number of extents to remove */ 3723 int ext_diff) /* number of extents to remove */
3737 { 3724 {
3738 int nextents; /* number of extents in file */ 3725 int nextents; /* number of extents in file */
3739 3726
3740 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 3727 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
3741 ASSERT(idx < XFS_INLINE_EXTS); 3728 ASSERT(idx < XFS_INLINE_EXTS);
3742 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3729 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3743 ASSERT(((nextents - ext_diff) > 0) && 3730 ASSERT(((nextents - ext_diff) > 0) &&
3744 (nextents - ext_diff) < XFS_INLINE_EXTS); 3731 (nextents - ext_diff) < XFS_INLINE_EXTS);
3745 3732
3746 if (idx + ext_diff < nextents) { 3733 if (idx + ext_diff < nextents) {
3747 memmove(&ifp->if_u2.if_inline_ext[idx], 3734 memmove(&ifp->if_u2.if_inline_ext[idx],
3748 &ifp->if_u2.if_inline_ext[idx + ext_diff], 3735 &ifp->if_u2.if_inline_ext[idx + ext_diff],
3749 (nextents - (idx + ext_diff)) * 3736 (nextents - (idx + ext_diff)) *
3750 sizeof(xfs_bmbt_rec_t)); 3737 sizeof(xfs_bmbt_rec_t));
3751 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 3738 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
3752 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 3739 0, ext_diff * sizeof(xfs_bmbt_rec_t));
3753 } else { 3740 } else {
3754 memset(&ifp->if_u2.if_inline_ext[idx], 0, 3741 memset(&ifp->if_u2.if_inline_ext[idx], 0,
3755 ext_diff * sizeof(xfs_bmbt_rec_t)); 3742 ext_diff * sizeof(xfs_bmbt_rec_t));
3756 } 3743 }
3757 } 3744 }
3758 3745
3759 /* 3746 /*
3760 * This removes ext_diff extents from a linear (direct) extent list, 3747 * This removes ext_diff extents from a linear (direct) extent list,
3761 * beginning at extent index idx. If the extents are being removed 3748 * beginning at extent index idx. If the extents are being removed
3762 * from the end of the list (ie. truncate) then we just need to re- 3749 * from the end of the list (ie. truncate) then we just need to re-
3763 * allocate the list to remove the extra space. Otherwise, if the 3750 * allocate the list to remove the extra space. Otherwise, if the
3764 * extents are being removed from the middle of the existing extent 3751 * extents are being removed from the middle of the existing extent
3765 * entries, then we first need to move the extent records beginning 3752 * entries, then we first need to move the extent records beginning
3766 * at idx + ext_diff up in the list to overwrite the records being 3753 * at idx + ext_diff up in the list to overwrite the records being
3767 * removed, then remove the extra space via kmem_realloc. 3754 * removed, then remove the extra space via kmem_realloc.
3768 */ 3755 */
3769 void 3756 void
3770 xfs_iext_remove_direct( 3757 xfs_iext_remove_direct(
3771 xfs_ifork_t *ifp, /* inode fork pointer */ 3758 xfs_ifork_t *ifp, /* inode fork pointer */
3772 xfs_extnum_t idx, /* index to begin removing exts */ 3759 xfs_extnum_t idx, /* index to begin removing exts */
3773 int ext_diff) /* number of extents to remove */ 3760 int ext_diff) /* number of extents to remove */
3774 { 3761 {
3775 xfs_extnum_t nextents; /* number of extents in file */ 3762 xfs_extnum_t nextents; /* number of extents in file */
3776 int new_size; /* size of extents after removal */ 3763 int new_size; /* size of extents after removal */
3777 3764
3778 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 3765 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
3779 new_size = ifp->if_bytes - 3766 new_size = ifp->if_bytes -
3780 (ext_diff * sizeof(xfs_bmbt_rec_t)); 3767 (ext_diff * sizeof(xfs_bmbt_rec_t));
3781 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3768 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3782 3769
3783 if (new_size == 0) { 3770 if (new_size == 0) {
3784 xfs_iext_destroy(ifp); 3771 xfs_iext_destroy(ifp);
3785 return; 3772 return;
3786 } 3773 }
3787 /* Move extents up in the list (if needed) */ 3774 /* Move extents up in the list (if needed) */
3788 if (idx + ext_diff < nextents) { 3775 if (idx + ext_diff < nextents) {
3789 memmove(&ifp->if_u1.if_extents[idx], 3776 memmove(&ifp->if_u1.if_extents[idx],
3790 &ifp->if_u1.if_extents[idx + ext_diff], 3777 &ifp->if_u1.if_extents[idx + ext_diff],
3791 (nextents - (idx + ext_diff)) * 3778 (nextents - (idx + ext_diff)) *
3792 sizeof(xfs_bmbt_rec_t)); 3779 sizeof(xfs_bmbt_rec_t));
3793 } 3780 }
3794 memset(&ifp->if_u1.if_extents[nextents - ext_diff], 3781 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
3795 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 3782 0, ext_diff * sizeof(xfs_bmbt_rec_t));
3796 /* 3783 /*
3797 * Reallocate the direct extent list. If the extents 3784 * Reallocate the direct extent list. If the extents
3798 * will fit inside the inode then xfs_iext_realloc_direct 3785 * will fit inside the inode then xfs_iext_realloc_direct
3799 * will switch from direct to inline extent allocation 3786 * will switch from direct to inline extent allocation
3800 * mode for us. 3787 * mode for us.
3801 */ 3788 */
3802 xfs_iext_realloc_direct(ifp, new_size); 3789 xfs_iext_realloc_direct(ifp, new_size);
3803 ifp->if_bytes = new_size; 3790 ifp->if_bytes = new_size;
3804 } 3791 }
3805 3792
3806 /* 3793 /*
3807 * This is called when incore extents are being removed from the 3794 * This is called when incore extents are being removed from the
3808 * indirection array and the extents being removed span multiple extent 3795 * indirection array and the extents being removed span multiple extent
3809 * buffers. The idx parameter contains the file extent index where we 3796 * buffers. The idx parameter contains the file extent index where we
3810 * want to begin removing extents, and the count parameter contains 3797 * want to begin removing extents, and the count parameter contains
3811 * how many extents need to be removed. 3798 * how many extents need to be removed.
3812 * 3799 *
3813 * |-------| |-------| 3800 * |-------| |-------|
3814 * | nex1 | | | nex1 - number of extents before idx 3801 * | nex1 | | | nex1 - number of extents before idx
3815 * |-------| | count | 3802 * |-------| | count |
3816 * | | | | count - number of extents being removed at idx 3803 * | | | | count - number of extents being removed at idx
3817 * | count | |-------| 3804 * | count | |-------|
3818 * | | | nex2 | nex2 - number of extents after idx + count 3805 * | | | nex2 | nex2 - number of extents after idx + count
3819 * |-------| |-------| 3806 * |-------| |-------|
3820 */ 3807 */
3821 void 3808 void
3822 xfs_iext_remove_indirect( 3809 xfs_iext_remove_indirect(
3823 xfs_ifork_t *ifp, /* inode fork pointer */ 3810 xfs_ifork_t *ifp, /* inode fork pointer */
3824 xfs_extnum_t idx, /* index to begin removing extents */ 3811 xfs_extnum_t idx, /* index to begin removing extents */
3825 int count) /* number of extents to remove */ 3812 int count) /* number of extents to remove */
3826 { 3813 {
3827 xfs_ext_irec_t *erp; /* indirection array pointer */ 3814 xfs_ext_irec_t *erp; /* indirection array pointer */
3828 int erp_idx = 0; /* indirection array index */ 3815 int erp_idx = 0; /* indirection array index */
3829 xfs_extnum_t ext_cnt; /* extents left to remove */ 3816 xfs_extnum_t ext_cnt; /* extents left to remove */
3830 xfs_extnum_t ext_diff; /* extents to remove in current list */ 3817 xfs_extnum_t ext_diff; /* extents to remove in current list */
3831 xfs_extnum_t nex1; /* number of extents before idx */ 3818 xfs_extnum_t nex1; /* number of extents before idx */
3832 xfs_extnum_t nex2; /* extents after idx + count */ 3819 xfs_extnum_t nex2; /* extents after idx + count */
3833 int nlists; /* entries in indirection array */ 3820 int nlists; /* entries in indirection array */
3834 int page_idx = idx; /* index in target extent list */ 3821 int page_idx = idx; /* index in target extent list */
3835 3822
3836 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3823 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3837 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 3824 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
3838 ASSERT(erp != NULL); 3825 ASSERT(erp != NULL);
3839 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 3826 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
3840 nex1 = page_idx; 3827 nex1 = page_idx;
3841 ext_cnt = count; 3828 ext_cnt = count;
3842 while (ext_cnt) { 3829 while (ext_cnt) {
3843 nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); 3830 nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
3844 ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); 3831 ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
3845 /* 3832 /*
3846 * Check for deletion of entire list; 3833 * Check for deletion of entire list;
3847 * xfs_iext_irec_remove() updates extent offsets. 3834 * xfs_iext_irec_remove() updates extent offsets.
3848 */ 3835 */
3849 if (ext_diff == erp->er_extcount) { 3836 if (ext_diff == erp->er_extcount) {
3850 xfs_iext_irec_remove(ifp, erp_idx); 3837 xfs_iext_irec_remove(ifp, erp_idx);
3851 ext_cnt -= ext_diff; 3838 ext_cnt -= ext_diff;
3852 nex1 = 0; 3839 nex1 = 0;
3853 if (ext_cnt) { 3840 if (ext_cnt) {
3854 ASSERT(erp_idx < ifp->if_real_bytes / 3841 ASSERT(erp_idx < ifp->if_real_bytes /
3855 XFS_IEXT_BUFSZ); 3842 XFS_IEXT_BUFSZ);
3856 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 3843 erp = &ifp->if_u1.if_ext_irec[erp_idx];
3857 nex1 = 0; 3844 nex1 = 0;
3858 continue; 3845 continue;
3859 } else { 3846 } else {
3860 break; 3847 break;
3861 } 3848 }
3862 } 3849 }
3863 /* Move extents up (if needed) */ 3850 /* Move extents up (if needed) */
3864 if (nex2) { 3851 if (nex2) {
3865 memmove(&erp->er_extbuf[nex1], 3852 memmove(&erp->er_extbuf[nex1],
3866 &erp->er_extbuf[nex1 + ext_diff], 3853 &erp->er_extbuf[nex1 + ext_diff],
3867 nex2 * sizeof(xfs_bmbt_rec_t)); 3854 nex2 * sizeof(xfs_bmbt_rec_t));
3868 } 3855 }
3869 /* Zero out rest of page */ 3856 /* Zero out rest of page */
3870 memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - 3857 memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
3871 ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); 3858 ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
3872 /* Update remaining counters */ 3859 /* Update remaining counters */
3873 erp->er_extcount -= ext_diff; 3860 erp->er_extcount -= ext_diff;
3874 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); 3861 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
3875 ext_cnt -= ext_diff; 3862 ext_cnt -= ext_diff;
3876 nex1 = 0; 3863 nex1 = 0;
3877 erp_idx++; 3864 erp_idx++;
3878 erp++; 3865 erp++;
3879 } 3866 }
3880 ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); 3867 ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
3881 xfs_iext_irec_compact(ifp); 3868 xfs_iext_irec_compact(ifp);
3882 } 3869 }
3883 3870
3884 /* 3871 /*
3885 * Create, destroy, or resize a linear (direct) block of extents. 3872 * Create, destroy, or resize a linear (direct) block of extents.
3886 */ 3873 */
3887 void 3874 void
3888 xfs_iext_realloc_direct( 3875 xfs_iext_realloc_direct(
3889 xfs_ifork_t *ifp, /* inode fork pointer */ 3876 xfs_ifork_t *ifp, /* inode fork pointer */
3890 int new_size) /* new size of extents */ 3877 int new_size) /* new size of extents */
3891 { 3878 {
3892 int rnew_size; /* real new size of extents */ 3879 int rnew_size; /* real new size of extents */
3893 3880
3894 rnew_size = new_size; 3881 rnew_size = new_size;
3895 3882
3896 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 3883 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
3897 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 3884 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
3898 (new_size != ifp->if_real_bytes))); 3885 (new_size != ifp->if_real_bytes)));
3899 3886
3900 /* Free extent records */ 3887 /* Free extent records */
3901 if (new_size == 0) { 3888 if (new_size == 0) {
3902 xfs_iext_destroy(ifp); 3889 xfs_iext_destroy(ifp);
3903 } 3890 }
3904 /* Resize direct extent list and zero any new bytes */ 3891 /* Resize direct extent list and zero any new bytes */
3905 else if (ifp->if_real_bytes) { 3892 else if (ifp->if_real_bytes) {
3906 /* Check if extents will fit inside the inode */ 3893 /* Check if extents will fit inside the inode */
3907 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 3894 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
3908 xfs_iext_direct_to_inline(ifp, new_size / 3895 xfs_iext_direct_to_inline(ifp, new_size /
3909 (uint)sizeof(xfs_bmbt_rec_t)); 3896 (uint)sizeof(xfs_bmbt_rec_t));
3910 ifp->if_bytes = new_size; 3897 ifp->if_bytes = new_size;
3911 return; 3898 return;
3912 } 3899 }
3913 if (!is_power_of_2(new_size)){ 3900 if (!is_power_of_2(new_size)){
3914 rnew_size = roundup_pow_of_two(new_size); 3901 rnew_size = roundup_pow_of_two(new_size);
3915 } 3902 }
3916 if (rnew_size != ifp->if_real_bytes) { 3903 if (rnew_size != ifp->if_real_bytes) {
3917 ifp->if_u1.if_extents = 3904 ifp->if_u1.if_extents =
3918 kmem_realloc(ifp->if_u1.if_extents, 3905 kmem_realloc(ifp->if_u1.if_extents,
3919 rnew_size, 3906 rnew_size,
3920 ifp->if_real_bytes, KM_NOFS); 3907 ifp->if_real_bytes, KM_NOFS);
3921 } 3908 }
3922 if (rnew_size > ifp->if_real_bytes) { 3909 if (rnew_size > ifp->if_real_bytes) {
3923 memset(&ifp->if_u1.if_extents[ifp->if_bytes / 3910 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
3924 (uint)sizeof(xfs_bmbt_rec_t)], 0, 3911 (uint)sizeof(xfs_bmbt_rec_t)], 0,
3925 rnew_size - ifp->if_real_bytes); 3912 rnew_size - ifp->if_real_bytes);
3926 } 3913 }
3927 } 3914 }
3928 /* 3915 /*
3929 * Switch from the inline extent buffer to a direct 3916 * Switch from the inline extent buffer to a direct
3930 * extent list. Be sure to include the inline extent 3917 * extent list. Be sure to include the inline extent
3931 * bytes in new_size. 3918 * bytes in new_size.
3932 */ 3919 */
3933 else { 3920 else {
3934 new_size += ifp->if_bytes; 3921 new_size += ifp->if_bytes;
3935 if (!is_power_of_2(new_size)) { 3922 if (!is_power_of_2(new_size)) {
3936 rnew_size = roundup_pow_of_two(new_size); 3923 rnew_size = roundup_pow_of_two(new_size);
3937 } 3924 }
3938 xfs_iext_inline_to_direct(ifp, rnew_size); 3925 xfs_iext_inline_to_direct(ifp, rnew_size);
3939 } 3926 }
3940 ifp->if_real_bytes = rnew_size; 3927 ifp->if_real_bytes = rnew_size;
3941 ifp->if_bytes = new_size; 3928 ifp->if_bytes = new_size;
3942 } 3929 }
3943 3930
3944 /* 3931 /*
3945 * Switch from linear (direct) extent records to inline buffer. 3932 * Switch from linear (direct) extent records to inline buffer.
3946 */ 3933 */
3947 void 3934 void
3948 xfs_iext_direct_to_inline( 3935 xfs_iext_direct_to_inline(
3949 xfs_ifork_t *ifp, /* inode fork pointer */ 3936 xfs_ifork_t *ifp, /* inode fork pointer */
3950 xfs_extnum_t nextents) /* number of extents in file */ 3937 xfs_extnum_t nextents) /* number of extents in file */
3951 { 3938 {
3952 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3939 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3953 ASSERT(nextents <= XFS_INLINE_EXTS); 3940 ASSERT(nextents <= XFS_INLINE_EXTS);
3954 /* 3941 /*
3955 * The inline buffer was zeroed when we switched 3942 * The inline buffer was zeroed when we switched
3956 * from inline to direct extent allocation mode, 3943 * from inline to direct extent allocation mode,
3957 * so we don't need to clear it here. 3944 * so we don't need to clear it here.
3958 */ 3945 */
3959 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 3946 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
3960 nextents * sizeof(xfs_bmbt_rec_t)); 3947 nextents * sizeof(xfs_bmbt_rec_t));
3961 kmem_free(ifp->if_u1.if_extents); 3948 kmem_free(ifp->if_u1.if_extents);
3962 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 3949 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
3963 ifp->if_real_bytes = 0; 3950 ifp->if_real_bytes = 0;
3964 } 3951 }
3965 3952
3966 /* 3953 /*
3967 * Switch from inline buffer to linear (direct) extent records. 3954 * Switch from inline buffer to linear (direct) extent records.
3968 * new_size should already be rounded up to the next power of 2 3955 * new_size should already be rounded up to the next power of 2
3969 * by the caller (when appropriate), so use new_size as it is. 3956 * by the caller (when appropriate), so use new_size as it is.
3970 * However, since new_size may be rounded up, we can't update 3957 * However, since new_size may be rounded up, we can't update
3971 * if_bytes here. It is the caller's responsibility to update 3958 * if_bytes here. It is the caller's responsibility to update
3972 * if_bytes upon return. 3959 * if_bytes upon return.
3973 */ 3960 */
3974 void 3961 void
3975 xfs_iext_inline_to_direct( 3962 xfs_iext_inline_to_direct(
3976 xfs_ifork_t *ifp, /* inode fork pointer */ 3963 xfs_ifork_t *ifp, /* inode fork pointer */
3977 int new_size) /* number of extents in file */ 3964 int new_size) /* number of extents in file */
3978 { 3965 {
3979 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); 3966 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
3980 memset(ifp->if_u1.if_extents, 0, new_size); 3967 memset(ifp->if_u1.if_extents, 0, new_size);
3981 if (ifp->if_bytes) { 3968 if (ifp->if_bytes) {
3982 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 3969 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
3983 ifp->if_bytes); 3970 ifp->if_bytes);
3984 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 3971 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
3985 sizeof(xfs_bmbt_rec_t)); 3972 sizeof(xfs_bmbt_rec_t));
3986 } 3973 }
3987 ifp->if_real_bytes = new_size; 3974 ifp->if_real_bytes = new_size;
3988 } 3975 }
3989 3976
3990 /* 3977 /*
3991 * Resize an extent indirection array to new_size bytes. 3978 * Resize an extent indirection array to new_size bytes.
3992 */ 3979 */
3993 void 3980 void
3994 xfs_iext_realloc_indirect( 3981 xfs_iext_realloc_indirect(
3995 xfs_ifork_t *ifp, /* inode fork pointer */ 3982 xfs_ifork_t *ifp, /* inode fork pointer */
3996 int new_size) /* new indirection array size */ 3983 int new_size) /* new indirection array size */
3997 { 3984 {
3998 int nlists; /* number of irec's (ex lists) */ 3985 int nlists; /* number of irec's (ex lists) */
3999 int size; /* current indirection array size */ 3986 int size; /* current indirection array size */
4000 3987
4001 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3988 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4002 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 3989 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4003 size = nlists * sizeof(xfs_ext_irec_t); 3990 size = nlists * sizeof(xfs_ext_irec_t);
4004 ASSERT(ifp->if_real_bytes); 3991 ASSERT(ifp->if_real_bytes);
4005 ASSERT((new_size >= 0) && (new_size != size)); 3992 ASSERT((new_size >= 0) && (new_size != size));
4006 if (new_size == 0) { 3993 if (new_size == 0) {
4007 xfs_iext_destroy(ifp); 3994 xfs_iext_destroy(ifp);
4008 } else { 3995 } else {
4009 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 3996 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
4010 kmem_realloc(ifp->if_u1.if_ext_irec, 3997 kmem_realloc(ifp->if_u1.if_ext_irec,
4011 new_size, size, KM_NOFS); 3998 new_size, size, KM_NOFS);
4012 } 3999 }
4013 } 4000 }
4014 4001
/*
 * Switch from indirection array to linear (direct) extent allocations.
 */
void
xfs_iext_indirect_to_direct(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		size;		/* size of file extents */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	ASSERT(nextents <= XFS_LINEAR_EXTS);
	size = nextents * sizeof(xfs_bmbt_rec_t);

	/*
	 * Merge all records into the first irec page so that its
	 * er_extbuf can simply be reused as the direct extent list.
	 */
	xfs_iext_irec_compact_pages(ifp);
	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);

	ep = ifp->if_u1.if_ext_irec->er_extbuf;
	kmem_free(ifp->if_u1.if_ext_irec);
	ifp->if_flags &= ~XFS_IFEXTIREC;
	ifp->if_u1.if_extents = ep;
	ifp->if_bytes = size;
	/* Trim the page-sized buffer down if fewer extents are in use */
	if (nextents < XFS_LINEAR_EXTS) {
		xfs_iext_realloc_direct(ifp, size);
	}
}
4043 4030
4044 /* 4031 /*
4045 * Free incore file extents. 4032 * Free incore file extents.
4046 */ 4033 */
4047 void 4034 void
4048 xfs_iext_destroy( 4035 xfs_iext_destroy(
4049 xfs_ifork_t *ifp) /* inode fork pointer */ 4036 xfs_ifork_t *ifp) /* inode fork pointer */
4050 { 4037 {
4051 if (ifp->if_flags & XFS_IFEXTIREC) { 4038 if (ifp->if_flags & XFS_IFEXTIREC) {
4052 int erp_idx; 4039 int erp_idx;
4053 int nlists; 4040 int nlists;
4054 4041
4055 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4042 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4056 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 4043 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
4057 xfs_iext_irec_remove(ifp, erp_idx); 4044 xfs_iext_irec_remove(ifp, erp_idx);
4058 } 4045 }
4059 ifp->if_flags &= ~XFS_IFEXTIREC; 4046 ifp->if_flags &= ~XFS_IFEXTIREC;
4060 } else if (ifp->if_real_bytes) { 4047 } else if (ifp->if_real_bytes) {
4061 kmem_free(ifp->if_u1.if_extents); 4048 kmem_free(ifp->if_u1.if_extents);
4062 } else if (ifp->if_bytes) { 4049 } else if (ifp->if_bytes) {
4063 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4050 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
4064 sizeof(xfs_bmbt_rec_t)); 4051 sizeof(xfs_bmbt_rec_t));
4065 } 4052 }
4066 ifp->if_u1.if_extents = NULL; 4053 ifp->if_u1.if_extents = NULL;
4067 ifp->if_real_bytes = 0; 4054 ifp->if_real_bytes = 0;
4068 ifp->if_bytes = 0; 4055 ifp->if_bytes = 0;
4069 } 4056 }
4070 4057
/*
 * Return a pointer to the extent record for file system block bno.
 *
 * On return *idxp holds the (file-based) index of the returned record.
 * If bno lies inside an extent, that extent's record is returned.  If
 * bno falls in a hole, the record of the first extent past the hole is
 * returned instead, or NULL if bno is beyond the last extent.
 */
xfs_bmbt_rec_host_t *			/* pointer to found extent record */
xfs_iext_bno_to_ext(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	xfs_extnum_t	*idxp)		/* index of target extent */
{
	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	int		high;		/* upper boundary in search */
	xfs_extnum_t	idx = 0;	/* index of target extent */
	int		low;		/* lower boundary in search */
	xfs_extnum_t	nextents;	/* number of file extents */
	xfs_fileoff_t	startoff = 0;	/* start offset of extent */

	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		*idxp = 0;
		return NULL;
	}
	low = 0;
	if (ifp->if_flags & XFS_IFEXTIREC) {
		/* Find target extent list */
		int	erp_idx = 0;
		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
		base = erp->er_extbuf;
		high = erp->er_extcount - 1;
	} else {
		base = ifp->if_u1.if_extents;
		high = nextents - 1;
	}
	/* Binary search extent records */
	while (low <= high) {
		idx = (low + high) >> 1;
		ep = base + idx;
		startoff = xfs_bmbt_get_startoff(ep);
		blockcount = xfs_bmbt_get_blockcount(ep);
		if (bno < startoff) {
			high = idx - 1;
		} else if (bno >= startoff + blockcount) {
			low = idx + 1;
		} else {
			/* Convert back to file-based extent index */
			if (ifp->if_flags & XFS_IFEXTIREC) {
				idx += erp->er_extoff;
			}
			*idxp = idx;
			return ep;
		}
	}
	/* Convert back to file-based extent index */
	if (ifp->if_flags & XFS_IFEXTIREC) {
		idx += erp->er_extoff;
	}
	/*
	 * Search failed, so bno is in a hole.  If the last probed extent
	 * ends before bno, step forward to the next extent (NULL if that
	 * runs off the end); otherwise ep already points past the hole.
	 */
	if (bno >= startoff + blockcount) {
		if (++idx == nextents) {
			ep = NULL;
		} else {
			ep = xfs_iext_get_ext(ifp, idx);
		}
	}
	*idxp = idx;
	return ep;
}
4139 4126
/*
 * Return a pointer to the indirection array entry containing the
 * extent record for filesystem block bno. Store the index of the
 * target irec in *erp_idxp.
 */
xfs_ext_irec_t *			/* pointer to found extent record */
xfs_iext_bno_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	int		*erp_idxp)	/* irec index of target ext list */
{
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of extent irec's (lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;
	/*
	 * Binary search the irec pages: a page is the target when bno is
	 * at or past the page's first startoff and (when a following page
	 * exists) before the next page's first startoff.
	 */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
			high = erp_idx - 1;
		} else if (erp_next && bno >=
			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
			low = erp_idx + 1;
		} else {
			break;
		}
	}
	*erp_idxp = erp_idx;
	return erp;
}
4179 4166
4180 /* 4167 /*
4181 * Return a pointer to the indirection array entry containing the 4168 * Return a pointer to the indirection array entry containing the
4182 * extent record at file extent index *idxp. Store the index of the 4169 * extent record at file extent index *idxp. Store the index of the
4183 * target irec in *erp_idxp and store the page index of the target 4170 * target irec in *erp_idxp and store the page index of the target
4184 * extent record in *idxp. 4171 * extent record in *idxp.
4185 */ 4172 */
4186 xfs_ext_irec_t * 4173 xfs_ext_irec_t *
4187 xfs_iext_idx_to_irec( 4174 xfs_iext_idx_to_irec(
4188 xfs_ifork_t *ifp, /* inode fork pointer */ 4175 xfs_ifork_t *ifp, /* inode fork pointer */
4189 xfs_extnum_t *idxp, /* extent index (file -> page) */ 4176 xfs_extnum_t *idxp, /* extent index (file -> page) */
4190 int *erp_idxp, /* pointer to target irec */ 4177 int *erp_idxp, /* pointer to target irec */
4191 int realloc) /* new bytes were just added */ 4178 int realloc) /* new bytes were just added */
4192 { 4179 {
4193 xfs_ext_irec_t *prev; /* pointer to previous irec */ 4180 xfs_ext_irec_t *prev; /* pointer to previous irec */
4194 xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ 4181 xfs_ext_irec_t *erp = NULL; /* pointer to current irec */
4195 int erp_idx; /* indirection array index */ 4182 int erp_idx; /* indirection array index */
4196 int nlists; /* number of irec's (ex lists) */ 4183 int nlists; /* number of irec's (ex lists) */
4197 int high; /* binary search upper limit */ 4184 int high; /* binary search upper limit */
4198 int low; /* binary search lower limit */ 4185 int low; /* binary search lower limit */
4199 xfs_extnum_t page_idx = *idxp; /* extent index in target list */ 4186 xfs_extnum_t page_idx = *idxp; /* extent index in target list */
4200 4187
4201 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4188 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4202 ASSERT(page_idx >= 0 && page_idx <= 4189 ASSERT(page_idx >= 0 && page_idx <=
4203 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 4190 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
4204 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4191 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4205 erp_idx = 0; 4192 erp_idx = 0;
4206 low = 0; 4193 low = 0;
4207 high = nlists - 1; 4194 high = nlists - 1;
4208 4195
4209 /* Binary search extent irec's */ 4196 /* Binary search extent irec's */
4210 while (low <= high) { 4197 while (low <= high) {
4211 erp_idx = (low + high) >> 1; 4198 erp_idx = (low + high) >> 1;
4212 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4199 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4213 prev = erp_idx > 0 ? erp - 1 : NULL; 4200 prev = erp_idx > 0 ? erp - 1 : NULL;
4214 if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && 4201 if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
4215 realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { 4202 realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
4216 high = erp_idx - 1; 4203 high = erp_idx - 1;
4217 } else if (page_idx > erp->er_extoff + erp->er_extcount || 4204 } else if (page_idx > erp->er_extoff + erp->er_extcount ||
4218 (page_idx == erp->er_extoff + erp->er_extcount && 4205 (page_idx == erp->er_extoff + erp->er_extcount &&
4219 !realloc)) { 4206 !realloc)) {
4220 low = erp_idx + 1; 4207 low = erp_idx + 1;
4221 } else if (page_idx == erp->er_extoff + erp->er_extcount && 4208 } else if (page_idx == erp->er_extoff + erp->er_extcount &&
4222 erp->er_extcount == XFS_LINEAR_EXTS) { 4209 erp->er_extcount == XFS_LINEAR_EXTS) {
4223 ASSERT(realloc); 4210 ASSERT(realloc);
4224 page_idx = 0; 4211 page_idx = 0;
4225 erp_idx++; 4212 erp_idx++;
4226 erp = erp_idx < nlists ? erp + 1 : NULL; 4213 erp = erp_idx < nlists ? erp + 1 : NULL;
4227 break; 4214 break;
4228 } else { 4215 } else {
4229 page_idx -= erp->er_extoff; 4216 page_idx -= erp->er_extoff;
4230 break; 4217 break;
4231 } 4218 }
4232 } 4219 }
4233 *idxp = page_idx; 4220 *idxp = page_idx;
4234 *erp_idxp = erp_idx; 4221 *erp_idxp = erp_idx;
4235 return(erp); 4222 return(erp);
4236 } 4223 }
4237 4224
4238 /* 4225 /*
4239 * Allocate and initialize an indirection array once the space needed 4226 * Allocate and initialize an indirection array once the space needed
4240 * for incore extents increases above XFS_IEXT_BUFSZ. 4227 * for incore extents increases above XFS_IEXT_BUFSZ.
4241 */ 4228 */
4242 void 4229 void
4243 xfs_iext_irec_init( 4230 xfs_iext_irec_init(
4244 xfs_ifork_t *ifp) /* inode fork pointer */ 4231 xfs_ifork_t *ifp) /* inode fork pointer */
4245 { 4232 {
4246 xfs_ext_irec_t *erp; /* indirection array pointer */ 4233 xfs_ext_irec_t *erp; /* indirection array pointer */
4247 xfs_extnum_t nextents; /* number of extents in file */ 4234 xfs_extnum_t nextents; /* number of extents in file */
4248 4235
4249 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 4236 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
4250 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4237 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4251 ASSERT(nextents <= XFS_LINEAR_EXTS); 4238 ASSERT(nextents <= XFS_LINEAR_EXTS);
4252 4239
4253 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); 4240 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
4254 4241
4255 if (nextents == 0) { 4242 if (nextents == 0) {
4256 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 4243 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
4257 } else if (!ifp->if_real_bytes) { 4244 } else if (!ifp->if_real_bytes) {
4258 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 4245 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
4259 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 4246 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
4260 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); 4247 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
4261 } 4248 }
4262 erp->er_extbuf = ifp->if_u1.if_extents; 4249 erp->er_extbuf = ifp->if_u1.if_extents;
4263 erp->er_extcount = nextents; 4250 erp->er_extcount = nextents;
4264 erp->er_extoff = 0; 4251 erp->er_extoff = 0;
4265 4252
4266 ifp->if_flags |= XFS_IFEXTIREC; 4253 ifp->if_flags |= XFS_IFEXTIREC;
4267 ifp->if_real_bytes = XFS_IEXT_BUFSZ; 4254 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
4268 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); 4255 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
4269 ifp->if_u1.if_ext_irec = erp; 4256 ifp->if_u1.if_ext_irec = erp;
4270 4257
4271 return; 4258 return;
4272 } 4259 }
4273 4260
4274 /* 4261 /*
4275 * Allocate and initialize a new entry in the indirection array. 4262 * Allocate and initialize a new entry in the indirection array.
4276 */ 4263 */
4277 xfs_ext_irec_t * 4264 xfs_ext_irec_t *
4278 xfs_iext_irec_new( 4265 xfs_iext_irec_new(
4279 xfs_ifork_t *ifp, /* inode fork pointer */ 4266 xfs_ifork_t *ifp, /* inode fork pointer */
4280 int erp_idx) /* index for new irec */ 4267 int erp_idx) /* index for new irec */
4281 { 4268 {
4282 xfs_ext_irec_t *erp; /* indirection array pointer */ 4269 xfs_ext_irec_t *erp; /* indirection array pointer */
4283 int i; /* loop counter */ 4270 int i; /* loop counter */
4284 int nlists; /* number of irec's (ex lists) */ 4271 int nlists; /* number of irec's (ex lists) */
4285 4272
4286 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4273 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4287 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4274 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4288 4275
4289 /* Resize indirection array */ 4276 /* Resize indirection array */
4290 xfs_iext_realloc_indirect(ifp, ++nlists * 4277 xfs_iext_realloc_indirect(ifp, ++nlists *
4291 sizeof(xfs_ext_irec_t)); 4278 sizeof(xfs_ext_irec_t));
4292 /* 4279 /*
4293 * Move records down in the array so the 4280 * Move records down in the array so the
4294 * new page can use erp_idx. 4281 * new page can use erp_idx.
4295 */ 4282 */
4296 erp = ifp->if_u1.if_ext_irec; 4283 erp = ifp->if_u1.if_ext_irec;
4297 for (i = nlists - 1; i > erp_idx; i--) { 4284 for (i = nlists - 1; i > erp_idx; i--) {
4298 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); 4285 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
4299 } 4286 }
4300 ASSERT(i == erp_idx); 4287 ASSERT(i == erp_idx);
4301 4288
4302 /* Initialize new extent record */ 4289 /* Initialize new extent record */
4303 erp = ifp->if_u1.if_ext_irec; 4290 erp = ifp->if_u1.if_ext_irec;
4304 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 4291 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
4305 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4292 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4306 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 4293 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
4307 erp[erp_idx].er_extcount = 0; 4294 erp[erp_idx].er_extcount = 0;
4308 erp[erp_idx].er_extoff = erp_idx > 0 ? 4295 erp[erp_idx].er_extoff = erp_idx > 0 ?
4309 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; 4296 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
4310 return (&erp[erp_idx]); 4297 return (&erp[erp_idx]);
4311 } 4298 }
4312 4299
4313 /* 4300 /*
4314 * Remove a record from the indirection array. 4301 * Remove a record from the indirection array.
4315 */ 4302 */
4316 void 4303 void
4317 xfs_iext_irec_remove( 4304 xfs_iext_irec_remove(
4318 xfs_ifork_t *ifp, /* inode fork pointer */ 4305 xfs_ifork_t *ifp, /* inode fork pointer */
4319 int erp_idx) /* irec index to remove */ 4306 int erp_idx) /* irec index to remove */
4320 { 4307 {
4321 xfs_ext_irec_t *erp; /* indirection array pointer */ 4308 xfs_ext_irec_t *erp; /* indirection array pointer */
4322 int i; /* loop counter */ 4309 int i; /* loop counter */
4323 int nlists; /* number of irec's (ex lists) */ 4310 int nlists; /* number of irec's (ex lists) */
4324 4311
4325 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4312 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4326 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4313 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4327 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4314 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4328 if (erp->er_extbuf) { 4315 if (erp->er_extbuf) {
4329 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 4316 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
4330 -erp->er_extcount); 4317 -erp->er_extcount);
4331 kmem_free(erp->er_extbuf); 4318 kmem_free(erp->er_extbuf);
4332 } 4319 }
4333 /* Compact extent records */ 4320 /* Compact extent records */
4334 erp = ifp->if_u1.if_ext_irec; 4321 erp = ifp->if_u1.if_ext_irec;
4335 for (i = erp_idx; i < nlists - 1; i++) { 4322 for (i = erp_idx; i < nlists - 1; i++) {
4336 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); 4323 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
4337 } 4324 }
4338 /* 4325 /*
4339 * Manually free the last extent record from the indirection 4326 * Manually free the last extent record from the indirection
4340 * array. A call to xfs_iext_realloc_indirect() with a size 4327 * array. A call to xfs_iext_realloc_indirect() with a size
4341 * of zero would result in a call to xfs_iext_destroy() which 4328 * of zero would result in a call to xfs_iext_destroy() which
4342 * would in turn call this function again, creating a nasty 4329 * would in turn call this function again, creating a nasty
4343 * infinite loop. 4330 * infinite loop.
4344 */ 4331 */
4345 if (--nlists) { 4332 if (--nlists) {
4346 xfs_iext_realloc_indirect(ifp, 4333 xfs_iext_realloc_indirect(ifp,
4347 nlists * sizeof(xfs_ext_irec_t)); 4334 nlists * sizeof(xfs_ext_irec_t));
4348 } else { 4335 } else {
4349 kmem_free(ifp->if_u1.if_ext_irec); 4336 kmem_free(ifp->if_u1.if_ext_irec);
4350 } 4337 }
4351 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4338 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4352 } 4339 }
4353 4340
4354 /* 4341 /*
4355 * This is called to clean up large amounts of unused memory allocated 4342 * This is called to clean up large amounts of unused memory allocated
4356 * by the indirection array. Before compacting anything though, verify 4343 * by the indirection array. Before compacting anything though, verify
4357 * that the indirection array is still needed and switch back to the 4344 * that the indirection array is still needed and switch back to the
4358 * linear extent list (or even the inline buffer) if possible. The 4345 * linear extent list (or even the inline buffer) if possible. The
4359 * compaction policy is as follows: 4346 * compaction policy is as follows:
4360 * 4347 *
4361 * Full Compaction: Extents fit into a single page (or inline buffer) 4348 * Full Compaction: Extents fit into a single page (or inline buffer)
4362 * Partial Compaction: Extents occupy less than 50% of allocated space 4349 * Partial Compaction: Extents occupy less than 50% of allocated space
4363 * No Compaction: Extents occupy at least 50% of allocated space 4350 * No Compaction: Extents occupy at least 50% of allocated space
4364 */ 4351 */
4365 void 4352 void
4366 xfs_iext_irec_compact( 4353 xfs_iext_irec_compact(
4367 xfs_ifork_t *ifp) /* inode fork pointer */ 4354 xfs_ifork_t *ifp) /* inode fork pointer */
4368 { 4355 {
4369 xfs_extnum_t nextents; /* number of extents in file */ 4356 xfs_extnum_t nextents; /* number of extents in file */
4370 int nlists; /* number of irec's (ex lists) */ 4357 int nlists; /* number of irec's (ex lists) */
4371 4358
4372 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4359 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4373 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4360 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4374 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4361 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4375 4362
4376 if (nextents == 0) { 4363 if (nextents == 0) {
4377 xfs_iext_destroy(ifp); 4364 xfs_iext_destroy(ifp);
4378 } else if (nextents <= XFS_INLINE_EXTS) { 4365 } else if (nextents <= XFS_INLINE_EXTS) {
4379 xfs_iext_indirect_to_direct(ifp); 4366 xfs_iext_indirect_to_direct(ifp);
4380 xfs_iext_direct_to_inline(ifp, nextents); 4367 xfs_iext_direct_to_inline(ifp, nextents);
4381 } else if (nextents <= XFS_LINEAR_EXTS) { 4368 } else if (nextents <= XFS_LINEAR_EXTS) {
4382 xfs_iext_indirect_to_direct(ifp); 4369 xfs_iext_indirect_to_direct(ifp);
4383 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 4370 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
4384 xfs_iext_irec_compact_pages(ifp); 4371 xfs_iext_irec_compact_pages(ifp);
4385 } 4372 }
4386 } 4373 }
4387 4374
4388 /* 4375 /*
4389 * Combine extents from neighboring extent pages. 4376 * Combine extents from neighboring extent pages.
4390 */ 4377 */
4391 void 4378 void
4392 xfs_iext_irec_compact_pages( 4379 xfs_iext_irec_compact_pages(
4393 xfs_ifork_t *ifp) /* inode fork pointer */ 4380 xfs_ifork_t *ifp) /* inode fork pointer */
4394 { 4381 {
4395 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ 4382 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
4396 int erp_idx = 0; /* indirection array index */ 4383 int erp_idx = 0; /* indirection array index */
4397 int nlists; /* number of irec's (ex lists) */ 4384 int nlists; /* number of irec's (ex lists) */
4398 4385
4399 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4386 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4400 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4387 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4401 while (erp_idx < nlists - 1) { 4388 while (erp_idx < nlists - 1) {
4402 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4389 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4403 erp_next = erp + 1; 4390 erp_next = erp + 1;
4404 if (erp_next->er_extcount <= 4391 if (erp_next->er_extcount <=
4405 (XFS_LINEAR_EXTS - erp->er_extcount)) { 4392 (XFS_LINEAR_EXTS - erp->er_extcount)) {
4406 memcpy(&erp->er_extbuf[erp->er_extcount], 4393 memcpy(&erp->er_extbuf[erp->er_extcount],
4407 erp_next->er_extbuf, erp_next->er_extcount * 4394 erp_next->er_extbuf, erp_next->er_extcount *
4408 sizeof(xfs_bmbt_rec_t)); 4395 sizeof(xfs_bmbt_rec_t));
4409 erp->er_extcount += erp_next->er_extcount; 4396 erp->er_extcount += erp_next->er_extcount;
4410 /* 4397 /*
4411 * Free page before removing extent record 4398 * Free page before removing extent record
4412 * so er_extoffs don't get modified in 4399 * so er_extoffs don't get modified in
4413 * xfs_iext_irec_remove. 4400 * xfs_iext_irec_remove.
4414 */ 4401 */
4415 kmem_free(erp_next->er_extbuf); 4402 kmem_free(erp_next->er_extbuf);
4416 erp_next->er_extbuf = NULL; 4403 erp_next->er_extbuf = NULL;
4417 xfs_iext_irec_remove(ifp, erp_idx + 1); 4404 xfs_iext_irec_remove(ifp, erp_idx + 1);
4418 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4405 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4419 } else { 4406 } else {
4420 erp_idx++; 4407 erp_idx++;
4421 } 4408 }
4422 } 4409 }
4423 } 4410 }
4424 4411
4425 /* 4412 /*
4426 * This is called to update the er_extoff field in the indirection 4413 * This is called to update the er_extoff field in the indirection
4427 * array when extents have been added or removed from one of the 4414 * array when extents have been added or removed from one of the
4428 * extent lists. erp_idx contains the irec index to begin updating 4415 * extent lists. erp_idx contains the irec index to begin updating
4429 * at and ext_diff contains the number of extents that were added 4416 * at and ext_diff contains the number of extents that were added
4430 * or removed. 4417 * or removed.
4431 */ 4418 */
4432 void 4419 void
4433 xfs_iext_irec_update_extoffs( 4420 xfs_iext_irec_update_extoffs(
4434 xfs_ifork_t *ifp, /* inode fork pointer */ 4421 xfs_ifork_t *ifp, /* inode fork pointer */
4435 int erp_idx, /* irec index to update */ 4422 int erp_idx, /* irec index to update */
4436 int ext_diff) /* number of new extents */ 4423 int ext_diff) /* number of new extents */
4437 { 4424 {
4438 int i; /* loop counter */ 4425 int i; /* loop counter */
4439 int nlists; /* number of irec's (ex lists */ 4426 int nlists; /* number of irec's (ex lists */
4440 4427
4441 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4428 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4442 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4429 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4443 for (i = erp_idx; i < nlists; i++) { 4430 for (i = erp_idx; i < nlists; i++) {
4444 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; 4431 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
4445 } 4432 }
4446 } 4433 }
1 /* 1 /*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #ifndef __XFS_INODE_H__ 18 #ifndef __XFS_INODE_H__
19 #define __XFS_INODE_H__ 19 #define __XFS_INODE_H__
20 20
21 struct xfs_dinode; 21 struct xfs_dinode;
22 struct xfs_inode; 22 struct xfs_inode;
23 23
24 /* 24 /*
25 * Fork identifiers. 25 * Fork identifiers.
26 */ 26 */
27 #define XFS_DATA_FORK 0 27 #define XFS_DATA_FORK 0
28 #define XFS_ATTR_FORK 1 28 #define XFS_ATTR_FORK 1
29 29
30 /* 30 /*
31 * The following xfs_ext_irec_t struct introduces a second (top) level 31 * The following xfs_ext_irec_t struct introduces a second (top) level
32 * to the in-core extent allocation scheme. These structs are allocated 32 * to the in-core extent allocation scheme. These structs are allocated
33 * in a contiguous block, creating an indirection array where each entry 33 * in a contiguous block, creating an indirection array where each entry
34 * (irec) contains a pointer to a buffer of in-core extent records which 34 * (irec) contains a pointer to a buffer of in-core extent records which
35 * it manages. Each extent buffer is 4k in size, since 4k is the system 35 * it manages. Each extent buffer is 4k in size, since 4k is the system
36 * page size on Linux i386 and systems with larger page sizes don't seem 36 * page size on Linux i386 and systems with larger page sizes don't seem
37 * to gain much, if anything, by using their native page size as the 37 * to gain much, if anything, by using their native page size as the
38 * extent buffer size. Also, using 4k extent buffers everywhere provides 38 * extent buffer size. Also, using 4k extent buffers everywhere provides
39 * a consistent interface for CXFS across different platforms. 39 * a consistent interface for CXFS across different platforms.
40 * 40 *
41 * There is currently no limit on the number of irec's (extent lists) 41 * There is currently no limit on the number of irec's (extent lists)
42 * allowed, so heavily fragmented files may require an indirection array 42 * allowed, so heavily fragmented files may require an indirection array
43 * which spans multiple system pages of memory. The number of extents 43 * which spans multiple system pages of memory. The number of extents
44 * which would require this amount of contiguous memory is very large 44 * which would require this amount of contiguous memory is very large
45 * and should not cause problems in the foreseeable future. However, 45 * and should not cause problems in the foreseeable future. However,
46 * if the memory needed for the contiguous array ever becomes a problem, 46 * if the memory needed for the contiguous array ever becomes a problem,
47 * it is possible that a third level of indirection may be required. 47 * it is possible that a third level of indirection may be required.
48 */ 48 */
/* One entry of the second-level (indirection) in-core extent array. */
typedef struct xfs_ext_irec {
	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
	xfs_extnum_t	er_extoff;	/* extent offset in file */
	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
} xfs_ext_irec_t;
54 54
/*
 * File incore extent information, present for each of data & attr forks.
 */
#define	XFS_IEXT_BUFSZ		4096
#define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
#define	XFS_INLINE_EXTS		2
#define	XFS_INLINE_DATA		32
typedef struct xfs_ifork {
	int			if_bytes;	/* bytes in if_u1 */
	int			if_real_bytes;	/* bytes allocated in if_u1 */
	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
	short			if_broot_bytes;	/* bytes allocated for root */
	unsigned char		if_flags;	/* per-fork flags */
	unsigned char		if_ext_max;	/* max # of extent records */
	xfs_extnum_t		if_lastex;	/* last if_extents used */
	/* if_u1 interpretation is selected by if_flags (XFS_IF*). */
	union {
		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
		char		*if_data;	/* inline file data */
	} if_u1;
	/* Inline storage used before if_u1 heap buffers are needed. */
	union {
		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
					/* very small file extents */
		char		if_inline_data[XFS_INLINE_DATA];
					/* very small file data */
		xfs_dev_t	if_rdev;	/* dev number if special */
		uuid_t		if_uuid;	/* mount point value */
	} if_u2;
} xfs_ifork_t;
84 84
/*
 * Inode location information.  Stored in the inode and passed to
 * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
 *
 * im_len is deliberately a ushort so embedding this struct does not
 * bloat struct xfs_inode.
 */
struct xfs_imap {
	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
	ushort		im_len;		/* length in BBs of inode chunk */
	ushort		im_boffset;	/* inode offset in block in bytes */
};
94
95 /*
86 * This is the xfs in-core inode structure. 96 * This is the xfs in-core inode structure.
87 * Most of the on-disk inode is embedded in the i_d field. 97 * Most of the on-disk inode is embedded in the i_d field.
88 * 98 *
89 * The extent pointers/inline file space, however, are managed 99 * The extent pointers/inline file space, however, are managed
90 * separately. The memory for this information is pointed to by 100 * separately. The memory for this information is pointed to by
91 * the if_u1 unions depending on the type of the data. 101 * the if_u1 unions depending on the type of the data.
92 * This is used to linearize the array of extents for fast in-core 102 * This is used to linearize the array of extents for fast in-core
93 * access. This is used until the file's number of extents 103 * access. This is used until the file's number of extents
94 * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers 104 * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers
95 * are accessed through the buffer cache. 105 * are accessed through the buffer cache.
96 * 106 *
97 * Other state kept in the in-core inode is used for identification, 107 * Other state kept in the in-core inode is used for identification,
98 * locking, transactional updating, etc of the inode. 108 * locking, transactional updating, etc of the inode.
99 * 109 *
100 * Generally, we do not want to hold the i_rlock while holding the 110 * Generally, we do not want to hold the i_rlock while holding the
101 * i_ilock. Hierarchy is i_iolock followed by i_rlock. 111 * i_ilock. Hierarchy is i_iolock followed by i_rlock.
102 * 112 *
103 * xfs_iptr_t contains all the inode fields upto and including the 113 * xfs_iptr_t contains all the inode fields upto and including the
104 * i_mnext and i_mprev fields, it is used as a marker in the inode 114 * i_mnext and i_mprev fields, it is used as a marker in the inode
105 * chain off the mount structure by xfs_sync calls. 115 * chain off the mount structure by xfs_sync calls.
106 */ 116 */
107 117
/* In-core timestamp: seconds plus nanoseconds. */
typedef struct xfs_ictimestamp {
	__int32_t	t_sec;		/* timestamp seconds */
	__int32_t	t_nsec;		/* timestamp nanoseconds */
} xfs_ictimestamp_t;
112 122
/*
 * NOTE: This structure must be kept identical to struct xfs_dinode
 * in xfs_dinode.h except for the endianess annotations.  Do not
 * reorder or resize fields here without changing the on-disk
 * counterpart to match.
 */
typedef struct xfs_icdinode {
	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
	__uint16_t	di_mode;	/* mode and type of file */
	__int8_t	di_version;	/* inode version */
	__int8_t	di_format;	/* format of di_c data */
	__uint16_t	di_onlink;	/* old number of links to file */
	__uint32_t	di_uid;		/* owner's user id */
	__uint32_t	di_gid;		/* owner's group id */
	__uint32_t	di_nlink;	/* number of links to file */
	__uint16_t	di_projid;	/* owner's project id */
	__uint8_t	di_pad[8];	/* unused, zeroed space */
	__uint16_t	di_flushiter;	/* incremented on flush */
	xfs_ictimestamp_t di_atime;	/* time last accessed */
	xfs_ictimestamp_t di_mtime;	/* time last modified */
	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
	xfs_fsize_t	di_size;	/* number of bytes in file */
	xfs_drfsbno_t	di_nblocks;	/* # of direct & btree blocks used */
	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
	__int8_t	di_aformat;	/* format of attr fork's data */
	__uint32_t	di_dmevmask;	/* DMIG event mask */
	__uint16_t	di_dmstate;	/* DMIG state info */
	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
	__uint32_t	di_gen;		/* generation number */
} xfs_icdinode_t;
144 154
/*
 * Flags for xfs_ichgtime().
 */
#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */

/*
 * Per-fork incore inode flags (stored in xfs_ifork.if_flags; they
 * select which member of the if_u1 union is currently valid).
 */
#define	XFS_IFINLINE	0x01	/* Inline data is read in */
#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */

/*
 * Flags for xfs_inotobp and xfs_imap().
 */
#define XFS_IMAP_BULKSTAT	0x1
163 173
/*
 * Fork handling.
 *
 * Each macro takes the in-core inode (ip) and, where relevant, the
 * fork selector (w): XFS_DATA_FORK or XFS_ATTR_FORK.
 */

/* An inode has an attribute fork iff di_forkoff is non-zero. */
#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
/* Byte offset of the attr fork within the literal area (<<3 aligns). */
#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))

#define XFS_IFORK_PTR(ip,w)		\
	((w) == XFS_DATA_FORK ? \
		&(ip)->i_df : \
		(ip)->i_afp)
#define XFS_IFORK_DSIZE(ip) \
	(XFS_IFORK_Q(ip) ? \
		XFS_IFORK_BOFF(ip) : \
		XFS_LITINO((ip)->i_mount))
#define XFS_IFORK_ASIZE(ip) \
	(XFS_IFORK_Q(ip) ? \
		XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
		0)
#define XFS_IFORK_SIZE(ip,w) \
	((w) == XFS_DATA_FORK ? \
		XFS_IFORK_DSIZE(ip) : \
		XFS_IFORK_ASIZE(ip))
#define XFS_IFORK_FORMAT(ip,w) \
	((w) == XFS_DATA_FORK ? \
		(ip)->i_d.di_format : \
		(ip)->i_d.di_aformat)
#define XFS_IFORK_FMT_SET(ip,w,n) \
	((w) == XFS_DATA_FORK ? \
		((ip)->i_d.di_format = (n)) : \
		((ip)->i_d.di_aformat = (n)))
#define XFS_IFORK_NEXTENTS(ip,w) \
	((w) == XFS_DATA_FORK ? \
		(ip)->i_d.di_nextents : \
		(ip)->i_d.di_anextents)
#define XFS_IFORK_NEXT_SET(ip,w,n) \
	((w) == XFS_DATA_FORK ? \
		((ip)->i_d.di_nextents = (n)) : \
		((ip)->i_d.di_anextents = (n)))
#ifdef __KERNEL__

/* Forward declarations; pointer-only uses, so no headers required. */
struct bhv_desc;
struct cred;
struct ktrace;
struct xfs_buf;
struct xfs_bmap_free;
struct xfs_bmbt_irec;
struct xfs_inode_log_item;
struct xfs_mount;
struct xfs_trans;
struct xfs_dquot;

#if defined(XFS_ILOCK_TRACE)
#define XFS_ILOCK_KTRACE_SIZE	32
extern ktrace_t *xfs_ilock_trace_buf;
extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
#else
/* Tracing disabled: expand to nothing so call sites compile away. */
#define	xfs_ilock_trace(i,n,f,ra)
#endif

/* DMAPI (DMIG) attribute bundle passed to DM event handlers. */
typedef struct dm_attrs_s {
	__uint32_t	da_dmevmask;	/* DMIG event mask */
	__uint16_t	da_dmstate;	/* DMIG state info */
	__uint16_t	da_pad;		/* DMIG extra padding */
} dm_attrs_t;
232 242
typedef struct xfs_inode {
	/* Inode linking and identification information. */
	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
	struct xfs_dquot	*i_udquot;	/* user dquot */
	struct xfs_dquot	*i_gdquot;	/* group dquot */

	/* Inode location stuff */
	xfs_ino_t		i_ino;		/* inode number (agno/agino)*/
	struct xfs_imap		i_imap;		/* location for xfs_imap() */

	/* Extent information. */
	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
	xfs_ifork_t		i_df;		/* data fork */

	/* Transaction and locking information. */
	struct xfs_trans	*i_transp;	/* ptr to owning transaction*/
	struct xfs_inode_log_item *i_itemp;	/* logging information */
	mrlock_t		i_lock;		/* inode lock */
	mrlock_t		i_iolock;	/* inode IO lock */
	struct completion	i_flush;	/* inode flush completion q */
	atomic_t		i_pincount;	/* inode pin count */
	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
	spinlock_t		i_flags_lock;	/* inode i_flags lock */
	/* Miscellaneous state. */
	unsigned short		i_flags;	/* see defined flags below */
	unsigned char		i_update_core;	/* timestamps/size is dirty */
	unsigned char		i_update_size;	/* di_size field is dirty */
	unsigned int		i_delayed_blks;	/* count of delay alloc blks */

	xfs_icdinode_t		i_d;		/* most of ondisk inode */

	xfs_fsize_t		i_size;		/* in-memory size */
	xfs_fsize_t		i_new_size;	/* size when write completes */
	atomic_t		i_iocount;	/* outstanding I/O count */

	/* VFS inode */
	struct inode		i_vnode;	/* embedded VFS inode */

	/* Trace buffers per inode; each compiled in only when the
	 * corresponding XFS_*_TRACE option is enabled. */
#ifdef XFS_INODE_TRACE
	struct ktrace		*i_trace;	/* general inode trace */
#endif
#ifdef XFS_BMAP_TRACE
	struct ktrace		*i_xtrace;	/* inode extent list trace */
#endif
#ifdef XFS_BTREE_TRACE
	struct ktrace		*i_btrace;	/* inode bmap btree trace */
#endif
#ifdef XFS_RW_TRACE
	struct ktrace		*i_rwtrace;	/* inode read/write trace */
#endif
#ifdef XFS_ILOCK_TRACE
	struct ktrace		*i_lock_trace;	/* inode lock/unlock trace */
#endif
#ifdef XFS_DIR2_TRACE
	struct ktrace		*i_dir_trace;	/* inode directory trace */
#endif
} xfs_inode_t;
293 301
294 #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ 302 #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
295 (ip)->i_size : (ip)->i_d.di_size; 303 (ip)->i_size : (ip)->i_d.di_size;
296 304
/* Convert from vfs inode to xfs inode (the VFS inode is embedded). */
static inline struct xfs_inode *XFS_I(struct inode *inode)
{
	return container_of(inode, struct xfs_inode, i_vnode);
}
302 310
/* convert from xfs inode to vfs inode */
static inline struct inode *VFS_I(struct xfs_inode *ip)
{
	return &ip->i_vnode;
}
308 316
/*
 * Get rid of a partially initialized inode.
 *
 * We have to go through destroy_inode to make sure allocations
 * from init_inode_always like the security data are undone.
 *
 * We mark the inode bad so that it takes the short cut in
 * the reclaim path instead of going through the flush path
 * which doesn't make sense for an inode that has never seen the
 * light of day.
 */
static inline void xfs_destroy_inode(struct xfs_inode *ip)
{
	make_bad_inode(VFS_I(ip));
	/*
	 * Plain call, not `return destroy_inode(...)`: returning a void
	 * expression from a void function is a gcc extension, not ISO C.
	 */
	destroy_inode(VFS_I(ip));
}
325 333
/*
 * i_flags helper functions
 */
/* Lockless variant: caller must already hold i_flags_lock. */
static inline void
__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
{
	ip->i_flags |= flags;
}
334 342
335 static inline void 343 static inline void
336 xfs_iflags_set(xfs_inode_t *ip, unsigned short flags) 344 xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
337 { 345 {
338 spin_lock(&ip->i_flags_lock); 346 spin_lock(&ip->i_flags_lock);
339 __xfs_iflags_set(ip, flags); 347 __xfs_iflags_set(ip, flags);
340 spin_unlock(&ip->i_flags_lock); 348 spin_unlock(&ip->i_flags_lock);
341 } 349 }
342 350
343 static inline void 351 static inline void
344 xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags) 352 xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
345 { 353 {
346 spin_lock(&ip->i_flags_lock); 354 spin_lock(&ip->i_flags_lock);
347 ip->i_flags &= ~flags; 355 ip->i_flags &= ~flags;
348 spin_unlock(&ip->i_flags_lock); 356 spin_unlock(&ip->i_flags_lock);
349 } 357 }
350 358
/* Lockless test: caller must already hold i_flags_lock (or tolerate
 * a racy read).  Returns the masked bits, non-zero if any are set. */
static inline int
__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
{
	return (ip->i_flags & flags);
}
356 364
357 static inline int 365 static inline int
358 xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) 366 xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
359 { 367 {
360 int ret; 368 int ret;
361 spin_lock(&ip->i_flags_lock); 369 spin_lock(&ip->i_flags_lock);
362 ret = __xfs_iflags_test(ip, flags); 370 ret = __xfs_iflags_test(ip, flags);
363 spin_unlock(&ip->i_flags_lock); 371 spin_unlock(&ip->i_flags_lock);
364 return ret; 372 return ret;
365 } 373 }
366 374
367 static inline int 375 static inline int
368 xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) 376 xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
369 { 377 {
370 int ret; 378 int ret;
371 379
372 spin_lock(&ip->i_flags_lock); 380 spin_lock(&ip->i_flags_lock);
373 ret = ip->i_flags & flags; 381 ret = ip->i_flags & flags;
374 if (ret) 382 if (ret)
375 ip->i_flags &= ~flags; 383 ip->i_flags &= ~flags;
376 spin_unlock(&ip->i_flags_lock); 384 spin_unlock(&ip->i_flags_lock);
377 return ret; 385 return ret;
378 } 386 }
379 387
/*
 * Manage the i_flush queue embedded in the inode.  This completion
 * queue synchronizes processes attempting to flush the in-core
 * inode back to disk.
 */
/* Acquire the flush lock, blocking until it is available. */
static inline void xfs_iflock(xfs_inode_t *ip)
{
	wait_for_completion(&ip->i_flush);
}
389 397
/* Try to acquire the flush lock; returns non-zero on success. */
static inline int xfs_iflock_nowait(xfs_inode_t *ip)
{
	return try_wait_for_completion(&ip->i_flush);
}
394 402
/* Release the flush lock taken by xfs_iflock()/xfs_iflock_nowait(). */
static inline void xfs_ifunlock(xfs_inode_t *ip)
{
	complete(&ip->i_flush);
}
399 407
/*
 * In-core inode flags (stored in xfs_inode.i_flags, protected by
 * i_flags_lock).
 */
#define XFS_IGRIO	0x0001  /* inode used for guaranteed rate i/o */
#define XFS_IUIOSZ	0x0002  /* inode i/o sizes have been explicitly set */
#define XFS_IQUIESCE    0x0004  /* we have started quiescing for this inode */
#define XFS_IRECLAIM    0x0008  /* we have started reclaiming this inode    */
#define XFS_ISTALE	0x0010	/* inode has been staled */
#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
#define XFS_INEW	0x0040
#define XFS_IFILESTREAM	0x0080	/* inode is in a filestream directory */
#define XFS_IMODIFIED	0x0100	/* XFS inode state possibly differs */
				/* to the Linux inode state. */
#define XFS_ITRUNCATED	0x0200	/* truncated down so flush-on-close */

/*
 * Flags for inode locking.
 * Bit ranges:	1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
 *		1<<16 - 1<<32-1 -- lockdep annotation (integers)
 */
#define	XFS_IOLOCK_EXCL		(1<<0)
#define	XFS_IOLOCK_SHARED	(1<<1)
#define	XFS_ILOCK_EXCL		(1<<2)
#define	XFS_ILOCK_SHARED	(1<<3)
#define	XFS_IUNLOCK_NONOTIFY	(1<<4)

#define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)

/*
 * Flags for lockdep annotations.
 *
 * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes
 * (ie directory operations that require locking a directory inode and
 * an entry inode).  The first inode gets locked with this flag so it
 * gets a lockdep subclass of 1 and the second lock will have a lockdep
 * subclass of 0.
 *
 * XFS_LOCK_INUMORDER - for locking several inodes at the some time
 * with xfs_lock_inodes().  This flag is used as the starting subclass
 * and each subsequent lock acquired will increment the subclass by one.
 * So the first lock acquired will have a lockdep subclass of 2, the
 * second lock will have a lockdep subclass of 3, and so on.  It is
 * the responsibility of the class builder to shift this to the correct
 * portion of the lock_mode lockdep mask.
 */
#define XFS_LOCK_PARENT		1
#define XFS_LOCK_INUMORDER	2

#define XFS_IOLOCK_SHIFT	16
#define	XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
451 459
452 #define XFS_ILOCK_SHIFT 24 460 #define XFS_ILOCK_SHIFT 24
453 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) 461 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
454 462
455 #define XFS_IOLOCK_DEP_MASK 0x00ff0000 463 #define XFS_IOLOCK_DEP_MASK 0x00ff0000
456 #define XFS_ILOCK_DEP_MASK 0xff000000 464 #define XFS_ILOCK_DEP_MASK 0xff000000
457 #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) 465 #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
458 466
459 #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) 467 #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
460 #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 468 #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
461 469
462 /* 470 /*
463 * Flags for xfs_iflush() 471 * Flags for xfs_iflush()
464 */ 472 */
465 #define XFS_IFLUSH_DELWRI_ELSE_SYNC 1 473 #define XFS_IFLUSH_DELWRI_ELSE_SYNC 1
466 #define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2 474 #define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2
467 #define XFS_IFLUSH_SYNC 3 475 #define XFS_IFLUSH_SYNC 3
468 #define XFS_IFLUSH_ASYNC 4 476 #define XFS_IFLUSH_ASYNC 4
469 #define XFS_IFLUSH_DELWRI 5 477 #define XFS_IFLUSH_DELWRI 5
470 #define XFS_IFLUSH_ASYNC_NOBLOCK 6 478 #define XFS_IFLUSH_ASYNC_NOBLOCK 6
471 479
472 /* 480 /*
473 * Flags for xfs_itruncate_start(). 481 * Flags for xfs_itruncate_start().
474 */ 482 */
475 #define XFS_ITRUNC_DEFINITE 0x1 483 #define XFS_ITRUNC_DEFINITE 0x1
476 #define XFS_ITRUNC_MAYBE 0x2 484 #define XFS_ITRUNC_MAYBE 0x2
477 485
478 /* 486 /*
479 * For multiple groups support: if S_ISGID bit is set in the parent 487 * For multiple groups support: if S_ISGID bit is set in the parent
480 * directory, group of new file is set to that of the parent, and 488 * directory, group of new file is set to that of the parent, and
481 * new subdirectory gets S_ISGID bit from parent. 489 * new subdirectory gets S_ISGID bit from parent.
482 */ 490 */
483 #define XFS_INHERIT_GID(pip) \ 491 #define XFS_INHERIT_GID(pip) \
484 (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ 492 (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
485 ((pip)->i_d.di_mode & S_ISGID)) 493 ((pip)->i_d.di_mode & S_ISGID))
486 494
487 /* 495 /*
488 * Flags for xfs_iget() 496 * Flags for xfs_iget()
489 */ 497 */
490 #define XFS_IGET_CREATE 0x1 498 #define XFS_IGET_CREATE 0x1
491 #define XFS_IGET_BULKSTAT 0x2 499 #define XFS_IGET_BULKSTAT 0x2
492 500
493 /* 501 /*
494 * xfs_iget.c prototypes. 502 * xfs_iget.c prototypes.
495 */ 503 */
496 void xfs_ihash_init(struct xfs_mount *); 504 void xfs_ihash_init(struct xfs_mount *);
497 void xfs_ihash_free(struct xfs_mount *); 505 void xfs_ihash_free(struct xfs_mount *);
498 xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, 506 xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
499 struct xfs_trans *); 507 struct xfs_trans *);
500 int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 508 int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
501 uint, uint, xfs_inode_t **, xfs_daddr_t); 509 uint, uint, xfs_inode_t **, xfs_daddr_t);
502 void xfs_iput(xfs_inode_t *, uint); 510 void xfs_iput(xfs_inode_t *, uint);
503 void xfs_iput_new(xfs_inode_t *, uint); 511 void xfs_iput_new(xfs_inode_t *, uint);
504 void xfs_ilock(xfs_inode_t *, uint); 512 void xfs_ilock(xfs_inode_t *, uint);
505 int xfs_ilock_nowait(xfs_inode_t *, uint); 513 int xfs_ilock_nowait(xfs_inode_t *, uint);
506 void xfs_iunlock(xfs_inode_t *, uint); 514 void xfs_iunlock(xfs_inode_t *, uint);
507 void xfs_ilock_demote(xfs_inode_t *, uint); 515 void xfs_ilock_demote(xfs_inode_t *, uint);
508 int xfs_isilocked(xfs_inode_t *, uint); 516 int xfs_isilocked(xfs_inode_t *, uint);
509 uint xfs_ilock_map_shared(xfs_inode_t *); 517 uint xfs_ilock_map_shared(xfs_inode_t *);
510 void xfs_iunlock_map_shared(xfs_inode_t *, uint); 518 void xfs_iunlock_map_shared(xfs_inode_t *, uint);
511 void xfs_ireclaim(xfs_inode_t *); 519 void xfs_ireclaim(xfs_inode_t *);
512 520
513 /* 521 /*
514 * xfs_inode.c prototypes. 522 * xfs_inode.c prototypes.
515 */ 523 */
516 int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 524 int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
517 xfs_inode_t **, xfs_daddr_t, uint); 525 xfs_inode_t **, xfs_daddr_t, uint);
518 int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 526 int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
519 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, 527 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
520 int, struct xfs_buf **, boolean_t *, xfs_inode_t **); 528 int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
521 529
522 uint xfs_ip2xflags(struct xfs_inode *); 530 uint xfs_ip2xflags(struct xfs_inode *);
523 uint xfs_dic2xflags(struct xfs_dinode *); 531 uint xfs_dic2xflags(struct xfs_dinode *);
524 int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 532 int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
525 struct xfs_bmap_free *); 533 struct xfs_bmap_free *);
526 int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); 534 int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
527 int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 535 int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
528 xfs_fsize_t, int, int); 536 xfs_fsize_t, int, int);
529 int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 537 int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
530 538
531 void xfs_idestroy(xfs_inode_t *); 539 void xfs_idestroy(xfs_inode_t *);
532 void xfs_iextract(xfs_inode_t *); 540 void xfs_iextract(xfs_inode_t *);
533 void xfs_iext_realloc(xfs_inode_t *, int, int); 541 void xfs_iext_realloc(xfs_inode_t *, int, int);
534 void xfs_ipin(xfs_inode_t *); 542 void xfs_ipin(xfs_inode_t *);
535 void xfs_iunpin(xfs_inode_t *); 543 void xfs_iunpin(xfs_inode_t *);
536 int xfs_iflush(xfs_inode_t *, uint); 544 int xfs_iflush(xfs_inode_t *, uint);
537 void xfs_ichgtime(xfs_inode_t *, int); 545 void xfs_ichgtime(xfs_inode_t *, int);
538 xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 546 xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
539 void xfs_lock_inodes(xfs_inode_t **, int, uint); 547 void xfs_lock_inodes(xfs_inode_t **, int, uint);
540 void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 548 void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
541 549
542 void xfs_synchronize_atime(xfs_inode_t *); 550 void xfs_synchronize_atime(xfs_inode_t *);
543 void xfs_mark_inode_dirty_sync(xfs_inode_t *); 551 void xfs_mark_inode_dirty_sync(xfs_inode_t *);
544 552
545 #endif /* __KERNEL__ */ 553 #endif /* __KERNEL__ */
546 554
547 int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, 555 int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
548 xfs_ino_t, struct xfs_dinode **, 556 xfs_ino_t, struct xfs_dinode **,
549 struct xfs_buf **, int *, uint); 557 struct xfs_buf **, int *, uint);
550 int xfs_itobp(struct xfs_mount *, struct xfs_trans *, 558 int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
551 struct xfs_inode *, struct xfs_dinode **, 559 struct xfs_inode *, struct xfs_dinode **,
552 struct xfs_buf **, uint); 560 struct xfs_buf **, uint);
553 void xfs_dinode_from_disk(struct xfs_icdinode *, 561 void xfs_dinode_from_disk(struct xfs_icdinode *,
554 struct xfs_dinode *); 562 struct xfs_dinode *);
555 void xfs_dinode_to_disk(struct xfs_dinode *, 563 void xfs_dinode_to_disk(struct xfs_dinode *,
556 struct xfs_icdinode *); 564 struct xfs_icdinode *);
557 void xfs_idestroy_fork(struct xfs_inode *, int); 565 void xfs_idestroy_fork(struct xfs_inode *, int);
558 void xfs_idata_realloc(struct xfs_inode *, int, int); 566 void xfs_idata_realloc(struct xfs_inode *, int, int);
559 void xfs_iroot_realloc(struct xfs_inode *, int, int); 567 void xfs_iroot_realloc(struct xfs_inode *, int, int);
560 int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); 568 int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
561 int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); 569 int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
562 570
563 xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 571 xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
564 void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 572 void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
565 xfs_bmbt_irec_t *); 573 xfs_bmbt_irec_t *);
566 void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); 574 void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
567 void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); 575 void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
568 void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); 576 void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int);
569 void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); 577 void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
570 void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); 578 void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
571 void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); 579 void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
572 void xfs_iext_realloc_direct(xfs_ifork_t *, int); 580 void xfs_iext_realloc_direct(xfs_ifork_t *, int);
573 void xfs_iext_realloc_indirect(xfs_ifork_t *, int); 581 void xfs_iext_realloc_indirect(xfs_ifork_t *, int);
574 void xfs_iext_indirect_to_direct(xfs_ifork_t *); 582 void xfs_iext_indirect_to_direct(xfs_ifork_t *);
575 void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); 583 void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
576 void xfs_iext_inline_to_direct(xfs_ifork_t *, int); 584 void xfs_iext_inline_to_direct(xfs_ifork_t *, int);
577 void xfs_iext_destroy(xfs_ifork_t *); 585 void xfs_iext_destroy(xfs_ifork_t *);
578 xfs_bmbt_rec_host_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *); 586 xfs_bmbt_rec_host_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *);
579 xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *); 587 xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *);
580 xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int); 588 xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int);
581 void xfs_iext_irec_init(xfs_ifork_t *); 589 void xfs_iext_irec_init(xfs_ifork_t *);
582 xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int); 590 xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int);
583 void xfs_iext_irec_remove(xfs_ifork_t *, int); 591 void xfs_iext_irec_remove(xfs_ifork_t *, int);
584 void xfs_iext_irec_compact(xfs_ifork_t *); 592 void xfs_iext_irec_compact(xfs_ifork_t *);
585 void xfs_iext_irec_compact_pages(xfs_ifork_t *); 593 void xfs_iext_irec_compact_pages(xfs_ifork_t *);
586 void xfs_iext_irec_compact_full(xfs_ifork_t *); 594 void xfs_iext_irec_compact_full(xfs_ifork_t *);
587 void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); 595 void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
588 596
589 #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 597 #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
590 598
591 #ifdef DEBUG 599 #ifdef DEBUG
592 void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, 600 void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
593 xfs_fsize_t); 601 xfs_fsize_t);
594 #else /* DEBUG */ 602 #else /* DEBUG */
595 #define xfs_isize_check(mp, ip, isize) 603 #define xfs_isize_check(mp, ip, isize)
596 #endif /* DEBUG */ 604 #endif /* DEBUG */
597 605
598 #if defined(DEBUG) 606 #if defined(DEBUG)
599 void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); 607 void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
600 #else 608 #else
601 #define xfs_inobp_check(mp, bp) 609 #define xfs_inobp_check(mp, bp)
602 #endif /* DEBUG */ 610 #endif /* DEBUG */
603 611
604 extern struct kmem_zone *xfs_ifork_zone; 612 extern struct kmem_zone *xfs_ifork_zone;
605 extern struct kmem_zone *xfs_inode_zone; 613 extern struct kmem_zone *xfs_inode_zone;
606 extern struct kmem_zone *xfs_ili_zone; 614 extern struct kmem_zone *xfs_ili_zone;
607 615
fs/xfs/xfs_inode_item.c
1 /* 1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_buf_item.h" 25 #include "xfs_buf_item.h"
26 #include "xfs_sb.h" 26 #include "xfs_sb.h"
27 #include "xfs_ag.h" 27 #include "xfs_ag.h"
28 #include "xfs_dir2.h" 28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h" 29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h" 30 #include "xfs_mount.h"
31 #include "xfs_trans_priv.h" 31 #include "xfs_trans_priv.h"
32 #include "xfs_bmap_btree.h" 32 #include "xfs_bmap_btree.h"
33 #include "xfs_alloc_btree.h" 33 #include "xfs_alloc_btree.h"
34 #include "xfs_ialloc_btree.h" 34 #include "xfs_ialloc_btree.h"
35 #include "xfs_dir2_sf.h" 35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h" 36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h" 37 #include "xfs_dinode.h"
38 #include "xfs_inode.h" 38 #include "xfs_inode.h"
39 #include "xfs_inode_item.h" 39 #include "xfs_inode_item.h"
40 #include "xfs_btree.h" 40 #include "xfs_btree.h"
41 #include "xfs_ialloc.h" 41 #include "xfs_ialloc.h"
42 #include "xfs_rw.h" 42 #include "xfs_rw.h"
43 #include "xfs_error.h" 43 #include "xfs_error.h"
44 44
45 45
46 kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 46 kmem_zone_t *xfs_ili_zone; /* inode log item zone */
47 47
48 /* 48 /*
49 * This returns the number of iovecs needed to log the given inode item. 49 * This returns the number of iovecs needed to log the given inode item.
50 * 50 *
51 * We need one iovec for the inode log format structure, one for the 51 * We need one iovec for the inode log format structure, one for the
52 * inode core, and possibly one for the inode data/extents/b-tree root 52 * inode core, and possibly one for the inode data/extents/b-tree root
53 * and one for the inode attribute data/extents/b-tree root. 53 * and one for the inode attribute data/extents/b-tree root.
54 */ 54 */
55 STATIC uint 55 STATIC uint
56 xfs_inode_item_size( 56 xfs_inode_item_size(
57 xfs_inode_log_item_t *iip) 57 xfs_inode_log_item_t *iip)
58 { 58 {
59 uint nvecs; 59 uint nvecs;
60 xfs_inode_t *ip; 60 xfs_inode_t *ip;
61 61
62 ip = iip->ili_inode; 62 ip = iip->ili_inode;
63 nvecs = 2; 63 nvecs = 2;
64 64
65 /* 65 /*
66 * Only log the data/extents/b-tree root if there is something 66 * Only log the data/extents/b-tree root if there is something
67 * left to log. 67 * left to log.
68 */ 68 */
69 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 69 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
70 70
71 switch (ip->i_d.di_format) { 71 switch (ip->i_d.di_format) {
72 case XFS_DINODE_FMT_EXTENTS: 72 case XFS_DINODE_FMT_EXTENTS:
73 iip->ili_format.ilf_fields &= 73 iip->ili_format.ilf_fields &=
74 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 74 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
75 XFS_ILOG_DEV | XFS_ILOG_UUID); 75 XFS_ILOG_DEV | XFS_ILOG_UUID);
76 if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) && 76 if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
77 (ip->i_d.di_nextents > 0) && 77 (ip->i_d.di_nextents > 0) &&
78 (ip->i_df.if_bytes > 0)) { 78 (ip->i_df.if_bytes > 0)) {
79 ASSERT(ip->i_df.if_u1.if_extents != NULL); 79 ASSERT(ip->i_df.if_u1.if_extents != NULL);
80 nvecs++; 80 nvecs++;
81 } else { 81 } else {
82 iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT; 82 iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
83 } 83 }
84 break; 84 break;
85 85
86 case XFS_DINODE_FMT_BTREE: 86 case XFS_DINODE_FMT_BTREE:
87 ASSERT(ip->i_df.if_ext_max == 87 ASSERT(ip->i_df.if_ext_max ==
88 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); 88 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
89 iip->ili_format.ilf_fields &= 89 iip->ili_format.ilf_fields &=
90 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 90 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
91 XFS_ILOG_DEV | XFS_ILOG_UUID); 91 XFS_ILOG_DEV | XFS_ILOG_UUID);
92 if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) && 92 if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
93 (ip->i_df.if_broot_bytes > 0)) { 93 (ip->i_df.if_broot_bytes > 0)) {
94 ASSERT(ip->i_df.if_broot != NULL); 94 ASSERT(ip->i_df.if_broot != NULL);
95 nvecs++; 95 nvecs++;
96 } else { 96 } else {
97 ASSERT(!(iip->ili_format.ilf_fields & 97 ASSERT(!(iip->ili_format.ilf_fields &
98 XFS_ILOG_DBROOT)); 98 XFS_ILOG_DBROOT));
99 #ifdef XFS_TRANS_DEBUG 99 #ifdef XFS_TRANS_DEBUG
100 if (iip->ili_root_size > 0) { 100 if (iip->ili_root_size > 0) {
101 ASSERT(iip->ili_root_size == 101 ASSERT(iip->ili_root_size ==
102 ip->i_df.if_broot_bytes); 102 ip->i_df.if_broot_bytes);
103 ASSERT(memcmp(iip->ili_orig_root, 103 ASSERT(memcmp(iip->ili_orig_root,
104 ip->i_df.if_broot, 104 ip->i_df.if_broot,
105 iip->ili_root_size) == 0); 105 iip->ili_root_size) == 0);
106 } else { 106 } else {
107 ASSERT(ip->i_df.if_broot_bytes == 0); 107 ASSERT(ip->i_df.if_broot_bytes == 0);
108 } 108 }
109 #endif 109 #endif
110 iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT; 110 iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
111 } 111 }
112 break; 112 break;
113 113
114 case XFS_DINODE_FMT_LOCAL: 114 case XFS_DINODE_FMT_LOCAL:
115 iip->ili_format.ilf_fields &= 115 iip->ili_format.ilf_fields &=
116 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | 116 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
117 XFS_ILOG_DEV | XFS_ILOG_UUID); 117 XFS_ILOG_DEV | XFS_ILOG_UUID);
118 if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) && 118 if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
119 (ip->i_df.if_bytes > 0)) { 119 (ip->i_df.if_bytes > 0)) {
120 ASSERT(ip->i_df.if_u1.if_data != NULL); 120 ASSERT(ip->i_df.if_u1.if_data != NULL);
121 ASSERT(ip->i_d.di_size > 0); 121 ASSERT(ip->i_d.di_size > 0);
122 nvecs++; 122 nvecs++;
123 } else { 123 } else {
124 iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA; 124 iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
125 } 125 }
126 break; 126 break;
127 127
128 case XFS_DINODE_FMT_DEV: 128 case XFS_DINODE_FMT_DEV:
129 iip->ili_format.ilf_fields &= 129 iip->ili_format.ilf_fields &=
130 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 130 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
131 XFS_ILOG_DEXT | XFS_ILOG_UUID); 131 XFS_ILOG_DEXT | XFS_ILOG_UUID);
132 break; 132 break;
133 133
134 case XFS_DINODE_FMT_UUID: 134 case XFS_DINODE_FMT_UUID:
135 iip->ili_format.ilf_fields &= 135 iip->ili_format.ilf_fields &=
136 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 136 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
137 XFS_ILOG_DEXT | XFS_ILOG_DEV); 137 XFS_ILOG_DEXT | XFS_ILOG_DEV);
138 break; 138 break;
139 139
140 default: 140 default:
141 ASSERT(0); 141 ASSERT(0);
142 break; 142 break;
143 } 143 }
144 144
145 /* 145 /*
146 * If there are no attributes associated with this file, 146 * If there are no attributes associated with this file,
147 * then there cannot be anything more to log. 147 * then there cannot be anything more to log.
148 * Clear all attribute-related log flags. 148 * Clear all attribute-related log flags.
149 */ 149 */
150 if (!XFS_IFORK_Q(ip)) { 150 if (!XFS_IFORK_Q(ip)) {
151 iip->ili_format.ilf_fields &= 151 iip->ili_format.ilf_fields &=
152 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); 152 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
153 return nvecs; 153 return nvecs;
154 } 154 }
155 155
156 /* 156 /*
157 * Log any necessary attribute data. 157 * Log any necessary attribute data.
158 */ 158 */
159 switch (ip->i_d.di_aformat) { 159 switch (ip->i_d.di_aformat) {
160 case XFS_DINODE_FMT_EXTENTS: 160 case XFS_DINODE_FMT_EXTENTS:
161 iip->ili_format.ilf_fields &= 161 iip->ili_format.ilf_fields &=
162 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); 162 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
163 if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && 163 if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) &&
164 (ip->i_d.di_anextents > 0) && 164 (ip->i_d.di_anextents > 0) &&
165 (ip->i_afp->if_bytes > 0)) { 165 (ip->i_afp->if_bytes > 0)) {
166 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 166 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
167 nvecs++; 167 nvecs++;
168 } else { 168 } else {
169 iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT; 169 iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
170 } 170 }
171 break; 171 break;
172 172
173 case XFS_DINODE_FMT_BTREE: 173 case XFS_DINODE_FMT_BTREE:
174 iip->ili_format.ilf_fields &= 174 iip->ili_format.ilf_fields &=
175 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); 175 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
176 if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) && 176 if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
177 (ip->i_afp->if_broot_bytes > 0)) { 177 (ip->i_afp->if_broot_bytes > 0)) {
178 ASSERT(ip->i_afp->if_broot != NULL); 178 ASSERT(ip->i_afp->if_broot != NULL);
179 nvecs++; 179 nvecs++;
180 } else { 180 } else {
181 iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT; 181 iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
182 } 182 }
183 break; 183 break;
184 184
185 case XFS_DINODE_FMT_LOCAL: 185 case XFS_DINODE_FMT_LOCAL:
186 iip->ili_format.ilf_fields &= 186 iip->ili_format.ilf_fields &=
187 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); 187 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
188 if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) && 188 if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
189 (ip->i_afp->if_bytes > 0)) { 189 (ip->i_afp->if_bytes > 0)) {
190 ASSERT(ip->i_afp->if_u1.if_data != NULL); 190 ASSERT(ip->i_afp->if_u1.if_data != NULL);
191 nvecs++; 191 nvecs++;
192 } else { 192 } else {
193 iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA; 193 iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
194 } 194 }
195 break; 195 break;
196 196
197 default: 197 default:
198 ASSERT(0); 198 ASSERT(0);
199 break; 199 break;
200 } 200 }
201 201
202 return nvecs; 202 return nvecs;
203 } 203 }
204 204
205 /* 205 /*
206 * This is called to fill in the vector of log iovecs for the 206 * This is called to fill in the vector of log iovecs for the
207 * given inode log item. It fills the first item with an inode 207 * given inode log item. It fills the first item with an inode
208 * log format structure, the second with the on-disk inode structure, 208 * log format structure, the second with the on-disk inode structure,
209 * and a possible third and/or fourth with the inode data/extents/b-tree 209 * and a possible third and/or fourth with the inode data/extents/b-tree
210 * root and inode attributes data/extents/b-tree root. 210 * root and inode attributes data/extents/b-tree root.
211 */ 211 */
212 STATIC void 212 STATIC void
213 xfs_inode_item_format( 213 xfs_inode_item_format(
214 xfs_inode_log_item_t *iip, 214 xfs_inode_log_item_t *iip,
215 xfs_log_iovec_t *log_vector) 215 xfs_log_iovec_t *log_vector)
216 { 216 {
217 uint nvecs; 217 uint nvecs;
218 xfs_log_iovec_t *vecp; 218 xfs_log_iovec_t *vecp;
219 xfs_inode_t *ip; 219 xfs_inode_t *ip;
220 size_t data_bytes; 220 size_t data_bytes;
221 xfs_bmbt_rec_t *ext_buffer; 221 xfs_bmbt_rec_t *ext_buffer;
222 int nrecs; 222 int nrecs;
223 xfs_mount_t *mp; 223 xfs_mount_t *mp;
224 224
225 ip = iip->ili_inode; 225 ip = iip->ili_inode;
226 vecp = log_vector; 226 vecp = log_vector;
227 227
228 vecp->i_addr = (xfs_caddr_t)&iip->ili_format; 228 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
229 vecp->i_len = sizeof(xfs_inode_log_format_t); 229 vecp->i_len = sizeof(xfs_inode_log_format_t);
230 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); 230 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
231 vecp++; 231 vecp++;
232 nvecs = 1; 232 nvecs = 1;
233 233
234 /* 234 /*
235 * Clear i_update_core if the timestamps (or any other 235 * Clear i_update_core if the timestamps (or any other
236 * non-transactional modification) need flushing/logging 236 * non-transactional modification) need flushing/logging
237 * and we're about to log them with the rest of the core. 237 * and we're about to log them with the rest of the core.
238 * 238 *
239 * This is the same logic as xfs_iflush() but this code can't 239 * This is the same logic as xfs_iflush() but this code can't
240 * run at the same time as xfs_iflush because we're in commit 240 * run at the same time as xfs_iflush because we're in commit
241 * processing here and so we have the inode lock held in 241 * processing here and so we have the inode lock held in
242 * exclusive mode. Although it doesn't really matter 242 * exclusive mode. Although it doesn't really matter
243 * for the timestamps if both routines were to grab the 243 * for the timestamps if both routines were to grab the
244 * timestamps or not. That would be ok. 244 * timestamps or not. That would be ok.
245 * 245 *
246 * We clear i_update_core before copying out the data. 246 * We clear i_update_core before copying out the data.
247 * This is for coordination with our timestamp updates 247 * This is for coordination with our timestamp updates
248 * that don't hold the inode lock. They will always 248 * that don't hold the inode lock. They will always
249 * update the timestamps BEFORE setting i_update_core, 249 * update the timestamps BEFORE setting i_update_core,
250 * so if we clear i_update_core after they set it we 250 * so if we clear i_update_core after they set it we
251 * are guaranteed to see their updates to the timestamps 251 * are guaranteed to see their updates to the timestamps
252 * either here. Likewise, if they set it after we clear it 252 * either here. Likewise, if they set it after we clear it
253 * here, we'll see it either on the next commit of this 253 * here, we'll see it either on the next commit of this
254 * inode or the next time the inode gets flushed via 254 * inode or the next time the inode gets flushed via
255 * xfs_iflush(). This depends on strongly ordered memory 255 * xfs_iflush(). This depends on strongly ordered memory
256 * semantics, but we have that. We use the SYNCHRONIZE 256 * semantics, but we have that. We use the SYNCHRONIZE
257 * macro to make sure that the compiler does not reorder 257 * macro to make sure that the compiler does not reorder
258 * the i_update_core access below the data copy below. 258 * the i_update_core access below the data copy below.
259 */ 259 */
260 if (ip->i_update_core) { 260 if (ip->i_update_core) {
261 ip->i_update_core = 0; 261 ip->i_update_core = 0;
262 SYNCHRONIZE(); 262 SYNCHRONIZE();
263 } 263 }
264 264
265 /* 265 /*
266 * We don't have to worry about re-ordering here because 266 * We don't have to worry about re-ordering here because
267 * the update_size field is protected by the inode lock 267 * the update_size field is protected by the inode lock
268 * and we have that held in exclusive mode. 268 * and we have that held in exclusive mode.
269 */ 269 */
270 if (ip->i_update_size) 270 if (ip->i_update_size)
271 ip->i_update_size = 0; 271 ip->i_update_size = 0;
272 272
273 /* 273 /*
274 * Make sure to get the latest atime from the Linux inode. 274 * Make sure to get the latest atime from the Linux inode.
275 */ 275 */
276 xfs_synchronize_atime(ip); 276 xfs_synchronize_atime(ip);
277 277
278 /* 278 /*
279 * make sure the linux inode is dirty 279 * make sure the linux inode is dirty
280 */ 280 */
281 xfs_mark_inode_dirty_sync(ip); 281 xfs_mark_inode_dirty_sync(ip);
282 282
283 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 283 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
284 vecp->i_len = sizeof(struct xfs_icdinode); 284 vecp->i_len = sizeof(struct xfs_icdinode);
285 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 285 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
286 vecp++; 286 vecp++;
287 nvecs++; 287 nvecs++;
288 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 288 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
289 289
290 /* 290 /*
291 * If this is really an old format inode, then we need to 291 * If this is really an old format inode, then we need to
292 * log it as such. This means that we have to copy the link 292 * log it as such. This means that we have to copy the link
293 * count from the new field to the old. We don't have to worry 293 * count from the new field to the old. We don't have to worry
294 * about the new fields, because nothing trusts them as long as 294 * about the new fields, because nothing trusts them as long as
295 * the old inode version number is there. If the superblock already 295 * the old inode version number is there. If the superblock already
296 * has a new version number, then we don't bother converting back. 296 * has a new version number, then we don't bother converting back.
297 */ 297 */
298 mp = ip->i_mount; 298 mp = ip->i_mount;
299 ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); 299 ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
300 if (ip->i_d.di_version == 1) { 300 if (ip->i_d.di_version == 1) {
301 if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 301 if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
302 /* 302 /*
303 * Convert it back. 303 * Convert it back.
304 */ 304 */
305 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 305 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
306 ip->i_d.di_onlink = ip->i_d.di_nlink; 306 ip->i_d.di_onlink = ip->i_d.di_nlink;
307 } else { 307 } else {
308 /* 308 /*
309 * The superblock version has already been bumped, 309 * The superblock version has already been bumped,
310 * so just make the conversion to the new inode 310 * so just make the conversion to the new inode
311 * format permanent. 311 * format permanent.
312 */ 312 */
313 ip->i_d.di_version = 2; 313 ip->i_d.di_version = 2;
314 ip->i_d.di_onlink = 0; 314 ip->i_d.di_onlink = 0;
315 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 315 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
316 } 316 }
317 } 317 }
318 318
319 switch (ip->i_d.di_format) { 319 switch (ip->i_d.di_format) {
320 case XFS_DINODE_FMT_EXTENTS: 320 case XFS_DINODE_FMT_EXTENTS:
321 ASSERT(!(iip->ili_format.ilf_fields & 321 ASSERT(!(iip->ili_format.ilf_fields &
322 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 322 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
323 XFS_ILOG_DEV | XFS_ILOG_UUID))); 323 XFS_ILOG_DEV | XFS_ILOG_UUID)));
324 if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { 324 if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) {
325 ASSERT(ip->i_df.if_bytes > 0); 325 ASSERT(ip->i_df.if_bytes > 0);
326 ASSERT(ip->i_df.if_u1.if_extents != NULL); 326 ASSERT(ip->i_df.if_u1.if_extents != NULL);
327 ASSERT(ip->i_d.di_nextents > 0); 327 ASSERT(ip->i_d.di_nextents > 0);
328 ASSERT(iip->ili_extents_buf == NULL); 328 ASSERT(iip->ili_extents_buf == NULL);
329 nrecs = ip->i_df.if_bytes / 329 nrecs = ip->i_df.if_bytes /
330 (uint)sizeof(xfs_bmbt_rec_t); 330 (uint)sizeof(xfs_bmbt_rec_t);
331 ASSERT(nrecs > 0); 331 ASSERT(nrecs > 0);
332 #ifdef XFS_NATIVE_HOST 332 #ifdef XFS_NATIVE_HOST
333 if (nrecs == ip->i_d.di_nextents) { 333 if (nrecs == ip->i_d.di_nextents) {
334 /* 334 /*
335 * There are no delayed allocation 335 * There are no delayed allocation
336 * extents, so just point to the 336 * extents, so just point to the
337 * real extents array. 337 * real extents array.
338 */ 338 */
339 vecp->i_addr = 339 vecp->i_addr =
340 (char *)(ip->i_df.if_u1.if_extents); 340 (char *)(ip->i_df.if_u1.if_extents);
341 vecp->i_len = ip->i_df.if_bytes; 341 vecp->i_len = ip->i_df.if_bytes;
342 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 342 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
343 } else 343 } else
344 #endif 344 #endif
345 { 345 {
346 /* 346 /*
347 * There are delayed allocation extents 347 * There are delayed allocation extents
348 * in the inode, or we need to convert 348 * in the inode, or we need to convert
349 * the extents to on disk format. 349 * the extents to on disk format.
350 * Use xfs_iextents_copy() 350 * Use xfs_iextents_copy()
351 * to copy only the real extents into 351 * to copy only the real extents into
352 * a separate buffer. We'll free the 352 * a separate buffer. We'll free the
353 * buffer in the unlock routine. 353 * buffer in the unlock routine.
354 */ 354 */
355 ext_buffer = kmem_alloc(ip->i_df.if_bytes, 355 ext_buffer = kmem_alloc(ip->i_df.if_bytes,
356 KM_SLEEP); 356 KM_SLEEP);
357 iip->ili_extents_buf = ext_buffer; 357 iip->ili_extents_buf = ext_buffer;
358 vecp->i_addr = (xfs_caddr_t)ext_buffer; 358 vecp->i_addr = (xfs_caddr_t)ext_buffer;
359 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 359 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
360 XFS_DATA_FORK); 360 XFS_DATA_FORK);
361 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 361 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
362 } 362 }
363 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 363 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
364 iip->ili_format.ilf_dsize = vecp->i_len; 364 iip->ili_format.ilf_dsize = vecp->i_len;
365 vecp++; 365 vecp++;
366 nvecs++; 366 nvecs++;
367 } 367 }
368 break; 368 break;
369 369
370 case XFS_DINODE_FMT_BTREE: 370 case XFS_DINODE_FMT_BTREE:
371 ASSERT(!(iip->ili_format.ilf_fields & 371 ASSERT(!(iip->ili_format.ilf_fields &
372 (XFS_ILOG_DDATA | XFS_ILOG_DEXT | 372 (XFS_ILOG_DDATA | XFS_ILOG_DEXT |
373 XFS_ILOG_DEV | XFS_ILOG_UUID))); 373 XFS_ILOG_DEV | XFS_ILOG_UUID)));
374 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 374 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
375 ASSERT(ip->i_df.if_broot_bytes > 0); 375 ASSERT(ip->i_df.if_broot_bytes > 0);
376 ASSERT(ip->i_df.if_broot != NULL); 376 ASSERT(ip->i_df.if_broot != NULL);
377 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 377 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
378 vecp->i_len = ip->i_df.if_broot_bytes; 378 vecp->i_len = ip->i_df.if_broot_bytes;
379 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); 379 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
380 vecp++; 380 vecp++;
381 nvecs++; 381 nvecs++;
382 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 382 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
383 } 383 }
384 break; 384 break;
385 385
386 case XFS_DINODE_FMT_LOCAL: 386 case XFS_DINODE_FMT_LOCAL:
387 ASSERT(!(iip->ili_format.ilf_fields & 387 ASSERT(!(iip->ili_format.ilf_fields &
388 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 388 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
389 XFS_ILOG_DEV | XFS_ILOG_UUID))); 389 XFS_ILOG_DEV | XFS_ILOG_UUID)));
390 if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { 390 if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) {
391 ASSERT(ip->i_df.if_bytes > 0); 391 ASSERT(ip->i_df.if_bytes > 0);
392 ASSERT(ip->i_df.if_u1.if_data != NULL); 392 ASSERT(ip->i_df.if_u1.if_data != NULL);
393 ASSERT(ip->i_d.di_size > 0); 393 ASSERT(ip->i_d.di_size > 0);
394 394
395 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; 395 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data;
396 /* 396 /*
397 * Round i_bytes up to a word boundary. 397 * Round i_bytes up to a word boundary.
398 * The underlying memory is guaranteed to 398 * The underlying memory is guaranteed to
399 * to be there by xfs_idata_realloc(). 399 * to be there by xfs_idata_realloc().
400 */ 400 */
401 data_bytes = roundup(ip->i_df.if_bytes, 4); 401 data_bytes = roundup(ip->i_df.if_bytes, 4);
402 ASSERT((ip->i_df.if_real_bytes == 0) || 402 ASSERT((ip->i_df.if_real_bytes == 0) ||
403 (ip->i_df.if_real_bytes == data_bytes)); 403 (ip->i_df.if_real_bytes == data_bytes));
404 vecp->i_len = (int)data_bytes; 404 vecp->i_len = (int)data_bytes;
405 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); 405 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
406 vecp++; 406 vecp++;
407 nvecs++; 407 nvecs++;
408 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 408 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
409 } 409 }
410 break; 410 break;
411 411
412 case XFS_DINODE_FMT_DEV: 412 case XFS_DINODE_FMT_DEV:
413 ASSERT(!(iip->ili_format.ilf_fields & 413 ASSERT(!(iip->ili_format.ilf_fields &
414 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 414 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
415 XFS_ILOG_DDATA | XFS_ILOG_UUID))); 415 XFS_ILOG_DDATA | XFS_ILOG_UUID)));
416 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 416 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
417 iip->ili_format.ilf_u.ilfu_rdev = 417 iip->ili_format.ilf_u.ilfu_rdev =
418 ip->i_df.if_u2.if_rdev; 418 ip->i_df.if_u2.if_rdev;
419 } 419 }
420 break; 420 break;
421 421
422 case XFS_DINODE_FMT_UUID: 422 case XFS_DINODE_FMT_UUID:
423 ASSERT(!(iip->ili_format.ilf_fields & 423 ASSERT(!(iip->ili_format.ilf_fields &
424 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 424 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
425 XFS_ILOG_DDATA | XFS_ILOG_DEV))); 425 XFS_ILOG_DDATA | XFS_ILOG_DEV)));
426 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 426 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
427 iip->ili_format.ilf_u.ilfu_uuid = 427 iip->ili_format.ilf_u.ilfu_uuid =
428 ip->i_df.if_u2.if_uuid; 428 ip->i_df.if_u2.if_uuid;
429 } 429 }
430 break; 430 break;
431 431
432 default: 432 default:
433 ASSERT(0); 433 ASSERT(0);
434 break; 434 break;
435 } 435 }
436 436
437 /* 437 /*
438 * If there are no attributes associated with the file, 438 * If there are no attributes associated with the file,
439 * then we're done. 439 * then we're done.
440 * Assert that no attribute-related log flags are set. 440 * Assert that no attribute-related log flags are set.
441 */ 441 */
442 if (!XFS_IFORK_Q(ip)) { 442 if (!XFS_IFORK_Q(ip)) {
443 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 443 ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
444 iip->ili_format.ilf_size = nvecs; 444 iip->ili_format.ilf_size = nvecs;
445 ASSERT(!(iip->ili_format.ilf_fields & 445 ASSERT(!(iip->ili_format.ilf_fields &
446 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 446 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
447 return; 447 return;
448 } 448 }
449 449
450 switch (ip->i_d.di_aformat) { 450 switch (ip->i_d.di_aformat) {
451 case XFS_DINODE_FMT_EXTENTS: 451 case XFS_DINODE_FMT_EXTENTS:
452 ASSERT(!(iip->ili_format.ilf_fields & 452 ASSERT(!(iip->ili_format.ilf_fields &
453 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 453 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
454 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 454 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
455 ASSERT(ip->i_afp->if_bytes > 0); 455 ASSERT(ip->i_afp->if_bytes > 0);
456 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 456 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
457 ASSERT(ip->i_d.di_anextents > 0); 457 ASSERT(ip->i_d.di_anextents > 0);
458 #ifdef DEBUG 458 #ifdef DEBUG
459 nrecs = ip->i_afp->if_bytes / 459 nrecs = ip->i_afp->if_bytes /
460 (uint)sizeof(xfs_bmbt_rec_t); 460 (uint)sizeof(xfs_bmbt_rec_t);
461 #endif 461 #endif
462 ASSERT(nrecs > 0); 462 ASSERT(nrecs > 0);
463 ASSERT(nrecs == ip->i_d.di_anextents); 463 ASSERT(nrecs == ip->i_d.di_anextents);
464 #ifdef XFS_NATIVE_HOST 464 #ifdef XFS_NATIVE_HOST
465 /* 465 /*
466 * There are not delayed allocation extents 466 * There are not delayed allocation extents
467 * for attributes, so just point at the array. 467 * for attributes, so just point at the array.
468 */ 468 */
469 vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); 469 vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents);
470 vecp->i_len = ip->i_afp->if_bytes; 470 vecp->i_len = ip->i_afp->if_bytes;
471 #else 471 #else
472 ASSERT(iip->ili_aextents_buf == NULL); 472 ASSERT(iip->ili_aextents_buf == NULL);
473 /* 473 /*
474 * Need to endian flip before logging 474 * Need to endian flip before logging
475 */ 475 */
476 ext_buffer = kmem_alloc(ip->i_afp->if_bytes, 476 ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
477 KM_SLEEP); 477 KM_SLEEP);
478 iip->ili_aextents_buf = ext_buffer; 478 iip->ili_aextents_buf = ext_buffer;
479 vecp->i_addr = (xfs_caddr_t)ext_buffer; 479 vecp->i_addr = (xfs_caddr_t)ext_buffer;
480 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 480 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
481 XFS_ATTR_FORK); 481 XFS_ATTR_FORK);
482 #endif 482 #endif
483 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); 483 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
484 iip->ili_format.ilf_asize = vecp->i_len; 484 iip->ili_format.ilf_asize = vecp->i_len;
485 vecp++; 485 vecp++;
486 nvecs++; 486 nvecs++;
487 } 487 }
488 break; 488 break;
489 489
490 case XFS_DINODE_FMT_BTREE: 490 case XFS_DINODE_FMT_BTREE:
491 ASSERT(!(iip->ili_format.ilf_fields & 491 ASSERT(!(iip->ili_format.ilf_fields &
492 (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); 492 (XFS_ILOG_ADATA | XFS_ILOG_AEXT)));
493 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 493 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
494 ASSERT(ip->i_afp->if_broot_bytes > 0); 494 ASSERT(ip->i_afp->if_broot_bytes > 0);
495 ASSERT(ip->i_afp->if_broot != NULL); 495 ASSERT(ip->i_afp->if_broot != NULL);
496 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 496 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
497 vecp->i_len = ip->i_afp->if_broot_bytes; 497 vecp->i_len = ip->i_afp->if_broot_bytes;
498 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); 498 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
499 vecp++; 499 vecp++;
500 nvecs++; 500 nvecs++;
501 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 501 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
502 } 502 }
503 break; 503 break;
504 504
505 case XFS_DINODE_FMT_LOCAL: 505 case XFS_DINODE_FMT_LOCAL:
506 ASSERT(!(iip->ili_format.ilf_fields & 506 ASSERT(!(iip->ili_format.ilf_fields &
507 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 507 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
508 if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { 508 if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
509 ASSERT(ip->i_afp->if_bytes > 0); 509 ASSERT(ip->i_afp->if_bytes > 0);
510 ASSERT(ip->i_afp->if_u1.if_data != NULL); 510 ASSERT(ip->i_afp->if_u1.if_data != NULL);
511 511
512 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; 512 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data;
513 /* 513 /*
514 * Round i_bytes up to a word boundary. 514 * Round i_bytes up to a word boundary.
515 * The underlying memory is guaranteed to 515 * The underlying memory is guaranteed to
516 * to be there by xfs_idata_realloc(). 516 * to be there by xfs_idata_realloc().
517 */ 517 */
518 data_bytes = roundup(ip->i_afp->if_bytes, 4); 518 data_bytes = roundup(ip->i_afp->if_bytes, 4);
519 ASSERT((ip->i_afp->if_real_bytes == 0) || 519 ASSERT((ip->i_afp->if_real_bytes == 0) ||
520 (ip->i_afp->if_real_bytes == data_bytes)); 520 (ip->i_afp->if_real_bytes == data_bytes));
521 vecp->i_len = (int)data_bytes; 521 vecp->i_len = (int)data_bytes;
522 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); 522 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
523 vecp++; 523 vecp++;
524 nvecs++; 524 nvecs++;
525 iip->ili_format.ilf_asize = (unsigned)data_bytes; 525 iip->ili_format.ilf_asize = (unsigned)data_bytes;
526 } 526 }
527 break; 527 break;
528 528
529 default: 529 default:
530 ASSERT(0); 530 ASSERT(0);
531 break; 531 break;
532 } 532 }
533 533
534 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 534 ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
535 iip->ili_format.ilf_size = nvecs; 535 iip->ili_format.ilf_size = nvecs;
536 } 536 }
537 537
538 538
539 /* 539 /*
540 * This is called to pin the inode associated with the inode log 540 * This is called to pin the inode associated with the inode log
541 * item in memory so it cannot be written out. Do this by calling 541 * item in memory so it cannot be written out. Do this by calling
542 * xfs_ipin() to bump the pin count in the inode while holding the 542 * xfs_ipin() to bump the pin count in the inode while holding the
543 * inode pin lock. 543 * inode pin lock.
544 */ 544 */
545 STATIC void 545 STATIC void
546 xfs_inode_item_pin( 546 xfs_inode_item_pin(
547 xfs_inode_log_item_t *iip) 547 xfs_inode_log_item_t *iip)
548 { 548 {
549 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 549 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
550 xfs_ipin(iip->ili_inode); 550 xfs_ipin(iip->ili_inode);
551 } 551 }
552 552
553 553
554 /* 554 /*
555 * This is called to unpin the inode associated with the inode log 555 * This is called to unpin the inode associated with the inode log
556 * item which was previously pinned with a call to xfs_inode_item_pin(). 556 * item which was previously pinned with a call to xfs_inode_item_pin().
557 * Just call xfs_iunpin() on the inode to do this. 557 * Just call xfs_iunpin() on the inode to do this.
558 */ 558 */
559 /* ARGSUSED */ 559 /* ARGSUSED */
560 STATIC void 560 STATIC void
561 xfs_inode_item_unpin( 561 xfs_inode_item_unpin(
562 xfs_inode_log_item_t *iip, 562 xfs_inode_log_item_t *iip,
563 int stale) 563 int stale)
564 { 564 {
565 xfs_iunpin(iip->ili_inode); 565 xfs_iunpin(iip->ili_inode);
566 } 566 }
567 567
568 /* ARGSUSED */ 568 /* ARGSUSED */
569 STATIC void 569 STATIC void
570 xfs_inode_item_unpin_remove( 570 xfs_inode_item_unpin_remove(
571 xfs_inode_log_item_t *iip, 571 xfs_inode_log_item_t *iip,
572 xfs_trans_t *tp) 572 xfs_trans_t *tp)
573 { 573 {
574 xfs_iunpin(iip->ili_inode); 574 xfs_iunpin(iip->ili_inode);
575 } 575 }
576 576
/*
 * This is called to attempt to lock the inode associated with this
 * inode log item, in preparation for the push routine which does the actual
 * iflush.  Don't sleep on the inode lock or the flush lock.
 *
 * If the flush lock is already held, indicating that the inode has
 * been or is in the process of being flushed, then (ideally) we'd like to
 * see if the inode's buffer is still incore, and if so give it a nudge.
 * We delay doing so until the pushbuf routine, though, to avoid holding
 * the AIL lock across a call to the blackhole which is the buffer cache.
 * Also we don't want to sleep in any device strategy routines, which can happen
 * if we do the subsequent bawrite in here.
 *
 * Returns one of XFS_ITEM_PINNED, XFS_ITEM_LOCKED, XFS_ITEM_PUSHBUF,
 * XFS_ITEM_FLUSHING or XFS_ITEM_SUCCESS; on XFS_ITEM_SUCCESS and
 * XFS_ITEM_PUSHBUF the inode is left locked shared (and on success also
 * flush-locked) for the caller.
 */
STATIC uint
xfs_inode_item_trylock(
	xfs_inode_log_item_t	*iip)
{
	register xfs_inode_t	*ip;

	ip = iip->ili_inode;

	/* A pinned inode cannot be flushed until the log is forced. */
	if (xfs_ipincount(ip) > 0) {
		return XFS_ITEM_PINNED;
	}

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
		return XFS_ITEM_LOCKED;
	}

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * If someone else isn't already trying to push the inode
		 * buffer, we get to do it.
		 */
		if (iip->ili_pushbuf_flag == 0) {
			iip->ili_pushbuf_flag = 1;
#ifdef DEBUG
			iip->ili_push_owner = current_pid();
#endif
			/*
			 * Inode is left locked in shared mode.
			 * Pushbuf routine gets to unlock it.
			 */
			return XFS_ITEM_PUSHBUF;
		} else {
			/*
			 * We hold the AIL lock, so we must specify the
			 * NONOTIFY flag so that we won't double trip.
			 */
			xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
			return XFS_ITEM_FLUSHING;
		}
		/* NOTREACHED */
	}

	/* Stale items should force out the iclog */
	if (ip->i_flags & XFS_ISTALE) {
		/* Drop both the flush lock and the inode lock again. */
		xfs_ifunlock(ip);
		xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
		return XFS_ITEM_PINNED;
	}

#ifdef DEBUG
	/*
	 * Unless the filesystem is shutting down, an inode we could
	 * flush-lock from the AIL must be dirty and not yet written.
	 */
	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		ASSERT(iip->ili_format.ilf_fields != 0);
		ASSERT(iip->ili_logged == 0);
		ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL);
	}
#endif
	return XFS_ITEM_SUCCESS;
}
648 648
649 /* 649 /*
650 * Unlock the inode associated with the inode log item. 650 * Unlock the inode associated with the inode log item.
651 * Clear the fields of the inode and inode log item that 651 * Clear the fields of the inode and inode log item that
652 * are specific to the current transaction. If the 652 * are specific to the current transaction. If the
653 * hold flags is set, do not unlock the inode. 653 * hold flags is set, do not unlock the inode.
654 */ 654 */
655 STATIC void 655 STATIC void
656 xfs_inode_item_unlock( 656 xfs_inode_item_unlock(
657 xfs_inode_log_item_t *iip) 657 xfs_inode_log_item_t *iip)
658 { 658 {
659 uint hold; 659 uint hold;
660 uint iolocked; 660 uint iolocked;
661 uint lock_flags; 661 uint lock_flags;
662 xfs_inode_t *ip; 662 xfs_inode_t *ip;
663 663
664 ASSERT(iip != NULL); 664 ASSERT(iip != NULL);
665 ASSERT(iip->ili_inode->i_itemp != NULL); 665 ASSERT(iip->ili_inode->i_itemp != NULL);
666 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 666 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
667 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 667 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
668 XFS_ILI_IOLOCKED_EXCL)) || 668 XFS_ILI_IOLOCKED_EXCL)) ||
669 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL)); 669 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
670 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 670 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
671 XFS_ILI_IOLOCKED_SHARED)) || 671 XFS_ILI_IOLOCKED_SHARED)) ||
672 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED)); 672 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
673 /* 673 /*
674 * Clear the transaction pointer in the inode. 674 * Clear the transaction pointer in the inode.
675 */ 675 */
676 ip = iip->ili_inode; 676 ip = iip->ili_inode;
677 ip->i_transp = NULL; 677 ip->i_transp = NULL;
678 678
679 /* 679 /*
680 * If the inode needed a separate buffer with which to log 680 * If the inode needed a separate buffer with which to log
681 * its extents, then free it now. 681 * its extents, then free it now.
682 */ 682 */
683 if (iip->ili_extents_buf != NULL) { 683 if (iip->ili_extents_buf != NULL) {
684 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); 684 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
685 ASSERT(ip->i_d.di_nextents > 0); 685 ASSERT(ip->i_d.di_nextents > 0);
686 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 686 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
687 ASSERT(ip->i_df.if_bytes > 0); 687 ASSERT(ip->i_df.if_bytes > 0);
688 kmem_free(iip->ili_extents_buf); 688 kmem_free(iip->ili_extents_buf);
689 iip->ili_extents_buf = NULL; 689 iip->ili_extents_buf = NULL;
690 } 690 }
691 if (iip->ili_aextents_buf != NULL) { 691 if (iip->ili_aextents_buf != NULL) {
692 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); 692 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
693 ASSERT(ip->i_d.di_anextents > 0); 693 ASSERT(ip->i_d.di_anextents > 0);
694 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 694 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
695 ASSERT(ip->i_afp->if_bytes > 0); 695 ASSERT(ip->i_afp->if_bytes > 0);
696 kmem_free(iip->ili_aextents_buf); 696 kmem_free(iip->ili_aextents_buf);
697 iip->ili_aextents_buf = NULL; 697 iip->ili_aextents_buf = NULL;
698 } 698 }
699 699
700 /* 700 /*
701 * Figure out if we should unlock the inode or not. 701 * Figure out if we should unlock the inode or not.
702 */ 702 */
703 hold = iip->ili_flags & XFS_ILI_HOLD; 703 hold = iip->ili_flags & XFS_ILI_HOLD;
704 704
705 /* 705 /*
706 * Before clearing out the flags, remember whether we 706 * Before clearing out the flags, remember whether we
707 * are holding the inode's IO lock. 707 * are holding the inode's IO lock.
708 */ 708 */
709 iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY; 709 iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY;
710 710
711 /* 711 /*
712 * Clear out the fields of the inode log item particular 712 * Clear out the fields of the inode log item particular
713 * to the current transaction. 713 * to the current transaction.
714 */ 714 */
715 iip->ili_ilock_recur = 0; 715 iip->ili_ilock_recur = 0;
716 iip->ili_iolock_recur = 0; 716 iip->ili_iolock_recur = 0;
717 iip->ili_flags = 0; 717 iip->ili_flags = 0;
718 718
719 /* 719 /*
720 * Unlock the inode if XFS_ILI_HOLD was not set. 720 * Unlock the inode if XFS_ILI_HOLD was not set.
721 */ 721 */
722 if (!hold) { 722 if (!hold) {
723 lock_flags = XFS_ILOCK_EXCL; 723 lock_flags = XFS_ILOCK_EXCL;
724 if (iolocked & XFS_ILI_IOLOCKED_EXCL) { 724 if (iolocked & XFS_ILI_IOLOCKED_EXCL) {
725 lock_flags |= XFS_IOLOCK_EXCL; 725 lock_flags |= XFS_IOLOCK_EXCL;
726 } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) { 726 } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) {
727 lock_flags |= XFS_IOLOCK_SHARED; 727 lock_flags |= XFS_IOLOCK_SHARED;
728 } 728 }
729 xfs_iput(iip->ili_inode, lock_flags); 729 xfs_iput(iip->ili_inode, lock_flags);
730 } 730 }
731 } 731 }
732 732
733 /* 733 /*
734 * This is called to find out where the oldest active copy of the 734 * This is called to find out where the oldest active copy of the
735 * inode log item in the on disk log resides now that the last log 735 * inode log item in the on disk log resides now that the last log
736 * write of it completed at the given lsn. Since we always re-log 736 * write of it completed at the given lsn. Since we always re-log
737 * all dirty data in an inode, the latest copy in the on disk log 737 * all dirty data in an inode, the latest copy in the on disk log
738 * is the only one that matters. Therefore, simply return the 738 * is the only one that matters. Therefore, simply return the
739 * given lsn. 739 * given lsn.
740 */ 740 */
741 /*ARGSUSED*/ 741 /*ARGSUSED*/
742 STATIC xfs_lsn_t 742 STATIC xfs_lsn_t
743 xfs_inode_item_committed( 743 xfs_inode_item_committed(
744 xfs_inode_log_item_t *iip, 744 xfs_inode_log_item_t *iip,
745 xfs_lsn_t lsn) 745 xfs_lsn_t lsn)
746 { 746 {
747 return (lsn); 747 return (lsn);
748 } 748 }
749 749
/*
 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
 * failed to get the inode flush lock but did get the inode locked SHARED.
 * Here we're trying to see if the inode buffer is incore, and if so whether it's
 * marked delayed write. If that's the case, we'll initiate a bawrite on that
 * buffer to expedite the process.
 *
 * We aren't holding the AIL lock (or the flush lock) when this gets called,
 * so it is inherently race-y.
 *
 * On return the shared inode lock has been dropped and ili_pushbuf_flag
 * cleared, on every path.
 */
STATIC void
xfs_inode_item_pushbuf(
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t	*ip;
	xfs_mount_t	*mp;
	xfs_buf_t	*bp;
	uint		dopush;

	ip = iip->ili_inode;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));

	/*
	 * The ili_pushbuf_flag keeps others from
	 * trying to duplicate our effort.
	 */
	ASSERT(iip->ili_pushbuf_flag != 0);
	ASSERT(iip->ili_push_owner == current_pid());

	/*
	 * If a flush is not in progress anymore, chances are that the
	 * inode was taken off the AIL. So, just get out.
	 */
	if (completion_done(&ip->i_flush) ||
	    ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return;
	}

	/*
	 * Try to grab the inode's backing buffer without sleeping, using
	 * the location recorded in the log item format structure.
	 */
	mp = ip->i_mount;
	bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
		    iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK);

	if (bp != NULL) {
		if (XFS_BUF_ISDELAYWRITE(bp)) {
			/*
			 * We were racing with iflush because we don't hold
			 * the AIL lock or the flush lock. However, at this point,
			 * we have the buffer, and we know that it's dirty.
			 * So, it's possible that iflush raced with us, and
			 * this item is already taken off the AIL.
			 * If not, we can flush it async.
			 */
			dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
				  !completion_done(&ip->i_flush));
			iip->ili_pushbuf_flag = 0;
			xfs_iunlock(ip, XFS_ILOCK_SHARED);
			xfs_buftrace("INODE ITEM PUSH", bp);
			/* A pinned buffer needs the log forced first. */
			if (XFS_BUF_ISPINNED(bp)) {
				xfs_log_force(mp, (xfs_lsn_t)0,
					      XFS_LOG_FORCE);
			}
			if (dopush) {
				int	error;
				error = xfs_bawrite(mp, bp);
				if (error)
					xfs_fs_cmn_err(CE_WARN, mp,
		"xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
							error, iip, bp);
			} else {
				/* Item already off the AIL: just drop the buffer. */
				xfs_buf_relse(bp);
			}
		} else {
			/* Buffer is clean; nothing to push. */
			iip->ili_pushbuf_flag = 0;
			xfs_iunlock(ip, XFS_ILOCK_SHARED);
			xfs_buf_relse(bp);
		}
		return;
	}
	/*
	 * We have to be careful about resetting pushbuf flag too early (above).
	 * Even though in theory we can do it as soon as we have the buflock,
	 * we don't want others to be doing work needlessly. They'll come to
	 * this function thinking that pushing the buffer is their
	 * responsibility only to find that the buffer is still locked by
	 * another doing the same thing
	 */
	iip->ili_pushbuf_flag = 0;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return;
}
843 843
844 844
845 /* 845 /*
846 * This is called to asynchronously write the inode associated with this 846 * This is called to asynchronously write the inode associated with this
847 * inode log item out to disk. The inode will already have been locked by 847 * inode log item out to disk. The inode will already have been locked by
848 * a successful call to xfs_inode_item_trylock(). 848 * a successful call to xfs_inode_item_trylock().
849 */ 849 */
850 STATIC void 850 STATIC void
851 xfs_inode_item_push( 851 xfs_inode_item_push(
852 xfs_inode_log_item_t *iip) 852 xfs_inode_log_item_t *iip)
853 { 853 {
854 xfs_inode_t *ip; 854 xfs_inode_t *ip;
855 855
856 ip = iip->ili_inode; 856 ip = iip->ili_inode;
857 857
858 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 858 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
859 ASSERT(!completion_done(&ip->i_flush)); 859 ASSERT(!completion_done(&ip->i_flush));
860 /* 860 /*
861 * Since we were able to lock the inode's flush lock and 861 * Since we were able to lock the inode's flush lock and
862 * we found it on the AIL, the inode must be dirty. This 862 * we found it on the AIL, the inode must be dirty. This
863 * is because the inode is removed from the AIL while still 863 * is because the inode is removed from the AIL while still
864 * holding the flush lock in xfs_iflush_done(). Thus, if 864 * holding the flush lock in xfs_iflush_done(). Thus, if
865 * we found it in the AIL and were able to obtain the flush 865 * we found it in the AIL and were able to obtain the flush
866 * lock without sleeping, then there must not have been 866 * lock without sleeping, then there must not have been
867 * anyone in the process of flushing the inode. 867 * anyone in the process of flushing the inode.
868 */ 868 */
869 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || 869 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
870 iip->ili_format.ilf_fields != 0); 870 iip->ili_format.ilf_fields != 0);
871 871
872 /* 872 /*
873 * Write out the inode. The completion routine ('iflush_done') will 873 * Write out the inode. The completion routine ('iflush_done') will
874 * pull it from the AIL, mark it clean, unlock the flush lock. 874 * pull it from the AIL, mark it clean, unlock the flush lock.
875 */ 875 */
876 (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); 876 (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC);
877 xfs_iunlock(ip, XFS_ILOCK_SHARED); 877 xfs_iunlock(ip, XFS_ILOCK_SHARED);
878 878
879 return; 879 return;
880 } 880 }
881 881
882 /* 882 /*
883 * XXX rcc - this one really has to do something. Probably needs 883 * XXX rcc - this one really has to do something. Probably needs
884 * to stamp in a new field in the incore inode. 884 * to stamp in a new field in the incore inode.
885 */ 885 */
886 /* ARGSUSED */ 886 /* ARGSUSED */
887 STATIC void 887 STATIC void
888 xfs_inode_item_committing( 888 xfs_inode_item_committing(
889 xfs_inode_log_item_t *iip, 889 xfs_inode_log_item_t *iip,
890 xfs_lsn_t lsn) 890 xfs_lsn_t lsn)
891 { 891 {
892 iip->ili_last_lsn = lsn; 892 iip->ili_last_lsn = lsn;
893 return; 893 return;
894 } 894 }
895 895
/*
 * This is the ops vector shared by all inode log items.
 */
static struct xfs_item_ops xfs_inode_item_ops = {
	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_inode_item_size,
	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
					xfs_inode_item_format,
	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_inode_item_pin,
	.iop_unpin	= (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin,
	.iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
					xfs_inode_item_unpin_remove,
	.iop_trylock	= (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock,
	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_inode_item_unlock,
	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
					xfs_inode_item_committed,
	.iop_push	= (void(*)(xfs_log_item_t*))xfs_inode_item_push,
	.iop_pushbuf	= (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
					xfs_inode_item_committing
};
916 916
917 917
/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
	xfs_inode_t	*ip,
	xfs_mount_t	*mp)
{
	xfs_inode_log_item_t *iip;

	ASSERT(ip->i_itemp == NULL);
	iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);

	iip->ili_item.li_type = XFS_LI_INODE;
	iip->ili_item.li_ops = &xfs_inode_item_ops;
	iip->ili_item.li_mountp = mp;
	iip->ili_item.li_ailp = mp->m_ail;
	iip->ili_inode = ip;

	/*
	 * kmem_zone_zalloc() returned zeroed memory, so ili_extents_buf
	 * and ili_pushbuf_flag need no explicit initialization:
	 * iip->ili_extents_buf = NULL;
	 * iip->ili_pushbuf_flag = 0;
	 */

	/* Seed the log format from the inode's on-disk location (i_imap). */
	iip->ili_format.ilf_type = XFS_LI_INODE;
	iip->ili_format.ilf_ino = ip->i_ino;
	iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
	iip->ili_format.ilf_len = ip->i_imap.im_len;
	iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;
}
949 949
950 /* 950 /*
951 * Free the inode log item and any memory hanging off of it. 951 * Free the inode log item and any memory hanging off of it.
952 */ 952 */
953 void 953 void
954 xfs_inode_item_destroy( 954 xfs_inode_item_destroy(
955 xfs_inode_t *ip) 955 xfs_inode_t *ip)
956 { 956 {
957 #ifdef XFS_TRANS_DEBUG 957 #ifdef XFS_TRANS_DEBUG
958 if (ip->i_itemp->ili_root_size != 0) { 958 if (ip->i_itemp->ili_root_size != 0) {
959 kmem_free(ip->i_itemp->ili_orig_root); 959 kmem_free(ip->i_itemp->ili_orig_root);
960 } 960 }
961 #endif 961 #endif
962 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 962 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
963 } 963 }
964 964
965 965
/*
 * This is the inode flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the inode is
 * flushed to disk.  It is responsible for removing the inode item
 * from the AIL if it has not been re-logged, and unlocking the inode's
 * flush lock.
 */
/*ARGSUSED*/
void
xfs_iflush_done(
	xfs_buf_t		*bp,
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t		*ip = iip->ili_inode;
	struct xfs_ail		*ailp = iip->ili_item.li_ailp;

	/*
	 * We only want to pull the item from the AIL if it is
	 * actually there and its location in the log has not
	 * changed since we started the flush.  Thus, we only bother
	 * if the ili_logged flag is set and the inode's lsn has not
	 * changed.  First we check the lsn outside
	 * the lock since it's cheaper, and then we recheck while
	 * holding the lock before removing the inode from the AIL.
	 */
	if (iip->ili_logged &&
	    (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
		spin_lock(&ailp->xa_lock);
		if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
			/* xfs_trans_ail_delete() drops the AIL lock. */
			xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip);
		} else {
			spin_unlock(&ailp->xa_lock);
		}
	}

	/* The flush we started is complete, whether or not we relogged. */
	iip->ili_logged = 0;

	/*
	 * Clear the ili_last_fields bits now that we know that the
	 * data corresponding to them is safely on disk.
	 */
	iip->ili_last_fields = 0;

	/*
	 * Release the inode's flush lock since we're done with it.
	 */
	xfs_ifunlock(ip);

	return;
}
1017 1017
1018 /* 1018 /*
1019 * This is the inode flushing abort routine. It is called 1019 * This is the inode flushing abort routine. It is called
1020 * from xfs_iflush when the filesystem is shutting down to clean 1020 * from xfs_iflush when the filesystem is shutting down to clean
1021 * up the inode state. 1021 * up the inode state.
1022 * It is responsible for removing the inode item 1022 * It is responsible for removing the inode item
1023 * from the AIL if it has not been re-logged, and unlocking the inode's 1023 * from the AIL if it has not been re-logged, and unlocking the inode's
1024 * flush lock. 1024 * flush lock.
1025 */ 1025 */
1026 void 1026 void
1027 xfs_iflush_abort( 1027 xfs_iflush_abort(
1028 xfs_inode_t *ip) 1028 xfs_inode_t *ip)
1029 { 1029 {
1030 xfs_inode_log_item_t *iip = ip->i_itemp; 1030 xfs_inode_log_item_t *iip = ip->i_itemp;
1031 xfs_mount_t *mp; 1031 xfs_mount_t *mp;
1032 1032
1033 iip = ip->i_itemp; 1033 iip = ip->i_itemp;
1034 mp = ip->i_mount; 1034 mp = ip->i_mount;
1035 if (iip) { 1035 if (iip) {
1036 struct xfs_ail *ailp = iip->ili_item.li_ailp; 1036 struct xfs_ail *ailp = iip->ili_item.li_ailp;
1037 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1037 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
1038 spin_lock(&ailp->xa_lock); 1038 spin_lock(&ailp->xa_lock);
1039 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1039 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
1040 /* xfs_trans_ail_delete() drops the AIL lock. */ 1040 /* xfs_trans_ail_delete() drops the AIL lock. */
1041 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip); 1041 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip);
1042 } else 1042 } else
1043 spin_unlock(&ailp->xa_lock); 1043 spin_unlock(&ailp->xa_lock);
1044 } 1044 }
1045 iip->ili_logged = 0; 1045 iip->ili_logged = 0;
1046 /* 1046 /*
1047 * Clear the ili_last_fields bits now that we know that the 1047 * Clear the ili_last_fields bits now that we know that the
1048 * data corresponding to them is safely on disk. 1048 * data corresponding to them is safely on disk.
1049 */ 1049 */
1050 iip->ili_last_fields = 0; 1050 iip->ili_last_fields = 0;
1051 /* 1051 /*
1052 * Clear the inode logging fields so no more flushes are 1052 * Clear the inode logging fields so no more flushes are
1053 * attempted. 1053 * attempted.
1054 */ 1054 */
1055 iip->ili_format.ilf_fields = 0; 1055 iip->ili_format.ilf_fields = 0;
1056 } 1056 }
1057 /* 1057 /*
1058 * Release the inode's flush lock since we're done with it. 1058 * Release the inode's flush lock since we're done with it.
1059 */ 1059 */
1060 xfs_ifunlock(ip); 1060 xfs_ifunlock(ip);
1061 } 1061 }
1062 1062
/*
 * I/O completion for a stale inode buffer: reuse the flush-abort path
 * to clear the item's logging state and release the inode's flush lock.
 */
void
xfs_istale_done(
	xfs_buf_t		*bp,
	xfs_inode_log_item_t	*iip)
{
	xfs_iflush_abort(iip->ili_inode);
}
1070 1070
1071 /* 1071 /*
1072 * convert an xfs_inode_log_format struct from either 32 or 64 bit versions 1072 * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
1073 * (which can have different field alignments) to the native version 1073 * (which can have different field alignments) to the native version
1074 */ 1074 */
1075 int 1075 int
1076 xfs_inode_item_format_convert( 1076 xfs_inode_item_format_convert(
1077 xfs_log_iovec_t *buf, 1077 xfs_log_iovec_t *buf,
1078 xfs_inode_log_format_t *in_f) 1078 xfs_inode_log_format_t *in_f)
1079 { 1079 {
1080 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { 1080 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
1081 xfs_inode_log_format_32_t *in_f32; 1081 xfs_inode_log_format_32_t *in_f32;
1082 1082
1083 in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr; 1083 in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr;
1084 in_f->ilf_type = in_f32->ilf_type; 1084 in_f->ilf_type = in_f32->ilf_type;
1085 in_f->ilf_size = in_f32->ilf_size; 1085 in_f->ilf_size = in_f32->ilf_size;
1086 in_f->ilf_fields = in_f32->ilf_fields; 1086 in_f->ilf_fields = in_f32->ilf_fields;
1087 in_f->ilf_asize = in_f32->ilf_asize; 1087 in_f->ilf_asize = in_f32->ilf_asize;
1088 in_f->ilf_dsize = in_f32->ilf_dsize; 1088 in_f->ilf_dsize = in_f32->ilf_dsize;
1089 in_f->ilf_ino = in_f32->ilf_ino; 1089 in_f->ilf_ino = in_f32->ilf_ino;
1090 /* copy biggest field of ilf_u */ 1090 /* copy biggest field of ilf_u */
1091 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, 1091 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
1092 in_f32->ilf_u.ilfu_uuid.__u_bits, 1092 in_f32->ilf_u.ilfu_uuid.__u_bits,
1093 sizeof(uuid_t)); 1093 sizeof(uuid_t));
1094 in_f->ilf_blkno = in_f32->ilf_blkno; 1094 in_f->ilf_blkno = in_f32->ilf_blkno;
1095 in_f->ilf_len = in_f32->ilf_len; 1095 in_f->ilf_len = in_f32->ilf_len;
1096 in_f->ilf_boffset = in_f32->ilf_boffset; 1096 in_f->ilf_boffset = in_f32->ilf_boffset;
1097 return 0; 1097 return 0;
1098 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ 1098 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
1099 xfs_inode_log_format_64_t *in_f64; 1099 xfs_inode_log_format_64_t *in_f64;
1100 1100
1101 in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr; 1101 in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr;
1102 in_f->ilf_type = in_f64->ilf_type; 1102 in_f->ilf_type = in_f64->ilf_type;
1103 in_f->ilf_size = in_f64->ilf_size; 1103 in_f->ilf_size = in_f64->ilf_size;
1104 in_f->ilf_fields = in_f64->ilf_fields; 1104 in_f->ilf_fields = in_f64->ilf_fields;
1105 in_f->ilf_asize = in_f64->ilf_asize; 1105 in_f->ilf_asize = in_f64->ilf_asize;
1106 in_f->ilf_dsize = in_f64->ilf_dsize; 1106 in_f->ilf_dsize = in_f64->ilf_dsize;
1107 in_f->ilf_ino = in_f64->ilf_ino; 1107 in_f->ilf_ino = in_f64->ilf_ino;
1108 /* copy biggest field of ilf_u */ 1108 /* copy biggest field of ilf_u */
1109 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, 1109 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
1110 in_f64->ilf_u.ilfu_uuid.__u_bits, 1110 in_f64->ilf_u.ilfu_uuid.__u_bits,
1111 sizeof(uuid_t)); 1111 sizeof(uuid_t));
1112 in_f->ilf_blkno = in_f64->ilf_blkno; 1112 in_f->ilf_blkno = in_f64->ilf_blkno;
1113 in_f->ilf_len = in_f64->ilf_len; 1113 in_f->ilf_len = in_f64->ilf_len;
1114 in_f->ilf_boffset = in_f64->ilf_boffset; 1114 in_f->ilf_boffset = in_f64->ilf_boffset;
1115 return 0; 1115 return 0;
1116 } 1116 }
1117 return EFSCORRUPTED; 1117 return EFSCORRUPTED;
1118 } 1118 }
1119 1119
1 /* 1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h" 31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h" 32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h" 33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h" 34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h" 35 #include "xfs_dinode.h"
36 #include "xfs_inode.h" 36 #include "xfs_inode.h"
37 #include "xfs_ialloc.h" 37 #include "xfs_ialloc.h"
38 #include "xfs_itable.h" 38 #include "xfs_itable.h"
39 #include "xfs_error.h" 39 #include "xfs_error.h"
40 #include "xfs_btree.h" 40 #include "xfs_btree.h"
41 41
42 int 42 int
43 xfs_internal_inum( 43 xfs_internal_inum(
44 xfs_mount_t *mp, 44 xfs_mount_t *mp,
45 xfs_ino_t ino) 45 xfs_ino_t ino)
46 { 46 {
47 return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || 47 return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
48 (xfs_sb_version_hasquota(&mp->m_sb) && 48 (xfs_sb_version_hasquota(&mp->m_sb) &&
49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); 49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
50 } 50 }
51 51
/*
 * Stat one inode via xfs_iget (the BULKSTAT_FG_IGET path): lock the
 * in-core inode shared, copy its core fields into *buf, and release it.
 * On failure *stat is set to BULKSTAT_RV_NOTHING and the error returned.
 */
STATIC int
xfs_bulkstat_one_iget(
	xfs_mount_t	*mp,		/* mount point for filesystem */
	xfs_ino_t	ino,		/* inode number to get data for */
	xfs_daddr_t	bno,		/* starting bno of inode cluster */
	xfs_bstat_t	*buf,		/* return buffer */
	int		*stat)		/* BULKSTAT_RV_... */
{
	xfs_icdinode_t	*dic;	/* dinode core info pointer */
	xfs_inode_t	*ip;	/* incore inode pointer */
	int		error;

	error = xfs_iget(mp, NULL, ino,
			 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
	if (error) {
		*stat = BULKSTAT_RV_NOTHING;
		return error;
	}

	ASSERT(ip != NULL);
	/* xfs_iget must have filled in the inode's buffer mapping. */
	ASSERT(ip->i_imap.im_blkno != 0);

	dic = &ip->i_d;

	/* xfs_iget returns the following without needing
	 * further change.
	 */
	buf->bs_nlink = dic->di_nlink;
	buf->bs_projid = dic->di_projid;
	buf->bs_ino = ino;
	buf->bs_mode = dic->di_mode;
	buf->bs_uid = dic->di_uid;
	buf->bs_gid = dic->di_gid;
	buf->bs_size = dic->di_size;
	vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime);
	buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
	buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
	buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
	buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
	buf->bs_xflags = xfs_ip2xflags(ip);
	buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
	buf->bs_extents = dic->di_nextents;
	buf->bs_gen = dic->di_gen;
	memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
	buf->bs_dmevmask = dic->di_dmevmask;
	buf->bs_dmstate = dic->di_dmstate;
	buf->bs_aextents = dic->di_anextents;

	/* Block-size/count fields depend on the inode's data fork format. */
	switch (dic->di_format) {
	case XFS_DINODE_FMT_DEV:
		buf->bs_rdev = ip->i_df.if_u2.if_rdev;
		buf->bs_blksize = BLKDEV_IOSIZE;
		buf->bs_blocks = 0;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
		buf->bs_rdev = 0;
		buf->bs_blksize = mp->m_sb.sb_blocksize;
		buf->bs_blocks = 0;
		break;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		buf->bs_rdev = 0;
		buf->bs_blksize = mp->m_sb.sb_blocksize;
		/* include delalloc blocks not yet converted to real extents */
		buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
		break;
	}

	xfs_iput(ip, XFS_ILOCK_SHARED);
	return error;
}
123 123
/*
 * Stat one inode straight from its on-disk dinode image (no iget):
 * convert the big-endian dinode fields into the native-endian *buf.
 */
STATIC void
xfs_bulkstat_one_dinode(
	xfs_mount_t	*mp,		/* mount point for filesystem */
	xfs_ino_t	ino,		/* inode number to get data for */
	xfs_dinode_t	*dic,		/* dinode inode pointer */
	xfs_bstat_t	*buf)		/* return buffer */
{
	/*
	 * The inode format changed when we moved the link count and
	 * made it 32 bits long.  If this is an old format inode,
	 * convert it in memory to look like a new one.  If it gets
	 * flushed to disk we will convert back before flushing or
	 * logging it.  We zero out the new projid field and the old link
	 * count field.  We'll handle clearing the pad field (the remains
	 * of the old uuid field) when we actually convert the inode to
	 * the new format.  We don't change the version number so that we
	 * can distinguish this from a real new format inode.
	 */
	if (dic->di_version == 1) {
		buf->bs_nlink = be16_to_cpu(dic->di_onlink);
		buf->bs_projid = 0;
	} else {
		buf->bs_nlink = be32_to_cpu(dic->di_nlink);
		buf->bs_projid = be16_to_cpu(dic->di_projid);
	}

	buf->bs_ino = ino;
	buf->bs_mode = be16_to_cpu(dic->di_mode);
	buf->bs_uid = be32_to_cpu(dic->di_uid);
	buf->bs_gid = be32_to_cpu(dic->di_gid);
	buf->bs_size = be64_to_cpu(dic->di_size);
	buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
	buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
	buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
	buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
	buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
	buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
	buf->bs_xflags = xfs_dic2xflags(dic);
	buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
	buf->bs_extents = be32_to_cpu(dic->di_nextents);
	buf->bs_gen = be32_to_cpu(dic->di_gen);
	memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
	buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
	buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
	buf->bs_aextents = be16_to_cpu(dic->di_anextents);

	/* Block-size/count fields depend on the inode's data fork format. */
	switch (dic->di_format) {
	case XFS_DINODE_FMT_DEV:
		buf->bs_rdev = xfs_dinode_get_rdev(dic);
		buf->bs_blksize = BLKDEV_IOSIZE;
		buf->bs_blocks = 0;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
		buf->bs_rdev = 0;
		buf->bs_blksize = mp->m_sb.sb_blocksize;
		buf->bs_blocks = 0;
		break;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		buf->bs_rdev = 0;
		buf->bs_blksize = mp->m_sb.sb_blocksize;
		buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
		break;
	}
}
190 190
191 STATIC int 191 STATIC int
192 xfs_bulkstat_one_fmt( 192 xfs_bulkstat_one_fmt(
193 void __user *ubuffer, 193 void __user *ubuffer,
194 const xfs_bstat_t *buffer) 194 const xfs_bstat_t *buffer)
195 { 195 {
196 if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) 196 if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
197 return -EFAULT; 197 return -EFAULT;
198 return sizeof(*buffer); 198 return sizeof(*buffer);
199 } 199 }
200 200
/*
 * Return stat information for one inode.
 * Return 0 if ok, else errno.
 *
 * Dispatches to the iget path when no on-disk dinode buffer is supplied
 * (dibuff == NULL, i.e. BULKSTAT_FG_IGET), otherwise decodes the dinode
 * directly, then hands the result to the caller-supplied formatter.
 */
int		        	/* error status */
xfs_bulkstat_one(
	xfs_mount_t	*mp,		/* mount point for filesystem */
	xfs_ino_t	ino,		/* inode number to get data for */
	void		__user *buffer,	/* buffer to place output in */
	int		ubsize,		/* size of buffer */
	void		*private_data,	/* my private data */
	xfs_daddr_t	bno,		/* starting bno of inode cluster */
	int		*ubused,	/* bytes used by me */
	void		*dibuff,	/* on-disk inode buffer */
	int		*stat)		/* BULKSTAT_RV_... */
{
	xfs_bstat_t	*buf;		/* return buffer */
	int		error = 0;	/* error value */
	xfs_dinode_t	*dip;		/* dinode inode pointer */
	/* fall back to the default copy-to-user formatter when none given */
	bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;

	dip = (xfs_dinode_t *)dibuff;
	*stat = BULKSTAT_RV_NOTHING;

	/* internal inodes (rt bitmap/summary, quota) are never reported */
	if (!buffer || xfs_internal_inum(mp, ino))
		return XFS_ERROR(EINVAL);
	if (ubsize < sizeof(*buf))
		return XFS_ERROR(ENOMEM);

	buf = kmem_alloc(sizeof(*buf), KM_SLEEP);

	if (dip == NULL) {
		/* We're not being passed a pointer to a dinode.  This happens
		 * if BULKSTAT_FG_IGET is selected.  Do the iget.
		 */
		error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
		if (error)
			goto out_free;
	} else {
		xfs_bulkstat_one_dinode(mp, ino, dip, buf);
	}

	/* formatter returns bytes written (>= 0) or a negative error */
	error = formatter(buffer, buf);
	if (error < 0) {
		error = EFAULT;
		goto out_free;
	}

	*stat = BULKSTAT_RV_DIDONE;
	if (ubused)
		*ubused = error;

 out_free:
	kmem_free(buf);
	return error;
}
257 257
/*
 * Test to see whether we can use the ondisk inode directly, based
 * on the given bulkstat flags, filling in dipp accordingly.
 * Returns zero if the inode is dodgey.
 *
 * Outcomes (return value / *dipp):
 *   0 / NULL     - on-disk inode fails the sanity checks; skip it
 *   1 / non-NULL - on-disk inode may be used directly by the caller
 *   1 / NULL     - caller must fall back to iget: either no cluster
 *                  buffer was supplied, BULKSTAT_FG_IGET was requested,
 *                  or the attr fork is not usable inline
 */
STATIC int
xfs_bulkstat_use_dinode(
	xfs_mount_t	*mp,
	int		flags,
	xfs_buf_t	*bp,
	int		clustidx,
	xfs_dinode_t	**dipp)
{
	xfs_dinode_t	*dip;
	unsigned int	aformat;

	*dipp = NULL;
	/* no buffer, or explicit iget requested: tell caller to iget */
	if (!bp || (flags & BULKSTAT_FG_IGET))
		return 1;
	/* locate this inode's slot inside the cluster buffer */
	dip = (xfs_dinode_t *)
		xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
	/*
	 * Check the buffer containing the on-disk inode for di_mode == 0.
	 * This is to prevent xfs_bulkstat from picking up just reclaimed
	 * inodes that have their in-core state initialized but not flushed
	 * to disk yet. This is a temporary hack that would require a proper
	 * fix in the future.
	 */
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
	    !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
	    !dip->di_mode)
		return 0;
	/* BULKSTAT_FG_QUICK always trusts the on-disk copy once it's sane */
	if (flags & BULKSTAT_FG_QUICK) {
		*dipp = dip;
		return 1;
	}
	/* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
	aformat = dip->di_aformat;
	if ((XFS_DFORK_Q(dip) == 0) ||
	    (aformat == XFS_DINODE_FMT_LOCAL) ||
	    (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
		*dipp = dip;
		return 1;
	}
	/* attr fork not inline: leave *dipp NULL so the caller igets */
	return 1;
}
304 304
/*
 * True if the user buffer still has room for at least one more stat
 * structure; relies on statstruct_size being in scope at the expansion
 * site (it is a parameter of xfs_bulkstat).
 */
#define XFS_BULKSTAT_UBLEFT(ubleft)	((ubleft) >= statstruct_size)
306 306
/*
 * Return stat information in bulk (by-inode) for the filesystem.
 *
 * Walks the inode allocation btree of each allocation group starting at
 * *lastinop, batching chunk records into a scratch buffer (irbuf), then
 * formats every allocated inode through @formatter into @ubuffer.
 * *lastinop doubles as the resume cookie for the next call; *ubcountp
 * returns the number of stat structures written, and *done is set once
 * the whole filesystem has been covered.
 */
int					/* error status */
xfs_bulkstat(
	xfs_mount_t		*mp,	/* mount point for filesystem */
	xfs_ino_t		*lastinop, /* last inode returned */
	int			*ubcountp, /* size of buffer/count returned */
	bulkstat_one_pf		formatter, /* func that'd fill a single buf */
	void			*private_data,/* private data for formatter */
	size_t			statstruct_size, /* sizeof struct filling */
	char			__user *ubuffer, /* buffer with inode stats */
	int			flags,	/* defined in xfs_itable.h */
	int			*done)	/* 1 if there are more stats to get */
{
	xfs_agblock_t		agbno=0;/* allocation group block number */
	xfs_buf_t		*agbp;	/* agi header buffer */
	xfs_agi_t		*agi;	/* agi header data */
	xfs_agino_t		agino;	/* inode # in allocation group */
	xfs_agnumber_t		agno;	/* allocation group number */
	xfs_daddr_t		bno;	/* inode cluster start daddr */
	int			chunkidx; /* current index into inode chunk */
	int			clustidx; /* current index into inode cluster */
	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
	int			end_of_ag; /* set if we've seen the ag end */
	int			error;	/* error code */
	int			fmterror;/* bulkstat formatter result */
	__int32_t		gcnt;	/* current btree rec's count */
	xfs_inofree_t		gfree;	/* current btree rec's free mask */
	xfs_agino_t		gino;	/* current btree rec's start inode */
	int			i;	/* loop index */
	int			icount;	/* count of inodes good in irbuf */
	size_t			irbsize; /* size of irec buffer in bytes */
	xfs_ino_t		ino;	/* inode number (filesystem) */
	xfs_inobt_rec_incore_t	*irbp;	/* current irec buffer pointer */
	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */
	xfs_inobt_rec_incore_t	*irbufend; /* end of good irec buffer entries */
	xfs_ino_t		lastino; /* last inode number returned */
	int			nbcluster; /* # of blocks in a cluster */
	int			nicluster; /* # of inodes in a cluster */
	int			nimask;	/* mask for inode clusters */
	int			nirbuf;	/* size of irbuf */
	int			rval;	/* return value error code */
	int			tmp;	/* result value from btree calls */
	int			ubcount; /* size of user's buffer */
	int			ubleft;	/* bytes left in user's buffer */
	char			__user *ubufp;	/* pointer into user's buffer */
	int			ubelem;	/* spaces used in user's buffer */
	int			ubused;	/* bytes used by formatter */
	xfs_buf_t		*bp;	/* ptr to on-disk inode cluster buf */
	xfs_dinode_t		*dip;	/* ptr into bp for specific inode */

	/*
	 * Get the last inode value, see if there's nothing to do.
	 */
	ino = (xfs_ino_t)*lastinop;
	lastino = ino;
	dip = NULL;
	agno = XFS_INO_TO_AGNO(mp, ino);
	agino = XFS_INO_TO_AGINO(mp, ino);
	if (agno >= mp->m_sb.sb_agcount ||
	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
		*done = 1;
		*ubcountp = 0;
		return 0;
	}
	if (!ubcountp || *ubcountp <= 0) {
		return EINVAL;
	}
	ubcount = *ubcountp; /* statstruct's */
	ubleft = ubcount * statstruct_size; /* bytes */
	*ubcountp = ubelem = 0;
	*done = 0;
	fmterror = 0;
	ubufp = ubuffer;
	/* inodes per cluster buffer, and the matching chunk-index mask */
	nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ?
		mp->m_sb.sb_inopblock :
		(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
	nimask = ~(nicluster - 1);
	nbcluster = nicluster >> mp->m_sb.sb_inopblog;
	/*
	 * NOTE(review): irbuf is used without a NULL check below; with
	 * KM_MAYFAIL this allocation presumably cannot return NULL down to
	 * its PAGE_SIZE minimum — confirm against kmem_zalloc_greedy().
	 */
	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4,
				   KM_SLEEP | KM_MAYFAIL | KM_LARGE);
	nirbuf = irbsize / sizeof(*irbuf);

	/*
	 * Loop over the allocation groups, starting from the last
	 * inode returned; 0 means start of the allocation group.
	 */
	rval = 0;
	while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
		cond_resched();
		bp = NULL;
		down_read(&mp->m_peraglock);
		error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
		up_read(&mp->m_peraglock);
		if (error) {
			/*
			 * Skip this allocation group and go to the next one.
			 */
			agno++;
			agino = 0;
			continue;
		}
		agi = XFS_BUF_TO_AGI(agbp);
		/*
		 * Allocate and initialize a btree cursor for ialloc btree.
		 */
		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
		irbp = irbuf;
		irbufend = irbuf + nirbuf;
		end_of_ag = 0;
		/*
		 * If we're returning in the middle of an allocation group,
		 * we need to get the remainder of the chunk we're in.
		 */
		if (agino > 0) {
			/*
			 * Lookup the inode chunk that this inode lives in.
			 */
			error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp);
			if (!error &&	/* no I/O error */
			    tmp &&	/* lookup succeeded */
			    /* got the record, should always work */
			    !(error = xfs_inobt_get_rec(cur, &gino, &gcnt,
				    &gfree, &i)) &&
			    i == 1 &&
			    /* this is the right chunk */
			    agino < gino + XFS_INODES_PER_CHUNK &&
			    /* lastino was not last in chunk */
			    (chunkidx = agino - gino + 1) <
				    XFS_INODES_PER_CHUNK &&
			    /* there are some left allocated */
			    XFS_INOBT_MASKN(chunkidx,
				    XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) {
				/*
				 * Grab the chunk record.  Mark all the
				 * uninteresting inodes (because they're
				 * before our start point) free.
				 */
				for (i = 0; i < chunkidx; i++) {
					if (XFS_INOBT_MASK(i) & ~gfree)
						gcnt++;
				}
				gfree |= XFS_INOBT_MASKN(0, chunkidx);
				irbp->ir_startino = gino;
				irbp->ir_freecount = gcnt;
				irbp->ir_free = gfree;
				irbp++;
				agino = gino + XFS_INODES_PER_CHUNK;
				icount = XFS_INODES_PER_CHUNK - gcnt;
			} else {
				/*
				 * If any of those tests failed, bump the
				 * inode number (just in case).
				 */
				agino++;
				icount = 0;
			}
			/*
			 * In any case, increment to the next record.
			 */
			if (!error)
				error = xfs_btree_increment(cur, 0, &tmp);
		} else {
			/*
			 * Start of ag.  Lookup the first inode chunk.
			 */
			error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp);
			icount = 0;
		}
		/*
		 * Loop through inode btree records in this ag,
		 * until we run out of inodes or space in the buffer.
		 */
		while (irbp < irbufend && icount < ubcount) {
			/*
			 * Loop as long as we're unable to read the
			 * inode btree.
			 */
			while (error) {
				agino += XFS_INODES_PER_CHUNK;
				if (XFS_AGINO_TO_AGBNO(mp, agino) >=
						be32_to_cpu(agi->agi_length))
					break;
				error = xfs_inobt_lookup_ge(cur, agino, 0, 0,
							    &tmp);
				cond_resched();
			}
			/*
			 * If ran off the end of the ag either with an error,
			 * or the normal way, set end and stop collecting.
			 */
			if (error ||
			    (error = xfs_inobt_get_rec(cur, &gino, &gcnt,
				    &gfree, &i)) ||
			    i == 0) {
				end_of_ag = 1;
				break;
			}
			/*
			 * If this chunk has any allocated inodes, save it.
			 * Also start read-ahead now for this chunk.
			 */
			if (gcnt < XFS_INODES_PER_CHUNK) {
				/*
				 * Loop over all clusters in the next chunk.
				 * Do a readahead if there are any allocated
				 * inodes in that cluster.
				 */
				for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
				     chunkidx = 0;
				     chunkidx < XFS_INODES_PER_CHUNK;
				     chunkidx += nicluster,
				     agbno += nbcluster) {
					if (XFS_INOBT_MASKN(chunkidx,
							    nicluster) & ~gfree)
						xfs_btree_reada_bufs(mp, agno,
							agbno, nbcluster);
				}
				irbp->ir_startino = gino;
				irbp->ir_freecount = gcnt;
				irbp->ir_free = gfree;
				irbp++;
				icount += XFS_INODES_PER_CHUNK - gcnt;
			}
			/*
			 * Set agino to after this chunk and bump the cursor.
			 */
			agino = gino + XFS_INODES_PER_CHUNK;
			error = xfs_btree_increment(cur, 0, &tmp);
			cond_resched();
		}
		/*
		 * Drop the btree buffers and the agi buffer.
		 * We can't hold any of the locks these represent
		 * when calling iget.
		 */
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
		xfs_buf_relse(agbp);
		/*
		 * Now format all the good inodes into the user's buffer.
		 */
		irbufend = irbp;
		for (irbp = irbuf;
		     irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
			/*
			 * Now process this chunk of inodes.
			 */
			for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
			     XFS_BULKSTAT_UBLEFT(ubleft) &&
				irbp->ir_freecount < XFS_INODES_PER_CHUNK;
			     chunkidx++, clustidx++, agino++) {
				ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
				/*
				 * Recompute agbno if this is the
				 * first inode of the cluster.
				 *
				 * Careful with clustidx.   There can be
				 * multple clusters per chunk, a single
				 * cluster per chunk or a cluster that has
				 * inodes represented from several different
				 * chunks (if blocksize is large).
				 *
				 * Because of this, the starting clustidx is
				 * initialized to zero in this loop but must
				 * later be reset after reading in the cluster
				 * buffer.
				 */
				if ((chunkidx & (nicluster - 1)) == 0) {
					agbno = XFS_AGINO_TO_AGBNO(mp,
							irbp->ir_startino) +
						((chunkidx & nimask) >>
						 mp->m_sb.sb_inopblog);

					if (flags & (BULKSTAT_FG_QUICK |
						     BULKSTAT_FG_INLINE)) {
						int offset;

						ino = XFS_AGINO_TO_INO(mp, agno,
								       agino);
						bno = XFS_AGB_TO_DADDR(mp, agno,
								       agbno);

						/*
						 * Get the inode cluster buffer
						 */
						if (bp)
							xfs_buf_relse(bp);

						error = xfs_inotobp(mp, NULL, ino, &dip,
								    &bp, &offset,
								    XFS_IMAP_BULKSTAT);

						/* reset clustidx to this
						 * inode's slot in the newly
						 * read cluster buffer */
						if (!error)
							clustidx = offset / mp->m_sb.sb_inodesize;
						if (XFS_TEST_ERROR(error != 0,
								   mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
								   XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
							bp = NULL;
							ubleft = 0;
							rval = error;
							break;
						}
					}
				}
				ino = XFS_AGINO_TO_INO(mp, agno, agino);
				bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
				/*
				 * Skip if this inode is free.
				 */
				if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
					lastino = ino;
					continue;
				}
				/*
				 * Count used inodes as free so we can tell
				 * when the chunk is used up.
				 */
				irbp->ir_freecount++;
				if (!xfs_bulkstat_use_dinode(mp, flags, bp,
							     clustidx, &dip)) {
					lastino = ino;
					continue;
				}
				/*
				 * If we need to do an iget, cannot hold bp.
				 * Drop it, until starting the next cluster.
				 */
				if ((flags & BULKSTAT_FG_INLINE) && !dip) {
					if (bp)
						xfs_buf_relse(bp);
					bp = NULL;
				}

				/*
				 * Get the inode and fill in a single buffer.
				 * BULKSTAT_FG_QUICK uses dip to fill it in.
				 * BULKSTAT_FG_IGET uses igets.
				 * BULKSTAT_FG_INLINE uses dip if we have an
				 * inline attr fork, else igets.
				 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
				 * This is also used to count inodes/blks, etc
				 * in xfs_qm_quotacheck.
				 */
				ubused = statstruct_size;
				error = formatter(mp, ino, ubufp,
						  ubleft, private_data,
						  bno, &ubused, dip, &fmterror);
				if (fmterror == BULKSTAT_RV_NOTHING) {
					/* ENOENT/EINVAL just mean "skip this
					 * inode"; anything else is fatal */
					if (error && error != ENOENT &&
					    error != EINVAL) {
						ubleft = 0;
						rval = error;
						break;
					}
					lastino = ino;
					continue;
				}
				if (fmterror == BULKSTAT_RV_GIVEUP) {
					ubleft = 0;
					ASSERT(error);
					rval = error;
					break;
				}
				/* formatter consumed ubused bytes; advance */
				if (ubufp)
					ubufp += ubused;
				ubleft -= ubused;
				ubelem++;
				lastino = ino;
			}

			cond_resched();
		}

		if (bp)
			xfs_buf_relse(bp);

		/*
		 * Set up for the next loop iteration.
		 */
		if (XFS_BULKSTAT_UBLEFT(ubleft)) {
			if (end_of_ag) {
				agno++;
				agino = 0;
			} else
				agino = XFS_INO_TO_AGINO(mp, lastino);
		} else
			break;
	}
	/*
	 * Done, we're either out of filesystem or space to put the data.
	 */
	kmem_free(irbuf);
	*ubcountp = ubelem;
	/*
	 * Found some inodes, return them now and return the error next time.
	 */
	if (ubelem)
		rval = 0;
	if (agno >= mp->m_sb.sb_agcount) {
		/*
		 * If we ran out of filesystem, mark lastino as off
		 * the end of the filesystem, so the next call
		 * will return immediately.
		 */
		*lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
		*done = 1;
	} else
		*lastinop = (xfs_ino_t)lastino;

	return rval;
}
719 719
/*
 * Return stat information in bulk (by-inode) for the filesystem.
 * Special case for non-sequential one inode bulkstat.
 *
 * Tries a direct xfs_bulkstat_one first; any non-zero return from that
 * fast path triggers a fallback through the full xfs_bulkstat walk
 * (with count 1) before giving up.
 */
int					/* error status */
xfs_bulkstat_single(
	xfs_mount_t	*mp,	/* mount point for filesystem */
	xfs_ino_t	*lastinop, /* inode to return */
	char		__user *buffer, /* buffer with inode stats */
	int		*done)	/* 1 if there are more stats to get */
{
	int		count;	/* count value for bulkstat call */
	int		error;	/* return value */
	xfs_ino_t	ino;	/* filesystem inode number */
	int		res;	/* result from bs1 */

	/*
	 * note that requesting valid inode numbers which are not allocated
	 * to inodes will most likely cause xfs_itobp to generate warning
	 * messages about bad magic numbers. This is ok. The fact that
	 * the inode isn't actually an inode is handled by the
	 * error check below. Done this way to make the usual case faster
	 * at the expense of the error case.
	 */

	ino = (xfs_ino_t)*lastinop;
	error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
				 NULL, 0, NULL, NULL, &res);
	if (error) {
		/*
		 * Special case way failed, do it the "long" way
		 * to see if that works.
		 */
		/* back up one so the full walk starts at ino itself */
		(*lastinop)--;
		count = 1;
		if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
				 NULL, sizeof(xfs_bstat_t), buffer,
				 BULKSTAT_FG_IGET, done))
			return error;
		/*
		 * If the walk didn't actually produce ino, report the
		 * original failure (mapping EFSCORRUPTED to EINVAL for
		 * userspace); otherwise the slow path succeeded.
		 */
		if (count == 0 || (xfs_ino_t)*lastinop != ino)
			return error == EFSCORRUPTED ?
				XFS_ERROR(EINVAL) : error;
		else
			return 0;
	}
	*done = 0;
	return 0;
}
768 768
769 int 769 int
770 xfs_inumbers_fmt( 770 xfs_inumbers_fmt(
771 void __user *ubuffer, /* buffer to write to */ 771 void __user *ubuffer, /* buffer to write to */
772 const xfs_inogrp_t *buffer, /* buffer to read from */ 772 const xfs_inogrp_t *buffer, /* buffer to read from */
773 long count, /* # of elements to read */ 773 long count, /* # of elements to read */
774 long *written) /* # of bytes written */ 774 long *written) /* # of bytes written */
775 { 775 {
776 if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) 776 if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
777 return -EFAULT; 777 return -EFAULT;
778 *written = count * sizeof(*buffer); 778 *written = count * sizeof(*buffer);
779 return 0; 779 return 0;
780 } 780 }
781 781
782 /* 782 /*
783 * Return inode number table for the filesystem. 783 * Return inode number table for the filesystem.
784 */ 784 */
785 int /* error status */ 785 int /* error status */
786 xfs_inumbers( 786 xfs_inumbers(
787 xfs_mount_t *mp, /* mount point for filesystem */ 787 xfs_mount_t *mp, /* mount point for filesystem */
788 xfs_ino_t *lastino, /* last inode returned */ 788 xfs_ino_t *lastino, /* last inode returned */
789 int *count, /* size of buffer/count returned */ 789 int *count, /* size of buffer/count returned */
790 void __user *ubuffer,/* buffer with inode descriptions */ 790 void __user *ubuffer,/* buffer with inode descriptions */
791 inumbers_fmt_pf formatter) 791 inumbers_fmt_pf formatter)
792 { 792 {
793 xfs_buf_t *agbp; 793 xfs_buf_t *agbp;
794 xfs_agino_t agino; 794 xfs_agino_t agino;
795 xfs_agnumber_t agno; 795 xfs_agnumber_t agno;
796 int bcount; 796 int bcount;
797 xfs_inogrp_t *buffer; 797 xfs_inogrp_t *buffer;
798 int bufidx; 798 int bufidx;
799 xfs_btree_cur_t *cur; 799 xfs_btree_cur_t *cur;
800 int error; 800 int error;
801 __int32_t gcnt; 801 __int32_t gcnt;
802 xfs_inofree_t gfree; 802 xfs_inofree_t gfree;
803 xfs_agino_t gino; 803 xfs_agino_t gino;
804 int i; 804 int i;
805 xfs_ino_t ino; 805 xfs_ino_t ino;
806 int left; 806 int left;
807 int tmp; 807 int tmp;
808 808
809 ino = (xfs_ino_t)*lastino; 809 ino = (xfs_ino_t)*lastino;
810 agno = XFS_INO_TO_AGNO(mp, ino); 810 agno = XFS_INO_TO_AGNO(mp, ino);
811 agino = XFS_INO_TO_AGINO(mp, ino); 811 agino = XFS_INO_TO_AGINO(mp, ino);
812 left = *count; 812 left = *count;
813 *count = 0; 813 *count = 0;
814 bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); 814 bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
815 buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); 815 buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
816 error = bufidx = 0; 816 error = bufidx = 0;
817 cur = NULL; 817 cur = NULL;
818 agbp = NULL; 818 agbp = NULL;
819 while (left > 0 && agno < mp->m_sb.sb_agcount) { 819 while (left > 0 && agno < mp->m_sb.sb_agcount) {
820 if (agbp == NULL) { 820 if (agbp == NULL) {
821 down_read(&mp->m_peraglock); 821 down_read(&mp->m_peraglock);
822 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 822 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
823 up_read(&mp->m_peraglock); 823 up_read(&mp->m_peraglock);
824 if (error) { 824 if (error) {
825 /* 825 /*
826 * If we can't read the AGI of this ag, 826 * If we can't read the AGI of this ag,
827 * then just skip to the next one. 827 * then just skip to the next one.
828 */ 828 */
829 ASSERT(cur == NULL); 829 ASSERT(cur == NULL);
830 agbp = NULL; 830 agbp = NULL;
831 agno++; 831 agno++;
832 agino = 0; 832 agino = 0;
833 continue; 833 continue;
834 } 834 }
835 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); 835 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
836 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); 836 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
837 if (error) { 837 if (error) {
838 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 838 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
839 cur = NULL; 839 cur = NULL;
840 xfs_buf_relse(agbp); 840 xfs_buf_relse(agbp);
841 agbp = NULL; 841 agbp = NULL;
842 /* 842 /*
843 * Move up the last inode in the current 843 * Move up the last inode in the current
844 * chunk. The lookup_ge will always get 844 * chunk. The lookup_ge will always get
845 * us the first inode in the next chunk. 845 * us the first inode in the next chunk.
846 */ 846 */
847 agino += XFS_INODES_PER_CHUNK - 1; 847 agino += XFS_INODES_PER_CHUNK - 1;
848 continue; 848 continue;
849 } 849 }
850 } 850 }
851 if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, 851 if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree,
852 &i)) || 852 &i)) ||
853 i == 0) { 853 i == 0) {
854 xfs_buf_relse(agbp); 854 xfs_buf_relse(agbp);
855 agbp = NULL; 855 agbp = NULL;
856 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 856 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
857 cur = NULL; 857 cur = NULL;
858 agno++; 858 agno++;
859 agino = 0; 859 agino = 0;
860 continue; 860 continue;
861 } 861 }
862 agino = gino + XFS_INODES_PER_CHUNK - 1; 862 agino = gino + XFS_INODES_PER_CHUNK - 1;
863 buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); 863 buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino);
864 buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; 864 buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt;
865 buffer[bufidx].xi_allocmask = ~gfree; 865 buffer[bufidx].xi_allocmask = ~gfree;
866 bufidx++; 866 bufidx++;
867 left--; 867 left--;
868 if (bufidx == bcount) { 868 if (bufidx == bcount) {
869 long written; 869 long written;
870 if (formatter(ubuffer, buffer, bufidx, &written)) { 870 if (formatter(ubuffer, buffer, bufidx, &written)) {
871 error = XFS_ERROR(EFAULT); 871 error = XFS_ERROR(EFAULT);
872 break; 872 break;
873 } 873 }
874 ubuffer += written; 874 ubuffer += written;
875 *count += bufidx; 875 *count += bufidx;
876 bufidx = 0; 876 bufidx = 0;
877 } 877 }
878 if (left) { 878 if (left) {
879 error = xfs_btree_increment(cur, 0, &tmp); 879 error = xfs_btree_increment(cur, 0, &tmp);
880 if (error) { 880 if (error) {
881 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 881 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
882 cur = NULL; 882 cur = NULL;
883 xfs_buf_relse(agbp); 883 xfs_buf_relse(agbp);
884 agbp = NULL; 884 agbp = NULL;
885 /* 885 /*
886 * The agino value has already been bumped. 886 * The agino value has already been bumped.
887 * Just try to skip up to it. 887 * Just try to skip up to it.
888 */ 888 */
889 agino += XFS_INODES_PER_CHUNK; 889 agino += XFS_INODES_PER_CHUNK;
890 continue; 890 continue;
891 } 891 }
892 } 892 }
893 } 893 }
894 if (!error) { 894 if (!error) {
895 if (bufidx) { 895 if (bufidx) {
896 long written; 896 long written;
897 if (formatter(ubuffer, buffer, bufidx, &written)) 897 if (formatter(ubuffer, buffer, bufidx, &written))
898 error = XFS_ERROR(EFAULT); 898 error = XFS_ERROR(EFAULT);
899 else 899 else
900 *count += bufidx; 900 *count += bufidx;
901 } 901 }
902 *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 902 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
903 } 903 }
904 kmem_free(buffer); 904 kmem_free(buffer);
905 if (cur) 905 if (cur)
906 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : 906 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
907 XFS_BTREE_NOERROR)); 907 XFS_BTREE_NOERROR));
908 if (agbp) 908 if (agbp)
909 xfs_buf_relse(agbp); 909 xfs_buf_relse(agbp);
910 return error; 910 return error;
911 } 911 }
912 912
fs/xfs/xfs_log_recover.c
1 /* 1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_error.h" 30 #include "xfs_error.h"
31 #include "xfs_bmap_btree.h" 31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h" 32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h" 33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h" 34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h" 35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h" 36 #include "xfs_dinode.h"
37 #include "xfs_inode.h" 37 #include "xfs_inode.h"
38 #include "xfs_inode_item.h" 38 #include "xfs_inode_item.h"
39 #include "xfs_imap.h"
40 #include "xfs_alloc.h" 39 #include "xfs_alloc.h"
41 #include "xfs_ialloc.h" 40 #include "xfs_ialloc.h"
42 #include "xfs_log_priv.h" 41 #include "xfs_log_priv.h"
43 #include "xfs_buf_item.h" 42 #include "xfs_buf_item.h"
44 #include "xfs_log_recover.h" 43 #include "xfs_log_recover.h"
45 #include "xfs_extfree_item.h" 44 #include "xfs_extfree_item.h"
46 #include "xfs_trans_priv.h" 45 #include "xfs_trans_priv.h"
47 #include "xfs_quota.h" 46 #include "xfs_quota.h"
48 #include "xfs_rw.h" 47 #include "xfs_rw.h"
49 #include "xfs_utils.h" 48 #include "xfs_utils.h"
50 49
51 STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 50 STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
52 STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 51 STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
53 STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, 52 STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
54 xlog_recover_item_t *item); 53 xlog_recover_item_t *item);
55 #if defined(DEBUG) 54 #if defined(DEBUG)
56 STATIC void xlog_recover_check_summary(xlog_t *); 55 STATIC void xlog_recover_check_summary(xlog_t *);
57 #else 56 #else
58 #define xlog_recover_check_summary(log) 57 #define xlog_recover_check_summary(log)
59 #endif 58 #endif
60 59
61 60
62 /* 61 /*
63 * Sector aligned buffer routines for buffer create/read/write/access 62 * Sector aligned buffer routines for buffer create/read/write/access
64 */ 63 */
65 64
66 #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ 65 #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \
67 ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ 66 ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \
68 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) 67 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
69 #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) 68 #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask)
70 69
71 xfs_buf_t * 70 xfs_buf_t *
72 xlog_get_bp( 71 xlog_get_bp(
73 xlog_t *log, 72 xlog_t *log,
74 int num_bblks) 73 int num_bblks)
75 { 74 {
76 ASSERT(num_bblks > 0); 75 ASSERT(num_bblks > 0);
77 76
78 if (log->l_sectbb_log) { 77 if (log->l_sectbb_log) {
79 if (num_bblks > 1) 78 if (num_bblks > 1)
80 num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 79 num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
81 num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); 80 num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks);
82 } 81 }
83 return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); 82 return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp);
84 } 83 }
85 84
86 void 85 void
87 xlog_put_bp( 86 xlog_put_bp(
88 xfs_buf_t *bp) 87 xfs_buf_t *bp)
89 { 88 {
90 xfs_buf_free(bp); 89 xfs_buf_free(bp);
91 } 90 }
92 91
93 92
94 /* 93 /*
95 * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 94 * nbblks should be uint, but oh well. Just want to catch that 32-bit length.
96 */ 95 */
97 int 96 int
98 xlog_bread( 97 xlog_bread(
99 xlog_t *log, 98 xlog_t *log,
100 xfs_daddr_t blk_no, 99 xfs_daddr_t blk_no,
101 int nbblks, 100 int nbblks,
102 xfs_buf_t *bp) 101 xfs_buf_t *bp)
103 { 102 {
104 int error; 103 int error;
105 104
106 if (log->l_sectbb_log) { 105 if (log->l_sectbb_log) {
107 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 106 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
108 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 107 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
109 } 108 }
110 109
111 ASSERT(nbblks > 0); 110 ASSERT(nbblks > 0);
112 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 111 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
113 ASSERT(bp); 112 ASSERT(bp);
114 113
115 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 114 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
116 XFS_BUF_READ(bp); 115 XFS_BUF_READ(bp);
117 XFS_BUF_BUSY(bp); 116 XFS_BUF_BUSY(bp);
118 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); 117 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
119 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 118 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
120 119
121 xfsbdstrat(log->l_mp, bp); 120 xfsbdstrat(log->l_mp, bp);
122 error = xfs_iowait(bp); 121 error = xfs_iowait(bp);
123 if (error) 122 if (error)
124 xfs_ioerror_alert("xlog_bread", log->l_mp, 123 xfs_ioerror_alert("xlog_bread", log->l_mp,
125 bp, XFS_BUF_ADDR(bp)); 124 bp, XFS_BUF_ADDR(bp));
126 return error; 125 return error;
127 } 126 }
128 127
129 /* 128 /*
130 * Write out the buffer at the given block for the given number of blocks. 129 * Write out the buffer at the given block for the given number of blocks.
131 * The buffer is kept locked across the write and is returned locked. 130 * The buffer is kept locked across the write and is returned locked.
132 * This can only be used for synchronous log writes. 131 * This can only be used for synchronous log writes.
133 */ 132 */
134 STATIC int 133 STATIC int
135 xlog_bwrite( 134 xlog_bwrite(
136 xlog_t *log, 135 xlog_t *log,
137 xfs_daddr_t blk_no, 136 xfs_daddr_t blk_no,
138 int nbblks, 137 int nbblks,
139 xfs_buf_t *bp) 138 xfs_buf_t *bp)
140 { 139 {
141 int error; 140 int error;
142 141
143 if (log->l_sectbb_log) { 142 if (log->l_sectbb_log) {
144 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 143 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
145 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 144 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
146 } 145 }
147 146
148 ASSERT(nbblks > 0); 147 ASSERT(nbblks > 0);
149 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 148 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
150 149
151 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 150 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
152 XFS_BUF_ZEROFLAGS(bp); 151 XFS_BUF_ZEROFLAGS(bp);
153 XFS_BUF_BUSY(bp); 152 XFS_BUF_BUSY(bp);
154 XFS_BUF_HOLD(bp); 153 XFS_BUF_HOLD(bp);
155 XFS_BUF_PSEMA(bp, PRIBIO); 154 XFS_BUF_PSEMA(bp, PRIBIO);
156 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); 155 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
157 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 156 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
158 157
159 if ((error = xfs_bwrite(log->l_mp, bp))) 158 if ((error = xfs_bwrite(log->l_mp, bp)))
160 xfs_ioerror_alert("xlog_bwrite", log->l_mp, 159 xfs_ioerror_alert("xlog_bwrite", log->l_mp,
161 bp, XFS_BUF_ADDR(bp)); 160 bp, XFS_BUF_ADDR(bp));
162 return error; 161 return error;
163 } 162 }
164 163
165 STATIC xfs_caddr_t 164 STATIC xfs_caddr_t
166 xlog_align( 165 xlog_align(
167 xlog_t *log, 166 xlog_t *log,
168 xfs_daddr_t blk_no, 167 xfs_daddr_t blk_no,
169 int nbblks, 168 int nbblks,
170 xfs_buf_t *bp) 169 xfs_buf_t *bp)
171 { 170 {
172 xfs_caddr_t ptr; 171 xfs_caddr_t ptr;
173 172
174 if (!log->l_sectbb_log) 173 if (!log->l_sectbb_log)
175 return XFS_BUF_PTR(bp); 174 return XFS_BUF_PTR(bp);
176 175
177 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); 176 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
178 ASSERT(XFS_BUF_SIZE(bp) >= 177 ASSERT(XFS_BUF_SIZE(bp) >=
179 BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); 178 BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
180 return ptr; 179 return ptr;
181 } 180 }
182 181
183 #ifdef DEBUG 182 #ifdef DEBUG
184 /* 183 /*
185 * dump debug superblock and log record information 184 * dump debug superblock and log record information
186 */ 185 */
187 STATIC void 186 STATIC void
188 xlog_header_check_dump( 187 xlog_header_check_dump(
189 xfs_mount_t *mp, 188 xfs_mount_t *mp,
190 xlog_rec_header_t *head) 189 xlog_rec_header_t *head)
191 { 190 {
192 int b; 191 int b;
193 192
194 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); 193 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__);
195 for (b = 0; b < 16; b++) 194 for (b = 0; b < 16; b++)
196 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); 195 cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]);
197 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); 196 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
198 cmn_err(CE_DEBUG, " log : uuid = "); 197 cmn_err(CE_DEBUG, " log : uuid = ");
199 for (b = 0; b < 16; b++) 198 for (b = 0; b < 16; b++)
200 cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); 199 cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]);
201 cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); 200 cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
202 } 201 }
203 #else 202 #else
204 #define xlog_header_check_dump(mp, head) 203 #define xlog_header_check_dump(mp, head)
205 #endif 204 #endif
206 205
207 /* 206 /*
208 * check log record header for recovery 207 * check log record header for recovery
209 */ 208 */
210 STATIC int 209 STATIC int
211 xlog_header_check_recover( 210 xlog_header_check_recover(
212 xfs_mount_t *mp, 211 xfs_mount_t *mp,
213 xlog_rec_header_t *head) 212 xlog_rec_header_t *head)
214 { 213 {
215 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); 214 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
216 215
217 /* 216 /*
218 * IRIX doesn't write the h_fmt field and leaves it zeroed 217 * IRIX doesn't write the h_fmt field and leaves it zeroed
219 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover 218 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
220 * a dirty log created in IRIX. 219 * a dirty log created in IRIX.
221 */ 220 */
222 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { 221 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
223 xlog_warn( 222 xlog_warn(
224 "XFS: dirty log written in incompatible format - can't recover"); 223 "XFS: dirty log written in incompatible format - can't recover");
225 xlog_header_check_dump(mp, head); 224 xlog_header_check_dump(mp, head);
226 XFS_ERROR_REPORT("xlog_header_check_recover(1)", 225 XFS_ERROR_REPORT("xlog_header_check_recover(1)",
227 XFS_ERRLEVEL_HIGH, mp); 226 XFS_ERRLEVEL_HIGH, mp);
228 return XFS_ERROR(EFSCORRUPTED); 227 return XFS_ERROR(EFSCORRUPTED);
229 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 228 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
230 xlog_warn( 229 xlog_warn(
231 "XFS: dirty log entry has mismatched uuid - can't recover"); 230 "XFS: dirty log entry has mismatched uuid - can't recover");
232 xlog_header_check_dump(mp, head); 231 xlog_header_check_dump(mp, head);
233 XFS_ERROR_REPORT("xlog_header_check_recover(2)", 232 XFS_ERROR_REPORT("xlog_header_check_recover(2)",
234 XFS_ERRLEVEL_HIGH, mp); 233 XFS_ERRLEVEL_HIGH, mp);
235 return XFS_ERROR(EFSCORRUPTED); 234 return XFS_ERROR(EFSCORRUPTED);
236 } 235 }
237 return 0; 236 return 0;
238 } 237 }
239 238
240 /* 239 /*
241 * read the head block of the log and check the header 240 * read the head block of the log and check the header
242 */ 241 */
243 STATIC int 242 STATIC int
244 xlog_header_check_mount( 243 xlog_header_check_mount(
245 xfs_mount_t *mp, 244 xfs_mount_t *mp,
246 xlog_rec_header_t *head) 245 xlog_rec_header_t *head)
247 { 246 {
248 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); 247 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
249 248
250 if (uuid_is_nil(&head->h_fs_uuid)) { 249 if (uuid_is_nil(&head->h_fs_uuid)) {
251 /* 250 /*
252 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If 251 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
253 * h_fs_uuid is nil, we assume this log was last mounted 252 * h_fs_uuid is nil, we assume this log was last mounted
254 * by IRIX and continue. 253 * by IRIX and continue.
255 */ 254 */
256 xlog_warn("XFS: nil uuid in log - IRIX style log"); 255 xlog_warn("XFS: nil uuid in log - IRIX style log");
257 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 256 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
258 xlog_warn("XFS: log has mismatched uuid - can't recover"); 257 xlog_warn("XFS: log has mismatched uuid - can't recover");
259 xlog_header_check_dump(mp, head); 258 xlog_header_check_dump(mp, head);
260 XFS_ERROR_REPORT("xlog_header_check_mount", 259 XFS_ERROR_REPORT("xlog_header_check_mount",
261 XFS_ERRLEVEL_HIGH, mp); 260 XFS_ERRLEVEL_HIGH, mp);
262 return XFS_ERROR(EFSCORRUPTED); 261 return XFS_ERROR(EFSCORRUPTED);
263 } 262 }
264 return 0; 263 return 0;
265 } 264 }
266 265
267 STATIC void 266 STATIC void
268 xlog_recover_iodone( 267 xlog_recover_iodone(
269 struct xfs_buf *bp) 268 struct xfs_buf *bp)
270 { 269 {
271 xfs_mount_t *mp; 270 xfs_mount_t *mp;
272 271
273 ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); 272 ASSERT(XFS_BUF_FSPRIVATE(bp, void *));
274 273
275 if (XFS_BUF_GETERROR(bp)) { 274 if (XFS_BUF_GETERROR(bp)) {
276 /* 275 /*
277 * We're not going to bother about retrying 276 * We're not going to bother about retrying
278 * this during recovery. One strike! 277 * this during recovery. One strike!
279 */ 278 */
280 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); 279 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
281 xfs_ioerror_alert("xlog_recover_iodone", 280 xfs_ioerror_alert("xlog_recover_iodone",
282 mp, bp, XFS_BUF_ADDR(bp)); 281 mp, bp, XFS_BUF_ADDR(bp));
283 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 282 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
284 } 283 }
285 XFS_BUF_SET_FSPRIVATE(bp, NULL); 284 XFS_BUF_SET_FSPRIVATE(bp, NULL);
286 XFS_BUF_CLR_IODONE_FUNC(bp); 285 XFS_BUF_CLR_IODONE_FUNC(bp);
287 xfs_biodone(bp); 286 xfs_biodone(bp);
288 } 287 }
289 288
290 /* 289 /*
291 * This routine finds (to an approximation) the first block in the physical 290 * This routine finds (to an approximation) the first block in the physical
292 * log which contains the given cycle. It uses a binary search algorithm. 291 * log which contains the given cycle. It uses a binary search algorithm.
293 * Note that the algorithm can not be perfect because the disk will not 292 * Note that the algorithm can not be perfect because the disk will not
294 * necessarily be perfect. 293 * necessarily be perfect.
295 */ 294 */
296 STATIC int 295 STATIC int
297 xlog_find_cycle_start( 296 xlog_find_cycle_start(
298 xlog_t *log, 297 xlog_t *log,
299 xfs_buf_t *bp, 298 xfs_buf_t *bp,
300 xfs_daddr_t first_blk, 299 xfs_daddr_t first_blk,
301 xfs_daddr_t *last_blk, 300 xfs_daddr_t *last_blk,
302 uint cycle) 301 uint cycle)
303 { 302 {
304 xfs_caddr_t offset; 303 xfs_caddr_t offset;
305 xfs_daddr_t mid_blk; 304 xfs_daddr_t mid_blk;
306 uint mid_cycle; 305 uint mid_cycle;
307 int error; 306 int error;
308 307
309 mid_blk = BLK_AVG(first_blk, *last_blk); 308 mid_blk = BLK_AVG(first_blk, *last_blk);
310 while (mid_blk != first_blk && mid_blk != *last_blk) { 309 while (mid_blk != first_blk && mid_blk != *last_blk) {
311 if ((error = xlog_bread(log, mid_blk, 1, bp))) 310 if ((error = xlog_bread(log, mid_blk, 1, bp)))
312 return error; 311 return error;
313 offset = xlog_align(log, mid_blk, 1, bp); 312 offset = xlog_align(log, mid_blk, 1, bp);
314 mid_cycle = xlog_get_cycle(offset); 313 mid_cycle = xlog_get_cycle(offset);
315 if (mid_cycle == cycle) { 314 if (mid_cycle == cycle) {
316 *last_blk = mid_blk; 315 *last_blk = mid_blk;
317 /* last_half_cycle == mid_cycle */ 316 /* last_half_cycle == mid_cycle */
318 } else { 317 } else {
319 first_blk = mid_blk; 318 first_blk = mid_blk;
320 /* first_half_cycle == mid_cycle */ 319 /* first_half_cycle == mid_cycle */
321 } 320 }
322 mid_blk = BLK_AVG(first_blk, *last_blk); 321 mid_blk = BLK_AVG(first_blk, *last_blk);
323 } 322 }
324 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || 323 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
325 (mid_blk == *last_blk && mid_blk-1 == first_blk)); 324 (mid_blk == *last_blk && mid_blk-1 == first_blk));
326 325
327 return 0; 326 return 0;
328 } 327 }
329 328
330 /* 329 /*
331 * Check that the range of blocks does not contain the cycle number 330 * Check that the range of blocks does not contain the cycle number
332 * given. The scan needs to occur from front to back and the ptr into the 331 * given. The scan needs to occur from front to back and the ptr into the
333 * region must be updated since a later routine will need to perform another 332 * region must be updated since a later routine will need to perform another
334 * test. If the region is completely good, we end up returning the same 333 * test. If the region is completely good, we end up returning the same
335 * last block number. 334 * last block number.
336 * 335 *
337 * Set blkno to -1 if we encounter no errors. This is an invalid block number 336 * Set blkno to -1 if we encounter no errors. This is an invalid block number
338 * since we don't ever expect logs to get this large. 337 * since we don't ever expect logs to get this large.
339 */ 338 */
340 STATIC int 339 STATIC int
341 xlog_find_verify_cycle( 340 xlog_find_verify_cycle(
342 xlog_t *log, 341 xlog_t *log,
343 xfs_daddr_t start_blk, 342 xfs_daddr_t start_blk,
344 int nbblks, 343 int nbblks,
345 uint stop_on_cycle_no, 344 uint stop_on_cycle_no,
346 xfs_daddr_t *new_blk) 345 xfs_daddr_t *new_blk)
347 { 346 {
348 xfs_daddr_t i, j; 347 xfs_daddr_t i, j;
349 uint cycle; 348 uint cycle;
350 xfs_buf_t *bp; 349 xfs_buf_t *bp;
351 xfs_daddr_t bufblks; 350 xfs_daddr_t bufblks;
352 xfs_caddr_t buf = NULL; 351 xfs_caddr_t buf = NULL;
353 int error = 0; 352 int error = 0;
354 353
355 bufblks = 1 << ffs(nbblks); 354 bufblks = 1 << ffs(nbblks);
356 355
357 while (!(bp = xlog_get_bp(log, bufblks))) { 356 while (!(bp = xlog_get_bp(log, bufblks))) {
358 /* can't get enough memory to do everything in one big buffer */ 357 /* can't get enough memory to do everything in one big buffer */
359 bufblks >>= 1; 358 bufblks >>= 1;
360 if (bufblks <= log->l_sectbb_log) 359 if (bufblks <= log->l_sectbb_log)
361 return ENOMEM; 360 return ENOMEM;
362 } 361 }
363 362
364 for (i = start_blk; i < start_blk + nbblks; i += bufblks) { 363 for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
365 int bcount; 364 int bcount;
366 365
367 bcount = min(bufblks, (start_blk + nbblks - i)); 366 bcount = min(bufblks, (start_blk + nbblks - i));
368 367
369 if ((error = xlog_bread(log, i, bcount, bp))) 368 if ((error = xlog_bread(log, i, bcount, bp)))
370 goto out; 369 goto out;
371 370
372 buf = xlog_align(log, i, bcount, bp); 371 buf = xlog_align(log, i, bcount, bp);
373 for (j = 0; j < bcount; j++) { 372 for (j = 0; j < bcount; j++) {
374 cycle = xlog_get_cycle(buf); 373 cycle = xlog_get_cycle(buf);
375 if (cycle == stop_on_cycle_no) { 374 if (cycle == stop_on_cycle_no) {
376 *new_blk = i+j; 375 *new_blk = i+j;
377 goto out; 376 goto out;
378 } 377 }
379 378
380 buf += BBSIZE; 379 buf += BBSIZE;
381 } 380 }
382 } 381 }
383 382
384 *new_blk = -1; 383 *new_blk = -1;
385 384
386 out: 385 out:
387 xlog_put_bp(bp); 386 xlog_put_bp(bp);
388 return error; 387 return error;
389 } 388 }
390 389
391 /* 390 /*
392 * Potentially backup over partial log record write. 391 * Potentially backup over partial log record write.
393 * 392 *
394 * In the typical case, last_blk is the number of the block directly after 393 * In the typical case, last_blk is the number of the block directly after
395 * a good log record. Therefore, we subtract one to get the block number 394 * a good log record. Therefore, we subtract one to get the block number
396 * of the last block in the given buffer. extra_bblks contains the number 395 * of the last block in the given buffer. extra_bblks contains the number
397 * of blocks we would have read on a previous read. This happens when the 396 * of blocks we would have read on a previous read. This happens when the
398 * last log record is split over the end of the physical log. 397 * last log record is split over the end of the physical log.
399 * 398 *
400 * extra_bblks is the number of blocks potentially verified on a previous 399 * extra_bblks is the number of blocks potentially verified on a previous
401 * call to this routine. 400 * call to this routine.
402 */ 401 */
403 STATIC int 402 STATIC int
404 xlog_find_verify_log_record( 403 xlog_find_verify_log_record(
405 xlog_t *log, 404 xlog_t *log,
406 xfs_daddr_t start_blk, 405 xfs_daddr_t start_blk,
407 xfs_daddr_t *last_blk, 406 xfs_daddr_t *last_blk,
408 int extra_bblks) 407 int extra_bblks)
409 { 408 {
410 xfs_daddr_t i; 409 xfs_daddr_t i;
411 xfs_buf_t *bp; 410 xfs_buf_t *bp;
412 xfs_caddr_t offset = NULL; 411 xfs_caddr_t offset = NULL;
413 xlog_rec_header_t *head = NULL; 412 xlog_rec_header_t *head = NULL;
414 int error = 0; 413 int error = 0;
415 int smallmem = 0; 414 int smallmem = 0;
416 int num_blks = *last_blk - start_blk; 415 int num_blks = *last_blk - start_blk;
417 int xhdrs; 416 int xhdrs;
418 417
419 ASSERT(start_blk != 0 || *last_blk != start_blk); 418 ASSERT(start_blk != 0 || *last_blk != start_blk);
420 419
421 if (!(bp = xlog_get_bp(log, num_blks))) { 420 if (!(bp = xlog_get_bp(log, num_blks))) {
422 if (!(bp = xlog_get_bp(log, 1))) 421 if (!(bp = xlog_get_bp(log, 1)))
423 return ENOMEM; 422 return ENOMEM;
424 smallmem = 1; 423 smallmem = 1;
425 } else { 424 } else {
426 if ((error = xlog_bread(log, start_blk, num_blks, bp))) 425 if ((error = xlog_bread(log, start_blk, num_blks, bp)))
427 goto out; 426 goto out;
428 offset = xlog_align(log, start_blk, num_blks, bp); 427 offset = xlog_align(log, start_blk, num_blks, bp);
429 offset += ((num_blks - 1) << BBSHIFT); 428 offset += ((num_blks - 1) << BBSHIFT);
430 } 429 }
431 430
432 for (i = (*last_blk) - 1; i >= 0; i--) { 431 for (i = (*last_blk) - 1; i >= 0; i--) {
433 if (i < start_blk) { 432 if (i < start_blk) {
434 /* valid log record not found */ 433 /* valid log record not found */
435 xlog_warn( 434 xlog_warn(
436 "XFS: Log inconsistent (didn't find previous header)"); 435 "XFS: Log inconsistent (didn't find previous header)");
437 ASSERT(0); 436 ASSERT(0);
438 error = XFS_ERROR(EIO); 437 error = XFS_ERROR(EIO);
439 goto out; 438 goto out;
440 } 439 }
441 440
442 if (smallmem) { 441 if (smallmem) {
443 if ((error = xlog_bread(log, i, 1, bp))) 442 if ((error = xlog_bread(log, i, 1, bp)))
444 goto out; 443 goto out;
445 offset = xlog_align(log, i, 1, bp); 444 offset = xlog_align(log, i, 1, bp);
446 } 445 }
447 446
448 head = (xlog_rec_header_t *)offset; 447 head = (xlog_rec_header_t *)offset;
449 448
450 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) 449 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno))
451 break; 450 break;
452 451
453 if (!smallmem) 452 if (!smallmem)
454 offset -= BBSIZE; 453 offset -= BBSIZE;
455 } 454 }
456 455
457 /* 456 /*
458 * We hit the beginning of the physical log & still no header. Return 457 * We hit the beginning of the physical log & still no header. Return
459 * to caller. If caller can handle a return of -1, then this routine 458 * to caller. If caller can handle a return of -1, then this routine
460 * will be called again for the end of the physical log. 459 * will be called again for the end of the physical log.
461 */ 460 */
462 if (i == -1) { 461 if (i == -1) {
463 error = -1; 462 error = -1;
464 goto out; 463 goto out;
465 } 464 }
466 465
467 /* 466 /*
468 * We have the final block of the good log (the first block 467 * We have the final block of the good log (the first block
469 * of the log record _before_ the head. So we check the uuid. 468 * of the log record _before_ the head. So we check the uuid.
470 */ 469 */
471 if ((error = xlog_header_check_mount(log->l_mp, head))) 470 if ((error = xlog_header_check_mount(log->l_mp, head)))
472 goto out; 471 goto out;
473 472
474 /* 473 /*
475 * We may have found a log record header before we expected one. 474 * We may have found a log record header before we expected one.
476 * last_blk will be the 1st block # with a given cycle #. We may end 475 * last_blk will be the 1st block # with a given cycle #. We may end
477 * up reading an entire log record. In this case, we don't want to 476 * up reading an entire log record. In this case, we don't want to
478 * reset last_blk. Only when last_blk points in the middle of a log 477 * reset last_blk. Only when last_blk points in the middle of a log
479 * record do we update last_blk. 478 * record do we update last_blk.
480 */ 479 */
481 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 480 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
482 uint h_size = be32_to_cpu(head->h_size); 481 uint h_size = be32_to_cpu(head->h_size);
483 482
484 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; 483 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
485 if (h_size % XLOG_HEADER_CYCLE_SIZE) 484 if (h_size % XLOG_HEADER_CYCLE_SIZE)
486 xhdrs++; 485 xhdrs++;
487 } else { 486 } else {
488 xhdrs = 1; 487 xhdrs = 1;
489 } 488 }
490 489
491 if (*last_blk - i + extra_bblks != 490 if (*last_blk - i + extra_bblks !=
492 BTOBB(be32_to_cpu(head->h_len)) + xhdrs) 491 BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
493 *last_blk = i; 492 *last_blk = i;
494 493
495 out: 494 out:
496 xlog_put_bp(bp); 495 xlog_put_bp(bp);
497 return error; 496 return error;
498 } 497 }
499 498
500 /* 499 /*
501 * Head is defined to be the point of the log where the next log write 500 * Head is defined to be the point of the log where the next log write
502 * write could go. This means that incomplete LR writes at the end are 501 * write could go. This means that incomplete LR writes at the end are
503 * eliminated when calculating the head. We aren't guaranteed that previous 502 * eliminated when calculating the head. We aren't guaranteed that previous
504 * LR have complete transactions. We only know that a cycle number of 503 * LR have complete transactions. We only know that a cycle number of
505 * current cycle number -1 won't be present in the log if we start writing 504 * current cycle number -1 won't be present in the log if we start writing
506 * from our current block number. 505 * from our current block number.
507 * 506 *
508 * last_blk contains the block number of the first block with a given 507 * last_blk contains the block number of the first block with a given
509 * cycle number. 508 * cycle number.
510 * 509 *
511 * Return: zero if normal, non-zero if error. 510 * Return: zero if normal, non-zero if error.
512 */ 511 */
513 STATIC int 512 STATIC int
514 xlog_find_head( 513 xlog_find_head(
515 xlog_t *log, 514 xlog_t *log,
516 xfs_daddr_t *return_head_blk) 515 xfs_daddr_t *return_head_blk)
517 { 516 {
518 xfs_buf_t *bp; 517 xfs_buf_t *bp;
519 xfs_caddr_t offset; 518 xfs_caddr_t offset;
520 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 519 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
521 int num_scan_bblks; 520 int num_scan_bblks;
522 uint first_half_cycle, last_half_cycle; 521 uint first_half_cycle, last_half_cycle;
523 uint stop_on_cycle; 522 uint stop_on_cycle;
524 int error, log_bbnum = log->l_logBBsize; 523 int error, log_bbnum = log->l_logBBsize;
525 524
526 /* Is the end of the log device zeroed? */ 525 /* Is the end of the log device zeroed? */
527 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { 526 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
528 *return_head_blk = first_blk; 527 *return_head_blk = first_blk;
529 528
530 /* Is the whole lot zeroed? */ 529 /* Is the whole lot zeroed? */
531 if (!first_blk) { 530 if (!first_blk) {
532 /* Linux XFS shouldn't generate totally zeroed logs - 531 /* Linux XFS shouldn't generate totally zeroed logs -
533 * mkfs etc write a dummy unmount record to a fresh 532 * mkfs etc write a dummy unmount record to a fresh
534 * log so we can store the uuid in there 533 * log so we can store the uuid in there
535 */ 534 */
536 xlog_warn("XFS: totally zeroed log"); 535 xlog_warn("XFS: totally zeroed log");
537 } 536 }
538 537
539 return 0; 538 return 0;
540 } else if (error) { 539 } else if (error) {
541 xlog_warn("XFS: empty log check failed"); 540 xlog_warn("XFS: empty log check failed");
542 return error; 541 return error;
543 } 542 }
544 543
545 first_blk = 0; /* get cycle # of 1st block */ 544 first_blk = 0; /* get cycle # of 1st block */
546 bp = xlog_get_bp(log, 1); 545 bp = xlog_get_bp(log, 1);
547 if (!bp) 546 if (!bp)
548 return ENOMEM; 547 return ENOMEM;
549 if ((error = xlog_bread(log, 0, 1, bp))) 548 if ((error = xlog_bread(log, 0, 1, bp)))
550 goto bp_err; 549 goto bp_err;
551 offset = xlog_align(log, 0, 1, bp); 550 offset = xlog_align(log, 0, 1, bp);
552 first_half_cycle = xlog_get_cycle(offset); 551 first_half_cycle = xlog_get_cycle(offset);
553 552
554 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 553 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
555 if ((error = xlog_bread(log, last_blk, 1, bp))) 554 if ((error = xlog_bread(log, last_blk, 1, bp)))
556 goto bp_err; 555 goto bp_err;
557 offset = xlog_align(log, last_blk, 1, bp); 556 offset = xlog_align(log, last_blk, 1, bp);
558 last_half_cycle = xlog_get_cycle(offset); 557 last_half_cycle = xlog_get_cycle(offset);
559 ASSERT(last_half_cycle != 0); 558 ASSERT(last_half_cycle != 0);
560 559
561 /* 560 /*
562 * If the 1st half cycle number is equal to the last half cycle number, 561 * If the 1st half cycle number is equal to the last half cycle number,
563 * then the entire log is stamped with the same cycle number. In this 562 * then the entire log is stamped with the same cycle number. In this
564 * case, head_blk can't be set to zero (which makes sense). The below 563 * case, head_blk can't be set to zero (which makes sense). The below
565 * math doesn't work out properly with head_blk equal to zero. Instead, 564 * math doesn't work out properly with head_blk equal to zero. Instead,
566 * we set it to log_bbnum which is an invalid block number, but this 565 * we set it to log_bbnum which is an invalid block number, but this
567 * value makes the math correct. If head_blk doesn't changed through 566 * value makes the math correct. If head_blk doesn't changed through
568 * all the tests below, *head_blk is set to zero at the very end rather 567 * all the tests below, *head_blk is set to zero at the very end rather
569 * than log_bbnum. In a sense, log_bbnum and zero are the same block 568 * than log_bbnum. In a sense, log_bbnum and zero are the same block
570 * in a circular file. 569 * in a circular file.
571 */ 570 */
572 if (first_half_cycle == last_half_cycle) { 571 if (first_half_cycle == last_half_cycle) {
573 /* 572 /*
574 * In this case we believe that the entire log should have 573 * In this case we believe that the entire log should have
575 * cycle number last_half_cycle. We need to scan backwards 574 * cycle number last_half_cycle. We need to scan backwards
576 * from the end verifying that there are no holes still 575 * from the end verifying that there are no holes still
577 * containing last_half_cycle - 1. If we find such a hole, 576 * containing last_half_cycle - 1. If we find such a hole,
578 * then the start of that hole will be the new head. The 577 * then the start of that hole will be the new head. The
579 * simple case looks like 578 * simple case looks like
580 * x | x ... | x - 1 | x 579 * x | x ... | x - 1 | x
581 * Another case that fits this picture would be 580 * Another case that fits this picture would be
582 * x | x + 1 | x ... | x 581 * x | x + 1 | x ... | x
583 * In this case the head really is somewhere at the end of the 582 * In this case the head really is somewhere at the end of the
584 * log, as one of the latest writes at the beginning was 583 * log, as one of the latest writes at the beginning was
585 * incomplete. 584 * incomplete.
586 * One more case is 585 * One more case is
587 * x | x + 1 | x ... | x - 1 | x 586 * x | x + 1 | x ... | x - 1 | x
588 * This is really the combination of the above two cases, and 587 * This is really the combination of the above two cases, and
589 * the head has to end up at the start of the x-1 hole at the 588 * the head has to end up at the start of the x-1 hole at the
590 * end of the log. 589 * end of the log.
591 * 590 *
592 * In the 256k log case, we will read from the beginning to the 591 * In the 256k log case, we will read from the beginning to the
593 * end of the log and search for cycle numbers equal to x-1. 592 * end of the log and search for cycle numbers equal to x-1.
594 * We don't worry about the x+1 blocks that we encounter, 593 * We don't worry about the x+1 blocks that we encounter,
595 * because we know that they cannot be the head since the log 594 * because we know that they cannot be the head since the log
596 * started with x. 595 * started with x.
597 */ 596 */
598 head_blk = log_bbnum; 597 head_blk = log_bbnum;
599 stop_on_cycle = last_half_cycle - 1; 598 stop_on_cycle = last_half_cycle - 1;
600 } else { 599 } else {
601 /* 600 /*
602 * In this case we want to find the first block with cycle 601 * In this case we want to find the first block with cycle
603 * number matching last_half_cycle. We expect the log to be 602 * number matching last_half_cycle. We expect the log to be
604 * some variation on 603 * some variation on
605 * x + 1 ... | x ... 604 * x + 1 ... | x ...
606 * The first block with cycle number x (last_half_cycle) will 605 * The first block with cycle number x (last_half_cycle) will
607 * be where the new head belongs. First we do a binary search 606 * be where the new head belongs. First we do a binary search
608 * for the first occurrence of last_half_cycle. The binary 607 * for the first occurrence of last_half_cycle. The binary
609 * search may not be totally accurate, so then we scan back 608 * search may not be totally accurate, so then we scan back
610 * from there looking for occurrences of last_half_cycle before 609 * from there looking for occurrences of last_half_cycle before
611 * us. If that backwards scan wraps around the beginning of 610 * us. If that backwards scan wraps around the beginning of
612 * the log, then we look for occurrences of last_half_cycle - 1 611 * the log, then we look for occurrences of last_half_cycle - 1
613 * at the end of the log. The cases we're looking for look 612 * at the end of the log. The cases we're looking for look
614 * like 613 * like
615 * x + 1 ... | x | x + 1 | x ... 614 * x + 1 ... | x | x + 1 | x ...
616 * ^ binary search stopped here 615 * ^ binary search stopped here
617 * or 616 * or
618 * x + 1 ... | x ... | x - 1 | x 617 * x + 1 ... | x ... | x - 1 | x
619 * <---------> less than scan distance 618 * <---------> less than scan distance
620 */ 619 */
621 stop_on_cycle = last_half_cycle; 620 stop_on_cycle = last_half_cycle;
622 if ((error = xlog_find_cycle_start(log, bp, first_blk, 621 if ((error = xlog_find_cycle_start(log, bp, first_blk,
623 &head_blk, last_half_cycle))) 622 &head_blk, last_half_cycle)))
624 goto bp_err; 623 goto bp_err;
625 } 624 }
626 625
627 /* 626 /*
628 * Now validate the answer. Scan back some number of maximum possible 627 * Now validate the answer. Scan back some number of maximum possible
629 * blocks and make sure each one has the expected cycle number. The 628 * blocks and make sure each one has the expected cycle number. The
630 * maximum is determined by the total possible amount of buffering 629 * maximum is determined by the total possible amount of buffering
631 * in the in-core log. The following number can be made tighter if 630 * in the in-core log. The following number can be made tighter if
632 * we actually look at the block size of the filesystem. 631 * we actually look at the block size of the filesystem.
633 */ 632 */
634 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 633 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
635 if (head_blk >= num_scan_bblks) { 634 if (head_blk >= num_scan_bblks) {
636 /* 635 /*
637 * We are guaranteed that the entire check can be performed 636 * We are guaranteed that the entire check can be performed
638 * in one buffer. 637 * in one buffer.
639 */ 638 */
640 start_blk = head_blk - num_scan_bblks; 639 start_blk = head_blk - num_scan_bblks;
641 if ((error = xlog_find_verify_cycle(log, 640 if ((error = xlog_find_verify_cycle(log,
642 start_blk, num_scan_bblks, 641 start_blk, num_scan_bblks,
643 stop_on_cycle, &new_blk))) 642 stop_on_cycle, &new_blk)))
644 goto bp_err; 643 goto bp_err;
645 if (new_blk != -1) 644 if (new_blk != -1)
646 head_blk = new_blk; 645 head_blk = new_blk;
647 } else { /* need to read 2 parts of log */ 646 } else { /* need to read 2 parts of log */
648 /* 647 /*
649 * We are going to scan backwards in the log in two parts. 648 * We are going to scan backwards in the log in two parts.
650 * First we scan the physical end of the log. In this part 649 * First we scan the physical end of the log. In this part
651 * of the log, we are looking for blocks with cycle number 650 * of the log, we are looking for blocks with cycle number
652 * last_half_cycle - 1. 651 * last_half_cycle - 1.
653 * If we find one, then we know that the log starts there, as 652 * If we find one, then we know that the log starts there, as
654 * we've found a hole that didn't get written in going around 653 * we've found a hole that didn't get written in going around
655 * the end of the physical log. The simple case for this is 654 * the end of the physical log. The simple case for this is
656 * x + 1 ... | x ... | x - 1 | x 655 * x + 1 ... | x ... | x - 1 | x
657 * <---------> less than scan distance 656 * <---------> less than scan distance
658 * If all of the blocks at the end of the log have cycle number 657 * If all of the blocks at the end of the log have cycle number
659 * last_half_cycle, then we check the blocks at the start of 658 * last_half_cycle, then we check the blocks at the start of
660 * the log looking for occurrences of last_half_cycle. If we 659 * the log looking for occurrences of last_half_cycle. If we
661 * find one, then our current estimate for the location of the 660 * find one, then our current estimate for the location of the
662 * first occurrence of last_half_cycle is wrong and we move 661 * first occurrence of last_half_cycle is wrong and we move
663 * back to the hole we've found. This case looks like 662 * back to the hole we've found. This case looks like
664 * x + 1 ... | x | x + 1 | x ... 663 * x + 1 ... | x | x + 1 | x ...
665 * ^ binary search stopped here 664 * ^ binary search stopped here
666 * Another case we need to handle that only occurs in 256k 665 * Another case we need to handle that only occurs in 256k
667 * logs is 666 * logs is
668 * x + 1 ... | x ... | x+1 | x ... 667 * x + 1 ... | x ... | x+1 | x ...
669 * ^ binary search stops here 668 * ^ binary search stops here
670 * In a 256k log, the scan at the end of the log will see the 669 * In a 256k log, the scan at the end of the log will see the
671 * x + 1 blocks. We need to skip past those since that is 670 * x + 1 blocks. We need to skip past those since that is
672 * certainly not the head of the log. By searching for 671 * certainly not the head of the log. By searching for
673 * last_half_cycle-1 we accomplish that. 672 * last_half_cycle-1 we accomplish that.
674 */ 673 */
675 start_blk = log_bbnum - num_scan_bblks + head_blk; 674 start_blk = log_bbnum - num_scan_bblks + head_blk;
676 ASSERT(head_blk <= INT_MAX && 675 ASSERT(head_blk <= INT_MAX &&
677 (xfs_daddr_t) num_scan_bblks - head_blk >= 0); 676 (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
678 if ((error = xlog_find_verify_cycle(log, start_blk, 677 if ((error = xlog_find_verify_cycle(log, start_blk,
679 num_scan_bblks - (int)head_blk, 678 num_scan_bblks - (int)head_blk,
680 (stop_on_cycle - 1), &new_blk))) 679 (stop_on_cycle - 1), &new_blk)))
681 goto bp_err; 680 goto bp_err;
682 if (new_blk != -1) { 681 if (new_blk != -1) {
683 head_blk = new_blk; 682 head_blk = new_blk;
684 goto bad_blk; 683 goto bad_blk;
685 } 684 }
686 685
687 /* 686 /*
688 * Scan beginning of log now. The last part of the physical 687 * Scan beginning of log now. The last part of the physical
689 * log is good. This scan needs to verify that it doesn't find 688 * log is good. This scan needs to verify that it doesn't find
690 * the last_half_cycle. 689 * the last_half_cycle.
691 */ 690 */
692 start_blk = 0; 691 start_blk = 0;
693 ASSERT(head_blk <= INT_MAX); 692 ASSERT(head_blk <= INT_MAX);
694 if ((error = xlog_find_verify_cycle(log, 693 if ((error = xlog_find_verify_cycle(log,
695 start_blk, (int)head_blk, 694 start_blk, (int)head_blk,
696 stop_on_cycle, &new_blk))) 695 stop_on_cycle, &new_blk)))
697 goto bp_err; 696 goto bp_err;
698 if (new_blk != -1) 697 if (new_blk != -1)
699 head_blk = new_blk; 698 head_blk = new_blk;
700 } 699 }
701 700
702 bad_blk: 701 bad_blk:
703 /* 702 /*
704 * Now we need to make sure head_blk is not pointing to a block in 703 * Now we need to make sure head_blk is not pointing to a block in
705 * the middle of a log record. 704 * the middle of a log record.
706 */ 705 */
707 num_scan_bblks = XLOG_REC_SHIFT(log); 706 num_scan_bblks = XLOG_REC_SHIFT(log);
708 if (head_blk >= num_scan_bblks) { 707 if (head_blk >= num_scan_bblks) {
709 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ 708 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
710 709
711 /* start ptr at last block ptr before head_blk */ 710 /* start ptr at last block ptr before head_blk */
712 if ((error = xlog_find_verify_log_record(log, start_blk, 711 if ((error = xlog_find_verify_log_record(log, start_blk,
713 &head_blk, 0)) == -1) { 712 &head_blk, 0)) == -1) {
714 error = XFS_ERROR(EIO); 713 error = XFS_ERROR(EIO);
715 goto bp_err; 714 goto bp_err;
716 } else if (error) 715 } else if (error)
717 goto bp_err; 716 goto bp_err;
718 } else { 717 } else {
719 start_blk = 0; 718 start_blk = 0;
720 ASSERT(head_blk <= INT_MAX); 719 ASSERT(head_blk <= INT_MAX);
721 if ((error = xlog_find_verify_log_record(log, start_blk, 720 if ((error = xlog_find_verify_log_record(log, start_blk,
722 &head_blk, 0)) == -1) { 721 &head_blk, 0)) == -1) {
723 /* We hit the beginning of the log during our search */ 722 /* We hit the beginning of the log during our search */
724 start_blk = log_bbnum - num_scan_bblks + head_blk; 723 start_blk = log_bbnum - num_scan_bblks + head_blk;
725 new_blk = log_bbnum; 724 new_blk = log_bbnum;
726 ASSERT(start_blk <= INT_MAX && 725 ASSERT(start_blk <= INT_MAX &&
727 (xfs_daddr_t) log_bbnum-start_blk >= 0); 726 (xfs_daddr_t) log_bbnum-start_blk >= 0);
728 ASSERT(head_blk <= INT_MAX); 727 ASSERT(head_blk <= INT_MAX);
729 if ((error = xlog_find_verify_log_record(log, 728 if ((error = xlog_find_verify_log_record(log,
730 start_blk, &new_blk, 729 start_blk, &new_blk,
731 (int)head_blk)) == -1) { 730 (int)head_blk)) == -1) {
732 error = XFS_ERROR(EIO); 731 error = XFS_ERROR(EIO);
733 goto bp_err; 732 goto bp_err;
734 } else if (error) 733 } else if (error)
735 goto bp_err; 734 goto bp_err;
736 if (new_blk != log_bbnum) 735 if (new_blk != log_bbnum)
737 head_blk = new_blk; 736 head_blk = new_blk;
738 } else if (error) 737 } else if (error)
739 goto bp_err; 738 goto bp_err;
740 } 739 }
741 740
742 xlog_put_bp(bp); 741 xlog_put_bp(bp);
743 if (head_blk == log_bbnum) 742 if (head_blk == log_bbnum)
744 *return_head_blk = 0; 743 *return_head_blk = 0;
745 else 744 else
746 *return_head_blk = head_blk; 745 *return_head_blk = head_blk;
747 /* 746 /*
748 * When returning here, we have a good block number. Bad block 747 * When returning here, we have a good block number. Bad block
749 * means that during a previous crash, we didn't have a clean break 748 * means that during a previous crash, we didn't have a clean break
750 * from cycle number N to cycle number N-1. In this case, we need 749 * from cycle number N to cycle number N-1. In this case, we need
751 * to find the first block with cycle number N-1. 750 * to find the first block with cycle number N-1.
752 */ 751 */
753 return 0; 752 return 0;
754 753
755 bp_err: 754 bp_err:
756 xlog_put_bp(bp); 755 xlog_put_bp(bp);
757 756
758 if (error) 757 if (error)
759 xlog_warn("XFS: failed to find log head"); 758 xlog_warn("XFS: failed to find log head");
760 return error; 759 return error;
761 } 760 }
762 761
/*
 * Find the sync block number or the tail of the log.
 *
 * This will be the block number of the last record to have its
 * associated buffers synced to disk.  Every log record header has
 * a sync lsn embedded in it.  LSNs hold block numbers, so it is easy
 * to get a sync block number.  The only concern is to figure out which
 * log record header to believe.
 *
 * The following algorithm uses the log record header with the largest
 * lsn.  The entire log record does not need to be valid.  We only care
 * that the header is valid.
 *
 * We could speed up search by using current head_blk buffer, but it is not
 * available.
 */
779 int 778 int
780 xlog_find_tail( 779 xlog_find_tail(
781 xlog_t *log, 780 xlog_t *log,
782 xfs_daddr_t *head_blk, 781 xfs_daddr_t *head_blk,
783 xfs_daddr_t *tail_blk) 782 xfs_daddr_t *tail_blk)
784 { 783 {
785 xlog_rec_header_t *rhead; 784 xlog_rec_header_t *rhead;
786 xlog_op_header_t *op_head; 785 xlog_op_header_t *op_head;
787 xfs_caddr_t offset = NULL; 786 xfs_caddr_t offset = NULL;
788 xfs_buf_t *bp; 787 xfs_buf_t *bp;
789 int error, i, found; 788 int error, i, found;
790 xfs_daddr_t umount_data_blk; 789 xfs_daddr_t umount_data_blk;
791 xfs_daddr_t after_umount_blk; 790 xfs_daddr_t after_umount_blk;
792 xfs_lsn_t tail_lsn; 791 xfs_lsn_t tail_lsn;
793 int hblks; 792 int hblks;
794 793
795 found = 0; 794 found = 0;
796 795
797 /* 796 /*
798 * Find previous log record 797 * Find previous log record
799 */ 798 */
800 if ((error = xlog_find_head(log, head_blk))) 799 if ((error = xlog_find_head(log, head_blk)))
801 return error; 800 return error;
802 801
803 bp = xlog_get_bp(log, 1); 802 bp = xlog_get_bp(log, 1);
804 if (!bp) 803 if (!bp)
805 return ENOMEM; 804 return ENOMEM;
806 if (*head_blk == 0) { /* special case */ 805 if (*head_blk == 0) { /* special case */
807 if ((error = xlog_bread(log, 0, 1, bp))) 806 if ((error = xlog_bread(log, 0, 1, bp)))
808 goto bread_err; 807 goto bread_err;
809 offset = xlog_align(log, 0, 1, bp); 808 offset = xlog_align(log, 0, 1, bp);
810 if (xlog_get_cycle(offset) == 0) { 809 if (xlog_get_cycle(offset) == 0) {
811 *tail_blk = 0; 810 *tail_blk = 0;
812 /* leave all other log inited values alone */ 811 /* leave all other log inited values alone */
813 goto exit; 812 goto exit;
814 } 813 }
815 } 814 }
816 815
817 /* 816 /*
818 * Search backwards looking for log record header block 817 * Search backwards looking for log record header block
819 */ 818 */
820 ASSERT(*head_blk < INT_MAX); 819 ASSERT(*head_blk < INT_MAX);
821 for (i = (int)(*head_blk) - 1; i >= 0; i--) { 820 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
822 if ((error = xlog_bread(log, i, 1, bp))) 821 if ((error = xlog_bread(log, i, 1, bp)))
823 goto bread_err; 822 goto bread_err;
824 offset = xlog_align(log, i, 1, bp); 823 offset = xlog_align(log, i, 1, bp);
825 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { 824 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
826 found = 1; 825 found = 1;
827 break; 826 break;
828 } 827 }
829 } 828 }
830 /* 829 /*
831 * If we haven't found the log record header block, start looking 830 * If we haven't found the log record header block, start looking
832 * again from the end of the physical log. XXXmiken: There should be 831 * again from the end of the physical log. XXXmiken: There should be
833 * a check here to make sure we didn't search more than N blocks in 832 * a check here to make sure we didn't search more than N blocks in
834 * the previous code. 833 * the previous code.
835 */ 834 */
836 if (!found) { 835 if (!found) {
837 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { 836 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
838 if ((error = xlog_bread(log, i, 1, bp))) 837 if ((error = xlog_bread(log, i, 1, bp)))
839 goto bread_err; 838 goto bread_err;
840 offset = xlog_align(log, i, 1, bp); 839 offset = xlog_align(log, i, 1, bp);
841 if (XLOG_HEADER_MAGIC_NUM == 840 if (XLOG_HEADER_MAGIC_NUM ==
842 be32_to_cpu(*(__be32 *)offset)) { 841 be32_to_cpu(*(__be32 *)offset)) {
843 found = 2; 842 found = 2;
844 break; 843 break;
845 } 844 }
846 } 845 }
847 } 846 }
848 if (!found) { 847 if (!found) {
849 xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); 848 xlog_warn("XFS: xlog_find_tail: couldn't find sync record");
850 ASSERT(0); 849 ASSERT(0);
851 return XFS_ERROR(EIO); 850 return XFS_ERROR(EIO);
852 } 851 }
853 852
854 /* find blk_no of tail of log */ 853 /* find blk_no of tail of log */
855 rhead = (xlog_rec_header_t *)offset; 854 rhead = (xlog_rec_header_t *)offset;
856 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); 855 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
857 856
858 /* 857 /*
859 * Reset log values according to the state of the log when we 858 * Reset log values according to the state of the log when we
860 * crashed. In the case where head_blk == 0, we bump curr_cycle 859 * crashed. In the case where head_blk == 0, we bump curr_cycle
861 * one because the next write starts a new cycle rather than 860 * one because the next write starts a new cycle rather than
862 * continuing the cycle of the last good log record. At this 861 * continuing the cycle of the last good log record. At this
863 * point we have guaranteed that all partial log records have been 862 * point we have guaranteed that all partial log records have been
864 * accounted for. Therefore, we know that the last good log record 863 * accounted for. Therefore, we know that the last good log record
865 * written was complete and ended exactly on the end boundary 864 * written was complete and ended exactly on the end boundary
866 * of the physical log. 865 * of the physical log.
867 */ 866 */
868 log->l_prev_block = i; 867 log->l_prev_block = i;
869 log->l_curr_block = (int)*head_blk; 868 log->l_curr_block = (int)*head_blk;
870 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 869 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
871 if (found == 2) 870 if (found == 2)
872 log->l_curr_cycle++; 871 log->l_curr_cycle++;
873 log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); 872 log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
874 log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); 873 log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
875 log->l_grant_reserve_cycle = log->l_curr_cycle; 874 log->l_grant_reserve_cycle = log->l_curr_cycle;
876 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); 875 log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
877 log->l_grant_write_cycle = log->l_curr_cycle; 876 log->l_grant_write_cycle = log->l_curr_cycle;
878 log->l_grant_write_bytes = BBTOB(log->l_curr_block); 877 log->l_grant_write_bytes = BBTOB(log->l_curr_block);
879 878
880 /* 879 /*
881 * Look for unmount record. If we find it, then we know there 880 * Look for unmount record. If we find it, then we know there
882 * was a clean unmount. Since 'i' could be the last block in 881 * was a clean unmount. Since 'i' could be the last block in
883 * the physical log, we convert to a log block before comparing 882 * the physical log, we convert to a log block before comparing
884 * to the head_blk. 883 * to the head_blk.
885 * 884 *
886 * Save the current tail lsn to use to pass to 885 * Save the current tail lsn to use to pass to
887 * xlog_clear_stale_blocks() below. We won't want to clear the 886 * xlog_clear_stale_blocks() below. We won't want to clear the
888 * unmount record if there is one, so we pass the lsn of the 887 * unmount record if there is one, so we pass the lsn of the
889 * unmount record rather than the block after it. 888 * unmount record rather than the block after it.
890 */ 889 */
891 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 890 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
892 int h_size = be32_to_cpu(rhead->h_size); 891 int h_size = be32_to_cpu(rhead->h_size);
893 int h_version = be32_to_cpu(rhead->h_version); 892 int h_version = be32_to_cpu(rhead->h_version);
894 893
895 if ((h_version & XLOG_VERSION_2) && 894 if ((h_version & XLOG_VERSION_2) &&
896 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 895 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
897 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 896 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
898 if (h_size % XLOG_HEADER_CYCLE_SIZE) 897 if (h_size % XLOG_HEADER_CYCLE_SIZE)
899 hblks++; 898 hblks++;
900 } else { 899 } else {
901 hblks = 1; 900 hblks = 1;
902 } 901 }
903 } else { 902 } else {
904 hblks = 1; 903 hblks = 1;
905 } 904 }
906 after_umount_blk = (i + hblks + (int) 905 after_umount_blk = (i + hblks + (int)
907 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; 906 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
908 tail_lsn = log->l_tail_lsn; 907 tail_lsn = log->l_tail_lsn;
909 if (*head_blk == after_umount_blk && 908 if (*head_blk == after_umount_blk &&
910 be32_to_cpu(rhead->h_num_logops) == 1) { 909 be32_to_cpu(rhead->h_num_logops) == 1) {
911 umount_data_blk = (i + hblks) % log->l_logBBsize; 910 umount_data_blk = (i + hblks) % log->l_logBBsize;
912 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { 911 if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
913 goto bread_err; 912 goto bread_err;
914 } 913 }
915 offset = xlog_align(log, umount_data_blk, 1, bp); 914 offset = xlog_align(log, umount_data_blk, 1, bp);
916 op_head = (xlog_op_header_t *)offset; 915 op_head = (xlog_op_header_t *)offset;
917 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 916 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
918 /* 917 /*
919 * Set tail and last sync so that newly written 918 * Set tail and last sync so that newly written
920 * log records will point recovery to after the 919 * log records will point recovery to after the
921 * current unmount record. 920 * current unmount record.
922 */ 921 */
923 log->l_tail_lsn = 922 log->l_tail_lsn =
924 xlog_assign_lsn(log->l_curr_cycle, 923 xlog_assign_lsn(log->l_curr_cycle,
925 after_umount_blk); 924 after_umount_blk);
926 log->l_last_sync_lsn = 925 log->l_last_sync_lsn =
927 xlog_assign_lsn(log->l_curr_cycle, 926 xlog_assign_lsn(log->l_curr_cycle,
928 after_umount_blk); 927 after_umount_blk);
929 *tail_blk = after_umount_blk; 928 *tail_blk = after_umount_blk;
930 929
931 /* 930 /*
932 * Note that the unmount was clean. If the unmount 931 * Note that the unmount was clean. If the unmount
933 * was not clean, we need to know this to rebuild the 932 * was not clean, we need to know this to rebuild the
934 * superblock counters from the perag headers if we 933 * superblock counters from the perag headers if we
935 * have a filesystem using non-persistent counters. 934 * have a filesystem using non-persistent counters.
936 */ 935 */
937 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; 936 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
938 } 937 }
939 } 938 }
940 939
941 /* 940 /*
942 * Make sure that there are no blocks in front of the head 941 * Make sure that there are no blocks in front of the head
943 * with the same cycle number as the head. This can happen 942 * with the same cycle number as the head. This can happen
944 * because we allow multiple outstanding log writes concurrently, 943 * because we allow multiple outstanding log writes concurrently,
945 * and the later writes might make it out before earlier ones. 944 * and the later writes might make it out before earlier ones.
946 * 945 *
947 * We use the lsn from before modifying it so that we'll never 946 * We use the lsn from before modifying it so that we'll never
948 * overwrite the unmount record after a clean unmount. 947 * overwrite the unmount record after a clean unmount.
949 * 948 *
950 * Do this only if we are going to recover the filesystem 949 * Do this only if we are going to recover the filesystem
951 * 950 *
952 * NOTE: This used to say "if (!readonly)" 951 * NOTE: This used to say "if (!readonly)"
953 * However on Linux, we can & do recover a read-only filesystem. 952 * However on Linux, we can & do recover a read-only filesystem.
954 * We only skip recovery if NORECOVERY is specified on mount, 953 * We only skip recovery if NORECOVERY is specified on mount,
955 * in which case we would not be here. 954 * in which case we would not be here.
956 * 955 *
957 * But... if the -device- itself is readonly, just skip this. 956 * But... if the -device- itself is readonly, just skip this.
958 * We can't recover this device anyway, so it won't matter. 957 * We can't recover this device anyway, so it won't matter.
959 */ 958 */
960 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { 959 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
961 error = xlog_clear_stale_blocks(log, tail_lsn); 960 error = xlog_clear_stale_blocks(log, tail_lsn);
962 } 961 }
963 962
964 bread_err: 963 bread_err:
965 exit: 964 exit:
966 xlog_put_bp(bp); 965 xlog_put_bp(bp);
967 966
968 if (error) 967 if (error)
969 xlog_warn("XFS: failed to locate log tail"); 968 xlog_warn("XFS: failed to locate log tail");
970 return error; 969 return error;
971 } 970 }
972 971
973 /* 972 /*
974 * Is the log zeroed at all? 973 * Is the log zeroed at all?
975 * 974 *
976 * The last binary search should be changed to perform an X block read 975 * The last binary search should be changed to perform an X block read
977 * once X becomes small enough. You can then search linearly through 976 * once X becomes small enough. You can then search linearly through
978 * the X blocks. This will cut down on the number of reads we need to do. 977 * the X blocks. This will cut down on the number of reads we need to do.
979 * 978 *
980 * If the log is partially zeroed, this routine will pass back the blkno 979 * If the log is partially zeroed, this routine will pass back the blkno
981 * of the first block with cycle number 0. It won't have a complete LR 980 * of the first block with cycle number 0. It won't have a complete LR
982 * preceding it. 981 * preceding it.
983 * 982 *
984 * Return: 983 * Return:
985 * 0 => the log is completely written to 984 * 0 => the log is completely written to
986 * -1 => use *blk_no as the first block of the log 985 * -1 => use *blk_no as the first block of the log
987 * >0 => error has occurred 986 * >0 => error has occurred
988 */ 987 */
989 STATIC int 988 STATIC int
990 xlog_find_zeroed( 989 xlog_find_zeroed(
991 xlog_t *log, 990 xlog_t *log,
992 xfs_daddr_t *blk_no) 991 xfs_daddr_t *blk_no)
993 { 992 {
994 xfs_buf_t *bp; 993 xfs_buf_t *bp;
995 xfs_caddr_t offset; 994 xfs_caddr_t offset;
996 uint first_cycle, last_cycle; 995 uint first_cycle, last_cycle;
997 xfs_daddr_t new_blk, last_blk, start_blk; 996 xfs_daddr_t new_blk, last_blk, start_blk;
998 xfs_daddr_t num_scan_bblks; 997 xfs_daddr_t num_scan_bblks;
999 int error, log_bbnum = log->l_logBBsize; 998 int error, log_bbnum = log->l_logBBsize;
1000 999
1001 *blk_no = 0; 1000 *blk_no = 0;
1002 1001
1003 /* check totally zeroed log */ 1002 /* check totally zeroed log */
1004 bp = xlog_get_bp(log, 1); 1003 bp = xlog_get_bp(log, 1);
1005 if (!bp) 1004 if (!bp)
1006 return ENOMEM; 1005 return ENOMEM;
1007 if ((error = xlog_bread(log, 0, 1, bp))) 1006 if ((error = xlog_bread(log, 0, 1, bp)))
1008 goto bp_err; 1007 goto bp_err;
1009 offset = xlog_align(log, 0, 1, bp); 1008 offset = xlog_align(log, 0, 1, bp);
1010 first_cycle = xlog_get_cycle(offset); 1009 first_cycle = xlog_get_cycle(offset);
1011 if (first_cycle == 0) { /* completely zeroed log */ 1010 if (first_cycle == 0) { /* completely zeroed log */
1012 *blk_no = 0; 1011 *blk_no = 0;
1013 xlog_put_bp(bp); 1012 xlog_put_bp(bp);
1014 return -1; 1013 return -1;
1015 } 1014 }
1016 1015
1017 /* check partially zeroed log */ 1016 /* check partially zeroed log */
1018 if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) 1017 if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
1019 goto bp_err; 1018 goto bp_err;
1020 offset = xlog_align(log, log_bbnum-1, 1, bp); 1019 offset = xlog_align(log, log_bbnum-1, 1, bp);
1021 last_cycle = xlog_get_cycle(offset); 1020 last_cycle = xlog_get_cycle(offset);
1022 if (last_cycle != 0) { /* log completely written to */ 1021 if (last_cycle != 0) { /* log completely written to */
1023 xlog_put_bp(bp); 1022 xlog_put_bp(bp);
1024 return 0; 1023 return 0;
1025 } else if (first_cycle != 1) { 1024 } else if (first_cycle != 1) {
1026 /* 1025 /*
1027 * If the cycle of the last block is zero, the cycle of 1026 * If the cycle of the last block is zero, the cycle of
1028 * the first block must be 1. If it's not, maybe we're 1027 * the first block must be 1. If it's not, maybe we're
1029 * not looking at a log... Bail out. 1028 * not looking at a log... Bail out.
1030 */ 1029 */
1031 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); 1030 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)");
1032 return XFS_ERROR(EINVAL); 1031 return XFS_ERROR(EINVAL);
1033 } 1032 }
1034 1033
1035 /* we have a partially zeroed log */ 1034 /* we have a partially zeroed log */
1036 last_blk = log_bbnum-1; 1035 last_blk = log_bbnum-1;
1037 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) 1036 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
1038 goto bp_err; 1037 goto bp_err;
1039 1038
1040 /* 1039 /*
1041 * Validate the answer. Because there is no way to guarantee that 1040 * Validate the answer. Because there is no way to guarantee that
1042 * the entire log is made up of log records which are the same size, 1041 * the entire log is made up of log records which are the same size,
1043 * we scan over the defined maximum blocks. At this point, the maximum 1042 * we scan over the defined maximum blocks. At this point, the maximum
1044 * is not chosen to mean anything special. XXXmiken 1043 * is not chosen to mean anything special. XXXmiken
1045 */ 1044 */
1046 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 1045 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
1047 ASSERT(num_scan_bblks <= INT_MAX); 1046 ASSERT(num_scan_bblks <= INT_MAX);
1048 1047
1049 if (last_blk < num_scan_bblks) 1048 if (last_blk < num_scan_bblks)
1050 num_scan_bblks = last_blk; 1049 num_scan_bblks = last_blk;
1051 start_blk = last_blk - num_scan_bblks; 1050 start_blk = last_blk - num_scan_bblks;
1052 1051
1053 /* 1052 /*
1054 * We search for any instances of cycle number 0 that occur before 1053 * We search for any instances of cycle number 0 that occur before
1055 * our current estimate of the head. What we're trying to detect is 1054 * our current estimate of the head. What we're trying to detect is
1056 * 1 ... | 0 | 1 | 0... 1055 * 1 ... | 0 | 1 | 0...
1057 * ^ binary search ends here 1056 * ^ binary search ends here
1058 */ 1057 */
1059 if ((error = xlog_find_verify_cycle(log, start_blk, 1058 if ((error = xlog_find_verify_cycle(log, start_blk,
1060 (int)num_scan_bblks, 0, &new_blk))) 1059 (int)num_scan_bblks, 0, &new_blk)))
1061 goto bp_err; 1060 goto bp_err;
1062 if (new_blk != -1) 1061 if (new_blk != -1)
1063 last_blk = new_blk; 1062 last_blk = new_blk;
1064 1063
1065 /* 1064 /*
1066 * Potentially backup over partial log record write. We don't need 1065 * Potentially backup over partial log record write. We don't need
1067 * to search the end of the log because we know it is zero. 1066 * to search the end of the log because we know it is zero.
1068 */ 1067 */
1069 if ((error = xlog_find_verify_log_record(log, start_blk, 1068 if ((error = xlog_find_verify_log_record(log, start_blk,
1070 &last_blk, 0)) == -1) { 1069 &last_blk, 0)) == -1) {
1071 error = XFS_ERROR(EIO); 1070 error = XFS_ERROR(EIO);
1072 goto bp_err; 1071 goto bp_err;
1073 } else if (error) 1072 } else if (error)
1074 goto bp_err; 1073 goto bp_err;
1075 1074
1076 *blk_no = last_blk; 1075 *blk_no = last_blk;
1077 bp_err: 1076 bp_err:
1078 xlog_put_bp(bp); 1077 xlog_put_bp(bp);
1079 if (error) 1078 if (error)
1080 return error; 1079 return error;
1081 return -1; 1080 return -1;
1082 } 1081 }
1083 1082
1084 /* 1083 /*
1085 * These are simple subroutines used by xlog_clear_stale_blocks() below 1084 * These are simple subroutines used by xlog_clear_stale_blocks() below
1086 * to initialize a buffer full of empty log record headers and write 1085 * to initialize a buffer full of empty log record headers and write
1087 * them into the log. 1086 * them into the log.
1088 */ 1087 */
1089 STATIC void 1088 STATIC void
1090 xlog_add_record( 1089 xlog_add_record(
1091 xlog_t *log, 1090 xlog_t *log,
1092 xfs_caddr_t buf, 1091 xfs_caddr_t buf,
1093 int cycle, 1092 int cycle,
1094 int block, 1093 int block,
1095 int tail_cycle, 1094 int tail_cycle,
1096 int tail_block) 1095 int tail_block)
1097 { 1096 {
1098 xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; 1097 xlog_rec_header_t *recp = (xlog_rec_header_t *)buf;
1099 1098
1100 memset(buf, 0, BBSIZE); 1099 memset(buf, 0, BBSIZE);
1101 recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); 1100 recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
1102 recp->h_cycle = cpu_to_be32(cycle); 1101 recp->h_cycle = cpu_to_be32(cycle);
1103 recp->h_version = cpu_to_be32( 1102 recp->h_version = cpu_to_be32(
1104 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); 1103 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
1105 recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); 1104 recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
1106 recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); 1105 recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
1107 recp->h_fmt = cpu_to_be32(XLOG_FMT); 1106 recp->h_fmt = cpu_to_be32(XLOG_FMT);
1108 memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); 1107 memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
1109 } 1108 }
1110 1109
1111 STATIC int 1110 STATIC int
1112 xlog_write_log_records( 1111 xlog_write_log_records(
1113 xlog_t *log, 1112 xlog_t *log,
1114 int cycle, 1113 int cycle,
1115 int start_block, 1114 int start_block,
1116 int blocks, 1115 int blocks,
1117 int tail_cycle, 1116 int tail_cycle,
1118 int tail_block) 1117 int tail_block)
1119 { 1118 {
1120 xfs_caddr_t offset; 1119 xfs_caddr_t offset;
1121 xfs_buf_t *bp; 1120 xfs_buf_t *bp;
1122 int balign, ealign; 1121 int balign, ealign;
1123 int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 1122 int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
1124 int end_block = start_block + blocks; 1123 int end_block = start_block + blocks;
1125 int bufblks; 1124 int bufblks;
1126 int error = 0; 1125 int error = 0;
1127 int i, j = 0; 1126 int i, j = 0;
1128 1127
1129 bufblks = 1 << ffs(blocks); 1128 bufblks = 1 << ffs(blocks);
1130 while (!(bp = xlog_get_bp(log, bufblks))) { 1129 while (!(bp = xlog_get_bp(log, bufblks))) {
1131 bufblks >>= 1; 1130 bufblks >>= 1;
1132 if (bufblks <= log->l_sectbb_log) 1131 if (bufblks <= log->l_sectbb_log)
1133 return ENOMEM; 1132 return ENOMEM;
1134 } 1133 }
1135 1134
1136 /* We may need to do a read at the start to fill in part of 1135 /* We may need to do a read at the start to fill in part of
1137 * the buffer in the starting sector not covered by the first 1136 * the buffer in the starting sector not covered by the first
1138 * write below. 1137 * write below.
1139 */ 1138 */
1140 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); 1139 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
1141 if (balign != start_block) { 1140 if (balign != start_block) {
1142 if ((error = xlog_bread(log, start_block, 1, bp))) { 1141 if ((error = xlog_bread(log, start_block, 1, bp))) {
1143 xlog_put_bp(bp); 1142 xlog_put_bp(bp);
1144 return error; 1143 return error;
1145 } 1144 }
1146 j = start_block - balign; 1145 j = start_block - balign;
1147 } 1146 }
1148 1147
1149 for (i = start_block; i < end_block; i += bufblks) { 1148 for (i = start_block; i < end_block; i += bufblks) {
1150 int bcount, endcount; 1149 int bcount, endcount;
1151 1150
1152 bcount = min(bufblks, end_block - start_block); 1151 bcount = min(bufblks, end_block - start_block);
1153 endcount = bcount - j; 1152 endcount = bcount - j;
1154 1153
1155 /* We may need to do a read at the end to fill in part of 1154 /* We may need to do a read at the end to fill in part of
1156 * the buffer in the final sector not covered by the write. 1155 * the buffer in the final sector not covered by the write.
1157 * If this is the same sector as the above read, skip it. 1156 * If this is the same sector as the above read, skip it.
1158 */ 1157 */
1159 ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); 1158 ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block);
1160 if (j == 0 && (start_block + endcount > ealign)) { 1159 if (j == 0 && (start_block + endcount > ealign)) {
1161 offset = XFS_BUF_PTR(bp); 1160 offset = XFS_BUF_PTR(bp);
1162 balign = BBTOB(ealign - start_block); 1161 balign = BBTOB(ealign - start_block);
1163 error = XFS_BUF_SET_PTR(bp, offset + balign, 1162 error = XFS_BUF_SET_PTR(bp, offset + balign,
1164 BBTOB(sectbb)); 1163 BBTOB(sectbb));
1165 if (!error) 1164 if (!error)
1166 error = xlog_bread(log, ealign, sectbb, bp); 1165 error = xlog_bread(log, ealign, sectbb, bp);
1167 if (!error) 1166 if (!error)
1168 error = XFS_BUF_SET_PTR(bp, offset, bufblks); 1167 error = XFS_BUF_SET_PTR(bp, offset, bufblks);
1169 if (error) 1168 if (error)
1170 break; 1169 break;
1171 } 1170 }
1172 1171
1173 offset = xlog_align(log, start_block, endcount, bp); 1172 offset = xlog_align(log, start_block, endcount, bp);
1174 for (; j < endcount; j++) { 1173 for (; j < endcount; j++) {
1175 xlog_add_record(log, offset, cycle, i+j, 1174 xlog_add_record(log, offset, cycle, i+j,
1176 tail_cycle, tail_block); 1175 tail_cycle, tail_block);
1177 offset += BBSIZE; 1176 offset += BBSIZE;
1178 } 1177 }
1179 error = xlog_bwrite(log, start_block, endcount, bp); 1178 error = xlog_bwrite(log, start_block, endcount, bp);
1180 if (error) 1179 if (error)
1181 break; 1180 break;
1182 start_block += endcount; 1181 start_block += endcount;
1183 j = 0; 1182 j = 0;
1184 } 1183 }
1185 xlog_put_bp(bp); 1184 xlog_put_bp(bp);
1186 return error; 1185 return error;
1187 } 1186 }
1188 1187
1189 /* 1188 /*
1190 * This routine is called to blow away any incomplete log writes out 1189 * This routine is called to blow away any incomplete log writes out
1191 * in front of the log head. We do this so that we won't become confused 1190 * in front of the log head. We do this so that we won't become confused
1192 * if we come up, write only a little bit more, and then crash again. 1191 * if we come up, write only a little bit more, and then crash again.
1193 * If we leave the partial log records out there, this situation could 1192 * If we leave the partial log records out there, this situation could
1194 * cause us to think those partial writes are valid blocks since they 1193 * cause us to think those partial writes are valid blocks since they
1195 * have the current cycle number. We get rid of them by overwriting them 1194 * have the current cycle number. We get rid of them by overwriting them
1196 * with empty log records with the old cycle number rather than the 1195 * with empty log records with the old cycle number rather than the
1197 * current one. 1196 * current one.
1198 * 1197 *
1199 * The tail lsn is passed in rather than taken from 1198 * The tail lsn is passed in rather than taken from
1200 * the log so that we will not write over the unmount record after a 1199 * the log so that we will not write over the unmount record after a
1201 * clean unmount in a 512 block log. Doing so would leave the log without 1200 * clean unmount in a 512 block log. Doing so would leave the log without
1202 * any valid log records in it until a new one was written. If we crashed 1201 * any valid log records in it until a new one was written. If we crashed
1203 * during that time we would not be able to recover. 1202 * during that time we would not be able to recover.
1204 */ 1203 */
STATIC int
xlog_clear_stale_blocks(
	xlog_t		*log,
	xfs_lsn_t	tail_lsn)
{
	int		tail_cycle, head_cycle;
	int		tail_block, head_block;
	int		tail_distance, max_distance;
	int		distance;
	int		error;

	/* Decompose the caller's tail LSN and pick up the in-core head. */
	tail_cycle = CYCLE_LSN(tail_lsn);
	tail_block = BLOCK_LSN(tail_lsn);
	head_cycle = log->l_curr_cycle;
	head_block = log->l_curr_block;

	/*
	 * Figure out the distance between the new head of the log
	 * and the tail.  We want to write over any blocks beyond the
	 * head that we may have written just before the crash, but
	 * we don't want to overwrite the tail of the log.
	 */
	if (head_cycle == tail_cycle) {
		/*
		 * The tail is behind the head in the physical log,
		 * so the distance from the head to the tail is the
		 * distance from the head to the end of the log plus
		 * the distance from the beginning of the log to the
		 * tail.
		 *
		 * Same cycle implies the head must lie at or beyond the
		 * tail and inside the log; anything else means the log
		 * metadata is corrupt.
		 */
		if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
					 XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block + (log->l_logBBsize - head_block);
	} else {
		/*
		 * The head is behind the tail in the physical log,
		 * so the distance from the head to the tail is just
		 * the tail block minus the head block.
		 *
		 * Differing cycles can only differ by exactly one, with
		 * the head physically before the tail; anything else is
		 * corruption.
		 */
		if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
					 XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block - head_block;
	}

	/*
	 * If the head is right up against the tail, we can't clear
	 * anything.
	 */
	if (tail_distance <= 0) {
		ASSERT(tail_distance == 0);
		return 0;
	}

	max_distance = XLOG_TOTAL_REC_SHIFT(log);
	/*
	 * Take the smaller of the maximum amount of outstanding I/O
	 * we could have and the distance to the tail to clear out.
	 * We take the smaller so that we don't overwrite the tail and
	 * we don't waste all day writing from the head to the tail
	 * for no reason.
	 */
	max_distance = MIN(max_distance, tail_distance);

	if ((head_block + max_distance) <= log->l_logBBsize) {
		/*
		 * We can stomp all the blocks we need to without
		 * wrapping around the end of the log.  Just do it
		 * in a single write.  Use the cycle number of the
		 * current cycle minus one so that the log will look like:
		 *     n ... | n - 1 ...
		 */
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, max_distance, tail_cycle,
				tail_block);
		if (error)
			return error;
	} else {
		/*
		 * We need to wrap around the end of the physical log in
		 * order to clear all the blocks.  Do it in two separate
		 * I/Os.  The first write should be from the head to the
		 * end of the physical log, and it should use the current
		 * cycle number minus one just like above.
		 */
		distance = log->l_logBBsize - head_block;
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, distance, tail_cycle,
				tail_block);

		if (error)
			return error;

		/*
		 * Now write the blocks at the start of the physical log.
		 * This writes the remainder of the blocks we want to clear.
		 * It uses the current cycle number since we're now on the
		 * same cycle as the head so that we get:
		 *    n ... n ... | n - 1 ...
		 *    ^^^^^ blocks we're writing
		 */
		distance = max_distance - (log->l_logBBsize - head_block);
		error = xlog_write_log_records(log, head_cycle, 0, distance,
				tail_cycle, tail_block);
		if (error)
			return error;
	}

	return 0;
}
1320 1319
1321 /****************************************************************************** 1320 /******************************************************************************
1322 * 1321 *
1323 * Log recover routines 1322 * Log recover routines
1324 * 1323 *
1325 ****************************************************************************** 1324 ******************************************************************************
1326 */ 1325 */
1327 1326
1328 STATIC xlog_recover_t * 1327 STATIC xlog_recover_t *
1329 xlog_recover_find_tid( 1328 xlog_recover_find_tid(
1330 xlog_recover_t *q, 1329 xlog_recover_t *q,
1331 xlog_tid_t tid) 1330 xlog_tid_t tid)
1332 { 1331 {
1333 xlog_recover_t *p = q; 1332 xlog_recover_t *p = q;
1334 1333
1335 while (p != NULL) { 1334 while (p != NULL) {
1336 if (p->r_log_tid == tid) 1335 if (p->r_log_tid == tid)
1337 break; 1336 break;
1338 p = p->r_next; 1337 p = p->r_next;
1339 } 1338 }
1340 return p; 1339 return p;
1341 } 1340 }
1342 1341
1343 STATIC void 1342 STATIC void
1344 xlog_recover_put_hashq( 1343 xlog_recover_put_hashq(
1345 xlog_recover_t **q, 1344 xlog_recover_t **q,
1346 xlog_recover_t *trans) 1345 xlog_recover_t *trans)
1347 { 1346 {
1348 trans->r_next = *q; 1347 trans->r_next = *q;
1349 *q = trans; 1348 *q = trans;
1350 } 1349 }
1351 1350
1352 STATIC void 1351 STATIC void
1353 xlog_recover_add_item( 1352 xlog_recover_add_item(
1354 xlog_recover_item_t **itemq) 1353 xlog_recover_item_t **itemq)
1355 { 1354 {
1356 xlog_recover_item_t *item; 1355 xlog_recover_item_t *item;
1357 1356
1358 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); 1357 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
1359 xlog_recover_insert_item_backq(itemq, item); 1358 xlog_recover_insert_item_backq(itemq, item);
1360 } 1359 }
1361 1360
1362 STATIC int 1361 STATIC int
1363 xlog_recover_add_to_cont_trans( 1362 xlog_recover_add_to_cont_trans(
1364 xlog_recover_t *trans, 1363 xlog_recover_t *trans,
1365 xfs_caddr_t dp, 1364 xfs_caddr_t dp,
1366 int len) 1365 int len)
1367 { 1366 {
1368 xlog_recover_item_t *item; 1367 xlog_recover_item_t *item;
1369 xfs_caddr_t ptr, old_ptr; 1368 xfs_caddr_t ptr, old_ptr;
1370 int old_len; 1369 int old_len;
1371 1370
1372 item = trans->r_itemq; 1371 item = trans->r_itemq;
1373 if (item == NULL) { 1372 if (item == NULL) {
1374 /* finish copying rest of trans header */ 1373 /* finish copying rest of trans header */
1375 xlog_recover_add_item(&trans->r_itemq); 1374 xlog_recover_add_item(&trans->r_itemq);
1376 ptr = (xfs_caddr_t) &trans->r_theader + 1375 ptr = (xfs_caddr_t) &trans->r_theader +
1377 sizeof(xfs_trans_header_t) - len; 1376 sizeof(xfs_trans_header_t) - len;
1378 memcpy(ptr, dp, len); /* d, s, l */ 1377 memcpy(ptr, dp, len); /* d, s, l */
1379 return 0; 1378 return 0;
1380 } 1379 }
1381 item = item->ri_prev; 1380 item = item->ri_prev;
1382 1381
1383 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1382 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1384 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1383 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1385 1384
1386 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); 1385 ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
1387 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1386 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1388 item->ri_buf[item->ri_cnt-1].i_len += len; 1387 item->ri_buf[item->ri_cnt-1].i_len += len;
1389 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1388 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1390 return 0; 1389 return 0;
1391 } 1390 }
1392 1391
1393 /* 1392 /*
1394 * The next region to add is the start of a new region. It could be 1393 * The next region to add is the start of a new region. It could be
1395 * a whole region or it could be the first part of a new region. Because 1394 * a whole region or it could be the first part of a new region. Because
1396 * of this, the assumption here is that the type and size fields of all 1395 * of this, the assumption here is that the type and size fields of all
1397 * format structures fit into the first 32 bits of the structure. 1396 * format structures fit into the first 32 bits of the structure.
1398 * 1397 *
1399 * This works because all regions must be 32 bit aligned. Therefore, we 1398 * This works because all regions must be 32 bit aligned. Therefore, we
1400 * either have both fields or we have neither field. In the case we have 1399 * either have both fields or we have neither field. In the case we have
1401 * neither field, the data part of the region is zero length. We only have 1400 * neither field, the data part of the region is zero length. We only have
1402 * a log_op_header and can throw away the header since a new one will appear 1401 * a log_op_header and can throw away the header since a new one will appear
1403 * later. If we have at least 4 bytes, then we can determine how many regions 1402 * later. If we have at least 4 bytes, then we can determine how many regions
1404 * will appear in the current log item. 1403 * will appear in the current log item.
1405 */ 1404 */
1406 STATIC int 1405 STATIC int
1407 xlog_recover_add_to_trans( 1406 xlog_recover_add_to_trans(
1408 xlog_recover_t *trans, 1407 xlog_recover_t *trans,
1409 xfs_caddr_t dp, 1408 xfs_caddr_t dp,
1410 int len) 1409 int len)
1411 { 1410 {
1412 xfs_inode_log_format_t *in_f; /* any will do */ 1411 xfs_inode_log_format_t *in_f; /* any will do */
1413 xlog_recover_item_t *item; 1412 xlog_recover_item_t *item;
1414 xfs_caddr_t ptr; 1413 xfs_caddr_t ptr;
1415 1414
1416 if (!len) 1415 if (!len)
1417 return 0; 1416 return 0;
1418 item = trans->r_itemq; 1417 item = trans->r_itemq;
1419 if (item == NULL) { 1418 if (item == NULL) {
1420 /* we need to catch log corruptions here */ 1419 /* we need to catch log corruptions here */
1421 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 1420 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1422 xlog_warn("XFS: xlog_recover_add_to_trans: " 1421 xlog_warn("XFS: xlog_recover_add_to_trans: "
1423 "bad header magic number"); 1422 "bad header magic number");
1424 ASSERT(0); 1423 ASSERT(0);
1425 return XFS_ERROR(EIO); 1424 return XFS_ERROR(EIO);
1426 } 1425 }
1427 if (len == sizeof(xfs_trans_header_t)) 1426 if (len == sizeof(xfs_trans_header_t))
1428 xlog_recover_add_item(&trans->r_itemq); 1427 xlog_recover_add_item(&trans->r_itemq);
1429 memcpy(&trans->r_theader, dp, len); /* d, s, l */ 1428 memcpy(&trans->r_theader, dp, len); /* d, s, l */
1430 return 0; 1429 return 0;
1431 } 1430 }
1432 1431
1433 ptr = kmem_alloc(len, KM_SLEEP); 1432 ptr = kmem_alloc(len, KM_SLEEP);
1434 memcpy(ptr, dp, len); 1433 memcpy(ptr, dp, len);
1435 in_f = (xfs_inode_log_format_t *)ptr; 1434 in_f = (xfs_inode_log_format_t *)ptr;
1436 1435
1437 if (item->ri_prev->ri_total != 0 && 1436 if (item->ri_prev->ri_total != 0 &&
1438 item->ri_prev->ri_total == item->ri_prev->ri_cnt) { 1437 item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
1439 xlog_recover_add_item(&trans->r_itemq); 1438 xlog_recover_add_item(&trans->r_itemq);
1440 } 1439 }
1441 item = trans->r_itemq; 1440 item = trans->r_itemq;
1442 item = item->ri_prev; 1441 item = item->ri_prev;
1443 1442
1444 if (item->ri_total == 0) { /* first region to be added */ 1443 if (item->ri_total == 0) { /* first region to be added */
1445 item->ri_total = in_f->ilf_size; 1444 item->ri_total = in_f->ilf_size;
1446 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); 1445 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
1447 item->ri_buf = kmem_zalloc((item->ri_total * 1446 item->ri_buf = kmem_zalloc((item->ri_total *
1448 sizeof(xfs_log_iovec_t)), KM_SLEEP); 1447 sizeof(xfs_log_iovec_t)), KM_SLEEP);
1449 } 1448 }
1450 ASSERT(item->ri_total > item->ri_cnt); 1449 ASSERT(item->ri_total > item->ri_cnt);
1451 /* Description region is ri_buf[0] */ 1450 /* Description region is ri_buf[0] */
1452 item->ri_buf[item->ri_cnt].i_addr = ptr; 1451 item->ri_buf[item->ri_cnt].i_addr = ptr;
1453 item->ri_buf[item->ri_cnt].i_len = len; 1452 item->ri_buf[item->ri_cnt].i_len = len;
1454 item->ri_cnt++; 1453 item->ri_cnt++;
1455 return 0; 1454 return 0;
1456 } 1455 }
1457 1456
1458 STATIC void 1457 STATIC void
1459 xlog_recover_new_tid( 1458 xlog_recover_new_tid(
1460 xlog_recover_t **q, 1459 xlog_recover_t **q,
1461 xlog_tid_t tid, 1460 xlog_tid_t tid,
1462 xfs_lsn_t lsn) 1461 xfs_lsn_t lsn)
1463 { 1462 {
1464 xlog_recover_t *trans; 1463 xlog_recover_t *trans;
1465 1464
1466 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); 1465 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1467 trans->r_log_tid = tid; 1466 trans->r_log_tid = tid;
1468 trans->r_lsn = lsn; 1467 trans->r_lsn = lsn;
1469 xlog_recover_put_hashq(q, trans); 1468 xlog_recover_put_hashq(q, trans);
1470 } 1469 }
1471 1470
1472 STATIC int 1471 STATIC int
1473 xlog_recover_unlink_tid( 1472 xlog_recover_unlink_tid(
1474 xlog_recover_t **q, 1473 xlog_recover_t **q,
1475 xlog_recover_t *trans) 1474 xlog_recover_t *trans)
1476 { 1475 {
1477 xlog_recover_t *tp; 1476 xlog_recover_t *tp;
1478 int found = 0; 1477 int found = 0;
1479 1478
1480 ASSERT(trans != NULL); 1479 ASSERT(trans != NULL);
1481 if (trans == *q) { 1480 if (trans == *q) {
1482 *q = (*q)->r_next; 1481 *q = (*q)->r_next;
1483 } else { 1482 } else {
1484 tp = *q; 1483 tp = *q;
1485 while (tp) { 1484 while (tp) {
1486 if (tp->r_next == trans) { 1485 if (tp->r_next == trans) {
1487 found = 1; 1486 found = 1;
1488 break; 1487 break;
1489 } 1488 }
1490 tp = tp->r_next; 1489 tp = tp->r_next;
1491 } 1490 }
1492 if (!found) { 1491 if (!found) {
1493 xlog_warn( 1492 xlog_warn(
1494 "XFS: xlog_recover_unlink_tid: trans not found"); 1493 "XFS: xlog_recover_unlink_tid: trans not found");
1495 ASSERT(0); 1494 ASSERT(0);
1496 return XFS_ERROR(EIO); 1495 return XFS_ERROR(EIO);
1497 } 1496 }
1498 tp->r_next = tp->r_next->r_next; 1497 tp->r_next = tp->r_next->r_next;
1499 } 1498 }
1500 return 0; 1499 return 0;
1501 } 1500 }
1502 1501
1503 STATIC void 1502 STATIC void
1504 xlog_recover_insert_item_backq( 1503 xlog_recover_insert_item_backq(
1505 xlog_recover_item_t **q, 1504 xlog_recover_item_t **q,
1506 xlog_recover_item_t *item) 1505 xlog_recover_item_t *item)
1507 { 1506 {
1508 if (*q == NULL) { 1507 if (*q == NULL) {
1509 item->ri_prev = item->ri_next = item; 1508 item->ri_prev = item->ri_next = item;
1510 *q = item; 1509 *q = item;
1511 } else { 1510 } else {
1512 item->ri_next = *q; 1511 item->ri_next = *q;
1513 item->ri_prev = (*q)->ri_prev; 1512 item->ri_prev = (*q)->ri_prev;
1514 (*q)->ri_prev = item; 1513 (*q)->ri_prev = item;
1515 item->ri_prev->ri_next = item; 1514 item->ri_prev->ri_next = item;
1516 } 1515 }
1517 } 1516 }
1518 1517
/*
 * Insert an item at the head of the circular recovery item queue: link
 * it in front of the current head (via the backq helper, which places it
 * between tail and head) and then advance the head pointer to it.
 */
STATIC void
xlog_recover_insert_item_frontq(
	xlog_recover_item_t	**q,
	xlog_recover_item_t	*item)
{
	xlog_recover_insert_item_backq(q, item);
	*q = item;
}
1527 1526
/*
 * Reorder the items of a recovery transaction so that non-cancelled
 * buffer items are replayed before everything else: the queue is rebuilt
 * in place, with such buffers inserted at the front and all other item
 * types appended at the back.  Returns 0 on success, EIO for an
 * unrecognised log item type.
 */
STATIC int
xlog_recover_reorder_trans(
	xlog_recover_t		*trans)
{
	xlog_recover_item_t	*first_item, *itemq, *itemq_next;
	xfs_buf_log_format_t	*buf_f;
	ushort			flags = 0;

	/* Detach the whole queue, then re-insert each item one by one. */
	first_item = itemq = trans->r_itemq;
	trans->r_itemq = NULL;
	do {
		itemq_next = itemq->ri_next;
		buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;

		switch (ITEM_TYPE(itemq)) {
		case XFS_LI_BUF:
			flags = buf_f->blf_flags;
			if (!(flags & XFS_BLI_CANCEL)) {
				xlog_recover_insert_item_frontq(&trans->r_itemq,
								itemq);
				break;
			}
			/*
			 * fallthrough: cancelled buffers are queued at the
			 * back with the other item types.
			 */
		case XFS_LI_INODE:
		case XFS_LI_DQUOT:
		case XFS_LI_QUOTAOFF:
		case XFS_LI_EFD:
		case XFS_LI_EFI:
			xlog_recover_insert_item_backq(&trans->r_itemq, itemq);
			break;
		default:
			xlog_warn(
	"XFS: xlog_recover_reorder_trans: unrecognized type of log operation");
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
		itemq = itemq_next;
	} while (first_item != itemq);
	return 0;
}
1567 1566
1568 /* 1567 /*
1569 * Build up the table of buf cancel records so that we don't replay 1568 * Build up the table of buf cancel records so that we don't replay
1570 * cancelled data in the second pass. For buffer records that are 1569 * cancelled data in the second pass. For buffer records that are
1571 * not cancel records, there is nothing to do here so we just return. 1570 * not cancel records, there is nothing to do here so we just return.
1572 * 1571 *
1573 * If we get a cancel record which is already in the table, this indicates 1572 * If we get a cancel record which is already in the table, this indicates
1574 * that the buffer was cancelled multiple times. In order to ensure 1573 * that the buffer was cancelled multiple times. In order to ensure
1575 * that during pass 2 we keep the record in the table until we reach its 1574 * that during pass 2 we keep the record in the table until we reach its
1576 * last occurrence in the log, we keep a reference count in the cancel 1575 * last occurrence in the log, we keep a reference count in the cancel
1577 * record in the table to tell us how many times we expect to see this 1576 * record in the table to tell us how many times we expect to see this
1578 * record during the second pass. 1577 * record during the second pass.
1579 */ 1578 */
1580 STATIC void 1579 STATIC void
1581 xlog_recover_do_buffer_pass1( 1580 xlog_recover_do_buffer_pass1(
1582 xlog_t *log, 1581 xlog_t *log,
1583 xfs_buf_log_format_t *buf_f) 1582 xfs_buf_log_format_t *buf_f)
1584 { 1583 {
1585 xfs_buf_cancel_t *bcp; 1584 xfs_buf_cancel_t *bcp;
1586 xfs_buf_cancel_t *nextp; 1585 xfs_buf_cancel_t *nextp;
1587 xfs_buf_cancel_t *prevp; 1586 xfs_buf_cancel_t *prevp;
1588 xfs_buf_cancel_t **bucket; 1587 xfs_buf_cancel_t **bucket;
1589 xfs_daddr_t blkno = 0; 1588 xfs_daddr_t blkno = 0;
1590 uint len = 0; 1589 uint len = 0;
1591 ushort flags = 0; 1590 ushort flags = 0;
1592 1591
1593 switch (buf_f->blf_type) { 1592 switch (buf_f->blf_type) {
1594 case XFS_LI_BUF: 1593 case XFS_LI_BUF:
1595 blkno = buf_f->blf_blkno; 1594 blkno = buf_f->blf_blkno;
1596 len = buf_f->blf_len; 1595 len = buf_f->blf_len;
1597 flags = buf_f->blf_flags; 1596 flags = buf_f->blf_flags;
1598 break; 1597 break;
1599 } 1598 }
1600 1599
1601 /* 1600 /*
1602 * If this isn't a cancel buffer item, then just return. 1601 * If this isn't a cancel buffer item, then just return.
1603 */ 1602 */
1604 if (!(flags & XFS_BLI_CANCEL)) 1603 if (!(flags & XFS_BLI_CANCEL))
1605 return; 1604 return;
1606 1605
1607 /* 1606 /*
1608 * Insert an xfs_buf_cancel record into the hash table of 1607 * Insert an xfs_buf_cancel record into the hash table of
1609 * them. If there is already an identical record, bump 1608 * them. If there is already an identical record, bump
1610 * its reference count. 1609 * its reference count.
1611 */ 1610 */
1612 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % 1611 bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
1613 XLOG_BC_TABLE_SIZE]; 1612 XLOG_BC_TABLE_SIZE];
1614 /* 1613 /*
1615 * If the hash bucket is empty then just insert a new record into 1614 * If the hash bucket is empty then just insert a new record into
1616 * the bucket. 1615 * the bucket.
1617 */ 1616 */
1618 if (*bucket == NULL) { 1617 if (*bucket == NULL) {
1619 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), 1618 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
1620 KM_SLEEP); 1619 KM_SLEEP);
1621 bcp->bc_blkno = blkno; 1620 bcp->bc_blkno = blkno;
1622 bcp->bc_len = len; 1621 bcp->bc_len = len;
1623 bcp->bc_refcount = 1; 1622 bcp->bc_refcount = 1;
1624 bcp->bc_next = NULL; 1623 bcp->bc_next = NULL;
1625 *bucket = bcp; 1624 *bucket = bcp;
1626 return; 1625 return;
1627 } 1626 }
1628 1627
1629 /* 1628 /*
1630 * The hash bucket is not empty, so search for duplicates of our 1629 * The hash bucket is not empty, so search for duplicates of our
1631 * record. If we find one them just bump its refcount. If not 1630 * record. If we find one them just bump its refcount. If not
1632 * then add us at the end of the list. 1631 * then add us at the end of the list.
1633 */ 1632 */
1634 prevp = NULL; 1633 prevp = NULL;
1635 nextp = *bucket; 1634 nextp = *bucket;
1636 while (nextp != NULL) { 1635 while (nextp != NULL) {
1637 if (nextp->bc_blkno == blkno && nextp->bc_len == len) { 1636 if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
1638 nextp->bc_refcount++; 1637 nextp->bc_refcount++;
1639 return; 1638 return;
1640 } 1639 }
1641 prevp = nextp; 1640 prevp = nextp;
1642 nextp = nextp->bc_next; 1641 nextp = nextp->bc_next;
1643 } 1642 }
1644 ASSERT(prevp != NULL); 1643 ASSERT(prevp != NULL);
1645 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), 1644 bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
1646 KM_SLEEP); 1645 KM_SLEEP);
1647 bcp->bc_blkno = blkno; 1646 bcp->bc_blkno = blkno;
1648 bcp->bc_len = len; 1647 bcp->bc_len = len;
1649 bcp->bc_refcount = 1; 1648 bcp->bc_refcount = 1;
1650 bcp->bc_next = NULL; 1649 bcp->bc_next = NULL;
1651 prevp->bc_next = bcp; 1650 prevp->bc_next = bcp;
1652 } 1651 }
1653 1652
/*
 * Check to see whether the buffer being recovered has a corresponding
 * entry in the buffer cancel record table. If it does then return 1
 * so that it will be cancelled, otherwise return 0. If the buffer is
 * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement
 * the refcount on the entry in the table and remove it from the table
 * if this is the last reference.
 *
 * We remove the cancel record from the table when we encounter its
 * last occurrence in the log so that if the same buffer is re-used
 * again after its last cancellation we actually replay the changes
 * made at that point.
 */
STATIC int
xlog_check_buffer_cancelled(
	xlog_t			*log,
	xfs_daddr_t		blkno,
	uint			len,
	ushort			flags)
{
	xfs_buf_cancel_t	*bcp;
	xfs_buf_cancel_t	*prevp;
	xfs_buf_cancel_t	**bucket;

	if (log->l_buf_cancel_table == NULL) {
		/*
		 * There is nothing in the table built in pass one,
		 * so this buffer must not be cancelled.
		 */
		ASSERT(!(flags & XFS_BLI_CANCEL));
		return 0;
	}

	/* Hash to the same bucket used by xlog_recover_do_buffer_pass1. */
	bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
						XLOG_BC_TABLE_SIZE];
	bcp = *bucket;
	if (bcp == NULL) {
		/*
		 * There is no corresponding entry in the table built
		 * in pass one, so this buffer has not been cancelled.
		 */
		ASSERT(!(flags & XFS_BLI_CANCEL));
		return 0;
	}

	/*
	 * Search for an entry in the buffer cancel table that
	 * matches our buffer.
	 */
	prevp = NULL;
	while (bcp != NULL) {
		if (bcp->bc_blkno == blkno && bcp->bc_len == len) {
			/*
			 * We've go a match, so return 1 so that the
			 * recovery of this buffer is cancelled.
			 * If this buffer is actually a buffer cancel
			 * log item, then decrement the refcount on the
			 * one in the table and remove it if this is the
			 * last reference.
			 */
			if (flags & XFS_BLI_CANCEL) {
				bcp->bc_refcount--;
				if (bcp->bc_refcount == 0) {
					/* unlink from bucket head or middle */
					if (prevp == NULL) {
						*bucket = bcp->bc_next;
					} else {
						prevp->bc_next = bcp->bc_next;
					}
					kmem_free(bcp);
				}
			}
			return 1;
		}
		prevp = bcp;
		bcp = bcp->bc_next;
	}
	/*
	 * We didn't find a corresponding entry in the table, so
	 * return 0 so that the buffer is NOT cancelled.
	 */
	ASSERT(!(flags & XFS_BLI_CANCEL));
	return 0;
}
1737 1736
1738 STATIC int 1737 STATIC int
1739 xlog_recover_do_buffer_pass2( 1738 xlog_recover_do_buffer_pass2(
1740 xlog_t *log, 1739 xlog_t *log,
1741 xfs_buf_log_format_t *buf_f) 1740 xfs_buf_log_format_t *buf_f)
1742 { 1741 {
1743 xfs_daddr_t blkno = 0; 1742 xfs_daddr_t blkno = 0;
1744 ushort flags = 0; 1743 ushort flags = 0;
1745 uint len = 0; 1744 uint len = 0;
1746 1745
1747 switch (buf_f->blf_type) { 1746 switch (buf_f->blf_type) {
1748 case XFS_LI_BUF: 1747 case XFS_LI_BUF:
1749 blkno = buf_f->blf_blkno; 1748 blkno = buf_f->blf_blkno;
1750 flags = buf_f->blf_flags; 1749 flags = buf_f->blf_flags;
1751 len = buf_f->blf_len; 1750 len = buf_f->blf_len;
1752 break; 1751 break;
1753 } 1752 }
1754 1753
1755 return xlog_check_buffer_cancelled(log, blkno, len, flags); 1754 return xlog_check_buffer_cancelled(log, blkno, len, flags);
1756 } 1755 }
1757 1756
/*
 * Perform recovery for a buffer full of inodes.  In these buffers,
 * the only data which should be recovered is that which corresponds
 * to the di_next_unlinked pointers in the on disk inode structures.
 * The rest of the data for the inodes is always logged through the
 * inodes themselves rather than the inode buffer and is recovered
 * in xlog_recover_do_inode_trans().
 *
 * The only time when buffers full of inodes are fully recovered is
 * when the buffer is full of newly allocated inodes.  In this case
 * the buffer will not be marked as an inode buffer and so will be
 * sent to xlog_recover_do_reg_buffer() below during recovery.
 */
STATIC int
xlog_recover_do_inode_buffer(
	xfs_mount_t		*mp,
	xlog_recover_item_t	*item,
	xfs_buf_t		*bp,
	xfs_buf_log_format_t	*buf_f)
{
	int			i;
	int			item_index;
	int			bit;
	int			nbits;
	int			reg_buf_offset;
	int			reg_buf_bytes;
	int			next_unlinked_offset;
	int			inodes_per_buf;
	xfs_agino_t		*logged_nextp;
	xfs_agino_t		*buffer_nextp;
	unsigned int		*data_map = NULL;
	unsigned int		map_size = 0;

	switch (buf_f->blf_type) {
	case XFS_LI_BUF:
		data_map = buf_f->blf_data_map;
		map_size = buf_f->blf_map_size;
		break;
	}
	/*
	 * Set the variables corresponding to the current region to
	 * 0 so that we'll initialize them on the first pass through
	 * the loop.
	 */
	reg_buf_offset = 0;
	reg_buf_bytes = 0;
	bit = 0;
	nbits = 0;
	item_index = 0;
	inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
	/* walk each inode slot in the buffer in ascending offset order */
	for (i = 0; i < inodes_per_buf; i++) {
		next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
			offsetof(xfs_dinode_t, di_next_unlinked);

		while (next_unlinked_offset >=
		       (reg_buf_offset + reg_buf_bytes)) {
			/*
			 * The next di_next_unlinked field is beyond
			 * the current logged region. Find the next
			 * logged region that contains or is beyond
			 * the current di_next_unlinked field.
			 */
			bit += nbits;
			bit = xfs_next_bit(data_map, map_size, bit);

			/*
			 * If there are no more logged regions in the
			 * buffer, then we're done.
			 */
			if (bit == -1) {
				return 0;
			}

			/* each bitmap bit covers one XFS_BLI_CHUNK of data */
			nbits = xfs_contig_bits(data_map, map_size,
							bit);
			ASSERT(nbits > 0);
			reg_buf_offset = bit << XFS_BLI_SHIFT;
			reg_buf_bytes = nbits << XFS_BLI_SHIFT;
			item_index++;
		}

		/*
		 * If the current logged region starts after the current
		 * di_next_unlinked field, then move on to the next
		 * di_next_unlinked field.
		 */
		if (next_unlinked_offset < reg_buf_offset) {
			continue;
		}

		ASSERT(item->ri_buf[item_index].i_addr != NULL);
		ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0);
		ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp));

		/*
		 * The current logged region contains a copy of the
		 * current di_next_unlinked field.  Extract its value
		 * and copy it to the buffer copy.
		 */
		logged_nextp = (xfs_agino_t *)
			       ((char *)(item->ri_buf[item_index].i_addr) +
				(next_unlinked_offset - reg_buf_offset));
		if (unlikely(*logged_nextp == 0)) {
			/* a zero next_unlinked is never valid on disk */
			xfs_fs_cmn_err(CE_ALERT, mp,
				"bad inode buffer log record (ptr = 0x%p, bp = 0x%p).  XFS trying to replay bad (0) inode di_next_unlinked field",
				item, bp);
			XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
					 XFS_ERRLEVEL_LOW, mp);
			return XFS_ERROR(EFSCORRUPTED);
		}

		buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
					      next_unlinked_offset);
		*buffer_nextp = *logged_nextp;
	}

	return 0;
}
1876 1875
/*
 * Perform a 'normal' buffer recovery.  Each logged region of the
 * buffer should be copied over the corresponding region in the
 * given buffer.  The bitmap in the buf log format structure indicates
 * where to place the logged data.
 */
/*ARGSUSED*/
STATIC void
xlog_recover_do_reg_buffer(
	xlog_recover_item_t	*item,
	xfs_buf_t		*bp,
	xfs_buf_log_format_t	*buf_f)
{
	int			i;
	int			bit;
	int			nbits;
	unsigned int		*data_map = NULL;
	unsigned int		map_size = 0;
	int                     error;

	switch (buf_f->blf_type) {
	case XFS_LI_BUF:
		data_map = buf_f->blf_data_map;
		map_size = buf_f->blf_map_size;
		break;
	}
	bit = 0;
	i = 1;  /* 0 is the buf format structure */
	while (1) {
		/* each run of set bits in the map is one logged region */
		bit = xfs_next_bit(data_map, map_size, bit);
		if (bit == -1)
			break;
		nbits = xfs_contig_bits(data_map, map_size, bit);
		ASSERT(nbits > 0);
		ASSERT(item->ri_buf[i].i_addr != NULL);
		ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0);
		ASSERT(XFS_BUF_COUNT(bp) >=
		       ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT));

		/*
		 * Do a sanity check if this is a dquot buffer. Just checking
		 * the first dquot in the buffer should do. XXXThis is
		 * probably a good thing to do for other buf types also.
		 */
		error = 0;
		if (buf_f->blf_flags &
		   (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
			error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
					       item->ri_buf[i].i_addr,
					       -1, 0, XFS_QMOPT_DOWARN,
					       "dquot_buf_recover");
		}
		/* skip the copy (but not the region) if the dquot is bad */
		if (!error)
			memcpy(xfs_buf_offset(bp,
				(uint)bit << XFS_BLI_SHIFT),	/* dest */
				item->ri_buf[i].i_addr,		/* source */
				nbits<<XFS_BLI_SHIFT);		/* length */
		i++;
		bit += nbits;
	}

	/* Shouldn't be any more regions */
	ASSERT(i == item->ri_total);
}
1941 1940
1942 /* 1941 /*
1943 * Do some primitive error checking on ondisk dquot data structures. 1942 * Do some primitive error checking on ondisk dquot data structures.
1944 */ 1943 */
1945 int 1944 int
1946 xfs_qm_dqcheck( 1945 xfs_qm_dqcheck(
1947 xfs_disk_dquot_t *ddq, 1946 xfs_disk_dquot_t *ddq,
1948 xfs_dqid_t id, 1947 xfs_dqid_t id,
1949 uint type, /* used only when IO_dorepair is true */ 1948 uint type, /* used only when IO_dorepair is true */
1950 uint flags, 1949 uint flags,
1951 char *str) 1950 char *str)
1952 { 1951 {
1953 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; 1952 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
1954 int errs = 0; 1953 int errs = 0;
1955 1954
1956 /* 1955 /*
1957 * We can encounter an uninitialized dquot buffer for 2 reasons: 1956 * We can encounter an uninitialized dquot buffer for 2 reasons:
1958 * 1. If we crash while deleting the quotainode(s), and those blks got 1957 * 1. If we crash while deleting the quotainode(s), and those blks got
1959 * used for user data. This is because we take the path of regular 1958 * used for user data. This is because we take the path of regular
1960 * file deletion; however, the size field of quotainodes is never 1959 * file deletion; however, the size field of quotainodes is never
1961 * updated, so all the tricks that we play in itruncate_finish 1960 * updated, so all the tricks that we play in itruncate_finish
1962 * don't quite matter. 1961 * don't quite matter.
1963 * 1962 *
1964 * 2. We don't play the quota buffers when there's a quotaoff logitem. 1963 * 2. We don't play the quota buffers when there's a quotaoff logitem.
1965 * But the allocation will be replayed so we'll end up with an 1964 * But the allocation will be replayed so we'll end up with an
1966 * uninitialized quota block. 1965 * uninitialized quota block.
1967 * 1966 *
1968 * This is all fine; things are still consistent, and we haven't lost 1967 * This is all fine; things are still consistent, and we haven't lost
1969 * any quota information. Just don't complain about bad dquot blks. 1968 * any quota information. Just don't complain about bad dquot blks.
1970 */ 1969 */
1971 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { 1970 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
1972 if (flags & XFS_QMOPT_DOWARN) 1971 if (flags & XFS_QMOPT_DOWARN)
1973 cmn_err(CE_ALERT, 1972 cmn_err(CE_ALERT,
1974 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 1973 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
1975 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 1974 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
1976 errs++; 1975 errs++;
1977 } 1976 }
1978 if (ddq->d_version != XFS_DQUOT_VERSION) { 1977 if (ddq->d_version != XFS_DQUOT_VERSION) {
1979 if (flags & XFS_QMOPT_DOWARN) 1978 if (flags & XFS_QMOPT_DOWARN)
1980 cmn_err(CE_ALERT, 1979 cmn_err(CE_ALERT,
1981 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 1980 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
1982 str, id, ddq->d_version, XFS_DQUOT_VERSION); 1981 str, id, ddq->d_version, XFS_DQUOT_VERSION);
1983 errs++; 1982 errs++;
1984 } 1983 }
1985 1984
1986 if (ddq->d_flags != XFS_DQ_USER && 1985 if (ddq->d_flags != XFS_DQ_USER &&
1987 ddq->d_flags != XFS_DQ_PROJ && 1986 ddq->d_flags != XFS_DQ_PROJ &&
1988 ddq->d_flags != XFS_DQ_GROUP) { 1987 ddq->d_flags != XFS_DQ_GROUP) {
1989 if (flags & XFS_QMOPT_DOWARN) 1988 if (flags & XFS_QMOPT_DOWARN)
1990 cmn_err(CE_ALERT, 1989 cmn_err(CE_ALERT,
1991 "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 1990 "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
1992 str, id, ddq->d_flags); 1991 str, id, ddq->d_flags);
1993 errs++; 1992 errs++;
1994 } 1993 }
1995 1994
1996 if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 1995 if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
1997 if (flags & XFS_QMOPT_DOWARN) 1996 if (flags & XFS_QMOPT_DOWARN)
1998 cmn_err(CE_ALERT, 1997 cmn_err(CE_ALERT,
1999 "%s : ondisk-dquot 0x%p, ID mismatch: " 1998 "%s : ondisk-dquot 0x%p, ID mismatch: "
2000 "0x%x expected, found id 0x%x", 1999 "0x%x expected, found id 0x%x",
2001 str, ddq, id, be32_to_cpu(ddq->d_id)); 2000 str, ddq, id, be32_to_cpu(ddq->d_id));
2002 errs++; 2001 errs++;
2003 } 2002 }
2004 2003
2005 if (!errs && ddq->d_id) { 2004 if (!errs && ddq->d_id) {
2006 if (ddq->d_blk_softlimit && 2005 if (ddq->d_blk_softlimit &&
2007 be64_to_cpu(ddq->d_bcount) >= 2006 be64_to_cpu(ddq->d_bcount) >=
2008 be64_to_cpu(ddq->d_blk_softlimit)) { 2007 be64_to_cpu(ddq->d_blk_softlimit)) {
2009 if (!ddq->d_btimer) { 2008 if (!ddq->d_btimer) {
2010 if (flags & XFS_QMOPT_DOWARN) 2009 if (flags & XFS_QMOPT_DOWARN)
2011 cmn_err(CE_ALERT, 2010 cmn_err(CE_ALERT,
2012 "%s : Dquot ID 0x%x (0x%p) " 2011 "%s : Dquot ID 0x%x (0x%p) "
2013 "BLK TIMER NOT STARTED", 2012 "BLK TIMER NOT STARTED",
2014 str, (int)be32_to_cpu(ddq->d_id), ddq); 2013 str, (int)be32_to_cpu(ddq->d_id), ddq);
2015 errs++; 2014 errs++;
2016 } 2015 }
2017 } 2016 }
2018 if (ddq->d_ino_softlimit && 2017 if (ddq->d_ino_softlimit &&
2019 be64_to_cpu(ddq->d_icount) >= 2018 be64_to_cpu(ddq->d_icount) >=
2020 be64_to_cpu(ddq->d_ino_softlimit)) { 2019 be64_to_cpu(ddq->d_ino_softlimit)) {
2021 if (!ddq->d_itimer) { 2020 if (!ddq->d_itimer) {
2022 if (flags & XFS_QMOPT_DOWARN) 2021 if (flags & XFS_QMOPT_DOWARN)
2023 cmn_err(CE_ALERT, 2022 cmn_err(CE_ALERT,
2024 "%s : Dquot ID 0x%x (0x%p) " 2023 "%s : Dquot ID 0x%x (0x%p) "
2025 "INODE TIMER NOT STARTED", 2024 "INODE TIMER NOT STARTED",
2026 str, (int)be32_to_cpu(ddq->d_id), ddq); 2025 str, (int)be32_to_cpu(ddq->d_id), ddq);
2027 errs++; 2026 errs++;
2028 } 2027 }
2029 } 2028 }
2030 if (ddq->d_rtb_softlimit && 2029 if (ddq->d_rtb_softlimit &&
2031 be64_to_cpu(ddq->d_rtbcount) >= 2030 be64_to_cpu(ddq->d_rtbcount) >=
2032 be64_to_cpu(ddq->d_rtb_softlimit)) { 2031 be64_to_cpu(ddq->d_rtb_softlimit)) {
2033 if (!ddq->d_rtbtimer) { 2032 if (!ddq->d_rtbtimer) {
2034 if (flags & XFS_QMOPT_DOWARN) 2033 if (flags & XFS_QMOPT_DOWARN)
2035 cmn_err(CE_ALERT, 2034 cmn_err(CE_ALERT,
2036 "%s : Dquot ID 0x%x (0x%p) " 2035 "%s : Dquot ID 0x%x (0x%p) "
2037 "RTBLK TIMER NOT STARTED", 2036 "RTBLK TIMER NOT STARTED",
2038 str, (int)be32_to_cpu(ddq->d_id), ddq); 2037 str, (int)be32_to_cpu(ddq->d_id), ddq);
2039 errs++; 2038 errs++;
2040 } 2039 }
2041 } 2040 }
2042 } 2041 }
2043 2042
2044 if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) 2043 if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
2045 return errs; 2044 return errs;
2046 2045
2047 if (flags & XFS_QMOPT_DOWARN) 2046 if (flags & XFS_QMOPT_DOWARN)
2048 cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); 2047 cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id);
2049 2048
2050 /* 2049 /*
2051 * Typically, a repair is only requested by quotacheck. 2050 * Typically, a repair is only requested by quotacheck.
2052 */ 2051 */
2053 ASSERT(id != -1); 2052 ASSERT(id != -1);
2054 ASSERT(flags & XFS_QMOPT_DQREPAIR); 2053 ASSERT(flags & XFS_QMOPT_DQREPAIR);
2055 memset(d, 0, sizeof(xfs_dqblk_t)); 2054 memset(d, 0, sizeof(xfs_dqblk_t));
2056 2055
2057 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); 2056 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
2058 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 2057 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
2059 d->dd_diskdq.d_flags = type; 2058 d->dd_diskdq.d_flags = type;
2060 d->dd_diskdq.d_id = cpu_to_be32(id); 2059 d->dd_diskdq.d_id = cpu_to_be32(id);
2061 2060
2062 return errs; 2061 return errs;
2063 } 2062 }
2064 2063
2065 /* 2064 /*
2066 * Perform a dquot buffer recovery. 2065 * Perform a dquot buffer recovery.
2067 * Simple algorithm: if we have found a QUOTAOFF logitem of the same type 2066 * Simple algorithm: if we have found a QUOTAOFF logitem of the same type
2068 * (ie. USR or GRP), then just toss this buffer away; don't recover it. 2067 * (ie. USR or GRP), then just toss this buffer away; don't recover it.
2069 * Else, treat it as a regular buffer and do recovery. 2068 * Else, treat it as a regular buffer and do recovery.
2070 */ 2069 */
2071 STATIC void 2070 STATIC void
2072 xlog_recover_do_dquot_buffer( 2071 xlog_recover_do_dquot_buffer(
2073 xfs_mount_t *mp, 2072 xfs_mount_t *mp,
2074 xlog_t *log, 2073 xlog_t *log,
2075 xlog_recover_item_t *item, 2074 xlog_recover_item_t *item,
2076 xfs_buf_t *bp, 2075 xfs_buf_t *bp,
2077 xfs_buf_log_format_t *buf_f) 2076 xfs_buf_log_format_t *buf_f)
2078 { 2077 {
2079 uint type; 2078 uint type;
2080 2079
2081 /* 2080 /*
2082 * Filesystems are required to send in quota flags at mount time. 2081 * Filesystems are required to send in quota flags at mount time.
2083 */ 2082 */
2084 if (mp->m_qflags == 0) { 2083 if (mp->m_qflags == 0) {
2085 return; 2084 return;
2086 } 2085 }
2087 2086
2088 type = 0; 2087 type = 0;
2089 if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) 2088 if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF)
2090 type |= XFS_DQ_USER; 2089 type |= XFS_DQ_USER;
2091 if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) 2090 if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF)
2092 type |= XFS_DQ_PROJ; 2091 type |= XFS_DQ_PROJ;
2093 if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) 2092 if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF)
2094 type |= XFS_DQ_GROUP; 2093 type |= XFS_DQ_GROUP;
2095 /* 2094 /*
2096 * This type of quotas was turned off, so ignore this buffer 2095 * This type of quotas was turned off, so ignore this buffer
2097 */ 2096 */
2098 if (log->l_quotaoffs_flag & type) 2097 if (log->l_quotaoffs_flag & type)
2099 return; 2098 return;
2100 2099
2101 xlog_recover_do_reg_buffer(item, bp, buf_f); 2100 xlog_recover_do_reg_buffer(item, bp, buf_f);
2102 } 2101 }
2103 2102
2104 /* 2103 /*
2105 * This routine replays a modification made to a buffer at runtime. 2104 * This routine replays a modification made to a buffer at runtime.
2106 * There are actually two types of buffer, regular and inode, which 2105 * There are actually two types of buffer, regular and inode, which
2107 * are handled differently. Inode buffers are handled differently 2106 * are handled differently. Inode buffers are handled differently
2108 * in that we only recover a specific set of data from them, namely 2107 * in that we only recover a specific set of data from them, namely
2109 * the inode di_next_unlinked fields. This is because all other inode 2108 * the inode di_next_unlinked fields. This is because all other inode
2110 * data is actually logged via inode records and any data we replay 2109 * data is actually logged via inode records and any data we replay
2111 * here which overlaps that may be stale. 2110 * here which overlaps that may be stale.
2112 * 2111 *
2113 * When meta-data buffers are freed at run time we log a buffer item 2112 * When meta-data buffers are freed at run time we log a buffer item
2114 * with the XFS_BLI_CANCEL bit set to indicate that previous copies 2113 * with the XFS_BLI_CANCEL bit set to indicate that previous copies
2115 * of the buffer in the log should not be replayed at recovery time. 2114 * of the buffer in the log should not be replayed at recovery time.
2116 * This is so that if the blocks covered by the buffer are reused for 2115 * This is so that if the blocks covered by the buffer are reused for
2117 * file data before we crash we don't end up replaying old, freed 2116 * file data before we crash we don't end up replaying old, freed
2118 * meta-data into a user's file. 2117 * meta-data into a user's file.
2119 * 2118 *
2120 * To handle the cancellation of buffer log items, we make two passes 2119 * To handle the cancellation of buffer log items, we make two passes
2121 * over the log during recovery. During the first we build a table of 2120 * over the log during recovery. During the first we build a table of
2122 * those buffers which have been cancelled, and during the second we 2121 * those buffers which have been cancelled, and during the second we
2123 * only replay those buffers which do not have corresponding cancel 2122 * only replay those buffers which do not have corresponding cancel
2124 * records in the table. See xlog_recover_do_buffer_pass[1,2] above 2123 * records in the table. See xlog_recover_do_buffer_pass[1,2] above
2125 * for more details on the implementation of the table of cancel records. 2124 * for more details on the implementation of the table of cancel records.
2126 */ 2125 */
2127 STATIC int 2126 STATIC int
2128 xlog_recover_do_buffer_trans( 2127 xlog_recover_do_buffer_trans(
2129 xlog_t *log, 2128 xlog_t *log,
2130 xlog_recover_item_t *item, 2129 xlog_recover_item_t *item,
2131 int pass) 2130 int pass)
2132 { 2131 {
2133 xfs_buf_log_format_t *buf_f; 2132 xfs_buf_log_format_t *buf_f;
2134 xfs_mount_t *mp; 2133 xfs_mount_t *mp;
2135 xfs_buf_t *bp; 2134 xfs_buf_t *bp;
2136 int error; 2135 int error;
2137 int cancel; 2136 int cancel;
2138 xfs_daddr_t blkno; 2137 xfs_daddr_t blkno;
2139 int len; 2138 int len;
2140 ushort flags; 2139 ushort flags;
2141 2140
2142 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; 2141 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2143 2142
2144 if (pass == XLOG_RECOVER_PASS1) { 2143 if (pass == XLOG_RECOVER_PASS1) {
2145 /* 2144 /*
2146 * In this pass we're only looking for buf items 2145 * In this pass we're only looking for buf items
2147 * with the XFS_BLI_CANCEL bit set. 2146 * with the XFS_BLI_CANCEL bit set.
2148 */ 2147 */
2149 xlog_recover_do_buffer_pass1(log, buf_f); 2148 xlog_recover_do_buffer_pass1(log, buf_f);
2150 return 0; 2149 return 0;
2151 } else { 2150 } else {
2152 /* 2151 /*
2153 * In this pass we want to recover all the buffers 2152 * In this pass we want to recover all the buffers
2154 * which have not been cancelled and are not 2153 * which have not been cancelled and are not
2155 * cancellation buffers themselves. The routine 2154 * cancellation buffers themselves. The routine
2156 * we call here will tell us whether or not to 2155 * we call here will tell us whether or not to
2157 * continue with the replay of this buffer. 2156 * continue with the replay of this buffer.
2158 */ 2157 */
2159 cancel = xlog_recover_do_buffer_pass2(log, buf_f); 2158 cancel = xlog_recover_do_buffer_pass2(log, buf_f);
2160 if (cancel) { 2159 if (cancel) {
2161 return 0; 2160 return 0;
2162 } 2161 }
2163 } 2162 }
2164 switch (buf_f->blf_type) { 2163 switch (buf_f->blf_type) {
2165 case XFS_LI_BUF: 2164 case XFS_LI_BUF:
2166 blkno = buf_f->blf_blkno; 2165 blkno = buf_f->blf_blkno;
2167 len = buf_f->blf_len; 2166 len = buf_f->blf_len;
2168 flags = buf_f->blf_flags; 2167 flags = buf_f->blf_flags;
2169 break; 2168 break;
2170 default: 2169 default:
2171 xfs_fs_cmn_err(CE_ALERT, log->l_mp, 2170 xfs_fs_cmn_err(CE_ALERT, log->l_mp,
2172 "xfs_log_recover: unknown buffer type 0x%x, logdev %s", 2171 "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
2173 buf_f->blf_type, log->l_mp->m_logname ? 2172 buf_f->blf_type, log->l_mp->m_logname ?
2174 log->l_mp->m_logname : "internal"); 2173 log->l_mp->m_logname : "internal");
2175 XFS_ERROR_REPORT("xlog_recover_do_buffer_trans", 2174 XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
2176 XFS_ERRLEVEL_LOW, log->l_mp); 2175 XFS_ERRLEVEL_LOW, log->l_mp);
2177 return XFS_ERROR(EFSCORRUPTED); 2176 return XFS_ERROR(EFSCORRUPTED);
2178 } 2177 }
2179 2178
2180 mp = log->l_mp; 2179 mp = log->l_mp;
2181 if (flags & XFS_BLI_INODE_BUF) { 2180 if (flags & XFS_BLI_INODE_BUF) {
2182 bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, 2181 bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len,
2183 XFS_BUF_LOCK); 2182 XFS_BUF_LOCK);
2184 } else { 2183 } else {
2185 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); 2184 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0);
2186 } 2185 }
2187 if (XFS_BUF_ISERROR(bp)) { 2186 if (XFS_BUF_ISERROR(bp)) {
2188 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, 2187 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
2189 bp, blkno); 2188 bp, blkno);
2190 error = XFS_BUF_GETERROR(bp); 2189 error = XFS_BUF_GETERROR(bp);
2191 xfs_buf_relse(bp); 2190 xfs_buf_relse(bp);
2192 return error; 2191 return error;
2193 } 2192 }
2194 2193
2195 error = 0; 2194 error = 0;
2196 if (flags & XFS_BLI_INODE_BUF) { 2195 if (flags & XFS_BLI_INODE_BUF) {
2197 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2196 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
2198 } else if (flags & 2197 } else if (flags &
2199 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2198 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
2200 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2199 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2201 } else { 2200 } else {
2202 xlog_recover_do_reg_buffer(item, bp, buf_f); 2201 xlog_recover_do_reg_buffer(item, bp, buf_f);
2203 } 2202 }
2204 if (error) 2203 if (error)
2205 return XFS_ERROR(error); 2204 return XFS_ERROR(error);
2206 2205
2207 /* 2206 /*
2208 * Perform delayed write on the buffer. Asynchronous writes will be 2207 * Perform delayed write on the buffer. Asynchronous writes will be
2209 * slower when taking into account all the buffers to be flushed. 2208 * slower when taking into account all the buffers to be flushed.
2210 * 2209 *
2211 * Also make sure that only inode buffers with good sizes stay in 2210 * Also make sure that only inode buffers with good sizes stay in
2212 * the buffer cache. The kernel moves inodes in buffers of 1 block 2211 * the buffer cache. The kernel moves inodes in buffers of 1 block
2213 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode 2212 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode
2214 * buffers in the log can be a different size if the log was generated 2213 * buffers in the log can be a different size if the log was generated
2215 * by an older kernel using unclustered inode buffers or a newer kernel 2214 * by an older kernel using unclustered inode buffers or a newer kernel
2216 * running with a different inode cluster size. Regardless, if the 2215 * running with a different inode cluster size. Regardless, if the
2217 * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) 2216 * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE)
2218 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep 2217 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep
2219 * the buffer out of the buffer cache so that the buffer won't 2218 * the buffer out of the buffer cache so that the buffer won't
2220 * overlap with future reads of those inodes. 2219 * overlap with future reads of those inodes.
2221 */ 2220 */
2222 if (XFS_DINODE_MAGIC == 2221 if (XFS_DINODE_MAGIC ==
2223 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && 2222 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
2224 (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, 2223 (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize,
2225 (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { 2224 (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
2226 XFS_BUF_STALE(bp); 2225 XFS_BUF_STALE(bp);
2227 error = xfs_bwrite(mp, bp); 2226 error = xfs_bwrite(mp, bp);
2228 } else { 2227 } else {
2229 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2228 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
2230 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2229 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
2231 XFS_BUF_SET_FSPRIVATE(bp, mp); 2230 XFS_BUF_SET_FSPRIVATE(bp, mp);
2232 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2231 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2233 xfs_bdwrite(mp, bp); 2232 xfs_bdwrite(mp, bp);
2234 } 2233 }
2235 2234
2236 return (error); 2235 return (error);
2237 } 2236 }
2238 2237
/*
 * Replay an inode log item: copy the logged in-core inode (and any logged
 * data/attr fork contents) back over the on-disk inode in its backing
 * buffer, after sanity-checking both the on-disk inode and the log copy.
 * Runs only in recovery pass 2.
 */
STATIC int
xlog_recover_do_inode_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	int			pass)
{
	xfs_inode_log_format_t	*in_f;
	xfs_mount_t		*mp;
	xfs_buf_t		*bp;
	xfs_dinode_t		*dip;	/* on-disk inode inside the buffer */
	xfs_ino_t		ino;
	int			len;
	xfs_caddr_t		src;
	xfs_caddr_t		dest;
	int			error;
	int			attr_index;
	uint			fields;	/* ilf_fields: which parts were logged */
	xfs_icdinode_t		*dicp;	/* logged in-core inode copy */
	int			need_free = 0;

	if (pass == XLOG_RECOVER_PASS1) {
		return 0;
	}

	/*
	 * The format item may be in the current layout, or an older layout
	 * that has to be converted into a temporary allocation first.
	 */
	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
		in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
	} else {
		in_f = (xfs_inode_log_format_t *)kmem_alloc(
			sizeof(xfs_inode_log_format_t), KM_SLEEP);
		need_free = 1;
		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
		if (error)
			goto error;
	}
	ino = in_f->ilf_ino;
	mp = log->l_mp;

	/*
	 * Inode buffers can be freed, look out for it,
	 * and do not replay the inode.
	 */
	if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
					in_f->ilf_len, 0)) {
		error = 0;
		goto error;
	}

	bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno,
				in_f->ilf_len, XFS_BUF_LOCK);
	if (XFS_BUF_ISERROR(bp)) {
		xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
				  bp, in_f->ilf_blkno);
		error = XFS_BUF_GETERROR(bp);
		xfs_buf_relse(bp);
		goto error;
	}
	error = 0;
	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
	dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);

	/*
	 * Make sure the place we're flushing out to really looks
	 * like an inode!
	 */
	if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
			dip, bp, ino);
		XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
				 XFS_ERRLEVEL_LOW, mp);
		error = EFSCORRUPTED;
		goto error;
	}
	/* The logged copy must also look like an inode core. */
	dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr);
	if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
			item, ino);
		XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
				 XFS_ERRLEVEL_LOW, mp);
		error = EFSCORRUPTED;
		goto error;
	}

	/* Skip replay when the on disk inode is newer than the log one */
	if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
		/*
		 * Deal with the wrap case, DI_MAX_FLUSH is less
		 * than smaller numbers
		 */
		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
		    dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
			/* do nothing */
		} else {
			/* on-disk copy is newer: skip replay, not an error */
			xfs_buf_relse(bp);
			error = 0;
			goto error;
		}
	}
	/* Take the opportunity to reset the flush iteration count */
	dicp->di_flushiter = 0;

	/*
	 * Sanity-check the logged inode against its file type: regular
	 * files may only use extents or btree format, directories may
	 * additionally be in local (shortform) format.
	 */
	if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
			XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)",
					 XFS_ERRLEVEL_LOW, mp, dicp);
			xfs_buf_relse(bp);
			xfs_fs_cmn_err(CE_ALERT, mp,
				"xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
				item, dip, bp, ino);
			error = EFSCORRUPTED;
			goto error;
		}
	} else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
		    (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
			XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)",
					     XFS_ERRLEVEL_LOW, mp, dicp);
			xfs_buf_relse(bp);
			xfs_fs_cmn_err(CE_ALERT, mp,
				"xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
				item, dip, bp, ino);
			error = EFSCORRUPTED;
			goto error;
		}
	}
	/* Extent counts can never exceed the block count. */
	if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)",
				     XFS_ERRLEVEL_LOW, mp, dicp);
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
			item, dip, bp, ino,
			dicp->di_nextents + dicp->di_anextents,
			dicp->di_nblocks);
		error = EFSCORRUPTED;
		goto error;
	}
	/* The attr fork offset must lie inside the inode. */
	if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
				     XFS_ERRLEVEL_LOW, mp, dicp);
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
			item, dip, bp, ino, dicp->di_forkoff);
		error = EFSCORRUPTED;
		goto error;
	}
	/* The logged core region can never be larger than an icdinode. */
	if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
				     XFS_ERRLEVEL_LOW, mp, dicp);
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p",
			item->ri_buf[1].i_len, item);
		error = EFSCORRUPTED;
		goto error;
	}

	/* The core is in in-core format */
	xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);

	/* the rest is in on-disk format */
	if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
		memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
			item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
			item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
	}

	/*
	 * DEV and UUID are mutually exclusive members of the same union
	 * in the format item; copy whichever one was logged.
	 */
	fields = in_f->ilf_fields;
	switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
	case XFS_ILOG_DEV:
		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
		break;
	case XFS_ILOG_UUID:
		memcpy(XFS_DFORK_DPTR(dip),
		       &in_f->ilf_u.ilfu_uuid,
		       sizeof(uuid_t));
		break;
	}

	/*
	 * ilf_size counts the log regions used by this item: 2 means only
	 * format + core were logged, so there is no fork data to copy.
	 */
	if (in_f->ilf_size == 2)
		goto write_inode_buffer;
	len = item->ri_buf[2].i_len;
	src = item->ri_buf[2].i_addr;
	ASSERT(in_f->ilf_size <= 4);
	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
	ASSERT(!(fields & XFS_ILOG_DFORK) ||
	       (len == in_f->ilf_dsize));

	/* Copy the logged data fork, if any, back into the inode. */
	switch (fields & XFS_ILOG_DFORK) {
	case XFS_ILOG_DDATA:
	case XFS_ILOG_DEXT:
		memcpy(XFS_DFORK_DPTR(dip), src, len);
		break;

	case XFS_ILOG_DBROOT:
		/* btree roots are logged in long form; condense to disk form */
		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
				 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
				 XFS_DFORK_DSIZE(dip, mp));
		break;

	default:
		/*
		 * There are no data fork flags set.
		 */
		ASSERT((fields & XFS_ILOG_DFORK) == 0);
		break;
	}

	/*
	 * If we logged any attribute data, recover it.  There may or
	 * may not have been any other non-core data logged in this
	 * transaction.
	 */
	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
		/* attr fork region follows the data fork region, if present */
		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
			attr_index = 3;
		} else {
			attr_index = 2;
		}
		len = item->ri_buf[attr_index].i_len;
		src = item->ri_buf[attr_index].i_addr;
		ASSERT(len == in_f->ilf_asize);

		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
		case XFS_ILOG_ADATA:
		case XFS_ILOG_AEXT:
			dest = XFS_DFORK_APTR(dip);
			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
			memcpy(dest, src, len);
			break;

		case XFS_ILOG_ABROOT:
			dest = XFS_DFORK_APTR(dip);
			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
					 len, (xfs_bmdr_block_t*)dest,
					 XFS_DFORK_ASIZE(dip, mp));
			break;

		default:
			xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
			ASSERT(0);
			xfs_buf_relse(bp);
			error = EIO;
			goto error;
		}
	}

write_inode_buffer:
	if (ITEM_TYPE(item) == XFS_LI_INODE) {
		/* normal case: delay-write the recovered buffer */
		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
		       XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
		XFS_BUF_SET_FSPRIVATE(bp, mp);
		XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
		xfs_bdwrite(mp, bp);
	} else {
		/* NOTE(review): non-XFS_LI_INODE items are written out
		 * synchronously and kept out of the cache — presumably the
		 * 6.1-format item path; confirm against ITEM_TYPE users. */
		XFS_BUF_STALE(bp);
		error = xfs_bwrite(mp, bp);
	}

error:
	/* free the converted format item if we allocated one above */
	if (need_free)
		kmem_free(in_f);
	return XFS_ERROR(error);
}
2509 2508
2510 /* 2509 /*
2511 * Recover QUOTAOFF records. We simply make a note of it in the xlog_t 2510 * Recover QUOTAOFF records. We simply make a note of it in the xlog_t
2512 * structure, so that we know not to do any dquot item or dquot buffer recovery, 2511 * structure, so that we know not to do any dquot item or dquot buffer recovery,
2513 * of that type. 2512 * of that type.
2514 */ 2513 */
2515 STATIC int 2514 STATIC int
2516 xlog_recover_do_quotaoff_trans( 2515 xlog_recover_do_quotaoff_trans(
2517 xlog_t *log, 2516 xlog_t *log,
2518 xlog_recover_item_t *item, 2517 xlog_recover_item_t *item,
2519 int pass) 2518 int pass)
2520 { 2519 {
2521 xfs_qoff_logformat_t *qoff_f; 2520 xfs_qoff_logformat_t *qoff_f;
2522 2521
2523 if (pass == XLOG_RECOVER_PASS2) { 2522 if (pass == XLOG_RECOVER_PASS2) {
2524 return (0); 2523 return (0);
2525 } 2524 }
2526 2525
2527 qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; 2526 qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr;
2528 ASSERT(qoff_f); 2527 ASSERT(qoff_f);
2529 2528
2530 /* 2529 /*
2531 * The logitem format's flag tells us if this was user quotaoff, 2530 * The logitem format's flag tells us if this was user quotaoff,
2532 * group/project quotaoff or both. 2531 * group/project quotaoff or both.
2533 */ 2532 */
2534 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 2533 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
2535 log->l_quotaoffs_flag |= XFS_DQ_USER; 2534 log->l_quotaoffs_flag |= XFS_DQ_USER;
2536 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) 2535 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
2537 log->l_quotaoffs_flag |= XFS_DQ_PROJ; 2536 log->l_quotaoffs_flag |= XFS_DQ_PROJ;
2538 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 2537 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
2539 log->l_quotaoffs_flag |= XFS_DQ_GROUP; 2538 log->l_quotaoffs_flag |= XFS_DQ_GROUP;
2540 2539
2541 return (0); 2540 return (0);
2542 } 2541 }
2543 2542
2544 /* 2543 /*
2545 * Recover a dquot record 2544 * Recover a dquot record
2546 */ 2545 */
2547 STATIC int 2546 STATIC int
2548 xlog_recover_do_dquot_trans( 2547 xlog_recover_do_dquot_trans(
2549 xlog_t *log, 2548 xlog_t *log,
2550 xlog_recover_item_t *item, 2549 xlog_recover_item_t *item,
2551 int pass) 2550 int pass)
2552 { 2551 {
2553 xfs_mount_t *mp; 2552 xfs_mount_t *mp;
2554 xfs_buf_t *bp; 2553 xfs_buf_t *bp;
2555 struct xfs_disk_dquot *ddq, *recddq; 2554 struct xfs_disk_dquot *ddq, *recddq;
2556 int error; 2555 int error;
2557 xfs_dq_logformat_t *dq_f; 2556 xfs_dq_logformat_t *dq_f;
2558 uint type; 2557 uint type;
2559 2558
2560 if (pass == XLOG_RECOVER_PASS1) { 2559 if (pass == XLOG_RECOVER_PASS1) {
2561 return 0; 2560 return 0;
2562 } 2561 }
2563 mp = log->l_mp; 2562 mp = log->l_mp;
2564 2563
2565 /* 2564 /*
2566 * Filesystems are required to send in quota flags at mount time. 2565 * Filesystems are required to send in quota flags at mount time.
2567 */ 2566 */
2568 if (mp->m_qflags == 0) 2567 if (mp->m_qflags == 0)
2569 return (0); 2568 return (0);
2570 2569
2571 recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; 2570 recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
2572 ASSERT(recddq); 2571 ASSERT(recddq);
2573 /* 2572 /*
2574 * This type of quotas was turned off, so ignore this record. 2573 * This type of quotas was turned off, so ignore this record.
2575 */ 2574 */
2576 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); 2575 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
2577 ASSERT(type); 2576 ASSERT(type);
2578 if (log->l_quotaoffs_flag & type) 2577 if (log->l_quotaoffs_flag & type)
2579 return (0); 2578 return (0);
2580 2579
2581 /* 2580 /*
2582 * At this point we know that quota was _not_ turned off. 2581 * At this point we know that quota was _not_ turned off.
2583 * Since the mount flags are not indicating to us otherwise, this 2582 * Since the mount flags are not indicating to us otherwise, this
2584 * must mean that quota is on, and the dquot needs to be replayed. 2583 * must mean that quota is on, and the dquot needs to be replayed.
2585 * Remember that we may not have fully recovered the superblock yet, 2584 * Remember that we may not have fully recovered the superblock yet,
2586 * so we can't do the usual trick of looking at the SB quota bits. 2585 * so we can't do the usual trick of looking at the SB quota bits.
2587 * 2586 *
2588 * The other possibility, of course, is that the quota subsystem was 2587 * The other possibility, of course, is that the quota subsystem was
2589 * removed since the last mount - ENOSYS. 2588 * removed since the last mount - ENOSYS.
2590 */ 2589 */
2591 dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; 2590 dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr;
2592 ASSERT(dq_f); 2591 ASSERT(dq_f);
2593 if ((error = xfs_qm_dqcheck(recddq, 2592 if ((error = xfs_qm_dqcheck(recddq,
2594 dq_f->qlf_id, 2593 dq_f->qlf_id,
2595 0, XFS_QMOPT_DOWARN, 2594 0, XFS_QMOPT_DOWARN,
2596 "xlog_recover_do_dquot_trans (log copy)"))) { 2595 "xlog_recover_do_dquot_trans (log copy)"))) {
2597 return XFS_ERROR(EIO); 2596 return XFS_ERROR(EIO);
2598 } 2597 }
2599 ASSERT(dq_f->qlf_len == 1); 2598 ASSERT(dq_f->qlf_len == 1);
2600 2599
2601 error = xfs_read_buf(mp, mp->m_ddev_targp, 2600 error = xfs_read_buf(mp, mp->m_ddev_targp,
2602 dq_f->qlf_blkno, 2601 dq_f->qlf_blkno,
2603 XFS_FSB_TO_BB(mp, dq_f->qlf_len), 2602 XFS_FSB_TO_BB(mp, dq_f->qlf_len),
2604 0, &bp); 2603 0, &bp);
2605 if (error) { 2604 if (error) {
2606 xfs_ioerror_alert("xlog_recover_do..(read#3)", mp, 2605 xfs_ioerror_alert("xlog_recover_do..(read#3)", mp,
2607 bp, dq_f->qlf_blkno); 2606 bp, dq_f->qlf_blkno);
2608 return error; 2607 return error;
2609 } 2608 }
2610 ASSERT(bp); 2609 ASSERT(bp);
2611 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); 2610 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
2612 2611
2613 /* 2612 /*
2614 * At least the magic num portion should be on disk because this 2613 * At least the magic num portion should be on disk because this
2615 * was among a chunk of dquots created earlier, and we did some 2614 * was among a chunk of dquots created earlier, and we did some
2616 * minimal initialization then. 2615 * minimal initialization then.
2617 */ 2616 */
2618 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2617 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2619 "xlog_recover_do_dquot_trans")) { 2618 "xlog_recover_do_dquot_trans")) {
2620 xfs_buf_relse(bp); 2619 xfs_buf_relse(bp);
2621 return XFS_ERROR(EIO); 2620 return XFS_ERROR(EIO);
2622 } 2621 }
2623 2622
2624 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2623 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2625 2624
2626 ASSERT(dq_f->qlf_size == 2); 2625 ASSERT(dq_f->qlf_size == 2);
2627 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2626 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
2628 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2627 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
2629 XFS_BUF_SET_FSPRIVATE(bp, mp); 2628 XFS_BUF_SET_FSPRIVATE(bp, mp);
2630 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2629 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2631 xfs_bdwrite(mp, bp); 2630 xfs_bdwrite(mp, bp);
2632 2631
2633 return (0); 2632 return (0);
2634 } 2633 }
2635 2634
2636 /* 2635 /*
2637 * This routine is called to create an in-core extent free intent 2636 * This routine is called to create an in-core extent free intent
2638 * item from the efi format structure which was logged on disk. 2637 * item from the efi format structure which was logged on disk.
2639 * It allocates an in-core efi, copies the extents from the format 2638 * It allocates an in-core efi, copies the extents from the format
2640 * structure into it, and adds the efi to the AIL with the given 2639 * structure into it, and adds the efi to the AIL with the given
2641 * LSN. 2640 * LSN.
2642 */ 2641 */
2643 STATIC int 2642 STATIC int
2644 xlog_recover_do_efi_trans( 2643 xlog_recover_do_efi_trans(
2645 xlog_t *log, 2644 xlog_t *log,
2646 xlog_recover_item_t *item, 2645 xlog_recover_item_t *item,
2647 xfs_lsn_t lsn, 2646 xfs_lsn_t lsn,
2648 int pass) 2647 int pass)
2649 { 2648 {
2650 int error; 2649 int error;
2651 xfs_mount_t *mp; 2650 xfs_mount_t *mp;
2652 xfs_efi_log_item_t *efip; 2651 xfs_efi_log_item_t *efip;
2653 xfs_efi_log_format_t *efi_formatp; 2652 xfs_efi_log_format_t *efi_formatp;
2654 2653
2655 if (pass == XLOG_RECOVER_PASS1) { 2654 if (pass == XLOG_RECOVER_PASS1) {
2656 return 0; 2655 return 0;
2657 } 2656 }
2658 2657
2659 efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; 2658 efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr;
2660 2659
2661 mp = log->l_mp; 2660 mp = log->l_mp;
2662 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2661 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
2663 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), 2662 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
2664 &(efip->efi_format)))) { 2663 &(efip->efi_format)))) {
2665 xfs_efi_item_free(efip); 2664 xfs_efi_item_free(efip);
2666 return error; 2665 return error;
2667 } 2666 }
2668 efip->efi_next_extent = efi_formatp->efi_nextents; 2667 efip->efi_next_extent = efi_formatp->efi_nextents;
2669 efip->efi_flags |= XFS_EFI_COMMITTED; 2668 efip->efi_flags |= XFS_EFI_COMMITTED;
2670 2669
2671 spin_lock(&log->l_ailp->xa_lock); 2670 spin_lock(&log->l_ailp->xa_lock);
2672 /* 2671 /*
2673 * xfs_trans_ail_update() drops the AIL lock. 2672 * xfs_trans_ail_update() drops the AIL lock.
2674 */ 2673 */
2675 xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); 2674 xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
2676 return 0; 2675 return 0;
2677 } 2676 }
2678 2677
2679 2678
2680 /* 2679 /*
2681 * This routine is called when an efd format structure is found in 2680 * This routine is called when an efd format structure is found in
2682 * a committed transaction in the log. It's purpose is to cancel 2681 * a committed transaction in the log. It's purpose is to cancel
2683 * the corresponding efi if it was still in the log. To do this 2682 * the corresponding efi if it was still in the log. To do this
2684 * it searches the AIL for the efi with an id equal to that in the 2683 * it searches the AIL for the efi with an id equal to that in the
2685 * efd format structure. If we find it, we remove the efi from the 2684 * efd format structure. If we find it, we remove the efi from the
2686 * AIL and free it. 2685 * AIL and free it.
2687 */ 2686 */
2688 STATIC void 2687 STATIC void
2689 xlog_recover_do_efd_trans( 2688 xlog_recover_do_efd_trans(
2690 xlog_t *log, 2689 xlog_t *log,
2691 xlog_recover_item_t *item, 2690 xlog_recover_item_t *item,
2692 int pass) 2691 int pass)
2693 { 2692 {
2694 xfs_efd_log_format_t *efd_formatp; 2693 xfs_efd_log_format_t *efd_formatp;
2695 xfs_efi_log_item_t *efip = NULL; 2694 xfs_efi_log_item_t *efip = NULL;
2696 xfs_log_item_t *lip; 2695 xfs_log_item_t *lip;
2697 __uint64_t efi_id; 2696 __uint64_t efi_id;
2698 struct xfs_ail_cursor cur; 2697 struct xfs_ail_cursor cur;
2699 struct xfs_ail *ailp = log->l_ailp; 2698 struct xfs_ail *ailp = log->l_ailp;
2700 2699
2701 if (pass == XLOG_RECOVER_PASS1) { 2700 if (pass == XLOG_RECOVER_PASS1) {
2702 return; 2701 return;
2703 } 2702 }
2704 2703
2705 efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; 2704 efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
2706 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2705 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2707 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2706 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
2708 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + 2707 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
2709 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); 2708 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
2710 efi_id = efd_formatp->efd_efi_id; 2709 efi_id = efd_formatp->efd_efi_id;
2711 2710
2712 /* 2711 /*
2713 * Search for the efi with the id in the efd format structure 2712 * Search for the efi with the id in the efd format structure
2714 * in the AIL. 2713 * in the AIL.
2715 */ 2714 */
2716 spin_lock(&ailp->xa_lock); 2715 spin_lock(&ailp->xa_lock);
2717 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 2716 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
2718 while (lip != NULL) { 2717 while (lip != NULL) {
2719 if (lip->li_type == XFS_LI_EFI) { 2718 if (lip->li_type == XFS_LI_EFI) {
2720 efip = (xfs_efi_log_item_t *)lip; 2719 efip = (xfs_efi_log_item_t *)lip;
2721 if (efip->efi_format.efi_id == efi_id) { 2720 if (efip->efi_format.efi_id == efi_id) {
2722 /* 2721 /*
2723 * xfs_trans_ail_delete() drops the 2722 * xfs_trans_ail_delete() drops the
2724 * AIL lock. 2723 * AIL lock.
2725 */ 2724 */
2726 xfs_trans_ail_delete(ailp, lip); 2725 xfs_trans_ail_delete(ailp, lip);
2727 xfs_efi_item_free(efip); 2726 xfs_efi_item_free(efip);
2728 spin_lock(&ailp->xa_lock); 2727 spin_lock(&ailp->xa_lock);
2729 break; 2728 break;
2730 } 2729 }
2731 } 2730 }
2732 lip = xfs_trans_ail_cursor_next(ailp, &cur); 2731 lip = xfs_trans_ail_cursor_next(ailp, &cur);
2733 } 2732 }
2734 xfs_trans_ail_cursor_done(ailp, &cur); 2733 xfs_trans_ail_cursor_done(ailp, &cur);
2735 spin_unlock(&ailp->xa_lock); 2734 spin_unlock(&ailp->xa_lock);
2736 } 2735 }
2737 2736
2738 /* 2737 /*
2739 * Perform the transaction 2738 * Perform the transaction
2740 * 2739 *
2741 * If the transaction modifies a buffer or inode, do it now. Otherwise, 2740 * If the transaction modifies a buffer or inode, do it now. Otherwise,
2742 * EFIs and EFDs get queued up by adding entries into the AIL for them. 2741 * EFIs and EFDs get queued up by adding entries into the AIL for them.
2743 */ 2742 */
2744 STATIC int 2743 STATIC int
2745 xlog_recover_do_trans( 2744 xlog_recover_do_trans(
2746 xlog_t *log, 2745 xlog_t *log,
2747 xlog_recover_t *trans, 2746 xlog_recover_t *trans,
2748 int pass) 2747 int pass)
2749 { 2748 {
2750 int error = 0; 2749 int error = 0;
2751 xlog_recover_item_t *item, *first_item; 2750 xlog_recover_item_t *item, *first_item;
2752 2751
2753 if ((error = xlog_recover_reorder_trans(trans))) 2752 if ((error = xlog_recover_reorder_trans(trans)))
2754 return error; 2753 return error;
2755 first_item = item = trans->r_itemq; 2754 first_item = item = trans->r_itemq;
2756 do { 2755 do {
2757 /* 2756 /*
2758 * we don't need to worry about the block number being 2757 * we don't need to worry about the block number being
2759 * truncated in > 1 TB buffers because in user-land, 2758 * truncated in > 1 TB buffers because in user-land,
2760 * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so 2759 * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so
2761 * the blknos will get through the user-mode buffer 2760 * the blknos will get through the user-mode buffer
2762 * cache properly. The only bad case is o32 kernels 2761 * cache properly. The only bad case is o32 kernels
2763 * where xfs_daddr_t is 32-bits but mount will warn us 2762 * where xfs_daddr_t is 32-bits but mount will warn us
2764 * off a > 1 TB filesystem before we get here. 2763 * off a > 1 TB filesystem before we get here.
2765 */ 2764 */
2766 if ((ITEM_TYPE(item) == XFS_LI_BUF)) { 2765 if ((ITEM_TYPE(item) == XFS_LI_BUF)) {
2767 if ((error = xlog_recover_do_buffer_trans(log, item, 2766 if ((error = xlog_recover_do_buffer_trans(log, item,
2768 pass))) 2767 pass)))
2769 break; 2768 break;
2770 } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { 2769 } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) {
2771 if ((error = xlog_recover_do_inode_trans(log, item, 2770 if ((error = xlog_recover_do_inode_trans(log, item,
2772 pass))) 2771 pass)))
2773 break; 2772 break;
2774 } else if (ITEM_TYPE(item) == XFS_LI_EFI) { 2773 } else if (ITEM_TYPE(item) == XFS_LI_EFI) {
2775 if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, 2774 if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn,
2776 pass))) 2775 pass)))
2777 break; 2776 break;
2778 } else if (ITEM_TYPE(item) == XFS_LI_EFD) { 2777 } else if (ITEM_TYPE(item) == XFS_LI_EFD) {
2779 xlog_recover_do_efd_trans(log, item, pass); 2778 xlog_recover_do_efd_trans(log, item, pass);
2780 } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { 2779 } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) {
2781 if ((error = xlog_recover_do_dquot_trans(log, item, 2780 if ((error = xlog_recover_do_dquot_trans(log, item,
2782 pass))) 2781 pass)))
2783 break; 2782 break;
2784 } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { 2783 } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) {
2785 if ((error = xlog_recover_do_quotaoff_trans(log, item, 2784 if ((error = xlog_recover_do_quotaoff_trans(log, item,
2786 pass))) 2785 pass)))
2787 break; 2786 break;
2788 } else { 2787 } else {
2789 xlog_warn("XFS: xlog_recover_do_trans"); 2788 xlog_warn("XFS: xlog_recover_do_trans");
2790 ASSERT(0); 2789 ASSERT(0);
2791 error = XFS_ERROR(EIO); 2790 error = XFS_ERROR(EIO);
2792 break; 2791 break;
2793 } 2792 }
2794 item = item->ri_next; 2793 item = item->ri_next;
2795 } while (first_item != item); 2794 } while (first_item != item);
2796 2795
2797 return error; 2796 return error;
2798 } 2797 }
2799 2798
2800 /* 2799 /*
2801 * Free up any resources allocated by the transaction 2800 * Free up any resources allocated by the transaction
2802 * 2801 *
2803 * Remember that EFIs, EFDs, and IUNLINKs are handled later. 2802 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
2804 */ 2803 */
2805 STATIC void 2804 STATIC void
2806 xlog_recover_free_trans( 2805 xlog_recover_free_trans(
2807 xlog_recover_t *trans) 2806 xlog_recover_t *trans)
2808 { 2807 {
2809 xlog_recover_item_t *first_item, *item, *free_item; 2808 xlog_recover_item_t *first_item, *item, *free_item;
2810 int i; 2809 int i;
2811 2810
2812 item = first_item = trans->r_itemq; 2811 item = first_item = trans->r_itemq;
2813 do { 2812 do {
2814 free_item = item; 2813 free_item = item;
2815 item = item->ri_next; 2814 item = item->ri_next;
2816 /* Free the regions in the item. */ 2815 /* Free the regions in the item. */
2817 for (i = 0; i < free_item->ri_cnt; i++) { 2816 for (i = 0; i < free_item->ri_cnt; i++) {
2818 kmem_free(free_item->ri_buf[i].i_addr); 2817 kmem_free(free_item->ri_buf[i].i_addr);
2819 } 2818 }
2820 /* Free the item itself */ 2819 /* Free the item itself */
2821 kmem_free(free_item->ri_buf); 2820 kmem_free(free_item->ri_buf);
2822 kmem_free(free_item); 2821 kmem_free(free_item);
2823 } while (first_item != item); 2822 } while (first_item != item);
2824 /* Free the transaction recover structure */ 2823 /* Free the transaction recover structure */
2825 kmem_free(trans); 2824 kmem_free(trans);
2826 } 2825 }
2827 2826
2828 STATIC int 2827 STATIC int
2829 xlog_recover_commit_trans( 2828 xlog_recover_commit_trans(
2830 xlog_t *log, 2829 xlog_t *log,
2831 xlog_recover_t **q, 2830 xlog_recover_t **q,
2832 xlog_recover_t *trans, 2831 xlog_recover_t *trans,
2833 int pass) 2832 int pass)
2834 { 2833 {
2835 int error; 2834 int error;
2836 2835
2837 if ((error = xlog_recover_unlink_tid(q, trans))) 2836 if ((error = xlog_recover_unlink_tid(q, trans)))
2838 return error; 2837 return error;
2839 if ((error = xlog_recover_do_trans(log, trans, pass))) 2838 if ((error = xlog_recover_do_trans(log, trans, pass)))
2840 return error; 2839 return error;
2841 xlog_recover_free_trans(trans); /* no error */ 2840 xlog_recover_free_trans(trans); /* no error */
2842 return 0; 2841 return 0;
2843 } 2842 }
2844 2843
2845 STATIC int 2844 STATIC int
2846 xlog_recover_unmount_trans( 2845 xlog_recover_unmount_trans(
2847 xlog_recover_t *trans) 2846 xlog_recover_t *trans)
2848 { 2847 {
2849 /* Do nothing now */ 2848 /* Do nothing now */
2850 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); 2849 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
2851 return 0; 2850 return 0;
2852 } 2851 }
2853 2852
2854 /* 2853 /*
2855 * There are two valid states of the r_state field. 0 indicates that the 2854 * There are two valid states of the r_state field. 0 indicates that the
2856 * transaction structure is in a normal state. We have either seen the 2855 * transaction structure is in a normal state. We have either seen the
2857 * start of the transaction or the last operation we added was not a partial 2856 * start of the transaction or the last operation we added was not a partial
2858 * operation. If the last operation we added to the transaction was a 2857 * operation. If the last operation we added to the transaction was a
2859 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. 2858 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
2860 * 2859 *
2861 * NOTE: skip LRs with 0 data length. 2860 * NOTE: skip LRs with 0 data length.
2862 */ 2861 */
2863 STATIC int 2862 STATIC int
2864 xlog_recover_process_data( 2863 xlog_recover_process_data(
2865 xlog_t *log, 2864 xlog_t *log,
2866 xlog_recover_t *rhash[], 2865 xlog_recover_t *rhash[],
2867 xlog_rec_header_t *rhead, 2866 xlog_rec_header_t *rhead,
2868 xfs_caddr_t dp, 2867 xfs_caddr_t dp,
2869 int pass) 2868 int pass)
2870 { 2869 {
2871 xfs_caddr_t lp; 2870 xfs_caddr_t lp;
2872 int num_logops; 2871 int num_logops;
2873 xlog_op_header_t *ohead; 2872 xlog_op_header_t *ohead;
2874 xlog_recover_t *trans; 2873 xlog_recover_t *trans;
2875 xlog_tid_t tid; 2874 xlog_tid_t tid;
2876 int error; 2875 int error;
2877 unsigned long hash; 2876 unsigned long hash;
2878 uint flags; 2877 uint flags;
2879 2878
2880 lp = dp + be32_to_cpu(rhead->h_len); 2879 lp = dp + be32_to_cpu(rhead->h_len);
2881 num_logops = be32_to_cpu(rhead->h_num_logops); 2880 num_logops = be32_to_cpu(rhead->h_num_logops);
2882 2881
2883 /* check the log format matches our own - else we can't recover */ 2882 /* check the log format matches our own - else we can't recover */
2884 if (xlog_header_check_recover(log->l_mp, rhead)) 2883 if (xlog_header_check_recover(log->l_mp, rhead))
2885 return (XFS_ERROR(EIO)); 2884 return (XFS_ERROR(EIO));
2886 2885
2887 while ((dp < lp) && num_logops) { 2886 while ((dp < lp) && num_logops) {
2888 ASSERT(dp + sizeof(xlog_op_header_t) <= lp); 2887 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
2889 ohead = (xlog_op_header_t *)dp; 2888 ohead = (xlog_op_header_t *)dp;
2890 dp += sizeof(xlog_op_header_t); 2889 dp += sizeof(xlog_op_header_t);
2891 if (ohead->oh_clientid != XFS_TRANSACTION && 2890 if (ohead->oh_clientid != XFS_TRANSACTION &&
2892 ohead->oh_clientid != XFS_LOG) { 2891 ohead->oh_clientid != XFS_LOG) {
2893 xlog_warn( 2892 xlog_warn(
2894 "XFS: xlog_recover_process_data: bad clientid"); 2893 "XFS: xlog_recover_process_data: bad clientid");
2895 ASSERT(0); 2894 ASSERT(0);
2896 return (XFS_ERROR(EIO)); 2895 return (XFS_ERROR(EIO));
2897 } 2896 }
2898 tid = be32_to_cpu(ohead->oh_tid); 2897 tid = be32_to_cpu(ohead->oh_tid);
2899 hash = XLOG_RHASH(tid); 2898 hash = XLOG_RHASH(tid);
2900 trans = xlog_recover_find_tid(rhash[hash], tid); 2899 trans = xlog_recover_find_tid(rhash[hash], tid);
2901 if (trans == NULL) { /* not found; add new tid */ 2900 if (trans == NULL) { /* not found; add new tid */
2902 if (ohead->oh_flags & XLOG_START_TRANS) 2901 if (ohead->oh_flags & XLOG_START_TRANS)
2903 xlog_recover_new_tid(&rhash[hash], tid, 2902 xlog_recover_new_tid(&rhash[hash], tid,
2904 be64_to_cpu(rhead->h_lsn)); 2903 be64_to_cpu(rhead->h_lsn));
2905 } else { 2904 } else {
2906 if (dp + be32_to_cpu(ohead->oh_len) > lp) { 2905 if (dp + be32_to_cpu(ohead->oh_len) > lp) {
2907 xlog_warn( 2906 xlog_warn(
2908 "XFS: xlog_recover_process_data: bad length"); 2907 "XFS: xlog_recover_process_data: bad length");
2909 WARN_ON(1); 2908 WARN_ON(1);
2910 return (XFS_ERROR(EIO)); 2909 return (XFS_ERROR(EIO));
2911 } 2910 }
2912 flags = ohead->oh_flags & ~XLOG_END_TRANS; 2911 flags = ohead->oh_flags & ~XLOG_END_TRANS;
2913 if (flags & XLOG_WAS_CONT_TRANS) 2912 if (flags & XLOG_WAS_CONT_TRANS)
2914 flags &= ~XLOG_CONTINUE_TRANS; 2913 flags &= ~XLOG_CONTINUE_TRANS;
2915 switch (flags) { 2914 switch (flags) {
2916 case XLOG_COMMIT_TRANS: 2915 case XLOG_COMMIT_TRANS:
2917 error = xlog_recover_commit_trans(log, 2916 error = xlog_recover_commit_trans(log,
2918 &rhash[hash], trans, pass); 2917 &rhash[hash], trans, pass);
2919 break; 2918 break;
2920 case XLOG_UNMOUNT_TRANS: 2919 case XLOG_UNMOUNT_TRANS:
2921 error = xlog_recover_unmount_trans(trans); 2920 error = xlog_recover_unmount_trans(trans);
2922 break; 2921 break;
2923 case XLOG_WAS_CONT_TRANS: 2922 case XLOG_WAS_CONT_TRANS:
2924 error = xlog_recover_add_to_cont_trans(trans, 2923 error = xlog_recover_add_to_cont_trans(trans,
2925 dp, be32_to_cpu(ohead->oh_len)); 2924 dp, be32_to_cpu(ohead->oh_len));
2926 break; 2925 break;
2927 case XLOG_START_TRANS: 2926 case XLOG_START_TRANS:
2928 xlog_warn( 2927 xlog_warn(
2929 "XFS: xlog_recover_process_data: bad transaction"); 2928 "XFS: xlog_recover_process_data: bad transaction");
2930 ASSERT(0); 2929 ASSERT(0);
2931 error = XFS_ERROR(EIO); 2930 error = XFS_ERROR(EIO);
2932 break; 2931 break;
2933 case 0: 2932 case 0:
2934 case XLOG_CONTINUE_TRANS: 2933 case XLOG_CONTINUE_TRANS:
2935 error = xlog_recover_add_to_trans(trans, 2934 error = xlog_recover_add_to_trans(trans,
2936 dp, be32_to_cpu(ohead->oh_len)); 2935 dp, be32_to_cpu(ohead->oh_len));
2937 break; 2936 break;
2938 default: 2937 default:
2939 xlog_warn( 2938 xlog_warn(
2940 "XFS: xlog_recover_process_data: bad flag"); 2939 "XFS: xlog_recover_process_data: bad flag");
2941 ASSERT(0); 2940 ASSERT(0);
2942 error = XFS_ERROR(EIO); 2941 error = XFS_ERROR(EIO);
2943 break; 2942 break;
2944 } 2943 }
2945 if (error) 2944 if (error)
2946 return error; 2945 return error;
2947 } 2946 }
2948 dp += be32_to_cpu(ohead->oh_len); 2947 dp += be32_to_cpu(ohead->oh_len);
2949 num_logops--; 2948 num_logops--;
2950 } 2949 }
2951 return 0; 2950 return 0;
2952 } 2951 }
2953 2952
2954 /* 2953 /*
2955 * Process an extent free intent item that was recovered from 2954 * Process an extent free intent item that was recovered from
2956 * the log. We need to free the extents that it describes. 2955 * the log. We need to free the extents that it describes.
2957 */ 2956 */
2958 STATIC int 2957 STATIC int
2959 xlog_recover_process_efi( 2958 xlog_recover_process_efi(
2960 xfs_mount_t *mp, 2959 xfs_mount_t *mp,
2961 xfs_efi_log_item_t *efip) 2960 xfs_efi_log_item_t *efip)
2962 { 2961 {
2963 xfs_efd_log_item_t *efdp; 2962 xfs_efd_log_item_t *efdp;
2964 xfs_trans_t *tp; 2963 xfs_trans_t *tp;
2965 int i; 2964 int i;
2966 int error = 0; 2965 int error = 0;
2967 xfs_extent_t *extp; 2966 xfs_extent_t *extp;
2968 xfs_fsblock_t startblock_fsb; 2967 xfs_fsblock_t startblock_fsb;
2969 2968
2970 ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED)); 2969 ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED));
2971 2970
2972 /* 2971 /*
2973 * First check the validity of the extents described by the 2972 * First check the validity of the extents described by the
2974 * EFI. If any are bad, then assume that all are bad and 2973 * EFI. If any are bad, then assume that all are bad and
2975 * just toss the EFI. 2974 * just toss the EFI.
2976 */ 2975 */
2977 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 2976 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
2978 extp = &(efip->efi_format.efi_extents[i]); 2977 extp = &(efip->efi_format.efi_extents[i]);
2979 startblock_fsb = XFS_BB_TO_FSB(mp, 2978 startblock_fsb = XFS_BB_TO_FSB(mp,
2980 XFS_FSB_TO_DADDR(mp, extp->ext_start)); 2979 XFS_FSB_TO_DADDR(mp, extp->ext_start));
2981 if ((startblock_fsb == 0) || 2980 if ((startblock_fsb == 0) ||
2982 (extp->ext_len == 0) || 2981 (extp->ext_len == 0) ||
2983 (startblock_fsb >= mp->m_sb.sb_dblocks) || 2982 (startblock_fsb >= mp->m_sb.sb_dblocks) ||
2984 (extp->ext_len >= mp->m_sb.sb_agblocks)) { 2983 (extp->ext_len >= mp->m_sb.sb_agblocks)) {
2985 /* 2984 /*
2986 * This will pull the EFI from the AIL and 2985 * This will pull the EFI from the AIL and
2987 * free the memory associated with it. 2986 * free the memory associated with it.
2988 */ 2987 */
2989 xfs_efi_release(efip, efip->efi_format.efi_nextents); 2988 xfs_efi_release(efip, efip->efi_format.efi_nextents);
2990 return XFS_ERROR(EIO); 2989 return XFS_ERROR(EIO);
2991 } 2990 }
2992 } 2991 }
2993 2992
2994 tp = xfs_trans_alloc(mp, 0); 2993 tp = xfs_trans_alloc(mp, 0);
2995 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); 2994 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
2996 if (error) 2995 if (error)
2997 goto abort_error; 2996 goto abort_error;
2998 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 2997 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
2999 2998
3000 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 2999 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
3001 extp = &(efip->efi_format.efi_extents[i]); 3000 extp = &(efip->efi_format.efi_extents[i]);
3002 error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); 3001 error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
3003 if (error) 3002 if (error)
3004 goto abort_error; 3003 goto abort_error;
3005 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, 3004 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
3006 extp->ext_len); 3005 extp->ext_len);
3007 } 3006 }
3008 3007
3009 efip->efi_flags |= XFS_EFI_RECOVERED; 3008 efip->efi_flags |= XFS_EFI_RECOVERED;
3010 error = xfs_trans_commit(tp, 0); 3009 error = xfs_trans_commit(tp, 0);
3011 return error; 3010 return error;
3012 3011
3013 abort_error: 3012 abort_error:
3014 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3013 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
3015 return error; 3014 return error;
3016 } 3015 }
3017 3016
3018 /* 3017 /*
3019 * When this is called, all of the EFIs which did not have 3018 * When this is called, all of the EFIs which did not have
3020 * corresponding EFDs should be in the AIL. What we do now 3019 * corresponding EFDs should be in the AIL. What we do now
3021 * is free the extents associated with each one. 3020 * is free the extents associated with each one.
3022 * 3021 *
3023 * Since we process the EFIs in normal transactions, they 3022 * Since we process the EFIs in normal transactions, they
3024 * will be removed at some point after the commit. This prevents 3023 * will be removed at some point after the commit. This prevents
3025 * us from just walking down the list processing each one. 3024 * us from just walking down the list processing each one.
3026 * We'll use a flag in the EFI to skip those that we've already 3025 * We'll use a flag in the EFI to skip those that we've already
3027 * processed and use the AIL iteration mechanism's generation 3026 * processed and use the AIL iteration mechanism's generation
3028 * count to try to speed this up at least a bit. 3027 * count to try to speed this up at least a bit.
3029 * 3028 *
3030 * When we start, we know that the EFIs are the only things in 3029 * When we start, we know that the EFIs are the only things in
3031 * the AIL. As we process them, however, other items are added 3030 * the AIL. As we process them, however, other items are added
3032 * to the AIL. Since everything added to the AIL must come after 3031 * to the AIL. Since everything added to the AIL must come after
3033 * everything already in the AIL, we stop processing as soon as 3032 * everything already in the AIL, we stop processing as soon as
3034 * we see something other than an EFI in the AIL. 3033 * we see something other than an EFI in the AIL.
3035 */ 3034 */
STATIC int
xlog_recover_process_efis(
	xlog_t			*log)
{
	xfs_log_item_t		*lip;
	xfs_efi_log_item_t	*efip;
	int			error = 0;
	struct xfs_ail_cursor	cur;
	struct xfs_ail		*ailp;

	ailp = log->l_ailp;
	spin_lock(&ailp->xa_lock);
	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
	while (lip != NULL) {
		/*
		 * We're done when we see something other than an EFI.
		 * There should be no EFIs left in the AIL now.
		 */
		if (lip->li_type != XFS_LI_EFI) {
#ifdef DEBUG
			/* verify the "EFIs sort first" assumption above */
			for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
				ASSERT(lip->li_type != XFS_LI_EFI);
#endif
			break;
		}

		/*
		 * Skip EFIs that we've already processed.
		 */
		efip = (xfs_efi_log_item_t *)lip;
		if (efip->efi_flags & XFS_EFI_RECOVERED) {
			lip = xfs_trans_ail_cursor_next(ailp, &cur);
			continue;
		}

		/*
		 * Drop the AIL lock across the call: processing an EFI
		 * runs a full transaction (see xfs_trans_commit() in
		 * xlog_recover_process_efi()), which cannot be done
		 * while holding a spinlock.  The cursor keeps our place.
		 */
		spin_unlock(&ailp->xa_lock);
		error = xlog_recover_process_efi(log->l_mp, efip);
		spin_lock(&ailp->xa_lock);
		if (error)
			goto out;
		lip = xfs_trans_ail_cursor_next(ailp, &cur);
	}
out:
	xfs_trans_ail_cursor_done(ailp, &cur);
	spin_unlock(&ailp->xa_lock);
	return error;
}
3083 3082
3084 /* 3083 /*
3085 * This routine performs a transaction to null out a bad inode pointer 3084 * This routine performs a transaction to null out a bad inode pointer
3086 * in an agi unlinked inode hash bucket. 3085 * in an agi unlinked inode hash bucket.
3087 */ 3086 */
STATIC void
xlog_recover_clear_agi_bucket(
	xfs_mount_t	*mp,		/* mount being recovered */
	xfs_agnumber_t	agno,		/* AG whose AGI we are repairing */
	int		bucket)		/* unlinked hash bucket to clear */
{
	xfs_trans_t	*tp;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibp;
	int		offset;
	int		error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
	error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
				  0, 0, 0);
	if (error)
		goto out_abort;

	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		goto out_abort;

	/* Terminate this bucket's unlinked chain right at the AGI. */
	agi = XFS_BUF_TO_AGI(agibp);
	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
	/* Log only the one agi_unlinked[] slot we modified. */
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		 (sizeof(xfs_agino_t) * bucket);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));

	error = xfs_trans_commit(tp, 0);
	if (error)
		goto out_error;
	return;

 out_abort:
	xfs_trans_cancel(tp, XFS_TRANS_ABORT);
 out_error:
	/* Best effort only: warn and keep recovering the other buckets. */
	xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: "
			"failed to clear agi %d. Continuing.", agno);
	return;
}
3129 3128
/*
 * Process a single inode from an AGI unlinked-list bucket: read it in,
 * record the next inode in the on-disk chain, and drop our reference so
 * the normal inactivation path truncates and frees it.
 *
 * Returns the agino of the next entry in the bucket, or NULLAGINO after
 * ditching an unreadable bucket.
 */
STATIC xfs_agino_t
xlog_recover_process_one_iunlink(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino,
	int			bucket)
{
	struct xfs_buf		*ibp;
	struct xfs_dinode	*dip;
	struct xfs_inode	*ip;
	xfs_ino_t		ino;
	int			error;

	ino = XFS_AGINO_TO_INO(mp, agno, agino);
	error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
	if (error)
		goto fail;

	/*
	 * Get the on disk inode to find the next inode in the bucket.
	 *
	 * NOTE(review): if xfs_itobp() fails here, the reference taken by
	 * xfs_iget() above does not appear to be dropped on the fail path —
	 * verify against the inode lifecycle rules.
	 */
	ASSERT(ip != NULL);
	error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK);
	if (error)
		goto fail;

	ASSERT(dip != NULL);
	ASSERT(ip->i_d.di_nlink == 0);

	/* setup for the next pass */
	agino = be32_to_cpu(dip->di_next_unlinked);
	xfs_buf_relse(ibp);

	/*
	 * Prevent any DMAPI event from being sent when the reference on
	 * the inode is dropped.
	 */
	ip->i_d.di_dmevmask = 0;

	/*
	 * If this is a new inode, handle it specially.  Otherwise, just
	 * drop our reference to the inode.  If there are no other
	 * references, this will send the inode to xfs_inactive() which
	 * will truncate the file and free the inode.
	 */
	if (ip->i_d.di_mode == 0)
		xfs_iput_new(ip, 0);
	else
		IRELE(ip);
	return agino;

 fail:
	/*
	 * We can't read in the inode this bucket points to, or this inode
	 * is messed up.  Just ditch this bucket of inodes.  We will lose
	 * some inodes and space, but at least we won't hang.
	 *
	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
	 * clear the inode pointer in the bucket.
	 */
	xlog_recover_clear_agi_bucket(mp, agno, bucket);
	return NULLAGINO;
}
3193 3192
3194 /* 3193 /*
3195 * xlog_iunlink_recover 3194 * xlog_iunlink_recover
3196 * 3195 *
3197 * This is called during recovery to process any inodes which 3196 * This is called during recovery to process any inodes which
3198 * we unlinked but not freed when the system crashed. These 3197 * we unlinked but not freed when the system crashed. These
3199 * inodes will be on the lists in the AGI blocks. What we do 3198 * inodes will be on the lists in the AGI blocks. What we do
3200 * here is scan all the AGIs and fully truncate and free any 3199 * here is scan all the AGIs and fully truncate and free any
3201 * inodes found on the lists. Each inode is removed from the 3200 * inodes found on the lists. Each inode is removed from the
3202 * lists when it has been fully truncated and is freed. The 3201 * lists when it has been fully truncated and is freed. The
3203 * freeing of the inode and its removal from the list must be 3202 * freeing of the inode and its removal from the list must be
3204 * atomic. 3203 * atomic.
3205 */ 3204 */
void
xlog_recover_process_iunlinks(
	xlog_t		*log)
{
	xfs_mount_t	*mp;
	xfs_agnumber_t	agno;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibp;
	xfs_agino_t	agino;
	int		bucket;
	int		error;
	uint		mp_dmevmask;

	mp = log->l_mp;

	/*
	 * Prevent any DMAPI event from being sent while in this function.
	 */
	mp_dmevmask = mp->m_dmevmask;
	mp->m_dmevmask = 0;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		/*
		 * Find the agi for this ag.
		 */
		error = xfs_read_agi(mp, NULL, agno, &agibp);
		if (error) {
			/*
			 * AGI is b0rked. Don't process it.
			 *
			 * We should probably mark the filesystem as corrupt
			 * after we've recovered all the ag's we can....
			 */
			continue;
		}
		agi = XFS_BUF_TO_AGI(agibp);

		/* Walk every unlinked hash bucket in this AGI. */
		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
			while (agino != NULLAGINO) {
				/*
				 * Release the agi buffer so that it can
				 * be acquired in the normal course of the
				 * transaction to truncate and free the inode.
				 */
				xfs_buf_relse(agibp);

				agino = xlog_recover_process_one_iunlink(mp,
							agno, agino, bucket);

				/*
				 * Reacquire the agibuffer and continue around
				 * the loop. This should never fail as we know
				 * the buffer was good earlier on.
				 *
				 * NOTE(review): the error is only checked by
				 * ASSERT, so a non-DEBUG build would carry on
				 * with a stale agi pointer if the re-read ever
				 * failed — confirm this invariant holds.
				 */
				error = xfs_read_agi(mp, NULL, agno, &agibp);
				ASSERT(error == 0);
				agi = XFS_BUF_TO_AGI(agibp);
			}
		}

		/*
		 * Release the buffer for the current agi so we can
		 * go on to the next one.
		 */
		xfs_buf_relse(agibp);
	}

	/* Restore DMAPI event delivery now that recovery is done. */
	mp->m_dmevmask = mp_dmevmask;
}
3276 3275
3277 3276
#ifdef DEBUG
/*
 * Fold the iclog data area into a simple XOR checksum and stash it in
 * the iclog header.  DEBUG builds only; production builds compile this
 * away to nothing via the macro below.
 */
STATIC void
xlog_pack_data_checksum(
	xlog_t		*log,
	xlog_in_core_t	*iclog,
	int		size)
{
	__be32		*word = (__be32 *)iclog->ic_datap;
	__be32		*end = word + (size >> 2);	/* size is bytes; walk words */
	uint		sum = 0;

	while (word < end) {
		sum ^= be32_to_cpu(*word);
		word++;
	}
	iclog->ic_header.h_chksum = cpu_to_be32(sum);
}
#else
#define xlog_pack_data_checksum(log, iclog, size)
#endif
3300 3299
3301 /* 3300 /*
3302 * Stamp cycle number in every block 3301 * Stamp cycle number in every block
3303 */ 3302 */
3304 void 3303 void
3305 xlog_pack_data( 3304 xlog_pack_data(
3306 xlog_t *log, 3305 xlog_t *log,
3307 xlog_in_core_t *iclog, 3306 xlog_in_core_t *iclog,
3308 int roundoff) 3307 int roundoff)
3309 { 3308 {
3310 int i, j, k; 3309 int i, j, k;
3311 int size = iclog->ic_offset + roundoff; 3310 int size = iclog->ic_offset + roundoff;
3312 __be32 cycle_lsn; 3311 __be32 cycle_lsn;
3313 xfs_caddr_t dp; 3312 xfs_caddr_t dp;
3314 3313
3315 xlog_pack_data_checksum(log, iclog, size); 3314 xlog_pack_data_checksum(log, iclog, size);
3316 3315
3317 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); 3316 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
3318 3317
3319 dp = iclog->ic_datap; 3318 dp = iclog->ic_datap;
3320 for (i = 0; i < BTOBB(size) && 3319 for (i = 0; i < BTOBB(size) &&
3321 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3320 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
3322 iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; 3321 iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
3323 *(__be32 *)dp = cycle_lsn; 3322 *(__be32 *)dp = cycle_lsn;
3324 dp += BBSIZE; 3323 dp += BBSIZE;
3325 } 3324 }
3326 3325
3327 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3326 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3328 xlog_in_core_2_t *xhdr = iclog->ic_data; 3327 xlog_in_core_2_t *xhdr = iclog->ic_data;
3329 3328
3330 for ( ; i < BTOBB(size); i++) { 3329 for ( ; i < BTOBB(size); i++) {
3331 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3330 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3332 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3331 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3333 xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; 3332 xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
3334 *(__be32 *)dp = cycle_lsn; 3333 *(__be32 *)dp = cycle_lsn;
3335 dp += BBSIZE; 3334 dp += BBSIZE;
3336 } 3335 }
3337 3336
3338 for (i = 1; i < log->l_iclog_heads; i++) { 3337 for (i = 1; i < log->l_iclog_heads; i++) {
3339 xhdr[i].hic_xheader.xh_cycle = cycle_lsn; 3338 xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
3340 } 3339 }
3341 } 3340 }
3342 } 3341 }
3343 3342
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
/*
 * Recompute the XOR checksum over a log record body and compare it with
 * the value stored in the record header, warning (once per log, unless
 * the on-disk checksum is nonzero) on mismatch.
 */
STATIC void
xlog_unpack_data_checksum(
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,
	xlog_t			*log)
{
	__be32		*word = (__be32 *)dp;
	uint		sum = 0;
	int		i;

	/* h_len is in bytes; fold it word by word. */
	for (i = 0; i < be32_to_cpu(rhead->h_len) >> 2; i++)
		sum ^= be32_to_cpu(word[i]);

	if (sum == be32_to_cpu(rhead->h_chksum))
		return;

	/* Complain only once unless the stored checksum was nonzero. */
	if (rhead->h_chksum ||
	    !(log->l_flags & XLOG_CHKSUM_MISMATCH)) {
		cmn_err(CE_DEBUG,
			"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
			be32_to_cpu(rhead->h_chksum), sum);
		cmn_err(CE_DEBUG,
"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
		if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
			cmn_err(CE_DEBUG,
				"XFS: LogR this is a LogV2 filesystem\n");
		}
		log->l_flags |= XLOG_CHKSUM_MISMATCH;
	}
}
#else
#define xlog_unpack_data_checksum(rhead, dp, log)
#endif
3379 3378
3380 STATIC void 3379 STATIC void
3381 xlog_unpack_data( 3380 xlog_unpack_data(
3382 xlog_rec_header_t *rhead, 3381 xlog_rec_header_t *rhead,
3383 xfs_caddr_t dp, 3382 xfs_caddr_t dp,
3384 xlog_t *log) 3383 xlog_t *log)
3385 { 3384 {
3386 int i, j, k; 3385 int i, j, k;
3387 3386
3388 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 3387 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
3389 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3388 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
3390 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; 3389 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
3391 dp += BBSIZE; 3390 dp += BBSIZE;
3392 } 3391 }
3393 3392
3394 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3393 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3395 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; 3394 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
3396 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { 3395 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
3397 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3396 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3398 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3397 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3399 *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; 3398 *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
3400 dp += BBSIZE; 3399 dp += BBSIZE;
3401 } 3400 }
3402 } 3401 }
3403 3402
3404 xlog_unpack_data_checksum(rhead, dp, log); 3403 xlog_unpack_data_checksum(rhead, dp, log);
3405 } 3404 }
3406 3405
3407 STATIC int 3406 STATIC int
3408 xlog_valid_rec_header( 3407 xlog_valid_rec_header(
3409 xlog_t *log, 3408 xlog_t *log,
3410 xlog_rec_header_t *rhead, 3409 xlog_rec_header_t *rhead,
3411 xfs_daddr_t blkno) 3410 xfs_daddr_t blkno)
3412 { 3411 {
3413 int hlen; 3412 int hlen;
3414 3413
3415 if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { 3414 if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) {
3416 XFS_ERROR_REPORT("xlog_valid_rec_header(1)", 3415 XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
3417 XFS_ERRLEVEL_LOW, log->l_mp); 3416 XFS_ERRLEVEL_LOW, log->l_mp);
3418 return XFS_ERROR(EFSCORRUPTED); 3417 return XFS_ERROR(EFSCORRUPTED);
3419 } 3418 }
3420 if (unlikely( 3419 if (unlikely(
3421 (!rhead->h_version || 3420 (!rhead->h_version ||
3422 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 3421 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
3423 xlog_warn("XFS: %s: unrecognised log version (%d).", 3422 xlog_warn("XFS: %s: unrecognised log version (%d).",
3424 __func__, be32_to_cpu(rhead->h_version)); 3423 __func__, be32_to_cpu(rhead->h_version));
3425 return XFS_ERROR(EIO); 3424 return XFS_ERROR(EIO);
3426 } 3425 }
3427 3426
3428 /* LR body must have data or it wouldn't have been written */ 3427 /* LR body must have data or it wouldn't have been written */
3429 hlen = be32_to_cpu(rhead->h_len); 3428 hlen = be32_to_cpu(rhead->h_len);
3430 if (unlikely( hlen <= 0 || hlen > INT_MAX )) { 3429 if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
3431 XFS_ERROR_REPORT("xlog_valid_rec_header(2)", 3430 XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
3432 XFS_ERRLEVEL_LOW, log->l_mp); 3431 XFS_ERRLEVEL_LOW, log->l_mp);
3433 return XFS_ERROR(EFSCORRUPTED); 3432 return XFS_ERROR(EFSCORRUPTED);
3434 } 3433 }
3435 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { 3434 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
3436 XFS_ERROR_REPORT("xlog_valid_rec_header(3)", 3435 XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
3437 XFS_ERRLEVEL_LOW, log->l_mp); 3436 XFS_ERRLEVEL_LOW, log->l_mp);
3438 return XFS_ERROR(EFSCORRUPTED); 3437 return XFS_ERROR(EFSCORRUPTED);
3439 } 3438 }
3440 return 0; 3439 return 0;
3441 } 3440 }
3442 3441
3443 /* 3442 /*
3444 * Read the log from tail to head and process the log records found. 3443 * Read the log from tail to head and process the log records found.
3445 * Handle the two cases where the tail and head are in the same cycle 3444 * Handle the two cases where the tail and head are in the same cycle
3446 * and where the active portion of the log wraps around the end of 3445 * and where the active portion of the log wraps around the end of
3447 * the physical log separately. The pass parameter is passed through 3446 * the physical log separately. The pass parameter is passed through
3448 * to the routines called to process the data and is not looked at 3447 * to the routines called to process the data and is not looked at
3449 * here. 3448 * here.
3450 */ 3449 */
3451 STATIC int 3450 STATIC int
3452 xlog_do_recovery_pass( 3451 xlog_do_recovery_pass(
3453 xlog_t *log, 3452 xlog_t *log,
3454 xfs_daddr_t head_blk, 3453 xfs_daddr_t head_blk,
3455 xfs_daddr_t tail_blk, 3454 xfs_daddr_t tail_blk,
3456 int pass) 3455 int pass)
3457 { 3456 {
3458 xlog_rec_header_t *rhead; 3457 xlog_rec_header_t *rhead;
3459 xfs_daddr_t blk_no; 3458 xfs_daddr_t blk_no;
3460 xfs_caddr_t bufaddr, offset; 3459 xfs_caddr_t bufaddr, offset;
3461 xfs_buf_t *hbp, *dbp; 3460 xfs_buf_t *hbp, *dbp;
3462 int error = 0, h_size; 3461 int error = 0, h_size;
3463 int bblks, split_bblks; 3462 int bblks, split_bblks;
3464 int hblks, split_hblks, wrapped_hblks; 3463 int hblks, split_hblks, wrapped_hblks;
3465 xlog_recover_t *rhash[XLOG_RHASH_SIZE]; 3464 xlog_recover_t *rhash[XLOG_RHASH_SIZE];
3466 3465
3467 ASSERT(head_blk != tail_blk); 3466 ASSERT(head_blk != tail_blk);
3468 3467
3469 /* 3468 /*
3470 * Read the header of the tail block and get the iclog buffer size from 3469 * Read the header of the tail block and get the iclog buffer size from
3471 * h_size. Use this to tell how many sectors make up the log header. 3470 * h_size. Use this to tell how many sectors make up the log header.
3472 */ 3471 */
3473 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3472 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3474 /* 3473 /*
3475 * When using variable length iclogs, read first sector of 3474 * When using variable length iclogs, read first sector of
3476 * iclog header and extract the header size from it. Get a 3475 * iclog header and extract the header size from it. Get a
3477 * new hbp that is the correct size. 3476 * new hbp that is the correct size.
3478 */ 3477 */
3479 hbp = xlog_get_bp(log, 1); 3478 hbp = xlog_get_bp(log, 1);
3480 if (!hbp) 3479 if (!hbp)
3481 return ENOMEM; 3480 return ENOMEM;
3482 if ((error = xlog_bread(log, tail_blk, 1, hbp))) 3481 if ((error = xlog_bread(log, tail_blk, 1, hbp)))
3483 goto bread_err1; 3482 goto bread_err1;
3484 offset = xlog_align(log, tail_blk, 1, hbp); 3483 offset = xlog_align(log, tail_blk, 1, hbp);
3485 rhead = (xlog_rec_header_t *)offset; 3484 rhead = (xlog_rec_header_t *)offset;
3486 error = xlog_valid_rec_header(log, rhead, tail_blk); 3485 error = xlog_valid_rec_header(log, rhead, tail_blk);
3487 if (error) 3486 if (error)
3488 goto bread_err1; 3487 goto bread_err1;
3489 h_size = be32_to_cpu(rhead->h_size); 3488 h_size = be32_to_cpu(rhead->h_size);
3490 if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && 3489 if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
3491 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 3490 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
3492 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 3491 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
3493 if (h_size % XLOG_HEADER_CYCLE_SIZE) 3492 if (h_size % XLOG_HEADER_CYCLE_SIZE)
3494 hblks++; 3493 hblks++;
3495 xlog_put_bp(hbp); 3494 xlog_put_bp(hbp);
3496 hbp = xlog_get_bp(log, hblks); 3495 hbp = xlog_get_bp(log, hblks);
3497 } else { 3496 } else {
3498 hblks = 1; 3497 hblks = 1;
3499 } 3498 }
3500 } else { 3499 } else {
3501 ASSERT(log->l_sectbb_log == 0); 3500 ASSERT(log->l_sectbb_log == 0);
3502 hblks = 1; 3501 hblks = 1;
3503 hbp = xlog_get_bp(log, 1); 3502 hbp = xlog_get_bp(log, 1);
3504 h_size = XLOG_BIG_RECORD_BSIZE; 3503 h_size = XLOG_BIG_RECORD_BSIZE;
3505 } 3504 }
3506 3505
3507 if (!hbp) 3506 if (!hbp)
3508 return ENOMEM; 3507 return ENOMEM;
3509 dbp = xlog_get_bp(log, BTOBB(h_size)); 3508 dbp = xlog_get_bp(log, BTOBB(h_size));
3510 if (!dbp) { 3509 if (!dbp) {
3511 xlog_put_bp(hbp); 3510 xlog_put_bp(hbp);
3512 return ENOMEM; 3511 return ENOMEM;
3513 } 3512 }
3514 3513
3515 memset(rhash, 0, sizeof(rhash)); 3514 memset(rhash, 0, sizeof(rhash));
3516 if (tail_blk <= head_blk) { 3515 if (tail_blk <= head_blk) {
3517 for (blk_no = tail_blk; blk_no < head_blk; ) { 3516 for (blk_no = tail_blk; blk_no < head_blk; ) {
3518 if ((error = xlog_bread(log, blk_no, hblks, hbp))) 3517 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
3519 goto bread_err2; 3518 goto bread_err2;
3520 offset = xlog_align(log, blk_no, hblks, hbp); 3519 offset = xlog_align(log, blk_no, hblks, hbp);
3521 rhead = (xlog_rec_header_t *)offset; 3520 rhead = (xlog_rec_header_t *)offset;
3522 error = xlog_valid_rec_header(log, rhead, blk_no); 3521 error = xlog_valid_rec_header(log, rhead, blk_no);
3523 if (error) 3522 if (error)
3524 goto bread_err2; 3523 goto bread_err2;
3525 3524
3526 /* blocks in data section */ 3525 /* blocks in data section */
3527 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3526 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3528 error = xlog_bread(log, blk_no + hblks, bblks, dbp); 3527 error = xlog_bread(log, blk_no + hblks, bblks, dbp);
3529 if (error) 3528 if (error)
3530 goto bread_err2; 3529 goto bread_err2;
3531 offset = xlog_align(log, blk_no + hblks, bblks, dbp); 3530 offset = xlog_align(log, blk_no + hblks, bblks, dbp);
3532 xlog_unpack_data(rhead, offset, log); 3531 xlog_unpack_data(rhead, offset, log);
3533 if ((error = xlog_recover_process_data(log, 3532 if ((error = xlog_recover_process_data(log,
3534 rhash, rhead, offset, pass))) 3533 rhash, rhead, offset, pass)))
3535 goto bread_err2; 3534 goto bread_err2;
3536 blk_no += bblks + hblks; 3535 blk_no += bblks + hblks;
3537 } 3536 }
3538 } else { 3537 } else {
3539 /* 3538 /*
3540 * Perform recovery around the end of the physical log. 3539 * Perform recovery around the end of the physical log.
3541 * When the head is not on the same cycle number as the tail, 3540 * When the head is not on the same cycle number as the tail,
3542 * we can't do a sequential recovery as above. 3541 * we can't do a sequential recovery as above.
3543 */ 3542 */
3544 blk_no = tail_blk; 3543 blk_no = tail_blk;
3545 while (blk_no < log->l_logBBsize) { 3544 while (blk_no < log->l_logBBsize) {
3546 /* 3545 /*
3547 * Check for header wrapping around physical end-of-log 3546 * Check for header wrapping around physical end-of-log
3548 */ 3547 */
3549 offset = NULL; 3548 offset = NULL;
3550 split_hblks = 0; 3549 split_hblks = 0;
3551 wrapped_hblks = 0; 3550 wrapped_hblks = 0;
3552 if (blk_no + hblks <= log->l_logBBsize) { 3551 if (blk_no + hblks <= log->l_logBBsize) {
3553 /* Read header in one read */ 3552 /* Read header in one read */
3554 error = xlog_bread(log, blk_no, hblks, hbp); 3553 error = xlog_bread(log, blk_no, hblks, hbp);
3555 if (error) 3554 if (error)
3556 goto bread_err2; 3555 goto bread_err2;
3557 offset = xlog_align(log, blk_no, hblks, hbp); 3556 offset = xlog_align(log, blk_no, hblks, hbp);
3558 } else { 3557 } else {
3559 /* This LR is split across physical log end */ 3558 /* This LR is split across physical log end */
3560 if (blk_no != log->l_logBBsize) { 3559 if (blk_no != log->l_logBBsize) {
3561 /* some data before physical log end */ 3560 /* some data before physical log end */
3562 ASSERT(blk_no <= INT_MAX); 3561 ASSERT(blk_no <= INT_MAX);
3563 split_hblks = log->l_logBBsize - (int)blk_no; 3562 split_hblks = log->l_logBBsize - (int)blk_no;
3564 ASSERT(split_hblks > 0); 3563 ASSERT(split_hblks > 0);
3565 if ((error = xlog_bread(log, blk_no, 3564 if ((error = xlog_bread(log, blk_no,
3566 split_hblks, hbp))) 3565 split_hblks, hbp)))
3567 goto bread_err2; 3566 goto bread_err2;
3568 offset = xlog_align(log, blk_no, 3567 offset = xlog_align(log, blk_no,
3569 split_hblks, hbp); 3568 split_hblks, hbp);
3570 } 3569 }
3571 /* 3570 /*
3572 * Note: this black magic still works with 3571 * Note: this black magic still works with
3573 * large sector sizes (non-512) only because: 3572 * large sector sizes (non-512) only because:
3574 * - we increased the buffer size originally 3573 * - we increased the buffer size originally
3575 * by 1 sector giving us enough extra space 3574 * by 1 sector giving us enough extra space
3576 * for the second read; 3575 * for the second read;
3577 * - the log start is guaranteed to be sector 3576 * - the log start is guaranteed to be sector
3578 * aligned; 3577 * aligned;
3579 * - we read the log end (LR header start) 3578 * - we read the log end (LR header start)
3580 * _first_, then the log start (LR header end) 3579 * _first_, then the log start (LR header end)
3581 * - order is important. 3580 * - order is important.
3582 */ 3581 */
3583 wrapped_hblks = hblks - split_hblks; 3582 wrapped_hblks = hblks - split_hblks;
3584 bufaddr = XFS_BUF_PTR(hbp); 3583 bufaddr = XFS_BUF_PTR(hbp);
3585 error = XFS_BUF_SET_PTR(hbp, 3584 error = XFS_BUF_SET_PTR(hbp,
3586 bufaddr + BBTOB(split_hblks), 3585 bufaddr + BBTOB(split_hblks),
3587 BBTOB(hblks - split_hblks)); 3586 BBTOB(hblks - split_hblks));
3588 if (!error) 3587 if (!error)
3589 error = xlog_bread(log, 0, 3588 error = xlog_bread(log, 0,
3590 wrapped_hblks, hbp); 3589 wrapped_hblks, hbp);
3591 if (!error) 3590 if (!error)
3592 error = XFS_BUF_SET_PTR(hbp, bufaddr, 3591 error = XFS_BUF_SET_PTR(hbp, bufaddr,
3593 BBTOB(hblks)); 3592 BBTOB(hblks));
3594 if (error) 3593 if (error)
3595 goto bread_err2; 3594 goto bread_err2;
3596 if (!offset) 3595 if (!offset)
3597 offset = xlog_align(log, 0, 3596 offset = xlog_align(log, 0,
3598 wrapped_hblks, hbp); 3597 wrapped_hblks, hbp);
3599 } 3598 }
3600 rhead = (xlog_rec_header_t *)offset; 3599 rhead = (xlog_rec_header_t *)offset;
3601 error = xlog_valid_rec_header(log, rhead, 3600 error = xlog_valid_rec_header(log, rhead,
3602 split_hblks ? blk_no : 0); 3601 split_hblks ? blk_no : 0);
3603 if (error) 3602 if (error)
3604 goto bread_err2; 3603 goto bread_err2;
3605 3604
3606 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3605 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3607 blk_no += hblks; 3606 blk_no += hblks;
3608 3607
3609 /* Read in data for log record */ 3608 /* Read in data for log record */
3610 if (blk_no + bblks <= log->l_logBBsize) { 3609 if (blk_no + bblks <= log->l_logBBsize) {
3611 error = xlog_bread(log, blk_no, bblks, dbp); 3610 error = xlog_bread(log, blk_no, bblks, dbp);
3612 if (error) 3611 if (error)
3613 goto bread_err2; 3612 goto bread_err2;
3614 offset = xlog_align(log, blk_no, bblks, dbp); 3613 offset = xlog_align(log, blk_no, bblks, dbp);
3615 } else { 3614 } else {
3616 /* This log record is split across the 3615 /* This log record is split across the
3617 * physical end of log */ 3616 * physical end of log */
3618 offset = NULL; 3617 offset = NULL;
3619 split_bblks = 0; 3618 split_bblks = 0;
3620 if (blk_no != log->l_logBBsize) { 3619 if (blk_no != log->l_logBBsize) {
3621 /* some data is before the physical 3620 /* some data is before the physical
3622 * end of log */ 3621 * end of log */
3623 ASSERT(!wrapped_hblks); 3622 ASSERT(!wrapped_hblks);
3624 ASSERT(blk_no <= INT_MAX); 3623 ASSERT(blk_no <= INT_MAX);
3625 split_bblks = 3624 split_bblks =
3626 log->l_logBBsize - (int)blk_no; 3625 log->l_logBBsize - (int)blk_no;
3627 ASSERT(split_bblks > 0); 3626 ASSERT(split_bblks > 0);
3628 if ((error = xlog_bread(log, blk_no, 3627 if ((error = xlog_bread(log, blk_no,
3629 split_bblks, dbp))) 3628 split_bblks, dbp)))
3630 goto bread_err2; 3629 goto bread_err2;
3631 offset = xlog_align(log, blk_no, 3630 offset = xlog_align(log, blk_no,
3632 split_bblks, dbp); 3631 split_bblks, dbp);
3633 } 3632 }
3634 /* 3633 /*
3635 * Note: this black magic still works with 3634 * Note: this black magic still works with
3636 * large sector sizes (non-512) only because: 3635 * large sector sizes (non-512) only because:
3637 * - we increased the buffer size originally 3636 * - we increased the buffer size originally
3638 * by 1 sector giving us enough extra space 3637 * by 1 sector giving us enough extra space
3639 * for the second read; 3638 * for the second read;
3640 * - the log start is guaranteed to be sector 3639 * - the log start is guaranteed to be sector
3641 * aligned; 3640 * aligned;
3642 * - we read the log end (LR header start) 3641 * - we read the log end (LR header start)
3643 * _first_, then the log start (LR header end) 3642 * _first_, then the log start (LR header end)
3644 * - order is important. 3643 * - order is important.
3645 */ 3644 */
3646 bufaddr = XFS_BUF_PTR(dbp); 3645 bufaddr = XFS_BUF_PTR(dbp);
3647 error = XFS_BUF_SET_PTR(dbp, 3646 error = XFS_BUF_SET_PTR(dbp,
3648 bufaddr + BBTOB(split_bblks), 3647 bufaddr + BBTOB(split_bblks),
3649 BBTOB(bblks - split_bblks)); 3648 BBTOB(bblks - split_bblks));
3650 if (!error) 3649 if (!error)
3651 error = xlog_bread(log, wrapped_hblks, 3650 error = xlog_bread(log, wrapped_hblks,
3652 bblks - split_bblks, 3651 bblks - split_bblks,
3653 dbp); 3652 dbp);
3654 if (!error) 3653 if (!error)
3655 error = XFS_BUF_SET_PTR(dbp, bufaddr, 3654 error = XFS_BUF_SET_PTR(dbp, bufaddr,
3656 h_size); 3655 h_size);
3657 if (error) 3656 if (error)
3658 goto bread_err2; 3657 goto bread_err2;
3659 if (!offset) 3658 if (!offset)
3660 offset = xlog_align(log, wrapped_hblks, 3659 offset = xlog_align(log, wrapped_hblks,
3661 bblks - split_bblks, dbp); 3660 bblks - split_bblks, dbp);
3662 } 3661 }
3663 xlog_unpack_data(rhead, offset, log); 3662 xlog_unpack_data(rhead, offset, log);
3664 if ((error = xlog_recover_process_data(log, rhash, 3663 if ((error = xlog_recover_process_data(log, rhash,
3665 rhead, offset, pass))) 3664 rhead, offset, pass)))
3666 goto bread_err2; 3665 goto bread_err2;
3667 blk_no += bblks; 3666 blk_no += bblks;
3668 } 3667 }
3669 3668
3670 ASSERT(blk_no >= log->l_logBBsize); 3669 ASSERT(blk_no >= log->l_logBBsize);
3671 blk_no -= log->l_logBBsize; 3670 blk_no -= log->l_logBBsize;
3672 3671
3673 /* read first part of physical log */ 3672 /* read first part of physical log */
3674 while (blk_no < head_blk) { 3673 while (blk_no < head_blk) {
3675 if ((error = xlog_bread(log, blk_no, hblks, hbp))) 3674 if ((error = xlog_bread(log, blk_no, hblks, hbp)))
3676 goto bread_err2; 3675 goto bread_err2;
3677 offset = xlog_align(log, blk_no, hblks, hbp); 3676 offset = xlog_align(log, blk_no, hblks, hbp);
3678 rhead = (xlog_rec_header_t *)offset; 3677 rhead = (xlog_rec_header_t *)offset;
3679 error = xlog_valid_rec_header(log, rhead, blk_no); 3678 error = xlog_valid_rec_header(log, rhead, blk_no);
3680 if (error) 3679 if (error)
3681 goto bread_err2; 3680 goto bread_err2;
3682 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3681 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3683 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) 3682 if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
3684 goto bread_err2; 3683 goto bread_err2;
3685 offset = xlog_align(log, blk_no+hblks, bblks, dbp); 3684 offset = xlog_align(log, blk_no+hblks, bblks, dbp);
3686 xlog_unpack_data(rhead, offset, log); 3685 xlog_unpack_data(rhead, offset, log);
3687 if ((error = xlog_recover_process_data(log, rhash, 3686 if ((error = xlog_recover_process_data(log, rhash,
3688 rhead, offset, pass))) 3687 rhead, offset, pass)))
3689 goto bread_err2; 3688 goto bread_err2;
3690 blk_no += bblks + hblks; 3689 blk_no += bblks + hblks;
3691 } 3690 }
3692 } 3691 }
3693 3692
3694 bread_err2: 3693 bread_err2:
3695 xlog_put_bp(dbp); 3694 xlog_put_bp(dbp);
3696 bread_err1: 3695 bread_err1:
3697 xlog_put_bp(hbp); 3696 xlog_put_bp(hbp);
3698 return error; 3697 return error;
3699 } 3698 }
3700 3699
3701 /* 3700 /*
3702 * Do the recovery of the log. We actually do this in two phases. 3701 * Do the recovery of the log. We actually do this in two phases.
3703 * The two passes are necessary in order to implement the function 3702 * The two passes are necessary in order to implement the function
3704 * of cancelling a record written into the log. The first pass 3703 * of cancelling a record written into the log. The first pass
3705 * determines those things which have been cancelled, and the 3704 * determines those things which have been cancelled, and the
3706 * second pass replays log items normally except for those which 3705 * second pass replays log items normally except for those which
3707 * have been cancelled. The handling of the replay and cancellations 3706 * have been cancelled. The handling of the replay and cancellations
3708 * takes place in the log item type specific routines. 3707 * takes place in the log item type specific routines.
3709 * 3708 *
3710 * The table of items which have cancel records in the log is allocated 3709 * The table of items which have cancel records in the log is allocated
3711 * and freed at this level, since only here do we know when all of 3710 * and freed at this level, since only here do we know when all of
3712 * the log recovery has been completed. 3711 * the log recovery has been completed.
3713 */ 3712 */
3714 STATIC int 3713 STATIC int
3715 xlog_do_log_recovery( 3714 xlog_do_log_recovery(
3716 xlog_t *log, 3715 xlog_t *log,
3717 xfs_daddr_t head_blk, 3716 xfs_daddr_t head_blk,
3718 xfs_daddr_t tail_blk) 3717 xfs_daddr_t tail_blk)
3719 { 3718 {
3720 int error; 3719 int error;
3721 3720
3722 ASSERT(head_blk != tail_blk); 3721 ASSERT(head_blk != tail_blk);
3723 3722
3724 /* 3723 /*
3725 * First do a pass to find all of the cancelled buf log items. 3724 * First do a pass to find all of the cancelled buf log items.
3726 * Store them in the buf_cancel_table for use in the second pass. 3725 * Store them in the buf_cancel_table for use in the second pass.
3727 */ 3726 */
3728 log->l_buf_cancel_table = 3727 log->l_buf_cancel_table =
3729 (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * 3728 (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE *
3730 sizeof(xfs_buf_cancel_t*), 3729 sizeof(xfs_buf_cancel_t*),
3731 KM_SLEEP); 3730 KM_SLEEP);
3732 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3731 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3733 XLOG_RECOVER_PASS1); 3732 XLOG_RECOVER_PASS1);
3734 if (error != 0) { 3733 if (error != 0) {
3735 kmem_free(log->l_buf_cancel_table); 3734 kmem_free(log->l_buf_cancel_table);
3736 log->l_buf_cancel_table = NULL; 3735 log->l_buf_cancel_table = NULL;
3737 return error; 3736 return error;
3738 } 3737 }
3739 /* 3738 /*
3740 * Then do a second pass to actually recover the items in the log. 3739 * Then do a second pass to actually recover the items in the log.
3741 * When it is complete free the table of buf cancel items. 3740 * When it is complete free the table of buf cancel items.
3742 */ 3741 */
3743 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3742 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3744 XLOG_RECOVER_PASS2); 3743 XLOG_RECOVER_PASS2);
3745 #ifdef DEBUG 3744 #ifdef DEBUG
3746 if (!error) { 3745 if (!error) {
3747 int i; 3746 int i;
3748 3747
3749 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3748 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3750 ASSERT(log->l_buf_cancel_table[i] == NULL); 3749 ASSERT(log->l_buf_cancel_table[i] == NULL);
3751 } 3750 }
3752 #endif /* DEBUG */ 3751 #endif /* DEBUG */
3753 3752
3754 kmem_free(log->l_buf_cancel_table); 3753 kmem_free(log->l_buf_cancel_table);
3755 log->l_buf_cancel_table = NULL; 3754 log->l_buf_cancel_table = NULL;
3756 3755
3757 return error; 3756 return error;
3758 } 3757 }
3759 3758
3760 /* 3759 /*
3761 * Do the actual recovery 3760 * Do the actual recovery
3762 */ 3761 */
3763 STATIC int 3762 STATIC int
3764 xlog_do_recover( 3763 xlog_do_recover(
3765 xlog_t *log, 3764 xlog_t *log,
3766 xfs_daddr_t head_blk, 3765 xfs_daddr_t head_blk,
3767 xfs_daddr_t tail_blk) 3766 xfs_daddr_t tail_blk)
3768 { 3767 {
3769 int error; 3768 int error;
3770 xfs_buf_t *bp; 3769 xfs_buf_t *bp;
3771 xfs_sb_t *sbp; 3770 xfs_sb_t *sbp;
3772 3771
3773 /* 3772 /*
3774 * First replay the images in the log. 3773 * First replay the images in the log.
3775 */ 3774 */
3776 error = xlog_do_log_recovery(log, head_blk, tail_blk); 3775 error = xlog_do_log_recovery(log, head_blk, tail_blk);
3777 if (error) { 3776 if (error) {
3778 return error; 3777 return error;
3779 } 3778 }
3780 3779
3781 XFS_bflush(log->l_mp->m_ddev_targp); 3780 XFS_bflush(log->l_mp->m_ddev_targp);
3782 3781
3783 /* 3782 /*
3784 * If IO errors happened during recovery, bail out. 3783 * If IO errors happened during recovery, bail out.
3785 */ 3784 */
3786 if (XFS_FORCED_SHUTDOWN(log->l_mp)) { 3785 if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
3787 return (EIO); 3786 return (EIO);
3788 } 3787 }
3789 3788
3790 /* 3789 /*
3791 * We now update the tail_lsn since much of the recovery has completed 3790 * We now update the tail_lsn since much of the recovery has completed
3792 * and there may be space available to use. If there were no extent 3791 * and there may be space available to use. If there were no extent
3793 * or iunlinks, we can free up the entire log and set the tail_lsn to 3792 * or iunlinks, we can free up the entire log and set the tail_lsn to
3794 * be the last_sync_lsn. This was set in xlog_find_tail to be the 3793 * be the last_sync_lsn. This was set in xlog_find_tail to be the
3795 * lsn of the last known good LR on disk. If there are extent frees 3794 * lsn of the last known good LR on disk. If there are extent frees
3796 * or iunlinks they will have some entries in the AIL; so we look at 3795 * or iunlinks they will have some entries in the AIL; so we look at
3797 * the AIL to determine how to set the tail_lsn. 3796 * the AIL to determine how to set the tail_lsn.
3798 */ 3797 */
3799 xlog_assign_tail_lsn(log->l_mp); 3798 xlog_assign_tail_lsn(log->l_mp);
3800 3799
3801 /* 3800 /*
3802 * Now that we've finished replaying all buffer and inode 3801 * Now that we've finished replaying all buffer and inode
3803 * updates, re-read in the superblock. 3802 * updates, re-read in the superblock.
3804 */ 3803 */
3805 bp = xfs_getsb(log->l_mp, 0); 3804 bp = xfs_getsb(log->l_mp, 0);
3806 XFS_BUF_UNDONE(bp); 3805 XFS_BUF_UNDONE(bp);
3807 ASSERT(!(XFS_BUF_ISWRITE(bp))); 3806 ASSERT(!(XFS_BUF_ISWRITE(bp)));
3808 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 3807 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
3809 XFS_BUF_READ(bp); 3808 XFS_BUF_READ(bp);
3810 XFS_BUF_UNASYNC(bp); 3809 XFS_BUF_UNASYNC(bp);
3811 xfsbdstrat(log->l_mp, bp); 3810 xfsbdstrat(log->l_mp, bp);
3812 error = xfs_iowait(bp); 3811 error = xfs_iowait(bp);
3813 if (error) { 3812 if (error) {
3814 xfs_ioerror_alert("xlog_do_recover", 3813 xfs_ioerror_alert("xlog_do_recover",
3815 log->l_mp, bp, XFS_BUF_ADDR(bp)); 3814 log->l_mp, bp, XFS_BUF_ADDR(bp));
3816 ASSERT(0); 3815 ASSERT(0);
3817 xfs_buf_relse(bp); 3816 xfs_buf_relse(bp);
3818 return error; 3817 return error;
3819 } 3818 }
3820 3819
3821 /* Convert superblock from on-disk format */ 3820 /* Convert superblock from on-disk format */
3822 sbp = &log->l_mp->m_sb; 3821 sbp = &log->l_mp->m_sb;
3823 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 3822 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
3824 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3823 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
3825 ASSERT(xfs_sb_good_version(sbp)); 3824 ASSERT(xfs_sb_good_version(sbp));
3826 xfs_buf_relse(bp); 3825 xfs_buf_relse(bp);
3827 3826
3828 /* We've re-read the superblock so re-initialize per-cpu counters */ 3827 /* We've re-read the superblock so re-initialize per-cpu counters */
3829 xfs_icsb_reinit_counters(log->l_mp); 3828 xfs_icsb_reinit_counters(log->l_mp);
3830 3829
3831 xlog_recover_check_summary(log); 3830 xlog_recover_check_summary(log);
3832 3831
3833 /* Normal transactions can now occur */ 3832 /* Normal transactions can now occur */
3834 log->l_flags &= ~XLOG_ACTIVE_RECOVERY; 3833 log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
3835 return 0; 3834 return 0;
3836 } 3835 }
3837 3836
3838 /* 3837 /*
3839 * Perform recovery and re-initialize some log variables in xlog_find_tail. 3838 * Perform recovery and re-initialize some log variables in xlog_find_tail.
3840 * 3839 *
3841 * Return error or zero. 3840 * Return error or zero.
3842 */ 3841 */
3843 int 3842 int
3844 xlog_recover( 3843 xlog_recover(
3845 xlog_t *log) 3844 xlog_t *log)
3846 { 3845 {
3847 xfs_daddr_t head_blk, tail_blk; 3846 xfs_daddr_t head_blk, tail_blk;
3848 int error; 3847 int error;
3849 3848
3850 /* find the tail of the log */ 3849 /* find the tail of the log */
3851 if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) 3850 if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
3852 return error; 3851 return error;
3853 3852
3854 if (tail_blk != head_blk) { 3853 if (tail_blk != head_blk) {
3855 /* There used to be a comment here: 3854 /* There used to be a comment here:
3856 * 3855 *
3857 * disallow recovery on read-only mounts. note -- mount 3856 * disallow recovery on read-only mounts. note -- mount
3858 * checks for ENOSPC and turns it into an intelligent 3857 * checks for ENOSPC and turns it into an intelligent
3859 * error message. 3858 * error message.
3860 * ...but this is no longer true. Now, unless you specify 3859 * ...but this is no longer true. Now, unless you specify
3861 * NORECOVERY (in which case this function would never be 3860 * NORECOVERY (in which case this function would never be
3862 * called), we just go ahead and recover. We do this all 3861 * called), we just go ahead and recover. We do this all
3863 * under the vfs layer, so we can get away with it unless 3862 * under the vfs layer, so we can get away with it unless
3864 * the device itself is read-only, in which case we fail. 3863 * the device itself is read-only, in which case we fail.
3865 */ 3864 */
3866 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { 3865 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) {
3867 return error; 3866 return error;
3868 } 3867 }
3869 3868
3870 cmn_err(CE_NOTE, 3869 cmn_err(CE_NOTE,
3871 "Starting XFS recovery on filesystem: %s (logdev: %s)", 3870 "Starting XFS recovery on filesystem: %s (logdev: %s)",
3872 log->l_mp->m_fsname, log->l_mp->m_logname ? 3871 log->l_mp->m_fsname, log->l_mp->m_logname ?
3873 log->l_mp->m_logname : "internal"); 3872 log->l_mp->m_logname : "internal");
3874 3873
3875 error = xlog_do_recover(log, head_blk, tail_blk); 3874 error = xlog_do_recover(log, head_blk, tail_blk);
3876 log->l_flags |= XLOG_RECOVERY_NEEDED; 3875 log->l_flags |= XLOG_RECOVERY_NEEDED;
3877 } 3876 }
3878 return error; 3877 return error;
3879 } 3878 }
3880 3879
3881 /* 3880 /*
3882 * In the first part of recovery we replay inodes and buffers and build 3881 * In the first part of recovery we replay inodes and buffers and build
3883 * up the list of extent free items which need to be processed. Here 3882 * up the list of extent free items which need to be processed. Here
3884 * we process the extent free items and clean up the on disk unlinked 3883 * we process the extent free items and clean up the on disk unlinked
3885 * inode lists. This is separated from the first part of recovery so 3884 * inode lists. This is separated from the first part of recovery so
3886 * that the root and real-time bitmap inodes can be read in from disk in 3885 * that the root and real-time bitmap inodes can be read in from disk in
3887 * between the two stages. This is necessary so that we can free space 3886 * between the two stages. This is necessary so that we can free space
3888 * in the real-time portion of the file system. 3887 * in the real-time portion of the file system.
3889 */ 3888 */
3890 int 3889 int
3891 xlog_recover_finish( 3890 xlog_recover_finish(
3892 xlog_t *log) 3891 xlog_t *log)
3893 { 3892 {
3894 /* 3893 /*
3895 * Now we're ready to do the transactions needed for the 3894 * Now we're ready to do the transactions needed for the
3896 * rest of recovery. Start with completing all the extent 3895 * rest of recovery. Start with completing all the extent
3897 * free intent records and then process the unlinked inode 3896 * free intent records and then process the unlinked inode
3898 * lists. At this point, we essentially run in normal mode 3897 * lists. At this point, we essentially run in normal mode
3899 * except that we're still performing recovery actions 3898 * except that we're still performing recovery actions
3900 * rather than accepting new requests. 3899 * rather than accepting new requests.
3901 */ 3900 */
3902 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 3901 if (log->l_flags & XLOG_RECOVERY_NEEDED) {
3903 int error; 3902 int error;
3904 error = xlog_recover_process_efis(log); 3903 error = xlog_recover_process_efis(log);
3905 if (error) { 3904 if (error) {
3906 cmn_err(CE_ALERT, 3905 cmn_err(CE_ALERT,
3907 "Failed to recover EFIs on filesystem: %s", 3906 "Failed to recover EFIs on filesystem: %s",
3908 log->l_mp->m_fsname); 3907 log->l_mp->m_fsname);
3909 return error; 3908 return error;
3910 } 3909 }
3911 /* 3910 /*
3912 * Sync the log to get all the EFIs out of the AIL. 3911 * Sync the log to get all the EFIs out of the AIL.
3913 * This isn't absolutely necessary, but it helps in 3912 * This isn't absolutely necessary, but it helps in
3914 * case the unlink transactions would have problems 3913 * case the unlink transactions would have problems
3915 * pushing the EFIs out of the way. 3914 * pushing the EFIs out of the way.
3916 */ 3915 */
3917 xfs_log_force(log->l_mp, (xfs_lsn_t)0, 3916 xfs_log_force(log->l_mp, (xfs_lsn_t)0,
3918 (XFS_LOG_FORCE | XFS_LOG_SYNC)); 3917 (XFS_LOG_FORCE | XFS_LOG_SYNC));
3919 3918
3920 xlog_recover_process_iunlinks(log); 3919 xlog_recover_process_iunlinks(log);
3921 3920
3922 xlog_recover_check_summary(log); 3921 xlog_recover_check_summary(log);
3923 3922
3924 cmn_err(CE_NOTE, 3923 cmn_err(CE_NOTE,
3925 "Ending XFS recovery on filesystem: %s (logdev: %s)", 3924 "Ending XFS recovery on filesystem: %s (logdev: %s)",
3926 log->l_mp->m_fsname, log->l_mp->m_logname ? 3925 log->l_mp->m_fsname, log->l_mp->m_logname ?
3927 log->l_mp->m_logname : "internal"); 3926 log->l_mp->m_logname : "internal");
3928 log->l_flags &= ~XLOG_RECOVERY_NEEDED; 3927 log->l_flags &= ~XLOG_RECOVERY_NEEDED;
3929 } else { 3928 } else {
3930 cmn_err(CE_DEBUG, 3929 cmn_err(CE_DEBUG,
3931 "!Ending clean XFS mount for filesystem: %s\n", 3930 "!Ending clean XFS mount for filesystem: %s\n",
3932 log->l_mp->m_fsname); 3931 log->l_mp->m_fsname);
3933 } 3932 }
3934 return 0; 3933 return 0;
3935 } 3934 }
3936 3935
3937 3936
3938 #if defined(DEBUG) 3937 #if defined(DEBUG)
3939 /* 3938 /*
3940 * Read all of the agf and agi counters and check that they 3939 * Read all of the agf and agi counters and check that they
3941 * are consistent with the superblock counters. 3940 * are consistent with the superblock counters.
3942 */ 3941 */
3943 void 3942 void
3944 xlog_recover_check_summary( 3943 xlog_recover_check_summary(
3945 xlog_t *log) 3944 xlog_t *log)
3946 { 3945 {
3947 xfs_mount_t *mp; 3946 xfs_mount_t *mp;
3948 xfs_agf_t *agfp; 3947 xfs_agf_t *agfp;
3949 xfs_buf_t *agfbp; 3948 xfs_buf_t *agfbp;
3950 xfs_buf_t *agibp; 3949 xfs_buf_t *agibp;
3951 xfs_buf_t *sbbp; 3950 xfs_buf_t *sbbp;
3952 #ifdef XFS_LOUD_RECOVERY 3951 #ifdef XFS_LOUD_RECOVERY
3953 xfs_sb_t *sbp; 3952 xfs_sb_t *sbp;
3954 #endif 3953 #endif
3955 xfs_agnumber_t agno; 3954 xfs_agnumber_t agno;
3956 __uint64_t freeblks; 3955 __uint64_t freeblks;
3957 __uint64_t itotal; 3956 __uint64_t itotal;
3958 __uint64_t ifree; 3957 __uint64_t ifree;
3959 int error; 3958 int error;
3960 3959
3961 mp = log->l_mp; 3960 mp = log->l_mp;
3962 3961
3963 freeblks = 0LL; 3962 freeblks = 0LL;
3964 itotal = 0LL; 3963 itotal = 0LL;
3965 ifree = 0LL; 3964 ifree = 0LL;
3966 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3965 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
3967 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); 3966 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
3968 if (error) { 3967 if (error) {
3969 xfs_fs_cmn_err(CE_ALERT, mp, 3968 xfs_fs_cmn_err(CE_ALERT, mp,
3970 "xlog_recover_check_summary(agf)" 3969 "xlog_recover_check_summary(agf)"
3971 "agf read failed agno %d error %d", 3970 "agf read failed agno %d error %d",
3972 agno, error); 3971 agno, error);
3973 } else { 3972 } else {
3974 agfp = XFS_BUF_TO_AGF(agfbp); 3973 agfp = XFS_BUF_TO_AGF(agfbp);
3975 freeblks += be32_to_cpu(agfp->agf_freeblks) + 3974 freeblks += be32_to_cpu(agfp->agf_freeblks) +
3976 be32_to_cpu(agfp->agf_flcount); 3975 be32_to_cpu(agfp->agf_flcount);
3977 xfs_buf_relse(agfbp); 3976 xfs_buf_relse(agfbp);
3978 } 3977 }
3979 3978
3980 error = xfs_read_agi(mp, NULL, agno, &agibp); 3979 error = xfs_read_agi(mp, NULL, agno, &agibp);
3981 if (!error) { 3980 if (!error) {
3982 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); 3981 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp);
3983 3982
3984 itotal += be32_to_cpu(agi->agi_count); 3983 itotal += be32_to_cpu(agi->agi_count);
3985 ifree += be32_to_cpu(agi->agi_freecount); 3984 ifree += be32_to_cpu(agi->agi_freecount);
3986 xfs_buf_relse(agibp); 3985 xfs_buf_relse(agibp);
3987 } 3986 }
3988 } 3987 }
3989 3988
3990 sbbp = xfs_getsb(mp, 0); 3989 sbbp = xfs_getsb(mp, 0);
3991 #ifdef XFS_LOUD_RECOVERY 3990 #ifdef XFS_LOUD_RECOVERY
3992 sbp = &mp->m_sb; 3991 sbp = &mp->m_sb;
3993 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp)); 3992 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp));
3994 cmn_err(CE_NOTE, 3993 cmn_err(CE_NOTE,
3995 "xlog_recover_check_summary: sb_icount %Lu itotal %Lu", 3994 "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
3996 sbp->sb_icount, itotal); 3995 sbp->sb_icount, itotal);
3997 cmn_err(CE_NOTE, 3996 cmn_err(CE_NOTE,
3998 "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu", 3997 "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
3999 sbp->sb_ifree, ifree); 3998 sbp->sb_ifree, ifree);
4000 cmn_err(CE_NOTE, 3999 cmn_err(CE_NOTE,
4001 "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu", 4000 "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
4002 sbp->sb_fdblocks, freeblks); 4001 sbp->sb_fdblocks, freeblks);
4003 #if 0 4002 #if 0
4004 /* 4003 /*
4005 * This is turned off until I account for the allocation 4004 * This is turned off until I account for the allocation
4006 * btree blocks which live in free space. 4005 * btree blocks which live in free space.
4007 */ 4006 */
4008 ASSERT(sbp->sb_icount == itotal); 4007 ASSERT(sbp->sb_icount == itotal);
4009 ASSERT(sbp->sb_ifree == ifree); 4008 ASSERT(sbp->sb_ifree == ifree);
4010 ASSERT(sbp->sb_fdblocks == freeblks); 4009 ASSERT(sbp->sb_fdblocks == freeblks);
4011 #endif 4010 #endif
4012 #endif 4011 #endif
4013 xfs_buf_relse(sbbp); 4012 xfs_buf_relse(sbbp);
4014 } 4013 }
4015 #endif /* DEBUG */ 4014 #endif /* DEBUG */
4016 4015