Commit 745b1f47fc0c68dbb1ff440eec8889f61e57194b

Authored by Nathan Scott
Committed by Tim Shimmin
1 parent a3c6685eaa

[XFS] Remove last bulkstat false-positives with debug kernels.

SGI-PV: 953819
SGI-Modid: xfs-linux-melb:xfs-kern:26628a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>

Showing 5 changed files with 22 additions and 17 deletions (inline diff view).

1 /* 1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h" 31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h" 32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h" 33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h" 34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h" 35 #include "xfs_dinode.h"
36 #include "xfs_inode.h" 36 #include "xfs_inode.h"
37 #include "xfs_btree.h" 37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h" 38 #include "xfs_ialloc.h"
39 #include "xfs_alloc.h" 39 #include "xfs_alloc.h"
40 #include "xfs_rtalloc.h" 40 #include "xfs_rtalloc.h"
41 #include "xfs_error.h" 41 #include "xfs_error.h"
42 #include "xfs_bmap.h" 42 #include "xfs_bmap.h"
43 43
44 /* 44 /*
45 * Log specified fields for the inode given by bp and off. 45 * Log specified fields for the inode given by bp and off.
46 */ 46 */
47 STATIC void 47 STATIC void
48 xfs_ialloc_log_di( 48 xfs_ialloc_log_di(
49 xfs_trans_t *tp, /* transaction pointer */ 49 xfs_trans_t *tp, /* transaction pointer */
50 xfs_buf_t *bp, /* inode buffer */ 50 xfs_buf_t *bp, /* inode buffer */
51 int off, /* index of inode in buffer */ 51 int off, /* index of inode in buffer */
52 int fields) /* bitmask of fields to log */ 52 int fields) /* bitmask of fields to log */
53 { 53 {
54 int first; /* first byte number */ 54 int first; /* first byte number */
55 int ioffset; /* off in bytes */ 55 int ioffset; /* off in bytes */
56 int last; /* last byte number */ 56 int last; /* last byte number */
57 xfs_mount_t *mp; /* mount point structure */ 57 xfs_mount_t *mp; /* mount point structure */
58 static const short offsets[] = { /* field offsets */ 58 static const short offsets[] = { /* field offsets */
59 /* keep in sync with bits */ 59 /* keep in sync with bits */
60 offsetof(xfs_dinode_core_t, di_magic), 60 offsetof(xfs_dinode_core_t, di_magic),
61 offsetof(xfs_dinode_core_t, di_mode), 61 offsetof(xfs_dinode_core_t, di_mode),
62 offsetof(xfs_dinode_core_t, di_version), 62 offsetof(xfs_dinode_core_t, di_version),
63 offsetof(xfs_dinode_core_t, di_format), 63 offsetof(xfs_dinode_core_t, di_format),
64 offsetof(xfs_dinode_core_t, di_onlink), 64 offsetof(xfs_dinode_core_t, di_onlink),
65 offsetof(xfs_dinode_core_t, di_uid), 65 offsetof(xfs_dinode_core_t, di_uid),
66 offsetof(xfs_dinode_core_t, di_gid), 66 offsetof(xfs_dinode_core_t, di_gid),
67 offsetof(xfs_dinode_core_t, di_nlink), 67 offsetof(xfs_dinode_core_t, di_nlink),
68 offsetof(xfs_dinode_core_t, di_projid), 68 offsetof(xfs_dinode_core_t, di_projid),
69 offsetof(xfs_dinode_core_t, di_pad), 69 offsetof(xfs_dinode_core_t, di_pad),
70 offsetof(xfs_dinode_core_t, di_atime), 70 offsetof(xfs_dinode_core_t, di_atime),
71 offsetof(xfs_dinode_core_t, di_mtime), 71 offsetof(xfs_dinode_core_t, di_mtime),
72 offsetof(xfs_dinode_core_t, di_ctime), 72 offsetof(xfs_dinode_core_t, di_ctime),
73 offsetof(xfs_dinode_core_t, di_size), 73 offsetof(xfs_dinode_core_t, di_size),
74 offsetof(xfs_dinode_core_t, di_nblocks), 74 offsetof(xfs_dinode_core_t, di_nblocks),
75 offsetof(xfs_dinode_core_t, di_extsize), 75 offsetof(xfs_dinode_core_t, di_extsize),
76 offsetof(xfs_dinode_core_t, di_nextents), 76 offsetof(xfs_dinode_core_t, di_nextents),
77 offsetof(xfs_dinode_core_t, di_anextents), 77 offsetof(xfs_dinode_core_t, di_anextents),
78 offsetof(xfs_dinode_core_t, di_forkoff), 78 offsetof(xfs_dinode_core_t, di_forkoff),
79 offsetof(xfs_dinode_core_t, di_aformat), 79 offsetof(xfs_dinode_core_t, di_aformat),
80 offsetof(xfs_dinode_core_t, di_dmevmask), 80 offsetof(xfs_dinode_core_t, di_dmevmask),
81 offsetof(xfs_dinode_core_t, di_dmstate), 81 offsetof(xfs_dinode_core_t, di_dmstate),
82 offsetof(xfs_dinode_core_t, di_flags), 82 offsetof(xfs_dinode_core_t, di_flags),
83 offsetof(xfs_dinode_core_t, di_gen), 83 offsetof(xfs_dinode_core_t, di_gen),
84 offsetof(xfs_dinode_t, di_next_unlinked), 84 offsetof(xfs_dinode_t, di_next_unlinked),
85 offsetof(xfs_dinode_t, di_u), 85 offsetof(xfs_dinode_t, di_u),
86 offsetof(xfs_dinode_t, di_a), 86 offsetof(xfs_dinode_t, di_a),
87 sizeof(xfs_dinode_t) 87 sizeof(xfs_dinode_t)
88 }; 88 };
89 89
90 90
91 ASSERT(offsetof(xfs_dinode_t, di_core) == 0); 91 ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
92 ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0); 92 ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
93 mp = tp->t_mountp; 93 mp = tp->t_mountp;
94 /* 94 /*
95 * Get the inode-relative first and last bytes for these fields 95 * Get the inode-relative first and last bytes for these fields
96 */ 96 */
97 xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last); 97 xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last);
98 /* 98 /*
99 * Convert to buffer offsets and log it. 99 * Convert to buffer offsets and log it.
100 */ 100 */
101 ioffset = off << mp->m_sb.sb_inodelog; 101 ioffset = off << mp->m_sb.sb_inodelog;
102 first += ioffset; 102 first += ioffset;
103 last += ioffset; 103 last += ioffset;
104 xfs_trans_log_buf(tp, bp, first, last); 104 xfs_trans_log_buf(tp, bp, first, last);
105 } 105 }
106 106
107 /* 107 /*
108 * Allocation group level functions. 108 * Allocation group level functions.
109 */ 109 */
110 110
111 /* 111 /*
112 * Allocate new inodes in the allocation group specified by agbp. 112 * Allocate new inodes in the allocation group specified by agbp.
113 * Return 0 for success, else error code. 113 * Return 0 for success, else error code.
114 */ 114 */
115 STATIC int /* error code or 0 */ 115 STATIC int /* error code or 0 */
116 xfs_ialloc_ag_alloc( 116 xfs_ialloc_ag_alloc(
117 xfs_trans_t *tp, /* transaction pointer */ 117 xfs_trans_t *tp, /* transaction pointer */
118 xfs_buf_t *agbp, /* alloc group buffer */ 118 xfs_buf_t *agbp, /* alloc group buffer */
119 int *alloc) 119 int *alloc)
120 { 120 {
121 xfs_agi_t *agi; /* allocation group header */ 121 xfs_agi_t *agi; /* allocation group header */
122 xfs_alloc_arg_t args; /* allocation argument structure */ 122 xfs_alloc_arg_t args; /* allocation argument structure */
123 int blks_per_cluster; /* fs blocks per inode cluster */ 123 int blks_per_cluster; /* fs blocks per inode cluster */
124 xfs_btree_cur_t *cur; /* inode btree cursor */ 124 xfs_btree_cur_t *cur; /* inode btree cursor */
125 xfs_daddr_t d; /* disk addr of buffer */ 125 xfs_daddr_t d; /* disk addr of buffer */
126 int error; 126 int error;
127 xfs_buf_t *fbuf; /* new free inodes' buffer */ 127 xfs_buf_t *fbuf; /* new free inodes' buffer */
128 xfs_dinode_t *free; /* new free inode structure */ 128 xfs_dinode_t *free; /* new free inode structure */
129 int i; /* inode counter */ 129 int i; /* inode counter */
130 int j; /* block counter */ 130 int j; /* block counter */
131 int nbufs; /* num bufs of new inodes */ 131 int nbufs; /* num bufs of new inodes */
132 xfs_agino_t newino; /* new first inode's number */ 132 xfs_agino_t newino; /* new first inode's number */
133 xfs_agino_t newlen; /* new number of inodes */ 133 xfs_agino_t newlen; /* new number of inodes */
134 int ninodes; /* num inodes per buf */ 134 int ninodes; /* num inodes per buf */
135 xfs_agino_t thisino; /* current inode number, for loop */ 135 xfs_agino_t thisino; /* current inode number, for loop */
136 int version; /* inode version number to use */ 136 int version; /* inode version number to use */
137 int isaligned = 0; /* inode allocation at stripe unit */ 137 int isaligned = 0; /* inode allocation at stripe unit */
138 /* boundary */ 138 /* boundary */
139 139
140 args.tp = tp; 140 args.tp = tp;
141 args.mp = tp->t_mountp; 141 args.mp = tp->t_mountp;
142 142
143 /* 143 /*
144 * Locking will ensure that we don't have two callers in here 144 * Locking will ensure that we don't have two callers in here
145 * at one time. 145 * at one time.
146 */ 146 */
147 newlen = XFS_IALLOC_INODES(args.mp); 147 newlen = XFS_IALLOC_INODES(args.mp);
148 if (args.mp->m_maxicount && 148 if (args.mp->m_maxicount &&
149 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 149 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
150 return XFS_ERROR(ENOSPC); 150 return XFS_ERROR(ENOSPC);
151 args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); 151 args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
152 /* 152 /*
153 * First try to allocate inodes contiguous with the last-allocated 153 * First try to allocate inodes contiguous with the last-allocated
154 * chunk of inodes. If the filesystem is striped, this will fill 154 * chunk of inodes. If the filesystem is striped, this will fill
155 * an entire stripe unit with inodes. 155 * an entire stripe unit with inodes.
156 */ 156 */
157 agi = XFS_BUF_TO_AGI(agbp); 157 agi = XFS_BUF_TO_AGI(agbp);
158 newino = be32_to_cpu(agi->agi_newino); 158 newino = be32_to_cpu(agi->agi_newino);
159 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 159 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
160 XFS_IALLOC_BLOCKS(args.mp); 160 XFS_IALLOC_BLOCKS(args.mp);
161 if (likely(newino != NULLAGINO && 161 if (likely(newino != NULLAGINO &&
162 (args.agbno < be32_to_cpu(agi->agi_length)))) { 162 (args.agbno < be32_to_cpu(agi->agi_length)))) {
163 args.fsbno = XFS_AGB_TO_FSB(args.mp, 163 args.fsbno = XFS_AGB_TO_FSB(args.mp,
164 be32_to_cpu(agi->agi_seqno), args.agbno); 164 be32_to_cpu(agi->agi_seqno), args.agbno);
165 args.type = XFS_ALLOCTYPE_THIS_BNO; 165 args.type = XFS_ALLOCTYPE_THIS_BNO;
166 args.mod = args.total = args.wasdel = args.isfl = 166 args.mod = args.total = args.wasdel = args.isfl =
167 args.userdata = args.minalignslop = 0; 167 args.userdata = args.minalignslop = 0;
168 args.prod = 1; 168 args.prod = 1;
169 args.alignment = 1; 169 args.alignment = 1;
170 /* 170 /*
171 * Allow space for the inode btree to split. 171 * Allow space for the inode btree to split.
172 */ 172 */
173 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; 173 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
174 if ((error = xfs_alloc_vextent(&args))) 174 if ((error = xfs_alloc_vextent(&args)))
175 return error; 175 return error;
176 } else 176 } else
177 args.fsbno = NULLFSBLOCK; 177 args.fsbno = NULLFSBLOCK;
178 178
179 if (unlikely(args.fsbno == NULLFSBLOCK)) { 179 if (unlikely(args.fsbno == NULLFSBLOCK)) {
180 /* 180 /*
181 * Set the alignment for the allocation. 181 * Set the alignment for the allocation.
182 * If stripe alignment is turned on then align at stripe unit 182 * If stripe alignment is turned on then align at stripe unit
183 * boundary. 183 * boundary.
184 * If the cluster size is smaller than a filesystem block 184 * If the cluster size is smaller than a filesystem block
185 * then we're doing I/O for inodes in filesystem block size 185 * then we're doing I/O for inodes in filesystem block size
186 * pieces, so don't need alignment anyway. 186 * pieces, so don't need alignment anyway.
187 */ 187 */
188 isaligned = 0; 188 isaligned = 0;
189 if (args.mp->m_sinoalign) { 189 if (args.mp->m_sinoalign) {
190 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 190 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
191 args.alignment = args.mp->m_dalign; 191 args.alignment = args.mp->m_dalign;
192 isaligned = 1; 192 isaligned = 1;
193 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 193 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
194 args.mp->m_sb.sb_inoalignmt >= 194 args.mp->m_sb.sb_inoalignmt >=
195 XFS_B_TO_FSBT(args.mp, 195 XFS_B_TO_FSBT(args.mp,
196 XFS_INODE_CLUSTER_SIZE(args.mp))) 196 XFS_INODE_CLUSTER_SIZE(args.mp)))
197 args.alignment = args.mp->m_sb.sb_inoalignmt; 197 args.alignment = args.mp->m_sb.sb_inoalignmt;
198 else 198 else
199 args.alignment = 1; 199 args.alignment = 1;
200 /* 200 /*
201 * Need to figure out where to allocate the inode blocks. 201 * Need to figure out where to allocate the inode blocks.
202 * Ideally they should be spaced out through the a.g. 202 * Ideally they should be spaced out through the a.g.
203 * For now, just allocate blocks up front. 203 * For now, just allocate blocks up front.
204 */ 204 */
205 args.agbno = be32_to_cpu(agi->agi_root); 205 args.agbno = be32_to_cpu(agi->agi_root);
206 args.fsbno = XFS_AGB_TO_FSB(args.mp, 206 args.fsbno = XFS_AGB_TO_FSB(args.mp,
207 be32_to_cpu(agi->agi_seqno), args.agbno); 207 be32_to_cpu(agi->agi_seqno), args.agbno);
208 /* 208 /*
209 * Allocate a fixed-size extent of inodes. 209 * Allocate a fixed-size extent of inodes.
210 */ 210 */
211 args.type = XFS_ALLOCTYPE_NEAR_BNO; 211 args.type = XFS_ALLOCTYPE_NEAR_BNO;
212 args.mod = args.total = args.wasdel = args.isfl = 212 args.mod = args.total = args.wasdel = args.isfl =
213 args.userdata = args.minalignslop = 0; 213 args.userdata = args.minalignslop = 0;
214 args.prod = 1; 214 args.prod = 1;
215 /* 215 /*
216 * Allow space for the inode btree to split. 216 * Allow space for the inode btree to split.
217 */ 217 */
218 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; 218 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
219 if ((error = xfs_alloc_vextent(&args))) 219 if ((error = xfs_alloc_vextent(&args)))
220 return error; 220 return error;
221 } 221 }
222 222
223 /* 223 /*
224 * If stripe alignment is turned on, then try again with cluster 224 * If stripe alignment is turned on, then try again with cluster
225 * alignment. 225 * alignment.
226 */ 226 */
227 if (isaligned && args.fsbno == NULLFSBLOCK) { 227 if (isaligned && args.fsbno == NULLFSBLOCK) {
228 args.type = XFS_ALLOCTYPE_NEAR_BNO; 228 args.type = XFS_ALLOCTYPE_NEAR_BNO;
229 args.agbno = be32_to_cpu(agi->agi_root); 229 args.agbno = be32_to_cpu(agi->agi_root);
230 args.fsbno = XFS_AGB_TO_FSB(args.mp, 230 args.fsbno = XFS_AGB_TO_FSB(args.mp,
231 be32_to_cpu(agi->agi_seqno), args.agbno); 231 be32_to_cpu(agi->agi_seqno), args.agbno);
232 if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 232 if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
233 args.mp->m_sb.sb_inoalignmt >= 233 args.mp->m_sb.sb_inoalignmt >=
234 XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) 234 XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
235 args.alignment = args.mp->m_sb.sb_inoalignmt; 235 args.alignment = args.mp->m_sb.sb_inoalignmt;
236 else 236 else
237 args.alignment = 1; 237 args.alignment = 1;
238 if ((error = xfs_alloc_vextent(&args))) 238 if ((error = xfs_alloc_vextent(&args)))
239 return error; 239 return error;
240 } 240 }
241 241
242 if (args.fsbno == NULLFSBLOCK) { 242 if (args.fsbno == NULLFSBLOCK) {
243 *alloc = 0; 243 *alloc = 0;
244 return 0; 244 return 0;
245 } 245 }
246 ASSERT(args.len == args.minlen); 246 ASSERT(args.len == args.minlen);
247 /* 247 /*
248 * Convert the results. 248 * Convert the results.
249 */ 249 */
250 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 250 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
251 /* 251 /*
252 * Loop over the new block(s), filling in the inodes. 252 * Loop over the new block(s), filling in the inodes.
253 * For small block sizes, manipulate the inodes in buffers 253 * For small block sizes, manipulate the inodes in buffers
254 * which are multiples of the blocks size. 254 * which are multiples of the blocks size.
255 */ 255 */
256 if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { 256 if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
257 blks_per_cluster = 1; 257 blks_per_cluster = 1;
258 nbufs = (int)args.len; 258 nbufs = (int)args.len;
259 ninodes = args.mp->m_sb.sb_inopblock; 259 ninodes = args.mp->m_sb.sb_inopblock;
260 } else { 260 } else {
261 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / 261 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
262 args.mp->m_sb.sb_blocksize; 262 args.mp->m_sb.sb_blocksize;
263 nbufs = (int)args.len / blks_per_cluster; 263 nbufs = (int)args.len / blks_per_cluster;
264 ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; 264 ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
265 } 265 }
266 /* 266 /*
267 * Figure out what version number to use in the inodes we create. 267 * Figure out what version number to use in the inodes we create.
268 * If the superblock version has caught up to the one that supports 268 * If the superblock version has caught up to the one that supports
269 * the new inode format, then use the new inode version. Otherwise 269 * the new inode format, then use the new inode version. Otherwise
270 * use the old version so that old kernels will continue to be 270 * use the old version so that old kernels will continue to be
271 * able to use the file system. 271 * able to use the file system.
272 */ 272 */
273 if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb)) 273 if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
274 version = XFS_DINODE_VERSION_2; 274 version = XFS_DINODE_VERSION_2;
275 else 275 else
276 version = XFS_DINODE_VERSION_1; 276 version = XFS_DINODE_VERSION_1;
277 277
278 for (j = 0; j < nbufs; j++) { 278 for (j = 0; j < nbufs; j++) {
279 /* 279 /*
280 * Get the block. 280 * Get the block.
281 */ 281 */
282 d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno), 282 d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
283 args.agbno + (j * blks_per_cluster)); 283 args.agbno + (j * blks_per_cluster));
284 fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, 284 fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
285 args.mp->m_bsize * blks_per_cluster, 285 args.mp->m_bsize * blks_per_cluster,
286 XFS_BUF_LOCK); 286 XFS_BUF_LOCK);
287 ASSERT(fbuf); 287 ASSERT(fbuf);
288 ASSERT(!XFS_BUF_GETERROR(fbuf)); 288 ASSERT(!XFS_BUF_GETERROR(fbuf));
289 /* 289 /*
290 * Set initial values for the inodes in this buffer. 290 * Set initial values for the inodes in this buffer.
291 */ 291 */
292 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); 292 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
293 for (i = 0; i < ninodes; i++) { 293 for (i = 0; i < ninodes; i++) {
294 free = XFS_MAKE_IPTR(args.mp, fbuf, i); 294 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
295 INT_SET(free->di_core.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC); 295 INT_SET(free->di_core.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
296 INT_SET(free->di_core.di_version, ARCH_CONVERT, version); 296 INT_SET(free->di_core.di_version, ARCH_CONVERT, version);
297 INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO); 297 INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
298 xfs_ialloc_log_di(tp, fbuf, i, 298 xfs_ialloc_log_di(tp, fbuf, i,
299 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); 299 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
300 } 300 }
301 xfs_trans_inode_alloc_buf(tp, fbuf); 301 xfs_trans_inode_alloc_buf(tp, fbuf);
302 } 302 }
303 be32_add(&agi->agi_count, newlen); 303 be32_add(&agi->agi_count, newlen);
304 be32_add(&agi->agi_freecount, newlen); 304 be32_add(&agi->agi_freecount, newlen);
305 down_read(&args.mp->m_peraglock); 305 down_read(&args.mp->m_peraglock);
306 args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen; 306 args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen;
307 up_read(&args.mp->m_peraglock); 307 up_read(&args.mp->m_peraglock);
308 agi->agi_newino = cpu_to_be32(newino); 308 agi->agi_newino = cpu_to_be32(newino);
309 /* 309 /*
310 * Insert records describing the new inode chunk into the btree. 310 * Insert records describing the new inode chunk into the btree.
311 */ 311 */
312 cur = xfs_btree_init_cursor(args.mp, tp, agbp, 312 cur = xfs_btree_init_cursor(args.mp, tp, agbp,
313 be32_to_cpu(agi->agi_seqno), 313 be32_to_cpu(agi->agi_seqno),
314 XFS_BTNUM_INO, (xfs_inode_t *)0, 0); 314 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
315 for (thisino = newino; 315 for (thisino = newino;
316 thisino < newino + newlen; 316 thisino < newino + newlen;
317 thisino += XFS_INODES_PER_CHUNK) { 317 thisino += XFS_INODES_PER_CHUNK) {
318 if ((error = xfs_inobt_lookup_eq(cur, thisino, 318 if ((error = xfs_inobt_lookup_eq(cur, thisino,
319 XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { 319 XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
320 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 320 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
321 return error; 321 return error;
322 } 322 }
323 ASSERT(i == 0); 323 ASSERT(i == 0);
324 if ((error = xfs_inobt_insert(cur, &i))) { 324 if ((error = xfs_inobt_insert(cur, &i))) {
325 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 325 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
326 return error; 326 return error;
327 } 327 }
328 ASSERT(i == 1); 328 ASSERT(i == 1);
329 } 329 }
330 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 330 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
331 /* 331 /*
332 * Log allocation group header fields 332 * Log allocation group header fields
333 */ 333 */
334 xfs_ialloc_log_agi(tp, agbp, 334 xfs_ialloc_log_agi(tp, agbp,
335 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); 335 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
336 /* 336 /*
337 * Modify/log superblock values for inode count and inode free count. 337 * Modify/log superblock values for inode count and inode free count.
338 */ 338 */
339 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); 339 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
340 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); 340 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
341 *alloc = 1; 341 *alloc = 1;
342 return 0; 342 return 0;
343 } 343 }
344 344
345 STATIC __inline xfs_agnumber_t 345 STATIC __inline xfs_agnumber_t
346 xfs_ialloc_next_ag( 346 xfs_ialloc_next_ag(
347 xfs_mount_t *mp) 347 xfs_mount_t *mp)
348 { 348 {
349 xfs_agnumber_t agno; 349 xfs_agnumber_t agno;
350 350
351 spin_lock(&mp->m_agirotor_lock); 351 spin_lock(&mp->m_agirotor_lock);
352 agno = mp->m_agirotor; 352 agno = mp->m_agirotor;
353 if (++mp->m_agirotor == mp->m_maxagi) 353 if (++mp->m_agirotor == mp->m_maxagi)
354 mp->m_agirotor = 0; 354 mp->m_agirotor = 0;
355 spin_unlock(&mp->m_agirotor_lock); 355 spin_unlock(&mp->m_agirotor_lock);
356 356
357 return agno; 357 return agno;
358 } 358 }
359 359
360 /* 360 /*
361 * Select an allocation group to look for a free inode in, based on the parent 361 * Select an allocation group to look for a free inode in, based on the parent
362 * inode and then mode. Return the allocation group buffer. 362 * inode and then mode. Return the allocation group buffer.
363 */ 363 */
364 STATIC xfs_buf_t * /* allocation group buffer */ 364 STATIC xfs_buf_t * /* allocation group buffer */
365 xfs_ialloc_ag_select( 365 xfs_ialloc_ag_select(
366 xfs_trans_t *tp, /* transaction pointer */ 366 xfs_trans_t *tp, /* transaction pointer */
367 xfs_ino_t parent, /* parent directory inode number */ 367 xfs_ino_t parent, /* parent directory inode number */
368 mode_t mode, /* bits set to indicate file type */ 368 mode_t mode, /* bits set to indicate file type */
369 int okalloc) /* ok to allocate more space */ 369 int okalloc) /* ok to allocate more space */
370 { 370 {
371 xfs_buf_t *agbp; /* allocation group header buffer */ 371 xfs_buf_t *agbp; /* allocation group header buffer */
372 xfs_agnumber_t agcount; /* number of ag's in the filesystem */ 372 xfs_agnumber_t agcount; /* number of ag's in the filesystem */
373 xfs_agnumber_t agno; /* current ag number */ 373 xfs_agnumber_t agno; /* current ag number */
374 int flags; /* alloc buffer locking flags */ 374 int flags; /* alloc buffer locking flags */
375 xfs_extlen_t ineed; /* blocks needed for inode allocation */ 375 xfs_extlen_t ineed; /* blocks needed for inode allocation */
376 xfs_extlen_t longest = 0; /* longest extent available */ 376 xfs_extlen_t longest = 0; /* longest extent available */
377 xfs_mount_t *mp; /* mount point structure */ 377 xfs_mount_t *mp; /* mount point structure */
378 int needspace; /* file mode implies space allocated */ 378 int needspace; /* file mode implies space allocated */
379 xfs_perag_t *pag; /* per allocation group data */ 379 xfs_perag_t *pag; /* per allocation group data */
380 xfs_agnumber_t pagno; /* parent (starting) ag number */ 380 xfs_agnumber_t pagno; /* parent (starting) ag number */
381 381
382 /* 382 /*
383 * Files of these types need at least one block if length > 0 383 * Files of these types need at least one block if length > 0
384 * (and they won't fit in the inode, but that's hard to figure out). 384 * (and they won't fit in the inode, but that's hard to figure out).
385 */ 385 */
386 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); 386 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
387 mp = tp->t_mountp; 387 mp = tp->t_mountp;
388 agcount = mp->m_maxagi; 388 agcount = mp->m_maxagi;
389 if (S_ISDIR(mode)) 389 if (S_ISDIR(mode))
390 pagno = xfs_ialloc_next_ag(mp); 390 pagno = xfs_ialloc_next_ag(mp);
391 else { 391 else {
392 pagno = XFS_INO_TO_AGNO(mp, parent); 392 pagno = XFS_INO_TO_AGNO(mp, parent);
393 if (pagno >= agcount) 393 if (pagno >= agcount)
394 pagno = 0; 394 pagno = 0;
395 } 395 }
396 ASSERT(pagno < agcount); 396 ASSERT(pagno < agcount);
397 /* 397 /*
398 * Loop through allocation groups, looking for one with a little 398 * Loop through allocation groups, looking for one with a little
399 * free space in it. Note we don't look for free inodes, exactly. 399 * free space in it. Note we don't look for free inodes, exactly.
400 * Instead, we include whether there is a need to allocate inodes 400 * Instead, we include whether there is a need to allocate inodes
401 * to mean that blocks must be allocated for them, 401 * to mean that blocks must be allocated for them,
402 * if none are currently free. 402 * if none are currently free.
403 */ 403 */
404 agno = pagno; 404 agno = pagno;
405 flags = XFS_ALLOC_FLAG_TRYLOCK; 405 flags = XFS_ALLOC_FLAG_TRYLOCK;
406 down_read(&mp->m_peraglock); 406 down_read(&mp->m_peraglock);
407 for (;;) { 407 for (;;) {
408 pag = &mp->m_perag[agno]; 408 pag = &mp->m_perag[agno];
409 if (!pag->pagi_init) { 409 if (!pag->pagi_init) {
410 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 410 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
411 agbp = NULL; 411 agbp = NULL;
412 goto nextag; 412 goto nextag;
413 } 413 }
414 } else 414 } else
415 agbp = NULL; 415 agbp = NULL;
416 416
417 if (!pag->pagi_inodeok) { 417 if (!pag->pagi_inodeok) {
418 xfs_ialloc_next_ag(mp); 418 xfs_ialloc_next_ag(mp);
419 goto unlock_nextag; 419 goto unlock_nextag;
420 } 420 }
421 421
422 /* 422 /*
423 * Is there enough free space for the file plus a block 423 * Is there enough free space for the file plus a block
424 * of inodes (if we need to allocate some)? 424 * of inodes (if we need to allocate some)?
425 */ 425 */
426 ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp); 426 ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
427 if (ineed && !pag->pagf_init) { 427 if (ineed && !pag->pagf_init) {
428 if (agbp == NULL && 428 if (agbp == NULL &&
429 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 429 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
430 agbp = NULL; 430 agbp = NULL;
431 goto nextag; 431 goto nextag;
432 } 432 }
433 (void)xfs_alloc_pagf_init(mp, tp, agno, flags); 433 (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
434 } 434 }
435 if (!ineed || pag->pagf_init) { 435 if (!ineed || pag->pagf_init) {
436 if (ineed && !(longest = pag->pagf_longest)) 436 if (ineed && !(longest = pag->pagf_longest))
437 longest = pag->pagf_flcount > 0; 437 longest = pag->pagf_flcount > 0;
438 if (!ineed || 438 if (!ineed ||
439 (pag->pagf_freeblks >= needspace + ineed && 439 (pag->pagf_freeblks >= needspace + ineed &&
440 longest >= ineed && 440 longest >= ineed &&
441 okalloc)) { 441 okalloc)) {
442 if (agbp == NULL && 442 if (agbp == NULL &&
443 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 443 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
444 agbp = NULL; 444 agbp = NULL;
445 goto nextag; 445 goto nextag;
446 } 446 }
447 up_read(&mp->m_peraglock); 447 up_read(&mp->m_peraglock);
448 return agbp; 448 return agbp;
449 } 449 }
450 } 450 }
451 unlock_nextag: 451 unlock_nextag:
452 if (agbp) 452 if (agbp)
453 xfs_trans_brelse(tp, agbp); 453 xfs_trans_brelse(tp, agbp);
454 nextag: 454 nextag:
455 /* 455 /*
456 * No point in iterating over the rest, if we're shutting 456 * No point in iterating over the rest, if we're shutting
457 * down. 457 * down.
458 */ 458 */
459 if (XFS_FORCED_SHUTDOWN(mp)) { 459 if (XFS_FORCED_SHUTDOWN(mp)) {
460 up_read(&mp->m_peraglock); 460 up_read(&mp->m_peraglock);
461 return NULL; 461 return NULL;
462 } 462 }
463 agno++; 463 agno++;
464 if (agno >= agcount) 464 if (agno >= agcount)
465 agno = 0; 465 agno = 0;
466 if (agno == pagno) { 466 if (agno == pagno) {
467 if (flags == 0) { 467 if (flags == 0) {
468 up_read(&mp->m_peraglock); 468 up_read(&mp->m_peraglock);
469 return NULL; 469 return NULL;
470 } 470 }
471 flags = 0; 471 flags = 0;
472 } 472 }
473 } 473 }
474 } 474 }
475 475
/*
 * Visible inode allocation functions.
 */

/*
 * Allocate an inode on disk.
 * Mode is used to tell whether the new inode will need space, and whether
 * it is a directory.
 *
 * The arguments IO_agbp and alloc_done are defined to work within
 * the constraint of one allocation per transaction.
 * xfs_dialloc() is designed to be called twice if it has to do an
 * allocation to make more free inodes.  On the first call,
 * IO_agbp should be set to NULL. If an inode is available,
 * i.e., xfs_dialloc() did not need to do an allocation, an inode
 * number is returned.  In this case, IO_agbp would be set to the
 * current ag_buf and alloc_done set to false.
 * If an allocation needed to be done, xfs_dialloc would return
 * the current ag_buf in IO_agbp and set alloc_done to true.
 * The caller should then commit the current transaction, allocate a new
 * transaction, and call xfs_dialloc() again, passing in the previous
 * value of IO_agbp.  IO_agbp should be held across the transactions.
 * Since the agbp is locked across the two calls, the second call is
 * guaranteed to have a free inode available.
 *
 * Once we successfully pick an inode its number is returned and the
 * on-disk data structures are updated.  The inode itself is not read
 * in, since doing so would break ordering constraints with xfs_reclaim.
 *
 * Returns 0 on success (with *inop set to the new inode number, or to
 * NULLFSINO if the caller must retry / no inode was available), ENOSPC
 * when every AG is full and no more inode blocks may be allocated, or
 * a negative-path error code from the btree/AG helpers.
 */
int
xfs_dialloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ino_t	parent,		/* parent inode (directory) */
	mode_t		mode,		/* mode bits for new inode */
	int		okalloc,	/* ok to allocate more space */
	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */
	boolean_t	*alloc_done,	/* true if we needed to replenish
					   inode freelist */
	xfs_ino_t	*inop)		/* inode number allocated */
{
	xfs_agnumber_t	agcount;	/* number of allocation groups */
	xfs_buf_t	*agbp;		/* allocation group header's buffer */
	xfs_agnumber_t	agno;		/* allocation group number */
	xfs_agi_t	*agi;		/* allocation group header structure */
	xfs_btree_cur_t	*cur;		/* inode allocation btree cursor */
	int		error;		/* error return value */
	int		i;		/* result code */
	int		ialloced;	/* inode allocation status */
	int		noroom = 0;	/* no space for inode blk allocation */
	xfs_ino_t	ino;		/* fs-relative inode to be returned */
	/* REFERENCED */
	int		j;		/* result code */
	xfs_mount_t	*mp;		/* file system mount structure */
	int		offset;		/* index of inode in chunk */
	xfs_agino_t	pagino;		/* parent's a.g. relative inode # */
	xfs_agnumber_t	pagno;		/* parent's allocation group number */
	xfs_inobt_rec_incore_t rec;	/* inode allocation record */
	xfs_agnumber_t	tagno;		/* testing allocation group number */
	xfs_btree_cur_t	*tcur;		/* temp cursor */
	xfs_inobt_rec_incore_t trec;	/* temp inode allocation record */


	if (*IO_agbp == NULL) {
		/*
		 * We do not have an agbp, so select an initial allocation
		 * group for inode allocation.
		 */
		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
		/*
		 * Couldn't find an allocation group satisfying the
		 * criteria, give up.
		 */
		if (!agbp) {
			*inop = NULLFSINO;
			return 0;
		}
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	} else {
		/*
		 * Continue where we left off before.  In this case, we
		 * know that the allocation group has free inodes.
		 * (Second call of the two-transaction protocol described
		 * above: the agbp lock held across the commit guarantees
		 * agi_freecount is still positive.)
		 */
		agbp = *IO_agbp;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
	}
	mp = tp->t_mountp;
	agcount = mp->m_sb.sb_agcount;
	agno = be32_to_cpu(agi->agi_seqno);
	tagno = agno;
	pagno = XFS_INO_TO_AGNO(mp, parent);
	pagino = XFS_INO_TO_AGINO(mp, parent);

	/*
	 * If we have already hit the ceiling of inode blocks then clear
	 * okalloc so we scan all available agi structures for a free
	 * inode.
	 */

	if (mp->m_maxicount &&
	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
		noroom = 1;
		okalloc = 0;
	}

	/*
	 * Loop until we find an allocation group that either has free inodes
	 * or in which we can allocate some inodes.  Iterate through the
	 * allocation groups upward, wrapping at the end.
	 *
	 * Note: agi_freecount is big-endian on disk, but zero is zero in
	 * any byte order, so testing the raw value here is safe.
	 */
	*alloc_done = B_FALSE;
	while (!agi->agi_freecount) {
		/*
		 * Don't do anything if we're not supposed to allocate
		 * any blocks, just go on to the next ag.
		 */
		if (okalloc) {
			/*
			 * Try to allocate some new inodes in the allocation
			 * group.
			 */
			if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
				xfs_trans_brelse(tp, agbp);
				if (error == ENOSPC) {
					/* AG full: not an error to the caller,
					 * just no inode available here. */
					*inop = NULLFSINO;
					return 0;
				} else
					return error;
			}
			if (ialloced) {
				/*
				 * We successfully allocated some inodes, return
				 * the current context to the caller so that it
				 * can commit the current transaction and call
				 * us again where we left off.
				 */
				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
				*alloc_done = B_TRUE;
				*IO_agbp = agbp;
				*inop = NULLFSINO;
				return 0;
			}
		}
		/*
		 * If it failed, give up on this ag.
		 */
		xfs_trans_brelse(tp, agbp);
		/*
		 * Go on to the next ag: get its ag header.
		 */
nextag:
		if (++tagno == agcount)
			tagno = 0;
		if (tagno == agno) {
			/* Wrapped all the way around: every AG was tried. */
			*inop = NULLFSINO;
			return noroom ? ENOSPC : 0;
		}
		down_read(&mp->m_peraglock);
		if (mp->m_perag[tagno].pagi_inodeok == 0) {
			up_read(&mp->m_peraglock);
			goto nextag;
		}
		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
		up_read(&mp->m_peraglock);
		if (error)
			goto nextag;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	}
	/*
	 * Here with an allocation group that has a free inode.
	 * Reset agno since we may have chosen a new ag in the
	 * loop above.
	 */
	agno = tagno;
	*IO_agbp = NULL;
	cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno),
				    XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
	/*
	 * If pagino is 0 (this is the root inode allocation) use newino.
	 * This must work because we've just allocated some.
	 */
	if (!pagino)
		pagino = be32_to_cpu(agi->agi_newino);
#ifdef DEBUG
	/*
	 * Debug-only consistency check: walk the whole inode btree and
	 * verify the summed per-record free counts match agi_freecount.
	 * The ASSERT tolerates a mismatch during a forced shutdown, when
	 * in-core counters can legitimately be stale.
	 */
	if (cur->bc_nlevels == 1) {
		int freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);

		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	/*
	 * If in the same a.g. as the parent, try to get near the parent.
	 */
	if (pagno == agno) {
		if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
			goto error0;
		if (i != 0 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * Found a free inode in the same chunk
			 * as parent, done.
			 */
		}
		/*
		 * In the same a.g. as parent, but parent's chunk is full.
		 */
		else {
			int	doneleft;	/* done, to the left */
			int	doneright;	/* done, to the right */

			/* error may be set from the get_rec in the condition
			 * above; it was not checked on that path. */
			if (error)
				goto error0;
			ASSERT(i == 1);
			ASSERT(j == 1);
			/*
			 * Duplicate the cursor, search left & right
			 * simultaneously.
			 */
			if ((error = xfs_btree_dup_cursor(cur, &tcur)))
				goto error0;
			/*
			 * Search left with tcur, back up 1 record.
			 */
			if ((error = xfs_inobt_decrement(tcur, 0, &i)))
				goto error1;
			doneleft = !i;
			if (!doneleft) {
				if ((error = xfs_inobt_get_rec(tcur,
						&trec.ir_startino,
						&trec.ir_freecount,
						&trec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Search right with cur, go forward 1 record.
			 */
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error1;
			doneright = !i;
			if (!doneright) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount,
						&rec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Loop until we find the closest inode chunk
			 * with a free one.
			 */
			while (!doneleft || !doneright) {
				int	useleft;  /* using left inode
						     chunk this time */

				/*
				 * Figure out which block is closer,
				 * if both are valid.
				 */
				if (!doneleft && !doneright)
					useleft =
						pagino -
						(trec.ir_startino +
						 XFS_INODES_PER_CHUNK - 1) <
						 rec.ir_startino - pagino;
				else
					useleft = !doneleft;
				/*
				 * If checking the left, does it have
				 * free inodes?
				 */
				if (useleft && trec.ir_freecount) {
					/*
					 * Yes, set it up as the chunk to use.
					 * tcur becomes the live cursor; the
					 * old cur is torn down cleanly.
					 */
					rec = trec;
					xfs_btree_del_cursor(cur,
						XFS_BTREE_NOERROR);
					cur = tcur;
					break;
				}
				/*
				 * If checking the right, does it have
				 * free inodes?
				 */
				if (!useleft && rec.ir_freecount) {
					/*
					 * Yes, it's already set up.
					 */
					xfs_btree_del_cursor(tcur,
						XFS_BTREE_NOERROR);
					break;
				}
				/*
				 * If used the left, get another one
				 * further left.
				 */
				if (useleft) {
					if ((error = xfs_inobt_decrement(tcur, 0,
							&i)))
						goto error1;
					doneleft = !i;
					if (!doneleft) {
						if ((error = xfs_inobt_get_rec(
							    tcur,
							    &trec.ir_startino,
							    &trec.ir_freecount,
							    &trec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
				/*
				 * If used the right, get another one
				 * further right.
				 */
				else {
					if ((error = xfs_inobt_increment(cur, 0,
							&i)))
						goto error1;
					doneright = !i;
					if (!doneright) {
						if ((error = xfs_inobt_get_rec(
							    cur,
							    &rec.ir_startino,
							    &rec.ir_freecount,
							    &rec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
			}
			ASSERT(!doneleft || !doneright);
		}
	}
	/*
	 * In a different a.g. from the parent.
	 * See if the most recently allocated block has any free.
	 */
	else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
		if ((error = xfs_inobt_lookup_eq(cur,
				be32_to_cpu(agi->agi_newino), 0, 0, &i)))
			goto error0;
		if (i == 1 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * The last chunk allocated in the group still has
			 * a free inode.
			 */
		}
		/*
		 * None left in the last group, search the whole a.g.
		 */
		else {
			if (error)
				goto error0;
			if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
				goto error0;
			ASSERT(i == 1);
			for (;;) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount, &rec.ir_free,
						&i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
				if (rec.ir_freecount > 0)
					break;
				if ((error = xfs_inobt_increment(cur, 0, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			}
		}
	}
	/*
	 * rec now describes a chunk with at least one free inode;
	 * pick the first free slot and mark it allocated on disk.
	 */
	offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
	ASSERT(offset >= 0);
	ASSERT(offset < XFS_INODES_PER_CHUNK);
	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
				   XFS_INODES_PER_CHUNK) == 0);
	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
	XFS_INOBT_CLR_FREE(&rec, offset);
	rec.ir_freecount--;
	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
			rec.ir_free)))
		goto error0;
	be32_add(&agi->agi_freecount, -1);
	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
	down_read(&mp->m_peraglock);
	mp->m_perag[tagno].pagi_freecount--;
	up_read(&mp->m_peraglock);
#ifdef DEBUG
	/*
	 * Re-run the free-count consistency check after the update,
	 * again tolerating a mismatch under forced shutdown.
	 */
	if (cur->bc_nlevels == 1) {
		int freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);
		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
	*inop = ino;
	return 0;
error1:
	/* Reached only while both cursors are live: free tcur first. */
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
921 921
922 /* 922 /*
923 * Free disk inode. Carefully avoids touching the incore inode, all 923 * Free disk inode. Carefully avoids touching the incore inode, all
924 * manipulations incore are the caller's responsibility. 924 * manipulations incore are the caller's responsibility.
925 * The on-disk inode is not changed by this operation, only the 925 * The on-disk inode is not changed by this operation, only the
926 * btree (free inode mask) is changed. 926 * btree (free inode mask) is changed.
927 */ 927 */
928 int 928 int
929 xfs_difree( 929 xfs_difree(
930 xfs_trans_t *tp, /* transaction pointer */ 930 xfs_trans_t *tp, /* transaction pointer */
931 xfs_ino_t inode, /* inode to be freed */ 931 xfs_ino_t inode, /* inode to be freed */
932 xfs_bmap_free_t *flist, /* extents to free */ 932 xfs_bmap_free_t *flist, /* extents to free */
933 int *delete, /* set if inode cluster was deleted */ 933 int *delete, /* set if inode cluster was deleted */
934 xfs_ino_t *first_ino) /* first inode in deleted cluster */ 934 xfs_ino_t *first_ino) /* first inode in deleted cluster */
935 { 935 {
936 /* REFERENCED */ 936 /* REFERENCED */
937 xfs_agblock_t agbno; /* block number containing inode */ 937 xfs_agblock_t agbno; /* block number containing inode */
938 xfs_buf_t *agbp; /* buffer containing allocation group header */ 938 xfs_buf_t *agbp; /* buffer containing allocation group header */
939 xfs_agino_t agino; /* inode number relative to allocation group */ 939 xfs_agino_t agino; /* inode number relative to allocation group */
940 xfs_agnumber_t agno; /* allocation group number */ 940 xfs_agnumber_t agno; /* allocation group number */
941 xfs_agi_t *agi; /* allocation group header */ 941 xfs_agi_t *agi; /* allocation group header */
942 xfs_btree_cur_t *cur; /* inode btree cursor */ 942 xfs_btree_cur_t *cur; /* inode btree cursor */
943 int error; /* error return value */ 943 int error; /* error return value */
944 int i; /* result code */ 944 int i; /* result code */
945 int ilen; /* inodes in an inode cluster */ 945 int ilen; /* inodes in an inode cluster */
946 xfs_mount_t *mp; /* mount structure for filesystem */ 946 xfs_mount_t *mp; /* mount structure for filesystem */
947 int off; /* offset of inode in inode chunk */ 947 int off; /* offset of inode in inode chunk */
948 xfs_inobt_rec_incore_t rec; /* btree record */ 948 xfs_inobt_rec_incore_t rec; /* btree record */
949 949
950 mp = tp->t_mountp; 950 mp = tp->t_mountp;
951 951
952 /* 952 /*
953 * Break up inode number into its components. 953 * Break up inode number into its components.
954 */ 954 */
955 agno = XFS_INO_TO_AGNO(mp, inode); 955 agno = XFS_INO_TO_AGNO(mp, inode);
956 if (agno >= mp->m_sb.sb_agcount) { 956 if (agno >= mp->m_sb.sb_agcount) {
957 cmn_err(CE_WARN, 957 cmn_err(CE_WARN,
958 "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", 958 "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.",
959 agno, mp->m_sb.sb_agcount, mp->m_fsname); 959 agno, mp->m_sb.sb_agcount, mp->m_fsname);
960 ASSERT(0); 960 ASSERT(0);
961 return XFS_ERROR(EINVAL); 961 return XFS_ERROR(EINVAL);
962 } 962 }
963 agino = XFS_INO_TO_AGINO(mp, inode); 963 agino = XFS_INO_TO_AGINO(mp, inode);
964 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 964 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
965 cmn_err(CE_WARN, 965 cmn_err(CE_WARN,
966 "xfs_difree: inode != XFS_AGINO_TO_INO() " 966 "xfs_difree: inode != XFS_AGINO_TO_INO() "
967 "(%llu != %llu) on %s. Returning EINVAL.", 967 "(%llu != %llu) on %s. Returning EINVAL.",
968 (unsigned long long)inode, 968 (unsigned long long)inode,
969 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino), 969 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
970 mp->m_fsname); 970 mp->m_fsname);
971 ASSERT(0); 971 ASSERT(0);
972 return XFS_ERROR(EINVAL); 972 return XFS_ERROR(EINVAL);
973 } 973 }
974 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 974 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
975 if (agbno >= mp->m_sb.sb_agblocks) { 975 if (agbno >= mp->m_sb.sb_agblocks) {
976 cmn_err(CE_WARN, 976 cmn_err(CE_WARN,
977 "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", 977 "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.",
978 agbno, mp->m_sb.sb_agblocks, mp->m_fsname); 978 agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
979 ASSERT(0); 979 ASSERT(0);
980 return XFS_ERROR(EINVAL); 980 return XFS_ERROR(EINVAL);
981 } 981 }
982 /* 982 /*
983 * Get the allocation group header. 983 * Get the allocation group header.
984 */ 984 */
985 down_read(&mp->m_peraglock); 985 down_read(&mp->m_peraglock);
986 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 986 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
987 up_read(&mp->m_peraglock); 987 up_read(&mp->m_peraglock);
988 if (error) { 988 if (error) {
989 cmn_err(CE_WARN, 989 cmn_err(CE_WARN,
990 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 990 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
991 error, mp->m_fsname); 991 error, mp->m_fsname);
992 return error; 992 return error;
993 } 993 }
994 agi = XFS_BUF_TO_AGI(agbp); 994 agi = XFS_BUF_TO_AGI(agbp);
995 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 995 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
996 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 996 ASSERT(agbno < be32_to_cpu(agi->agi_length));
997 /* 997 /*
998 * Initialize the cursor. 998 * Initialize the cursor.
999 */ 999 */
1000 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, 1000 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
1001 (xfs_inode_t *)0, 0); 1001 (xfs_inode_t *)0, 0);
1002 #ifdef DEBUG 1002 #ifdef DEBUG
1003 if (cur->bc_nlevels == 1) { 1003 if (cur->bc_nlevels == 1) {
1004 int freecount = 0; 1004 int freecount = 0;
1005 1005
1006 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1006 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1007 goto error0; 1007 goto error0;
1008 do { 1008 do {
1009 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, 1009 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
1010 &rec.ir_freecount, &rec.ir_free, &i))) 1010 &rec.ir_freecount, &rec.ir_free, &i)))
1011 goto error0; 1011 goto error0;
1012 if (i) { 1012 if (i) {
1013 freecount += rec.ir_freecount; 1013 freecount += rec.ir_freecount;
1014 if ((error = xfs_inobt_increment(cur, 0, &i))) 1014 if ((error = xfs_inobt_increment(cur, 0, &i)))
1015 goto error0; 1015 goto error0;
1016 } 1016 }
1017 } while (i == 1); 1017 } while (i == 1);
1018 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || 1018 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1019 XFS_FORCED_SHUTDOWN(mp)); 1019 XFS_FORCED_SHUTDOWN(mp));
1020 } 1020 }
1021 #endif 1021 #endif
1022 /* 1022 /*
1023 * Look for the entry describing this inode. 1023 * Look for the entry describing this inode.
1024 */ 1024 */
1025 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { 1025 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
1026 cmn_err(CE_WARN, 1026 cmn_err(CE_WARN,
1027 "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", 1027 "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.",
1028 error, mp->m_fsname); 1028 error, mp->m_fsname);
1029 goto error0; 1029 goto error0;
1030 } 1030 }
1031 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1031 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1032 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, 1032 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount,
1033 &rec.ir_free, &i))) { 1033 &rec.ir_free, &i))) {
1034 cmn_err(CE_WARN, 1034 cmn_err(CE_WARN,
1035 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", 1035 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
1036 error, mp->m_fsname); 1036 error, mp->m_fsname);
1037 goto error0; 1037 goto error0;
1038 } 1038 }
1039 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1039 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1040 /* 1040 /*
1041 * Get the offset in the inode chunk. 1041 * Get the offset in the inode chunk.
1042 */ 1042 */
1043 off = agino - rec.ir_startino; 1043 off = agino - rec.ir_startino;
1044 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); 1044 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
1045 ASSERT(!XFS_INOBT_IS_FREE(&rec, off)); 1045 ASSERT(!XFS_INOBT_IS_FREE(&rec, off));
1046 /* 1046 /*
1047 * Mark the inode free & increment the count. 1047 * Mark the inode free & increment the count.
1048 */ 1048 */
1049 XFS_INOBT_SET_FREE(&rec, off); 1049 XFS_INOBT_SET_FREE(&rec, off);
1050 rec.ir_freecount++; 1050 rec.ir_freecount++;
1051 1051
1052 /* 1052 /*
1053 * When an inode cluster is free, it becomes eligible for removal 1053 * When an inode cluster is free, it becomes eligible for removal
1054 */ 1054 */
1055 if ((mp->m_flags & XFS_MOUNT_IDELETE) && 1055 if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
1056 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { 1056 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
1057 1057
1058 *delete = 1; 1058 *delete = 1;
1059 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); 1059 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
1060 1060
1061 /* 1061 /*
1062 * Remove the inode cluster from the AGI B+Tree, adjust the 1062 * Remove the inode cluster from the AGI B+Tree, adjust the
1063 * AGI and Superblock inode counts, and mark the disk space 1063 * AGI and Superblock inode counts, and mark the disk space
1064 * to be freed when the transaction is committed. 1064 * to be freed when the transaction is committed.
1065 */ 1065 */
1066 ilen = XFS_IALLOC_INODES(mp); 1066 ilen = XFS_IALLOC_INODES(mp);
1067 be32_add(&agi->agi_count, -ilen); 1067 be32_add(&agi->agi_count, -ilen);
1068 be32_add(&agi->agi_freecount, -(ilen - 1)); 1068 be32_add(&agi->agi_freecount, -(ilen - 1));
1069 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1069 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1070 down_read(&mp->m_peraglock); 1070 down_read(&mp->m_peraglock);
1071 mp->m_perag[agno].pagi_freecount -= ilen - 1; 1071 mp->m_perag[agno].pagi_freecount -= ilen - 1;
1072 up_read(&mp->m_peraglock); 1072 up_read(&mp->m_peraglock);
1073 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1073 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1074 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1074 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1075 1075
1076 if ((error = xfs_inobt_delete(cur, &i))) { 1076 if ((error = xfs_inobt_delete(cur, &i))) {
1077 cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n", 1077 cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
1078 error, mp->m_fsname); 1078 error, mp->m_fsname);
1079 goto error0; 1079 goto error0;
1080 } 1080 }
1081 1081
1082 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, 1082 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
1083 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), 1083 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
1084 XFS_IALLOC_BLOCKS(mp), flist, mp); 1084 XFS_IALLOC_BLOCKS(mp), flist, mp);
1085 } else { 1085 } else {
1086 *delete = 0; 1086 *delete = 0;
1087 1087
1088 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { 1088 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
1089 cmn_err(CE_WARN, 1089 cmn_err(CE_WARN,
1090 "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", 1090 "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
1091 error, mp->m_fsname); 1091 error, mp->m_fsname);
1092 goto error0; 1092 goto error0;
1093 } 1093 }
1094 /* 1094 /*
1095 * Change the inode free counts and log the ag/sb changes. 1095 * Change the inode free counts and log the ag/sb changes.
1096 */ 1096 */
1097 be32_add(&agi->agi_freecount, 1); 1097 be32_add(&agi->agi_freecount, 1);
1098 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1098 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1099 down_read(&mp->m_peraglock); 1099 down_read(&mp->m_peraglock);
1100 mp->m_perag[agno].pagi_freecount++; 1100 mp->m_perag[agno].pagi_freecount++;
1101 up_read(&mp->m_peraglock); 1101 up_read(&mp->m_peraglock);
1102 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1102 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1103 } 1103 }
1104 1104
1105 #ifdef DEBUG 1105 #ifdef DEBUG
1106 if (cur->bc_nlevels == 1) { 1106 if (cur->bc_nlevels == 1) {
1107 int freecount = 0; 1107 int freecount = 0;
1108 1108
1109 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) 1109 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1110 goto error0; 1110 goto error0;
1111 do { 1111 do {
1112 if ((error = xfs_inobt_get_rec(cur, 1112 if ((error = xfs_inobt_get_rec(cur,
1113 &rec.ir_startino, 1113 &rec.ir_startino,
1114 &rec.ir_freecount, 1114 &rec.ir_freecount,
1115 &rec.ir_free, &i))) 1115 &rec.ir_free, &i)))
1116 goto error0; 1116 goto error0;
1117 if (i) { 1117 if (i) {
1118 freecount += rec.ir_freecount; 1118 freecount += rec.ir_freecount;
1119 if ((error = xfs_inobt_increment(cur, 0, &i))) 1119 if ((error = xfs_inobt_increment(cur, 0, &i)))
1120 goto error0; 1120 goto error0;
1121 } 1121 }
1122 } while (i == 1); 1122 } while (i == 1);
1123 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || 1123 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1124 XFS_FORCED_SHUTDOWN(mp)); 1124 XFS_FORCED_SHUTDOWN(mp));
1125 } 1125 }
1126 #endif 1126 #endif
1127 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1127 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1128 return 0; 1128 return 0;
1129 1129
1130 error0: 1130 error0:
1131 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1131 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1132 return error; 1132 return error;
1133 } 1133 }
1134 1134
/*
 * Return the location of the inode in bno/off, for mapping it into a buffer.
 *
 * On success, *bno is the filesystem block containing (the cluster of) the
 * inode, *off is the inode's index within that block/cluster, and *len is
 * the number of blocks in the cluster mapping.  Returns 0 on success or a
 * positive XFS error code (EINVAL for a malformed inode number).
 */
/*ARGSUSED*/
int
xfs_dilocate(
	xfs_mount_t	*mp,	/* file system mount structure */
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_ino_t	ino,	/* inode to locate */
	xfs_fsblock_t	*bno,	/* output: block containing inode */
	int		*len,	/* output: num blocks in inode cluster */
	int		*off,	/* output: index in block of inode */
	uint		flags)	/* flags concerning inode lookup */
{
	xfs_agblock_t	agbno;	/* block number of inode in the alloc group */
	xfs_buf_t	*agbp;	/* agi buffer */
	xfs_agino_t	agino;	/* inode number within alloc group */
	xfs_agnumber_t	agno;	/* allocation group number */
	int		blks_per_cluster; /* num blocks per inode cluster */
	xfs_agblock_t	chunk_agbno;	/* first block in inode chunk */
	xfs_agino_t	chunk_agino;	/* first agino in inode chunk */
	__int32_t	chunk_cnt;	/* count of free inodes in chunk */
	xfs_inofree_t	chunk_free;	/* mask of free inodes in chunk */
	xfs_agblock_t	cluster_agbno;	/* first block in inode cluster */
	xfs_btree_cur_t	*cur;	/* inode btree cursor */
	int		error;	/* error code */
	int		i;	/* temp state */
	int		offset;	/* index of inode in its buffer */
	int		offset_agbno;	/* blks from chunk start to inode */

	ASSERT(ino != NULLFSINO);
	/*
	 * Split up the inode number into its parts.
	 */
	agno = XFS_INO_TO_AGNO(mp, ino);
	agino = XFS_INO_TO_AGINO(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
	/*
	 * Sanity check the decomposed number; the round-trip check catches
	 * inode numbers whose bit layout doesn't match this filesystem.
	 */
	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
#ifdef DEBUG
		/* no diagnostics for bulkstat, ino comes from userspace */
		if (flags & XFS_IMAP_BULKSTAT)
			return XFS_ERROR(EINVAL);
		if (agno >= mp->m_sb.sb_agcount) {
			xfs_fs_cmn_err(CE_ALERT, mp,
					"xfs_dilocate: agno (%d) >= "
					"mp->m_sb.sb_agcount (%d)",
					agno, mp->m_sb.sb_agcount);
		}
		if (agbno >= mp->m_sb.sb_agblocks) {
			xfs_fs_cmn_err(CE_ALERT, mp,
					"xfs_dilocate: agbno (0x%llx) >= "
					"mp->m_sb.sb_agblocks (0x%lx)",
					(unsigned long long) agbno,
					(unsigned long) mp->m_sb.sb_agblocks);
		}
		if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
			xfs_fs_cmn_err(CE_ALERT, mp,
					"xfs_dilocate: ino (0x%llx) != "
					"XFS_AGINO_TO_INO(mp, agno, agino) "
					"(0x%llx)",
					ino, XFS_AGINO_TO_INO(mp, agno, agino));
		}
		xfs_stack_trace();
#endif /* DEBUG */
		return XFS_ERROR(EINVAL);
	}
	/*
	 * If a cluster fits in one block, or the caller didn't ask for a
	 * cluster lookup, the mapping is a single block: compute it directly.
	 */
	if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) ||
	    !(flags & XFS_IMAP_LOOKUP)) {
		offset = XFS_INO_TO_OFFSET(mp, ino);
		ASSERT(offset < mp->m_sb.sb_inopblock);
		*bno = XFS_AGB_TO_FSB(mp, agno, agbno);
		*off = offset;
		*len = 1;
		return 0;
	}
	blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
	/*
	 * Caller passed in a known cluster block (*bno != NULLFSBLOCK):
	 * just compute the inode's offset within that cluster.
	 */
	if (*bno != NULLFSBLOCK) {
		offset = XFS_INO_TO_OFFSET(mp, ino);
		ASSERT(offset < mp->m_sb.sb_inopblock);
		cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno);
		*off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
			offset;
		*len = blks_per_cluster;
		return 0;
	}
	/*
	 * Find the start of the inode chunk containing this inode: either
	 * from the alignment mask, or by looking it up in the inode btree.
	 */
	if (mp->m_inoalign_mask) {
		offset_agbno = agbno & mp->m_inoalign_mask;
		chunk_agbno = agbno - offset_agbno;
	} else {
		down_read(&mp->m_peraglock);
		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
		up_read(&mp->m_peraglock);
		if (error) {
#ifdef DEBUG
			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
					"xfs_ialloc_read_agi() returned "
					"error %d, agno %d",
					error, agno);
#endif /* DEBUG */
			return error;
		}
		cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
			(xfs_inode_t *)0, 0);
		if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
#ifdef DEBUG
			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
					"xfs_inobt_lookup_le() failed");
#endif /* DEBUG */
			goto error0;
		}
		if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
				&chunk_free, &i))) {
#ifdef DEBUG
			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
					"xfs_inobt_get_rec() failed");
#endif /* DEBUG */
			goto error0;
		}
		/*
		 * i == 0 means no record matched; note EINVAL but fall
		 * through so the buffer and cursor are released normally
		 * before returning the error.
		 */
		if (i == 0) {
#ifdef DEBUG
			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
					"xfs_inobt_get_rec() failed");
#endif /* DEBUG */
			error = XFS_ERROR(EINVAL);
		}
		xfs_trans_brelse(tp, agbp);
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
		if (error)
			return error;
		chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
		offset_agbno = agbno - chunk_agbno;
	}
	ASSERT(agbno >= chunk_agbno);
	/*
	 * Round down to the cluster boundary within the chunk, then compute
	 * the inode's index within the cluster.
	 */
	cluster_agbno = chunk_agbno +
		((offset_agbno / blks_per_cluster) * blks_per_cluster);
	offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
		XFS_INO_TO_OFFSET(mp, ino);
	*bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno);
	*off = offset;
	*len = blks_per_cluster;
	return 0;
error0:
	/* release AGI buffer and tear down the cursor on the error path */
	xfs_trans_brelse(tp, agbp);
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
1281 1282
1282 /* 1283 /*
1283 * Compute and fill in value of m_in_maxlevels. 1284 * Compute and fill in value of m_in_maxlevels.
1284 */ 1285 */
1285 void 1286 void
1286 xfs_ialloc_compute_maxlevels( 1287 xfs_ialloc_compute_maxlevels(
1287 xfs_mount_t *mp) /* file system mount structure */ 1288 xfs_mount_t *mp) /* file system mount structure */
1288 { 1289 {
1289 int level; 1290 int level;
1290 uint maxblocks; 1291 uint maxblocks;
1291 uint maxleafents; 1292 uint maxleafents;
1292 int minleafrecs; 1293 int minleafrecs;
1293 int minnoderecs; 1294 int minnoderecs;
1294 1295
1295 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> 1296 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
1296 XFS_INODES_PER_CHUNK_LOG; 1297 XFS_INODES_PER_CHUNK_LOG;
1297 minleafrecs = mp->m_alloc_mnr[0]; 1298 minleafrecs = mp->m_alloc_mnr[0];
1298 minnoderecs = mp->m_alloc_mnr[1]; 1299 minnoderecs = mp->m_alloc_mnr[1];
1299 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1300 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1300 for (level = 1; maxblocks > 1; level++) 1301 for (level = 1; maxblocks > 1; level++)
1301 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1302 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1302 mp->m_in_maxlevels = level; 1303 mp->m_in_maxlevels = level;
1303 } 1304 }
1304 1305
1305 /* 1306 /*
1306 * Log specified fields for the ag hdr (inode section) 1307 * Log specified fields for the ag hdr (inode section)
1307 */ 1308 */
1308 void 1309 void
1309 xfs_ialloc_log_agi( 1310 xfs_ialloc_log_agi(
1310 xfs_trans_t *tp, /* transaction pointer */ 1311 xfs_trans_t *tp, /* transaction pointer */
1311 xfs_buf_t *bp, /* allocation group header buffer */ 1312 xfs_buf_t *bp, /* allocation group header buffer */
1312 int fields) /* bitmask of fields to log */ 1313 int fields) /* bitmask of fields to log */
1313 { 1314 {
1314 int first; /* first byte number */ 1315 int first; /* first byte number */
1315 int last; /* last byte number */ 1316 int last; /* last byte number */
1316 static const short offsets[] = { /* field starting offsets */ 1317 static const short offsets[] = { /* field starting offsets */
1317 /* keep in sync with bit definitions */ 1318 /* keep in sync with bit definitions */
1318 offsetof(xfs_agi_t, agi_magicnum), 1319 offsetof(xfs_agi_t, agi_magicnum),
1319 offsetof(xfs_agi_t, agi_versionnum), 1320 offsetof(xfs_agi_t, agi_versionnum),
1320 offsetof(xfs_agi_t, agi_seqno), 1321 offsetof(xfs_agi_t, agi_seqno),
1321 offsetof(xfs_agi_t, agi_length), 1322 offsetof(xfs_agi_t, agi_length),
1322 offsetof(xfs_agi_t, agi_count), 1323 offsetof(xfs_agi_t, agi_count),
1323 offsetof(xfs_agi_t, agi_root), 1324 offsetof(xfs_agi_t, agi_root),
1324 offsetof(xfs_agi_t, agi_level), 1325 offsetof(xfs_agi_t, agi_level),
1325 offsetof(xfs_agi_t, agi_freecount), 1326 offsetof(xfs_agi_t, agi_freecount),
1326 offsetof(xfs_agi_t, agi_newino), 1327 offsetof(xfs_agi_t, agi_newino),
1327 offsetof(xfs_agi_t, agi_dirino), 1328 offsetof(xfs_agi_t, agi_dirino),
1328 offsetof(xfs_agi_t, agi_unlinked), 1329 offsetof(xfs_agi_t, agi_unlinked),
1329 sizeof(xfs_agi_t) 1330 sizeof(xfs_agi_t)
1330 }; 1331 };
1331 #ifdef DEBUG 1332 #ifdef DEBUG
1332 xfs_agi_t *agi; /* allocation group header */ 1333 xfs_agi_t *agi; /* allocation group header */
1333 1334
1334 agi = XFS_BUF_TO_AGI(bp); 1335 agi = XFS_BUF_TO_AGI(bp);
1335 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 1336 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
1336 #endif 1337 #endif
1337 /* 1338 /*
1338 * Compute byte offsets for the first and last fields. 1339 * Compute byte offsets for the first and last fields.
1339 */ 1340 */
1340 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 1341 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
1341 /* 1342 /*
1342 * Log the allocation group inode header buffer. 1343 * Log the allocation group inode header buffer.
1343 */ 1344 */
1344 xfs_trans_log_buf(tp, bp, first, last); 1345 xfs_trans_log_buf(tp, bp, first, last);
1345 } 1346 }
1346 1347
/*
 * Read in the allocation group header (inode allocation section).
 *
 * On success *bpp holds the AGI buffer (still attached to the transaction)
 * and the per-AG free inode count is initialized if this is the first read.
 * Returns 0 or a positive XFS error (EFSCORRUPTED on a bad header).
 */
int
xfs_ialloc_read_agi(
	xfs_mount_t	*mp,		/* file system mount structure */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_buf_t	**bpp)		/* allocation group hdr buf */
{
	xfs_agi_t	*agi;		/* allocation group header */
	int		agi_ok;		/* agi is consistent */
	xfs_buf_t	*bp;		/* allocation group hdr buf */
	xfs_perag_t	*pag;		/* per allocation group data */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp);
	if (error)
		return error;
	ASSERT(bp && !XFS_BUF_GETERROR(bp));

	/*
	 * Validate the magic number of the agi block.
	 * XFS_TEST_ERROR can also inject a failure here under error
	 * injection testing even when the header is good.
	 */
	agi = XFS_BUF_TO_AGI(bp);
	agi_ok =
		be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
			XFS_RANDOM_IALLOC_READ_AGI))) {
		XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW,
				     mp, agi);
		/* release the buffer before failing */
		xfs_trans_brelse(tp, bp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	/* seed the in-core per-AG free count on first read */
	pag = &mp->m_perag[agno];
	if (!pag->pagi_init) {
		pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
		pag->pagi_init = 1;
	} else {
		/*
		 * It's possible for these to be out of sync if
		 * we are in the middle of a forced shutdown.
		 */
		ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
			XFS_FORCED_SHUTDOWN(mp));
	}

#ifdef DEBUG
	{
		int	i;

		/* unlinked buckets hold NULLAGINO when empty, never zero */
		for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
			ASSERT(agi->agi_unlinked[i]);
	}
#endif

	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
	*bpp = bp;
	return 0;
}
1412 1413
1 /* 1 /*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h" 31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h" 32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h" 33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h" 34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h" 35 #include "xfs_dinode.h"
36 #include "xfs_inode.h" 36 #include "xfs_inode.h"
37 #include "xfs_btree.h" 37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h" 38 #include "xfs_ialloc.h"
39 #include "xfs_quota.h" 39 #include "xfs_quota.h"
40 #include "xfs_utils.h" 40 #include "xfs_utils.h"
41 41
42 /* 42 /*
43 * Initialize the inode hash table for the newly mounted file system. 43 * Initialize the inode hash table for the newly mounted file system.
44 * Choose an initial table size based on user specified value, else 44 * Choose an initial table size based on user specified value, else
45 * use a simple algorithm using the maximum number of inodes as an 45 * use a simple algorithm using the maximum number of inodes as an
46 * indicator for table size, and clamp it between one and some large 46 * indicator for table size, and clamp it between one and some large
47 * number of pages. 47 * number of pages.
48 */ 48 */
49 void 49 void
50 xfs_ihash_init(xfs_mount_t *mp) 50 xfs_ihash_init(xfs_mount_t *mp)
51 { 51 {
52 __uint64_t icount; 52 __uint64_t icount;
53 uint i, flags = KM_SLEEP | KM_MAYFAIL; 53 uint i, flags = KM_SLEEP | KM_MAYFAIL;
54 54
55 if (!mp->m_ihsize) { 55 if (!mp->m_ihsize) {
56 icount = mp->m_maxicount ? mp->m_maxicount : 56 icount = mp->m_maxicount ? mp->m_maxicount :
57 (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog); 57 (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog);
58 mp->m_ihsize = 1 << max_t(uint, 8, 58 mp->m_ihsize = 1 << max_t(uint, 8,
59 (xfs_highbit64(icount) + 1) / 2); 59 (xfs_highbit64(icount) + 1) / 2);
60 mp->m_ihsize = min_t(uint, mp->m_ihsize, 60 mp->m_ihsize = min_t(uint, mp->m_ihsize,
61 (64 * NBPP) / sizeof(xfs_ihash_t)); 61 (64 * NBPP) / sizeof(xfs_ihash_t));
62 } 62 }
63 63
64 while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize * 64 while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize *
65 sizeof(xfs_ihash_t), flags))) { 65 sizeof(xfs_ihash_t), flags))) {
66 if ((mp->m_ihsize >>= 1) <= NBPP) 66 if ((mp->m_ihsize >>= 1) <= NBPP)
67 flags = KM_SLEEP; 67 flags = KM_SLEEP;
68 } 68 }
69 for (i = 0; i < mp->m_ihsize; i++) { 69 for (i = 0; i < mp->m_ihsize; i++) {
70 rwlock_init(&(mp->m_ihash[i].ih_lock)); 70 rwlock_init(&(mp->m_ihash[i].ih_lock));
71 } 71 }
72 } 72 }
73 73
74 /* 74 /*
75 * Free up structures allocated by xfs_ihash_init, at unmount time. 75 * Free up structures allocated by xfs_ihash_init, at unmount time.
76 */ 76 */
77 void 77 void
78 xfs_ihash_free(xfs_mount_t *mp) 78 xfs_ihash_free(xfs_mount_t *mp)
79 { 79 {
80 kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t)); 80 kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t));
81 mp->m_ihash = NULL; 81 mp->m_ihash = NULL;
82 } 82 }
83 83
84 /* 84 /*
85 * Initialize the inode cluster hash table for the newly mounted file system. 85 * Initialize the inode cluster hash table for the newly mounted file system.
86 * Its size is derived from the ihash table size. 86 * Its size is derived from the ihash table size.
87 */ 87 */
88 void 88 void
89 xfs_chash_init(xfs_mount_t *mp) 89 xfs_chash_init(xfs_mount_t *mp)
90 { 90 {
91 uint i; 91 uint i;
92 92
93 mp->m_chsize = max_t(uint, 1, mp->m_ihsize / 93 mp->m_chsize = max_t(uint, 1, mp->m_ihsize /
94 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)); 94 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog));
95 mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize); 95 mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
96 mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize 96 mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
97 * sizeof(xfs_chash_t), 97 * sizeof(xfs_chash_t),
98 KM_SLEEP); 98 KM_SLEEP);
99 for (i = 0; i < mp->m_chsize; i++) { 99 for (i = 0; i < mp->m_chsize; i++) {
100 spinlock_init(&mp->m_chash[i].ch_lock,"xfshash"); 100 spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
101 } 101 }
102 } 102 }
103 103
104 /* 104 /*
105 * Free up structures allocated by xfs_chash_init, at unmount time. 105 * Free up structures allocated by xfs_chash_init, at unmount time.
106 */ 106 */
107 void 107 void
108 xfs_chash_free(xfs_mount_t *mp) 108 xfs_chash_free(xfs_mount_t *mp)
109 { 109 {
110 int i; 110 int i;
111 111
112 for (i = 0; i < mp->m_chsize; i++) { 112 for (i = 0; i < mp->m_chsize; i++) {
113 spinlock_destroy(&mp->m_chash[i].ch_lock); 113 spinlock_destroy(&mp->m_chash[i].ch_lock);
114 } 114 }
115 115
116 kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t)); 116 kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t));
117 mp->m_chash = NULL; 117 mp->m_chash = NULL;
118 } 118 }
119 119
120 /* 120 /*
121 * Try to move an inode to the front of its hash list if possible 121 * Try to move an inode to the front of its hash list if possible
122 * (and if its not there already). Called right after obtaining 122 * (and if its not there already). Called right after obtaining
123 * the list version number and then dropping the read_lock on the 123 * the list version number and then dropping the read_lock on the
124 * hash list in question (which is done right after looking up the 124 * hash list in question (which is done right after looking up the
125 * inode in question...). 125 * inode in question...).
126 */ 126 */
STATIC void
xfs_ihash_promote(
	xfs_ihash_t	*ih,	/* hash chain the inode lives on */
	xfs_inode_t	*ip,	/* inode to move to the chain head */
	ulong		version)	/* chain version sampled under read lock */
{
	xfs_inode_t	*iq;

	/*
	 * Best-effort by design: skip if ip is already at the head, or if
	 * the write lock is contended (write_trylock fails).
	 */
	if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) {
		/*
		 * Only promote if the chain has not changed since the caller
		 * sampled its version stamp; otherwise ip may no longer be
		 * where we think it is.
		 */
		if (likely(version == ih->ih_version)) {
			/* remove from list */
			if ((iq = ip->i_next)) {
				iq->i_prevp = ip->i_prevp;
			}
			*ip->i_prevp = iq;

			/* insert at list head */
			iq = ih->ih_next;
			iq->i_prevp = &ip->i_next;
			ip->i_next = iq;
			ip->i_prevp = &ih->ih_next;
			ih->ih_next = ip;
		}
		write_unlock(&ih->ih_lock);
	}
}
153 153
154 /* 154 /*
155 * Look up an inode by number in the given file system. 155 * Look up an inode by number in the given file system.
156 * The inode is looked up in the hash table for the file system 156 * The inode is looked up in the hash table for the file system
157 * represented by the mount point parameter mp. Each bucket of 157 * represented by the mount point parameter mp. Each bucket of
158 * the hash table is guarded by an individual semaphore. 158 * the hash table is guarded by an individual semaphore.
159 * 159 *
160 * If the inode is found in the hash table, its corresponding vnode 160 * If the inode is found in the hash table, its corresponding vnode
161 * is obtained with a call to vn_get(). This call takes care of 161 * is obtained with a call to vn_get(). This call takes care of
162 * coordination with the reclamation of the inode and vnode. Note 162 * coordination with the reclamation of the inode and vnode. Note
163 * that the vmap structure is filled in while holding the hash lock. 163 * that the vmap structure is filled in while holding the hash lock.
164 * This gives us the state of the inode/vnode when we found it and 164 * This gives us the state of the inode/vnode when we found it and
165 * is used for coordination in vn_get(). 165 * is used for coordination in vn_get().
166 * 166 *
167 * If it is not in core, read it in from the file system's device and 167 * If it is not in core, read it in from the file system's device and
168 * add the inode into the hash table. 168 * add the inode into the hash table.
169 * 169 *
170 * The inode is locked according to the value of the lock_flags parameter. 170 * The inode is locked according to the value of the lock_flags parameter.
171 * This flag parameter indicates how and if the inode's IO lock and inode lock 171 * This flag parameter indicates how and if the inode's IO lock and inode lock
172 * should be taken. 172 * should be taken.
173 * 173 *
174 * mp -- the mount point structure for the current file system. It points 174 * mp -- the mount point structure for the current file system. It points
175 * to the inode hash table. 175 * to the inode hash table.
176 * tp -- a pointer to the current transaction if there is one. This is 176 * tp -- a pointer to the current transaction if there is one. This is
177 * simply passed through to the xfs_iread() call. 177 * simply passed through to the xfs_iread() call.
178 * ino -- the number of the inode desired. This is the unique identifier 178 * ino -- the number of the inode desired. This is the unique identifier
179 * within the file system for the inode being requested. 179 * within the file system for the inode being requested.
180 * lock_flags -- flags indicating how to lock the inode. See the comment 180 * lock_flags -- flags indicating how to lock the inode. See the comment
181 * for xfs_ilock() for a list of valid values. 181 * for xfs_ilock() for a list of valid values.
182 * bno -- the block number starting the buffer containing the inode, 182 * bno -- the block number starting the buffer containing the inode,
183 * if known (as by bulkstat), else 0. 183 * if known (as by bulkstat), else 0.
184 */ 184 */
STATIC int
xfs_iget_core(
	bhv_vnode_t	*vp,
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp,
	xfs_daddr_t	bno)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*ip;
	xfs_inode_t	*iq;
	bhv_vnode_t	*inode_vp;
	ulong		version;
	int		error;
	/* REFERENCED */
	xfs_chash_t	*ch;
	xfs_chashlist_t	*chl, *chlnew;
	SPLDECL(s);


	ih = XFS_IHASH(mp, ino);

	/*
	 * Restart point: taken whenever the hash chain or the candidate
	 * inode changed state underneath us (INEW, IRECLAIM, duplicate
	 * insert, Linux inode teardown).
	 */
again:
	read_lock(&ih->ih_lock);

	for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
		if (ip->i_ino == ino) {
			/*
			 * If INEW is set this inode is being set up
			 * we need to pause and try again.
			 */
			if (ip->i_flags & XFS_INEW) {
				read_unlock(&ih->ih_lock);
				delay(1);
				XFS_STATS_INC(xs_ig_frecycle);

				goto again;
			}

			inode_vp = XFS_ITOV_NULL(ip);
			if (inode_vp == NULL) {
				/*
				 * If IRECLAIM is set this inode is
				 * on its way out of the system,
				 * we need to pause and try again.
				 */
				if (ip->i_flags & XFS_IRECLAIM) {
					read_unlock(&ih->ih_lock);
					delay(1);
					XFS_STATS_INC(xs_ig_frecycle);

					goto again;
				}

				vn_trace_exit(vp, "xfs_iget.alloc",
					(inst_t *)__return_address);

				XFS_STATS_INC(xs_ig_found);

				/*
				 * Resurrect a reclaimable inode: clear the
				 * flag, promote it on its chain, and take it
				 * off the mount's reclaim list.
				 */
				ip->i_flags &= ~XFS_IRECLAIMABLE;
				version = ih->ih_version;
				read_unlock(&ih->ih_lock);
				xfs_ihash_promote(ih, ip, version);

				XFS_MOUNT_ILOCK(mp);
				list_del_init(&ip->i_reclaim);
				XFS_MOUNT_IUNLOCK(mp);

				goto finish_inode;

			} else if (vp != inode_vp) {
				struct inode *inode = vn_to_inode(inode_vp);

				/* The inode is being torn down, pause and
				 * try again.
				 */
				if (inode->i_state & (I_FREEING | I_CLEAR)) {
					read_unlock(&ih->ih_lock);
					delay(1);
					XFS_STATS_INC(xs_ig_frecycle);

					goto again;
				}
/* Chances are the other vnode (the one in the inode) is being torn
 * down right now, and we landed on top of it. Question is, what do
 * we do? Unhook the old inode and hook up the new one?
 */
				cmn_err(CE_PANIC,
		"xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
						inode_vp, vp);
			}

			/*
			 * Inode cache hit: if ip is not at the front of
			 * its hash chain, move it there now.
			 * Do this with the lock held for update, but
			 * do statistics after releasing the lock.
			 */
			version = ih->ih_version;
			read_unlock(&ih->ih_lock);
			xfs_ihash_promote(ih, ip, version);
			XFS_STATS_INC(xs_ig_found);

finish_inode:
			/*
			 * A zero on-disk mode is a freed inode: only a
			 * create (XFS_IGET_CREATE) may proceed, after
			 * reinitializing the iocore; plain lookups get
			 * ENOENT.
			 */
			if (ip->i_d.di_mode == 0) {
				if (!(flags & XFS_IGET_CREATE))
					return ENOENT;
				xfs_iocore_inode_reinit(ip);
			}

			if (lock_flags != 0)
				xfs_ilock(ip, lock_flags);

			ip->i_flags &= ~XFS_ISTALE;

			vn_trace_exit(vp, "xfs_iget.found",
						(inst_t *)__return_address);
			goto return_ip;
		}
	}

	/*
	 * Inode cache miss: save the hash chain version stamp and unlock
	 * the chain, so we don't deadlock in vn_alloc.
	 */
	XFS_STATS_INC(xs_ig_missed);

	version = ih->ih_version;

	read_unlock(&ih->ih_lock);

	/*
	 * Read the disk inode attributes into a new inode structure and get
	 * a new vnode for it. This should also initialize i_ino and i_mount.
	 *
	 * Translate the iget-level bulkstat hint into the imap-level flag
	 * so xfs_iread() knows this lookup came from bulkstat.
	 */
	error = xfs_iread(mp, tp, ino, &ip, bno,
			  (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
	if (error)
		return error;

	vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);

	xfs_inode_lock_init(ip, vp);
	xfs_iocore_inode_init(ip);

	if (lock_flags)
		xfs_ilock(ip, lock_flags);

	/* Freed inode and caller is not creating: undo the read and bail. */
	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		xfs_idestroy(ip);
		return ENOENT;
	}

	/*
	 * Put ip on its hash chain, unless someone else hashed a duplicate
	 * after we released the hash lock.
	 */
	write_lock(&ih->ih_lock);

	/* Version changed: someone modified the chain; scan for a dup. */
	if (ih->ih_version != version) {
		for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) {
			if (iq->i_ino == ino) {
				write_unlock(&ih->ih_lock);
				xfs_idestroy(ip);

				XFS_STATS_INC(xs_ig_dup);
				goto again;
			}
		}
	}

	/*
	 * These values _must_ be set before releasing ihlock!
	 */
	ip->i_hash = ih;
	if ((iq = ih->ih_next)) {
		iq->i_prevp = &ip->i_next;
	}
	ip->i_next = iq;
	ip->i_prevp = &ih->ih_next;
	ih->ih_next = ip;
	ip->i_udquot = ip->i_gdquot = NULL;
	ih->ih_version++;
	/* INEW keeps concurrent lookups (see "again" loop) off this inode. */
	ip->i_flags |= XFS_INEW;

	write_unlock(&ih->ih_lock);

	/*
	 * put ip on its cluster's hash chain
	 */
	ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL &&
	       ip->i_cnext == NULL);

	chlnew = NULL;
	ch = XFS_CHASH(mp, ip->i_blkno);
	/*
	 * chlredo: re-taken after dropping ch_lock to allocate a new
	 * chashlist entry (allocation may sleep, so it cannot happen
	 * under the spinlock).
	 */
 chlredo:
	s = mutex_spinlock(&ch->ch_lock);
	for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
		if (chl->chl_blkno == ip->i_blkno) {

			/* insert this inode into the doubly-linked list
			 * where chl points */
			if ((iq = chl->chl_ip)) {
				ip->i_cprev = iq->i_cprev;
				iq->i_cprev->i_cnext = ip;
				iq->i_cprev = ip;
				ip->i_cnext = iq;
			} else {
				ip->i_cnext = ip;
				ip->i_cprev = ip;
			}
			chl->chl_ip = ip;
			ip->i_chash = chl;
			break;
		}
	}

	/* no hash list found for this block; add a new hash list */
	if (chl == NULL)  {
		if (chlnew == NULL) {
			mutex_spinunlock(&ch->ch_lock, s);
			ASSERT(xfs_chashlist_zone != NULL);
			chlnew = (xfs_chashlist_t *)
					kmem_zone_alloc(xfs_chashlist_zone,
						KM_SLEEP);
			ASSERT(chlnew != NULL);
			goto chlredo;
		} else {
			ip->i_cnext = ip;
			ip->i_cprev = ip;
			ip->i_chash = chlnew;
			chlnew->chl_ip = ip;
			chlnew->chl_blkno = ip->i_blkno;
			if (ch->ch_list)
				ch->ch_list->chl_prev = chlnew;
			chlnew->chl_next = ch->ch_list;
			chlnew->chl_prev = NULL;
			ch->ch_list = chlnew;
			chlnew = NULL;
		}
	} else {
		/* Raced: list appeared while we allocated; free the spare. */
		if (chlnew != NULL) {
			kmem_zone_free(xfs_chashlist_zone, chlnew);
		}
	}

	mutex_spinunlock(&ch->ch_lock, s);


	/*
	 * Link ip to its mount and thread it on the mount's inode list.
	 */
	XFS_MOUNT_ILOCK(mp);
	if ((iq = mp->m_inodes)) {
		ASSERT(iq->i_mprev->i_mnext == iq);
		ip->i_mprev = iq->i_mprev;
		iq->i_mprev->i_mnext = ip;
		iq->i_mprev = ip;
		ip->i_mnext = iq;
	} else {
		ip->i_mnext = ip;
		ip->i_mprev = ip;
	}
	mp->m_inodes = ip;

	XFS_MOUNT_IUNLOCK(mp);

return_ip:
	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));

	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
	 */
	bhv_vfs_init_vnode(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);

	return 0;
}
473 472
474 473
475 /* 474 /*
476 * The 'normal' internal xfs_iget, if needed it will 475 * The 'normal' internal xfs_iget, if needed it will
477 * 'allocate', or 'get', the vnode. 476 * 'allocate', or 'get', the vnode.
478 */ 477 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,		/* XFS_IGET_* flags */
	uint		lock_flags,	/* see xfs_ilock() */
	xfs_inode_t	**ipp,		/* out: the inode, on success */
	xfs_daddr_t	bno)		/* inode buffer daddr if known, else 0 */
{
	struct inode	*inode;
	bhv_vnode_t	*vp = NULL;
	int		error;

	XFS_STATS_INC(xs_ig_attempts);

retry:
	if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
		xfs_inode_t	*ip;

		vp = vn_from_inode(inode);
		if (inode->i_state & I_NEW) {
			/*
			 * Freshly allocated Linux inode: build the XFS
			 * side.  On failure, mark the vnode bad and undo
			 * the I_NEW state before dropping the reference.
			 */
			vn_initialize(inode);
			error = xfs_iget_core(vp, mp, tp, ino, flags,
					lock_flags, ipp, bno);
			if (error) {
				vn_mark_bad(vp);
				/*
				 * Only unlock if still I_NEW - xfs_iget_core
				 * may already have cleared it on some paths.
				 */
				if (inode->i_state & I_NEW)
					unlock_new_inode(inode);
				iput(inode);
			}
		} else {
			/*
			 * If the inode is not fully constructed due to
			 * filehandle mismatches wait for the inode to go
			 * away and try again.
			 *
			 * iget_locked will call __wait_on_freeing_inode
			 * to wait for the inode to go away.
			 */
			if (is_bad_inode(inode) ||
			    ((ip = xfs_vtoi(vp)) == NULL)) {
				iput(inode);
				delay(1);
				goto retry;
			}

			if (lock_flags != 0)
				xfs_ilock(ip, lock_flags);
			XFS_STATS_INC(xs_ig_found);
			*ipp = ip;
			error = 0;
		}
	} else
		error = ENOMEM;	/* If we got no inode we are out of memory */

	return error;
}
537 536
538 /* 537 /*
539 * Do the setup for the various locks within the incore inode. 538 * Do the setup for the various locks within the incore inode.
540 */ 539 */
void
xfs_inode_lock_init(
	xfs_inode_t	*ip,
	bhv_vnode_t	*vp)	/* v_number is used to name the locks */
{
	/* Multi-reader locks: the main inode lock and the I/O lock. */
	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", (long)vp->v_number);
	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number);
	/* Pin count starts at zero; i_ipin_wait is its wait queue. */
	init_waitqueue_head(&ip->i_ipin_wait);
	atomic_set(&ip->i_pincount, 0);
	/* Flush lock: a semaphore, initially available (count 1). */
	init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number);
}
553 552
554 /* 553 /*
555 * Look for the inode corresponding to the given ino in the hash table. 554 * Look for the inode corresponding to the given ino in the hash table.
556 * If it is there and its i_transp pointer matches tp, return it. 555 * If it is there and its i_transp pointer matches tp, return it.
557 * Otherwise, return NULL. 556 * Otherwise, return NULL.
558 */ 557 */
xfs_inode_t *
xfs_inode_incore(xfs_mount_t	*mp,
		 xfs_ino_t	ino,
		 xfs_trans_t	*tp)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*ip;
	ulong		version;

	ih = XFS_IHASH(mp, ino);
	read_lock(&ih->ih_lock);
	for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
		if (ip->i_ino == ino) {
			/*
			 * If we find it and tp matches, return it.
			 * Also move it to the front of the hash list
			 * if we find it and it is not already there.
			 * Otherwise break from the loop and return
			 * NULL.
			 */
			if (ip->i_transp == tp) {
				/*
				 * Sample the chain version before dropping
				 * the read lock; xfs_ihash_promote() uses
				 * it to detect concurrent chain changes.
				 */
				version = ih->ih_version;
				read_unlock(&ih->ih_lock);
				xfs_ihash_promote(ih, ip, version);
				return (ip);
			}
			break;
		}
	}
	read_unlock(&ih->ih_lock);
	return (NULL);
}
591 590
592 /* 591 /*
593 * Decrement reference count of an inode structure and unlock it. 592 * Decrement reference count of an inode structure and unlock it.
594 * 593 *
595 * ip -- the inode being released 594 * ip -- the inode being released
596 * lock_flags -- this parameter indicates the inode's locks to be 595 * lock_flags -- this parameter indicates the inode's locks to be
597 * to be released. See the comment on xfs_iunlock() for a list 596 * to be released. See the comment on xfs_iunlock() for a list
598 * of valid values. 597 * of valid values.
599 */ 598 */
600 void 599 void
601 xfs_iput(xfs_inode_t *ip, 600 xfs_iput(xfs_inode_t *ip,
602 uint lock_flags) 601 uint lock_flags)
603 { 602 {
604 bhv_vnode_t *vp = XFS_ITOV(ip); 603 bhv_vnode_t *vp = XFS_ITOV(ip);
605 604
606 vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address); 605 vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address);
607 xfs_iunlock(ip, lock_flags); 606 xfs_iunlock(ip, lock_flags);
608 VN_RELE(vp); 607 VN_RELE(vp);
609 } 608 }
610 609
611 /* 610 /*
612 * Special iput for brand-new inodes that are still locked 611 * Special iput for brand-new inodes that are still locked
613 */ 612 */
614 void 613 void
615 xfs_iput_new(xfs_inode_t *ip, 614 xfs_iput_new(xfs_inode_t *ip,
616 uint lock_flags) 615 uint lock_flags)
617 { 616 {
618 bhv_vnode_t *vp = XFS_ITOV(ip); 617 bhv_vnode_t *vp = XFS_ITOV(ip);
619 struct inode *inode = vn_to_inode(vp); 618 struct inode *inode = vn_to_inode(vp);
620 619
621 vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); 620 vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
622 621
623 if ((ip->i_d.di_mode == 0)) { 622 if ((ip->i_d.di_mode == 0)) {
624 ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE)); 623 ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
625 vn_mark_bad(vp); 624 vn_mark_bad(vp);
626 } 625 }
627 if (inode->i_state & I_NEW) 626 if (inode->i_state & I_NEW)
628 unlock_new_inode(inode); 627 unlock_new_inode(inode);
629 if (lock_flags) 628 if (lock_flags)
630 xfs_iunlock(ip, lock_flags); 629 xfs_iunlock(ip, lock_flags);
631 VN_RELE(vp); 630 VN_RELE(vp);
632 } 631 }
633 632
634 633
635 /* 634 /*
636 * This routine embodies the part of the reclaim code that pulls 635 * This routine embodies the part of the reclaim code that pulls
637 * the inode from the inode hash table and the mount structure's 636 * the inode from the inode hash table and the mount structure's
638 * inode list. 637 * inode list.
639 * This should only be called from xfs_reclaim(). 638 * This should only be called from xfs_reclaim().
640 */ 639 */
void
xfs_ireclaim(xfs_inode_t *ip)
{
	bhv_vnode_t	*vp;

	/*
	 * Remove from old hash list and mount list.
	 */
	XFS_STATS_INC(xs_ig_reclaims);

	xfs_iextract(ip);

	/*
	 * Here we do a spurious inode lock in order to coordinate with
	 * xfs_sync().  This is because xfs_sync() references the inodes
	 * in the mount list without taking references on the corresponding
	 * vnodes.  We make that OK here by ensuring that we wait until
	 * the inode is unlocked in xfs_sync() before we go ahead and
	 * free it.  We get both the regular lock and the io lock because
	 * the xfs_sync() code may need to drop the regular one but will
	 * still hold the io lock.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Release dquots (and their references) if any. An inode may escape
	 * xfs_inactive and get here via vn_alloc->vn_reclaim path.
	 */
	XFS_QM_DQDETACH(ip->i_mount, ip);

	/*
	 * Pull our behavior descriptor from the vnode chain.
	 */
	vp = XFS_ITOV_NULL(ip);
	if (vp) {
		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
	}

	/*
	 * Free all memory associated with the inode.
	 *
	 * Note: the exclusive locks taken above are never explicitly
	 * dropped - the inode (locks included) is freed outright here.
	 */
	xfs_idestroy(ip);
}
684 683
685 /* 684 /*
686 * This routine removes an about-to-be-destroyed inode from 685 * This routine removes an about-to-be-destroyed inode from
687 * all of the lists in which it is located with the exception 686 * all of the lists in which it is located with the exception
688 * of the behavior chain. 687 * of the behavior chain.
689 */ 688 */
void
xfs_iextract(
	xfs_inode_t	*ip)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*iq;
	xfs_mount_t	*mp;
	xfs_chash_t	*ch;
	xfs_chashlist_t	*chl, *chm;
	SPLDECL(s);

	/* Step 1: unhook from the inode hash chain, bumping its version. */
	ih = ip->i_hash;
	write_lock(&ih->ih_lock);
	if ((iq = ip->i_next)) {
		iq->i_prevp = ip->i_prevp;
	}
	*ip->i_prevp = iq;
	ih->ih_version++;
	write_unlock(&ih->ih_lock);

	/*
	 * Remove from cluster hash list
	 *  1) delete the chashlist if this is the last inode on the chashlist
	 *  2) unchain from list of inodes
	 *  3) point chashlist->chl_ip to 'chl_next' if to this inode.
	 */
	mp = ip->i_mount;
	ch = XFS_CHASH(mp, ip->i_blkno);
	s = mutex_spinlock(&ch->ch_lock);

	if (ip->i_cnext == ip) {
		/* Last inode on chashlist */
		ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
		ASSERT(ip->i_chash != NULL);
		chm=NULL;
		chl = ip->i_chash;
		if (chl->chl_prev)
			chl->chl_prev->chl_next = chl->chl_next;
		else
			ch->ch_list = chl->chl_next;
		if (chl->chl_next)
			chl->chl_next->chl_prev = chl->chl_prev;
		kmem_zone_free(xfs_chashlist_zone, chl);
	} else {
		/* delete one inode from a non-empty list */
		iq = ip->i_cnext;
		iq->i_cprev = ip->i_cprev;
		ip->i_cprev->i_cnext = iq;
		if (ip->i_chash->chl_ip == ip) {
			ip->i_chash->chl_ip = iq;
		}
		/*
		 * Poison the cluster-list pointers with the caller's
		 * return address - presumably a debugging aid to catch
		 * stale use after extraction; TODO confirm intent.
		 */
		ip->i_chash = __return_address;
		ip->i_cprev = __return_address;
		ip->i_cnext = __return_address;
	}
	mutex_spinunlock(&ch->ch_lock, s);

	/*
	 * Remove from mount's inode list.
	 */
	XFS_MOUNT_ILOCK(mp);
	ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
	iq = ip->i_mnext;
	iq->i_mprev = ip->i_mprev;
	ip->i_mprev->i_mnext = iq;

	/*
	 * Fix up the head pointer if it points to the inode being deleted.
	 */
	if (mp->m_inodes == ip) {
		if (ip == iq) {
			/* ip was the only inode on the mount list */
			mp->m_inodes = NULL;
		} else {
			mp->m_inodes = iq;
		}
	}

	/* Deal with the deleted inodes list */
	list_del_init(&ip->i_reclaim);

	mp->m_ireclaims++;
	XFS_MOUNT_IUNLOCK(mp);
}
773 772
774 /* 773 /*
775 * This is a wrapper routine around the xfs_ilock() routine 774 * This is a wrapper routine around the xfs_ilock() routine
776 * used to centralize some grungy code. It is used in places 775 * used to centralize some grungy code. It is used in places
777 * that wish to lock the inode solely for reading the extents. 776 * that wish to lock the inode solely for reading the extents.
778 * The reason these places can't just call xfs_ilock(SHARED) 777 * The reason these places can't just call xfs_ilock(SHARED)
779 * is that the inode lock also guards to bringing in of the 778 * is that the inode lock also guards to bringing in of the
780 * extents from disk for a file in b-tree format. If the inode 779 * extents from disk for a file in b-tree format. If the inode
781 * is in b-tree format, then we need to lock the inode exclusively 780 * is in b-tree format, then we need to lock the inode exclusively
782 * until the extents are read in. Locking it exclusively all 781 * until the extents are read in. Locking it exclusively all
783 * the time would limit our parallelism unnecessarily, though. 782 * the time would limit our parallelism unnecessarily, though.
784 * What we do instead is check to see if the extents have been 783 * What we do instead is check to see if the extents have been
785 * read in yet, and only lock the inode exclusively if they 784 * read in yet, and only lock the inode exclusively if they
786 * have not. 785 * have not.
787 * 786 *
788 * The function returns a value which should be given to the 787 * The function returns a value which should be given to the
789 * corresponding xfs_iunlock_map_shared(). This value is 788 * corresponding xfs_iunlock_map_shared(). This value is
790 * the mode in which the lock was actually taken. 789 * the mode in which the lock was actually taken.
791 */ 790 */
792 uint 791 uint
793 xfs_ilock_map_shared( 792 xfs_ilock_map_shared(
794 xfs_inode_t *ip) 793 xfs_inode_t *ip)
795 { 794 {
796 uint lock_mode; 795 uint lock_mode;
797 796
798 if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && 797 if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
799 ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { 798 ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
800 lock_mode = XFS_ILOCK_EXCL; 799 lock_mode = XFS_ILOCK_EXCL;
801 } else { 800 } else {
802 lock_mode = XFS_ILOCK_SHARED; 801 lock_mode = XFS_ILOCK_SHARED;
803 } 802 }
804 803
805 xfs_ilock(ip, lock_mode); 804 xfs_ilock(ip, lock_mode);
806 805
807 return lock_mode; 806 return lock_mode;
808 } 807 }
809 808
810 /* 809 /*
811 * This is simply the unlock routine to go with xfs_ilock_map_shared(). 810 * This is simply the unlock routine to go with xfs_ilock_map_shared().
812 * All it does is call xfs_iunlock() with the given lock_mode. 811 * All it does is call xfs_iunlock() with the given lock_mode.
813 */ 812 */
814 void 813 void
815 xfs_iunlock_map_shared( 814 xfs_iunlock_map_shared(
816 xfs_inode_t *ip, 815 xfs_inode_t *ip,
817 unsigned int lock_mode) 816 unsigned int lock_mode)
818 { 817 {
819 xfs_iunlock(ip, lock_mode); 818 xfs_iunlock(ip, lock_mode);
820 } 819 }
821 820
822 /* 821 /*
823 * The xfs inode contains 2 locks: a multi-reader lock called the 822 * The xfs inode contains 2 locks: a multi-reader lock called the
824 * i_iolock and a multi-reader lock called the i_lock. This routine 823 * i_iolock and a multi-reader lock called the i_lock. This routine
825 * allows either or both of the locks to be obtained. 824 * allows either or both of the locks to be obtained.
826 * 825 *
827 * The 2 locks should always be ordered so that the IO lock is 826 * The 2 locks should always be ordered so that the IO lock is
828 * obtained first in order to prevent deadlock. 827 * obtained first in order to prevent deadlock.
829 * 828 *
830 * ip -- the inode being locked 829 * ip -- the inode being locked
831 * lock_flags -- this parameter indicates the inode's locks 830 * lock_flags -- this parameter indicates the inode's locks
832 * to be locked. It can be: 831 * to be locked. It can be:
833 * XFS_IOLOCK_SHARED, 832 * XFS_IOLOCK_SHARED,
834 * XFS_IOLOCK_EXCL, 833 * XFS_IOLOCK_EXCL,
835 * XFS_ILOCK_SHARED, 834 * XFS_ILOCK_SHARED,
836 * XFS_ILOCK_EXCL, 835 * XFS_ILOCK_EXCL,
837 * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 836 * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
838 * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 837 * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
839 * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 838 * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
840 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 839 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
841 */ 840 */
842 void 841 void
843 xfs_ilock(xfs_inode_t *ip, 842 xfs_ilock(xfs_inode_t *ip,
844 uint lock_flags) 843 uint lock_flags)
845 { 844 {
846 /* 845 /*
847 * You can't set both SHARED and EXCL for the same lock, 846 * You can't set both SHARED and EXCL for the same lock,
848 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 847 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
849 * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 848 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
850 */ 849 */
851 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 850 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
852 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 851 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
853 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 852 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
854 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 853 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
855 ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); 854 ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);
856 855
857 if (lock_flags & XFS_IOLOCK_EXCL) { 856 if (lock_flags & XFS_IOLOCK_EXCL) {
858 mrupdate(&ip->i_iolock); 857 mrupdate(&ip->i_iolock);
859 } else if (lock_flags & XFS_IOLOCK_SHARED) { 858 } else if (lock_flags & XFS_IOLOCK_SHARED) {
860 mraccess(&ip->i_iolock); 859 mraccess(&ip->i_iolock);
861 } 860 }
862 if (lock_flags & XFS_ILOCK_EXCL) { 861 if (lock_flags & XFS_ILOCK_EXCL) {
863 mrupdate(&ip->i_lock); 862 mrupdate(&ip->i_lock);
864 } else if (lock_flags & XFS_ILOCK_SHARED) { 863 } else if (lock_flags & XFS_ILOCK_SHARED) {
865 mraccess(&ip->i_lock); 864 mraccess(&ip->i_lock);
866 } 865 }
867 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 866 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
868 } 867 }
869 868
870 /* 869 /*
871 * This is just like xfs_ilock(), except that the caller 870 * This is just like xfs_ilock(), except that the caller
872 * is guaranteed not to sleep. It returns 1 if it gets 871 * is guaranteed not to sleep. It returns 1 if it gets
873 * the requested locks and 0 otherwise. If the IO lock is 872 * the requested locks and 0 otherwise. If the IO lock is
874 * obtained but the inode lock cannot be, then the IO lock 873 * obtained but the inode lock cannot be, then the IO lock
875 * is dropped before returning. 874 * is dropped before returning.
876 * 875 *
877 * ip -- the inode being locked 876 * ip -- the inode being locked
878 * lock_flags -- this parameter indicates the inode's locks to be 877 * lock_flags -- this parameter indicates the inode's locks to be
879 * to be locked. See the comment for xfs_ilock() for a list 878 * to be locked. See the comment for xfs_ilock() for a list
880 * of valid values. 879 * of valid values.
881 * 880 *
882 */ 881 */
883 int 882 int
884 xfs_ilock_nowait(xfs_inode_t *ip, 883 xfs_ilock_nowait(xfs_inode_t *ip,
885 uint lock_flags) 884 uint lock_flags)
886 { 885 {
887 int iolocked; 886 int iolocked;
888 int ilocked; 887 int ilocked;
889 888
890 /* 889 /*
891 * You can't set both SHARED and EXCL for the same lock, 890 * You can't set both SHARED and EXCL for the same lock,
892 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 891 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
893 * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 892 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
894 */ 893 */
895 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 894 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
896 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 895 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
897 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 896 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
898 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 897 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
899 ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0); 898 ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);
900 899
901 iolocked = 0; 900 iolocked = 0;
902 if (lock_flags & XFS_IOLOCK_EXCL) { 901 if (lock_flags & XFS_IOLOCK_EXCL) {
903 iolocked = mrtryupdate(&ip->i_iolock); 902 iolocked = mrtryupdate(&ip->i_iolock);
904 if (!iolocked) { 903 if (!iolocked) {
905 return 0; 904 return 0;
906 } 905 }
907 } else if (lock_flags & XFS_IOLOCK_SHARED) { 906 } else if (lock_flags & XFS_IOLOCK_SHARED) {
908 iolocked = mrtryaccess(&ip->i_iolock); 907 iolocked = mrtryaccess(&ip->i_iolock);
909 if (!iolocked) { 908 if (!iolocked) {
910 return 0; 909 return 0;
911 } 910 }
912 } 911 }
913 if (lock_flags & XFS_ILOCK_EXCL) { 912 if (lock_flags & XFS_ILOCK_EXCL) {
914 ilocked = mrtryupdate(&ip->i_lock); 913 ilocked = mrtryupdate(&ip->i_lock);
915 if (!ilocked) { 914 if (!ilocked) {
916 if (iolocked) { 915 if (iolocked) {
917 mrunlock(&ip->i_iolock); 916 mrunlock(&ip->i_iolock);
918 } 917 }
919 return 0; 918 return 0;
920 } 919 }
921 } else if (lock_flags & XFS_ILOCK_SHARED) { 920 } else if (lock_flags & XFS_ILOCK_SHARED) {
922 ilocked = mrtryaccess(&ip->i_lock); 921 ilocked = mrtryaccess(&ip->i_lock);
923 if (!ilocked) { 922 if (!ilocked) {
924 if (iolocked) { 923 if (iolocked) {
925 mrunlock(&ip->i_iolock); 924 mrunlock(&ip->i_iolock);
926 } 925 }
927 return 0; 926 return 0;
928 } 927 }
929 } 928 }
930 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 929 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
931 return 1; 930 return 1;
932 } 931 }
933 932
934 /* 933 /*
935 * xfs_iunlock() is used to drop the inode locks acquired with 934 * xfs_iunlock() is used to drop the inode locks acquired with
936 * xfs_ilock() and xfs_ilock_nowait(). The caller must pass 935 * xfs_ilock() and xfs_ilock_nowait(). The caller must pass
937 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so 936 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
938 * that we know which locks to drop. 937 * that we know which locks to drop.
939 * 938 *
940 * ip -- the inode being unlocked 939 * ip -- the inode being unlocked
941 * lock_flags -- this parameter indicates the inode's locks to be 940 * lock_flags -- this parameter indicates the inode's locks to be
942 * to be unlocked. See the comment for xfs_ilock() for a list 941 * to be unlocked. See the comment for xfs_ilock() for a list
943 * of valid values for this parameter. 942 * of valid values for this parameter.
944 * 943 *
945 */ 944 */
946 void 945 void
947 xfs_iunlock(xfs_inode_t *ip, 946 xfs_iunlock(xfs_inode_t *ip,
948 uint lock_flags) 947 uint lock_flags)
949 { 948 {
950 /* 949 /*
951 * You can't set both SHARED and EXCL for the same lock, 950 * You can't set both SHARED and EXCL for the same lock,
952 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 951 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
953 * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 952 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
954 */ 953 */
955 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 954 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
956 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 955 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
957 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 956 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
958 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 957 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
959 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0); 958 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0);
960 ASSERT(lock_flags != 0); 959 ASSERT(lock_flags != 0);
961 960
962 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { 961 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
963 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || 962 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) ||
964 (ismrlocked(&ip->i_iolock, MR_ACCESS))); 963 (ismrlocked(&ip->i_iolock, MR_ACCESS)));
965 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || 964 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) ||
966 (ismrlocked(&ip->i_iolock, MR_UPDATE))); 965 (ismrlocked(&ip->i_iolock, MR_UPDATE)));
967 mrunlock(&ip->i_iolock); 966 mrunlock(&ip->i_iolock);
968 } 967 }
969 968
970 if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { 969 if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) {
971 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || 970 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) ||
972 (ismrlocked(&ip->i_lock, MR_ACCESS))); 971 (ismrlocked(&ip->i_lock, MR_ACCESS)));
973 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || 972 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) ||
974 (ismrlocked(&ip->i_lock, MR_UPDATE))); 973 (ismrlocked(&ip->i_lock, MR_UPDATE)));
975 mrunlock(&ip->i_lock); 974 mrunlock(&ip->i_lock);
976 975
977 /* 976 /*
978 * Let the AIL know that this item has been unlocked in case 977 * Let the AIL know that this item has been unlocked in case
979 * it is in the AIL and anyone is waiting on it. Don't do 978 * it is in the AIL and anyone is waiting on it. Don't do
980 * this if the caller has asked us not to. 979 * this if the caller has asked us not to.
981 */ 980 */
982 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && 981 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) &&
983 ip->i_itemp != NULL) { 982 ip->i_itemp != NULL) {
984 xfs_trans_unlocked_item(ip->i_mount, 983 xfs_trans_unlocked_item(ip->i_mount,
985 (xfs_log_item_t*)(ip->i_itemp)); 984 (xfs_log_item_t*)(ip->i_itemp));
986 } 985 }
987 } 986 }
988 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 987 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
989 } 988 }
990 989
991 /* 990 /*
992 * give up write locks. the i/o lock cannot be held nested 991 * give up write locks. the i/o lock cannot be held nested
993 * if it is being demoted. 992 * if it is being demoted.
994 */ 993 */
995 void 994 void
996 xfs_ilock_demote(xfs_inode_t *ip, 995 xfs_ilock_demote(xfs_inode_t *ip,
997 uint lock_flags) 996 uint lock_flags)
998 { 997 {
999 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 998 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
1000 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 999 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
1001 1000
1002 if (lock_flags & XFS_ILOCK_EXCL) { 1001 if (lock_flags & XFS_ILOCK_EXCL) {
1003 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 1002 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
1004 mrdemote(&ip->i_lock); 1003 mrdemote(&ip->i_lock);
1005 } 1004 }
1006 if (lock_flags & XFS_IOLOCK_EXCL) { 1005 if (lock_flags & XFS_IOLOCK_EXCL) {
1007 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1006 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
1008 mrdemote(&ip->i_iolock); 1007 mrdemote(&ip->i_iolock);
1009 } 1008 }
1010 } 1009 }
1011 1010
1012 /* 1011 /*
1013 * The following three routines simply manage the i_flock 1012 * The following three routines simply manage the i_flock
1014 * semaphore embedded in the inode. This semaphore synchronizes 1013 * semaphore embedded in the inode. This semaphore synchronizes
1015 * processes attempting to flush the in-core inode back to disk. 1014 * processes attempting to flush the in-core inode back to disk.
1016 */ 1015 */
1017 void 1016 void
1018 xfs_iflock(xfs_inode_t *ip) 1017 xfs_iflock(xfs_inode_t *ip)
1019 { 1018 {
1020 psema(&(ip->i_flock), PINOD|PLTWAIT); 1019 psema(&(ip->i_flock), PINOD|PLTWAIT);
1021 } 1020 }
1022 1021
1023 int 1022 int
1024 xfs_iflock_nowait(xfs_inode_t *ip) 1023 xfs_iflock_nowait(xfs_inode_t *ip)
1025 { 1024 {
1026 return (cpsema(&(ip->i_flock))); 1025 return (cpsema(&(ip->i_flock)));
1027 } 1026 }
1028 1027
1029 void 1028 void
1030 xfs_ifunlock(xfs_inode_t *ip) 1029 xfs_ifunlock(xfs_inode_t *ip)
1031 { 1030 {
1032 ASSERT(issemalocked(&(ip->i_flock))); 1031 ASSERT(issemalocked(&(ip->i_flock)));
1033 vsema(&(ip->i_flock)); 1032 vsema(&(ip->i_flock));
1034 } 1033 }
1 /* 1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_imap.h" 24 #include "xfs_imap.h"
25 #include "xfs_trans.h" 25 #include "xfs_trans.h"
26 #include "xfs_trans_priv.h" 26 #include "xfs_trans_priv.h"
27 #include "xfs_sb.h" 27 #include "xfs_sb.h"
28 #include "xfs_ag.h" 28 #include "xfs_ag.h"
29 #include "xfs_dir2.h" 29 #include "xfs_dir2.h"
30 #include "xfs_dmapi.h" 30 #include "xfs_dmapi.h"
31 #include "xfs_mount.h" 31 #include "xfs_mount.h"
32 #include "xfs_bmap_btree.h" 32 #include "xfs_bmap_btree.h"
33 #include "xfs_alloc_btree.h" 33 #include "xfs_alloc_btree.h"
34 #include "xfs_ialloc_btree.h" 34 #include "xfs_ialloc_btree.h"
35 #include "xfs_dir2_sf.h" 35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h" 36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h" 37 #include "xfs_dinode.h"
38 #include "xfs_inode.h" 38 #include "xfs_inode.h"
39 #include "xfs_buf_item.h" 39 #include "xfs_buf_item.h"
40 #include "xfs_inode_item.h" 40 #include "xfs_inode_item.h"
41 #include "xfs_btree.h" 41 #include "xfs_btree.h"
42 #include "xfs_alloc.h" 42 #include "xfs_alloc.h"
43 #include "xfs_ialloc.h" 43 #include "xfs_ialloc.h"
44 #include "xfs_bmap.h" 44 #include "xfs_bmap.h"
45 #include "xfs_rw.h" 45 #include "xfs_rw.h"
46 #include "xfs_error.h" 46 #include "xfs_error.h"
47 #include "xfs_utils.h" 47 #include "xfs_utils.h"
48 #include "xfs_dir2_trace.h" 48 #include "xfs_dir2_trace.h"
49 #include "xfs_quota.h" 49 #include "xfs_quota.h"
50 #include "xfs_mac.h" 50 #include "xfs_mac.h"
51 #include "xfs_acl.h" 51 #include "xfs_acl.h"
52 52
53 53
54 kmem_zone_t *xfs_ifork_zone; 54 kmem_zone_t *xfs_ifork_zone;
55 kmem_zone_t *xfs_inode_zone; 55 kmem_zone_t *xfs_inode_zone;
56 kmem_zone_t *xfs_chashlist_zone; 56 kmem_zone_t *xfs_chashlist_zone;
57 57
58 /* 58 /*
59 * Used in xfs_itruncate(). This is the maximum number of extents 59 * Used in xfs_itruncate(). This is the maximum number of extents
60 * freed from a file in a single transaction. 60 * freed from a file in a single transaction.
61 */ 61 */
62 #define XFS_ITRUNC_MAX_EXTENTS 2 62 #define XFS_ITRUNC_MAX_EXTENTS 2
63 63
64 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 64 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
65 STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 65 STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
66 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 66 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
67 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 67 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
68 68
69 69
#ifdef DEBUG
/*
 * Debug-only check that the extents in the given memory buffer are
 * valid: when the fork format carries no state, every record must be
 * in the normal state.
 */
STATIC void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	int			disk,
	xfs_exntfmt_t		fmt)
{
	xfs_bmbt_rec_t		*erp;
	xfs_bmbt_rec_t		raw;
	xfs_bmbt_irec_t		irec;
	int			idx;

	for (idx = 0; idx < nrecs; idx++) {
		erp = xfs_iext_get_ext(ifp, idx);
		/* records may be unaligned in the in-core buffer */
		raw.l0 = get_unaligned((__uint64_t*)&erp->l0);
		raw.l1 = get_unaligned((__uint64_t*)&erp->l1);
		if (disk)
			xfs_bmbt_disk_get_all(&raw, &irec);
		else
			xfs_bmbt_get_all(&raw, &irec);
		if (fmt == XFS_EXTFMT_NOSTATE)
			ASSERT(irec.br_state == XFS_EXT_NORM);
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, disk, fmt)
#endif /* DEBUG */
102 102
/*
 * Debug-only check that no inode in the buffer has a next unlinked
 * field of zero.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	xfs_dinode_t	*dip;
	int		ninodes;
	int		i;

	/* number of inodes per cluster buffer */
	ninodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;

	for (i = 0; i < ninodes; i++) {
		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
				i * mp->m_sb.sb_inodesize);
		if (!dip->di_next_unlinked) {
			xfs_fs_cmn_err(CE_ALERT, mp,
				"Detected a bogus zero next_unlinked field in incore inode buffer 0x%p.  About to pop an ASSERT.",
				bp);
			ASSERT(dip->di_next_unlinked);
		}
	}
}
#endif
131 131
132 /* 132 /*
133 * This routine is called to map an inode number within a file 133 * This routine is called to map an inode number within a file
134 * system to the buffer containing the on-disk version of the 134 * system to the buffer containing the on-disk version of the
135 * inode. It returns a pointer to the buffer containing the 135 * inode. It returns a pointer to the buffer containing the
136 * on-disk inode in the bpp parameter, and in the dip parameter 136 * on-disk inode in the bpp parameter, and in the dip parameter
137 * it returns a pointer to the on-disk inode within that buffer. 137 * it returns a pointer to the on-disk inode within that buffer.
138 * 138 *
139 * If a non-zero error is returned, then the contents of bpp and 139 * If a non-zero error is returned, then the contents of bpp and
140 * dipp are undefined. 140 * dipp are undefined.
141 * 141 *
142 * Use xfs_imap() to determine the size and location of the 142 * Use xfs_imap() to determine the size and location of the
143 * buffer to read from disk. 143 * buffer to read from disk.
144 */ 144 */
145 STATIC int 145 STATIC int
146 xfs_inotobp( 146 xfs_inotobp(
147 xfs_mount_t *mp, 147 xfs_mount_t *mp,
148 xfs_trans_t *tp, 148 xfs_trans_t *tp,
149 xfs_ino_t ino, 149 xfs_ino_t ino,
150 xfs_dinode_t **dipp, 150 xfs_dinode_t **dipp,
151 xfs_buf_t **bpp, 151 xfs_buf_t **bpp,
152 int *offset) 152 int *offset)
153 { 153 {
154 int di_ok; 154 int di_ok;
155 xfs_imap_t imap; 155 xfs_imap_t imap;
156 xfs_buf_t *bp; 156 xfs_buf_t *bp;
157 int error; 157 int error;
158 xfs_dinode_t *dip; 158 xfs_dinode_t *dip;
159 159
160 /* 160 /*
161 * Call the space management code to find the location of the 161 * Call the space management code to find the location of the
162 * inode on disk. 162 * inode on disk.
163 */ 163 */
164 imap.im_blkno = 0; 164 imap.im_blkno = 0;
165 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 165 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
166 if (error != 0) { 166 if (error != 0) {
167 cmn_err(CE_WARN, 167 cmn_err(CE_WARN,
168 "xfs_inotobp: xfs_imap() returned an " 168 "xfs_inotobp: xfs_imap() returned an "
169 "error %d on %s. Returning error.", error, mp->m_fsname); 169 "error %d on %s. Returning error.", error, mp->m_fsname);
170 return error; 170 return error;
171 } 171 }
172 172
173 /* 173 /*
174 * If the inode number maps to a block outside the bounds of the 174 * If the inode number maps to a block outside the bounds of the
175 * file system then return NULL rather than calling read_buf 175 * file system then return NULL rather than calling read_buf
176 * and panicing when we get an error from the driver. 176 * and panicing when we get an error from the driver.
177 */ 177 */
178 if ((imap.im_blkno + imap.im_len) > 178 if ((imap.im_blkno + imap.im_len) >
179 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 179 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
180 cmn_err(CE_WARN, 180 cmn_err(CE_WARN,
181 "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " 181 "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
182 "of the file system %s. Returning EINVAL.", 182 "of the file system %s. Returning EINVAL.",
183 (unsigned long long)imap.im_blkno, 183 (unsigned long long)imap.im_blkno,
184 imap.im_len, mp->m_fsname); 184 imap.im_len, mp->m_fsname);
185 return XFS_ERROR(EINVAL); 185 return XFS_ERROR(EINVAL);
186 } 186 }
187 187
188 /* 188 /*
189 * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 189 * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will
190 * default to just a read_buf() call. 190 * default to just a read_buf() call.
191 */ 191 */
192 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 192 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
193 (int)imap.im_len, XFS_BUF_LOCK, &bp); 193 (int)imap.im_len, XFS_BUF_LOCK, &bp);
194 194
195 if (error) { 195 if (error) {
196 cmn_err(CE_WARN, 196 cmn_err(CE_WARN,
197 "xfs_inotobp: xfs_trans_read_buf() returned an " 197 "xfs_inotobp: xfs_trans_read_buf() returned an "
198 "error %d on %s. Returning error.", error, mp->m_fsname); 198 "error %d on %s. Returning error.", error, mp->m_fsname);
199 return error; 199 return error;
200 } 200 }
201 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); 201 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
202 di_ok = 202 di_ok =
203 INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && 203 INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
204 XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); 204 XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
205 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 205 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
206 XFS_RANDOM_ITOBP_INOTOBP))) { 206 XFS_RANDOM_ITOBP_INOTOBP))) {
207 XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); 207 XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
208 xfs_trans_brelse(tp, bp); 208 xfs_trans_brelse(tp, bp);
209 cmn_err(CE_WARN, 209 cmn_err(CE_WARN,
210 "xfs_inotobp: XFS_TEST_ERROR() returned an " 210 "xfs_inotobp: XFS_TEST_ERROR() returned an "
211 "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); 211 "error on %s. Returning EFSCORRUPTED.", mp->m_fsname);
212 return XFS_ERROR(EFSCORRUPTED); 212 return XFS_ERROR(EFSCORRUPTED);
213 } 213 }
214 214
215 xfs_inobp_check(mp, bp); 215 xfs_inobp_check(mp, bp);
216 216
217 /* 217 /*
218 * Set *dipp to point to the on-disk inode in the buffer. 218 * Set *dipp to point to the on-disk inode in the buffer.
219 */ 219 */
220 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 220 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
221 *bpp = bp; 221 *bpp = bp;
222 *offset = imap.im_boffset; 222 *offset = imap.im_boffset;
223 return 0; 223 return 0;
224 } 224 }
225 225
226 226
227 /* 227 /*
228 * This routine is called to map an inode to the buffer containing 228 * This routine is called to map an inode to the buffer containing
229 * the on-disk version of the inode. It returns a pointer to the 229 * the on-disk version of the inode. It returns a pointer to the
230 * buffer containing the on-disk inode in the bpp parameter, and in 230 * buffer containing the on-disk inode in the bpp parameter, and in
231 * the dip parameter it returns a pointer to the on-disk inode within 231 * the dip parameter it returns a pointer to the on-disk inode within
232 * that buffer. 232 * that buffer.
233 * 233 *
234 * If a non-zero error is returned, then the contents of bpp and 234 * If a non-zero error is returned, then the contents of bpp and
235 * dipp are undefined. 235 * dipp are undefined.
236 * 236 *
237 * If the inode is new and has not yet been initialized, use xfs_imap() 237 * If the inode is new and has not yet been initialized, use xfs_imap()
238 * to determine the size and location of the buffer to read from disk. 238 * to determine the size and location of the buffer to read from disk.
239 * If the inode has already been mapped to its buffer and read in once, 239 * If the inode has already been mapped to its buffer and read in once,
240 * then use the mapping information stored in the inode rather than 240 * then use the mapping information stored in the inode rather than
241 * calling xfs_imap(). This allows us to avoid the overhead of looking 241 * calling xfs_imap(). This allows us to avoid the overhead of looking
242 * at the inode btree for small block file systems (see xfs_dilocate()). 242 * at the inode btree for small block file systems (see xfs_dilocate()).
243 * We can tell whether the inode has been mapped in before by comparing 243 * We can tell whether the inode has been mapped in before by comparing
244 * its disk block address to 0. Only uninitialized inodes will have 244 * its disk block address to 0. Only uninitialized inodes will have
245 * 0 for the disk block address. 245 * 0 for the disk block address.
246 */ 246 */
int
xfs_itobp(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	xfs_dinode_t	**dipp,
	xfs_buf_t	**bpp,
	xfs_daddr_t	bno,
	uint		imap_flags)
{
	xfs_imap_t	imap;	/* disk block mapping for the inode */
	xfs_buf_t	*bp;
	int		error;
	int		i;	/* index of inode within the buffer */
	int		ni;	/* number of inodes to validate below */

	if (ip->i_blkno == (xfs_daddr_t)0) {
		/*
		 * Call the space management code to find the location of the
		 * inode on disk.  A zero i_blkno means the inode has never
		 * been mapped (see the comment above this function).
		 */
		imap.im_blkno = bno;
		if ((error = xfs_imap(mp, tp, ip->i_ino, &imap,
					XFS_IMAP_LOOKUP | imap_flags)))
			return error;

		/*
		 * If the inode number maps to a block outside the bounds
		 * of the file system then return NULL rather than calling
		 * read_buf and panicking when we get an error from the
		 * driver.
		 */
		if ((imap.im_blkno + imap.im_len) >
		    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
#ifdef DEBUG
			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
					"(imap.im_blkno (0x%llx) "
					"+ imap.im_len (0x%llx)) > "
					" XFS_FSB_TO_BB(mp, "
					"mp->m_sb.sb_dblocks) (0x%llx)",
					(unsigned long long) imap.im_blkno,
					(unsigned long long) imap.im_len,
					XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
#endif /* DEBUG */
			return XFS_ERROR(EINVAL);
		}

		/*
		 * Fill in the fields in the inode that will be used to
		 * map the inode to its buffer from now on.
		 */
		ip->i_blkno = imap.im_blkno;
		ip->i_len = imap.im_len;
		ip->i_boffset = imap.im_boffset;
	} else {
		/*
		 * We've already mapped the inode once, so just use the
		 * mapping that we saved the first time.
		 */
		imap.im_blkno = ip->i_blkno;
		imap.im_len = ip->i_len;
		imap.im_boffset = ip->i_boffset;
	}
	ASSERT(bno == 0 || bno == imap.im_blkno);

	/*
	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
	 * default to just a read_buf() call.
	 */
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
	if (error) {
#ifdef DEBUG
		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
				"xfs_trans_read_buf() returned error %d, "
				"imap.im_blkno 0x%llx, imap.im_len 0x%llx",
				error, (unsigned long long) imap.im_blkno,
				(unsigned long long) imap.im_len);
#endif /* DEBUG */
		return error;
	}

	/*
	 * Validate the magic number and version of every inode in the buffer
	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
	 * No validation is done here in userspace (xfs_repair).
	 */
#if !defined(__KERNEL__)
	ni = 0;
#elif defined(DEBUG)
	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
#else	/* usual case */
	ni = 1;
#endif

	for (i = 0; i < ni; i++) {
		int		di_ok;
		xfs_dinode_t	*dip;

		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					(i << mp->m_sb.sb_inodelog));
		/* A good on-disk inode has the magic and a known version. */
		di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
			    XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
						XFS_ERRTAG_ITOBP_INOTOBP,
						XFS_RANDOM_ITOBP_INOTOBP))) {
			/*
			 * Bulkstat probes arbitrary inode numbers, so a bad
			 * inode here is expected rather than a corruption
			 * event: release the buffer and fail quietly with
			 * EINVAL instead of logging a corruption report.
			 */
			if (imap_flags & XFS_IMAP_BULKSTAT) {
				xfs_trans_brelse(tp, bp);
				return XFS_ERROR(EINVAL);
			}
#ifdef DEBUG
			cmn_err(CE_ALERT,
					"Device %s - bad inode magic/vsn "
					"daddr %lld #%d (magic=%x)",
				XFS_BUFTARG_NAME(mp->m_ddev_targp),
				(unsigned long long)imap.im_blkno, i,
				INT_GET(dip->di_core.di_magic, ARCH_CONVERT));
#endif
			XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
					     mp, dip);
			xfs_trans_brelse(tp, bp);
			return XFS_ERROR(EFSCORRUPTED);
		}
	}

	xfs_inobp_check(mp, bp);

	/*
	 * Mark the buffer as an inode buffer now that it looks good
	 */
	XFS_BUF_SET_VTYPE(bp, B_FS_INO);

	/*
	 * Set *dipp to point to the on-disk inode in the buffer.
	 */
	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
	*bpp = bp;
	return 0;
}
386 386
387 /* 387 /*
388 * Move inode type and inode format specific information from the 388 * Move inode type and inode format specific information from the
389 * on-disk inode to the in-core inode. For fifos, devs, and sockets 389 * on-disk inode to the in-core inode. For fifos, devs, and sockets
390 * this means set if_rdev to the proper value. For files, directories, 390 * this means set if_rdev to the proper value. For files, directories,
391 * and symlinks this means to bring in the in-line data or extent 391 * and symlinks this means to bring in the in-line data or extent
392 * pointers. For a file in B-tree format, only the root is immediately 392 * pointers. For a file in B-tree format, only the root is immediately
393 * brought in-core. The rest will be in-lined in if_extents when it 393 * brought in-core. The rest will be in-lined in if_extents when it
394 * is first referenced (see xfs_iread_extents()). 394 * is first referenced (see xfs_iread_extents()).
395 */ 395 */
STATIC int
xfs_iformat(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip)
{
	xfs_attr_shortform_t	*atp;
	int			size;
	int			error;
	xfs_fsize_t             di_size;
	ip->i_df.if_ext_max =
		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	error = 0;

	/*
	 * Sanity check: the combined data+attr extent count can never
	 * exceed the number of blocks owned by the inode.
	 */
	if (unlikely(
	    INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
	        INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
	    INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
			(unsigned long long)ip->i_ino,
			(int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
			    + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
			(unsigned long long)
			INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT));
		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	/* The attr fork offset must lie within the on-disk inode. */
	if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
			"corrupt dinode %Lu, forkoff = 0x%x.",
			(unsigned long long)ip->i_ino,
			(int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Bring in the data fork, dispatching on file type and then on
	 * the on-disk fork format.
	 */
	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		/* Special files carry only a device number, never extents. */
		if (unlikely(INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV)) {
			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
					      ip->i_mount, dip);
			return XFS_ERROR(EFSCORRUPTED);
		}
		ip->i_d.di_size = 0;
		ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
		break;

	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) {
		case XFS_DINODE_FMT_LOCAL:
			/*
			 * no local regular files yet
			 */
			if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
				xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
					"corrupt inode %Lu "
					"(local format for regular file).",
					(unsigned long long) ip->i_ino);
				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
						     XFS_ERRLEVEL_LOW,
						     ip->i_mount, dip);
				return XFS_ERROR(EFSCORRUPTED);
			}

			/* Inline data must fit in the data fork area. */
			di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
				xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
					"corrupt inode %Lu "
					"(bad size %Ld for local inode).",
					(unsigned long long) ip->i_ino,
					(long long) di_size);
				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
						     XFS_ERRLEVEL_LOW,
						     ip->i_mount, dip);
				return XFS_ERROR(EFSCORRUPTED);
			}

			size = (int)di_size;
			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
			break;
		case XFS_DINODE_FMT_EXTENTS:
			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
			break;
		case XFS_DINODE_FMT_BTREE:
			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
			break;
		default:
			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
					 ip->i_mount);
			return XFS_ERROR(EFSCORRUPTED);
		}
		break;

	default:
		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
		return XFS_ERROR(EFSCORRUPTED);
	}
	if (error) {
		return error;
	}
	/* No attribute fork on disk -- we're done. */
	if (!XFS_DFORK_Q(dip))
		return 0;
	/*
	 * Allocate the in-core attribute fork and bring it in the same
	 * way, dispatching on the on-disk attr fork format.
	 */
	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_ext_max =
		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
	case XFS_DINODE_FMT_LOCAL:
		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
		size = be16_to_cpu(atp->hdr.totsize);
		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
		break;
	default:
		error = XFS_ERROR(EFSCORRUPTED);
		break;
	}
	if (error) {
		/*
		 * Attr fork read failed: free the fork we just allocated
		 * and tear down the data fork brought in above so the
		 * inode is left with no partially-built forks.
		 */
		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
		ip->i_afp = NULL;
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
	}
	return error;
}
533 533
534 /* 534 /*
535 * The file is in-lined in the on-disk inode. 535 * The file is in-lined in the on-disk inode.
536 * If it fits into if_inline_data, then copy 536 * If it fits into if_inline_data, then copy
537 * it there, otherwise allocate a buffer for it 537 * it there, otherwise allocate a buffer for it
538 * and copy the data there. Either way, set 538 * and copy the data there. Either way, set
539 * if_data to point at the data. 539 * if_data to point at the data.
540 * If we allocate a buffer for the data, make 540 * If we allocate a buffer for the data, make
541 * sure that its size is a multiple of 4 and 541 * sure that its size is a multiple of 4 and
542 * record the real size in i_real_bytes. 542 * record the real size in i_real_bytes.
543 */ 543 */
544 STATIC int 544 STATIC int
545 xfs_iformat_local( 545 xfs_iformat_local(
546 xfs_inode_t *ip, 546 xfs_inode_t *ip,
547 xfs_dinode_t *dip, 547 xfs_dinode_t *dip,
548 int whichfork, 548 int whichfork,
549 int size) 549 int size)
550 { 550 {
551 xfs_ifork_t *ifp; 551 xfs_ifork_t *ifp;
552 int real_size; 552 int real_size;
553 553
554 /* 554 /*
555 * If the size is unreasonable, then something 555 * If the size is unreasonable, then something
556 * is wrong and we just bail out rather than crash in 556 * is wrong and we just bail out rather than crash in
557 * kmem_alloc() or memcpy() below. 557 * kmem_alloc() or memcpy() below.
558 */ 558 */
559 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 559 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
560 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 560 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
561 "corrupt inode %Lu " 561 "corrupt inode %Lu "
562 "(bad size %d for local fork, size = %d).", 562 "(bad size %d for local fork, size = %d).",
563 (unsigned long long) ip->i_ino, size, 563 (unsigned long long) ip->i_ino, size,
564 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 564 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
565 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 565 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
566 ip->i_mount, dip); 566 ip->i_mount, dip);
567 return XFS_ERROR(EFSCORRUPTED); 567 return XFS_ERROR(EFSCORRUPTED);
568 } 568 }
569 ifp = XFS_IFORK_PTR(ip, whichfork); 569 ifp = XFS_IFORK_PTR(ip, whichfork);
570 real_size = 0; 570 real_size = 0;
571 if (size == 0) 571 if (size == 0)
572 ifp->if_u1.if_data = NULL; 572 ifp->if_u1.if_data = NULL;
573 else if (size <= sizeof(ifp->if_u2.if_inline_data)) 573 else if (size <= sizeof(ifp->if_u2.if_inline_data))
574 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 574 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
575 else { 575 else {
576 real_size = roundup(size, 4); 576 real_size = roundup(size, 4);
577 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 577 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
578 } 578 }
579 ifp->if_bytes = size; 579 ifp->if_bytes = size;
580 ifp->if_real_bytes = real_size; 580 ifp->if_real_bytes = real_size;
581 if (size) 581 if (size)
582 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); 582 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
583 ifp->if_flags &= ~XFS_IFEXTENTS; 583 ifp->if_flags &= ~XFS_IFEXTENTS;
584 ifp->if_flags |= XFS_IFINLINE; 584 ifp->if_flags |= XFS_IFINLINE;
585 return 0; 585 return 0;
586 } 586 }
587 587
588 /* 588 /*
589 * The file consists of a set of extents all 589 * The file consists of a set of extents all
590 * of which fit into the on-disk inode. 590 * of which fit into the on-disk inode.
591 * If there are few enough extents to fit into 591 * If there are few enough extents to fit into
592 * the if_inline_ext, then copy them there. 592 * the if_inline_ext, then copy them there.
593 * Otherwise allocate a buffer for them and copy 593 * Otherwise allocate a buffer for them and copy
594 * them into it. Either way, set if_extents 594 * them into it. Either way, set if_extents
595 * to point at the extents. 595 * to point at the extents.
596 */ 596 */
STATIC int
xfs_iformat_extents(
	xfs_inode_t	*ip,
	xfs_dinode_t	*dip,
	int		whichfork)
{
	xfs_bmbt_rec_t	*ep, *dp;	/* in-core / on-disk extent records */
	xfs_ifork_t	*ifp;
	int		nex;		/* number of extents in the fork */
	int		size;		/* byte size of the extent records */
	int		i;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
	size = nex * (uint)sizeof(xfs_bmbt_rec_t);

	/*
	 * If the number of extents is unreasonable, then something
	 * is wrong and we just bail out rather than crash in
	 * kmem_alloc() or memcpy() below.  (size < 0 catches int
	 * overflow of the nex * sizeof multiplication above.)
	 */
	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
			"corrupt inode %Lu ((a)extents = %d).",
			(unsigned long long) ip->i_ino, nex);
		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Point the extent list at the inline area when it fits,
	 * otherwise let xfs_iext_add() set up external storage.
	 */
	ifp->if_real_bytes = 0;
	if (nex == 0)
		ifp->if_u1.if_extents = NULL;
	else if (nex <= XFS_INLINE_EXTS)
		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
	else
		xfs_iext_add(ifp, 0, nex);

	ifp->if_bytes = size;
	if (size) {
		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
		xfs_validate_extents(ifp, nex, 1, XFS_EXTFMT_INODE(ip));
		/*
		 * Copy each record from disk, converting endianness; the
		 * on-disk records may be unaligned, hence get_unaligned().
		 */
		for (i = 0; i < nex; i++, dp++) {
			ep = xfs_iext_get_ext(ifp, i);
			ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0),
								ARCH_CONVERT);
			ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
								ARCH_CONVERT);
		}
		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
			whichfork);
		/*
		 * Attr forks, and data forks of NOSTATE-format inodes,
		 * must not contain unwritten (delayed-state) extents.
		 * NOTE(review): the EFSCORRUPTED return below leaves any
		 * xfs_iext_add() allocation in place -- presumably freed
		 * by the caller's fork teardown; verify against callers.
		 */
		if (whichfork != XFS_DATA_FORK ||
			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
				if (unlikely(xfs_check_nostate_extents(
				    ifp, 0, nex))) {
					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
							 XFS_ERRLEVEL_LOW,
							 ip->i_mount);
					return XFS_ERROR(EFSCORRUPTED);
				}
	}
	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;
}
661 661
662 /* 662 /*
663 * The file has too many extents to fit into 663 * The file has too many extents to fit into
664 * the inode, so they are in B-tree format. 664 * the inode, so they are in B-tree format.
665 * Allocate a buffer for the root of the B-tree 665 * Allocate a buffer for the root of the B-tree
666 * and copy the root into it. The i_extents 666 * and copy the root into it. The i_extents
667 * field will remain NULL until all of the 667 * field will remain NULL until all of the
668 * extents are read in (when they are needed). 668 * extents are read in (when they are needed).
669 */ 669 */
670 STATIC int 670 STATIC int
671 xfs_iformat_btree( 671 xfs_iformat_btree(
672 xfs_inode_t *ip, 672 xfs_inode_t *ip,
673 xfs_dinode_t *dip, 673 xfs_dinode_t *dip,
674 int whichfork) 674 int whichfork)
675 { 675 {
676 xfs_bmdr_block_t *dfp; 676 xfs_bmdr_block_t *dfp;
677 xfs_ifork_t *ifp; 677 xfs_ifork_t *ifp;
678 /* REFERENCED */ 678 /* REFERENCED */
679 int nrecs; 679 int nrecs;
680 int size; 680 int size;
681 681
682 ifp = XFS_IFORK_PTR(ip, whichfork); 682 ifp = XFS_IFORK_PTR(ip, whichfork);
683 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 683 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
684 size = XFS_BMAP_BROOT_SPACE(dfp); 684 size = XFS_BMAP_BROOT_SPACE(dfp);
685 nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); 685 nrecs = XFS_BMAP_BROOT_NUMRECS(dfp);
686 686
687 /* 687 /*
688 * blow out if -- fork has less extents than can fit in 688 * blow out if -- fork has less extents than can fit in
689 * fork (fork shouldn't be a btree format), root btree 689 * fork (fork shouldn't be a btree format), root btree
690 * block has more records than can fit into the fork, 690 * block has more records than can fit into the fork,
691 * or the number of extents is greater than the number of 691 * or the number of extents is greater than the number of
692 * blocks. 692 * blocks.
693 */ 693 */
694 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max 694 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
695 || XFS_BMDR_SPACE_CALC(nrecs) > 695 || XFS_BMDR_SPACE_CALC(nrecs) >
696 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 696 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
697 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 697 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
698 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 698 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
699 "corrupt inode %Lu (btree).", 699 "corrupt inode %Lu (btree).",
700 (unsigned long long) ip->i_ino); 700 (unsigned long long) ip->i_ino);
701 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 701 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
702 ip->i_mount); 702 ip->i_mount);
703 return XFS_ERROR(EFSCORRUPTED); 703 return XFS_ERROR(EFSCORRUPTED);
704 } 704 }
705 705
706 ifp->if_broot_bytes = size; 706 ifp->if_broot_bytes = size;
707 ifp->if_broot = kmem_alloc(size, KM_SLEEP); 707 ifp->if_broot = kmem_alloc(size, KM_SLEEP);
708 ASSERT(ifp->if_broot != NULL); 708 ASSERT(ifp->if_broot != NULL);
709 /* 709 /*
710 * Copy and convert from the on-disk structure 710 * Copy and convert from the on-disk structure
711 * to the in-memory structure. 711 * to the in-memory structure.
712 */ 712 */
713 xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 713 xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
714 ifp->if_broot, size); 714 ifp->if_broot, size);
715 ifp->if_flags &= ~XFS_IFEXTENTS; 715 ifp->if_flags &= ~XFS_IFEXTENTS;
716 ifp->if_flags |= XFS_IFBROOT; 716 ifp->if_flags |= XFS_IFBROOT;
717 717
718 return 0; 718 return 0;
719 } 719 }
720 720
721 /* 721 /*
722 * xfs_xlate_dinode_core - translate an xfs_inode_core_t between ondisk 722 * xfs_xlate_dinode_core - translate an xfs_inode_core_t between ondisk
723 * and native format 723 * and native format
724 * 724 *
725 * buf = on-disk representation 725 * buf = on-disk representation
726 * dip = native representation 726 * dip = native representation
727 * dir = direction - +ve -> disk to native 727 * dir = direction - +ve -> disk to native
728 * -ve -> native to disk 728 * -ve -> native to disk
729 */ 729 */
730 void 730 void
731 xfs_xlate_dinode_core( 731 xfs_xlate_dinode_core(
732 xfs_caddr_t buf, 732 xfs_caddr_t buf,
733 xfs_dinode_core_t *dip, 733 xfs_dinode_core_t *dip,
734 int dir) 734 int dir)
735 { 735 {
736 xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; 736 xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf;
737 xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; 737 xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip;
738 xfs_arch_t arch = ARCH_CONVERT; 738 xfs_arch_t arch = ARCH_CONVERT;
739 739
740 ASSERT(dir); 740 ASSERT(dir);
741 741
742 INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); 742 INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch);
743 INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); 743 INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch);
744 INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); 744 INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch);
745 INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); 745 INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch);
746 INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); 746 INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch);
747 INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); 747 INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch);
748 INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); 748 INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch);
749 INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); 749 INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch);
750 INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); 750 INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch);
751 751
752 if (dir > 0) { 752 if (dir > 0) {
753 memcpy(mem_core->di_pad, buf_core->di_pad, 753 memcpy(mem_core->di_pad, buf_core->di_pad,
754 sizeof(buf_core->di_pad)); 754 sizeof(buf_core->di_pad));
755 } else { 755 } else {
756 memcpy(buf_core->di_pad, mem_core->di_pad, 756 memcpy(buf_core->di_pad, mem_core->di_pad,
757 sizeof(buf_core->di_pad)); 757 sizeof(buf_core->di_pad));
758 } 758 }
759 759
760 INT_XLATE(buf_core->di_flushiter, mem_core->di_flushiter, dir, arch); 760 INT_XLATE(buf_core->di_flushiter, mem_core->di_flushiter, dir, arch);
761 761
762 INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec, 762 INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec,
763 dir, arch); 763 dir, arch);
764 INT_XLATE(buf_core->di_atime.t_nsec, mem_core->di_atime.t_nsec, 764 INT_XLATE(buf_core->di_atime.t_nsec, mem_core->di_atime.t_nsec,
765 dir, arch); 765 dir, arch);
766 INT_XLATE(buf_core->di_mtime.t_sec, mem_core->di_mtime.t_sec, 766 INT_XLATE(buf_core->di_mtime.t_sec, mem_core->di_mtime.t_sec,
767 dir, arch); 767 dir, arch);
768 INT_XLATE(buf_core->di_mtime.t_nsec, mem_core->di_mtime.t_nsec, 768 INT_XLATE(buf_core->di_mtime.t_nsec, mem_core->di_mtime.t_nsec,
769 dir, arch); 769 dir, arch);
770 INT_XLATE(buf_core->di_ctime.t_sec, mem_core->di_ctime.t_sec, 770 INT_XLATE(buf_core->di_ctime.t_sec, mem_core->di_ctime.t_sec,
771 dir, arch); 771 dir, arch);
772 INT_XLATE(buf_core->di_ctime.t_nsec, mem_core->di_ctime.t_nsec, 772 INT_XLATE(buf_core->di_ctime.t_nsec, mem_core->di_ctime.t_nsec,
773 dir, arch); 773 dir, arch);
774 INT_XLATE(buf_core->di_size, mem_core->di_size, dir, arch); 774 INT_XLATE(buf_core->di_size, mem_core->di_size, dir, arch);
775 INT_XLATE(buf_core->di_nblocks, mem_core->di_nblocks, dir, arch); 775 INT_XLATE(buf_core->di_nblocks, mem_core->di_nblocks, dir, arch);
776 INT_XLATE(buf_core->di_extsize, mem_core->di_extsize, dir, arch); 776 INT_XLATE(buf_core->di_extsize, mem_core->di_extsize, dir, arch);
777 INT_XLATE(buf_core->di_nextents, mem_core->di_nextents, dir, arch); 777 INT_XLATE(buf_core->di_nextents, mem_core->di_nextents, dir, arch);
778 INT_XLATE(buf_core->di_anextents, mem_core->di_anextents, dir, arch); 778 INT_XLATE(buf_core->di_anextents, mem_core->di_anextents, dir, arch);
779 INT_XLATE(buf_core->di_forkoff, mem_core->di_forkoff, dir, arch); 779 INT_XLATE(buf_core->di_forkoff, mem_core->di_forkoff, dir, arch);
780 INT_XLATE(buf_core->di_aformat, mem_core->di_aformat, dir, arch); 780 INT_XLATE(buf_core->di_aformat, mem_core->di_aformat, dir, arch);
781 INT_XLATE(buf_core->di_dmevmask, mem_core->di_dmevmask, dir, arch); 781 INT_XLATE(buf_core->di_dmevmask, mem_core->di_dmevmask, dir, arch);
782 INT_XLATE(buf_core->di_dmstate, mem_core->di_dmstate, dir, arch); 782 INT_XLATE(buf_core->di_dmstate, mem_core->di_dmstate, dir, arch);
783 INT_XLATE(buf_core->di_flags, mem_core->di_flags, dir, arch); 783 INT_XLATE(buf_core->di_flags, mem_core->di_flags, dir, arch);
784 INT_XLATE(buf_core->di_gen, mem_core->di_gen, dir, arch); 784 INT_XLATE(buf_core->di_gen, mem_core->di_gen, dir, arch);
785 } 785 }
786 786
787 STATIC uint 787 STATIC uint
788 _xfs_dic2xflags( 788 _xfs_dic2xflags(
789 __uint16_t di_flags) 789 __uint16_t di_flags)
790 { 790 {
791 uint flags = 0; 791 uint flags = 0;
792 792
793 if (di_flags & XFS_DIFLAG_ANY) { 793 if (di_flags & XFS_DIFLAG_ANY) {
794 if (di_flags & XFS_DIFLAG_REALTIME) 794 if (di_flags & XFS_DIFLAG_REALTIME)
795 flags |= XFS_XFLAG_REALTIME; 795 flags |= XFS_XFLAG_REALTIME;
796 if (di_flags & XFS_DIFLAG_PREALLOC) 796 if (di_flags & XFS_DIFLAG_PREALLOC)
797 flags |= XFS_XFLAG_PREALLOC; 797 flags |= XFS_XFLAG_PREALLOC;
798 if (di_flags & XFS_DIFLAG_IMMUTABLE) 798 if (di_flags & XFS_DIFLAG_IMMUTABLE)
799 flags |= XFS_XFLAG_IMMUTABLE; 799 flags |= XFS_XFLAG_IMMUTABLE;
800 if (di_flags & XFS_DIFLAG_APPEND) 800 if (di_flags & XFS_DIFLAG_APPEND)
801 flags |= XFS_XFLAG_APPEND; 801 flags |= XFS_XFLAG_APPEND;
802 if (di_flags & XFS_DIFLAG_SYNC) 802 if (di_flags & XFS_DIFLAG_SYNC)
803 flags |= XFS_XFLAG_SYNC; 803 flags |= XFS_XFLAG_SYNC;
804 if (di_flags & XFS_DIFLAG_NOATIME) 804 if (di_flags & XFS_DIFLAG_NOATIME)
805 flags |= XFS_XFLAG_NOATIME; 805 flags |= XFS_XFLAG_NOATIME;
806 if (di_flags & XFS_DIFLAG_NODUMP) 806 if (di_flags & XFS_DIFLAG_NODUMP)
807 flags |= XFS_XFLAG_NODUMP; 807 flags |= XFS_XFLAG_NODUMP;
808 if (di_flags & XFS_DIFLAG_RTINHERIT) 808 if (di_flags & XFS_DIFLAG_RTINHERIT)
809 flags |= XFS_XFLAG_RTINHERIT; 809 flags |= XFS_XFLAG_RTINHERIT;
810 if (di_flags & XFS_DIFLAG_PROJINHERIT) 810 if (di_flags & XFS_DIFLAG_PROJINHERIT)
811 flags |= XFS_XFLAG_PROJINHERIT; 811 flags |= XFS_XFLAG_PROJINHERIT;
812 if (di_flags & XFS_DIFLAG_NOSYMLINKS) 812 if (di_flags & XFS_DIFLAG_NOSYMLINKS)
813 flags |= XFS_XFLAG_NOSYMLINKS; 813 flags |= XFS_XFLAG_NOSYMLINKS;
814 if (di_flags & XFS_DIFLAG_EXTSIZE) 814 if (di_flags & XFS_DIFLAG_EXTSIZE)
815 flags |= XFS_XFLAG_EXTSIZE; 815 flags |= XFS_XFLAG_EXTSIZE;
816 if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 816 if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
817 flags |= XFS_XFLAG_EXTSZINHERIT; 817 flags |= XFS_XFLAG_EXTSZINHERIT;
818 if (di_flags & XFS_DIFLAG_NODEFRAG) 818 if (di_flags & XFS_DIFLAG_NODEFRAG)
819 flags |= XFS_XFLAG_NODEFRAG; 819 flags |= XFS_XFLAG_NODEFRAG;
820 } 820 }
821 821
822 return flags; 822 return flags;
823 } 823 }
824 824
825 uint 825 uint
826 xfs_ip2xflags( 826 xfs_ip2xflags(
827 xfs_inode_t *ip) 827 xfs_inode_t *ip)
828 { 828 {
829 xfs_dinode_core_t *dic = &ip->i_d; 829 xfs_dinode_core_t *dic = &ip->i_d;
830 830
831 return _xfs_dic2xflags(dic->di_flags) | 831 return _xfs_dic2xflags(dic->di_flags) |
832 (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); 832 (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0);
833 } 833 }
834 834
835 uint 835 uint
836 xfs_dic2xflags( 836 xfs_dic2xflags(
837 xfs_dinode_core_t *dic) 837 xfs_dinode_core_t *dic)
838 { 838 {
839 return _xfs_dic2xflags(INT_GET(dic->di_flags, ARCH_CONVERT)) | 839 return _xfs_dic2xflags(INT_GET(dic->di_flags, ARCH_CONVERT)) |
840 (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); 840 (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0);
841 } 841 }
842 842
843 /* 843 /*
844 * Given a mount structure and an inode number, return a pointer 844 * Given a mount structure and an inode number, return a pointer
845 * to a newly allocated in-core inode corresponding to the given 845 * to a newly allocated in-core inode corresponding to the given
846 * inode number. 846 * inode number.
847 * 847 *
848 * Initialize the inode's attributes and extent pointers if it 848 * Initialize the inode's attributes and extent pointers if it
849 * already has them (it will not if the inode has no links). 849 * already has them (it will not if the inode has no links).
850 */ 850 */
851 int 851 int
852 xfs_iread( 852 xfs_iread(
853 xfs_mount_t *mp, 853 xfs_mount_t *mp,
854 xfs_trans_t *tp, 854 xfs_trans_t *tp,
855 xfs_ino_t ino, 855 xfs_ino_t ino,
856 xfs_inode_t **ipp, 856 xfs_inode_t **ipp,
857 xfs_daddr_t bno) 857 xfs_daddr_t bno,
858 uint imap_flags)
858 { 859 {
859 xfs_buf_t *bp; 860 xfs_buf_t *bp;
860 xfs_dinode_t *dip; 861 xfs_dinode_t *dip;
861 xfs_inode_t *ip; 862 xfs_inode_t *ip;
862 int error; 863 int error;
863 864
864 ASSERT(xfs_inode_zone != NULL); 865 ASSERT(xfs_inode_zone != NULL);
865 866
866 ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); 867 ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
867 ip->i_ino = ino; 868 ip->i_ino = ino;
868 ip->i_mount = mp; 869 ip->i_mount = mp;
869 870
870 /* 871 /*
871 * Get pointer's to the on-disk inode and the buffer containing it. 872 * Get pointer's to the on-disk inode and the buffer containing it.
872 * If the inode number refers to a block outside the file system 873 * If the inode number refers to a block outside the file system
873 * then xfs_itobp() will return NULL. In this case we should 874 * then xfs_itobp() will return NULL. In this case we should
874 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will 875 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
875 * know that this is a new incore inode. 876 * know that this is a new incore inode.
876 */ 877 */
877 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0); 878 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
878 if (error) { 879 if (error) {
879 kmem_zone_free(xfs_inode_zone, ip); 880 kmem_zone_free(xfs_inode_zone, ip);
880 return error; 881 return error;
881 } 882 }
882 883
883 /* 884 /*
884 * Initialize inode's trace buffers. 885 * Initialize inode's trace buffers.
885 * Do this before xfs_iformat in case it adds entries. 886 * Do this before xfs_iformat in case it adds entries.
886 */ 887 */
887 #ifdef XFS_BMAP_TRACE 888 #ifdef XFS_BMAP_TRACE
888 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); 889 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
889 #endif 890 #endif
890 #ifdef XFS_BMBT_TRACE 891 #ifdef XFS_BMBT_TRACE
891 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); 892 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP);
892 #endif 893 #endif
893 #ifdef XFS_RW_TRACE 894 #ifdef XFS_RW_TRACE
894 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); 895 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP);
895 #endif 896 #endif
896 #ifdef XFS_ILOCK_TRACE 897 #ifdef XFS_ILOCK_TRACE
897 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); 898 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP);
898 #endif 899 #endif
899 #ifdef XFS_DIR2_TRACE 900 #ifdef XFS_DIR2_TRACE
900 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); 901 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP);
901 #endif 902 #endif
902 903
903 /* 904 /*
904 * If we got something that isn't an inode it means someone 905 * If we got something that isn't an inode it means someone
905 * (nfs or dmi) has a stale handle. 906 * (nfs or dmi) has a stale handle.
906 */ 907 */
907 if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { 908 if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
908 kmem_zone_free(xfs_inode_zone, ip); 909 kmem_zone_free(xfs_inode_zone, ip);
909 xfs_trans_brelse(tp, bp); 910 xfs_trans_brelse(tp, bp);
910 #ifdef DEBUG 911 #ifdef DEBUG
911 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 912 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
912 "dip->di_core.di_magic (0x%x) != " 913 "dip->di_core.di_magic (0x%x) != "
913 "XFS_DINODE_MAGIC (0x%x)", 914 "XFS_DINODE_MAGIC (0x%x)",
914 INT_GET(dip->di_core.di_magic, ARCH_CONVERT), 915 INT_GET(dip->di_core.di_magic, ARCH_CONVERT),
915 XFS_DINODE_MAGIC); 916 XFS_DINODE_MAGIC);
916 #endif /* DEBUG */ 917 #endif /* DEBUG */
917 return XFS_ERROR(EINVAL); 918 return XFS_ERROR(EINVAL);
918 } 919 }
919 920
920 /* 921 /*
921 * If the on-disk inode is already linked to a directory 922 * If the on-disk inode is already linked to a directory
922 * entry, copy all of the inode into the in-core inode. 923 * entry, copy all of the inode into the in-core inode.
923 * xfs_iformat() handles copying in the inode format 924 * xfs_iformat() handles copying in the inode format
924 * specific information. 925 * specific information.
925 * Otherwise, just get the truly permanent information. 926 * Otherwise, just get the truly permanent information.
926 */ 927 */
927 if (dip->di_core.di_mode) { 928 if (dip->di_core.di_mode) {
928 xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, 929 xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core,
929 &(ip->i_d), 1); 930 &(ip->i_d), 1);
930 error = xfs_iformat(ip, dip); 931 error = xfs_iformat(ip, dip);
931 if (error) { 932 if (error) {
932 kmem_zone_free(xfs_inode_zone, ip); 933 kmem_zone_free(xfs_inode_zone, ip);
933 xfs_trans_brelse(tp, bp); 934 xfs_trans_brelse(tp, bp);
934 #ifdef DEBUG 935 #ifdef DEBUG
935 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 936 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
936 "xfs_iformat() returned error %d", 937 "xfs_iformat() returned error %d",
937 error); 938 error);
938 #endif /* DEBUG */ 939 #endif /* DEBUG */
939 return error; 940 return error;
940 } 941 }
941 } else { 942 } else {
942 ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT); 943 ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT);
943 ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT); 944 ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT);
944 ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT); 945 ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT);
945 ip->i_d.di_flushiter = INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT); 946 ip->i_d.di_flushiter = INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT);
946 /* 947 /*
947 * Make sure to pull in the mode here as well in 948 * Make sure to pull in the mode here as well in
948 * case the inode is released without being used. 949 * case the inode is released without being used.
949 * This ensures that xfs_inactive() will see that 950 * This ensures that xfs_inactive() will see that
950 * the inode is already free and not try to mess 951 * the inode is already free and not try to mess
951 * with the uninitialized part of it. 952 * with the uninitialized part of it.
952 */ 953 */
953 ip->i_d.di_mode = 0; 954 ip->i_d.di_mode = 0;
954 /* 955 /*
955 * Initialize the per-fork minima and maxima for a new 956 * Initialize the per-fork minima and maxima for a new
956 * inode here. xfs_iformat will do it for old inodes. 957 * inode here. xfs_iformat will do it for old inodes.
957 */ 958 */
958 ip->i_df.if_ext_max = 959 ip->i_df.if_ext_max =
959 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 960 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
960 } 961 }
961 962
962 INIT_LIST_HEAD(&ip->i_reclaim); 963 INIT_LIST_HEAD(&ip->i_reclaim);
963 964
964 /* 965 /*
965 * The inode format changed when we moved the link count and 966 * The inode format changed when we moved the link count and
966 * made it 32 bits long. If this is an old format inode, 967 * made it 32 bits long. If this is an old format inode,
967 * convert it in memory to look like a new one. If it gets 968 * convert it in memory to look like a new one. If it gets
968 * flushed to disk we will convert back before flushing or 969 * flushed to disk we will convert back before flushing or
969 * logging it. We zero out the new projid field and the old link 970 * logging it. We zero out the new projid field and the old link
970 * count field. We'll handle clearing the pad field (the remains 971 * count field. We'll handle clearing the pad field (the remains
971 * of the old uuid field) when we actually convert the inode to 972 * of the old uuid field) when we actually convert the inode to
972 * the new format. We don't change the version number so that we 973 * the new format. We don't change the version number so that we
973 * can distinguish this from a real new format inode. 974 * can distinguish this from a real new format inode.
974 */ 975 */
975 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 976 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
976 ip->i_d.di_nlink = ip->i_d.di_onlink; 977 ip->i_d.di_nlink = ip->i_d.di_onlink;
977 ip->i_d.di_onlink = 0; 978 ip->i_d.di_onlink = 0;
978 ip->i_d.di_projid = 0; 979 ip->i_d.di_projid = 0;
979 } 980 }
980 981
981 ip->i_delayed_blks = 0; 982 ip->i_delayed_blks = 0;
982 983
983 /* 984 /*
984 * Mark the buffer containing the inode as something to keep 985 * Mark the buffer containing the inode as something to keep
985 * around for a while. This helps to keep recently accessed 986 * around for a while. This helps to keep recently accessed
986 * meta-data in-core longer. 987 * meta-data in-core longer.
987 */ 988 */
988 XFS_BUF_SET_REF(bp, XFS_INO_REF); 989 XFS_BUF_SET_REF(bp, XFS_INO_REF);
989 990
990 /* 991 /*
991 * Use xfs_trans_brelse() to release the buffer containing the 992 * Use xfs_trans_brelse() to release the buffer containing the
992 * on-disk inode, because it was acquired with xfs_trans_read_buf() 993 * on-disk inode, because it was acquired with xfs_trans_read_buf()
993 * in xfs_itobp() above. If tp is NULL, this is just a normal 994 * in xfs_itobp() above. If tp is NULL, this is just a normal
994 * brelse(). If we're within a transaction, then xfs_trans_brelse() 995 * brelse(). If we're within a transaction, then xfs_trans_brelse()
995 * will only release the buffer if it is not dirty within the 996 * will only release the buffer if it is not dirty within the
996 * transaction. It will be OK to release the buffer in this case, 997 * transaction. It will be OK to release the buffer in this case,
997 * because inodes on disk are never destroyed and we will be 998 * because inodes on disk are never destroyed and we will be
998 * locking the new in-core inode before putting it in the hash 999 * locking the new in-core inode before putting it in the hash
999 * table where other processes can find it. Thus we don't have 1000 * table where other processes can find it. Thus we don't have
1000 * to worry about the inode being changed just because we released 1001 * to worry about the inode being changed just because we released
1001 * the buffer. 1002 * the buffer.
1002 */ 1003 */
1003 xfs_trans_brelse(tp, bp); 1004 xfs_trans_brelse(tp, bp);
1004 *ipp = ip; 1005 *ipp = ip;
1005 return 0; 1006 return 0;
1006 } 1007 }
1007 1008
1008 /* 1009 /*
1009 * Read in extents from a btree-format inode. 1010 * Read in extents from a btree-format inode.
1010 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 1011 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c.
1011 */ 1012 */
1012 int 1013 int
1013 xfs_iread_extents( 1014 xfs_iread_extents(
1014 xfs_trans_t *tp, 1015 xfs_trans_t *tp,
1015 xfs_inode_t *ip, 1016 xfs_inode_t *ip,
1016 int whichfork) 1017 int whichfork)
1017 { 1018 {
1018 int error; 1019 int error;
1019 xfs_ifork_t *ifp; 1020 xfs_ifork_t *ifp;
1020 xfs_extnum_t nextents; 1021 xfs_extnum_t nextents;
1021 size_t size; 1022 size_t size;
1022 1023
1023 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 1024 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
1024 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 1025 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
1025 ip->i_mount); 1026 ip->i_mount);
1026 return XFS_ERROR(EFSCORRUPTED); 1027 return XFS_ERROR(EFSCORRUPTED);
1027 } 1028 }
1028 nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 1029 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
1029 size = nextents * sizeof(xfs_bmbt_rec_t); 1030 size = nextents * sizeof(xfs_bmbt_rec_t);
1030 ifp = XFS_IFORK_PTR(ip, whichfork); 1031 ifp = XFS_IFORK_PTR(ip, whichfork);
1031 1032
1032 /* 1033 /*
1033 * We know that the size is valid (it's checked in iformat_btree) 1034 * We know that the size is valid (it's checked in iformat_btree)
1034 */ 1035 */
1035 ifp->if_lastex = NULLEXTNUM; 1036 ifp->if_lastex = NULLEXTNUM;
1036 ifp->if_bytes = ifp->if_real_bytes = 0; 1037 ifp->if_bytes = ifp->if_real_bytes = 0;
1037 ifp->if_flags |= XFS_IFEXTENTS; 1038 ifp->if_flags |= XFS_IFEXTENTS;
1038 xfs_iext_add(ifp, 0, nextents); 1039 xfs_iext_add(ifp, 0, nextents);
1039 error = xfs_bmap_read_extents(tp, ip, whichfork); 1040 error = xfs_bmap_read_extents(tp, ip, whichfork);
1040 if (error) { 1041 if (error) {
1041 xfs_iext_destroy(ifp); 1042 xfs_iext_destroy(ifp);
1042 ifp->if_flags &= ~XFS_IFEXTENTS; 1043 ifp->if_flags &= ~XFS_IFEXTENTS;
1043 return error; 1044 return error;
1044 } 1045 }
1045 xfs_validate_extents(ifp, nextents, 0, XFS_EXTFMT_INODE(ip)); 1046 xfs_validate_extents(ifp, nextents, 0, XFS_EXTFMT_INODE(ip));
1046 return 0; 1047 return 0;
1047 } 1048 }
1048 1049
1049 /* 1050 /*
1050 * Allocate an inode on disk and return a copy of its in-core version. 1051 * Allocate an inode on disk and return a copy of its in-core version.
1051 * The in-core inode is locked exclusively. Set mode, nlink, and rdev 1052 * The in-core inode is locked exclusively. Set mode, nlink, and rdev
1052 * appropriately within the inode. The uid and gid for the inode are 1053 * appropriately within the inode. The uid and gid for the inode are
1053 * set according to the contents of the given cred structure. 1054 * set according to the contents of the given cred structure.
1054 * 1055 *
1055 * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 1056 * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
1056 * has a free inode available, call xfs_iget() 1057 * has a free inode available, call xfs_iget()
1057 * to obtain the in-core version of the allocated inode. Finally, 1058 * to obtain the in-core version of the allocated inode. Finally,
1058 * fill in the inode and log its initial contents. In this case, 1059 * fill in the inode and log its initial contents. In this case,
1059 * ialloc_context would be set to NULL and call_again set to false. 1060 * ialloc_context would be set to NULL and call_again set to false.
1060 * 1061 *
1061 * If xfs_dialloc() does not have an available inode, 1062 * If xfs_dialloc() does not have an available inode,
1062 * it will replenish its supply by doing an allocation. Since we can 1063 * it will replenish its supply by doing an allocation. Since we can
1063 * only do one allocation within a transaction without deadlocks, we 1064 * only do one allocation within a transaction without deadlocks, we
1064 * must commit the current transaction before returning the inode itself. 1065 * must commit the current transaction before returning the inode itself.
1065 * In this case, therefore, we will set call_again to true and return. 1066 * In this case, therefore, we will set call_again to true and return.
1066 * The caller should then commit the current transaction, start a new 1067 * The caller should then commit the current transaction, start a new
1067 * transaction, and call xfs_ialloc() again to actually get the inode. 1068 * transaction, and call xfs_ialloc() again to actually get the inode.
1068 * 1069 *
1069 * To ensure that some other process does not grab the inode that 1070 * To ensure that some other process does not grab the inode that
1070 * was allocated during the first call to xfs_ialloc(), this routine 1071 * was allocated during the first call to xfs_ialloc(), this routine
1071 * also returns the [locked] bp pointing to the head of the freelist 1072 * also returns the [locked] bp pointing to the head of the freelist
1072 * as ialloc_context. The caller should hold this buffer across 1073 * as ialloc_context. The caller should hold this buffer across
1073 * the commit and pass it back into this routine on the second call. 1074 * the commit and pass it back into this routine on the second call.
1074 */ 1075 */
1075 int 1076 int
1076 xfs_ialloc( 1077 xfs_ialloc(
1077 xfs_trans_t *tp, 1078 xfs_trans_t *tp,
1078 xfs_inode_t *pip, 1079 xfs_inode_t *pip,
1079 mode_t mode, 1080 mode_t mode,
1080 xfs_nlink_t nlink, 1081 xfs_nlink_t nlink,
1081 xfs_dev_t rdev, 1082 xfs_dev_t rdev,
1082 cred_t *cr, 1083 cred_t *cr,
1083 xfs_prid_t prid, 1084 xfs_prid_t prid,
1084 int okalloc, 1085 int okalloc,
1085 xfs_buf_t **ialloc_context, 1086 xfs_buf_t **ialloc_context,
1086 boolean_t *call_again, 1087 boolean_t *call_again,
1087 xfs_inode_t **ipp) 1088 xfs_inode_t **ipp)
1088 { 1089 {
1089 xfs_ino_t ino; 1090 xfs_ino_t ino;
1090 xfs_inode_t *ip; 1091 xfs_inode_t *ip;
1091 bhv_vnode_t *vp; 1092 bhv_vnode_t *vp;
1092 uint flags; 1093 uint flags;
1093 int error; 1094 int error;
1094 1095
1095 /* 1096 /*
1096 * Call the space management code to pick 1097 * Call the space management code to pick
1097 * the on-disk inode to be allocated. 1098 * the on-disk inode to be allocated.
1098 */ 1099 */
1099 error = xfs_dialloc(tp, pip->i_ino, mode, okalloc, 1100 error = xfs_dialloc(tp, pip->i_ino, mode, okalloc,
1100 ialloc_context, call_again, &ino); 1101 ialloc_context, call_again, &ino);
1101 if (error != 0) { 1102 if (error != 0) {
1102 return error; 1103 return error;
1103 } 1104 }
1104 if (*call_again || ino == NULLFSINO) { 1105 if (*call_again || ino == NULLFSINO) {
1105 *ipp = NULL; 1106 *ipp = NULL;
1106 return 0; 1107 return 0;
1107 } 1108 }
1108 ASSERT(*ialloc_context == NULL); 1109 ASSERT(*ialloc_context == NULL);
1109 1110
1110 /* 1111 /*
1111 * Get the in-core inode with the lock held exclusively. 1112 * Get the in-core inode with the lock held exclusively.
1112 * This is because we're setting fields here we need 1113 * This is because we're setting fields here we need
1113 * to prevent others from looking at until we're done. 1114 * to prevent others from looking at until we're done.
1114 */ 1115 */
1115 error = xfs_trans_iget(tp->t_mountp, tp, ino, 1116 error = xfs_trans_iget(tp->t_mountp, tp, ino,
1116 IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1117 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
1117 if (error != 0) { 1118 if (error != 0) {
1118 return error; 1119 return error;
1119 } 1120 }
1120 ASSERT(ip != NULL); 1121 ASSERT(ip != NULL);
1121 1122
1122 vp = XFS_ITOV(ip); 1123 vp = XFS_ITOV(ip);
1123 ip->i_d.di_mode = (__uint16_t)mode; 1124 ip->i_d.di_mode = (__uint16_t)mode;
1124 ip->i_d.di_onlink = 0; 1125 ip->i_d.di_onlink = 0;
1125 ip->i_d.di_nlink = nlink; 1126 ip->i_d.di_nlink = nlink;
1126 ASSERT(ip->i_d.di_nlink == nlink); 1127 ASSERT(ip->i_d.di_nlink == nlink);
1127 ip->i_d.di_uid = current_fsuid(cr); 1128 ip->i_d.di_uid = current_fsuid(cr);
1128 ip->i_d.di_gid = current_fsgid(cr); 1129 ip->i_d.di_gid = current_fsgid(cr);
1129 ip->i_d.di_projid = prid; 1130 ip->i_d.di_projid = prid;
1130 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1131 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
1131 1132
1132 /* 1133 /*
1133 * If the superblock version is up to where we support new format 1134 * If the superblock version is up to where we support new format
1134 * inodes and this is currently an old format inode, then change 1135 * inodes and this is currently an old format inode, then change
1135 * the inode version number now. This way we only do the conversion 1136 * the inode version number now. This way we only do the conversion
1136 * here rather than here and in the flush/logging code. 1137 * here rather than here and in the flush/logging code.
1137 */ 1138 */
1138 if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) && 1139 if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) &&
1139 ip->i_d.di_version == XFS_DINODE_VERSION_1) { 1140 ip->i_d.di_version == XFS_DINODE_VERSION_1) {
1140 ip->i_d.di_version = XFS_DINODE_VERSION_2; 1141 ip->i_d.di_version = XFS_DINODE_VERSION_2;
1141 /* 1142 /*
1142 * We've already zeroed the old link count, the projid field, 1143 * We've already zeroed the old link count, the projid field,
1143 * and the pad field. 1144 * and the pad field.
1144 */ 1145 */
1145 } 1146 }
1146 1147
1147 /* 1148 /*
1148 * Project ids won't be stored on disk if we are using a version 1 inode. 1149 * Project ids won't be stored on disk if we are using a version 1 inode.
1149 */ 1150 */
1150 if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 1151 if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
1151 xfs_bump_ino_vers2(tp, ip); 1152 xfs_bump_ino_vers2(tp, ip);
1152 1153
1153 if (XFS_INHERIT_GID(pip, vp->v_vfsp)) { 1154 if (XFS_INHERIT_GID(pip, vp->v_vfsp)) {
1154 ip->i_d.di_gid = pip->i_d.di_gid; 1155 ip->i_d.di_gid = pip->i_d.di_gid;
1155 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 1156 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
1156 ip->i_d.di_mode |= S_ISGID; 1157 ip->i_d.di_mode |= S_ISGID;
1157 } 1158 }
1158 } 1159 }
1159 1160
1160 /* 1161 /*
1161 * If the group ID of the new file does not match the effective group 1162 * If the group ID of the new file does not match the effective group
1162 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 1163 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
1163 * (and only if the irix_sgid_inherit compatibility variable is set). 1164 * (and only if the irix_sgid_inherit compatibility variable is set).
1164 */ 1165 */
1165 if ((irix_sgid_inherit) && 1166 if ((irix_sgid_inherit) &&
1166 (ip->i_d.di_mode & S_ISGID) && 1167 (ip->i_d.di_mode & S_ISGID) &&
1167 (!in_group_p((gid_t)ip->i_d.di_gid))) { 1168 (!in_group_p((gid_t)ip->i_d.di_gid))) {
1168 ip->i_d.di_mode &= ~S_ISGID; 1169 ip->i_d.di_mode &= ~S_ISGID;
1169 } 1170 }
1170 1171
1171 ip->i_d.di_size = 0; 1172 ip->i_d.di_size = 0;
1172 ip->i_d.di_nextents = 0; 1173 ip->i_d.di_nextents = 0;
1173 ASSERT(ip->i_d.di_nblocks == 0); 1174 ASSERT(ip->i_d.di_nblocks == 0);
1174 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); 1175 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
1175 /* 1176 /*
1176 * di_gen will have been taken care of in xfs_iread. 1177 * di_gen will have been taken care of in xfs_iread.
1177 */ 1178 */
1178 ip->i_d.di_extsize = 0; 1179 ip->i_d.di_extsize = 0;
1179 ip->i_d.di_dmevmask = 0; 1180 ip->i_d.di_dmevmask = 0;
1180 ip->i_d.di_dmstate = 0; 1181 ip->i_d.di_dmstate = 0;
1181 ip->i_d.di_flags = 0; 1182 ip->i_d.di_flags = 0;
1182 flags = XFS_ILOG_CORE; 1183 flags = XFS_ILOG_CORE;
1183 switch (mode & S_IFMT) { 1184 switch (mode & S_IFMT) {
1184 case S_IFIFO: 1185 case S_IFIFO:
1185 case S_IFCHR: 1186 case S_IFCHR:
1186 case S_IFBLK: 1187 case S_IFBLK:
1187 case S_IFSOCK: 1188 case S_IFSOCK:
1188 ip->i_d.di_format = XFS_DINODE_FMT_DEV; 1189 ip->i_d.di_format = XFS_DINODE_FMT_DEV;
1189 ip->i_df.if_u2.if_rdev = rdev; 1190 ip->i_df.if_u2.if_rdev = rdev;
1190 ip->i_df.if_flags = 0; 1191 ip->i_df.if_flags = 0;
1191 flags |= XFS_ILOG_DEV; 1192 flags |= XFS_ILOG_DEV;
1192 break; 1193 break;
1193 case S_IFREG: 1194 case S_IFREG:
1194 case S_IFDIR: 1195 case S_IFDIR:
1195 if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1196 if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1196 uint di_flags = 0; 1197 uint di_flags = 0;
1197 1198
1198 if ((mode & S_IFMT) == S_IFDIR) { 1199 if ((mode & S_IFMT) == S_IFDIR) {
1199 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1200 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1200 di_flags |= XFS_DIFLAG_RTINHERIT; 1201 di_flags |= XFS_DIFLAG_RTINHERIT;
1201 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1202 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1202 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 1203 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1203 ip->i_d.di_extsize = pip->i_d.di_extsize; 1204 ip->i_d.di_extsize = pip->i_d.di_extsize;
1204 } 1205 }
1205 } else if ((mode & S_IFMT) == S_IFREG) { 1206 } else if ((mode & S_IFMT) == S_IFREG) {
1206 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { 1207 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
1207 di_flags |= XFS_DIFLAG_REALTIME; 1208 di_flags |= XFS_DIFLAG_REALTIME;
1208 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 1209 ip->i_iocore.io_flags |= XFS_IOCORE_RT;
1209 } 1210 }
1210 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1211 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1211 di_flags |= XFS_DIFLAG_EXTSIZE; 1212 di_flags |= XFS_DIFLAG_EXTSIZE;
1212 ip->i_d.di_extsize = pip->i_d.di_extsize; 1213 ip->i_d.di_extsize = pip->i_d.di_extsize;
1213 } 1214 }
1214 } 1215 }
1215 if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 1216 if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
1216 xfs_inherit_noatime) 1217 xfs_inherit_noatime)
1217 di_flags |= XFS_DIFLAG_NOATIME; 1218 di_flags |= XFS_DIFLAG_NOATIME;
1218 if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 1219 if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
1219 xfs_inherit_nodump) 1220 xfs_inherit_nodump)
1220 di_flags |= XFS_DIFLAG_NODUMP; 1221 di_flags |= XFS_DIFLAG_NODUMP;
1221 if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 1222 if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
1222 xfs_inherit_sync) 1223 xfs_inherit_sync)
1223 di_flags |= XFS_DIFLAG_SYNC; 1224 di_flags |= XFS_DIFLAG_SYNC;
1224 if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 1225 if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
1225 xfs_inherit_nosymlinks) 1226 xfs_inherit_nosymlinks)
1226 di_flags |= XFS_DIFLAG_NOSYMLINKS; 1227 di_flags |= XFS_DIFLAG_NOSYMLINKS;
1227 if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1228 if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1228 di_flags |= XFS_DIFLAG_PROJINHERIT; 1229 di_flags |= XFS_DIFLAG_PROJINHERIT;
1229 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 1230 if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1230 xfs_inherit_nodefrag) 1231 xfs_inherit_nodefrag)
1231 di_flags |= XFS_DIFLAG_NODEFRAG; 1232 di_flags |= XFS_DIFLAG_NODEFRAG;
1232 ip->i_d.di_flags |= di_flags; 1233 ip->i_d.di_flags |= di_flags;
1233 } 1234 }
1234 /* FALLTHROUGH */ 1235 /* FALLTHROUGH */
1235 case S_IFLNK: 1236 case S_IFLNK:
1236 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 1237 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
1237 ip->i_df.if_flags = XFS_IFEXTENTS; 1238 ip->i_df.if_flags = XFS_IFEXTENTS;
1238 ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 1239 ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
1239 ip->i_df.if_u1.if_extents = NULL; 1240 ip->i_df.if_u1.if_extents = NULL;
1240 break; 1241 break;
1241 default: 1242 default:
1242 ASSERT(0); 1243 ASSERT(0);
1243 } 1244 }
1244 /* 1245 /*
1245 * Attribute fork settings for new inode. 1246 * Attribute fork settings for new inode.
1246 */ 1247 */
1247 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 1248 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
1248 ip->i_d.di_anextents = 0; 1249 ip->i_d.di_anextents = 0;
1249 1250
1250 /* 1251 /*
1251 * Log the new values stuffed into the inode. 1252 * Log the new values stuffed into the inode.
1252 */ 1253 */
1253 xfs_trans_log_inode(tp, ip, flags); 1254 xfs_trans_log_inode(tp, ip, flags);
1254 1255
1255 /* now that we have an i_mode we can setup inode ops and unlock */ 1256 /* now that we have an i_mode we can setup inode ops and unlock */
1256 bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); 1257 bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
1257 1258
1258 *ipp = ip; 1259 *ipp = ip;
1259 return 0; 1260 return 0;
1260 } 1261 }
1261 1262
/*
 * Check to make sure that there are no blocks allocated to the
 * file beyond the size of the file.  We don't check this for
 * files with fixed size extents or real time extents, but we
 * at least do it for regular files.
 *
 * Debug-build-only sanity check: asserts that the whole range from
 * the block containing @isize up to the maximum file offset is a
 * single hole, i.e. no stray allocations exist past EOF.
 */
#ifdef DEBUG
void
xfs_isize_check(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	xfs_fsize_t	isize)
{
	xfs_fileoff_t	map_first;
	int		nimaps;
	xfs_bmbt_irec_t	imaps[2];

	/* Only regular files are checked. */
	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
		return;

	/* Skip realtime and fixed-extent-size files (see block comment). */
	if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
		return;

	/*
	 * Ask for up to two mappings; if everything beyond EOF really is
	 * one hole, bmapi will hand back exactly one mapping, which the
	 * ASSERTs below verify.
	 */
	nimaps = 2;
	map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	/*
	 * The filesystem could be shutting down, so bmapi may return
	 * an error.
	 */
	if (xfs_bmapi(NULL, ip, map_first,
			 (XFS_B_TO_FSB(mp,
				       (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
			  map_first),
			 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
			 NULL, NULL))
	    return;
	ASSERT(nimaps == 1);
	ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
}
#endif	/* DEBUG */
1302 1303
1303 /* 1304 /*
1304 * Calculate the last possible buffered byte in a file. This must 1305 * Calculate the last possible buffered byte in a file. This must
1305 * include data that was buffered beyond the EOF by the write code. 1306 * include data that was buffered beyond the EOF by the write code.
1306 * This also needs to deal with overflowing the xfs_fsize_t type 1307 * This also needs to deal with overflowing the xfs_fsize_t type
1307 * which can happen for sizes near the limit. 1308 * which can happen for sizes near the limit.
1308 * 1309 *
1309 * We also need to take into account any blocks beyond the EOF. It 1310 * We also need to take into account any blocks beyond the EOF. It
1310 * may be the case that they were buffered by a write which failed. 1311 * may be the case that they were buffered by a write which failed.
1311 * In that case the pages will still be in memory, but the inode size 1312 * In that case the pages will still be in memory, but the inode size
1312 * will never have been updated. 1313 * will never have been updated.
1313 */ 1314 */
1314 xfs_fsize_t 1315 xfs_fsize_t
1315 xfs_file_last_byte( 1316 xfs_file_last_byte(
1316 xfs_inode_t *ip) 1317 xfs_inode_t *ip)
1317 { 1318 {
1318 xfs_mount_t *mp; 1319 xfs_mount_t *mp;
1319 xfs_fsize_t last_byte; 1320 xfs_fsize_t last_byte;
1320 xfs_fileoff_t last_block; 1321 xfs_fileoff_t last_block;
1321 xfs_fileoff_t size_last_block; 1322 xfs_fileoff_t size_last_block;
1322 int error; 1323 int error;
1323 1324
1324 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); 1325 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS));
1325 1326
1326 mp = ip->i_mount; 1327 mp = ip->i_mount;
1327 /* 1328 /*
1328 * Only check for blocks beyond the EOF if the extents have 1329 * Only check for blocks beyond the EOF if the extents have
1329 * been read in. This eliminates the need for the inode lock, 1330 * been read in. This eliminates the need for the inode lock,
1330 * and it also saves us from looking when it really isn't 1331 * and it also saves us from looking when it really isn't
1331 * necessary. 1332 * necessary.
1332 */ 1333 */
1333 if (ip->i_df.if_flags & XFS_IFEXTENTS) { 1334 if (ip->i_df.if_flags & XFS_IFEXTENTS) {
1334 error = xfs_bmap_last_offset(NULL, ip, &last_block, 1335 error = xfs_bmap_last_offset(NULL, ip, &last_block,
1335 XFS_DATA_FORK); 1336 XFS_DATA_FORK);
1336 if (error) { 1337 if (error) {
1337 last_block = 0; 1338 last_block = 0;
1338 } 1339 }
1339 } else { 1340 } else {
1340 last_block = 0; 1341 last_block = 0;
1341 } 1342 }
1342 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size); 1343 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size);
1343 last_block = XFS_FILEOFF_MAX(last_block, size_last_block); 1344 last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
1344 1345
1345 last_byte = XFS_FSB_TO_B(mp, last_block); 1346 last_byte = XFS_FSB_TO_B(mp, last_block);
1346 if (last_byte < 0) { 1347 if (last_byte < 0) {
1347 return XFS_MAXIOFFSET(mp); 1348 return XFS_MAXIOFFSET(mp);
1348 } 1349 }
1349 last_byte += (1 << mp->m_writeio_log); 1350 last_byte += (1 << mp->m_writeio_log);
1350 if (last_byte < 0) { 1351 if (last_byte < 0) {
1351 return XFS_MAXIOFFSET(mp); 1352 return XFS_MAXIOFFSET(mp);
1352 } 1353 }
1353 return last_byte; 1354 return last_byte;
1354 } 1355 }
1355 1356
#if defined(XFS_RW_TRACE)
/*
 * Record a truncate event in the inode's rw trace buffer.
 *
 * The trace entry is an array of void pointers, so each 64-bit
 * quantity (current inode size, new size, toss range bounds) is
 * stored as two 32-bit halves; the three trailing slots are unused.
 * No-op for inodes without a trace buffer attached.
 */
STATIC void
xfs_itrunc_trace(
	int		tag,
	xfs_inode_t	*ip,
	int		flag,
	xfs_fsize_t	new_size,
	xfs_off_t	toss_start,
	xfs_off_t	toss_finish)
{
	if (ip->i_rwtrace == NULL) {
		return;
	}

	ktrace_enter(ip->i_rwtrace,
		     (void*)((long)tag),
		     (void*)ip,
		     (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff),
		     (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff),
		     (void*)((long)flag),
		     (void*)(unsigned long)((new_size >> 32) & 0xffffffff),
		     (void*)(unsigned long)(new_size & 0xffffffff),
		     (void*)(unsigned long)((toss_start >> 32) & 0xffffffff),
		     (void*)(unsigned long)(toss_start & 0xffffffff),
		     (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff),
		     (void*)(unsigned long)(toss_finish & 0xffffffff),
		     (void*)(unsigned long)current_cpu(),
		     (void*)(unsigned long)current_pid(),
		     (void*)NULL,
		     (void*)NULL,
		     (void*)NULL);
}
#else
/* Tracing compiled out: calls vanish without evaluating side effects. */
#define	xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish)
#endif
1391 1392
/*
 * Start the truncation of the file to new_size.  The new size
 * must be smaller than the current size.  This routine will
 * clear the buffer and page caches of file data in the removed
 * range, and xfs_itruncate_finish() will remove the underlying
 * disk blocks.
 *
 * The inode must have its I/O lock locked EXCLUSIVELY, and it
 * must NOT have the inode lock held at all.  This is because we're
 * calling into the buffer/page cache code and we can't hold the
 * inode lock when we do so.
 *
 * We need to wait for any direct I/Os in flight to complete before we
 * proceed with the truncate. This is needed to prevent the extents
 * being read or written by the direct I/Os from being removed while the
 * I/O is in flight as there is no other method of synchronising
 * direct I/O with the truncate operation.  Also, because we hold
 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
 * started until the truncate completes and drops the lock. Essentially,
 * the vn_iowait() call forms an I/O barrier that provides strict ordering
 * between direct I/Os and the truncate operation.
 *
 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
 * or XFS_ITRUNC_MAYBE.  The XFS_ITRUNC_MAYBE value should be used
 * in the case that the caller is locking things out of order and
 * may not be able to call xfs_itruncate_finish() with the inode lock
 * held without dropping the I/O lock.  If the caller must drop the
 * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
 * must be called again with all the same restrictions as the initial
 * call.
 */
void
xfs_itruncate_start(
	xfs_inode_t	*ip,
	uint		flags,
	xfs_fsize_t	new_size)
{
	xfs_fsize_t	last_byte;
	xfs_off_t	toss_start;
	xfs_mount_t	*mp;
	bhv_vnode_t	*vp;

	/* Caller must hold the iolock exclusively; see block comment. */
	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
	ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size));
	ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
	       (flags == XFS_ITRUNC_MAYBE));

	mp = ip->i_mount;
	vp = XFS_ITOV(ip);

	vn_iowait(vp);  /* wait for the completion of any pending DIOs */

	/*
	 * Call toss_pages or flushinval_pages to get rid of pages
	 * overlapping the region being removed.  We have to use
	 * the less efficient flushinval_pages in the case that the
	 * caller may not be able to finish the truncate without
	 * dropping the inode's I/O lock.  Make sure
	 * to catch any pages brought in by buffers overlapping
	 * the EOF by searching out beyond the isize by our
	 * block size. We round new_size up to a block boundary
	 * so that we don't toss things on the same block as
	 * new_size but before it.
	 *
	 * Before calling toss_page or flushinval_pages, make sure to
	 * call remapf() over the same region if the file is mapped.
	 * This frees up mapped file references to the pages in the
	 * given range and for the flushinval_pages case it ensures
	 * that we get the latest mapped changes flushed out.
	 */
	toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
	toss_start = XFS_FSB_TO_B(mp, toss_start);
	if (toss_start < 0) {
		/*
		 * The place to start tossing is beyond our maximum
		 * file size, so there is no way that the data extended
		 * out there.
		 */
		return;
	}
	last_byte = xfs_file_last_byte(ip);
	xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start,
			 last_byte);
	if (last_byte > toss_start) {
		if (flags & XFS_ITRUNC_DEFINITE) {
			bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
		} else {
			bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
		}
	}

#ifdef DEBUG
	/* Truncate-to-zero must leave no cached pages behind. */
	if (new_size == 0) {
		ASSERT(VN_CACHED(vp) == 0);
	}
#endif
}
1489 1490
1490 /* 1491 /*
1491 * Shrink the file to the given new_size. The new 1492 * Shrink the file to the given new_size. The new
1492 * size must be smaller than the current size. 1493 * size must be smaller than the current size.
1493 * This will free up the underlying blocks 1494 * This will free up the underlying blocks
1494 * in the removed range after a call to xfs_itruncate_start() 1495 * in the removed range after a call to xfs_itruncate_start()
1495 * or xfs_atruncate_start(). 1496 * or xfs_atruncate_start().
1496 * 1497 *
1497 * The transaction passed to this routine must have made 1498 * The transaction passed to this routine must have made
1498 * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. 1499 * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES.
1499 * This routine may commit the given transaction and 1500 * This routine may commit the given transaction and
1500 * start new ones, so make sure everything involved in 1501 * start new ones, so make sure everything involved in
1501 * the transaction is tidy before calling here. 1502 * the transaction is tidy before calling here.
1502 * Some transaction will be returned to the caller to be 1503 * Some transaction will be returned to the caller to be
1503 * committed. The incoming transaction must already include 1504 * committed. The incoming transaction must already include
1504 * the inode, and both inode locks must be held exclusively. 1505 * the inode, and both inode locks must be held exclusively.
1505 * The inode must also be "held" within the transaction. On 1506 * The inode must also be "held" within the transaction. On
1506 * return the inode will be "held" within the returned transaction. 1507 * return the inode will be "held" within the returned transaction.
1507 * This routine does NOT require any disk space to be reserved 1508 * This routine does NOT require any disk space to be reserved
1508 * for it within the transaction. 1509 * for it within the transaction.
1509 * 1510 *
1510 * The fork parameter must be either xfs_attr_fork or xfs_data_fork, 1511 * The fork parameter must be either xfs_attr_fork or xfs_data_fork,
1511 * and it indicates the fork which is to be truncated. For the 1512 * and it indicates the fork which is to be truncated. For the
1512 * attribute fork we only support truncation to size 0. 1513 * attribute fork we only support truncation to size 0.
1513 * 1514 *
1514 * We use the sync parameter to indicate whether or not the first 1515 * We use the sync parameter to indicate whether or not the first
1515 * transaction we perform might have to be synchronous. For the attr fork, 1516 * transaction we perform might have to be synchronous. For the attr fork,
1516 * it needs to be so if the unlink of the inode is not yet known to be 1517 * it needs to be so if the unlink of the inode is not yet known to be
1517 * permanent in the log. This keeps us from freeing and reusing the 1518 * permanent in the log. This keeps us from freeing and reusing the
1518 * blocks of the attribute fork before the unlink of the inode becomes 1519 * blocks of the attribute fork before the unlink of the inode becomes
1519 * permanent. 1520 * permanent.
1520 * 1521 *
1521 * For the data fork, we normally have to run synchronously if we're 1522 * For the data fork, we normally have to run synchronously if we're
1522 * being called out of the inactive path or we're being called 1523 * being called out of the inactive path or we're being called
1523 * out of the create path where we're truncating an existing file. 1524 * out of the create path where we're truncating an existing file.
1524 * Either way, the truncate needs to be sync so blocks don't reappear 1525 * Either way, the truncate needs to be sync so blocks don't reappear
1525 * in the file with altered data in case of a crash. wsync filesystems 1526 * in the file with altered data in case of a crash. wsync filesystems
1526 * can run the first case async because anything that shrinks the inode 1527 * can run the first case async because anything that shrinks the inode
1527 * has to run sync so by the time we're called here from inactive, the 1528 * has to run sync so by the time we're called here from inactive, the
1528 * inode size is permanently set to 0. 1529 * inode size is permanently set to 0.
1529 * 1530 *
1530 * Calls from the truncate path always need to be sync unless we're 1531 * Calls from the truncate path always need to be sync unless we're
1531 * in a wsync filesystem and the file has already been unlinked. 1532 * in a wsync filesystem and the file has already been unlinked.
1532 * 1533 *
1533 * The caller is responsible for correctly setting the sync parameter. 1534 * The caller is responsible for correctly setting the sync parameter.
1534 * It gets too hard for us to guess here which path we're being called 1535 * It gets too hard for us to guess here which path we're being called
1535 * out of just based on inode state. 1536 * out of just based on inode state.
1536 */ 1537 */
1537 int 1538 int
1538 xfs_itruncate_finish( 1539 xfs_itruncate_finish(
1539 xfs_trans_t **tp, 1540 xfs_trans_t **tp,
1540 xfs_inode_t *ip, 1541 xfs_inode_t *ip,
1541 xfs_fsize_t new_size, 1542 xfs_fsize_t new_size,
1542 int fork, 1543 int fork,
1543 int sync) 1544 int sync)
1544 { 1545 {
1545 xfs_fsblock_t first_block; 1546 xfs_fsblock_t first_block;
1546 xfs_fileoff_t first_unmap_block; 1547 xfs_fileoff_t first_unmap_block;
1547 xfs_fileoff_t last_block; 1548 xfs_fileoff_t last_block;
1548 xfs_filblks_t unmap_len=0; 1549 xfs_filblks_t unmap_len=0;
1549 xfs_mount_t *mp; 1550 xfs_mount_t *mp;
1550 xfs_trans_t *ntp; 1551 xfs_trans_t *ntp;
1551 int done; 1552 int done;
1552 int committed; 1553 int committed;
1553 xfs_bmap_free_t free_list; 1554 xfs_bmap_free_t free_list;
1554 int error; 1555 int error;
1555 1556
1556 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1557 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
1557 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 1558 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
1558 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1559 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size));
1559 ASSERT(*tp != NULL); 1560 ASSERT(*tp != NULL);
1560 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1561 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1561 ASSERT(ip->i_transp == *tp); 1562 ASSERT(ip->i_transp == *tp);
1562 ASSERT(ip->i_itemp != NULL); 1563 ASSERT(ip->i_itemp != NULL);
1563 ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 1564 ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD);
1564 1565
1565 1566
1566 ntp = *tp; 1567 ntp = *tp;
1567 mp = (ntp)->t_mountp; 1568 mp = (ntp)->t_mountp;
1568 ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); 1569 ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
1569 1570
1570 /* 1571 /*
1571 * We only support truncating the entire attribute fork. 1572 * We only support truncating the entire attribute fork.
1572 */ 1573 */
1573 if (fork == XFS_ATTR_FORK) { 1574 if (fork == XFS_ATTR_FORK) {
1574 new_size = 0LL; 1575 new_size = 0LL;
1575 } 1576 }
1576 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1577 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1577 xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 1578 xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0);
1578 /* 1579 /*
1579 * The first thing we do is set the size to new_size permanently 1580 * The first thing we do is set the size to new_size permanently
1580 * on disk. This way we don't have to worry about anyone ever 1581 * on disk. This way we don't have to worry about anyone ever
1581 * being able to look at the data being freed even in the face 1582 * being able to look at the data being freed even in the face
1582 * of a crash. What we're getting around here is the case where 1583 * of a crash. What we're getting around here is the case where
1583 * we free a block, it is allocated to another file, it is written 1584 * we free a block, it is allocated to another file, it is written
1584 * to, and then we crash. If the new data gets written to the 1585 * to, and then we crash. If the new data gets written to the
1585 * file but the log buffers containing the free and reallocation 1586 * file but the log buffers containing the free and reallocation
1586 * don't, then we'd end up with garbage in the blocks being freed. 1587 * don't, then we'd end up with garbage in the blocks being freed.
1587 * As long as we make the new_size permanent before actually 1588 * As long as we make the new_size permanent before actually
1588 * freeing any blocks it doesn't matter if they get writtten to. 1589 * freeing any blocks it doesn't matter if they get writtten to.
1589 * 1590 *
1590 * The callers must signal into us whether or not the size 1591 * The callers must signal into us whether or not the size
1591 * setting here must be synchronous. There are a few cases 1592 * setting here must be synchronous. There are a few cases
1592 * where it doesn't have to be synchronous. Those cases 1593 * where it doesn't have to be synchronous. Those cases
1593 * occur if the file is unlinked and we know the unlink is 1594 * occur if the file is unlinked and we know the unlink is
1594 * permanent or if the blocks being truncated are guaranteed 1595 * permanent or if the blocks being truncated are guaranteed
1595 * to be beyond the inode eof (regardless of the link count) 1596 * to be beyond the inode eof (regardless of the link count)
1596 * and the eof value is permanent. Both of these cases occur 1597 * and the eof value is permanent. Both of these cases occur
1597 * only on wsync-mounted filesystems. In those cases, we're 1598 * only on wsync-mounted filesystems. In those cases, we're
1598 * guaranteed that no user will ever see the data in the blocks 1599 * guaranteed that no user will ever see the data in the blocks
1599 * that are being truncated so the truncate can run async. 1600 * that are being truncated so the truncate can run async.
1600 * In the free beyond eof case, the file may wind up with 1601 * In the free beyond eof case, the file may wind up with
1601 * more blocks allocated to it than it needs if we crash 1602 * more blocks allocated to it than it needs if we crash
1602 * and that won't get fixed until the next time the file 1603 * and that won't get fixed until the next time the file
1603 * is re-opened and closed but that's ok as that shouldn't 1604 * is re-opened and closed but that's ok as that shouldn't
1604 * be too many blocks. 1605 * be too many blocks.
1605 * 1606 *
1606 * However, we can't just make all wsync xactions run async 1607 * However, we can't just make all wsync xactions run async
1607 * because there's one call out of the create path that needs 1608 * because there's one call out of the create path that needs
1608 * to run sync where it's truncating an existing file to size 1609 * to run sync where it's truncating an existing file to size
1609 * 0 whose size is > 0. 1610 * 0 whose size is > 0.
1610 * 1611 *
1611 * It's probably possible to come up with a test in this 1612 * It's probably possible to come up with a test in this
1612 * routine that would correctly distinguish all the above 1613 * routine that would correctly distinguish all the above
1613 * cases from the values of the function parameters and the 1614 * cases from the values of the function parameters and the
1614 * inode state but for sanity's sake, I've decided to let the 1615 * inode state but for sanity's sake, I've decided to let the
1615 * layers above just tell us. It's simpler to correctly figure 1616 * layers above just tell us. It's simpler to correctly figure
1616 * out in the layer above exactly under what conditions we 1617 * out in the layer above exactly under what conditions we
1617 * can run async and I think it's easier for others read and 1618 * can run async and I think it's easier for others read and
1618 * follow the logic in case something has to be changed. 1619 * follow the logic in case something has to be changed.
1619 * cscope is your friend -- rcc. 1620 * cscope is your friend -- rcc.
1620 * 1621 *
1621 * The attribute fork is much simpler. 1622 * The attribute fork is much simpler.
1622 * 1623 *
1623 * For the attribute fork we allow the caller to tell us whether 1624 * For the attribute fork we allow the caller to tell us whether
1624 * the unlink of the inode that led to this call is yet permanent 1625 * the unlink of the inode that led to this call is yet permanent
1625 * in the on disk log. If it is not and we will be freeing extents 1626 * in the on disk log. If it is not and we will be freeing extents
1626 * in this inode then we make the first transaction synchronous 1627 * in this inode then we make the first transaction synchronous
1627 * to make sure that the unlink is permanent by the time we free 1628 * to make sure that the unlink is permanent by the time we free
1628 * the blocks. 1629 * the blocks.
1629 */ 1630 */
1630 if (fork == XFS_DATA_FORK) { 1631 if (fork == XFS_DATA_FORK) {
1631 if (ip->i_d.di_nextents > 0) { 1632 if (ip->i_d.di_nextents > 0) {
1632 ip->i_d.di_size = new_size; 1633 ip->i_d.di_size = new_size;
1633 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1634 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1634 } 1635 }
1635 } else if (sync) { 1636 } else if (sync) {
1636 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); 1637 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
1637 if (ip->i_d.di_anextents > 0) 1638 if (ip->i_d.di_anextents > 0)
1638 xfs_trans_set_sync(ntp); 1639 xfs_trans_set_sync(ntp);
1639 } 1640 }
1640 ASSERT(fork == XFS_DATA_FORK || 1641 ASSERT(fork == XFS_DATA_FORK ||
1641 (fork == XFS_ATTR_FORK && 1642 (fork == XFS_ATTR_FORK &&
1642 ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || 1643 ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
1643 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); 1644 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
1644 1645
1645 /* 1646 /*
1646 * Since it is possible for space to become allocated beyond 1647 * Since it is possible for space to become allocated beyond
1647 * the end of the file (in a crash where the space is allocated 1648 * the end of the file (in a crash where the space is allocated
1648 * but the inode size is not yet updated), simply remove any 1649 * but the inode size is not yet updated), simply remove any
1649 * blocks which show up between the new EOF and the maximum 1650 * blocks which show up between the new EOF and the maximum
1650 * possible file size. If the first block to be removed is 1651 * possible file size. If the first block to be removed is
1651 * beyond the maximum file size (ie it is the same as last_block), 1652 * beyond the maximum file size (ie it is the same as last_block),
1652 * then there is nothing to do. 1653 * then there is nothing to do.
1653 */ 1654 */
1654 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1655 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1655 ASSERT(first_unmap_block <= last_block); 1656 ASSERT(first_unmap_block <= last_block);
1656 done = 0; 1657 done = 0;
1657 if (last_block == first_unmap_block) { 1658 if (last_block == first_unmap_block) {
1658 done = 1; 1659 done = 1;
1659 } else { 1660 } else {
1660 unmap_len = last_block - first_unmap_block + 1; 1661 unmap_len = last_block - first_unmap_block + 1;
1661 } 1662 }
1662 while (!done) { 1663 while (!done) {
1663 /* 1664 /*
1664 * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() 1665 * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi()
1665 * will tell us whether it freed the entire range or 1666 * will tell us whether it freed the entire range or
1666 * not. If this is a synchronous mount (wsync), 1667 * not. If this is a synchronous mount (wsync),
1667 * then we can tell bunmapi to keep all the 1668 * then we can tell bunmapi to keep all the
1668 * transactions asynchronous since the unlink 1669 * transactions asynchronous since the unlink
1669 * transaction that made this inode inactive has 1670 * transaction that made this inode inactive has
1670 * already hit the disk. There's no danger of 1671 * already hit the disk. There's no danger of
1671 * the freed blocks being reused, there being a 1672 * the freed blocks being reused, there being a
1672 * crash, and the reused blocks suddenly reappearing 1673 * crash, and the reused blocks suddenly reappearing
1673 * in this file with garbage in them once recovery 1674 * in this file with garbage in them once recovery
1674 * runs. 1675 * runs.
1675 */ 1676 */
1676 XFS_BMAP_INIT(&free_list, &first_block); 1677 XFS_BMAP_INIT(&free_list, &first_block);
1677 error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore, 1678 error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore,
1678 first_unmap_block, unmap_len, 1679 first_unmap_block, unmap_len,
1679 XFS_BMAPI_AFLAG(fork) | 1680 XFS_BMAPI_AFLAG(fork) |
1680 (sync ? 0 : XFS_BMAPI_ASYNC), 1681 (sync ? 0 : XFS_BMAPI_ASYNC),
1681 XFS_ITRUNC_MAX_EXTENTS, 1682 XFS_ITRUNC_MAX_EXTENTS,
1682 &first_block, &free_list, 1683 &first_block, &free_list,
1683 NULL, &done); 1684 NULL, &done);
1684 if (error) { 1685 if (error) {
1685 /* 1686 /*
1686 * If the bunmapi call encounters an error, 1687 * If the bunmapi call encounters an error,
1687 * return to the caller where the transaction 1688 * return to the caller where the transaction
1688 * can be properly aborted. We just need to 1689 * can be properly aborted. We just need to
1689 * make sure we're not holding any resources 1690 * make sure we're not holding any resources
1690 * that we were not when we came in. 1691 * that we were not when we came in.
1691 */ 1692 */
1692 xfs_bmap_cancel(&free_list); 1693 xfs_bmap_cancel(&free_list);
1693 return error; 1694 return error;
1694 } 1695 }
1695 1696
1696 /* 1697 /*
1697 * Duplicate the transaction that has the permanent 1698 * Duplicate the transaction that has the permanent
1698 * reservation and commit the old transaction. 1699 * reservation and commit the old transaction.
1699 */ 1700 */
1700 error = xfs_bmap_finish(tp, &free_list, first_block, 1701 error = xfs_bmap_finish(tp, &free_list, first_block,
1701 &committed); 1702 &committed);
1702 ntp = *tp; 1703 ntp = *tp;
1703 if (error) { 1704 if (error) {
1704 /* 1705 /*
1705 * If the bmap finish call encounters an error, 1706 * If the bmap finish call encounters an error,
1706 * return to the caller where the transaction 1707 * return to the caller where the transaction
1707 * can be properly aborted. We just need to 1708 * can be properly aborted. We just need to
1708 * make sure we're not holding any resources 1709 * make sure we're not holding any resources
1709 * that we were not when we came in. 1710 * that we were not when we came in.
1710 * 1711 *
1711 * Aborting from this point might lose some 1712 * Aborting from this point might lose some
1712 * blocks in the file system, but oh well. 1713 * blocks in the file system, but oh well.
1713 */ 1714 */
1714 xfs_bmap_cancel(&free_list); 1715 xfs_bmap_cancel(&free_list);
1715 if (committed) { 1716 if (committed) {
1716 /* 1717 /*
1717 * If the passed in transaction committed 1718 * If the passed in transaction committed
1718 * in xfs_bmap_finish(), then we want to 1719 * in xfs_bmap_finish(), then we want to
1719 * add the inode to this one before returning. 1720 * add the inode to this one before returning.
1720 * This keeps things simple for the higher 1721 * This keeps things simple for the higher
1721 * level code, because it always knows that 1722 * level code, because it always knows that
1722 * the inode is locked and held in the 1723 * the inode is locked and held in the
1723 * transaction that returns to it whether 1724 * transaction that returns to it whether
1724 * errors occur or not. We don't mark the 1725 * errors occur or not. We don't mark the
1725 * inode dirty so that this transaction can 1726 * inode dirty so that this transaction can
1726 * be easily aborted if possible. 1727 * be easily aborted if possible.
1727 */ 1728 */
1728 xfs_trans_ijoin(ntp, ip, 1729 xfs_trans_ijoin(ntp, ip,
1729 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1730 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1730 xfs_trans_ihold(ntp, ip); 1731 xfs_trans_ihold(ntp, ip);
1731 } 1732 }
1732 return error; 1733 return error;
1733 } 1734 }
1734 1735
1735 if (committed) { 1736 if (committed) {
1736 /* 1737 /*
1737 * The first xact was committed, 1738 * The first xact was committed,
1738 * so add the inode to the new one. 1739 * so add the inode to the new one.
1739 * Mark it dirty so it will be logged 1740 * Mark it dirty so it will be logged
1740 * and moved forward in the log as 1741 * and moved forward in the log as
1741 * part of every commit. 1742 * part of every commit.
1742 */ 1743 */
1743 xfs_trans_ijoin(ntp, ip, 1744 xfs_trans_ijoin(ntp, ip,
1744 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1745 XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1745 xfs_trans_ihold(ntp, ip); 1746 xfs_trans_ihold(ntp, ip);
1746 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1747 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1747 } 1748 }
1748 ntp = xfs_trans_dup(ntp); 1749 ntp = xfs_trans_dup(ntp);
1749 (void) xfs_trans_commit(*tp, 0, NULL); 1750 (void) xfs_trans_commit(*tp, 0, NULL);
1750 *tp = ntp; 1751 *tp = ntp;
1751 error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1752 error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
1752 XFS_TRANS_PERM_LOG_RES, 1753 XFS_TRANS_PERM_LOG_RES,
1753 XFS_ITRUNCATE_LOG_COUNT); 1754 XFS_ITRUNCATE_LOG_COUNT);
1754 /* 1755 /*
1755 * Add the inode being truncated to the next chained 1756 * Add the inode being truncated to the next chained
1756 * transaction. 1757 * transaction.
1757 */ 1758 */
1758 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1759 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1759 xfs_trans_ihold(ntp, ip); 1760 xfs_trans_ihold(ntp, ip);
1760 if (error) 1761 if (error)
1761 return (error); 1762 return (error);
1762 } 1763 }
1763 /* 1764 /*
1764 * Only update the size in the case of the data fork, but 1765 * Only update the size in the case of the data fork, but
1765 * always re-log the inode so that our permanent transaction 1766 * always re-log the inode so that our permanent transaction
1766 * can keep on rolling it forward in the log. 1767 * can keep on rolling it forward in the log.
1767 */ 1768 */
1768 if (fork == XFS_DATA_FORK) { 1769 if (fork == XFS_DATA_FORK) {
1769 xfs_isize_check(mp, ip, new_size); 1770 xfs_isize_check(mp, ip, new_size);
1770 ip->i_d.di_size = new_size; 1771 ip->i_d.di_size = new_size;
1771 } 1772 }
1772 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1773 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1773 ASSERT((new_size != 0) || 1774 ASSERT((new_size != 0) ||
1774 (fork == XFS_ATTR_FORK) || 1775 (fork == XFS_ATTR_FORK) ||
1775 (ip->i_delayed_blks == 0)); 1776 (ip->i_delayed_blks == 0));
1776 ASSERT((new_size != 0) || 1777 ASSERT((new_size != 0) ||
1777 (fork == XFS_ATTR_FORK) || 1778 (fork == XFS_ATTR_FORK) ||
1778 (ip->i_d.di_nextents == 0)); 1779 (ip->i_d.di_nextents == 0));
1779 xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 1780 xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0);
1780 return 0; 1781 return 0;
1781 } 1782 }
1782 1783
1783 1784
1784 /* 1785 /*
1785 * xfs_igrow_start 1786 * xfs_igrow_start
1786 * 1787 *
1787 * Do the first part of growing a file: zero any data in the last 1788 * Do the first part of growing a file: zero any data in the last
1788 * block that is beyond the old EOF. We need to do this before 1789 * block that is beyond the old EOF. We need to do this before
1789 * the inode is joined to the transaction to modify the i_size. 1790 * the inode is joined to the transaction to modify the i_size.
1790 * That way we can drop the inode lock and call into the buffer 1791 * That way we can drop the inode lock and call into the buffer
1791 * cache to get the buffer mapping the EOF. 1792 * cache to get the buffer mapping the EOF.
1792 */ 1793 */
1793 int 1794 int
1794 xfs_igrow_start( 1795 xfs_igrow_start(
1795 xfs_inode_t *ip, 1796 xfs_inode_t *ip,
1796 xfs_fsize_t new_size, 1797 xfs_fsize_t new_size,
1797 cred_t *credp) 1798 cred_t *credp)
1798 { 1799 {
1799 int error; 1800 int error;
1800 1801
1801 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1802 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1802 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1803 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1803 ASSERT(new_size > ip->i_d.di_size); 1804 ASSERT(new_size > ip->i_d.di_size);
1804 1805
1805 /* 1806 /*
1806 * Zero any pages that may have been created by 1807 * Zero any pages that may have been created by
1807 * xfs_write_file() beyond the end of the file 1808 * xfs_write_file() beyond the end of the file
1808 * and any blocks between the old and new file sizes. 1809 * and any blocks between the old and new file sizes.
1809 */ 1810 */
1810 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, 1811 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
1811 ip->i_d.di_size, new_size); 1812 ip->i_d.di_size, new_size);
1812 return error; 1813 return error;
1813 } 1814 }
1814 1815
1815 /* 1816 /*
1816 * xfs_igrow_finish 1817 * xfs_igrow_finish
1817 * 1818 *
1818 * This routine is called to extend the size of a file. 1819 * This routine is called to extend the size of a file.
1819 * The inode must have both the iolock and the ilock locked 1820 * The inode must have both the iolock and the ilock locked
1820 * for update and it must be a part of the current transaction. 1821 * for update and it must be a part of the current transaction.
1821 * The xfs_igrow_start() function must have been called previously. 1822 * The xfs_igrow_start() function must have been called previously.
1822 * If the change_flag is not zero, the inode change timestamp will 1823 * If the change_flag is not zero, the inode change timestamp will
1823 * be updated. 1824 * be updated.
1824 */ 1825 */
1825 void 1826 void
1826 xfs_igrow_finish( 1827 xfs_igrow_finish(
1827 xfs_trans_t *tp, 1828 xfs_trans_t *tp,
1828 xfs_inode_t *ip, 1829 xfs_inode_t *ip,
1829 xfs_fsize_t new_size, 1830 xfs_fsize_t new_size,
1830 int change_flag) 1831 int change_flag)
1831 { 1832 {
1832 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1833 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1833 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1834 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1834 ASSERT(ip->i_transp == tp); 1835 ASSERT(ip->i_transp == tp);
1835 ASSERT(new_size > ip->i_d.di_size); 1836 ASSERT(new_size > ip->i_d.di_size);
1836 1837
1837 /* 1838 /*
1838 * Update the file size. Update the inode change timestamp 1839 * Update the file size. Update the inode change timestamp
1839 * if change_flag set. 1840 * if change_flag set.
1840 */ 1841 */
1841 ip->i_d.di_size = new_size; 1842 ip->i_d.di_size = new_size;
1842 if (change_flag) 1843 if (change_flag)
1843 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 1844 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1844 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1845 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1845 1846
1846 } 1847 }
1847 1848
1848 1849
/*
 * This is called when the inode's link count goes to 0.
 * We place the on-disk inode on a list in the AGI.  It
 * will be pulled from this list when the inode is freed.
 *
 * The AGI keeps XFS_AGI_UNLINKED_BUCKETS singly-linked hash
 * buckets of unlinked-but-still-open inodes; this function
 * pushes the inode onto the front of its bucket's chain so
 * log recovery can finish freeing it after a crash.
 *
 * Returns 0 on success, or an error from reading the AGI /
 * inode buffer, or EFSCORRUPTED if the AGI fails validation.
 */
int
xfs_iunlink(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agnumber_t	agno;
	xfs_daddr_t	agdaddr;
	xfs_agino_t	agino;
	short		bucket_index;
	int		offset;
	int		error;
	int		agi_ok;

	/* Only unlinked, still-allocated inodes joined to tp belong here. */
	ASSERT(ip->i_d.di_nlink == 0);
	ASSERT(ip->i_d.di_mode != 0);
	ASSERT(ip->i_transp == tp);

	mp = tp->t_mountp;

	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
	agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr,
				   XFS_FSS_TO_BB(mp, 1), 0, &agibp);
	if (error) {
		return error;
	}
	/*
	 * Validate the magic number of the agi block.
	 * XFS_TEST_ERROR can also inject a failure here for testing.
	 */
	agi = XFS_BUF_TO_AGI(agibp);
	agi_ok =
		be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK,
			XFS_RANDOM_IUNLINK))) {
		XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi);
		xfs_trans_brelse(tp, agibp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	ASSERT(agi->agi_unlinked[bucket_index]);
	/* We must not already be on this list. */
	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);

	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
		/*
		 * There is already another inode in the bucket we need
		 * to add ourselves to.  Add us at the front of the list.
		 * Here we put the head pointer into our next pointer,
		 * and then we fall through to point the head at us.
		 */
		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
		if (error) {
			return error;
		}
		ASSERT(INT_GET(dip->di_next_unlinked, ARCH_CONVERT) == NULLAGINO);
		ASSERT(dip->di_next_unlinked);
		/* both on-disk, don't endian flip twice */
		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
		/* Log only the di_next_unlinked field of the inode buffer. */
		offset = ip->i_boffset +
			offsetof(xfs_dinode_t, di_next_unlinked);
		xfs_trans_inode_buf(tp, ibp);
		xfs_trans_log_buf(tp, ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, ibp);
	}

	/*
	 * Point the bucket head pointer at the inode being inserted.
	 */
	ASSERT(agino != 0);
	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
	/* Log just the modified bucket slot in the AGI buffer. */
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		(sizeof(xfs_agino_t) * bucket_index);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));
	return 0;
}
1947 1948
/*
 * Pull the on-disk inode from the AGI unlinked list.
 *
 * Inverse of xfs_iunlink(): the inode is either at the head of
 * its AGI hash bucket (common case - just repoint the bucket),
 * or somewhere down the singly-linked on-disk chain, in which
 * case we walk the chain to find our predecessor and splice
 * ourselves out.
 *
 * Returns 0 on success, an error from buffer/inode reads, or
 * EFSCORRUPTED if the AGI fails validation.
 */
STATIC int
xfs_iunlink_remove(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_ino_t	next_ino;
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agnumber_t	agno;
	xfs_daddr_t	agdaddr;
	xfs_agino_t	agino;
	xfs_agino_t	next_agino;
	xfs_buf_t	*last_ibp;
	xfs_dinode_t	*last_dip = NULL;
	short		bucket_index;
	int		offset, last_offset = 0;
	int		error;
	int		agi_ok;

	/*
	 * First pull the on-disk inode from the AGI unlinked list.
	 */
	mp = tp->t_mountp;

	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
	agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr,
				   XFS_FSS_TO_BB(mp, 1), 0, &agibp);
	if (error) {
		cmn_err(CE_WARN,
			"xfs_iunlink_remove: xfs_trans_read_buf() returned an error %d on %s. Returning error.",
			error, mp->m_fsname);
		return error;
	}
	/*
	 * Validate the magic number of the agi block.
	 * XFS_TEST_ERROR can also inject a failure here for testing.
	 */
	agi = XFS_BUF_TO_AGI(agibp);
	agi_ok =
		be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE,
			XFS_RANDOM_IUNLINK_REMOVE))) {
		XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW,
				     mp, agi);
		xfs_trans_brelse(tp, agibp);
		cmn_err(CE_WARN,
			"xfs_iunlink_remove: XFS_TEST_ERROR() returned an error on %s. Returning EFSCORRUPTED.",
			mp->m_fsname);
		return XFS_ERROR(EFSCORRUPTED);
	}
	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	/* The bucket must be non-empty - we are supposed to be on it. */
	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
	ASSERT(agi->agi_unlinked[bucket_index]);

	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
		/*
		 * We're at the head of the list.  Get the inode's
		 * on-disk buffer to see if there is anyone after us
		 * on the list.  Only modify our next pointer if it
		 * is not already NULLAGINO.  This saves us the overhead
		 * of dealing with the buffer when there is no need to
		 * change it.
		 */
		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
		if (error) {
			cmn_err(CE_WARN,
				"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
				error, mp->m_fsname);
			return error;
		}
		next_agino = INT_GET(dip->di_next_unlinked, ARCH_CONVERT);
		ASSERT(next_agino != 0);
		if (next_agino != NULLAGINO) {
			/* Clear our on-disk forward pointer and log it. */
			INT_SET(dip->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
			offset = ip->i_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);
			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			/* Nothing to change; give the buffer back. */
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the bucket head pointer at the next inode.
		 */
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
		offset = offsetof(xfs_agi_t, agi_unlinked) +
			(sizeof(xfs_agino_t) * bucket_index);
		xfs_trans_log_buf(tp, agibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
	} else {
		/*
		 * We need to search the list for the inode being freed.
		 */
		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
		last_ibp = NULL;
		while (next_agino != agino) {
			/*
			 * If the last inode wasn't the one pointing to
			 * us, then release its buffer since we're not
			 * going to do anything with it.
			 */
			if (last_ibp != NULL) {
				xfs_trans_brelse(tp, last_ibp);
			}
			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
			error = xfs_inotobp(mp, tp, next_ino, &last_dip,
					    &last_ibp, &last_offset);
			if (error) {
				cmn_err(CE_WARN,
					"xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.",
					error, mp->m_fsname);
				return error;
			}
			next_agino = INT_GET(last_dip->di_next_unlinked, ARCH_CONVERT);
			/* A well-formed chain must reach agino before ending. */
			ASSERT(next_agino != NULLAGINO);
			ASSERT(next_agino != 0);
		}
		/*
		 * Now last_ibp points to the buffer previous to us on
		 * the unlinked list.  Pull us from the list.
		 */
		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
		if (error) {
			cmn_err(CE_WARN,
				"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
				error, mp->m_fsname);
			return error;
		}
		next_agino = INT_GET(dip->di_next_unlinked, ARCH_CONVERT);
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		if (next_agino != NULLAGINO) {
			/* Clear our on-disk forward pointer and log it. */
			INT_SET(dip->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
			offset = ip->i_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);
			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the previous inode on the list to the next inode.
		 */
		INT_SET(last_dip->di_next_unlinked, ARCH_CONVERT, next_agino);
		ASSERT(next_agino != 0);
		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
		xfs_trans_inode_buf(tp, last_ibp);
		xfs_trans_log_buf(tp, last_ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, last_ibp);
	}
	return 0;
}
2125 2126
2126 static __inline__ int xfs_inode_clean(xfs_inode_t *ip) 2127 static __inline__ int xfs_inode_clean(xfs_inode_t *ip)
2127 { 2128 {
2128 return (((ip->i_itemp == NULL) || 2129 return (((ip->i_itemp == NULL) ||
2129 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && 2130 !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
2130 (ip->i_update_core == 0)); 2131 (ip->i_update_core == 0));
2131 } 2132 }
2132 2133
/*
 * Stale and invalidate the inode cluster(s) that begin at inode "inum"
 * (the first inode of a just-freed chunk, handed in by xfs_ifree).
 * Every in-core inode belonging to the cluster is flagged XFS_ISTALE and
 * wired to xfs_istale_done so its dirty state is discarded rather than
 * written back over the freed disk space, and the cluster buffers
 * themselves are invalidated in the transaction.
 */
STATIC void
xfs_ifree_cluster(
	xfs_inode_t	*free_ip,	/* inode being freed; already ILOCKed by caller */
	xfs_trans_t	*tp,		/* transaction the free is running in */
	xfs_ino_t	inum)		/* first inode number of the chunk */
{
	xfs_mount_t		*mp = free_ip->i_mount;
	int			blks_per_cluster;
	int			nbufs;		/* cluster buffers in the chunk */
	int			ninodes;	/* inodes per cluster buffer */
	int			i, j, found, pre_flushed;
	xfs_daddr_t		blkno;
	xfs_buf_t		*bp;
	xfs_ihash_t		*ih;
	xfs_inode_t		*ip, **ip_found;
	xfs_inode_log_item_t	*iip;
	xfs_log_item_t		*lip;
	SPLDECL(s);

	/*
	 * Work out the cluster geometry: either one filesystem block
	 * holds a whole cluster (or more), or a cluster spans several
	 * blocks.  ninodes is the number of inodes per buffer below.
	 */
	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
		blks_per_cluster = 1;
		ninodes = mp->m_sb.sb_inopblock;
		nbufs = XFS_IALLOC_BLOCKS(mp);
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
					mp->m_sb.sb_blocksize;
		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
	}

	/* Scratch array of the in-core inodes we manage to lock, per buffer. */
	ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);

	for (j = 0; j < nbufs; j++, inum += ninodes) {
		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
					 XFS_INO_TO_AGBNO(mp, inum));


		/*
		 * Look for each inode in memory and attempt to lock it,
		 * we can be racing with flush and tail pushing here.
		 * any inode we get the locks on, add to an array of
		 * inode items to process later.
		 *
		 * The get the buffer lock, we could beat a flush
		 * or tail pushing thread to the lock here, in which
		 * case they will go looking for the inode buffer
		 * and fail, we need some other form of interlock
		 * here.
		 */
		found = 0;
		for (i = 0; i < ninodes; i++) {
			ih = XFS_IHASH(mp, inum + i);
			read_lock(&ih->ih_lock);
			for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
				if (ip->i_ino == inum + i)
					break;
			}

			/* Inode not in memory or we found it already,
			 * nothing to do
			 */
			if (!ip || (ip->i_flags & XFS_ISTALE)) {
				read_unlock(&ih->ih_lock);
				continue;
			}

			/* Already clean: nothing to discard for this inode. */
			if (xfs_inode_clean(ip)) {
				read_unlock(&ih->ih_lock);
				continue;
			}

			/* If we can get the locks then add it to the
			 * list, otherwise by the time we get the bp lock
			 * below it will already be attached to the
			 * inode buffer.
			 */

			/* This inode will already be locked - by us, lets
			 * keep it that way.
			 */

			if (ip == free_ip) {
				/* Only the flush lock is needed here; the
				 * ILOCK is already held by our caller. */
				if (xfs_iflock_nowait(ip)) {
					ip->i_flags |= XFS_ISTALE;

					if (xfs_inode_clean(ip)) {
						xfs_ifunlock(ip);
					} else {
						ip_found[found++] = ip;
					}
				}
				read_unlock(&ih->ih_lock);
				continue;
			}

			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				if (xfs_iflock_nowait(ip)) {
					ip->i_flags |= XFS_ISTALE;

					/* Re-check under the flush lock: a
					 * racing flush may have cleaned it. */
					if (xfs_inode_clean(ip)) {
						xfs_ifunlock(ip);
						xfs_iunlock(ip, XFS_ILOCK_EXCL);
					} else {
						ip_found[found++] = ip;
					}
				} else {
					xfs_iunlock(ip, XFS_ILOCK_EXCL);
				}
			}

			read_unlock(&ih->ih_lock);
		}

		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
					mp->m_bsize * blks_per_cluster,
					XFS_BUF_LOCK);

		/*
		 * Inode items already attached to the buffer were flushed
		 * there by a racing thread before we got the buffer lock:
		 * redirect their completion to xfs_istale_done and mark
		 * their inodes stale too.
		 */
		pre_flushed = 0;
		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
		while (lip) {
			if (lip->li_type == XFS_LI_INODE) {
				iip = (xfs_inode_log_item_t *)lip;
				ASSERT(iip->ili_logged == 1);
				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
				AIL_LOCK(mp,s);
				iip->ili_flush_lsn = iip->ili_item.li_lsn;
				AIL_UNLOCK(mp, s);
				iip->ili_inode->i_flags |= XFS_ISTALE;
				pre_flushed++;
			}
			lip = lip->li_bio_list;
		}

		/*
		 * Now process the inodes we locked above: pretend each was
		 * flushed to the buffer (ili_last_fields/ili_logged) and
		 * attach xfs_istale_done so the flush lock is released when
		 * the (invalidated) buffer I/O completes.
		 */
		for (i = 0; i < found; i++) {
			ip = ip_found[i];
			iip = ip->i_itemp;

			if (!iip) {
				/* No log item: just cancel the pending
				 * in-core update and drop the locks. */
				ip->i_update_core = 0;
				xfs_ifunlock(ip);
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
				continue;
			}

			iip->ili_last_fields = iip->ili_format.ilf_fields;
			iip->ili_format.ilf_fields = 0;
			iip->ili_logged = 1;
			AIL_LOCK(mp,s);
			iip->ili_flush_lsn = iip->ili_item.li_lsn;
			AIL_UNLOCK(mp, s);

			xfs_buf_attach_iodone(bp,
				(void(*)(xfs_buf_t*,xfs_log_item_t*))
				xfs_istale_done, (xfs_log_item_t *)iip);
			if (ip != free_ip) {
				/* free_ip stays locked for the caller. */
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
			}
		}

		if (found || pre_flushed)
			xfs_trans_stale_inode_buf(tp, bp);
		xfs_trans_binval(tp, bp);
	}

	kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
}
2299 2300
2300 /* 2301 /*
2301 * This is called to return an inode to the inode free list. 2302 * This is called to return an inode to the inode free list.
2302 * The inode should already be truncated to 0 length and have 2303 * The inode should already be truncated to 0 length and have
2303 * no pages associated with it. This routine also assumes that 2304 * no pages associated with it. This routine also assumes that
2304 * the inode is already a part of the transaction. 2305 * the inode is already a part of the transaction.
2305 * 2306 *
2306 * The on-disk copy of the inode will have been added to the list 2307 * The on-disk copy of the inode will have been added to the list
2307 * of unlinked inodes in the AGI. We need to remove the inode from 2308 * of unlinked inodes in the AGI. We need to remove the inode from
2308 * that list atomically with respect to freeing it here. 2309 * that list atomically with respect to freeing it here.
2309 */ 2310 */
2310 int 2311 int
2311 xfs_ifree( 2312 xfs_ifree(
2312 xfs_trans_t *tp, 2313 xfs_trans_t *tp,
2313 xfs_inode_t *ip, 2314 xfs_inode_t *ip,
2314 xfs_bmap_free_t *flist) 2315 xfs_bmap_free_t *flist)
2315 { 2316 {
2316 int error; 2317 int error;
2317 int delete; 2318 int delete;
2318 xfs_ino_t first_ino; 2319 xfs_ino_t first_ino;
2319 2320
2320 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2321 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
2321 ASSERT(ip->i_transp == tp); 2322 ASSERT(ip->i_transp == tp);
2322 ASSERT(ip->i_d.di_nlink == 0); 2323 ASSERT(ip->i_d.di_nlink == 0);
2323 ASSERT(ip->i_d.di_nextents == 0); 2324 ASSERT(ip->i_d.di_nextents == 0);
2324 ASSERT(ip->i_d.di_anextents == 0); 2325 ASSERT(ip->i_d.di_anextents == 0);
2325 ASSERT((ip->i_d.di_size == 0) || 2326 ASSERT((ip->i_d.di_size == 0) ||
2326 ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 2327 ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
2327 ASSERT(ip->i_d.di_nblocks == 0); 2328 ASSERT(ip->i_d.di_nblocks == 0);
2328 2329
2329 /* 2330 /*
2330 * Pull the on-disk inode from the AGI unlinked list. 2331 * Pull the on-disk inode from the AGI unlinked list.
2331 */ 2332 */
2332 error = xfs_iunlink_remove(tp, ip); 2333 error = xfs_iunlink_remove(tp, ip);
2333 if (error != 0) { 2334 if (error != 0) {
2334 return error; 2335 return error;
2335 } 2336 }
2336 2337
2337 error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 2338 error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
2338 if (error != 0) { 2339 if (error != 0) {
2339 return error; 2340 return error;
2340 } 2341 }
2341 ip->i_d.di_mode = 0; /* mark incore inode as free */ 2342 ip->i_d.di_mode = 0; /* mark incore inode as free */
2342 ip->i_d.di_flags = 0; 2343 ip->i_d.di_flags = 0;
2343 ip->i_d.di_dmevmask = 0; 2344 ip->i_d.di_dmevmask = 0;
2344 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2345 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
2345 ip->i_df.if_ext_max = 2346 ip->i_df.if_ext_max =
2346 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 2347 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
2347 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 2348 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
2348 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 2349 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
2349 /* 2350 /*
2350 * Bump the generation count so no one will be confused 2351 * Bump the generation count so no one will be confused
2351 * by reincarnations of this inode. 2352 * by reincarnations of this inode.
2352 */ 2353 */
2353 ip->i_d.di_gen++; 2354 ip->i_d.di_gen++;
2354 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2355 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2355 2356
2356 if (delete) { 2357 if (delete) {
2357 xfs_ifree_cluster(ip, tp, first_ino); 2358 xfs_ifree_cluster(ip, tp, first_ino);
2358 } 2359 }
2359 2360
2360 return 0; 2361 return 0;
2361 } 2362 }
2362 2363
/*
 * Reallocate the space for if_broot based on the number of records
 * being added or deleted as indicated in rec_diff.  Move the records
 * and pointers in if_broot to fit the new size.  When shrinking this
 * will eliminate holes between the records and pointers created by
 * the caller.  When growing this will create holes to be filled in
 * by the caller.
 *
 * The caller must not request to add more records than would fit in
 * the on-disk inode root.  If the if_broot is currently NULL, then
 * if we adding records one will be allocated.  The caller must also
 * not request that the number of records go below zero, although
 * it can go to zero.
 *
 * ip -- the inode whose if_broot area is changing
 * ext_diff -- the change in the number of records, positive or negative,
 *	 requested for the if_broot array.
 */
void
xfs_iroot_realloc(
	xfs_inode_t	*ip,
	int		rec_diff,
	int		whichfork)	/* XFS_DATA_FORK or XFS_ATTR_FORK */
{
	int		cur_max;	/* current max records in broot */
	xfs_ifork_t	*ifp;
	xfs_bmbt_block_t *new_broot;
	int		new_max;	/* max records after the change */
	size_t		new_size;	/* new broot size in bytes */
	char		*np;		/* pointer into new broot */
	char		*op;		/* pointer into old broot */

	/*
	 * Handle the degenerate case quietly.
	 */
	if (rec_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (rec_diff > 0) {
		/*
		 * If there wasn't any memory allocated before, just
		 * allocate it now and get out.
		 */
		if (ifp->if_broot_bytes == 0) {
			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
			ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size,
								     KM_SLEEP);
			ifp->if_broot_bytes = (int)new_size;
			return;
		}

		/*
		 * If there is already an existing if_broot, then we need
		 * to realloc() it and shift the pointers to their new
		 * location.  The records don't change location because
		 * they are kept butted up against the btree block header.
		 */
		cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
		new_max = cur_max + rec_diff;
		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
		ifp->if_broot = (xfs_bmbt_block_t *)
		  kmem_realloc(ifp->if_broot,
				new_size,
				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
				KM_SLEEP);
		/* NB: op is computed with the OLD byte count, np with the
		 * new one -- if_broot_bytes must not be updated before op. */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
						      ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
						      (int)new_size);
		ifp->if_broot_bytes = (int)new_size;
		ASSERT(ifp->if_broot_bytes <=
			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
		/* memmove: old and new pointer arrays overlap in place. */
		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
		return;
	}

	/*
	 * rec_diff is less than 0.  In this case, we are shrinking the
	 * if_broot buffer.  It must already exist.  If we go to zero
	 * records, just get rid of the root and clear the status bit.
	 */
	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
	cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
	new_max = cur_max + rec_diff;
	ASSERT(new_max >= 0);
	if (new_max > 0)
		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
	else
		new_size = 0;
	if (new_size > 0) {
		new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP);
		/*
		 * First copy over the btree block header.
		 */
		memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t));
	} else {
		new_broot = NULL;
		ifp->if_flags &= ~XFS_IFBROOT;
	}

	/*
	 * Only copy the records and pointers if there are any.
	 */
	if (new_max > 0) {
		/*
		 * First copy the records.
		 */
		op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
						     (int)new_size);
		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));

		/*
		 * Then copy the pointers.
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1,
						     (int)new_size);
		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
	}
	/* Release the old root and install the (possibly NULL) new one. */
	kmem_free(ifp->if_broot, ifp->if_broot_bytes);
	ifp->if_broot = new_broot;
	ifp->if_broot_bytes = (int)new_size;
	ASSERT(ifp->if_broot_bytes <=
		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
	return;
}
2494 2495
2495 2496
/*
 * This is called when the amount of space needed for if_data
 * is increased or decreased.  The change in size is indicated by
 * the number of bytes that need to be added or deleted in the
 * byte_diff parameter.
 *
 * If the amount of space needed has decreased below the size of the
 * inline buffer, then switch to using the inline buffer.  Otherwise,
 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
 * to what is needed.
 *
 * ip -- the inode whose if_data area is changing
 * byte_diff -- the change in the number of bytes, positive or negative,
 *	 requested for the if_data array.
 */
void
xfs_idata_realloc(
	xfs_inode_t	*ip,
	int		byte_diff,
	int		whichfork)	/* XFS_DATA_FORK or XFS_ATTR_FORK */
{
	xfs_ifork_t	*ifp;
	int		new_size;	/* bytes of data after the change */
	int		real_size;	/* bytes actually allocated (0 = inline/none) */

	if (byte_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	new_size = (int)ifp->if_bytes + byte_diff;
	ASSERT(new_size >= 0);

	if (new_size == 0) {
		/* Shrinking to nothing: release any heap buffer. */
		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
		}
		ifp->if_u1.if_data = NULL;
		real_size = 0;
	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
		/*
		 * If the valid extents/data can fit in if_inline_ext/data,
		 * copy them from the malloc'd vector and free it.
		 */
		if (ifp->if_u1.if_data == NULL) {
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			ASSERT(ifp->if_real_bytes != 0);
			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
			      new_size);
			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		}
		real_size = 0;
	} else {
		/*
		 * Stuck with malloc/realloc.
		 * For inline data, the underlying buffer must be
		 * a multiple of 4 bytes in size so that it can be
		 * logged and stay on word boundaries.  We enforce
		 * that here.
		 */
		real_size = roundup(new_size, 4);
		if (ifp->if_u1.if_data == NULL) {
			/* No buffer at all yet: fresh allocation. */
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			/*
			 * Only do the realloc if the underlying size
			 * is really changing.
			 */
			if (ifp->if_real_bytes != real_size) {
				ifp->if_u1.if_data =
					kmem_realloc(ifp->if_u1.if_data,
							real_size,
							ifp->if_real_bytes,
							KM_SLEEP);
			}
		} else {
			/* Growing out of the inline buffer: allocate and
			 * copy the current if_bytes worth of data over. */
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
				ifp->if_bytes);
		}
	}
	ifp->if_real_bytes = real_size;
	ifp->if_bytes = new_size;
	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
}
2585 2586
2586 2587
2587 2588
2588 2589
2589 /* 2590 /*
2590 * Map inode to disk block and offset. 2591 * Map inode to disk block and offset.
2591 * 2592 *
2592 * mp -- the mount point structure for the current file system 2593 * mp -- the mount point structure for the current file system
2593 * tp -- the current transaction 2594 * tp -- the current transaction
2594 * ino -- the inode number of the inode to be located 2595 * ino -- the inode number of the inode to be located
2595 * imap -- this structure is filled in with the information necessary 2596 * imap -- this structure is filled in with the information necessary
2596 * to retrieve the given inode from disk 2597 * to retrieve the given inode from disk
2597 * flags -- flags to pass to xfs_dilocate indicating whether or not 2598 * flags -- flags to pass to xfs_dilocate indicating whether or not
2598 * lookups in the inode btree were OK or not 2599 * lookups in the inode btree were OK or not
2599 */ 2600 */
2600 int 2601 int
2601 xfs_imap( 2602 xfs_imap(
2602 xfs_mount_t *mp, 2603 xfs_mount_t *mp,
2603 xfs_trans_t *tp, 2604 xfs_trans_t *tp,
2604 xfs_ino_t ino, 2605 xfs_ino_t ino,
2605 xfs_imap_t *imap, 2606 xfs_imap_t *imap,
2606 uint flags) 2607 uint flags)
2607 { 2608 {
2608 xfs_fsblock_t fsbno; 2609 xfs_fsblock_t fsbno;
2609 int len; 2610 int len;
2610 int off; 2611 int off;
2611 int error; 2612 int error;
2612 2613
2613 fsbno = imap->im_blkno ? 2614 fsbno = imap->im_blkno ?
2614 XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; 2615 XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
2615 error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); 2616 error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
2616 if (error != 0) { 2617 if (error != 0) {
2617 return error; 2618 return error;
2618 } 2619 }
2619 imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); 2620 imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
2620 imap->im_len = XFS_FSB_TO_BB(mp, len); 2621 imap->im_len = XFS_FSB_TO_BB(mp, len);
2621 imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); 2622 imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
2622 imap->im_ioffset = (ushort)off; 2623 imap->im_ioffset = (ushort)off;
2623 imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); 2624 imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
2624 return 0; 2625 return 0;
2625 } 2626 }
2626 2627
/*
 * Tear down one fork of an inode: free the btree root, any heap-allocated
 * local data or extent list, and -- for the attribute fork -- the fork
 * structure itself.  On return the fork holds no heap memory.
 */
void
xfs_idestroy_fork(
	xfs_inode_t	*ip,
	int		whichfork)	/* XFS_DATA_FORK or XFS_ATTR_FORK */
{
	xfs_ifork_t	*ifp;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Free the incore btree root, if one was allocated. */
	if (ifp->if_broot != NULL) {
		kmem_free(ifp->if_broot, ifp->if_broot_bytes);
		ifp->if_broot = NULL;
	}

	/*
	 * If the format is local, then we can't have an extents
	 * array so just look for an inline data array.  If we're
	 * not local then we may or may not have an extents list,
	 * so check and free it up if we do.
	 */
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
		    (ifp->if_u1.if_data != NULL)) {
			ASSERT(ifp->if_real_bytes != 0);
			kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
			ifp->if_u1.if_data = NULL;
			ifp->if_real_bytes = 0;
		}
	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
		   ((ifp->if_flags & XFS_IFEXTIREC) ||
		    ((ifp->if_u1.if_extents != NULL) &&
		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
		/* Extent list lives outside the inline area: release it. */
		ASSERT(ifp->if_real_bytes != 0);
		xfs_iext_destroy(ifp);
	}
	/* Everything must now be inline (or gone). */
	ASSERT(ifp->if_u1.if_extents == NULL ||
	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
	ASSERT(ifp->if_real_bytes == 0);
	if (whichfork == XFS_ATTR_FORK) {
		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
		ip->i_afp = NULL;
	}
}
2669 2670
2670 /* 2671 /*
2671 * This is called free all the memory associated with an inode. 2672 * This is called free all the memory associated with an inode.
2672 * It must free the inode itself and any buffers allocated for 2673 * It must free the inode itself and any buffers allocated for
2673 * if_extents/if_data and if_broot. It must also free the lock 2674 * if_extents/if_data and if_broot. It must also free the lock
2674 * associated with the inode. 2675 * associated with the inode.
2675 */ 2676 */
2676 void 2677 void
2677 xfs_idestroy( 2678 xfs_idestroy(
2678 xfs_inode_t *ip) 2679 xfs_inode_t *ip)
2679 { 2680 {
2680 2681
2681 switch (ip->i_d.di_mode & S_IFMT) { 2682 switch (ip->i_d.di_mode & S_IFMT) {
2682 case S_IFREG: 2683 case S_IFREG:
2683 case S_IFDIR: 2684 case S_IFDIR:
2684 case S_IFLNK: 2685 case S_IFLNK:
2685 xfs_idestroy_fork(ip, XFS_DATA_FORK); 2686 xfs_idestroy_fork(ip, XFS_DATA_FORK);
2686 break; 2687 break;
2687 } 2688 }
2688 if (ip->i_afp) 2689 if (ip->i_afp)
2689 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 2690 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
2690 mrfree(&ip->i_lock); 2691 mrfree(&ip->i_lock);
2691 mrfree(&ip->i_iolock); 2692 mrfree(&ip->i_iolock);
2692 freesema(&ip->i_flock); 2693 freesema(&ip->i_flock);
2693 #ifdef XFS_BMAP_TRACE 2694 #ifdef XFS_BMAP_TRACE
2694 ktrace_free(ip->i_xtrace); 2695 ktrace_free(ip->i_xtrace);
2695 #endif 2696 #endif
2696 #ifdef XFS_BMBT_TRACE 2697 #ifdef XFS_BMBT_TRACE
2697 ktrace_free(ip->i_btrace); 2698 ktrace_free(ip->i_btrace);
2698 #endif 2699 #endif
2699 #ifdef XFS_RW_TRACE 2700 #ifdef XFS_RW_TRACE
2700 ktrace_free(ip->i_rwtrace); 2701 ktrace_free(ip->i_rwtrace);
2701 #endif 2702 #endif
2702 #ifdef XFS_ILOCK_TRACE 2703 #ifdef XFS_ILOCK_TRACE
2703 ktrace_free(ip->i_lock_trace); 2704 ktrace_free(ip->i_lock_trace);
2704 #endif 2705 #endif
2705 #ifdef XFS_DIR2_TRACE 2706 #ifdef XFS_DIR2_TRACE
2706 ktrace_free(ip->i_dir_trace); 2707 ktrace_free(ip->i_dir_trace);
2707 #endif 2708 #endif
2708 if (ip->i_itemp) { 2709 if (ip->i_itemp) {
2709 /* XXXdpd should be able to assert this but shutdown 2710 /* XXXdpd should be able to assert this but shutdown
2710 * is leaving the AIL behind. */ 2711 * is leaving the AIL behind. */
2711 ASSERT(((ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL) == 0) || 2712 ASSERT(((ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL) == 0) ||
2712 XFS_FORCED_SHUTDOWN(ip->i_mount)); 2713 XFS_FORCED_SHUTDOWN(ip->i_mount));
2713 xfs_inode_item_destroy(ip); 2714 xfs_inode_item_destroy(ip);
2714 } 2715 }
2715 kmem_zone_free(xfs_inode_zone, ip); 2716 kmem_zone_free(xfs_inode_zone, ip);
2716 } 2717 }
2717 2718
2718 2719
/*
 * Increment the pin count of the given inode.
 * This value is protected by ipinlock spinlock in the mount structure.
 *
 * The caller must hold the inode lock exclusively (asserted below);
 * see xfs_iunpin() for the matching decrement and wakeup.
 */
void
xfs_ipin(
	xfs_inode_t	*ip)
{
	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));

	atomic_inc(&ip->i_pincount);
}
2731 2732
2732 /* 2733 /*
2733 * Decrement the pin count of the given inode, and wake up 2734 * Decrement the pin count of the given inode, and wake up
2734 * anyone in xfs_iwait_unpin() if the count goes to 0. The 2735 * anyone in xfs_iwait_unpin() if the count goes to 0. The
2735 * inode must have been previously pinned with a call to xfs_ipin(). 2736 * inode must have been previously pinned with a call to xfs_ipin().
2736 */ 2737 */
2737 void 2738 void
2738 xfs_iunpin( 2739 xfs_iunpin(
2739 xfs_inode_t *ip) 2740 xfs_inode_t *ip)
2740 { 2741 {
2741 ASSERT(atomic_read(&ip->i_pincount) > 0); 2742 ASSERT(atomic_read(&ip->i_pincount) > 0);
2742 2743
2743 if (atomic_dec_and_test(&ip->i_pincount)) { 2744 if (atomic_dec_and_test(&ip->i_pincount)) {
2744 /* 2745 /*
2745 * If the inode is currently being reclaimed, the 2746 * If the inode is currently being reclaimed, the
2746 * linux inode _and_ the xfs vnode may have been 2747 * linux inode _and_ the xfs vnode may have been
2747 * freed so we cannot reference either of them safely. 2748 * freed so we cannot reference either of them safely.
2748 * Hence we should not try to do anything to them 2749 * Hence we should not try to do anything to them
2749 * if the xfs inode is currently in the reclaim 2750 * if the xfs inode is currently in the reclaim
2750 * path. 2751 * path.
2751 * 2752 *
2752 * However, we still need to issue the unpin wakeup 2753 * However, we still need to issue the unpin wakeup
2753 * call as the inode reclaim may be blocked waiting for 2754 * call as the inode reclaim may be blocked waiting for
2754 * the inode to become unpinned. 2755 * the inode to become unpinned.
2755 */ 2756 */
2756 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { 2757 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
2757 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 2758 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
2758 2759
2759 /* make sync come back and flush this inode */ 2760 /* make sync come back and flush this inode */
2760 if (vp) { 2761 if (vp) {
2761 struct inode *inode = vn_to_inode(vp); 2762 struct inode *inode = vn_to_inode(vp);
2762 2763
2763 if (!(inode->i_state & 2764 if (!(inode->i_state &
2764 (I_NEW|I_FREEING|I_CLEAR))) 2765 (I_NEW|I_FREEING|I_CLEAR)))
2765 mark_inode_dirty_sync(inode); 2766 mark_inode_dirty_sync(inode);
2766 } 2767 }
2767 } 2768 }
2768 wake_up(&ip->i_ipin_wait); 2769 wake_up(&ip->i_ipin_wait);
2769 } 2770 }
2770 } 2771 }
2771 2772
2772 /* 2773 /*
2773 * This is called to wait for the given inode to be unpinned. 2774 * This is called to wait for the given inode to be unpinned.
2774 * It will sleep until this happens. The caller must have the 2775 * It will sleep until this happens. The caller must have the
2775 * inode locked in at least shared mode so that the buffer cannot 2776 * inode locked in at least shared mode so that the buffer cannot
2776 * be subsequently pinned once someone is waiting for it to be 2777 * be subsequently pinned once someone is waiting for it to be
2777 * unpinned. 2778 * unpinned.
2778 */ 2779 */
2779 STATIC void 2780 STATIC void
2780 xfs_iunpin_wait( 2781 xfs_iunpin_wait(
2781 xfs_inode_t *ip) 2782 xfs_inode_t *ip)
2782 { 2783 {
2783 xfs_inode_log_item_t *iip; 2784 xfs_inode_log_item_t *iip;
2784 xfs_lsn_t lsn; 2785 xfs_lsn_t lsn;
2785 2786
2786 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2787 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
2787 2788
2788 if (atomic_read(&ip->i_pincount) == 0) { 2789 if (atomic_read(&ip->i_pincount) == 0) {
2789 return; 2790 return;
2790 } 2791 }
2791 2792
2792 iip = ip->i_itemp; 2793 iip = ip->i_itemp;
2793 if (iip && iip->ili_last_lsn) { 2794 if (iip && iip->ili_last_lsn) {
2794 lsn = iip->ili_last_lsn; 2795 lsn = iip->ili_last_lsn;
2795 } else { 2796 } else {
2796 lsn = (xfs_lsn_t)0; 2797 lsn = (xfs_lsn_t)0;
2797 } 2798 }
2798 2799
2799 /* 2800 /*
2800 * Give the log a push so we don't wait here too long. 2801 * Give the log a push so we don't wait here too long.
2801 */ 2802 */
2802 xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); 2803 xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE);
2803 2804
2804 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2805 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
2805 } 2806 }
2806 2807
2807 2808
/*
 * xfs_iextents_copy()
 *
 * This is called to copy the REAL extents (as opposed to the delayed
 * allocation extents) from the inode into the given buffer, converting
 * each record to on-disk format.  It returns the number of bytes
 * copied into the buffer.
 *
 * NOTE(review): an earlier version of this comment described a
 * memcpy() fast path for forks with no delayed extents; the code
 * below always walks the extents one at a time and skips delayed
 * ones — confirm whether the fast path was intentionally removed.
 */
int
xfs_iextents_copy(
	xfs_inode_t		*ip,	/* inode whose fork is copied */
	xfs_bmbt_rec_t		*buffer,/* destination, on-disk format */
	int			whichfork)
{
	int			copied;
	xfs_bmbt_rec_t		*dest_ep;
	xfs_bmbt_rec_t		*ep;
#ifdef XFS_BMAP_TRACE
	static char		fname[] = "xfs_iextents_copy";
#endif
	int			i;
	xfs_ifork_t		*ifp;
	int			nrecs;
	xfs_fsblock_t		start_block;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Caller must hold the inode lock; fork must be non-empty. */
	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
	ASSERT(ifp->if_bytes > 0);

	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
	ASSERT(nrecs > 0);

	/*
	 * There are some delayed allocation extents in the
	 * inode, so copy the extents one at a time and skip
	 * the delayed ones.  There must be at least one
	 * non-delayed extent.
	 */
	dest_ep = buffer;
	copied = 0;
	for (i = 0; i < nrecs; i++) {
		ep = xfs_iext_get_ext(ifp, i);
		start_block = xfs_bmbt_get_startblock(ep);
		if (ISNULLSTARTBLOCK(start_block)) {
			/*
			 * It's a delayed allocation extent, so skip it.
			 */
			continue;
		}

		/* Translate to on disk format; put_unaligned is used
		 * because the destination may not be 64-bit aligned. */
		put_unaligned(INT_GET(ep->l0, ARCH_CONVERT),
			      (__uint64_t*)&dest_ep->l0);
		put_unaligned(INT_GET(ep->l1, ARCH_CONVERT),
			      (__uint64_t*)&dest_ep->l1);
		dest_ep++;
		copied++;
	}
	/* At least one real extent must have been copied. */
	ASSERT(copied != 0);
	xfs_validate_extents(ifp, copied, 1, XFS_EXTFMT_INODE(ip));

	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
}
2875 2876
/*
 * Each of the following cases stores data into the same region
 * of the on-disk inode, so only one of them can be valid at
 * any given time. While it is possible to have conflicting formats
 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
 * in EXTENTS format, this can only happen when the fork has
 * changed formats after being modified but before being flushed.
 * In these cases, the format always takes precedence, because the
 * format indicates the current state of the fork.
 *
 * Copies the given in-core fork of 'ip' into the on-disk inode
 * 'dip' inside buffer 'bp'.  Returns 0; corruption is caught by
 * ASSERTs rather than error returns here.
 */
/*ARGSUSED*/
STATIC int
xfs_iflush_fork(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip,
	xfs_inode_log_item_t	*iip,
	int			whichfork,
	xfs_buf_t		*bp)
{
	char			*cp;
	xfs_ifork_t		*ifp;
	xfs_mount_t		*mp;
#ifdef XFS_TRANS_DEBUG
	int			first;
#endif
	/* Per-fork log flags, indexed by whichfork (data=0, attr=1). */
	static const short	brootflag[2] =
		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
	static const short	dataflag[2] =
		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
	static const short	extflag[2] =
		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };

	/* Nothing logged, nothing to flush. */
	if (iip == NULL)
		return 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/*
	 * This can happen if we gave up in iformat in an error path,
	 * for the attribute fork.
	 */
	if (ifp == NULL) {
		ASSERT(whichfork == XFS_ATTR_FORK);
		return 0;
	}
	cp = XFS_DFORK_PTR(dip, whichfork);
	mp = ip->i_mount;
	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		/* Inline data: raw copy into the literal area. */
		if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
		    (ifp->if_bytes > 0)) {
			ASSERT(ifp->if_u1.if_data != NULL);
			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
		}
		break;

	case XFS_DINODE_FMT_EXTENTS:
		/* Extent list: copy real (non-delayed) extents only. */
		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
		       !(iip->ili_format.ilf_fields & extflag[whichfork]));
		ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) ||
			(ifp->if_bytes == 0));
		ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) ||
			(ifp->if_bytes > 0));
		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
		    (ifp->if_bytes > 0)) {
			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
				whichfork);
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		/* B-tree root: convert incore broot to on-disk bmdr form. */
		if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
		    (ifp->if_broot_bytes > 0)) {
			ASSERT(ifp->if_broot != NULL);
			ASSERT(ifp->if_broot_bytes <=
			       (XFS_IFORK_SIZE(ip, whichfork) +
				XFS_BROOT_SIZE_ADJ));
			xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes,
				(xfs_bmdr_block_t *)cp,
				XFS_DFORK_SIZE(dip, mp, whichfork));
		}
		break;

	case XFS_DINODE_FMT_DEV:
		/* Device inode: only the data fork carries a dev number. */
		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
			ASSERT(whichfork == XFS_DATA_FORK);
			INT_SET(dip->di_u.di_dev, ARCH_CONVERT, ip->i_df.if_u2.if_rdev);
		}
		break;

	case XFS_DINODE_FMT_UUID:
		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
			ASSERT(whichfork == XFS_DATA_FORK);
			memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid,
				sizeof(uuid_t));
		}
		break;

	default:
		ASSERT(0);
		break;
	}

	return 0;
}
2981 2982
/*
 * xfs_iflush() will write a modified inode's changes out to the
 * inode's on disk home.  The caller must have the inode lock held
 * in at least shared mode and the inode flush semaphore must be
 * held as well.  The inode lock will still be held upon return from
 * the call and the caller is free to unlock it.
 * The inode flush lock will be unlocked when the inode reaches the disk.
 * The flags indicate how the inode's buffer should be written out.
 *
 * Returns 0 on success, EIO on forced shutdown, EFSCORRUPTED when
 * in-core corruption is detected (which also shuts the fs down).
 */
int
xfs_iflush(
	xfs_inode_t		*ip,
	uint			flags)
{
	xfs_inode_log_item_t	*iip;
	xfs_buf_t		*bp;
	xfs_dinode_t		*dip;
	xfs_mount_t		*mp;
	int			error;
	/* REFERENCED */
	xfs_chash_t		*ch;
	xfs_inode_t		*iq;
	int			clcount;	/* count of inodes clustered */
	int			bufwasdelwri;
	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
	SPLDECL(s);

	XFS_STATS_INC(xs_iflush_count);

	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
	ASSERT(issemalocked(&(ip->i_flock)));
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > ip->i_df.if_ext_max);

	iip = ip->i_itemp;
	mp = ip->i_mount;

	/*
	 * If the inode isn't dirty, then just release the inode
	 * flush lock and do nothing.
	 */
	if ((ip->i_update_core == 0) &&
	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
		ASSERT((iip != NULL) ?
			!(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
		xfs_ifunlock(ip);
		return 0;
	}

	/*
	 * We can't flush the inode until it is unpinned, so
	 * wait for it.  We know noone new can pin it, because
	 * we are holding the inode lock shared and you need
	 * to hold it exclusively to pin the inode.
	 */
	xfs_iunpin_wait(ip);

	/*
	 * This may have been unpinned because the filesystem is shutting
	 * down forcibly. If that's the case we must not write this inode
	 * to disk, because the log record didn't make it to disk!
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		ip->i_update_core = 0;
		if (iip)
			iip->ili_format.ilf_fields = 0;
		xfs_ifunlock(ip);
		return XFS_ERROR(EIO);
	}

	/*
	 * Get the buffer containing the on-disk inode.
	 */
	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
	if (error) {
		xfs_ifunlock(ip);
		return error;
	}

	/*
	 * Decide how buffer will be flushed out.  This is done before
	 * the call to xfs_iflush_int because this field is zeroed by it.
	 */
	if (iip != NULL && iip->ili_format.ilf_fields != 0) {
		/*
		 * Flush out the inode buffer according to the directions
		 * of the caller.  In the cases where the caller has given
		 * us a choice choose the non-delwri case.  This is because
		 * the inode is in the AIL and we need to get it out soon.
		 */
		switch (flags) {
		case XFS_IFLUSH_SYNC:
		case XFS_IFLUSH_DELWRI_ELSE_SYNC:
			flags = 0;
			break;
		case XFS_IFLUSH_ASYNC:
		case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
			flags = INT_ASYNC;
			break;
		case XFS_IFLUSH_DELWRI:
			flags = INT_DELWRI;
			break;
		default:
			ASSERT(0);
			flags = 0;
			break;
		}
	} else {
		/* No logged fields: the "else" leg of each choice applies. */
		switch (flags) {
		case XFS_IFLUSH_DELWRI_ELSE_SYNC:
		case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
		case XFS_IFLUSH_DELWRI:
			flags = INT_DELWRI;
			break;
		case XFS_IFLUSH_ASYNC:
			flags = INT_ASYNC;
			break;
		case XFS_IFLUSH_SYNC:
			flags = 0;
			break;
		default:
			ASSERT(0);
			flags = 0;
			break;
		}
	}

	/*
	 * First flush out the inode that xfs_iflush was called with.
	 */
	error = xfs_iflush_int(ip, bp);
	if (error) {
		goto corrupt_out;
	}

	/*
	 * inode clustering:
	 * see if other inodes can be gathered into this write
	 */

	ip->i_chash->chl_buf = bp;

	ch = XFS_CHASH(mp, ip->i_blkno);
	s = mutex_spinlock(&ch->ch_lock);

	clcount = 0;
	for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) {
		/*
		 * Do an un-protected check to see if the inode is dirty and
		 * is a candidate for flushing.  These checks will be repeated
		 * later after the appropriate locks are acquired.
		 */
		iip = iq->i_itemp;
		if ((iq->i_update_core == 0) &&
		    ((iip == NULL) ||
		     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
		      xfs_ipincount(iq) == 0) {
			continue;
		}

		/*
		 * Try to get locks.  If any are unavailable,
		 * then this inode cannot be flushed and is skipped.
		 */

		/* get inode locks (just i_lock) */
		if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
			/* get inode flush lock */
			if (xfs_iflock_nowait(iq)) {
				/* check if pinned */
				if (xfs_ipincount(iq) == 0) {
					/* arriving here means that
					 * this inode can be flushed.
					 * first re-check that it's
					 * dirty
					 */
					iip = iq->i_itemp;
					if ((iq->i_update_core != 0)||
					    ((iip != NULL) &&
					     (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
						clcount++;
						error = xfs_iflush_int(iq, bp);
						if (error) {
							xfs_iunlock(iq,
								    XFS_ILOCK_SHARED);
							goto cluster_corrupt_out;
						}
					} else {
						xfs_ifunlock(iq);
					}
				} else {
					xfs_ifunlock(iq);
				}
			}
			xfs_iunlock(iq, XFS_ILOCK_SHARED);
		}
	}
	mutex_spinunlock(&ch->ch_lock, s);

	if (clcount) {
		XFS_STATS_INC(xs_icluster_flushcnt);
		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
	}

	/*
	 * If the buffer is pinned then push on the log so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (XFS_BUF_ISPINNED(bp)){
		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
	}

	/* Issue the write per the normalized flags chosen above. */
	if (flags & INT_DELWRI) {
		xfs_bdwrite(mp, bp);
	} else if (flags & INT_ASYNC) {
		xfs_bawrite(mp, bp);
	} else {
		error = xfs_bwrite(mp, bp);
	}
	return error;

corrupt_out:
	xfs_buf_relse(bp);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	xfs_iflush_abort(ip);
	/*
	 * Unlocks the flush lock
	 */
	return XFS_ERROR(EFSCORRUPTED);

cluster_corrupt_out:
	/* Corruption detected in the clustering loop.  Invalidate the
	 * inode buffer and shut down the filesystem.
	 */
	mutex_spinunlock(&ch->ch_lock, s);

	/*
	 * Clean up the buffer.  If it was B_DELWRI, just release it --
	 * brelse can handle it with no problems.  If not, shut down the
	 * filesystem before releasing the buffer.
	 */
	if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
		xfs_buf_relse(bp);
	}

	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);

	if(!bufwasdelwri) {
		/*
		 * Just like incore_relse: if we have b_iodone functions,
		 * mark the buffer as an error and call them.  Otherwise
		 * mark it as stale and brelse.
		 */
		if (XFS_BUF_IODONE_FUNC(bp)) {
			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
			XFS_BUF_UNDONE(bp);
			XFS_BUF_STALE(bp);
			XFS_BUF_SHUT(bp);
			XFS_BUF_ERROR(bp,EIO);
			xfs_biodone(bp);
		} else {
			XFS_BUF_STALE(bp);
			xfs_buf_relse(bp);
		}
	}

	/* NOTE(review): aborts iq (the cluster inode that failed),
	 * not ip — the ip flush lock was already consumed by its own
	 * successful xfs_iflush_int; confirm against xfs_iflush_abort. */
	xfs_iflush_abort(iq);
	/*
	 * Unlocks the flush lock
	 */
	return XFS_ERROR(EFSCORRUPTED);
}
3254 3255
3255 3256
3256 STATIC int 3257 STATIC int
3257 xfs_iflush_int( 3258 xfs_iflush_int(
3258 xfs_inode_t *ip, 3259 xfs_inode_t *ip,
3259 xfs_buf_t *bp) 3260 xfs_buf_t *bp)
3260 { 3261 {
3261 xfs_inode_log_item_t *iip; 3262 xfs_inode_log_item_t *iip;
3262 xfs_dinode_t *dip; 3263 xfs_dinode_t *dip;
3263 xfs_mount_t *mp; 3264 xfs_mount_t *mp;
3264 #ifdef XFS_TRANS_DEBUG 3265 #ifdef XFS_TRANS_DEBUG
3265 int first; 3266 int first;
3266 #endif 3267 #endif
3267 SPLDECL(s); 3268 SPLDECL(s);
3268 3269
3269 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3270 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
3270 ASSERT(issemalocked(&(ip->i_flock))); 3271 ASSERT(issemalocked(&(ip->i_flock)));
3271 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3272 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3272 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3273 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3273 3274
3274 iip = ip->i_itemp; 3275 iip = ip->i_itemp;
3275 mp = ip->i_mount; 3276 mp = ip->i_mount;
3276 3277
3277 3278
3278 /* 3279 /*
3279 * If the inode isn't dirty, then just release the inode 3280 * If the inode isn't dirty, then just release the inode
3280 * flush lock and do nothing. 3281 * flush lock and do nothing.
3281 */ 3282 */
3282 if ((ip->i_update_core == 0) && 3283 if ((ip->i_update_core == 0) &&
3283 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { 3284 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
3284 xfs_ifunlock(ip); 3285 xfs_ifunlock(ip);
3285 return 0; 3286 return 0;
3286 } 3287 }
3287 3288
3288 /* set *dip = inode's place in the buffer */ 3289 /* set *dip = inode's place in the buffer */
3289 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); 3290 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset);
3290 3291
3291 /* 3292 /*
3292 * Clear i_update_core before copying out the data. 3293 * Clear i_update_core before copying out the data.
3293 * This is for coordination with our timestamp updates 3294 * This is for coordination with our timestamp updates
3294 * that don't hold the inode lock. They will always 3295 * that don't hold the inode lock. They will always
3295 * update the timestamps BEFORE setting i_update_core, 3296 * update the timestamps BEFORE setting i_update_core,
3296 * so if we clear i_update_core after they set it we 3297 * so if we clear i_update_core after they set it we
3297 * are guaranteed to see their updates to the timestamps. 3298 * are guaranteed to see their updates to the timestamps.
3298 * I believe that this depends on strongly ordered memory 3299 * I believe that this depends on strongly ordered memory
3299 * semantics, but we have that. We use the SYNCHRONIZE 3300 * semantics, but we have that. We use the SYNCHRONIZE
3300 * macro to make sure that the compiler does not reorder 3301 * macro to make sure that the compiler does not reorder
3301 * the i_update_core access below the data copy below. 3302 * the i_update_core access below the data copy below.
3302 */ 3303 */
3303 ip->i_update_core = 0; 3304 ip->i_update_core = 0;
3304 SYNCHRONIZE(); 3305 SYNCHRONIZE();
3305 3306
3306 /* 3307 /*
3307 * Make sure to get the latest atime from the Linux inode. 3308 * Make sure to get the latest atime from the Linux inode.
3308 */ 3309 */
3309 xfs_synchronize_atime(ip); 3310 xfs_synchronize_atime(ip);
3310 3311
3311 if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC, 3312 if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
3312 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 3313 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
3313 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3314 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3314 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3315 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p",
3315 ip->i_ino, (int) INT_GET(dip->di_core.di_magic, ARCH_CONVERT), dip); 3316 ip->i_ino, (int) INT_GET(dip->di_core.di_magic, ARCH_CONVERT), dip);
3316 goto corrupt_out; 3317 goto corrupt_out;
3317 } 3318 }
3318 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 3319 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
3319 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 3320 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
3320 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3321 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3321 "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 3322 "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
3322 ip->i_ino, ip, ip->i_d.di_magic); 3323 ip->i_ino, ip, ip->i_d.di_magic);
3323 goto corrupt_out; 3324 goto corrupt_out;
3324 } 3325 }
3325 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 3326 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
3326 if (XFS_TEST_ERROR( 3327 if (XFS_TEST_ERROR(
3327 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3328 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
3328 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 3329 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
3329 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 3330 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
3330 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3331 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3331 "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", 3332 "xfs_iflush: Bad regular inode %Lu, ptr 0x%p",
3332 ip->i_ino, ip); 3333 ip->i_ino, ip);
3333 goto corrupt_out; 3334 goto corrupt_out;
3334 } 3335 }
3335 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 3336 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
3336 if (XFS_TEST_ERROR( 3337 if (XFS_TEST_ERROR(
3337 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3338 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
3338 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 3339 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
3339 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 3340 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
3340 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 3341 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
3341 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3342 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3342 "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", 3343 "xfs_iflush: Bad directory inode %Lu, ptr 0x%p",
3343 ip->i_ino, ip); 3344 ip->i_ino, ip);
3344 goto corrupt_out; 3345 goto corrupt_out;
3345 } 3346 }
3346 } 3347 }
3347 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 3348 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
3348 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 3349 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
3349 XFS_RANDOM_IFLUSH_5)) { 3350 XFS_RANDOM_IFLUSH_5)) {
3350 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3351 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3351 "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", 3352 "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p",
3352 ip->i_ino, 3353 ip->i_ino,
3353 ip->i_d.di_nextents + ip->i_d.di_anextents, 3354 ip->i_d.di_nextents + ip->i_d.di_anextents,
3354 ip->i_d.di_nblocks, 3355 ip->i_d.di_nblocks,
3355 ip); 3356 ip);
3356 goto corrupt_out; 3357 goto corrupt_out;
3357 } 3358 }
3358 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 3359 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
3359 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 3360 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
3360 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3361 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
3361 "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 3362 "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
3362 ip->i_ino, ip->i_d.di_forkoff, ip); 3363 ip->i_ino, ip->i_d.di_forkoff, ip);
3363 goto corrupt_out; 3364 goto corrupt_out;
3364 } 3365 }
3365 /* 3366 /*
3366 * bump the flush iteration count, used to detect flushes which 3367 * bump the flush iteration count, used to detect flushes which
3367 * postdate a log record during recovery. 3368 * postdate a log record during recovery.
3368 */ 3369 */
3369 3370
3370 ip->i_d.di_flushiter++; 3371 ip->i_d.di_flushiter++;
3371 3372
3372 /* 3373 /*
3373 * Copy the dirty parts of the inode into the on-disk 3374 * Copy the dirty parts of the inode into the on-disk
3374 * inode. We always copy out the core of the inode, 3375 * inode. We always copy out the core of the inode,
3375 * because if the inode is dirty at all the core must 3376 * because if the inode is dirty at all the core must
3376 * be. 3377 * be.
3377 */ 3378 */
3378 xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1); 3379 xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1);
3379 3380
3380 /* Wrap, we never let the log put out DI_MAX_FLUSH */ 3381 /* Wrap, we never let the log put out DI_MAX_FLUSH */
3381 if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 3382 if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
3382 ip->i_d.di_flushiter = 0; 3383 ip->i_d.di_flushiter = 0;
3383 3384
3384 /* 3385 /*
3385 * If this is really an old format inode and the superblock version 3386 * If this is really an old format inode and the superblock version
3386 * has not been updated to support only new format inodes, then 3387 * has not been updated to support only new format inodes, then
3387 * convert back to the old inode format. If the superblock version 3388 * convert back to the old inode format. If the superblock version
3388 * has been updated, then make the conversion permanent. 3389 * has been updated, then make the conversion permanent.
3389 */ 3390 */
3390 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 3391 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
3391 XFS_SB_VERSION_HASNLINK(&mp->m_sb)); 3392 XFS_SB_VERSION_HASNLINK(&mp->m_sb));
3392 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 3393 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
3393 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 3394 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
3394 /* 3395 /*
3395 * Convert it back. 3396 * Convert it back.
3396 */ 3397 */
3397 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 3398 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
3398 INT_SET(dip->di_core.di_onlink, ARCH_CONVERT, ip->i_d.di_nlink); 3399 INT_SET(dip->di_core.di_onlink, ARCH_CONVERT, ip->i_d.di_nlink);
3399 } else { 3400 } else {
3400 /* 3401 /*
3401 * The superblock version has already been bumped, 3402 * The superblock version has already been bumped,
3402 * so just make the conversion to the new inode 3403 * so just make the conversion to the new inode
3403 * format permanent. 3404 * format permanent.
3404 */ 3405 */
3405 ip->i_d.di_version = XFS_DINODE_VERSION_2; 3406 ip->i_d.di_version = XFS_DINODE_VERSION_2;
3406 INT_SET(dip->di_core.di_version, ARCH_CONVERT, XFS_DINODE_VERSION_2); 3407 INT_SET(dip->di_core.di_version, ARCH_CONVERT, XFS_DINODE_VERSION_2);
3407 ip->i_d.di_onlink = 0; 3408 ip->i_d.di_onlink = 0;
3408 dip->di_core.di_onlink = 0; 3409 dip->di_core.di_onlink = 0;
3409 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 3410 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
3410 memset(&(dip->di_core.di_pad[0]), 0, 3411 memset(&(dip->di_core.di_pad[0]), 0,
3411 sizeof(dip->di_core.di_pad)); 3412 sizeof(dip->di_core.di_pad));
3412 ASSERT(ip->i_d.di_projid == 0); 3413 ASSERT(ip->i_d.di_projid == 0);
3413 } 3414 }
3414 } 3415 }
3415 3416
3416 if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { 3417 if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) {
3417 goto corrupt_out; 3418 goto corrupt_out;
3418 } 3419 }
3419 3420
3420 if (XFS_IFORK_Q(ip)) { 3421 if (XFS_IFORK_Q(ip)) {
3421 /* 3422 /*
3422 * The only error from xfs_iflush_fork is on the data fork. 3423 * The only error from xfs_iflush_fork is on the data fork.
3423 */ 3424 */
3424 (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 3425 (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
3425 } 3426 }
3426 xfs_inobp_check(mp, bp); 3427 xfs_inobp_check(mp, bp);
3427 3428
3428 /* 3429 /*
3429 * We've recorded everything logged in the inode, so we'd 3430 * We've recorded everything logged in the inode, so we'd
3430 * like to clear the ilf_fields bits so we don't log and 3431 * like to clear the ilf_fields bits so we don't log and
3431 * flush things unnecessarily. However, we can't stop 3432 * flush things unnecessarily. However, we can't stop
3432 * logging all this information until the data we've copied 3433 * logging all this information until the data we've copied
3433 * into the disk buffer is written to disk. If we did we might 3434 * into the disk buffer is written to disk. If we did we might
3434 * overwrite the copy of the inode in the log with all the 3435 * overwrite the copy of the inode in the log with all the
3435 * data after re-logging only part of it, and in the face of 3436 * data after re-logging only part of it, and in the face of
3436 * a crash we wouldn't have all the data we need to recover. 3437 * a crash we wouldn't have all the data we need to recover.
3437 * 3438 *
3438 * What we do is move the bits to the ili_last_fields field. 3439 * What we do is move the bits to the ili_last_fields field.
3439 * When logging the inode, these bits are moved back to the 3440 * When logging the inode, these bits are moved back to the
3440 * ilf_fields field. In the xfs_iflush_done() routine we 3441 * ilf_fields field. In the xfs_iflush_done() routine we
3441 * clear ili_last_fields, since we know that the information 3442 * clear ili_last_fields, since we know that the information
3442 * those bits represent is permanently on disk. As long as 3443 * those bits represent is permanently on disk. As long as
3443 * the flush completes before the inode is logged again, then 3444 * the flush completes before the inode is logged again, then
3444 * both ilf_fields and ili_last_fields will be cleared. 3445 * both ilf_fields and ili_last_fields will be cleared.
3445 * 3446 *
3446 * We can play with the ilf_fields bits here, because the inode 3447 * We can play with the ilf_fields bits here, because the inode
3447 * lock must be held exclusively in order to set bits there 3448 * lock must be held exclusively in order to set bits there
3448 * and the flush lock protects the ili_last_fields bits. 3449 * and the flush lock protects the ili_last_fields bits.
3449 * Set ili_logged so the flush done 3450 * Set ili_logged so the flush done
3450 * routine can tell whether or not to look in the AIL. 3451 * routine can tell whether or not to look in the AIL.
3451 * Also, store the current LSN of the inode so that we can tell 3452 * Also, store the current LSN of the inode so that we can tell
3452 * whether the item has moved in the AIL from xfs_iflush_done(). 3453 * whether the item has moved in the AIL from xfs_iflush_done().
3453 * In order to read the lsn we need the AIL lock, because 3454 * In order to read the lsn we need the AIL lock, because
3454 * it is a 64 bit value that cannot be read atomically. 3455 * it is a 64 bit value that cannot be read atomically.
3455 */ 3456 */
3456 if (iip != NULL && iip->ili_format.ilf_fields != 0) { 3457 if (iip != NULL && iip->ili_format.ilf_fields != 0) {
3457 iip->ili_last_fields = iip->ili_format.ilf_fields; 3458 iip->ili_last_fields = iip->ili_format.ilf_fields;
3458 iip->ili_format.ilf_fields = 0; 3459 iip->ili_format.ilf_fields = 0;
3459 iip->ili_logged = 1; 3460 iip->ili_logged = 1;
3460 3461
3461 ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ 3462 ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
3462 AIL_LOCK(mp,s); 3463 AIL_LOCK(mp,s);
3463 iip->ili_flush_lsn = iip->ili_item.li_lsn; 3464 iip->ili_flush_lsn = iip->ili_item.li_lsn;
3464 AIL_UNLOCK(mp, s); 3465 AIL_UNLOCK(mp, s);
3465 3466
3466 /* 3467 /*
3467 * Attach the function xfs_iflush_done to the inode's 3468 * Attach the function xfs_iflush_done to the inode's
3468 * buffer. This will remove the inode from the AIL 3469 * buffer. This will remove the inode from the AIL
3469 * and unlock the inode's flush lock when the inode is 3470 * and unlock the inode's flush lock when the inode is
3470 * completely written to disk. 3471 * completely written to disk.
3471 */ 3472 */
3472 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) 3473 xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*))
3473 xfs_iflush_done, (xfs_log_item_t *)iip); 3474 xfs_iflush_done, (xfs_log_item_t *)iip);
3474 3475
3475 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 3476 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
3476 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 3477 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
3477 } else { 3478 } else {
3478 /* 3479 /*
3479 * We're flushing an inode which is not in the AIL and has 3480 * We're flushing an inode which is not in the AIL and has
3480 * not been logged but has i_update_core set. For this 3481 * not been logged but has i_update_core set. For this
3481 * case we can use a B_DELWRI flush and immediately drop 3482 * case we can use a B_DELWRI flush and immediately drop
3482 * the inode flush lock because we can avoid the whole 3483 * the inode flush lock because we can avoid the whole
3483 * AIL state thing. It's OK to drop the flush lock now, 3484 * AIL state thing. It's OK to drop the flush lock now,
3484 * because we've already locked the buffer and to do anything 3485 * because we've already locked the buffer and to do anything
3485 * you really need both. 3486 * you really need both.
3486 */ 3487 */
3487 if (iip != NULL) { 3488 if (iip != NULL) {
3488 ASSERT(iip->ili_logged == 0); 3489 ASSERT(iip->ili_logged == 0);
3489 ASSERT(iip->ili_last_fields == 0); 3490 ASSERT(iip->ili_last_fields == 0);
3490 ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); 3491 ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
3491 } 3492 }
3492 xfs_ifunlock(ip); 3493 xfs_ifunlock(ip);
3493 } 3494 }
3494 3495
3495 return 0; 3496 return 0;
3496 3497
3497 corrupt_out: 3498 corrupt_out:
3498 return XFS_ERROR(EFSCORRUPTED); 3499 return XFS_ERROR(EFSCORRUPTED);
3499 } 3500 }
3500 3501
3501 3502
3502 /* 3503 /*
3503 * Flush all inactive inodes in mp. 3504 * Flush all inactive inodes in mp.
3504 */ 3505 */
3505 void 3506 void
3506 xfs_iflush_all( 3507 xfs_iflush_all(
3507 xfs_mount_t *mp) 3508 xfs_mount_t *mp)
3508 { 3509 {
3509 xfs_inode_t *ip; 3510 xfs_inode_t *ip;
3510 bhv_vnode_t *vp; 3511 bhv_vnode_t *vp;
3511 3512
3512 again: 3513 again:
3513 XFS_MOUNT_ILOCK(mp); 3514 XFS_MOUNT_ILOCK(mp);
3514 ip = mp->m_inodes; 3515 ip = mp->m_inodes;
3515 if (ip == NULL) 3516 if (ip == NULL)
3516 goto out; 3517 goto out;
3517 3518
3518 do { 3519 do {
3519 /* Make sure we skip markers inserted by sync */ 3520 /* Make sure we skip markers inserted by sync */
3520 if (ip->i_mount == NULL) { 3521 if (ip->i_mount == NULL) {
3521 ip = ip->i_mnext; 3522 ip = ip->i_mnext;
3522 continue; 3523 continue;
3523 } 3524 }
3524 3525
3525 vp = XFS_ITOV_NULL(ip); 3526 vp = XFS_ITOV_NULL(ip);
3526 if (!vp) { 3527 if (!vp) {
3527 XFS_MOUNT_IUNLOCK(mp); 3528 XFS_MOUNT_IUNLOCK(mp);
3528 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); 3529 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
3529 goto again; 3530 goto again;
3530 } 3531 }
3531 3532
3532 ASSERT(vn_count(vp) == 0); 3533 ASSERT(vn_count(vp) == 0);
3533 3534
3534 ip = ip->i_mnext; 3535 ip = ip->i_mnext;
3535 } while (ip != mp->m_inodes); 3536 } while (ip != mp->m_inodes);
3536 out: 3537 out:
3537 XFS_MOUNT_IUNLOCK(mp); 3538 XFS_MOUNT_IUNLOCK(mp);
3538 } 3539 }
3539 3540
3540 /* 3541 /*
3541 * xfs_iaccess: check accessibility of inode for mode. 3542 * xfs_iaccess: check accessibility of inode for mode.
3542 */ 3543 */
3543 int 3544 int
3544 xfs_iaccess( 3545 xfs_iaccess(
3545 xfs_inode_t *ip, 3546 xfs_inode_t *ip,
3546 mode_t mode, 3547 mode_t mode,
3547 cred_t *cr) 3548 cred_t *cr)
3548 { 3549 {
3549 int error; 3550 int error;
3550 mode_t orgmode = mode; 3551 mode_t orgmode = mode;
3551 struct inode *inode = vn_to_inode(XFS_ITOV(ip)); 3552 struct inode *inode = vn_to_inode(XFS_ITOV(ip));
3552 3553
3553 if (mode & S_IWUSR) { 3554 if (mode & S_IWUSR) {
3554 umode_t imode = inode->i_mode; 3555 umode_t imode = inode->i_mode;
3555 3556
3556 if (IS_RDONLY(inode) && 3557 if (IS_RDONLY(inode) &&
3557 (S_ISREG(imode) || S_ISDIR(imode) || S_ISLNK(imode))) 3558 (S_ISREG(imode) || S_ISDIR(imode) || S_ISLNK(imode)))
3558 return XFS_ERROR(EROFS); 3559 return XFS_ERROR(EROFS);
3559 3560
3560 if (IS_IMMUTABLE(inode)) 3561 if (IS_IMMUTABLE(inode))
3561 return XFS_ERROR(EACCES); 3562 return XFS_ERROR(EACCES);
3562 } 3563 }
3563 3564
3564 /* 3565 /*
3565 * If there's an Access Control List it's used instead of 3566 * If there's an Access Control List it's used instead of
3566 * the mode bits. 3567 * the mode bits.
3567 */ 3568 */
3568 if ((error = _ACL_XFS_IACCESS(ip, mode, cr)) != -1) 3569 if ((error = _ACL_XFS_IACCESS(ip, mode, cr)) != -1)
3569 return error ? XFS_ERROR(error) : 0; 3570 return error ? XFS_ERROR(error) : 0;
3570 3571
3571 if (current_fsuid(cr) != ip->i_d.di_uid) { 3572 if (current_fsuid(cr) != ip->i_d.di_uid) {
3572 mode >>= 3; 3573 mode >>= 3;
3573 if (!in_group_p((gid_t)ip->i_d.di_gid)) 3574 if (!in_group_p((gid_t)ip->i_d.di_gid))
3574 mode >>= 3; 3575 mode >>= 3;
3575 } 3576 }
3576 3577
3577 /* 3578 /*
3578 * If the DACs are ok we don't need any capability check. 3579 * If the DACs are ok we don't need any capability check.
3579 */ 3580 */
3580 if ((ip->i_d.di_mode & mode) == mode) 3581 if ((ip->i_d.di_mode & mode) == mode)
3581 return 0; 3582 return 0;
3582 /* 3583 /*
3583 * Read/write DACs are always overridable. 3584 * Read/write DACs are always overridable.
3584 * Executable DACs are overridable if at least one exec bit is set. 3585 * Executable DACs are overridable if at least one exec bit is set.
3585 */ 3586 */
3586 if (!(orgmode & S_IXUSR) || 3587 if (!(orgmode & S_IXUSR) ||
3587 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 3588 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
3588 if (capable_cred(cr, CAP_DAC_OVERRIDE)) 3589 if (capable_cred(cr, CAP_DAC_OVERRIDE))
3589 return 0; 3590 return 0;
3590 3591
3591 if ((orgmode == S_IRUSR) || 3592 if ((orgmode == S_IRUSR) ||
3592 (S_ISDIR(inode->i_mode) && (!(orgmode & S_IWUSR)))) { 3593 (S_ISDIR(inode->i_mode) && (!(orgmode & S_IWUSR)))) {
3593 if (capable_cred(cr, CAP_DAC_READ_SEARCH)) 3594 if (capable_cred(cr, CAP_DAC_READ_SEARCH))
3594 return 0; 3595 return 0;
3595 #ifdef NOISE 3596 #ifdef NOISE
3596 cmn_err(CE_NOTE, "Ick: mode=%o, orgmode=%o", mode, orgmode); 3597 cmn_err(CE_NOTE, "Ick: mode=%o, orgmode=%o", mode, orgmode);
3597 #endif /* NOISE */ 3598 #endif /* NOISE */
3598 return XFS_ERROR(EACCES); 3599 return XFS_ERROR(EACCES);
3599 } 3600 }
3600 return XFS_ERROR(EACCES); 3601 return XFS_ERROR(EACCES);
3601 } 3602 }
3602 3603
3603 /* 3604 /*
3604 * xfs_iroundup: round up argument to next power of two 3605 * xfs_iroundup: round up argument to next power of two
3605 */ 3606 */
3606 uint 3607 uint
3607 xfs_iroundup( 3608 xfs_iroundup(
3608 uint v) 3609 uint v)
3609 { 3610 {
3610 int i; 3611 int i;
3611 uint m; 3612 uint m;
3612 3613
3613 if ((v & (v - 1)) == 0) 3614 if ((v & (v - 1)) == 0)
3614 return v; 3615 return v;
3615 ASSERT((v & 0x80000000) == 0); 3616 ASSERT((v & 0x80000000) == 0);
3616 if ((v & (v + 1)) == 0) 3617 if ((v & (v + 1)) == 0)
3617 return v + 1; 3618 return v + 1;
3618 for (i = 0, m = 1; i < 31; i++, m <<= 1) { 3619 for (i = 0, m = 1; i < 31; i++, m <<= 1) {
3619 if (v & m) 3620 if (v & m)
3620 continue; 3621 continue;
3621 v |= m; 3622 v |= m;
3622 if ((v & (v + 1)) == 0) 3623 if ((v & (v + 1)) == 0)
3623 return v + 1; 3624 return v + 1;
3624 } 3625 }
3625 ASSERT(0); 3626 ASSERT(0);
3626 return( 0 ); 3627 return( 0 );
3627 } 3628 }
3628 3629
3629 #ifdef XFS_ILOCK_TRACE 3630 #ifdef XFS_ILOCK_TRACE
3630 ktrace_t *xfs_ilock_trace_buf; 3631 ktrace_t *xfs_ilock_trace_buf;
3631 3632
3632 void 3633 void
3633 xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) 3634 xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
3634 { 3635 {
3635 ktrace_enter(ip->i_lock_trace, 3636 ktrace_enter(ip->i_lock_trace,
3636 (void *)ip, 3637 (void *)ip,
3637 (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */ 3638 (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */
3638 (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */ 3639 (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */
3639 (void *)ra, /* caller of ilock */ 3640 (void *)ra, /* caller of ilock */
3640 (void *)(unsigned long)current_cpu(), 3641 (void *)(unsigned long)current_cpu(),
3641 (void *)(unsigned long)current_pid(), 3642 (void *)(unsigned long)current_pid(),
3642 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); 3643 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
3643 } 3644 }
3644 #endif 3645 #endif
3645 3646
3646 /* 3647 /*
3647 * Return a pointer to the extent record at file index idx. 3648 * Return a pointer to the extent record at file index idx.
3648 */ 3649 */
3649 xfs_bmbt_rec_t * 3650 xfs_bmbt_rec_t *
3650 xfs_iext_get_ext( 3651 xfs_iext_get_ext(
3651 xfs_ifork_t *ifp, /* inode fork pointer */ 3652 xfs_ifork_t *ifp, /* inode fork pointer */
3652 xfs_extnum_t idx) /* index of target extent */ 3653 xfs_extnum_t idx) /* index of target extent */
3653 { 3654 {
3654 ASSERT(idx >= 0); 3655 ASSERT(idx >= 0);
3655 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 3656 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
3656 return ifp->if_u1.if_ext_irec->er_extbuf; 3657 return ifp->if_u1.if_ext_irec->er_extbuf;
3657 } else if (ifp->if_flags & XFS_IFEXTIREC) { 3658 } else if (ifp->if_flags & XFS_IFEXTIREC) {
3658 xfs_ext_irec_t *erp; /* irec pointer */ 3659 xfs_ext_irec_t *erp; /* irec pointer */
3659 int erp_idx = 0; /* irec index */ 3660 int erp_idx = 0; /* irec index */
3660 xfs_extnum_t page_idx = idx; /* ext index in target list */ 3661 xfs_extnum_t page_idx = idx; /* ext index in target list */
3661 3662
3662 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 3663 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
3663 return &erp->er_extbuf[page_idx]; 3664 return &erp->er_extbuf[page_idx];
3664 } else if (ifp->if_bytes) { 3665 } else if (ifp->if_bytes) {
3665 return &ifp->if_u1.if_extents[idx]; 3666 return &ifp->if_u1.if_extents[idx];
3666 } else { 3667 } else {
3667 return NULL; 3668 return NULL;
3668 } 3669 }
3669 } 3670 }
3670 3671
3671 /* 3672 /*
3672 * Insert new item(s) into the extent records for incore inode 3673 * Insert new item(s) into the extent records for incore inode
3673 * fork 'ifp'. 'count' new items are inserted at index 'idx'. 3674 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
3674 */ 3675 */
3675 void 3676 void
3676 xfs_iext_insert( 3677 xfs_iext_insert(
3677 xfs_ifork_t *ifp, /* inode fork pointer */ 3678 xfs_ifork_t *ifp, /* inode fork pointer */
3678 xfs_extnum_t idx, /* starting index of new items */ 3679 xfs_extnum_t idx, /* starting index of new items */
3679 xfs_extnum_t count, /* number of inserted items */ 3680 xfs_extnum_t count, /* number of inserted items */
3680 xfs_bmbt_irec_t *new) /* items to insert */ 3681 xfs_bmbt_irec_t *new) /* items to insert */
3681 { 3682 {
3682 xfs_bmbt_rec_t *ep; /* extent record pointer */ 3683 xfs_bmbt_rec_t *ep; /* extent record pointer */
3683 xfs_extnum_t i; /* extent record index */ 3684 xfs_extnum_t i; /* extent record index */
3684 3685
3685 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3686 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3686 xfs_iext_add(ifp, idx, count); 3687 xfs_iext_add(ifp, idx, count);
3687 for (i = idx; i < idx + count; i++, new++) { 3688 for (i = idx; i < idx + count; i++, new++) {
3688 ep = xfs_iext_get_ext(ifp, i); 3689 ep = xfs_iext_get_ext(ifp, i);
3689 xfs_bmbt_set_all(ep, new); 3690 xfs_bmbt_set_all(ep, new);
3690 } 3691 }
3691 } 3692 }
3692 3693
3693 /* 3694 /*
3694 * This is called when the amount of space required for incore file 3695 * This is called when the amount of space required for incore file
3695 * extents needs to be increased. The ext_diff parameter stores the 3696 * extents needs to be increased. The ext_diff parameter stores the
3696 * number of new extents being added and the idx parameter contains 3697 * number of new extents being added and the idx parameter contains
3697 * the extent index where the new extents will be added. If the new 3698 * the extent index where the new extents will be added. If the new
3698 * extents are being appended, then we just need to (re)allocate and 3699 * extents are being appended, then we just need to (re)allocate and
3699 * initialize the space. Otherwise, if the new extents are being 3700 * initialize the space. Otherwise, if the new extents are being
3700 * inserted into the middle of the existing entries, a bit more work 3701 * inserted into the middle of the existing entries, a bit more work
3701 * is required to make room for the new extents to be inserted. The 3702 * is required to make room for the new extents to be inserted. The
3702 * caller is responsible for filling in the new extent entries upon 3703 * caller is responsible for filling in the new extent entries upon
3703 * return. 3704 * return.
3704 */ 3705 */
3705 void 3706 void
3706 xfs_iext_add( 3707 xfs_iext_add(
3707 xfs_ifork_t *ifp, /* inode fork pointer */ 3708 xfs_ifork_t *ifp, /* inode fork pointer */
3708 xfs_extnum_t idx, /* index to begin adding exts */ 3709 xfs_extnum_t idx, /* index to begin adding exts */
3709 int ext_diff) /* number of extents to add */ 3710 int ext_diff) /* number of extents to add */
3710 { 3711 {
3711 int byte_diff; /* new bytes being added */ 3712 int byte_diff; /* new bytes being added */
3712 int new_size; /* size of extents after adding */ 3713 int new_size; /* size of extents after adding */
3713 xfs_extnum_t nextents; /* number of extents in file */ 3714 xfs_extnum_t nextents; /* number of extents in file */
3714 3715
3715 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3716 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3716 ASSERT((idx >= 0) && (idx <= nextents)); 3717 ASSERT((idx >= 0) && (idx <= nextents));
3717 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 3718 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
3718 new_size = ifp->if_bytes + byte_diff; 3719 new_size = ifp->if_bytes + byte_diff;
3719 /* 3720 /*
3720 * If the new number of extents (nextents + ext_diff) 3721 * If the new number of extents (nextents + ext_diff)
3721 * fits inside the inode, then continue to use the inline 3722 * fits inside the inode, then continue to use the inline
3722 * extent buffer. 3723 * extent buffer.
3723 */ 3724 */
3724 if (nextents + ext_diff <= XFS_INLINE_EXTS) { 3725 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
3725 if (idx < nextents) { 3726 if (idx < nextents) {
3726 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 3727 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
3727 &ifp->if_u2.if_inline_ext[idx], 3728 &ifp->if_u2.if_inline_ext[idx],
3728 (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 3729 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
3729 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 3730 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
3730 } 3731 }
3731 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 3732 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
3732 ifp->if_real_bytes = 0; 3733 ifp->if_real_bytes = 0;
3733 ifp->if_lastex = nextents + ext_diff; 3734 ifp->if_lastex = nextents + ext_diff;
3734 } 3735 }
3735 /* 3736 /*
3736 * Otherwise use a linear (direct) extent list. 3737 * Otherwise use a linear (direct) extent list.
3737 * If the extents are currently inside the inode, 3738 * If the extents are currently inside the inode,
3738 * xfs_iext_realloc_direct will switch us from 3739 * xfs_iext_realloc_direct will switch us from
3739 * inline to direct extent allocation mode. 3740 * inline to direct extent allocation mode.
3740 */ 3741 */
3741 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 3742 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
3742 xfs_iext_realloc_direct(ifp, new_size); 3743 xfs_iext_realloc_direct(ifp, new_size);
3743 if (idx < nextents) { 3744 if (idx < nextents) {
3744 memmove(&ifp->if_u1.if_extents[idx + ext_diff], 3745 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
3745 &ifp->if_u1.if_extents[idx], 3746 &ifp->if_u1.if_extents[idx],
3746 (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 3747 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
3747 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 3748 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
3748 } 3749 }
3749 } 3750 }
3750 /* Indirection array */ 3751 /* Indirection array */
3751 else { 3752 else {
3752 xfs_ext_irec_t *erp; 3753 xfs_ext_irec_t *erp;
3753 int erp_idx = 0; 3754 int erp_idx = 0;
3754 int page_idx = idx; 3755 int page_idx = idx;
3755 3756
3756 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 3757 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
3757 if (ifp->if_flags & XFS_IFEXTIREC) { 3758 if (ifp->if_flags & XFS_IFEXTIREC) {
3758 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 3759 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
3759 } else { 3760 } else {
3760 xfs_iext_irec_init(ifp); 3761 xfs_iext_irec_init(ifp);
3761 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3762 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3762 erp = ifp->if_u1.if_ext_irec; 3763 erp = ifp->if_u1.if_ext_irec;
3763 } 3764 }
3764 /* Extents fit in target extent page */ 3765 /* Extents fit in target extent page */
3765 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 3766 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
3766 if (page_idx < erp->er_extcount) { 3767 if (page_idx < erp->er_extcount) {
3767 memmove(&erp->er_extbuf[page_idx + ext_diff], 3768 memmove(&erp->er_extbuf[page_idx + ext_diff],
3768 &erp->er_extbuf[page_idx], 3769 &erp->er_extbuf[page_idx],
3769 (erp->er_extcount - page_idx) * 3770 (erp->er_extcount - page_idx) *
3770 sizeof(xfs_bmbt_rec_t)); 3771 sizeof(xfs_bmbt_rec_t));
3771 memset(&erp->er_extbuf[page_idx], 0, byte_diff); 3772 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
3772 } 3773 }
3773 erp->er_extcount += ext_diff; 3774 erp->er_extcount += ext_diff;
3774 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 3775 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
3775 } 3776 }
3776 /* Insert a new extent page */ 3777 /* Insert a new extent page */
3777 else if (erp) { 3778 else if (erp) {
3778 xfs_iext_add_indirect_multi(ifp, 3779 xfs_iext_add_indirect_multi(ifp,
3779 erp_idx, page_idx, ext_diff); 3780 erp_idx, page_idx, ext_diff);
3780 } 3781 }
3781 /* 3782 /*
3782 * If extent(s) are being appended to the last page in 3783 * If extent(s) are being appended to the last page in
3783 * the indirection array and the new extent(s) don't fit 3784 * the indirection array and the new extent(s) don't fit
3784 * in the page, then erp is NULL and erp_idx is set to 3785 * in the page, then erp is NULL and erp_idx is set to
3785 * the next index needed in the indirection array. 3786 * the next index needed in the indirection array.
3786 */ 3787 */
3787 else { 3788 else {
3788 int count = ext_diff; 3789 int count = ext_diff;
3789 3790
3790 while (count) { 3791 while (count) {
3791 erp = xfs_iext_irec_new(ifp, erp_idx); 3792 erp = xfs_iext_irec_new(ifp, erp_idx);
3792 erp->er_extcount = count; 3793 erp->er_extcount = count;
3793 count -= MIN(count, (int)XFS_LINEAR_EXTS); 3794 count -= MIN(count, (int)XFS_LINEAR_EXTS);
3794 if (count) { 3795 if (count) {
3795 erp_idx++; 3796 erp_idx++;
3796 } 3797 }
3797 } 3798 }
3798 } 3799 }
3799 } 3800 }
3800 ifp->if_bytes = new_size; 3801 ifp->if_bytes = new_size;
3801 } 3802 }
3802 3803
/*
 * This is called when incore extents are being added to the indirection
 * array and the new extents do not fit in the target extent list. The
 * erp_idx parameter contains the irec index for the target extent list
 * in the indirection array, and the idx parameter contains the extent
 * index within the list. The number of extents being added is stored
 * in the count parameter.
 *
 *    |-------|   |-------|
 *    |       |   |       |    idx - number of extents before idx
 *    |  idx  |   | count |
 *    |       |   |       |    count - number of extents being inserted at idx
 *    |-------|   |-------|
 *    | count |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 */
void
xfs_iext_add_indirect_multi(
	xfs_ifork_t	*ifp,			/* inode fork pointer */
	int		erp_idx,		/* target extent irec index */
	xfs_extnum_t	idx,			/* index within target list */
	int		count)			/* new extents being added */
{
	int		byte_diff;	/* new bytes being added */
	xfs_ext_irec_t	*erp;		/* pointer to irec entry */
	xfs_extnum_t	ext_diff;	/* number of extents to add */
	xfs_extnum_t	ext_cnt;	/* new extents still needed */
	xfs_extnum_t	nex2;		/* extents after idx + count */
	xfs_bmbt_rec_t	*nex2_ep = NULL; /* temp list for nex2 extents */
	int		nlists;		/* number of irec's (lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	nex2 = erp->er_extcount - idx;
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;

	/*
	 * Save second part of target extent list (all extents past
	 * idx + count) in a temporary buffer, so the insertion below
	 * can grow the target page in place.  nex2_ep owns the copy
	 * until it is freed at the bottom of this function.
	 */
	if (nex2) {
		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP);
		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
		erp->er_extcount -= nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
		memset(&erp->er_extbuf[idx], 0, byte_diff);
	}

	/*
	 * Add the new extents to the end of the target
	 * list, then allocate new irec record(s) and
	 * extent buffer(s) as needed to store the rest
	 * of the new extents.
	 */
	ext_cnt = count;
	/* fill whatever room is left in the current page first */
	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
	if (ext_diff) {
		erp->er_extcount += ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}
	/* spill the remainder into freshly allocated irec pages */
	while (ext_cnt) {
		erp_idx++;
		erp = xfs_iext_irec_new(ifp, erp_idx);
		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
		erp->er_extcount = ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}

	/* Add nex2 extents back to indirection array */
	if (nex2) {
		xfs_extnum_t	ext_avail;
		int		i;

		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
		i = 0;
		/*
		 * If nex2 extents fit in the current page, append
		 * nex2_ep after the new extents.
		 */
		if (nex2 <= ext_avail) {
			i = erp->er_extcount;
		}
		/*
		 * Otherwise, check if space is available in the
		 * next page.  Note the assignment to ext_avail in
		 * the condition: it is reused below via i == 0.
		 */
		else if ((erp_idx < nlists - 1) &&
			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
			erp_idx++;
			erp++;
			/* Create a hole for nex2 extents */
			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
		}
		/*
		 * Final choice, create a new extent page for
		 * nex2 extents.
		 */
		else {
			erp_idx++;
			erp = xfs_iext_irec_new(ifp, erp_idx);
		}
		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
		kmem_free(nex2_ep, byte_diff);
		erp->er_extcount += nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
	}
}
3915 3916
3916 /* 3917 /*
3917 * This is called when the amount of space required for incore file 3918 * This is called when the amount of space required for incore file
3918 * extents needs to be decreased. The ext_diff parameter stores the 3919 * extents needs to be decreased. The ext_diff parameter stores the
3919 * number of extents to be removed and the idx parameter contains 3920 * number of extents to be removed and the idx parameter contains
3920 * the extent index where the extents will be removed from. 3921 * the extent index where the extents will be removed from.
3921 * 3922 *
3922 * If the amount of space needed has decreased below the linear 3923 * If the amount of space needed has decreased below the linear
3923 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 3924 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
3924 * extent array. Otherwise, use kmem_realloc() to adjust the 3925 * extent array. Otherwise, use kmem_realloc() to adjust the
3925 * size to what is needed. 3926 * size to what is needed.
3926 */ 3927 */
3927 void 3928 void
3928 xfs_iext_remove( 3929 xfs_iext_remove(
3929 xfs_ifork_t *ifp, /* inode fork pointer */ 3930 xfs_ifork_t *ifp, /* inode fork pointer */
3930 xfs_extnum_t idx, /* index to begin removing exts */ 3931 xfs_extnum_t idx, /* index to begin removing exts */
3931 int ext_diff) /* number of extents to remove */ 3932 int ext_diff) /* number of extents to remove */
3932 { 3933 {
3933 xfs_extnum_t nextents; /* number of extents in file */ 3934 xfs_extnum_t nextents; /* number of extents in file */
3934 int new_size; /* size of extents after removal */ 3935 int new_size; /* size of extents after removal */
3935 3936
3936 ASSERT(ext_diff > 0); 3937 ASSERT(ext_diff > 0);
3937 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3938 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3938 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 3939 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
3939 3940
3940 if (new_size == 0) { 3941 if (new_size == 0) {
3941 xfs_iext_destroy(ifp); 3942 xfs_iext_destroy(ifp);
3942 } else if (ifp->if_flags & XFS_IFEXTIREC) { 3943 } else if (ifp->if_flags & XFS_IFEXTIREC) {
3943 xfs_iext_remove_indirect(ifp, idx, ext_diff); 3944 xfs_iext_remove_indirect(ifp, idx, ext_diff);
3944 } else if (ifp->if_real_bytes) { 3945 } else if (ifp->if_real_bytes) {
3945 xfs_iext_remove_direct(ifp, idx, ext_diff); 3946 xfs_iext_remove_direct(ifp, idx, ext_diff);
3946 } else { 3947 } else {
3947 xfs_iext_remove_inline(ifp, idx, ext_diff); 3948 xfs_iext_remove_inline(ifp, idx, ext_diff);
3948 } 3949 }
3949 ifp->if_bytes = new_size; 3950 ifp->if_bytes = new_size;
3950 } 3951 }
3951 3952
3952 /* 3953 /*
3953 * This removes ext_diff extents from the inline buffer, beginning 3954 * This removes ext_diff extents from the inline buffer, beginning
3954 * at extent index idx. 3955 * at extent index idx.
3955 */ 3956 */
3956 void 3957 void
3957 xfs_iext_remove_inline( 3958 xfs_iext_remove_inline(
3958 xfs_ifork_t *ifp, /* inode fork pointer */ 3959 xfs_ifork_t *ifp, /* inode fork pointer */
3959 xfs_extnum_t idx, /* index to begin removing exts */ 3960 xfs_extnum_t idx, /* index to begin removing exts */
3960 int ext_diff) /* number of extents to remove */ 3961 int ext_diff) /* number of extents to remove */
3961 { 3962 {
3962 int nextents; /* number of extents in file */ 3963 int nextents; /* number of extents in file */
3963 3964
3964 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 3965 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
3965 ASSERT(idx < XFS_INLINE_EXTS); 3966 ASSERT(idx < XFS_INLINE_EXTS);
3966 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3967 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3967 ASSERT(((nextents - ext_diff) > 0) && 3968 ASSERT(((nextents - ext_diff) > 0) &&
3968 (nextents - ext_diff) < XFS_INLINE_EXTS); 3969 (nextents - ext_diff) < XFS_INLINE_EXTS);
3969 3970
3970 if (idx + ext_diff < nextents) { 3971 if (idx + ext_diff < nextents) {
3971 memmove(&ifp->if_u2.if_inline_ext[idx], 3972 memmove(&ifp->if_u2.if_inline_ext[idx],
3972 &ifp->if_u2.if_inline_ext[idx + ext_diff], 3973 &ifp->if_u2.if_inline_ext[idx + ext_diff],
3973 (nextents - (idx + ext_diff)) * 3974 (nextents - (idx + ext_diff)) *
3974 sizeof(xfs_bmbt_rec_t)); 3975 sizeof(xfs_bmbt_rec_t));
3975 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 3976 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
3976 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 3977 0, ext_diff * sizeof(xfs_bmbt_rec_t));
3977 } else { 3978 } else {
3978 memset(&ifp->if_u2.if_inline_ext[idx], 0, 3979 memset(&ifp->if_u2.if_inline_ext[idx], 0,
3979 ext_diff * sizeof(xfs_bmbt_rec_t)); 3980 ext_diff * sizeof(xfs_bmbt_rec_t));
3980 } 3981 }
3981 } 3982 }
3982 3983
/*
 * This removes ext_diff extents from a linear (direct) extent list,
 * beginning at extent index idx.  If the extents are being removed
 * from the end of the list (ie. truncate) then we just need to re-
 * allocate the list to remove the extra space.  Otherwise, if the
 * extents are being removed from the middle of the existing extent
 * entries, then we first need to move the extent records beginning
 * at idx + ext_diff up in the list to overwrite the records being
 * removed, then remove the extra space via kmem_realloc.
 */
void
xfs_iext_remove_direct(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin removing exts */
	int		ext_diff)	/* number of extents to remove */
{
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		new_size;	/* size of extents after removal */

	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
	new_size = ifp->if_bytes -
		(ext_diff * sizeof(xfs_bmbt_rec_t));
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);

	/* Removing every extent: tear the list down entirely */
	if (new_size == 0) {
		xfs_iext_destroy(ifp);
		return;
	}
	/* Move extents up in the list (if needed) */
	if (idx + ext_diff < nextents) {
		memmove(&ifp->if_u1.if_extents[idx],
			&ifp->if_u1.if_extents[idx + ext_diff],
			(nextents - (idx + ext_diff)) *
			 sizeof(xfs_bmbt_rec_t));
	}
	/* Zero the now-stale tail records before shrinking the buffer */
	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
		0, ext_diff * sizeof(xfs_bmbt_rec_t));
	/*
	 * Reallocate the direct extent list. If the extents
	 * will fit inside the inode then xfs_iext_realloc_direct
	 * will switch from direct to inline extent allocation
	 * mode for us.
	 */
	xfs_iext_realloc_direct(ifp, new_size);
	ifp->if_bytes = new_size;
}
4029 4030
/*
 * This is called when incore extents are being removed from the
 * indirection array and the extents being removed span multiple extent
 * buffers. The idx parameter contains the file extent index where we
 * want to begin removing extents, and the count parameter contains
 * how many extents need to be removed.
 *
 *    |-------|   |-------|
 *    | nex1  |   |       |    nex1 - number of extents before idx
 *    |-------|   | count |
 *    |       |   |       |    count - number of extents being removed at idx
 *    | count |   |-------|
 *    |       |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 */
void
xfs_iext_remove_indirect(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin removing extents */
	int		count)		/* number of extents to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		erp_idx = 0;	/* indirection array index */
	xfs_extnum_t	ext_cnt;	/* extents left to remove */
	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
	xfs_extnum_t	nex1;		/* number of extents before idx */
	xfs_extnum_t	nex2;		/* extents after idx + count */
	int		nlists;		/* entries in indirection array */
	int		page_idx = idx;	/* index in target extent list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	/* translate the file extent index into a page + in-page index */
	erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
	ASSERT(erp != NULL);
	/*
	 * NOTE(review): nlists is assigned but not referenced again in
	 * this function as visible here -- looks like a leftover.
	 */
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	nex1 = page_idx;
	ext_cnt = count;
	while (ext_cnt) {
		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
		/*
		 * Check for deletion of entire list;
		 * xfs_iext_irec_remove() updates extent offsets.
		 */
		if (ext_diff == erp->er_extcount) {
			xfs_iext_irec_remove(ifp, erp_idx);
			ext_cnt -= ext_diff;
			nex1 = 0;
			if (ext_cnt) {
				ASSERT(erp_idx < ifp->if_real_bytes /
					XFS_IEXT_BUFSZ);
				/*
				 * The remove shifted later pages down,
				 * so re-read the same erp_idx slot.
				 */
				erp = &ifp->if_u1.if_ext_irec[erp_idx];
				nex1 = 0;
				continue;
			} else {
				break;
			}
		}
		/* Move extents up (if needed) */
		if (nex2) {
			memmove(&erp->er_extbuf[nex1],
				&erp->er_extbuf[nex1 + ext_diff],
				nex2 * sizeof(xfs_bmbt_rec_t));
		}
		/* Zero out rest of page */
		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
		/* Update remaining counters */
		erp->er_extcount -= ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
		ext_cnt -= ext_diff;
		/* subsequent pages are drained from their beginning */
		nex1 = 0;
		erp_idx++;
		erp++;
	}
	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
	/* merge sparsely populated pages back together */
	xfs_iext_irec_compact(ifp);
}
4107 4108
4108 /* 4109 /*
4109 * Create, destroy, or resize a linear (direct) block of extents. 4110 * Create, destroy, or resize a linear (direct) block of extents.
4110 */ 4111 */
4111 void 4112 void
4112 xfs_iext_realloc_direct( 4113 xfs_iext_realloc_direct(
4113 xfs_ifork_t *ifp, /* inode fork pointer */ 4114 xfs_ifork_t *ifp, /* inode fork pointer */
4114 int new_size) /* new size of extents */ 4115 int new_size) /* new size of extents */
4115 { 4116 {
4116 int rnew_size; /* real new size of extents */ 4117 int rnew_size; /* real new size of extents */
4117 4118
4118 rnew_size = new_size; 4119 rnew_size = new_size;
4119 4120
4120 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 4121 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
4121 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 4122 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
4122 (new_size != ifp->if_real_bytes))); 4123 (new_size != ifp->if_real_bytes)));
4123 4124
4124 /* Free extent records */ 4125 /* Free extent records */
4125 if (new_size == 0) { 4126 if (new_size == 0) {
4126 xfs_iext_destroy(ifp); 4127 xfs_iext_destroy(ifp);
4127 } 4128 }
4128 /* Resize direct extent list and zero any new bytes */ 4129 /* Resize direct extent list and zero any new bytes */
4129 else if (ifp->if_real_bytes) { 4130 else if (ifp->if_real_bytes) {
4130 /* Check if extents will fit inside the inode */ 4131 /* Check if extents will fit inside the inode */
4131 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 4132 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
4132 xfs_iext_direct_to_inline(ifp, new_size / 4133 xfs_iext_direct_to_inline(ifp, new_size /
4133 (uint)sizeof(xfs_bmbt_rec_t)); 4134 (uint)sizeof(xfs_bmbt_rec_t));
4134 ifp->if_bytes = new_size; 4135 ifp->if_bytes = new_size;
4135 return; 4136 return;
4136 } 4137 }
4137 if ((new_size & (new_size - 1)) != 0) { 4138 if ((new_size & (new_size - 1)) != 0) {
4138 rnew_size = xfs_iroundup(new_size); 4139 rnew_size = xfs_iroundup(new_size);
4139 } 4140 }
4140 if (rnew_size != ifp->if_real_bytes) { 4141 if (rnew_size != ifp->if_real_bytes) {
4141 ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) 4142 ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
4142 kmem_realloc(ifp->if_u1.if_extents, 4143 kmem_realloc(ifp->if_u1.if_extents,
4143 rnew_size, 4144 rnew_size,
4144 ifp->if_real_bytes, 4145 ifp->if_real_bytes,
4145 KM_SLEEP); 4146 KM_SLEEP);
4146 } 4147 }
4147 if (rnew_size > ifp->if_real_bytes) { 4148 if (rnew_size > ifp->if_real_bytes) {
4148 memset(&ifp->if_u1.if_extents[ifp->if_bytes / 4149 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
4149 (uint)sizeof(xfs_bmbt_rec_t)], 0, 4150 (uint)sizeof(xfs_bmbt_rec_t)], 0,
4150 rnew_size - ifp->if_real_bytes); 4151 rnew_size - ifp->if_real_bytes);
4151 } 4152 }
4152 } 4153 }
4153 /* 4154 /*
4154 * Switch from the inline extent buffer to a direct 4155 * Switch from the inline extent buffer to a direct
4155 * extent list. Be sure to include the inline extent 4156 * extent list. Be sure to include the inline extent
4156 * bytes in new_size. 4157 * bytes in new_size.
4157 */ 4158 */
4158 else { 4159 else {
4159 new_size += ifp->if_bytes; 4160 new_size += ifp->if_bytes;
4160 if ((new_size & (new_size - 1)) != 0) { 4161 if ((new_size & (new_size - 1)) != 0) {
4161 rnew_size = xfs_iroundup(new_size); 4162 rnew_size = xfs_iroundup(new_size);
4162 } 4163 }
4163 xfs_iext_inline_to_direct(ifp, rnew_size); 4164 xfs_iext_inline_to_direct(ifp, rnew_size);
4164 } 4165 }
4165 ifp->if_real_bytes = rnew_size; 4166 ifp->if_real_bytes = rnew_size;
4166 ifp->if_bytes = new_size; 4167 ifp->if_bytes = new_size;
4167 } 4168 }
4168 4169
4169 /* 4170 /*
4170 * Switch from linear (direct) extent records to inline buffer. 4171 * Switch from linear (direct) extent records to inline buffer.
4171 */ 4172 */
4172 void 4173 void
4173 xfs_iext_direct_to_inline( 4174 xfs_iext_direct_to_inline(
4174 xfs_ifork_t *ifp, /* inode fork pointer */ 4175 xfs_ifork_t *ifp, /* inode fork pointer */
4175 xfs_extnum_t nextents) /* number of extents in file */ 4176 xfs_extnum_t nextents) /* number of extents in file */
4176 { 4177 {
4177 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 4178 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
4178 ASSERT(nextents <= XFS_INLINE_EXTS); 4179 ASSERT(nextents <= XFS_INLINE_EXTS);
4179 /* 4180 /*
4180 * The inline buffer was zeroed when we switched 4181 * The inline buffer was zeroed when we switched
4181 * from inline to direct extent allocation mode, 4182 * from inline to direct extent allocation mode,
4182 * so we don't need to clear it here. 4183 * so we don't need to clear it here.
4183 */ 4184 */
4184 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 4185 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
4185 nextents * sizeof(xfs_bmbt_rec_t)); 4186 nextents * sizeof(xfs_bmbt_rec_t));
4186 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4187 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
4187 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 4188 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
4188 ifp->if_real_bytes = 0; 4189 ifp->if_real_bytes = 0;
4189 } 4190 }
4190 4191
4191 /* 4192 /*
4192 * Switch from inline buffer to linear (direct) extent records. 4193 * Switch from inline buffer to linear (direct) extent records.
4193 * new_size should already be rounded up to the next power of 2 4194 * new_size should already be rounded up to the next power of 2
4194 * by the caller (when appropriate), so use new_size as it is. 4195 * by the caller (when appropriate), so use new_size as it is.
4195 * However, since new_size may be rounded up, we can't update 4196 * However, since new_size may be rounded up, we can't update
4196 * if_bytes here. It is the caller's responsibility to update 4197 * if_bytes here. It is the caller's responsibility to update
4197 * if_bytes upon return. 4198 * if_bytes upon return.
4198 */ 4199 */
4199 void 4200 void
4200 xfs_iext_inline_to_direct( 4201 xfs_iext_inline_to_direct(
4201 xfs_ifork_t *ifp, /* inode fork pointer */ 4202 xfs_ifork_t *ifp, /* inode fork pointer */
4202 int new_size) /* number of extents in file */ 4203 int new_size) /* number of extents in file */
4203 { 4204 {
4204 ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) 4205 ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
4205 kmem_alloc(new_size, KM_SLEEP); 4206 kmem_alloc(new_size, KM_SLEEP);
4206 memset(ifp->if_u1.if_extents, 0, new_size); 4207 memset(ifp->if_u1.if_extents, 0, new_size);
4207 if (ifp->if_bytes) { 4208 if (ifp->if_bytes) {
4208 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 4209 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
4209 ifp->if_bytes); 4210 ifp->if_bytes);
4210 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4211 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
4211 sizeof(xfs_bmbt_rec_t)); 4212 sizeof(xfs_bmbt_rec_t));
4212 } 4213 }
4213 ifp->if_real_bytes = new_size; 4214 ifp->if_real_bytes = new_size;
4214 } 4215 }
4215 4216
4216 /* 4217 /*
4217 * Resize an extent indirection array to new_size bytes. 4218 * Resize an extent indirection array to new_size bytes.
4218 */ 4219 */
4219 void 4220 void
4220 xfs_iext_realloc_indirect( 4221 xfs_iext_realloc_indirect(
4221 xfs_ifork_t *ifp, /* inode fork pointer */ 4222 xfs_ifork_t *ifp, /* inode fork pointer */
4222 int new_size) /* new indirection array size */ 4223 int new_size) /* new indirection array size */
4223 { 4224 {
4224 int nlists; /* number of irec's (ex lists) */ 4225 int nlists; /* number of irec's (ex lists) */
4225 int size; /* current indirection array size */ 4226 int size; /* current indirection array size */
4226 4227
4227 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4228 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4228 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4229 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4229 size = nlists * sizeof(xfs_ext_irec_t); 4230 size = nlists * sizeof(xfs_ext_irec_t);
4230 ASSERT(ifp->if_real_bytes); 4231 ASSERT(ifp->if_real_bytes);
4231 ASSERT((new_size >= 0) && (new_size != size)); 4232 ASSERT((new_size >= 0) && (new_size != size));
4232 if (new_size == 0) { 4233 if (new_size == 0) {
4233 xfs_iext_destroy(ifp); 4234 xfs_iext_destroy(ifp);
4234 } else { 4235 } else {
4235 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 4236 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
4236 kmem_realloc(ifp->if_u1.if_ext_irec, 4237 kmem_realloc(ifp->if_u1.if_ext_irec,
4237 new_size, size, KM_SLEEP); 4238 new_size, size, KM_SLEEP);
4238 } 4239 }
4239 } 4240 }
4240 4241
/*
 * Switch from indirection array to linear (direct) extent allocations.
 * All records are first gathered into the leading irec page, which is
 * then stolen to become the direct extent buffer.
 */
void
xfs_iext_indirect_to_direct(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_bmbt_rec_t	*ep;		/* extent record pointer */
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		size;		/* size of file extents */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	/* Caller must have ensured everything fits in one page. */
	ASSERT(nextents <= XFS_LINEAR_EXTS);
	size = nextents * sizeof(xfs_bmbt_rec_t);

	/* Gather all extent records into the first irec's page. */
	xfs_iext_irec_compact_full(ifp);
	/* After full compaction exactly one page must remain. */
	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);

	/*
	 * Steal the single remaining page as the direct buffer, then
	 * free the (one-entry) indirection array that pointed at it.
	 */
	ep = ifp->if_u1.if_ext_irec->er_extbuf;
	kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t));
	ifp->if_flags &= ~XFS_IFEXTIREC;
	ifp->if_u1.if_extents = ep;
	ifp->if_bytes = size;
	if (nextents < XFS_LINEAR_EXTS) {
		/* Trim the full page down to just the bytes in use. */
		xfs_iext_realloc_direct(ifp, size);
	}
}
4269 4270
4270 /* 4271 /*
4271 * Free incore file extents. 4272 * Free incore file extents.
4272 */ 4273 */
4273 void 4274 void
4274 xfs_iext_destroy( 4275 xfs_iext_destroy(
4275 xfs_ifork_t *ifp) /* inode fork pointer */ 4276 xfs_ifork_t *ifp) /* inode fork pointer */
4276 { 4277 {
4277 if (ifp->if_flags & XFS_IFEXTIREC) { 4278 if (ifp->if_flags & XFS_IFEXTIREC) {
4278 int erp_idx; 4279 int erp_idx;
4279 int nlists; 4280 int nlists;
4280 4281
4281 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4282 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4282 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 4283 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
4283 xfs_iext_irec_remove(ifp, erp_idx); 4284 xfs_iext_irec_remove(ifp, erp_idx);
4284 } 4285 }
4285 ifp->if_flags &= ~XFS_IFEXTIREC; 4286 ifp->if_flags &= ~XFS_IFEXTIREC;
4286 } else if (ifp->if_real_bytes) { 4287 } else if (ifp->if_real_bytes) {
4287 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4288 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
4288 } else if (ifp->if_bytes) { 4289 } else if (ifp->if_bytes) {
4289 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4290 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
4290 sizeof(xfs_bmbt_rec_t)); 4291 sizeof(xfs_bmbt_rec_t));
4291 } 4292 }
4292 ifp->if_u1.if_extents = NULL; 4293 ifp->if_u1.if_extents = NULL;
4293 ifp->if_real_bytes = 0; 4294 ifp->if_real_bytes = 0;
4294 ifp->if_bytes = 0; 4295 ifp->if_bytes = 0;
4295 } 4296 }
4296 4297
/*
 * Return a pointer to the extent record for file system block bno.
 * On success *idxp holds the file-based extent index of the record.
 * If bno is past the last extent, returns NULL with *idxp == nextents;
 * if bno falls in a hole, returns the first extent after the hole.
 */
xfs_bmbt_rec_t *			/* pointer to found extent record */
xfs_iext_bno_to_ext(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	xfs_extnum_t	*idxp)		/* index of target extent */
{
	xfs_bmbt_rec_t	*base;		/* pointer to first extent */
	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
	xfs_bmbt_rec_t	*ep = NULL;	/* pointer to target extent */
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	int		high;		/* upper boundary in search */
	xfs_extnum_t	idx = 0;	/* index of target extent */
	int		low;		/* lower boundary in search */
	xfs_extnum_t	nextents;	/* number of file extents */
	xfs_fileoff_t	startoff = 0;	/* start offset of extent */

	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		/* Empty fork: nothing to find. */
		*idxp = 0;
		return NULL;
	}
	low = 0;
	if (ifp->if_flags & XFS_IFEXTIREC) {
		/* Find target extent list */
		int	erp_idx = 0;
		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
		/* Search is confined to this one irec page. */
		base = erp->er_extbuf;
		high = erp->er_extcount - 1;
	} else {
		base = ifp->if_u1.if_extents;
		high = nextents - 1;
	}
	/* Binary search extent records */
	while (low <= high) {
		idx = (low + high) >> 1;
		ep = base + idx;
		startoff = xfs_bmbt_get_startoff(ep);
		blockcount = xfs_bmbt_get_blockcount(ep);
		if (bno < startoff) {
			high = idx - 1;
		} else if (bno >= startoff + blockcount) {
			low = idx + 1;
		} else {
			/* Hit: bno lies within [startoff, startoff+count). */
			/* Convert back to file-based extent index */
			if (ifp->if_flags & XFS_IFEXTIREC) {
				idx += erp->er_extoff;
			}
			*idxp = idx;
			return ep;
		}
	}
	/*
	 * Not found: idx/ep point at the last record probed.  Convert
	 * the page-relative index back to a file-based index, then
	 * advance past that record if bno lies beyond it.
	 */
	/* Convert back to file-based extent index */
	if (ifp->if_flags & XFS_IFEXTIREC) {
		idx += erp->er_extoff;
	}
	if (bno >= startoff + blockcount) {
		if (++idx == nextents) {
			/* bno is past EOF: no following extent. */
			ep = NULL;
		} else {
			ep = xfs_iext_get_ext(ifp, idx);
		}
	}
	*idxp = idx;
	return ep;
}
4365 4366
/*
 * Return a pointer to the indirection array entry containing the
 * extent record for filesystem block bno. Store the index of the
 * target irec in *erp_idxp.
 */
xfs_ext_irec_t *			/* pointer to found extent record */
xfs_iext_bno_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	int		*erp_idxp)	/* irec index of target ext list */
{
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of extent irec's (lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;
	/*
	 * Binary search on each page's first startoff: the target page
	 * is the one whose first extent starts at or before bno while
	 * the next page's first extent starts after bno.
	 */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		/* NULL when erp is the last page (no successor). */
		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
			high = erp_idx - 1;
		} else if (erp_next && bno >=
			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
			low = erp_idx + 1;
		} else {
			break;
		}
	}
	*erp_idxp = erp_idx;
	return erp;
}
4405 4406
/*
 * Return a pointer to the indirection array entry containing the
 * extent record at file extent index *idxp. Store the index of the
 * target irec in *erp_idxp and store the page index of the target
 * extent record in *idxp.
 *
 * The realloc flag signals that new bytes were just added, which
 * biases boundary cases (an index exactly at a page boundary) toward
 * the page that has room for the new record.
 */
xfs_ext_irec_t *
xfs_iext_idx_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
	int		*erp_idxp,	/* pointer to target irec */
	int		realloc)	/* new bytes were just added */
{
	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */
	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	/* page_idx == nextents is allowed: it names the append slot. */
	ASSERT(page_idx >= 0 && page_idx <=
		ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;

	/* Binary search extent irec's */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		prev = erp_idx > 0 ? erp - 1 : NULL;
		/*
		 * An index exactly at this page's start belongs to the
		 * previous page when inserting and that page has room.
		 */
		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
		    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			high = erp_idx - 1;
		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			   (page_idx == erp->er_extoff + erp->er_extcount &&
			    !realloc)) {
			low = erp_idx + 1;
		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			   erp->er_extcount == XFS_LINEAR_EXTS) {
			/*
			 * Inserting just past a full page: the record
			 * goes at the start of the next page, which the
			 * caller may still need to create (erp == NULL).
			 */
			ASSERT(realloc);
			page_idx = 0;
			erp_idx++;
			erp = erp_idx < nlists ? erp + 1 : NULL;
			break;
		} else {
			/* Found: make page_idx relative to this page. */
			page_idx -= erp->er_extoff;
			break;
		}
	}
	*idxp = page_idx;
	*erp_idxp = erp_idx;
	return(erp);
}
4463 4464
/*
 * Allocate and initialize an indirection array once the space needed
 * for incore extents increases above XFS_IEXT_BUFSZ.  The existing
 * direct/inline extent buffer is grown to a full page and becomes
 * the first (and only) irec's extent page.
 */
void
xfs_iext_irec_init(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	xfs_extnum_t	nextents;	/* number of extents in file */

	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	ASSERT(nextents <= XFS_LINEAR_EXTS);

	erp = (xfs_ext_irec_t *)
		kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);

	/* Ensure if_u1.if_extents points at a full XFS_IEXT_BUFSZ page. */
	if (nextents == 0) {
		/*
		 * NOTE(review): this buffer is not zeroed, unlike the
		 * one xfs_iext_inline_to_direct() makes — presumably OK
		 * since er_extcount is 0 below; confirm.
		 */
		ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
			kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
	} else if (!ifp->if_real_bytes) {
		/* Records still inline: move them to a heap page. */
		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
		/* Direct but undersized: grow to a full page. */
		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
	}
	/* Hand the page to the first indirection entry. */
	erp->er_extbuf = ifp->if_u1.if_extents;
	erp->er_extcount = nextents;
	erp->er_extoff = 0;

	ifp->if_flags |= XFS_IFEXTIREC;
	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
	/* if_u1 is a union: this overwrites the if_extents pointer. */
	ifp->if_u1.if_ext_irec = erp;

	return;
}
4501 4502
4502 /* 4503 /*
4503 * Allocate and initialize a new entry in the indirection array. 4504 * Allocate and initialize a new entry in the indirection array.
4504 */ 4505 */
4505 xfs_ext_irec_t * 4506 xfs_ext_irec_t *
4506 xfs_iext_irec_new( 4507 xfs_iext_irec_new(
4507 xfs_ifork_t *ifp, /* inode fork pointer */ 4508 xfs_ifork_t *ifp, /* inode fork pointer */
4508 int erp_idx) /* index for new irec */ 4509 int erp_idx) /* index for new irec */
4509 { 4510 {
4510 xfs_ext_irec_t *erp; /* indirection array pointer */ 4511 xfs_ext_irec_t *erp; /* indirection array pointer */
4511 int i; /* loop counter */ 4512 int i; /* loop counter */
4512 int nlists; /* number of irec's (ex lists) */ 4513 int nlists; /* number of irec's (ex lists) */
4513 4514
4514 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4515 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4515 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4516 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4516 4517
4517 /* Resize indirection array */ 4518 /* Resize indirection array */
4518 xfs_iext_realloc_indirect(ifp, ++nlists * 4519 xfs_iext_realloc_indirect(ifp, ++nlists *
4519 sizeof(xfs_ext_irec_t)); 4520 sizeof(xfs_ext_irec_t));
4520 /* 4521 /*
4521 * Move records down in the array so the 4522 * Move records down in the array so the
4522 * new page can use erp_idx. 4523 * new page can use erp_idx.
4523 */ 4524 */
4524 erp = ifp->if_u1.if_ext_irec; 4525 erp = ifp->if_u1.if_ext_irec;
4525 for (i = nlists - 1; i > erp_idx; i--) { 4526 for (i = nlists - 1; i > erp_idx; i--) {
4526 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); 4527 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
4527 } 4528 }
4528 ASSERT(i == erp_idx); 4529 ASSERT(i == erp_idx);
4529 4530
4530 /* Initialize new extent record */ 4531 /* Initialize new extent record */
4531 erp = ifp->if_u1.if_ext_irec; 4532 erp = ifp->if_u1.if_ext_irec;
4532 erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *) 4533 erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *)
4533 kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); 4534 kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
4534 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4535 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4535 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 4536 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
4536 erp[erp_idx].er_extcount = 0; 4537 erp[erp_idx].er_extcount = 0;
4537 erp[erp_idx].er_extoff = erp_idx > 0 ? 4538 erp[erp_idx].er_extoff = erp_idx > 0 ?
4538 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; 4539 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
4539 return (&erp[erp_idx]); 4540 return (&erp[erp_idx]);
4540 } 4541 }
4541 4542
/*
 * Remove a record from the indirection array, freeing its extent
 * page (if still attached) and shifting later entries down.
 */
void
xfs_iext_irec_remove(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx)	/* irec index to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		i;		/* loop counter */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	/*
	 * er_extbuf may be NULL if the caller already freed the page
	 * (see xfs_iext_irec_compact_pages) to keep er_extoffs from
	 * being adjusted twice.
	 */
	if (erp->er_extbuf) {
		/* Later pages now start er_extcount extents earlier. */
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
			-erp->er_extcount);
		kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ);
	}
	/* Compact extent records */
	erp = ifp->if_u1.if_ext_irec;
	for (i = erp_idx; i < nlists - 1; i++) {
		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
	}
	/*
	 * Manually free the last extent record from the indirection
	 * array. A call to xfs_iext_realloc_indirect() with a size
	 * of zero would result in a call to xfs_iext_destroy() which
	 * would in turn call this function again, creating a nasty
	 * infinite loop.
	 */
	if (--nlists) {
		xfs_iext_realloc_indirect(ifp,
			nlists * sizeof(xfs_ext_irec_t));
	} else {
		kmem_free(ifp->if_u1.if_ext_irec,
			sizeof(xfs_ext_irec_t));
	}
	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
}
4583 4584
4584 /* 4585 /*
4585 * This is called to clean up large amounts of unused memory allocated 4586 * This is called to clean up large amounts of unused memory allocated
4586 * by the indirection array. Before compacting anything though, verify 4587 * by the indirection array. Before compacting anything though, verify
4587 * that the indirection array is still needed and switch back to the 4588 * that the indirection array is still needed and switch back to the
4588 * linear extent list (or even the inline buffer) if possible. The 4589 * linear extent list (or even the inline buffer) if possible. The
4589 * compaction policy is as follows: 4590 * compaction policy is as follows:
4590 * 4591 *
4591 * Full Compaction: Extents fit into a single page (or inline buffer) 4592 * Full Compaction: Extents fit into a single page (or inline buffer)
4592 * Full Compaction: Extents occupy less than 10% of allocated space 4593 * Full Compaction: Extents occupy less than 10% of allocated space
4593 * Partial Compaction: Extents occupy > 10% and < 50% of allocated space 4594 * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
4594 * No Compaction: Extents occupy at least 50% of allocated space 4595 * No Compaction: Extents occupy at least 50% of allocated space
4595 */ 4596 */
4596 void 4597 void
4597 xfs_iext_irec_compact( 4598 xfs_iext_irec_compact(
4598 xfs_ifork_t *ifp) /* inode fork pointer */ 4599 xfs_ifork_t *ifp) /* inode fork pointer */
4599 { 4600 {
4600 xfs_extnum_t nextents; /* number of extents in file */ 4601 xfs_extnum_t nextents; /* number of extents in file */
4601 int nlists; /* number of irec's (ex lists) */ 4602 int nlists; /* number of irec's (ex lists) */
4602 4603
4603 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4604 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4604 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4605 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4605 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4606 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4606 4607
4607 if (nextents == 0) { 4608 if (nextents == 0) {
4608 xfs_iext_destroy(ifp); 4609 xfs_iext_destroy(ifp);
4609 } else if (nextents <= XFS_INLINE_EXTS) { 4610 } else if (nextents <= XFS_INLINE_EXTS) {
4610 xfs_iext_indirect_to_direct(ifp); 4611 xfs_iext_indirect_to_direct(ifp);
4611 xfs_iext_direct_to_inline(ifp, nextents); 4612 xfs_iext_direct_to_inline(ifp, nextents);
4612 } else if (nextents <= XFS_LINEAR_EXTS) { 4613 } else if (nextents <= XFS_LINEAR_EXTS) {
4613 xfs_iext_indirect_to_direct(ifp); 4614 xfs_iext_indirect_to_direct(ifp);
4614 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { 4615 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
4615 xfs_iext_irec_compact_full(ifp); 4616 xfs_iext_irec_compact_full(ifp);
4616 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 4617 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
4617 xfs_iext_irec_compact_pages(ifp); 4618 xfs_iext_irec_compact_pages(ifp);
4618 } 4619 }
4619 } 4620 }
4620 4621
/*
 * Combine extents from neighboring extent pages.  A page is merged
 * into its predecessor whenever the predecessor has room for all of
 * its records; records are never split across pages.
 */
void
xfs_iext_irec_compact_pages(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
	int		erp_idx = 0;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	while (erp_idx < nlists - 1) {
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp + 1;
		if (erp_next->er_extcount <=
		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
			/* Next page fits entirely: append its records. */
			memmove(&erp->er_extbuf[erp->er_extcount],
				erp_next->er_extbuf, erp_next->er_extcount *
				sizeof(xfs_bmbt_rec_t));
			erp->er_extcount += erp_next->er_extcount;
			/*
			 * Free page before removing extent record
			 * so er_extoffs don't get modified in
			 * xfs_iext_irec_remove.
			 */
			kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ);
			erp_next->er_extbuf = NULL;
			xfs_iext_irec_remove(ifp, erp_idx + 1);
			/* The remove shrank the array; re-read size and
			 * retry the same erp_idx against its new neighbor. */
			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
		} else {
			erp_idx++;
		}
	}
}
4657 4658
4658 /* 4659 /*
4659 * Fully compact the extent records managed by the indirection array. 4660 * Fully compact the extent records managed by the indirection array.
4660 */ 4661 */
4661 void 4662 void
4662 xfs_iext_irec_compact_full( 4663 xfs_iext_irec_compact_full(
4663 xfs_ifork_t *ifp) /* inode fork pointer */ 4664 xfs_ifork_t *ifp) /* inode fork pointer */
4664 { 4665 {
4665 xfs_bmbt_rec_t *ep, *ep_next; /* extent record pointers */ 4666 xfs_bmbt_rec_t *ep, *ep_next; /* extent record pointers */
4666 xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ 4667 xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */
4667 int erp_idx = 0; /* extent irec index */ 4668 int erp_idx = 0; /* extent irec index */
4668 int ext_avail; /* empty entries in ex list */ 4669 int ext_avail; /* empty entries in ex list */
4669 int ext_diff; /* number of exts to add */ 4670 int ext_diff; /* number of exts to add */
4670 int nlists; /* number of irec's (ex lists) */ 4671 int nlists; /* number of irec's (ex lists) */
4671 4672
4672 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4673 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4673 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4674 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4674 erp = ifp->if_u1.if_ext_irec; 4675 erp = ifp->if_u1.if_ext_irec;
4675 ep = &erp->er_extbuf[erp->er_extcount]; 4676 ep = &erp->er_extbuf[erp->er_extcount];
4676 erp_next = erp + 1; 4677 erp_next = erp + 1;
4677 ep_next = erp_next->er_extbuf; 4678 ep_next = erp_next->er_extbuf;
4678 while (erp_idx < nlists - 1) { 4679 while (erp_idx < nlists - 1) {
4679 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 4680 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
4680 ext_diff = MIN(ext_avail, erp_next->er_extcount); 4681 ext_diff = MIN(ext_avail, erp_next->er_extcount);
4681 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); 4682 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
4682 erp->er_extcount += ext_diff; 4683 erp->er_extcount += ext_diff;
4683 erp_next->er_extcount -= ext_diff; 4684 erp_next->er_extcount -= ext_diff;
4684 /* Remove next page */ 4685 /* Remove next page */
4685 if (erp_next->er_extcount == 0) { 4686 if (erp_next->er_extcount == 0) {
4686 /* 4687 /*
4687 * Free page before removing extent record 4688 * Free page before removing extent record
4688 * so er_extoffs don't get modified in 4689 * so er_extoffs don't get modified in
4689 * xfs_iext_irec_remove. 4690 * xfs_iext_irec_remove.
4690 */ 4691 */
4691 kmem_free(erp_next->er_extbuf, 4692 kmem_free(erp_next->er_extbuf,
4692 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4693 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
4693 erp_next->er_extbuf = NULL; 4694 erp_next->er_extbuf = NULL;
4694 xfs_iext_irec_remove(ifp, erp_idx + 1); 4695 xfs_iext_irec_remove(ifp, erp_idx + 1);
4695 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4696 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4696 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4697 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4697 /* Update next page */ 4698 /* Update next page */
4698 } else { 4699 } else {
4699 /* Move rest of page up to become next new page */ 4700 /* Move rest of page up to become next new page */
4700 memmove(erp_next->er_extbuf, ep_next, 4701 memmove(erp_next->er_extbuf, ep_next,
4701 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4702 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
4702 ep_next = erp_next->er_extbuf; 4703 ep_next = erp_next->er_extbuf;
4703 memset(&ep_next[erp_next->er_extcount], 0, 4704 memset(&ep_next[erp_next->er_extcount], 0,
4704 (XFS_LINEAR_EXTS - erp_next->er_extcount) * 4705 (XFS_LINEAR_EXTS - erp_next->er_extcount) *
4705 sizeof(xfs_bmbt_rec_t)); 4706 sizeof(xfs_bmbt_rec_t));
4706 } 4707 }
4707 if (erp->er_extcount == XFS_LINEAR_EXTS) { 4708 if (erp->er_extcount == XFS_LINEAR_EXTS) {
4708 erp_idx++; 4709 erp_idx++;
4709 if (erp_idx < nlists) 4710 if (erp_idx < nlists)
4710 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4711 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4711 else 4712 else
4712 break; 4713 break;
4713 } 4714 }
4714 ep = &erp->er_extbuf[erp->er_extcount]; 4715 ep = &erp->er_extbuf[erp->er_extcount];
4715 erp_next = erp + 1; 4716 erp_next = erp + 1;
4716 ep_next = erp_next->er_extbuf; 4717 ep_next = erp_next->er_extbuf;
4717 } 4718 }
4718 } 4719 }
4719 4720
4720 /* 4721 /*
4721 * This is called to update the er_extoff field in the indirection 4722 * This is called to update the er_extoff field in the indirection
4722 * array when extents have been added or removed from one of the 4723 * array when extents have been added or removed from one of the
4723 * extent lists. erp_idx contains the irec index to begin updating 4724 * extent lists. erp_idx contains the irec index to begin updating
4724 * at and ext_diff contains the number of extents that were added 4725 * at and ext_diff contains the number of extents that were added
4725 * or removed. 4726 * or removed.
4726 */ 4727 */
4727 void 4728 void
4728 xfs_iext_irec_update_extoffs( 4729 xfs_iext_irec_update_extoffs(
4729 xfs_ifork_t *ifp, /* inode fork pointer */ 4730 xfs_ifork_t *ifp, /* inode fork pointer */
4730 int erp_idx, /* irec index to update */ 4731 int erp_idx, /* irec index to update */
4731 int ext_diff) /* number of new extents */ 4732 int ext_diff) /* number of new extents */
4732 { 4733 {
4733 int i; /* loop counter */ 4734 int i; /* loop counter */
4734 int nlists; /* number of irec's (ex lists */ 4735 int nlists; /* number of irec's (ex lists */
4735 4736
4736 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4737 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4737 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4738 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4738 for (i = erp_idx; i < nlists; i++) { 4739 for (i = erp_idx; i < nlists; i++) {
4739 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; 4740 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
4740 } 4741 }
4741 } 4742 }
4742 4743
1 /* 1 /*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #ifndef __XFS_INODE_H__ 18 #ifndef __XFS_INODE_H__
19 #define __XFS_INODE_H__ 19 #define __XFS_INODE_H__
20 20
21 /* 21 /*
22 * Fork identifiers. 22 * Fork identifiers.
23 */ 23 */
24 #define XFS_DATA_FORK 0 24 #define XFS_DATA_FORK 0
25 #define XFS_ATTR_FORK 1 25 #define XFS_ATTR_FORK 1
26 26
27 /* 27 /*
28 * The following xfs_ext_irec_t struct introduces a second (top) level 28 * The following xfs_ext_irec_t struct introduces a second (top) level
29 * to the in-core extent allocation scheme. These structs are allocated 29 * to the in-core extent allocation scheme. These structs are allocated
30 * in a contiguous block, creating an indirection array where each entry 30 * in a contiguous block, creating an indirection array where each entry
31 * (irec) contains a pointer to a buffer of in-core extent records which 31 * (irec) contains a pointer to a buffer of in-core extent records which
32 * it manages. Each extent buffer is 4k in size, since 4k is the system 32 * it manages. Each extent buffer is 4k in size, since 4k is the system
33 * page size on Linux i386 and systems with larger page sizes don't seem 33 * page size on Linux i386 and systems with larger page sizes don't seem
34 * to gain much, if anything, by using their native page size as the 34 * to gain much, if anything, by using their native page size as the
35 * extent buffer size. Also, using 4k extent buffers everywhere provides 35 * extent buffer size. Also, using 4k extent buffers everywhere provides
36 * a consistent interface for CXFS across different platforms. 36 * a consistent interface for CXFS across different platforms.
37 * 37 *
38 * There is currently no limit on the number of irec's (extent lists) 38 * There is currently no limit on the number of irec's (extent lists)
39 * allowed, so heavily fragmented files may require an indirection array 39 * allowed, so heavily fragmented files may require an indirection array
40 * which spans multiple system pages of memory. The number of extents 40 * which spans multiple system pages of memory. The number of extents
41 * which would require this amount of contiguous memory is very large 41 * which would require this amount of contiguous memory is very large
42 * and should not cause problems in the foreseeable future. However, 42 * and should not cause problems in the foreseeable future. However,
43 * if the memory needed for the contiguous array ever becomes a problem, 43 * if the memory needed for the contiguous array ever becomes a problem,
44 * it is possible that a third level of indirection may be required. 44 * it is possible that a third level of indirection may be required.
45 */ 45 */
typedef struct xfs_ext_irec {
	xfs_bmbt_rec_t	*er_extbuf;	/* block of extent records */
	xfs_extnum_t	er_extoff;	/* extent offset in file */
					/* (shifted by xfs_iext_irec_update_extoffs
					 * as extents come and go) */
	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
} xfs_ext_irec_t;
51 51
52 /* 52 /*
53 * File incore extent information, present for each of data & attr forks. 53 * File incore extent information, present for each of data & attr forks.
54 */ 54 */
55 #define XFS_IEXT_BUFSZ 4096 55 #define XFS_IEXT_BUFSZ 4096
56 #define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t)) 56 #define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
57 #define XFS_INLINE_EXTS 2 57 #define XFS_INLINE_EXTS 2
58 #define XFS_INLINE_DATA 32 58 #define XFS_INLINE_DATA 32
/* Per-fork incore data; which if_u1 arm is live is governed by if_flags
 * (XFS_IFEXTIREC selects if_ext_irec, see xfs_iext_irec_* routines). */
typedef struct xfs_ifork {
	int			if_bytes;	/* bytes in if_u1 */
	int			if_real_bytes;	/* bytes allocated in if_u1 */
	xfs_bmbt_block_t	*if_broot;	/* file's incore btree root */
	short			if_broot_bytes;	/* bytes allocated for root */
	unsigned char		if_flags;	/* per-fork flags */
	unsigned char		if_ext_max;	/* max # of extent records */
	xfs_extnum_t		if_lastex;	/* last if_extents used */
	union {
		xfs_bmbt_rec_t	*if_extents;	/* linear map file exts */
		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
		char		*if_data;	/* inline file data */
	} if_u1;
	union {
		xfs_bmbt_rec_t	if_inline_ext[XFS_INLINE_EXTS];
						/* very small file extents */
		char		if_inline_data[XFS_INLINE_DATA];
						/* very small file data */
		xfs_dev_t	if_rdev;	/* dev number if special */
		uuid_t		if_uuid;	/* mount point value */
	} if_u2;
} xfs_ifork_t;
81 81
82 /* 82 /*
83 * Flags for xfs_ichgtime(). 83 * Flags for xfs_ichgtime().
84 */ 84 */
85 #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 85 #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
86 #define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */ 86 #define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */
87 #define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */ 87 #define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */
88 88
89 /* 89 /*
90 * Per-fork incore inode flags. 90 * Per-fork incore inode flags.
91 */ 91 */
92 #define XFS_IFINLINE 0x01 /* Inline data is read in */ 92 #define XFS_IFINLINE 0x01 /* Inline data is read in */
93 #define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ 93 #define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
94 #define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ 94 #define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
95 #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ 95 #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
96 96
97 /* 97 /*
98 * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). 98 * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate().
99 */ 99 */
100 #define XFS_IMAP_LOOKUP 0x1 100 #define XFS_IMAP_LOOKUP 0x1
101 #define XFS_IMAP_BULKSTAT 0x2 101 #define XFS_IMAP_BULKSTAT 0x2
102 102
103 #ifdef __KERNEL__ 103 #ifdef __KERNEL__
104 struct bhv_desc; 104 struct bhv_desc;
105 struct bhv_vnode; 105 struct bhv_vnode;
106 struct cred; 106 struct cred;
107 struct ktrace; 107 struct ktrace;
108 struct xfs_buf; 108 struct xfs_buf;
109 struct xfs_bmap_free; 109 struct xfs_bmap_free;
110 struct xfs_bmbt_irec; 110 struct xfs_bmbt_irec;
111 struct xfs_bmbt_block; 111 struct xfs_bmbt_block;
112 struct xfs_inode; 112 struct xfs_inode;
113 struct xfs_inode_log_item; 113 struct xfs_inode_log_item;
114 struct xfs_mount; 114 struct xfs_mount;
115 struct xfs_trans; 115 struct xfs_trans;
116 struct xfs_dquot; 116 struct xfs_dquot;
117 117
118 #if defined(XFS_ILOCK_TRACE) 118 #if defined(XFS_ILOCK_TRACE)
119 #define XFS_ILOCK_KTRACE_SIZE 32 119 #define XFS_ILOCK_KTRACE_SIZE 32
120 extern ktrace_t *xfs_ilock_trace_buf; 120 extern ktrace_t *xfs_ilock_trace_buf;
121 extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); 121 extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
122 #else 122 #else
123 #define xfs_ilock_trace(i,n,f,ra) 123 #define xfs_ilock_trace(i,n,f,ra)
124 #endif 124 #endif
125 125
/* DMAPI attribute state embedded in the iocore (io_dmattrs). */
typedef struct dm_attrs_s {
	__uint32_t	da_dmevmask;	/* DMIG event mask */
	__uint16_t	da_dmstate;	/* DMIG state info */
	__uint16_t	da_pad;		/* DMIG extra padding */
} dm_attrs_t;
131 131
/*
 * I/O-path state embedded in the inode (i_iocore); io_obj points back
 * at the containing object (see XFS_IO_INODE / XFS_IO_DCXVN below).
 */
typedef struct xfs_iocore {
	void		*io_obj;	/* pointer to container
					 * inode or dcxvn structure */
	struct xfs_mount *io_mount;	/* fs mount struct ptr */
#ifdef DEBUG
	/* Debug-only aliases of the inode's locks */
	mrlock_t	*io_lock;	/* inode IO lock */
	mrlock_t	*io_iolock;	/* inode IO lock */
#endif

	/* I/O state */
	xfs_fsize_t	io_new_size;	/* sz when write completes */

	/* Miscellaneous state. */
	unsigned int	io_flags;	/* IO related flags (XFS_IOCORE_RT) */

	/* DMAPI state */
	dm_attrs_t	io_dmattrs;

} xfs_iocore_t;
151 151
152 #define io_dmevmask io_dmattrs.da_dmevmask 152 #define io_dmevmask io_dmattrs.da_dmevmask
153 #define io_dmstate io_dmattrs.da_dmstate 153 #define io_dmstate io_dmattrs.da_dmstate
154 154
155 #define XFS_IO_INODE(io) ((xfs_inode_t *) ((io)->io_obj)) 155 #define XFS_IO_INODE(io) ((xfs_inode_t *) ((io)->io_obj))
156 #define XFS_IO_DCXVN(io) ((dcxvn_t *) ((io)->io_obj)) 156 #define XFS_IO_DCXVN(io) ((dcxvn_t *) ((io)->io_obj))
157 157
158 /* 158 /*
159 * Flags in the flags field 159 * Flags in the flags field
160 */ 160 */
161 161
162 #define XFS_IOCORE_RT 0x1 162 #define XFS_IOCORE_RT 0x1
163 163
164 /* 164 /*
165 * xfs_iocore prototypes 165 * xfs_iocore prototypes
166 */ 166 */
167 167
168 extern void xfs_iocore_inode_init(struct xfs_inode *); 168 extern void xfs_iocore_inode_init(struct xfs_inode *);
169 extern void xfs_iocore_inode_reinit(struct xfs_inode *); 169 extern void xfs_iocore_inode_reinit(struct xfs_inode *);
170 170
171 171
172 /* 172 /*
173 * This is the type used in the xfs inode hash table. 173 * This is the type used in the xfs inode hash table.
174 * An array of these is allocated for each mounted 174 * An array of these is allocated for each mounted
175 * file system to hash the inodes for that file system. 175 * file system to hash the inodes for that file system.
176 */ 176 */
typedef struct xfs_ihash {
	struct xfs_inode *ih_next;	/* head of hash chain (inodes link
					 * via their i_next field) */
	rwlock_t	ih_lock;	/* NOTE(review): presumably guards this
					 * chain — confirm in xfs_iget.c */
	uint		ih_version;	/* NOTE(review): looks like a chain
					 * change counter — confirm usage */
} xfs_ihash_t;
182 182
183 #define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize)) 183 #define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
184 184
185 /* 185 /*
186 * This is the xfs inode cluster hash. This hash is used by xfs_iflush to 186 * This is the xfs inode cluster hash. This hash is used by xfs_iflush to
187 * find inodes that share a cluster and can be flushed to disk at the same 187 * find inodes that share a cluster and can be flushed to disk at the same
188 * time. 188 * time.
189 */ 189 */
typedef struct xfs_chashlist {
	struct xfs_chashlist *chl_next;		/* next entry in hash bucket */
	struct xfs_chashlist *chl_prev;		/* prev entry in hash bucket */
	struct xfs_inode *chl_ip;		/* an inode in this cluster;
						 * inodes chain via i_cnext /
						 * i_cprev */
	xfs_daddr_t	chl_blkno;	/* starting block number of
					 * the cluster */
	struct xfs_buf	*chl_buf;	/* the inode buffer */
} xfs_chashlist_t;
198 198
/* One bucket of the cluster hash (see XFS_CHASH below). */
typedef struct xfs_chash {
	xfs_chashlist_t	*ch_list;	/* chain of cluster entries */
	lock_t		ch_lock;	/* NOTE(review): presumably serializes
					 * ch_list access — confirm */
} xfs_chash_t;
203 203
204 #define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize)) 204 #define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize))
205 205
206 206
207 /* 207 /*
208 * This is the xfs in-core inode structure. 208 * This is the xfs in-core inode structure.
209 * Most of the on-disk inode is embedded in the i_d field. 209 * Most of the on-disk inode is embedded in the i_d field.
210 * 210 *
211 * The extent pointers/inline file space, however, are managed 211 * The extent pointers/inline file space, however, are managed
212 * separately. The memory for this information is pointed to by 212 * separately. The memory for this information is pointed to by
213 * the if_u1 unions depending on the type of the data. 213 * the if_u1 unions depending on the type of the data.
214 * This is used to linearize the array of extents for fast in-core 214 * This is used to linearize the array of extents for fast in-core
215 * access. This is used until the file's number of extents 215 * access. This is used until the file's number of extents
216 * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers 216 * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers
217 * are accessed through the buffer cache. 217 * are accessed through the buffer cache.
218 * 218 *
219 * Other state kept in the in-core inode is used for identification, 219 * Other state kept in the in-core inode is used for identification,
220 * locking, transactional updating, etc of the inode. 220 * locking, transactional updating, etc of the inode.
221 * 221 *
222 * Generally, we do not want to hold the i_rlock while holding the 222 * Generally, we do not want to hold the i_rlock while holding the
223 * i_ilock. Hierarchy is i_iolock followed by i_rlock. 223 * i_ilock. Hierarchy is i_iolock followed by i_rlock.
224 * 224 *
225 * xfs_iptr_t contains all the inode fields upto and including the 225 * xfs_iptr_t contains all the inode fields upto and including the
226 * i_mnext and i_mprev fields, it is used as a marker in the inode 226 * i_mnext and i_mprev fields, it is used as a marker in the inode
227 * chain off the mount structure by xfs_sync calls. 227 * chain off the mount structure by xfs_sync calls.
228 */ 228 */
229 229
/*
 * Prefix-compatible head of xfs_inode_t (fields up to and including
 * the mount links); used as a placemarker in the per-mount inode
 * chain, per the comment above.
 */
typedef struct {
	struct xfs_ihash	*ip_hash;	/* pointer to hash header */
	struct xfs_inode	*ip_next;	/* inode hash link forw */
	struct xfs_inode	*ip_mnext;	/* next inode in mount list */
	struct xfs_inode	*ip_mprev;	/* ptr to prev inode */
	struct xfs_inode	**ip_prevp;	/* ptr to prev i_next */
	struct xfs_mount	*ip_mount;	/* fs mount struct ptr */
} xfs_iptr_t;
238 238
/*
 * The in-core XFS inode.  Most of the on-disk inode lives in i_d;
 * the leading fields must stay layout-compatible with xfs_iptr_t above.
 */
typedef struct xfs_inode {
	/* Inode linking and identification information. */
	struct xfs_ihash	*i_hash;	/* pointer to hash header */
	struct xfs_inode	*i_next;	/* inode hash link forw */
	struct xfs_inode	*i_mnext;	/* next inode in mount list */
	struct xfs_inode	*i_mprev;	/* ptr to prev inode */
	struct xfs_inode	**i_prevp;	/* ptr to prev i_next */
	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
	struct list_head	i_reclaim;	/* reclaim list */
	struct bhv_desc		i_bhv_desc;	/* inode behavior descriptor*/
	struct xfs_dquot	*i_udquot;	/* user dquot */
	struct xfs_dquot	*i_gdquot;	/* group dquot */

	/* Inode location stuff */
	xfs_ino_t		i_ino;		/* inode number (agno/agino)*/
	xfs_daddr_t		i_blkno;	/* blkno of inode buffer */
	ushort			i_len;		/* len of inode buffer */
	ushort			i_boffset;	/* off of inode in buffer */

	/* Extent information. */
	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
	xfs_ifork_t		i_df;		/* data fork */

	/* Transaction and locking information. */
	struct xfs_trans	*i_transp;	/* ptr to owning transaction*/
	struct xfs_inode_log_item *i_itemp;	/* logging information */
	mrlock_t		i_lock;		/* inode lock */
	mrlock_t		i_iolock;	/* inode IO lock */
	sema_t			i_flock;	/* inode flush lock */
	atomic_t		i_pincount;	/* inode pin count */
	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
#ifdef HAVE_REFCACHE
	struct xfs_inode	**i_refcache;	/* ptr to entry in ref cache */
	struct xfs_inode	*i_release;	/* inode to unref */
#endif
	/* I/O state */
	xfs_iocore_t		i_iocore;	/* I/O core */

	/* Miscellaneous state. */
	unsigned short		i_flags;	/* see defined flags below */
	unsigned char		i_update_core;	/* timestamps/size is dirty */
	unsigned char		i_update_size;	/* di_size field is dirty */
	unsigned int		i_gen;		/* generation count */
	unsigned int		i_delayed_blks;	/* count of delay alloc blks */

	xfs_dinode_core_t	i_d;		/* most of ondisk inode */
	xfs_chashlist_t		*i_chash;	/* cluster hash list header */
	struct xfs_inode	*i_cnext;	/* cluster hash link forward */
	struct xfs_inode	*i_cprev;	/* cluster hash link backward */

	/* Trace buffers per inode. */
#ifdef XFS_BMAP_TRACE
	struct ktrace		*i_xtrace;	/* inode extent list trace */
#endif
#ifdef XFS_BMBT_TRACE
	struct ktrace		*i_btrace;	/* inode bmap btree trace */
#endif
#ifdef XFS_RW_TRACE
	struct ktrace		*i_rwtrace;	/* inode read/write trace */
#endif
#ifdef XFS_ILOCK_TRACE
	struct ktrace		*i_lock_trace;	/* inode lock/unlock trace */
#endif
#ifdef XFS_DIR2_TRACE
	struct ktrace		*i_dir_trace;	/* inode directory trace */
#endif
} xfs_inode_t;
306 306
307 #endif /* __KERNEL__ */ 307 #endif /* __KERNEL__ */
308 308
309 309
310 /* 310 /*
311 * Fork handling. 311 * Fork handling.
312 */ 312 */
313 #define XFS_IFORK_PTR(ip,w) \ 313 #define XFS_IFORK_PTR(ip,w) \
314 ((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp) 314 ((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp)
315 #define XFS_IFORK_Q(ip) XFS_CFORK_Q(&(ip)->i_d) 315 #define XFS_IFORK_Q(ip) XFS_CFORK_Q(&(ip)->i_d)
316 #define XFS_IFORK_DSIZE(ip) XFS_CFORK_DSIZE(&ip->i_d, ip->i_mount) 316 #define XFS_IFORK_DSIZE(ip) XFS_CFORK_DSIZE(&ip->i_d, ip->i_mount)
317 #define XFS_IFORK_ASIZE(ip) XFS_CFORK_ASIZE(&ip->i_d, ip->i_mount) 317 #define XFS_IFORK_ASIZE(ip) XFS_CFORK_ASIZE(&ip->i_d, ip->i_mount)
318 #define XFS_IFORK_SIZE(ip,w) XFS_CFORK_SIZE(&ip->i_d, ip->i_mount, w) 318 #define XFS_IFORK_SIZE(ip,w) XFS_CFORK_SIZE(&ip->i_d, ip->i_mount, w)
319 #define XFS_IFORK_FORMAT(ip,w) XFS_CFORK_FORMAT(&ip->i_d, w) 319 #define XFS_IFORK_FORMAT(ip,w) XFS_CFORK_FORMAT(&ip->i_d, w)
320 #define XFS_IFORK_FMT_SET(ip,w,n) XFS_CFORK_FMT_SET(&ip->i_d, w, n) 320 #define XFS_IFORK_FMT_SET(ip,w,n) XFS_CFORK_FMT_SET(&ip->i_d, w, n)
321 #define XFS_IFORK_NEXTENTS(ip,w) XFS_CFORK_NEXTENTS(&ip->i_d, w) 321 #define XFS_IFORK_NEXTENTS(ip,w) XFS_CFORK_NEXTENTS(&ip->i_d, w)
322 #define XFS_IFORK_NEXT_SET(ip,w,n) XFS_CFORK_NEXT_SET(&ip->i_d, w, n) 322 #define XFS_IFORK_NEXT_SET(ip,w,n) XFS_CFORK_NEXT_SET(&ip->i_d, w, n)
323 323
324 324
325 #ifdef __KERNEL__ 325 #ifdef __KERNEL__
326 326
327 /* 327 /*
328 * In-core inode flags. 328 * In-core inode flags.
329 */ 329 */
330 #define XFS_IGRIO 0x0001 /* inode used for guaranteed rate i/o */ 330 #define XFS_IGRIO 0x0001 /* inode used for guaranteed rate i/o */
331 #define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ 331 #define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */
332 #define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ 332 #define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */
333 #define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ 333 #define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */
334 #define XFS_ISTALE 0x0010 /* inode has been staled */ 334 #define XFS_ISTALE 0x0010 /* inode has been staled */
335 #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ 335 #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
336 #define XFS_INEW 0x0040 336 #define XFS_INEW 0x0040
337 337
338 /* 338 /*
339 * Flags for inode locking. 339 * Flags for inode locking.
340 */ 340 */
341 #define XFS_IOLOCK_EXCL 0x001 341 #define XFS_IOLOCK_EXCL 0x001
342 #define XFS_IOLOCK_SHARED 0x002 342 #define XFS_IOLOCK_SHARED 0x002
343 #define XFS_ILOCK_EXCL 0x004 343 #define XFS_ILOCK_EXCL 0x004
344 #define XFS_ILOCK_SHARED 0x008 344 #define XFS_ILOCK_SHARED 0x008
345 #define XFS_IUNLOCK_NONOTIFY 0x010 345 #define XFS_IUNLOCK_NONOTIFY 0x010
346 /* XFS_IOLOCK_NESTED 0x020 */ 346 /* XFS_IOLOCK_NESTED 0x020 */
347 #define XFS_EXTENT_TOKEN_RD 0x040 347 #define XFS_EXTENT_TOKEN_RD 0x040
348 #define XFS_SIZE_TOKEN_RD 0x080 348 #define XFS_SIZE_TOKEN_RD 0x080
349 #define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD) 349 #define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
350 #define XFS_WILLLEND 0x100 /* Always acquire tokens for lending */ 350 #define XFS_WILLLEND 0x100 /* Always acquire tokens for lending */
351 #define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND) 351 #define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
352 #define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND) 352 #define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
353 #define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND) 353 #define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND)
354 /* XFS_SIZE_TOKEN_WANT 0x200 */ 354 /* XFS_SIZE_TOKEN_WANT 0x200 */
355 355
356 #define XFS_LOCK_MASK \ 356 #define XFS_LOCK_MASK \
357 (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \ 357 (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \
358 XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \ 358 XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \
359 XFS_WILLLEND) 359 XFS_WILLLEND)
360 360
361 /* 361 /*
362 * Flags for xfs_iflush() 362 * Flags for xfs_iflush()
363 */ 363 */
364 #define XFS_IFLUSH_DELWRI_ELSE_SYNC 1 364 #define XFS_IFLUSH_DELWRI_ELSE_SYNC 1
365 #define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2 365 #define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2
366 #define XFS_IFLUSH_SYNC 3 366 #define XFS_IFLUSH_SYNC 3
367 #define XFS_IFLUSH_ASYNC 4 367 #define XFS_IFLUSH_ASYNC 4
368 #define XFS_IFLUSH_DELWRI 5 368 #define XFS_IFLUSH_DELWRI 5
369 369
370 /* 370 /*
371 * Flags for xfs_itruncate_start(). 371 * Flags for xfs_itruncate_start().
372 */ 372 */
373 #define XFS_ITRUNC_DEFINITE 0x1 373 #define XFS_ITRUNC_DEFINITE 0x1
374 #define XFS_ITRUNC_MAYBE 0x2 374 #define XFS_ITRUNC_MAYBE 0x2
375 375
376 #define XFS_ITOV(ip) BHV_TO_VNODE(XFS_ITOBHV(ip)) 376 #define XFS_ITOV(ip) BHV_TO_VNODE(XFS_ITOBHV(ip))
377 #define XFS_ITOV_NULL(ip) BHV_TO_VNODE_NULL(XFS_ITOBHV(ip)) 377 #define XFS_ITOV_NULL(ip) BHV_TO_VNODE_NULL(XFS_ITOBHV(ip))
378 #define XFS_ITOBHV(ip) ((struct bhv_desc *)(&((ip)->i_bhv_desc))) 378 #define XFS_ITOBHV(ip) ((struct bhv_desc *)(&((ip)->i_bhv_desc)))
379 #define XFS_BHVTOI(bhvp) ((xfs_inode_t *)((char *)(bhvp) - \ 379 #define XFS_BHVTOI(bhvp) ((xfs_inode_t *)((char *)(bhvp) - \
380 (char *)&(((xfs_inode_t *)0)->i_bhv_desc))) 380 (char *)&(((xfs_inode_t *)0)->i_bhv_desc)))
381 #define BHV_IS_XFS(bdp) (BHV_OPS(bdp) == &xfs_vnodeops) 381 #define BHV_IS_XFS(bdp) (BHV_OPS(bdp) == &xfs_vnodeops)
382 382
383 /* 383 /*
384 * For multiple groups support: if S_ISGID bit is set in the parent 384 * For multiple groups support: if S_ISGID bit is set in the parent
385 * directory, group of new file is set to that of the parent, and 385 * directory, group of new file is set to that of the parent, and
386 * new subdirectory gets S_ISGID bit from parent. 386 * new subdirectory gets S_ISGID bit from parent.
387 */ 387 */
388 #define XFS_INHERIT_GID(pip, vfsp) \ 388 #define XFS_INHERIT_GID(pip, vfsp) \
389 (((vfsp)->vfs_flag & VFS_GRPID) || ((pip)->i_d.di_mode & S_ISGID)) 389 (((vfsp)->vfs_flag & VFS_GRPID) || ((pip)->i_d.di_mode & S_ISGID))
390 390
391 /* 391 /*
392 * xfs_iget.c prototypes. 392 * Flags for xfs_iget()
393 */ 393 */
394 #define XFS_IGET_CREATE 0x1
395 #define XFS_IGET_BULKSTAT 0x2
394 396
395 #define IGET_CREATE 1 397 /*
396 398 * xfs_iget.c prototypes.
399 */
397 void xfs_ihash_init(struct xfs_mount *); 400 void xfs_ihash_init(struct xfs_mount *);
398 void xfs_ihash_free(struct xfs_mount *); 401 void xfs_ihash_free(struct xfs_mount *);
399 void xfs_chash_init(struct xfs_mount *); 402 void xfs_chash_init(struct xfs_mount *);
400 void xfs_chash_free(struct xfs_mount *); 403 void xfs_chash_free(struct xfs_mount *);
401 xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, 404 xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
402 struct xfs_trans *); 405 struct xfs_trans *);
403 void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); 406 void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *);
404 int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 407 int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
405 uint, uint, xfs_inode_t **, xfs_daddr_t); 408 uint, uint, xfs_inode_t **, xfs_daddr_t);
406 void xfs_iput(xfs_inode_t *, uint); 409 void xfs_iput(xfs_inode_t *, uint);
407 void xfs_iput_new(xfs_inode_t *, uint); 410 void xfs_iput_new(xfs_inode_t *, uint);
408 void xfs_ilock(xfs_inode_t *, uint); 411 void xfs_ilock(xfs_inode_t *, uint);
409 int xfs_ilock_nowait(xfs_inode_t *, uint); 412 int xfs_ilock_nowait(xfs_inode_t *, uint);
410 void xfs_iunlock(xfs_inode_t *, uint); 413 void xfs_iunlock(xfs_inode_t *, uint);
411 void xfs_ilock_demote(xfs_inode_t *, uint); 414 void xfs_ilock_demote(xfs_inode_t *, uint);
412 void xfs_iflock(xfs_inode_t *); 415 void xfs_iflock(xfs_inode_t *);
413 int xfs_iflock_nowait(xfs_inode_t *); 416 int xfs_iflock_nowait(xfs_inode_t *);
414 uint xfs_ilock_map_shared(xfs_inode_t *); 417 uint xfs_ilock_map_shared(xfs_inode_t *);
415 void xfs_iunlock_map_shared(xfs_inode_t *, uint); 418 void xfs_iunlock_map_shared(xfs_inode_t *, uint);
416 void xfs_ifunlock(xfs_inode_t *); 419 void xfs_ifunlock(xfs_inode_t *);
417 void xfs_ireclaim(xfs_inode_t *); 420 void xfs_ireclaim(xfs_inode_t *);
418 int xfs_finish_reclaim(xfs_inode_t *, int, int); 421 int xfs_finish_reclaim(xfs_inode_t *, int, int);
419 int xfs_finish_reclaim_all(struct xfs_mount *, int); 422 int xfs_finish_reclaim_all(struct xfs_mount *, int);
420 423
421 /* 424 /*
422 * xfs_inode.c prototypes. 425 * xfs_inode.c prototypes.
423 */ 426 */
424 int xfs_itobp(struct xfs_mount *, struct xfs_trans *, 427 int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
425 xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **, 428 xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
426 xfs_daddr_t, uint); 429 xfs_daddr_t, uint);
427 int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 430 int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
428 xfs_inode_t **, xfs_daddr_t); 431 xfs_inode_t **, xfs_daddr_t, uint);
429 int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); 432 int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
430 int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 433 int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
431 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, 434 xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
432 int, struct xfs_buf **, boolean_t *, xfs_inode_t **); 435 int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
433 void xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, 436 void xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *,
434 int); 437 int);
435 uint xfs_ip2xflags(struct xfs_inode *); 438 uint xfs_ip2xflags(struct xfs_inode *);
436 uint xfs_dic2xflags(struct xfs_dinode_core *); 439 uint xfs_dic2xflags(struct xfs_dinode_core *);
437 int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 440 int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
438 struct xfs_bmap_free *); 441 struct xfs_bmap_free *);
439 void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); 442 void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
440 int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 443 int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
441 xfs_fsize_t, int, int); 444 xfs_fsize_t, int, int);
442 int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 445 int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
443 int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *); 446 int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
444 void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *, 447 void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
445 xfs_fsize_t, int); 448 xfs_fsize_t, int);
446 449
447 void xfs_idestroy_fork(xfs_inode_t *, int); 450 void xfs_idestroy_fork(xfs_inode_t *, int);
448 void xfs_idestroy(xfs_inode_t *); 451 void xfs_idestroy(xfs_inode_t *);
449 void xfs_idata_realloc(xfs_inode_t *, int, int); 452 void xfs_idata_realloc(xfs_inode_t *, int, int);
450 void xfs_iextract(xfs_inode_t *); 453 void xfs_iextract(xfs_inode_t *);
451 void xfs_iext_realloc(xfs_inode_t *, int, int); 454 void xfs_iext_realloc(xfs_inode_t *, int, int);
452 void xfs_iroot_realloc(xfs_inode_t *, int, int); 455 void xfs_iroot_realloc(xfs_inode_t *, int, int);
453 void xfs_ipin(xfs_inode_t *); 456 void xfs_ipin(xfs_inode_t *);
454 void xfs_iunpin(xfs_inode_t *); 457 void xfs_iunpin(xfs_inode_t *);
455 int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int); 458 int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
456 int xfs_iflush(xfs_inode_t *, uint); 459 int xfs_iflush(xfs_inode_t *, uint);
457 void xfs_iflush_all(struct xfs_mount *); 460 void xfs_iflush_all(struct xfs_mount *);
458 int xfs_iaccess(xfs_inode_t *, mode_t, cred_t *); 461 int xfs_iaccess(xfs_inode_t *, mode_t, cred_t *);
459 uint xfs_iroundup(uint); 462 uint xfs_iroundup(uint);
460 void xfs_ichgtime(xfs_inode_t *, int); 463 void xfs_ichgtime(xfs_inode_t *, int);
461 xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 464 xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
462 void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 465 void xfs_lock_inodes(xfs_inode_t **, int, int, uint);
463 466
464 xfs_inode_t *xfs_vtoi(struct bhv_vnode *vp); 467 xfs_inode_t *xfs_vtoi(struct bhv_vnode *vp);
465 468
466 void xfs_synchronize_atime(xfs_inode_t *); 469 void xfs_synchronize_atime(xfs_inode_t *);
467 470
468 xfs_bmbt_rec_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 471 xfs_bmbt_rec_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
469 void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 472 void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
470 xfs_bmbt_irec_t *); 473 xfs_bmbt_irec_t *);
471 void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); 474 void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
472 void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); 475 void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
473 void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); 476 void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int);
474 void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); 477 void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
475 void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); 478 void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
476 void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); 479 void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
477 void xfs_iext_realloc_direct(xfs_ifork_t *, int); 480 void xfs_iext_realloc_direct(xfs_ifork_t *, int);
478 void xfs_iext_realloc_indirect(xfs_ifork_t *, int); 481 void xfs_iext_realloc_indirect(xfs_ifork_t *, int);
479 void xfs_iext_indirect_to_direct(xfs_ifork_t *); 482 void xfs_iext_indirect_to_direct(xfs_ifork_t *);
480 void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); 483 void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
481 void xfs_iext_inline_to_direct(xfs_ifork_t *, int); 484 void xfs_iext_inline_to_direct(xfs_ifork_t *, int);
482 void xfs_iext_destroy(xfs_ifork_t *); 485 void xfs_iext_destroy(xfs_ifork_t *);
483 xfs_bmbt_rec_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *); 486 xfs_bmbt_rec_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *);
484 xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *); 487 xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *);
485 xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int); 488 xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int);
486 void xfs_iext_irec_init(xfs_ifork_t *); 489 void xfs_iext_irec_init(xfs_ifork_t *);
487 xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int); 490 xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int);
488 void xfs_iext_irec_remove(xfs_ifork_t *, int); 491 void xfs_iext_irec_remove(xfs_ifork_t *, int);
489 void xfs_iext_irec_compact(xfs_ifork_t *); 492 void xfs_iext_irec_compact(xfs_ifork_t *);
490 void xfs_iext_irec_compact_pages(xfs_ifork_t *); 493 void xfs_iext_irec_compact_pages(xfs_ifork_t *);
491 void xfs_iext_irec_compact_full(xfs_ifork_t *); 494 void xfs_iext_irec_compact_full(xfs_ifork_t *);
492 void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); 495 void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
493 496
494 #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 497 #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
495 498
496 #ifdef DEBUG 499 #ifdef DEBUG
497 void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t); 500 void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t);
498 #else /* DEBUG */ 501 #else /* DEBUG */
499 #define xfs_isize_check(mp, ip, isize) 502 #define xfs_isize_check(mp, ip, isize)
500 #endif /* DEBUG */ 503 #endif /* DEBUG */
501 504
502 #if defined(DEBUG) 505 #if defined(DEBUG)
503 void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); 506 void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
504 #else 507 #else
505 #define xfs_inobp_check(mp, bp) 508 #define xfs_inobp_check(mp, bp)
506 #endif /* DEBUG */ 509 #endif /* DEBUG */
507 510
508 extern struct kmem_zone *xfs_chashlist_zone; 511 extern struct kmem_zone *xfs_chashlist_zone;
509 extern struct kmem_zone *xfs_ifork_zone; 512 extern struct kmem_zone *xfs_ifork_zone;
510 extern struct kmem_zone *xfs_inode_zone; 513 extern struct kmem_zone *xfs_inode_zone;
511 extern struct kmem_zone *xfs_ili_zone; 514 extern struct kmem_zone *xfs_ili_zone;
512 515
513 #endif /* __KERNEL__ */ 516 #endif /* __KERNEL__ */
514 517
515 #endif /* __XFS_INODE_H__ */ 518 #endif /* __XFS_INODE_H__ */
516 519
1 /* 1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_dir2.h" 27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h" 28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h" 29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h" 31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h" 32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h" 33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h" 34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h" 35 #include "xfs_dinode.h"
36 #include "xfs_inode.h" 36 #include "xfs_inode.h"
37 #include "xfs_ialloc.h" 37 #include "xfs_ialloc.h"
38 #include "xfs_itable.h" 38 #include "xfs_itable.h"
39 #include "xfs_error.h" 39 #include "xfs_error.h"
40 #include "xfs_btree.h" 40 #include "xfs_btree.h"
41 41
42 STATIC int 42 STATIC int
43 xfs_bulkstat_one_iget( 43 xfs_bulkstat_one_iget(
44 xfs_mount_t *mp, /* mount point for filesystem */ 44 xfs_mount_t *mp, /* mount point for filesystem */
45 xfs_ino_t ino, /* inode number to get data for */ 45 xfs_ino_t ino, /* inode number to get data for */
46 xfs_daddr_t bno, /* starting bno of inode cluster */ 46 xfs_daddr_t bno, /* starting bno of inode cluster */
47 xfs_bstat_t *buf, /* return buffer */ 47 xfs_bstat_t *buf, /* return buffer */
48 int *stat) /* BULKSTAT_RV_... */ 48 int *stat) /* BULKSTAT_RV_... */
49 { 49 {
50 xfs_dinode_core_t *dic; /* dinode core info pointer */ 50 xfs_dinode_core_t *dic; /* dinode core info pointer */
51 xfs_inode_t *ip; /* incore inode pointer */ 51 xfs_inode_t *ip; /* incore inode pointer */
52 bhv_vnode_t *vp; 52 bhv_vnode_t *vp;
53 int error; 53 int error;
54 54
55 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno); 55 error = xfs_iget(mp, NULL, ino,
56 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
56 if (error) { 57 if (error) {
57 *stat = BULKSTAT_RV_NOTHING; 58 *stat = BULKSTAT_RV_NOTHING;
58 return error; 59 return error;
59 } 60 }
60 61
61 ASSERT(ip != NULL); 62 ASSERT(ip != NULL);
62 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 63 ASSERT(ip->i_blkno != (xfs_daddr_t)0);
63 if (ip->i_d.di_mode == 0) { 64 if (ip->i_d.di_mode == 0) {
64 *stat = BULKSTAT_RV_NOTHING; 65 *stat = BULKSTAT_RV_NOTHING;
65 error = XFS_ERROR(ENOENT); 66 error = XFS_ERROR(ENOENT);
66 goto out_iput; 67 goto out_iput;
67 } 68 }
68 69
69 vp = XFS_ITOV(ip); 70 vp = XFS_ITOV(ip);
70 dic = &ip->i_d; 71 dic = &ip->i_d;
71 72
72 /* xfs_iget returns the following without needing 73 /* xfs_iget returns the following without needing
73 * further change. 74 * further change.
74 */ 75 */
75 buf->bs_nlink = dic->di_nlink; 76 buf->bs_nlink = dic->di_nlink;
76 buf->bs_projid = dic->di_projid; 77 buf->bs_projid = dic->di_projid;
77 buf->bs_ino = ino; 78 buf->bs_ino = ino;
78 buf->bs_mode = dic->di_mode; 79 buf->bs_mode = dic->di_mode;
79 buf->bs_uid = dic->di_uid; 80 buf->bs_uid = dic->di_uid;
80 buf->bs_gid = dic->di_gid; 81 buf->bs_gid = dic->di_gid;
81 buf->bs_size = dic->di_size; 82 buf->bs_size = dic->di_size;
82 vn_atime_to_bstime(vp, &buf->bs_atime); 83 vn_atime_to_bstime(vp, &buf->bs_atime);
83 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; 84 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
84 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; 85 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
85 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; 86 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
86 buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec; 87 buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
87 buf->bs_xflags = xfs_ip2xflags(ip); 88 buf->bs_xflags = xfs_ip2xflags(ip);
88 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 89 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
89 buf->bs_extents = dic->di_nextents; 90 buf->bs_extents = dic->di_nextents;
90 buf->bs_gen = dic->di_gen; 91 buf->bs_gen = dic->di_gen;
91 memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); 92 memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
92 buf->bs_dmevmask = dic->di_dmevmask; 93 buf->bs_dmevmask = dic->di_dmevmask;
93 buf->bs_dmstate = dic->di_dmstate; 94 buf->bs_dmstate = dic->di_dmstate;
94 buf->bs_aextents = dic->di_anextents; 95 buf->bs_aextents = dic->di_anextents;
95 96
96 switch (dic->di_format) { 97 switch (dic->di_format) {
97 case XFS_DINODE_FMT_DEV: 98 case XFS_DINODE_FMT_DEV:
98 buf->bs_rdev = ip->i_df.if_u2.if_rdev; 99 buf->bs_rdev = ip->i_df.if_u2.if_rdev;
99 buf->bs_blksize = BLKDEV_IOSIZE; 100 buf->bs_blksize = BLKDEV_IOSIZE;
100 buf->bs_blocks = 0; 101 buf->bs_blocks = 0;
101 break; 102 break;
102 case XFS_DINODE_FMT_LOCAL: 103 case XFS_DINODE_FMT_LOCAL:
103 case XFS_DINODE_FMT_UUID: 104 case XFS_DINODE_FMT_UUID:
104 buf->bs_rdev = 0; 105 buf->bs_rdev = 0;
105 buf->bs_blksize = mp->m_sb.sb_blocksize; 106 buf->bs_blksize = mp->m_sb.sb_blocksize;
106 buf->bs_blocks = 0; 107 buf->bs_blocks = 0;
107 break; 108 break;
108 case XFS_DINODE_FMT_EXTENTS: 109 case XFS_DINODE_FMT_EXTENTS:
109 case XFS_DINODE_FMT_BTREE: 110 case XFS_DINODE_FMT_BTREE:
110 buf->bs_rdev = 0; 111 buf->bs_rdev = 0;
111 buf->bs_blksize = mp->m_sb.sb_blocksize; 112 buf->bs_blksize = mp->m_sb.sb_blocksize;
112 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 113 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
113 break; 114 break;
114 } 115 }
115 116
116 out_iput: 117 out_iput:
117 xfs_iput(ip, XFS_ILOCK_SHARED); 118 xfs_iput(ip, XFS_ILOCK_SHARED);
118 return error; 119 return error;
119 } 120 }
120 121
121 STATIC int 122 STATIC int
122 xfs_bulkstat_one_dinode( 123 xfs_bulkstat_one_dinode(
123 xfs_mount_t *mp, /* mount point for filesystem */ 124 xfs_mount_t *mp, /* mount point for filesystem */
124 xfs_ino_t ino, /* inode number to get data for */ 125 xfs_ino_t ino, /* inode number to get data for */
125 xfs_dinode_t *dip, /* dinode inode pointer */ 126 xfs_dinode_t *dip, /* dinode inode pointer */
126 xfs_bstat_t *buf) /* return buffer */ 127 xfs_bstat_t *buf) /* return buffer */
127 { 128 {
128 xfs_dinode_core_t *dic; /* dinode core info pointer */ 129 xfs_dinode_core_t *dic; /* dinode core info pointer */
129 130
130 dic = &dip->di_core; 131 dic = &dip->di_core;
131 132
132 /* 133 /*
133 * The inode format changed when we moved the link count and 134 * The inode format changed when we moved the link count and
134 * made it 32 bits long. If this is an old format inode, 135 * made it 32 bits long. If this is an old format inode,
135 * convert it in memory to look like a new one. If it gets 136 * convert it in memory to look like a new one. If it gets
136 * flushed to disk we will convert back before flushing or 137 * flushed to disk we will convert back before flushing or
137 * logging it. We zero out the new projid field and the old link 138 * logging it. We zero out the new projid field and the old link
138 * count field. We'll handle clearing the pad field (the remains 139 * count field. We'll handle clearing the pad field (the remains
139 * of the old uuid field) when we actually convert the inode to 140 * of the old uuid field) when we actually convert the inode to
140 * the new format. We don't change the version number so that we 141 * the new format. We don't change the version number so that we
141 * can distinguish this from a real new format inode. 142 * can distinguish this from a real new format inode.
142 */ 143 */
143 if (INT_GET(dic->di_version, ARCH_CONVERT) == XFS_DINODE_VERSION_1) { 144 if (INT_GET(dic->di_version, ARCH_CONVERT) == XFS_DINODE_VERSION_1) {
144 buf->bs_nlink = INT_GET(dic->di_onlink, ARCH_CONVERT); 145 buf->bs_nlink = INT_GET(dic->di_onlink, ARCH_CONVERT);
145 buf->bs_projid = 0; 146 buf->bs_projid = 0;
146 } else { 147 } else {
147 buf->bs_nlink = INT_GET(dic->di_nlink, ARCH_CONVERT); 148 buf->bs_nlink = INT_GET(dic->di_nlink, ARCH_CONVERT);
148 buf->bs_projid = INT_GET(dic->di_projid, ARCH_CONVERT); 149 buf->bs_projid = INT_GET(dic->di_projid, ARCH_CONVERT);
149 } 150 }
150 151
151 buf->bs_ino = ino; 152 buf->bs_ino = ino;
152 buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT); 153 buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT);
153 buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT); 154 buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT);
154 buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT); 155 buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT);
155 buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT); 156 buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT);
156 buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT); 157 buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT);
157 buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT); 158 buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT);
158 buf->bs_mtime.tv_sec = INT_GET(dic->di_mtime.t_sec, ARCH_CONVERT); 159 buf->bs_mtime.tv_sec = INT_GET(dic->di_mtime.t_sec, ARCH_CONVERT);
159 buf->bs_mtime.tv_nsec = INT_GET(dic->di_mtime.t_nsec, ARCH_CONVERT); 160 buf->bs_mtime.tv_nsec = INT_GET(dic->di_mtime.t_nsec, ARCH_CONVERT);
160 buf->bs_ctime.tv_sec = INT_GET(dic->di_ctime.t_sec, ARCH_CONVERT); 161 buf->bs_ctime.tv_sec = INT_GET(dic->di_ctime.t_sec, ARCH_CONVERT);
161 buf->bs_ctime.tv_nsec = INT_GET(dic->di_ctime.t_nsec, ARCH_CONVERT); 162 buf->bs_ctime.tv_nsec = INT_GET(dic->di_ctime.t_nsec, ARCH_CONVERT);
162 buf->bs_xflags = xfs_dic2xflags(dic); 163 buf->bs_xflags = xfs_dic2xflags(dic);
163 buf->bs_extsize = INT_GET(dic->di_extsize, ARCH_CONVERT) << mp->m_sb.sb_blocklog; 164 buf->bs_extsize = INT_GET(dic->di_extsize, ARCH_CONVERT) << mp->m_sb.sb_blocklog;
164 buf->bs_extents = INT_GET(dic->di_nextents, ARCH_CONVERT); 165 buf->bs_extents = INT_GET(dic->di_nextents, ARCH_CONVERT);
165 buf->bs_gen = INT_GET(dic->di_gen, ARCH_CONVERT); 166 buf->bs_gen = INT_GET(dic->di_gen, ARCH_CONVERT);
166 memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); 167 memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
167 buf->bs_dmevmask = INT_GET(dic->di_dmevmask, ARCH_CONVERT); 168 buf->bs_dmevmask = INT_GET(dic->di_dmevmask, ARCH_CONVERT);
168 buf->bs_dmstate = INT_GET(dic->di_dmstate, ARCH_CONVERT); 169 buf->bs_dmstate = INT_GET(dic->di_dmstate, ARCH_CONVERT);
169 buf->bs_aextents = INT_GET(dic->di_anextents, ARCH_CONVERT); 170 buf->bs_aextents = INT_GET(dic->di_anextents, ARCH_CONVERT);
170 171
171 switch (INT_GET(dic->di_format, ARCH_CONVERT)) { 172 switch (INT_GET(dic->di_format, ARCH_CONVERT)) {
172 case XFS_DINODE_FMT_DEV: 173 case XFS_DINODE_FMT_DEV:
173 buf->bs_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); 174 buf->bs_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
174 buf->bs_blksize = BLKDEV_IOSIZE; 175 buf->bs_blksize = BLKDEV_IOSIZE;
175 buf->bs_blocks = 0; 176 buf->bs_blocks = 0;
176 break; 177 break;
177 case XFS_DINODE_FMT_LOCAL: 178 case XFS_DINODE_FMT_LOCAL:
178 case XFS_DINODE_FMT_UUID: 179 case XFS_DINODE_FMT_UUID:
179 buf->bs_rdev = 0; 180 buf->bs_rdev = 0;
180 buf->bs_blksize = mp->m_sb.sb_blocksize; 181 buf->bs_blksize = mp->m_sb.sb_blocksize;
181 buf->bs_blocks = 0; 182 buf->bs_blocks = 0;
182 break; 183 break;
183 case XFS_DINODE_FMT_EXTENTS: 184 case XFS_DINODE_FMT_EXTENTS:
184 case XFS_DINODE_FMT_BTREE: 185 case XFS_DINODE_FMT_BTREE:
185 buf->bs_rdev = 0; 186 buf->bs_rdev = 0;
186 buf->bs_blksize = mp->m_sb.sb_blocksize; 187 buf->bs_blksize = mp->m_sb.sb_blocksize;
187 buf->bs_blocks = INT_GET(dic->di_nblocks, ARCH_CONVERT); 188 buf->bs_blocks = INT_GET(dic->di_nblocks, ARCH_CONVERT);
188 break; 189 break;
189 } 190 }
190 191
191 return 0; 192 return 0;
192 } 193 }
193 194
194 /* 195 /*
195 * Return stat information for one inode. 196 * Return stat information for one inode.
196 * Return 0 if ok, else errno. 197 * Return 0 if ok, else errno.
197 */ 198 */
198 int /* error status */ 199 int /* error status */
199 xfs_bulkstat_one( 200 xfs_bulkstat_one(
200 xfs_mount_t *mp, /* mount point for filesystem */ 201 xfs_mount_t *mp, /* mount point for filesystem */
201 xfs_ino_t ino, /* inode number to get data for */ 202 xfs_ino_t ino, /* inode number to get data for */
202 void __user *buffer, /* buffer to place output in */ 203 void __user *buffer, /* buffer to place output in */
203 int ubsize, /* size of buffer */ 204 int ubsize, /* size of buffer */
204 void *private_data, /* my private data */ 205 void *private_data, /* my private data */
205 xfs_daddr_t bno, /* starting bno of inode cluster */ 206 xfs_daddr_t bno, /* starting bno of inode cluster */
206 int *ubused, /* bytes used by me */ 207 int *ubused, /* bytes used by me */
207 void *dibuff, /* on-disk inode buffer */ 208 void *dibuff, /* on-disk inode buffer */
208 int *stat) /* BULKSTAT_RV_... */ 209 int *stat) /* BULKSTAT_RV_... */
209 { 210 {
210 xfs_bstat_t *buf; /* return buffer */ 211 xfs_bstat_t *buf; /* return buffer */
211 int error = 0; /* error value */ 212 int error = 0; /* error value */
212 xfs_dinode_t *dip; /* dinode inode pointer */ 213 xfs_dinode_t *dip; /* dinode inode pointer */
213 214
214 dip = (xfs_dinode_t *)dibuff; 215 dip = (xfs_dinode_t *)dibuff;
215 216
216 if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || 217 if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
217 (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && 218 (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
218 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) { 219 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) {
219 *stat = BULKSTAT_RV_NOTHING; 220 *stat = BULKSTAT_RV_NOTHING;
220 return XFS_ERROR(EINVAL); 221 return XFS_ERROR(EINVAL);
221 } 222 }
222 if (ubsize < sizeof(*buf)) { 223 if (ubsize < sizeof(*buf)) {
223 *stat = BULKSTAT_RV_NOTHING; 224 *stat = BULKSTAT_RV_NOTHING;
224 return XFS_ERROR(ENOMEM); 225 return XFS_ERROR(ENOMEM);
225 } 226 }
226 227
227 buf = kmem_alloc(sizeof(*buf), KM_SLEEP); 228 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
228 229
229 if (dip == NULL) { 230 if (dip == NULL) {
230 /* We're not being passed a pointer to a dinode. This happens 231 /* We're not being passed a pointer to a dinode. This happens
231 * if BULKSTAT_FG_IGET is selected. Do the iget. 232 * if BULKSTAT_FG_IGET is selected. Do the iget.
232 */ 233 */
233 error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat); 234 error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
234 if (error) 235 if (error)
235 goto out_free; 236 goto out_free;
236 } else { 237 } else {
237 xfs_bulkstat_one_dinode(mp, ino, dip, buf); 238 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
238 } 239 }
239 240
240 if (copy_to_user(buffer, buf, sizeof(*buf))) { 241 if (copy_to_user(buffer, buf, sizeof(*buf))) {
241 *stat = BULKSTAT_RV_NOTHING; 242 *stat = BULKSTAT_RV_NOTHING;
242 error = EFAULT; 243 error = EFAULT;
243 goto out_free; 244 goto out_free;
244 } 245 }
245 246
246 *stat = BULKSTAT_RV_DIDONE; 247 *stat = BULKSTAT_RV_DIDONE;
247 if (ubused) 248 if (ubused)
248 *ubused = sizeof(*buf); 249 *ubused = sizeof(*buf);
249 250
250 out_free: 251 out_free:
251 kmem_free(buf, sizeof(*buf)); 252 kmem_free(buf, sizeof(*buf));
252 return error; 253 return error;
253 } 254 }
254 255
255 /* 256 /*
256 * Test to see whether we can use the ondisk inode directly, based 257 * Test to see whether we can use the ondisk inode directly, based
257 * on the given bulkstat flags, filling in dipp accordingly. 258 * on the given bulkstat flags, filling in dipp accordingly.
258 * Returns zero if the inode is dodgey. 259 * Returns zero if the inode is dodgey.
259 */ 260 */
260 STATIC int 261 STATIC int
261 xfs_bulkstat_use_dinode( 262 xfs_bulkstat_use_dinode(
262 xfs_mount_t *mp, 263 xfs_mount_t *mp,
263 int flags, 264 int flags,
264 xfs_buf_t *bp, 265 xfs_buf_t *bp,
265 int clustidx, 266 int clustidx,
266 xfs_dinode_t **dipp) 267 xfs_dinode_t **dipp)
267 { 268 {
268 xfs_dinode_t *dip; 269 xfs_dinode_t *dip;
269 unsigned int aformat; 270 unsigned int aformat;
270 271
271 *dipp = NULL; 272 *dipp = NULL;
272 if (!bp || (flags & BULKSTAT_FG_IGET)) 273 if (!bp || (flags & BULKSTAT_FG_IGET))
273 return 1; 274 return 1;
274 dip = (xfs_dinode_t *) 275 dip = (xfs_dinode_t *)
275 xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); 276 xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
276 if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC || 277 if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC ||
277 !XFS_DINODE_GOOD_VERSION( 278 !XFS_DINODE_GOOD_VERSION(
278 INT_GET(dip->di_core.di_version, ARCH_CONVERT))) 279 INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
279 return 0; 280 return 0;
280 if (flags & BULKSTAT_FG_QUICK) { 281 if (flags & BULKSTAT_FG_QUICK) {
281 *dipp = dip; 282 *dipp = dip;
282 return 1; 283 return 1;
283 } 284 }
284 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ 285 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
285 aformat = INT_GET(dip->di_core.di_aformat, ARCH_CONVERT); 286 aformat = INT_GET(dip->di_core.di_aformat, ARCH_CONVERT);
286 if ((XFS_CFORK_Q(&dip->di_core) == 0) || 287 if ((XFS_CFORK_Q(&dip->di_core) == 0) ||
287 (aformat == XFS_DINODE_FMT_LOCAL) || 288 (aformat == XFS_DINODE_FMT_LOCAL) ||
288 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { 289 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) {
289 *dipp = dip; 290 *dipp = dip;
290 return 1; 291 return 1;
291 } 292 }
292 return 1; 293 return 1;
293 } 294 }
294 295
/*
 * Return stat information in bulk (by-inode) for the filesystem.
 *
 * Walks the inode allocation btree (ialloc btree) of each allocation
 * group starting from *lastinop, collecting chunk records into a local
 * buffer (with cluster readahead), then runs the caller's formatter
 * over every allocated inode until the user buffer or the filesystem
 * is exhausted.  On return *lastinop is the resume cookie for the next
 * call, *ubcountp is the number of stat structures emitted, and *done
 * is set when the end of the filesystem was reached.
 */
int					/* error status */
xfs_bulkstat(
	xfs_mount_t		*mp,	/* mount point for filesystem */
	xfs_ino_t		*lastinop, /* last inode returned */
	int			*ubcountp, /* size of buffer/count returned */
	bulkstat_one_pf		formatter, /* func that'd fill a single buf */
	void			*private_data,/* private data for formatter */
	size_t			statstruct_size, /* sizeof struct filling */
	char			__user *ubuffer, /* buffer with inode stats */
	int			flags,	/* defined in xfs_itable.h */
	int			*done)	/* 1 if there are more stats to get */
{
	xfs_agblock_t		agbno=0;/* allocation group block number */
	xfs_buf_t		*agbp;	/* agi header buffer */
	xfs_agi_t		*agi;	/* agi header data */
	xfs_agino_t		agino;	/* inode # in allocation group */
	xfs_agnumber_t		agno;	/* allocation group number */
	xfs_daddr_t		bno;	/* inode cluster start daddr */
	int			chunkidx; /* current index into inode chunk */
	int			clustidx; /* current index into inode cluster */
	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
	int			end_of_ag; /* set if we've seen the ag end */
	int			error;	/* error code */
	int			fmterror;/* bulkstat formatter result */
	__int32_t		gcnt;	/* current btree rec's count */
	xfs_inofree_t		gfree;	/* current btree rec's free mask */
	xfs_agino_t		gino;	/* current btree rec's start inode */
	int			i;	/* loop index */
	int			icount;	/* count of inodes good in irbuf */
	int			irbsize; /* size of irec buffer in bytes */
	unsigned int		kmflags; /* flags for allocating irec buffer */
	xfs_ino_t		ino;	/* inode number (filesystem) */
	xfs_inobt_rec_incore_t	*irbp;	/* current irec buffer pointer */
	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */
	xfs_inobt_rec_incore_t	*irbufend; /* end of good irec buffer entries */
	xfs_ino_t		lastino=0; /* last inode number returned */
	int			nbcluster; /* # of blocks in a cluster */
	int			nicluster; /* # of inodes in a cluster */
	int			nimask;	/* mask for inode clusters */
	int			nirbuf;	/* size of irbuf */
	int			rval;	/* return value error code */
	int			tmp;	/* result value from btree calls */
	int			ubcount; /* size of user's buffer */
	int			ubleft;	/* bytes left in user's buffer */
	char			__user *ubufp;	/* pointer into user's buffer */
	int			ubelem;	/* spaces used in user's buffer */
	int			ubused;	/* bytes used by formatter */
	xfs_buf_t		*bp;	/* ptr to on-disk inode cluster buf */
	xfs_dinode_t		*dip;	/* ptr into bp for specific inode */
	xfs_inode_t		*ip;	/* ptr to in-core inode struct */

	/*
	 * Get the last inode value, see if there's nothing to do.
	 * A resume cookie that doesn't round-trip through the ag/agino
	 * decomposition is past the end of the filesystem.
	 */
	ino = (xfs_ino_t)*lastinop;
	dip = NULL;
	agno = XFS_INO_TO_AGNO(mp, ino);
	agino = XFS_INO_TO_AGINO(mp, ino);
	if (agno >= mp->m_sb.sb_agcount ||
	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
		*done = 1;
		*ubcountp = 0;
		return 0;
	}
	ubcount = *ubcountp; /* statstruct's */
	/* NOTE(review): ubcount * statstruct_size is int arithmetic; assumes
	 * callers pass buffer sizes well below INT_MAX — confirm at call sites. */
	ubleft = ubcount * statstruct_size; /* bytes */
	*ubcountp = ubelem = 0;
	*done = 0;
	fmterror = 0;
	ubufp = ubuffer;
	/* Inodes per cluster buffer: capped at one block's worth when the
	 * block is at least a full cluster. */
	nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ?
		mp->m_sb.sb_inopblock :
		(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
	nimask = ~(nicluster - 1);
	nbcluster = nicluster >> mp->m_sb.sb_inopblog;
	/*
	 * Allocate a local buffer for inode cluster btree records.
	 * This caps our maximum readahead window (so don't be stingy)
	 * but we must handle the case where we can't get a contiguous
	 * multi-page buffer, so we drop back toward pagesize; the end
	 * case we ensure succeeds, via appropriate allocation flags.
	 */
	irbsize = NBPP * 4;
	kmflags = KM_SLEEP | KM_MAYFAIL;
	while (!(irbuf = kmem_alloc(irbsize, kmflags))) {
		/* Halve the request; once at/below a page, drop KM_MAYFAIL so
		 * the allocation is guaranteed to succeed (KM_SLEEP blocks). */
		if ((irbsize >>= 1) <= NBPP)
			kmflags = KM_SLEEP;
	}
	nirbuf = irbsize / sizeof(*irbuf);

	/*
	 * Loop over the allocation groups, starting from the last
	 * inode returned; 0 means start of the allocation group.
	 */
	rval = 0;
	while (ubleft >= statstruct_size && agno < mp->m_sb.sb_agcount) {
		bp = NULL;
		down_read(&mp->m_peraglock);
		error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
		up_read(&mp->m_peraglock);
		if (error) {
			/*
			 * Skip this allocation group and go to the next one.
			 */
			agno++;
			agino = 0;
			continue;
		}
		agi = XFS_BUF_TO_AGI(agbp);
		/*
		 * Allocate and initialize a btree cursor for ialloc btree.
		 */
		cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO,
			(xfs_inode_t *)0, 0);
		irbp = irbuf;
		irbufend = irbuf + nirbuf;
		end_of_ag = 0;
		/*
		 * If we're returning in the middle of an allocation group,
		 * we need to get the remainder of the chunk we're in.
		 */
		if (agino > 0) {
			/*
			 * Lookup the inode chunk that this inode lives in.
			 */
			error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp);
			if (!error &&	/* no I/O error */
			    tmp &&	/* lookup succeeded */
			    /* got the record, should always work */
			    !(error = xfs_inobt_get_rec(cur, &gino, &gcnt,
				    &gfree, &i)) &&
			    i == 1 &&
			    /* this is the right chunk */
			    agino < gino + XFS_INODES_PER_CHUNK &&
			    /* lastino was not last in chunk */
			    (chunkidx = agino - gino + 1) <
				    XFS_INODES_PER_CHUNK &&
			    /* there are some left allocated */
			    XFS_INOBT_MASKN(chunkidx,
				    XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) {
				/*
				 * Grab the chunk record.  Mark all the
				 * uninteresting inodes (because they're
				 * before our start point) free.
				 */
				for (i = 0; i < chunkidx; i++) {
					if (XFS_INOBT_MASK(i) & ~gfree)
						gcnt++;
				}
				gfree |= XFS_INOBT_MASKN(0, chunkidx);
				irbp->ir_startino = gino;
				irbp->ir_freecount = gcnt;
				irbp->ir_free = gfree;
				irbp++;
				agino = gino + XFS_INODES_PER_CHUNK;
				icount = XFS_INODES_PER_CHUNK - gcnt;
			} else {
				/*
				 * If any of those tests failed, bump the
				 * inode number (just in case).
				 */
				agino++;
				icount = 0;
			}
			/*
			 * In any case, increment to the next record.
			 */
			if (!error)
				error = xfs_inobt_increment(cur, 0, &tmp);
		} else {
			/*
			 * Start of ag.  Lookup the first inode chunk.
			 */
			error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp);
			icount = 0;
		}
		/*
		 * Loop through inode btree records in this ag,
		 * until we run out of inodes or space in the buffer.
		 */
		while (irbp < irbufend && icount < ubcount) {
			/*
			 * Loop as long as we're unable to read the
			 * inode btree.  Retry the lookup one chunk further
			 * each time, until we run off the end of the ag.
			 */
			while (error) {
				agino += XFS_INODES_PER_CHUNK;
				if (XFS_AGINO_TO_AGBNO(mp, agino) >=
						be32_to_cpu(agi->agi_length))
					break;
				error = xfs_inobt_lookup_ge(cur, agino, 0, 0,
							    &tmp);
			}
			/*
			 * If ran off the end of the ag either with an error,
			 * or the normal way, set end and stop collecting.
			 */
			if (error ||
			    (error = xfs_inobt_get_rec(cur, &gino, &gcnt,
				    &gfree, &i)) ||
			    i == 0) {
				end_of_ag = 1;
				break;
			}
			/*
			 * If this chunk has any allocated inodes, save it.
			 * Also start read-ahead now for this chunk.
			 */
			if (gcnt < XFS_INODES_PER_CHUNK) {
				/*
				 * Loop over all clusters in the next chunk.
				 * Do a readahead if there are any allocated
				 * inodes in that cluster.
				 */
				for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
				     chunkidx = 0;
				     chunkidx < XFS_INODES_PER_CHUNK;
				     chunkidx += nicluster,
				     agbno += nbcluster) {
					if (XFS_INOBT_MASKN(chunkidx,
							    nicluster) & ~gfree)
						xfs_btree_reada_bufs(mp, agno,
							agbno, nbcluster);
				}
				irbp->ir_startino = gino;
				irbp->ir_freecount = gcnt;
				irbp->ir_free = gfree;
				irbp++;
				icount += XFS_INODES_PER_CHUNK - gcnt;
			}
			/*
			 * Set agino to after this chunk and bump the cursor.
			 */
			agino = gino + XFS_INODES_PER_CHUNK;
			error = xfs_inobt_increment(cur, 0, &tmp);
		}
		/*
		 * Drop the btree buffers and the agi buffer.
		 * We can't hold any of the locks these represent
		 * when calling iget.
		 */
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
		xfs_buf_relse(agbp);
		/*
		 * Now format all the good inodes into the user's buffer.
		 */
		irbufend = irbp;
		for (irbp = irbuf;
		     irbp < irbufend && ubleft >= statstruct_size; irbp++) {
			/*
			 * Now process this chunk of inodes.  ir_freecount is
			 * incremented below for every inode consumed, so the
			 * loop ends when the chunk is fully accounted for.
			 */
			for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
			     ubleft > 0 &&
				irbp->ir_freecount < XFS_INODES_PER_CHUNK;
			     chunkidx++, clustidx++, agino++) {
				ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
				/*
				 * Recompute agbno if this is the
				 * first inode of the cluster.
				 *
				 * Careful with clustidx.   There can be
				 * multple clusters per chunk, a single
				 * cluster per chunk or a cluster that has
				 * inodes represented from several different
				 * chunks (if blocksize is large).
				 *
				 * Because of this, the starting clustidx is
				 * initialized to zero in this loop but must
				 * later be reset after reading in the cluster
				 * buffer.
				 */
				if ((chunkidx & (nicluster - 1)) == 0) {
					agbno = XFS_AGINO_TO_AGBNO(mp,
							irbp->ir_startino) +
						((chunkidx & nimask) >>
						 mp->m_sb.sb_inopblog);

					if (flags & (BULKSTAT_FG_QUICK |
						     BULKSTAT_FG_INLINE)) {
						ino = XFS_AGINO_TO_INO(mp, agno,
								       agino);
						bno = XFS_AGB_TO_DADDR(mp, agno,
								       agbno);

						/*
						 * Get the inode cluster buffer.
						 * A throwaway in-core inode is
						 * built purely so xfs_itobp can
						 * map ino to its cluster buffer;
						 * it is freed immediately after.
						 */
						ASSERT(xfs_inode_zone != NULL);
						ip = kmem_zone_zalloc(xfs_inode_zone,
								      KM_SLEEP);
						ip->i_ino = ino;
						ip->i_mount = mp;
						if (bp)
							xfs_buf_relse(bp);
						error = xfs_itobp(mp, NULL, ip,
								&dip, &bp, bno,
								XFS_IMAP_BULKSTAT);
						if (!error)
							clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
						kmem_zone_free(xfs_inode_zone, ip);
						if (XFS_TEST_ERROR(error != 0,
								   mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
								   XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
							bp = NULL;
							ubleft = 0;
							rval = error;
							break;
						}
					}
				}
				/*
				 * Skip if this inode is free.
				 */
				if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
					continue;
				/*
				 * Count used inodes as free so we can tell
				 * when the chunk is used up.
				 */
				irbp->ir_freecount++;
				ino = XFS_AGINO_TO_INO(mp, agno, agino);
				bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
				if (!xfs_bulkstat_use_dinode(mp, flags, bp,
							     clustidx, &dip))
					continue;
				/*
				 * If we need to do an iget, cannot hold bp.
				 * Drop it, until starting the next cluster.
				 */
				if ((flags & BULKSTAT_FG_INLINE) && !dip) {
					if (bp)
						xfs_buf_relse(bp);
					bp = NULL;
				}

				/*
				 * Get the inode and fill in a single buffer.
				 * BULKSTAT_FG_QUICK uses dip to fill it in.
				 * BULKSTAT_FG_IGET uses igets.
				 * BULKSTAT_FG_INLINE uses dip if we have an
				 * inline attr fork, else igets.
				 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
				 * This is also used to count inodes/blks, etc
				 * in xfs_qm_quotacheck.
				 */
				ubused = statstruct_size;
				error = formatter(mp, ino, ubufp,
						ubleft, private_data,
						bno, &ubused, dip, &fmterror);
				if (fmterror == BULKSTAT_RV_NOTHING) {
					/* Nothing emitted; only ENOMEM (user
					 * buffer exhausted) terminates the walk. */
					if (error == ENOMEM)
						ubleft = 0;
					continue;
				}
				if (fmterror == BULKSTAT_RV_GIVEUP) {
					ubleft = 0;
					ASSERT(error);
					rval = error;
					break;
				}
				if (ubufp)
					ubufp += ubused;
				ubleft -= ubused;
				ubelem++;
				lastino = ino;
			}
		}

		if (bp)
			xfs_buf_relse(bp);

		/*
		 * Set up for the next loop iteration.
		 */
		if (ubleft > 0) {
			if (end_of_ag) {
				agno++;
				agino = 0;
			} else
				agino = XFS_INO_TO_AGINO(mp, lastino);
		} else
			break;
	}
	/*
	 * Done, we're either out of filesystem or space to put the data.
	 */
	kmem_free(irbuf, irbsize);
	*ubcountp = ubelem;
	if (agno >= mp->m_sb.sb_agcount) {
		/*
		 * If we ran out of filesystem, mark lastino as off
		 * the end of the filesystem, so the next call
		 * will return immediately.
		 */
		*lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
		*done = 1;
	} else
		*lastinop = (xfs_ino_t)lastino;

	return rval;
}
700 701
/*
 * Return stat information in bulk (by-inode) for the filesystem.
 * Special case for non-sequential one inode bulkstat.
 *
 * First tries the fast path (direct xfs_bulkstat_one on *lastinop);
 * on failure falls back to a one-entry xfs_bulkstat walk starting just
 * before the requested inode, so that an unallocated-but-valid inode
 * number is reported consistently with the sequential interface.
 */
int					/* error status */
xfs_bulkstat_single(
	xfs_mount_t		*mp,	/* mount point for filesystem */
	xfs_ino_t		*lastinop, /* inode to return */
	char			__user *buffer, /* buffer with inode stats */
	int			*done)	/* 1 if there are more stats to get */
{
	int			count;	/* count value for bulkstat call */
	int			error;	/* return value */
	xfs_ino_t		ino;	/* filesystem inode number */
	int			res;	/* result from bs1 */

	/*
	 * note that requesting valid inode numbers which are not allocated
	 * to inodes will most likely cause xfs_itobp to generate warning
	 * messages about bad magic numbers. This is ok. The fact that
	 * the inode isn't actually an inode is handled by the
	 * error check below. Done this way to make the usual case faster
	 * at the expense of the error case.
	 */

	ino = (xfs_ino_t)*lastinop;
	error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
				 NULL, 0, NULL, NULL, &res);
	if (error) {
		/*
		 * Special case way failed, do it the "long" way
		 * to see if that works.  Decrement the cookie because
		 * xfs_bulkstat resumes AFTER the inode it is given.
		 */
		(*lastinop)--;
		count = 1;
		if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
				NULL, sizeof(xfs_bstat_t), buffer,
				BULKSTAT_FG_IGET, done))
			return error;
		/* The walk didn't produce exactly the requested inode:
		 * report the fast-path error (mapping EFSCORRUPTED to
		 * EINVAL for a merely-invalid inode number). */
		if (count == 0 || (xfs_ino_t)*lastinop != ino)
			return error == EFSCORRUPTED ?
				XFS_ERROR(EINVAL) : error;
		else
			return 0;
	}
	*done = 0;
	return 0;
}
749 750
750 /* 751 /*
751 * Return inode number table for the filesystem. 752 * Return inode number table for the filesystem.
752 */ 753 */
753 int /* error status */ 754 int /* error status */
754 xfs_inumbers( 755 xfs_inumbers(
755 xfs_mount_t *mp, /* mount point for filesystem */ 756 xfs_mount_t *mp, /* mount point for filesystem */
756 xfs_ino_t *lastino, /* last inode returned */ 757 xfs_ino_t *lastino, /* last inode returned */
757 int *count, /* size of buffer/count returned */ 758 int *count, /* size of buffer/count returned */
758 xfs_inogrp_t __user *ubuffer)/* buffer with inode descriptions */ 759 xfs_inogrp_t __user *ubuffer)/* buffer with inode descriptions */
759 { 760 {
760 xfs_buf_t *agbp; 761 xfs_buf_t *agbp;
761 xfs_agino_t agino; 762 xfs_agino_t agino;
762 xfs_agnumber_t agno; 763 xfs_agnumber_t agno;
763 int bcount; 764 int bcount;
764 xfs_inogrp_t *buffer; 765 xfs_inogrp_t *buffer;
765 int bufidx; 766 int bufidx;
766 xfs_btree_cur_t *cur; 767 xfs_btree_cur_t *cur;
767 int error; 768 int error;
768 __int32_t gcnt; 769 __int32_t gcnt;
769 xfs_inofree_t gfree; 770 xfs_inofree_t gfree;
770 xfs_agino_t gino; 771 xfs_agino_t gino;
771 int i; 772 int i;
772 xfs_ino_t ino; 773 xfs_ino_t ino;
773 int left; 774 int left;
774 int tmp; 775 int tmp;
775 776
776 ino = (xfs_ino_t)*lastino; 777 ino = (xfs_ino_t)*lastino;
777 agno = XFS_INO_TO_AGNO(mp, ino); 778 agno = XFS_INO_TO_AGNO(mp, ino);
778 agino = XFS_INO_TO_AGINO(mp, ino); 779 agino = XFS_INO_TO_AGINO(mp, ino);
779 left = *count; 780 left = *count;
780 *count = 0; 781 *count = 0;
781 bcount = MIN(left, (int)(NBPP / sizeof(*buffer))); 782 bcount = MIN(left, (int)(NBPP / sizeof(*buffer)));
782 buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); 783 buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
783 error = bufidx = 0; 784 error = bufidx = 0;
784 cur = NULL; 785 cur = NULL;
785 agbp = NULL; 786 agbp = NULL;
786 while (left > 0 && agno < mp->m_sb.sb_agcount) { 787 while (left > 0 && agno < mp->m_sb.sb_agcount) {
787 if (agbp == NULL) { 788 if (agbp == NULL) {
788 down_read(&mp->m_peraglock); 789 down_read(&mp->m_peraglock);
789 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 790 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
790 up_read(&mp->m_peraglock); 791 up_read(&mp->m_peraglock);
791 if (error) { 792 if (error) {
792 /* 793 /*
793 * If we can't read the AGI of this ag, 794 * If we can't read the AGI of this ag,
794 * then just skip to the next one. 795 * then just skip to the next one.
795 */ 796 */
796 ASSERT(cur == NULL); 797 ASSERT(cur == NULL);
797 agbp = NULL; 798 agbp = NULL;
798 agno++; 799 agno++;
799 agino = 0; 800 agino = 0;
800 continue; 801 continue;
801 } 802 }
802 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, 803 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno,
803 XFS_BTNUM_INO, (xfs_inode_t *)0, 0); 804 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
804 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); 805 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
805 if (error) { 806 if (error) {
806 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 807 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
807 cur = NULL; 808 cur = NULL;
808 xfs_buf_relse(agbp); 809 xfs_buf_relse(agbp);
809 agbp = NULL; 810 agbp = NULL;
810 /* 811 /*
811 * Move up the the last inode in the current 812 * Move up the the last inode in the current
812 * chunk. The lookup_ge will always get 813 * chunk. The lookup_ge will always get
813 * us the first inode in the next chunk. 814 * us the first inode in the next chunk.
814 */ 815 */
815 agino += XFS_INODES_PER_CHUNK - 1; 816 agino += XFS_INODES_PER_CHUNK - 1;
816 continue; 817 continue;
817 } 818 }
818 } 819 }
819 if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, 820 if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree,
820 &i)) || 821 &i)) ||
821 i == 0) { 822 i == 0) {
822 xfs_buf_relse(agbp); 823 xfs_buf_relse(agbp);
823 agbp = NULL; 824 agbp = NULL;
824 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 825 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
825 cur = NULL; 826 cur = NULL;
826 agno++; 827 agno++;
827 agino = 0; 828 agino = 0;
828 continue; 829 continue;
829 } 830 }
830 agino = gino + XFS_INODES_PER_CHUNK - 1; 831 agino = gino + XFS_INODES_PER_CHUNK - 1;
831 buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); 832 buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino);
832 buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; 833 buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt;
833 buffer[bufidx].xi_allocmask = ~gfree; 834 buffer[bufidx].xi_allocmask = ~gfree;
834 bufidx++; 835 bufidx++;
835 left--; 836 left--;
836 if (bufidx == bcount) { 837 if (bufidx == bcount) {
837 if (copy_to_user(ubuffer, buffer, 838 if (copy_to_user(ubuffer, buffer,
838 bufidx * sizeof(*buffer))) { 839 bufidx * sizeof(*buffer))) {
839 error = XFS_ERROR(EFAULT); 840 error = XFS_ERROR(EFAULT);
840 break; 841 break;
841 } 842 }
842 ubuffer += bufidx; 843 ubuffer += bufidx;
843 *count += bufidx; 844 *count += bufidx;
844 bufidx = 0; 845 bufidx = 0;
845 } 846 }
846 if (left) { 847 if (left) {
847 error = xfs_inobt_increment(cur, 0, &tmp); 848 error = xfs_inobt_increment(cur, 0, &tmp);
848 if (error) { 849 if (error) {
849 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 850 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
850 cur = NULL; 851 cur = NULL;
851 xfs_buf_relse(agbp); 852 xfs_buf_relse(agbp);
852 agbp = NULL; 853 agbp = NULL;
853 /* 854 /*
854 * The agino value has already been bumped. 855 * The agino value has already been bumped.
855 * Just try to skip up to it. 856 * Just try to skip up to it.
856 */ 857 */
857 agino += XFS_INODES_PER_CHUNK; 858 agino += XFS_INODES_PER_CHUNK;
858 continue; 859 continue;
859 } 860 }
860 } 861 }
861 } 862 }
862 if (!error) { 863 if (!error) {
863 if (bufidx) { 864 if (bufidx) {
864 if (copy_to_user(ubuffer, buffer, 865 if (copy_to_user(ubuffer, buffer,
865 bufidx * sizeof(*buffer))) 866 bufidx * sizeof(*buffer)))
866 error = XFS_ERROR(EFAULT); 867 error = XFS_ERROR(EFAULT);
867 else 868 else
868 *count += bufidx; 869 *count += bufidx;
869 } 870 }
870 *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 871 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
871 } 872 }
872 kmem_free(buffer, bcount * sizeof(*buffer)); 873 kmem_free(buffer, bcount * sizeof(*buffer));
873 if (cur) 874 if (cur)
874 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : 875 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
875 XFS_BTREE_NOERROR)); 876 XFS_BTREE_NOERROR));
876 if (agbp) 877 if (agbp)
877 xfs_buf_relse(agbp); 878 xfs_buf_relse(agbp);
878 return error; 879 return error;
879 } 880 }
880 881