Commit 92bfc6e7c4eabbbd15e7d6d49123b296d05dcfd1
Committed by
Niv Sardi
1 parent
94e1b69d1a
[XFS] embed struct xfs_imap into xfs_inode
Most uses of struct xfs_imap are to map an inode to a buffer. To avoid copying around the inode location information we should just embed a struct xfs_imap into the xfs_inode. To make sure it doesn't bloat an inode the im_len is changed to a ushort, which is fine as that's what the users expect anyway. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <david@fromorbit.com> Signed-off-by: Niv Sardi <xaiki@sgi.com>
Showing 7 changed files with 33 additions and 71 deletions Inline Diff
fs/xfs/xfs_ialloc.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_dir2.h" | 27 | #include "xfs_dir2.h" |
28 | #include "xfs_dmapi.h" | 28 | #include "xfs_dmapi.h" |
29 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
30 | #include "xfs_bmap_btree.h" | 30 | #include "xfs_bmap_btree.h" |
31 | #include "xfs_alloc_btree.h" | 31 | #include "xfs_alloc_btree.h" |
32 | #include "xfs_ialloc_btree.h" | 32 | #include "xfs_ialloc_btree.h" |
33 | #include "xfs_dir2_sf.h" | 33 | #include "xfs_dir2_sf.h" |
34 | #include "xfs_attr_sf.h" | 34 | #include "xfs_attr_sf.h" |
35 | #include "xfs_dinode.h" | 35 | #include "xfs_dinode.h" |
36 | #include "xfs_inode.h" | 36 | #include "xfs_inode.h" |
37 | #include "xfs_btree.h" | 37 | #include "xfs_btree.h" |
38 | #include "xfs_ialloc.h" | 38 | #include "xfs_ialloc.h" |
39 | #include "xfs_alloc.h" | 39 | #include "xfs_alloc.h" |
40 | #include "xfs_rtalloc.h" | 40 | #include "xfs_rtalloc.h" |
41 | #include "xfs_error.h" | 41 | #include "xfs_error.h" |
42 | #include "xfs_bmap.h" | 42 | #include "xfs_bmap.h" |
43 | #include "xfs_imap.h" | ||
44 | 43 | ||
45 | 44 | ||
46 | /* | 45 | /* |
47 | * Allocation group level functions. | 46 | * Allocation group level functions. |
48 | */ | 47 | */ |
49 | static inline int | 48 | static inline int |
50 | xfs_ialloc_cluster_alignment( | 49 | xfs_ialloc_cluster_alignment( |
51 | xfs_alloc_arg_t *args) | 50 | xfs_alloc_arg_t *args) |
52 | { | 51 | { |
53 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && | 52 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && |
54 | args->mp->m_sb.sb_inoalignmt >= | 53 | args->mp->m_sb.sb_inoalignmt >= |
55 | XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) | 54 | XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) |
56 | return args->mp->m_sb.sb_inoalignmt; | 55 | return args->mp->m_sb.sb_inoalignmt; |
57 | return 1; | 56 | return 1; |
58 | } | 57 | } |
59 | 58 | ||
60 | /* | 59 | /* |
61 | * Lookup the record equal to ino in the btree given by cur. | 60 | * Lookup the record equal to ino in the btree given by cur. |
62 | */ | 61 | */ |
63 | STATIC int /* error */ | 62 | STATIC int /* error */ |
64 | xfs_inobt_lookup_eq( | 63 | xfs_inobt_lookup_eq( |
65 | struct xfs_btree_cur *cur, /* btree cursor */ | 64 | struct xfs_btree_cur *cur, /* btree cursor */ |
66 | xfs_agino_t ino, /* starting inode of chunk */ | 65 | xfs_agino_t ino, /* starting inode of chunk */ |
67 | __int32_t fcnt, /* free inode count */ | 66 | __int32_t fcnt, /* free inode count */ |
68 | xfs_inofree_t free, /* free inode mask */ | 67 | xfs_inofree_t free, /* free inode mask */ |
69 | int *stat) /* success/failure */ | 68 | int *stat) /* success/failure */ |
70 | { | 69 | { |
71 | cur->bc_rec.i.ir_startino = ino; | 70 | cur->bc_rec.i.ir_startino = ino; |
72 | cur->bc_rec.i.ir_freecount = fcnt; | 71 | cur->bc_rec.i.ir_freecount = fcnt; |
73 | cur->bc_rec.i.ir_free = free; | 72 | cur->bc_rec.i.ir_free = free; |
74 | return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); | 73 | return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); |
75 | } | 74 | } |
76 | 75 | ||
77 | /* | 76 | /* |
78 | * Lookup the first record greater than or equal to ino | 77 | * Lookup the first record greater than or equal to ino |
79 | * in the btree given by cur. | 78 | * in the btree given by cur. |
80 | */ | 79 | */ |
81 | int /* error */ | 80 | int /* error */ |
82 | xfs_inobt_lookup_ge( | 81 | xfs_inobt_lookup_ge( |
83 | struct xfs_btree_cur *cur, /* btree cursor */ | 82 | struct xfs_btree_cur *cur, /* btree cursor */ |
84 | xfs_agino_t ino, /* starting inode of chunk */ | 83 | xfs_agino_t ino, /* starting inode of chunk */ |
85 | __int32_t fcnt, /* free inode count */ | 84 | __int32_t fcnt, /* free inode count */ |
86 | xfs_inofree_t free, /* free inode mask */ | 85 | xfs_inofree_t free, /* free inode mask */ |
87 | int *stat) /* success/failure */ | 86 | int *stat) /* success/failure */ |
88 | { | 87 | { |
89 | cur->bc_rec.i.ir_startino = ino; | 88 | cur->bc_rec.i.ir_startino = ino; |
90 | cur->bc_rec.i.ir_freecount = fcnt; | 89 | cur->bc_rec.i.ir_freecount = fcnt; |
91 | cur->bc_rec.i.ir_free = free; | 90 | cur->bc_rec.i.ir_free = free; |
92 | return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); | 91 | return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); |
93 | } | 92 | } |
94 | 93 | ||
95 | /* | 94 | /* |
96 | * Lookup the first record less than or equal to ino | 95 | * Lookup the first record less than or equal to ino |
97 | * in the btree given by cur. | 96 | * in the btree given by cur. |
98 | */ | 97 | */ |
99 | int /* error */ | 98 | int /* error */ |
100 | xfs_inobt_lookup_le( | 99 | xfs_inobt_lookup_le( |
101 | struct xfs_btree_cur *cur, /* btree cursor */ | 100 | struct xfs_btree_cur *cur, /* btree cursor */ |
102 | xfs_agino_t ino, /* starting inode of chunk */ | 101 | xfs_agino_t ino, /* starting inode of chunk */ |
103 | __int32_t fcnt, /* free inode count */ | 102 | __int32_t fcnt, /* free inode count */ |
104 | xfs_inofree_t free, /* free inode mask */ | 103 | xfs_inofree_t free, /* free inode mask */ |
105 | int *stat) /* success/failure */ | 104 | int *stat) /* success/failure */ |
106 | { | 105 | { |
107 | cur->bc_rec.i.ir_startino = ino; | 106 | cur->bc_rec.i.ir_startino = ino; |
108 | cur->bc_rec.i.ir_freecount = fcnt; | 107 | cur->bc_rec.i.ir_freecount = fcnt; |
109 | cur->bc_rec.i.ir_free = free; | 108 | cur->bc_rec.i.ir_free = free; |
110 | return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); | 109 | return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); |
111 | } | 110 | } |
112 | 111 | ||
113 | /* | 112 | /* |
114 | * Update the record referred to by cur to the value given | 113 | * Update the record referred to by cur to the value given |
115 | * by [ino, fcnt, free]. | 114 | * by [ino, fcnt, free]. |
116 | * This either works (return 0) or gets an EFSCORRUPTED error. | 115 | * This either works (return 0) or gets an EFSCORRUPTED error. |
117 | */ | 116 | */ |
118 | STATIC int /* error */ | 117 | STATIC int /* error */ |
119 | xfs_inobt_update( | 118 | xfs_inobt_update( |
120 | struct xfs_btree_cur *cur, /* btree cursor */ | 119 | struct xfs_btree_cur *cur, /* btree cursor */ |
121 | xfs_agino_t ino, /* starting inode of chunk */ | 120 | xfs_agino_t ino, /* starting inode of chunk */ |
122 | __int32_t fcnt, /* free inode count */ | 121 | __int32_t fcnt, /* free inode count */ |
123 | xfs_inofree_t free) /* free inode mask */ | 122 | xfs_inofree_t free) /* free inode mask */ |
124 | { | 123 | { |
125 | union xfs_btree_rec rec; | 124 | union xfs_btree_rec rec; |
126 | 125 | ||
127 | rec.inobt.ir_startino = cpu_to_be32(ino); | 126 | rec.inobt.ir_startino = cpu_to_be32(ino); |
128 | rec.inobt.ir_freecount = cpu_to_be32(fcnt); | 127 | rec.inobt.ir_freecount = cpu_to_be32(fcnt); |
129 | rec.inobt.ir_free = cpu_to_be64(free); | 128 | rec.inobt.ir_free = cpu_to_be64(free); |
130 | return xfs_btree_update(cur, &rec); | 129 | return xfs_btree_update(cur, &rec); |
131 | } | 130 | } |
132 | 131 | ||
133 | /* | 132 | /* |
134 | * Get the data from the pointed-to record. | 133 | * Get the data from the pointed-to record. |
135 | */ | 134 | */ |
136 | int /* error */ | 135 | int /* error */ |
137 | xfs_inobt_get_rec( | 136 | xfs_inobt_get_rec( |
138 | struct xfs_btree_cur *cur, /* btree cursor */ | 137 | struct xfs_btree_cur *cur, /* btree cursor */ |
139 | xfs_agino_t *ino, /* output: starting inode of chunk */ | 138 | xfs_agino_t *ino, /* output: starting inode of chunk */ |
140 | __int32_t *fcnt, /* output: number of free inodes */ | 139 | __int32_t *fcnt, /* output: number of free inodes */ |
141 | xfs_inofree_t *free, /* output: free inode mask */ | 140 | xfs_inofree_t *free, /* output: free inode mask */ |
142 | int *stat) /* output: success/failure */ | 141 | int *stat) /* output: success/failure */ |
143 | { | 142 | { |
144 | union xfs_btree_rec *rec; | 143 | union xfs_btree_rec *rec; |
145 | int error; | 144 | int error; |
146 | 145 | ||
147 | error = xfs_btree_get_rec(cur, &rec, stat); | 146 | error = xfs_btree_get_rec(cur, &rec, stat); |
148 | if (!error && *stat == 1) { | 147 | if (!error && *stat == 1) { |
149 | *ino = be32_to_cpu(rec->inobt.ir_startino); | 148 | *ino = be32_to_cpu(rec->inobt.ir_startino); |
150 | *fcnt = be32_to_cpu(rec->inobt.ir_freecount); | 149 | *fcnt = be32_to_cpu(rec->inobt.ir_freecount); |
151 | *free = be64_to_cpu(rec->inobt.ir_free); | 150 | *free = be64_to_cpu(rec->inobt.ir_free); |
152 | } | 151 | } |
153 | return error; | 152 | return error; |
154 | } | 153 | } |
155 | 154 | ||
156 | /* | 155 | /* |
157 | * Allocate new inodes in the allocation group specified by agbp. | 156 | * Allocate new inodes in the allocation group specified by agbp. |
158 | * Return 0 for success, else error code. | 157 | * Return 0 for success, else error code. |
159 | */ | 158 | */ |
160 | STATIC int /* error code or 0 */ | 159 | STATIC int /* error code or 0 */ |
161 | xfs_ialloc_ag_alloc( | 160 | xfs_ialloc_ag_alloc( |
162 | xfs_trans_t *tp, /* transaction pointer */ | 161 | xfs_trans_t *tp, /* transaction pointer */ |
163 | xfs_buf_t *agbp, /* alloc group buffer */ | 162 | xfs_buf_t *agbp, /* alloc group buffer */ |
164 | int *alloc) | 163 | int *alloc) |
165 | { | 164 | { |
166 | xfs_agi_t *agi; /* allocation group header */ | 165 | xfs_agi_t *agi; /* allocation group header */ |
167 | xfs_alloc_arg_t args; /* allocation argument structure */ | 166 | xfs_alloc_arg_t args; /* allocation argument structure */ |
168 | int blks_per_cluster; /* fs blocks per inode cluster */ | 167 | int blks_per_cluster; /* fs blocks per inode cluster */ |
169 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 168 | xfs_btree_cur_t *cur; /* inode btree cursor */ |
170 | xfs_daddr_t d; /* disk addr of buffer */ | 169 | xfs_daddr_t d; /* disk addr of buffer */ |
171 | xfs_agnumber_t agno; | 170 | xfs_agnumber_t agno; |
172 | int error; | 171 | int error; |
173 | xfs_buf_t *fbuf; /* new free inodes' buffer */ | 172 | xfs_buf_t *fbuf; /* new free inodes' buffer */ |
174 | xfs_dinode_t *free; /* new free inode structure */ | 173 | xfs_dinode_t *free; /* new free inode structure */ |
175 | int i; /* inode counter */ | 174 | int i; /* inode counter */ |
176 | int j; /* block counter */ | 175 | int j; /* block counter */ |
177 | int nbufs; /* num bufs of new inodes */ | 176 | int nbufs; /* num bufs of new inodes */ |
178 | xfs_agino_t newino; /* new first inode's number */ | 177 | xfs_agino_t newino; /* new first inode's number */ |
179 | xfs_agino_t newlen; /* new number of inodes */ | 178 | xfs_agino_t newlen; /* new number of inodes */ |
180 | int ninodes; /* num inodes per buf */ | 179 | int ninodes; /* num inodes per buf */ |
181 | xfs_agino_t thisino; /* current inode number, for loop */ | 180 | xfs_agino_t thisino; /* current inode number, for loop */ |
182 | int version; /* inode version number to use */ | 181 | int version; /* inode version number to use */ |
183 | int isaligned = 0; /* inode allocation at stripe unit */ | 182 | int isaligned = 0; /* inode allocation at stripe unit */ |
184 | /* boundary */ | 183 | /* boundary */ |
185 | unsigned int gen; | 184 | unsigned int gen; |
186 | 185 | ||
187 | args.tp = tp; | 186 | args.tp = tp; |
188 | args.mp = tp->t_mountp; | 187 | args.mp = tp->t_mountp; |
189 | 188 | ||
190 | /* | 189 | /* |
191 | * Locking will ensure that we don't have two callers in here | 190 | * Locking will ensure that we don't have two callers in here |
192 | * at one time. | 191 | * at one time. |
193 | */ | 192 | */ |
194 | newlen = XFS_IALLOC_INODES(args.mp); | 193 | newlen = XFS_IALLOC_INODES(args.mp); |
195 | if (args.mp->m_maxicount && | 194 | if (args.mp->m_maxicount && |
196 | args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) | 195 | args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) |
197 | return XFS_ERROR(ENOSPC); | 196 | return XFS_ERROR(ENOSPC); |
198 | args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); | 197 | args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); |
199 | /* | 198 | /* |
200 | * First try to allocate inodes contiguous with the last-allocated | 199 | * First try to allocate inodes contiguous with the last-allocated |
201 | * chunk of inodes. If the filesystem is striped, this will fill | 200 | * chunk of inodes. If the filesystem is striped, this will fill |
202 | * an entire stripe unit with inodes. | 201 | * an entire stripe unit with inodes. |
203 | */ | 202 | */ |
204 | agi = XFS_BUF_TO_AGI(agbp); | 203 | agi = XFS_BUF_TO_AGI(agbp); |
205 | newino = be32_to_cpu(agi->agi_newino); | 204 | newino = be32_to_cpu(agi->agi_newino); |
206 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + | 205 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + |
207 | XFS_IALLOC_BLOCKS(args.mp); | 206 | XFS_IALLOC_BLOCKS(args.mp); |
208 | if (likely(newino != NULLAGINO && | 207 | if (likely(newino != NULLAGINO && |
209 | (args.agbno < be32_to_cpu(agi->agi_length)))) { | 208 | (args.agbno < be32_to_cpu(agi->agi_length)))) { |
210 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 209 | args.fsbno = XFS_AGB_TO_FSB(args.mp, |
211 | be32_to_cpu(agi->agi_seqno), args.agbno); | 210 | be32_to_cpu(agi->agi_seqno), args.agbno); |
212 | args.type = XFS_ALLOCTYPE_THIS_BNO; | 211 | args.type = XFS_ALLOCTYPE_THIS_BNO; |
213 | args.mod = args.total = args.wasdel = args.isfl = | 212 | args.mod = args.total = args.wasdel = args.isfl = |
214 | args.userdata = args.minalignslop = 0; | 213 | args.userdata = args.minalignslop = 0; |
215 | args.prod = 1; | 214 | args.prod = 1; |
216 | 215 | ||
217 | /* | 216 | /* |
218 | * We need to take into account alignment here to ensure that | 217 | * We need to take into account alignment here to ensure that |
219 | * we don't modify the free list if we fail to have an exact | 218 | * we don't modify the free list if we fail to have an exact |
220 | * block. If we don't have an exact match, and every oher | 219 | * block. If we don't have an exact match, and every oher |
221 | * attempt allocation attempt fails, we'll end up cancelling | 220 | * attempt allocation attempt fails, we'll end up cancelling |
222 | * a dirty transaction and shutting down. | 221 | * a dirty transaction and shutting down. |
223 | * | 222 | * |
224 | * For an exact allocation, alignment must be 1, | 223 | * For an exact allocation, alignment must be 1, |
225 | * however we need to take cluster alignment into account when | 224 | * however we need to take cluster alignment into account when |
226 | * fixing up the freelist. Use the minalignslop field to | 225 | * fixing up the freelist. Use the minalignslop field to |
227 | * indicate that extra blocks might be required for alignment, | 226 | * indicate that extra blocks might be required for alignment, |
228 | * but not to use them in the actual exact allocation. | 227 | * but not to use them in the actual exact allocation. |
229 | */ | 228 | */ |
230 | args.alignment = 1; | 229 | args.alignment = 1; |
231 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; | 230 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; |
232 | 231 | ||
233 | /* Allow space for the inode btree to split. */ | 232 | /* Allow space for the inode btree to split. */ |
234 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; | 233 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; |
235 | if ((error = xfs_alloc_vextent(&args))) | 234 | if ((error = xfs_alloc_vextent(&args))) |
236 | return error; | 235 | return error; |
237 | } else | 236 | } else |
238 | args.fsbno = NULLFSBLOCK; | 237 | args.fsbno = NULLFSBLOCK; |
239 | 238 | ||
240 | if (unlikely(args.fsbno == NULLFSBLOCK)) { | 239 | if (unlikely(args.fsbno == NULLFSBLOCK)) { |
241 | /* | 240 | /* |
242 | * Set the alignment for the allocation. | 241 | * Set the alignment for the allocation. |
243 | * If stripe alignment is turned on then align at stripe unit | 242 | * If stripe alignment is turned on then align at stripe unit |
244 | * boundary. | 243 | * boundary. |
245 | * If the cluster size is smaller than a filesystem block | 244 | * If the cluster size is smaller than a filesystem block |
246 | * then we're doing I/O for inodes in filesystem block size | 245 | * then we're doing I/O for inodes in filesystem block size |
247 | * pieces, so don't need alignment anyway. | 246 | * pieces, so don't need alignment anyway. |
248 | */ | 247 | */ |
249 | isaligned = 0; | 248 | isaligned = 0; |
250 | if (args.mp->m_sinoalign) { | 249 | if (args.mp->m_sinoalign) { |
251 | ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); | 250 | ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); |
252 | args.alignment = args.mp->m_dalign; | 251 | args.alignment = args.mp->m_dalign; |
253 | isaligned = 1; | 252 | isaligned = 1; |
254 | } else | 253 | } else |
255 | args.alignment = xfs_ialloc_cluster_alignment(&args); | 254 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
256 | /* | 255 | /* |
257 | * Need to figure out where to allocate the inode blocks. | 256 | * Need to figure out where to allocate the inode blocks. |
258 | * Ideally they should be spaced out through the a.g. | 257 | * Ideally they should be spaced out through the a.g. |
259 | * For now, just allocate blocks up front. | 258 | * For now, just allocate blocks up front. |
260 | */ | 259 | */ |
261 | args.agbno = be32_to_cpu(agi->agi_root); | 260 | args.agbno = be32_to_cpu(agi->agi_root); |
262 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 261 | args.fsbno = XFS_AGB_TO_FSB(args.mp, |
263 | be32_to_cpu(agi->agi_seqno), args.agbno); | 262 | be32_to_cpu(agi->agi_seqno), args.agbno); |
264 | /* | 263 | /* |
265 | * Allocate a fixed-size extent of inodes. | 264 | * Allocate a fixed-size extent of inodes. |
266 | */ | 265 | */ |
267 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 266 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
268 | args.mod = args.total = args.wasdel = args.isfl = | 267 | args.mod = args.total = args.wasdel = args.isfl = |
269 | args.userdata = args.minalignslop = 0; | 268 | args.userdata = args.minalignslop = 0; |
270 | args.prod = 1; | 269 | args.prod = 1; |
271 | /* | 270 | /* |
272 | * Allow space for the inode btree to split. | 271 | * Allow space for the inode btree to split. |
273 | */ | 272 | */ |
274 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; | 273 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; |
275 | if ((error = xfs_alloc_vextent(&args))) | 274 | if ((error = xfs_alloc_vextent(&args))) |
276 | return error; | 275 | return error; |
277 | } | 276 | } |
278 | 277 | ||
279 | /* | 278 | /* |
280 | * If stripe alignment is turned on, then try again with cluster | 279 | * If stripe alignment is turned on, then try again with cluster |
281 | * alignment. | 280 | * alignment. |
282 | */ | 281 | */ |
283 | if (isaligned && args.fsbno == NULLFSBLOCK) { | 282 | if (isaligned && args.fsbno == NULLFSBLOCK) { |
284 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 283 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
285 | args.agbno = be32_to_cpu(agi->agi_root); | 284 | args.agbno = be32_to_cpu(agi->agi_root); |
286 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 285 | args.fsbno = XFS_AGB_TO_FSB(args.mp, |
287 | be32_to_cpu(agi->agi_seqno), args.agbno); | 286 | be32_to_cpu(agi->agi_seqno), args.agbno); |
288 | args.alignment = xfs_ialloc_cluster_alignment(&args); | 287 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
289 | if ((error = xfs_alloc_vextent(&args))) | 288 | if ((error = xfs_alloc_vextent(&args))) |
290 | return error; | 289 | return error; |
291 | } | 290 | } |
292 | 291 | ||
293 | if (args.fsbno == NULLFSBLOCK) { | 292 | if (args.fsbno == NULLFSBLOCK) { |
294 | *alloc = 0; | 293 | *alloc = 0; |
295 | return 0; | 294 | return 0; |
296 | } | 295 | } |
297 | ASSERT(args.len == args.minlen); | 296 | ASSERT(args.len == args.minlen); |
298 | /* | 297 | /* |
299 | * Convert the results. | 298 | * Convert the results. |
300 | */ | 299 | */ |
301 | newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); | 300 | newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); |
302 | /* | 301 | /* |
303 | * Loop over the new block(s), filling in the inodes. | 302 | * Loop over the new block(s), filling in the inodes. |
304 | * For small block sizes, manipulate the inodes in buffers | 303 | * For small block sizes, manipulate the inodes in buffers |
305 | * which are multiples of the blocks size. | 304 | * which are multiples of the blocks size. |
306 | */ | 305 | */ |
307 | if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { | 306 | if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { |
308 | blks_per_cluster = 1; | 307 | blks_per_cluster = 1; |
309 | nbufs = (int)args.len; | 308 | nbufs = (int)args.len; |
310 | ninodes = args.mp->m_sb.sb_inopblock; | 309 | ninodes = args.mp->m_sb.sb_inopblock; |
311 | } else { | 310 | } else { |
312 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / | 311 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / |
313 | args.mp->m_sb.sb_blocksize; | 312 | args.mp->m_sb.sb_blocksize; |
314 | nbufs = (int)args.len / blks_per_cluster; | 313 | nbufs = (int)args.len / blks_per_cluster; |
315 | ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; | 314 | ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; |
316 | } | 315 | } |
317 | /* | 316 | /* |
318 | * Figure out what version number to use in the inodes we create. | 317 | * Figure out what version number to use in the inodes we create. |
319 | * If the superblock version has caught up to the one that supports | 318 | * If the superblock version has caught up to the one that supports |
320 | * the new inode format, then use the new inode version. Otherwise | 319 | * the new inode format, then use the new inode version. Otherwise |
321 | * use the old version so that old kernels will continue to be | 320 | * use the old version so that old kernels will continue to be |
322 | * able to use the file system. | 321 | * able to use the file system. |
323 | */ | 322 | */ |
324 | if (xfs_sb_version_hasnlink(&args.mp->m_sb)) | 323 | if (xfs_sb_version_hasnlink(&args.mp->m_sb)) |
325 | version = 2; | 324 | version = 2; |
326 | else | 325 | else |
327 | version = 1; | 326 | version = 1; |
328 | 327 | ||
329 | /* | 328 | /* |
330 | * Seed the new inode cluster with a random generation number. This | 329 | * Seed the new inode cluster with a random generation number. This |
331 | * prevents short-term reuse of generation numbers if a chunk is | 330 | * prevents short-term reuse of generation numbers if a chunk is |
332 | * freed and then immediately reallocated. We use random numbers | 331 | * freed and then immediately reallocated. We use random numbers |
333 | * rather than a linear progression to prevent the next generation | 332 | * rather than a linear progression to prevent the next generation |
334 | * number from being easily guessable. | 333 | * number from being easily guessable. |
335 | */ | 334 | */ |
336 | gen = random32(); | 335 | gen = random32(); |
337 | for (j = 0; j < nbufs; j++) { | 336 | for (j = 0; j < nbufs; j++) { |
338 | /* | 337 | /* |
339 | * Get the block. | 338 | * Get the block. |
340 | */ | 339 | */ |
341 | d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno), | 340 | d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno), |
342 | args.agbno + (j * blks_per_cluster)); | 341 | args.agbno + (j * blks_per_cluster)); |
343 | fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, | 342 | fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, |
344 | args.mp->m_bsize * blks_per_cluster, | 343 | args.mp->m_bsize * blks_per_cluster, |
345 | XFS_BUF_LOCK); | 344 | XFS_BUF_LOCK); |
346 | ASSERT(fbuf); | 345 | ASSERT(fbuf); |
347 | ASSERT(!XFS_BUF_GETERROR(fbuf)); | 346 | ASSERT(!XFS_BUF_GETERROR(fbuf)); |
348 | 347 | ||
349 | /* | 348 | /* |
350 | * Initialize all inodes in this buffer and then log them. | 349 | * Initialize all inodes in this buffer and then log them. |
351 | * | 350 | * |
352 | * XXX: It would be much better if we had just one transaction to | 351 | * XXX: It would be much better if we had just one transaction to |
353 | * log a whole cluster of inodes instead of all the indivdual | 352 | * log a whole cluster of inodes instead of all the indivdual |
354 | * transactions causing a lot of log traffic. | 353 | * transactions causing a lot of log traffic. |
355 | */ | 354 | */ |
356 | xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); | 355 | xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); |
357 | for (i = 0; i < ninodes; i++) { | 356 | for (i = 0; i < ninodes; i++) { |
358 | int ioffset = i << args.mp->m_sb.sb_inodelog; | 357 | int ioffset = i << args.mp->m_sb.sb_inodelog; |
359 | uint isize = sizeof(struct xfs_dinode); | 358 | uint isize = sizeof(struct xfs_dinode); |
360 | 359 | ||
361 | free = XFS_MAKE_IPTR(args.mp, fbuf, i); | 360 | free = XFS_MAKE_IPTR(args.mp, fbuf, i); |
362 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); | 361 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); |
363 | free->di_version = version; | 362 | free->di_version = version; |
364 | free->di_gen = cpu_to_be32(gen); | 363 | free->di_gen = cpu_to_be32(gen); |
365 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); | 364 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); |
366 | xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); | 365 | xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); |
367 | } | 366 | } |
368 | xfs_trans_inode_alloc_buf(tp, fbuf); | 367 | xfs_trans_inode_alloc_buf(tp, fbuf); |
369 | } | 368 | } |
370 | be32_add_cpu(&agi->agi_count, newlen); | 369 | be32_add_cpu(&agi->agi_count, newlen); |
371 | be32_add_cpu(&agi->agi_freecount, newlen); | 370 | be32_add_cpu(&agi->agi_freecount, newlen); |
372 | agno = be32_to_cpu(agi->agi_seqno); | 371 | agno = be32_to_cpu(agi->agi_seqno); |
373 | down_read(&args.mp->m_peraglock); | 372 | down_read(&args.mp->m_peraglock); |
374 | args.mp->m_perag[agno].pagi_freecount += newlen; | 373 | args.mp->m_perag[agno].pagi_freecount += newlen; |
375 | up_read(&args.mp->m_peraglock); | 374 | up_read(&args.mp->m_peraglock); |
376 | agi->agi_newino = cpu_to_be32(newino); | 375 | agi->agi_newino = cpu_to_be32(newino); |
377 | /* | 376 | /* |
378 | * Insert records describing the new inode chunk into the btree. | 377 | * Insert records describing the new inode chunk into the btree. |
379 | */ | 378 | */ |
380 | cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); | 379 | cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); |
381 | for (thisino = newino; | 380 | for (thisino = newino; |
382 | thisino < newino + newlen; | 381 | thisino < newino + newlen; |
383 | thisino += XFS_INODES_PER_CHUNK) { | 382 | thisino += XFS_INODES_PER_CHUNK) { |
384 | if ((error = xfs_inobt_lookup_eq(cur, thisino, | 383 | if ((error = xfs_inobt_lookup_eq(cur, thisino, |
385 | XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { | 384 | XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { |
386 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 385 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
387 | return error; | 386 | return error; |
388 | } | 387 | } |
389 | ASSERT(i == 0); | 388 | ASSERT(i == 0); |
390 | if ((error = xfs_btree_insert(cur, &i))) { | 389 | if ((error = xfs_btree_insert(cur, &i))) { |
391 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 390 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
392 | return error; | 391 | return error; |
393 | } | 392 | } |
394 | ASSERT(i == 1); | 393 | ASSERT(i == 1); |
395 | } | 394 | } |
396 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 395 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
397 | /* | 396 | /* |
398 | * Log allocation group header fields | 397 | * Log allocation group header fields |
399 | */ | 398 | */ |
400 | xfs_ialloc_log_agi(tp, agbp, | 399 | xfs_ialloc_log_agi(tp, agbp, |
401 | XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); | 400 | XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); |
402 | /* | 401 | /* |
403 | * Modify/log superblock values for inode count and inode free count. | 402 | * Modify/log superblock values for inode count and inode free count. |
404 | */ | 403 | */ |
405 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); | 404 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); |
406 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); | 405 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); |
407 | *alloc = 1; | 406 | *alloc = 1; |
408 | return 0; | 407 | return 0; |
409 | } | 408 | } |
410 | 409 | ||
411 | STATIC_INLINE xfs_agnumber_t | 410 | STATIC_INLINE xfs_agnumber_t |
412 | xfs_ialloc_next_ag( | 411 | xfs_ialloc_next_ag( |
413 | xfs_mount_t *mp) | 412 | xfs_mount_t *mp) |
414 | { | 413 | { |
415 | xfs_agnumber_t agno; | 414 | xfs_agnumber_t agno; |
416 | 415 | ||
417 | spin_lock(&mp->m_agirotor_lock); | 416 | spin_lock(&mp->m_agirotor_lock); |
418 | agno = mp->m_agirotor; | 417 | agno = mp->m_agirotor; |
419 | if (++mp->m_agirotor == mp->m_maxagi) | 418 | if (++mp->m_agirotor == mp->m_maxagi) |
420 | mp->m_agirotor = 0; | 419 | mp->m_agirotor = 0; |
421 | spin_unlock(&mp->m_agirotor_lock); | 420 | spin_unlock(&mp->m_agirotor_lock); |
422 | 421 | ||
423 | return agno; | 422 | return agno; |
424 | } | 423 | } |
425 | 424 | ||
/*
 * Select an allocation group to look for a free inode in, based on the parent
 * inode and then mode.  Return the allocation group buffer.
 *
 * Returns the locked AGI buffer of the chosen AG, or NULL if no suitable
 * AG could be found (or the filesystem is shutting down).  Directories
 * start from the rotating AG cursor to spread them out; other files start
 * in the parent's AG for locality.
 */
STATIC xfs_buf_t *			/* allocation group buffer */
xfs_ialloc_ag_select(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ino_t	parent,		/* parent directory inode number */
	mode_t		mode,		/* bits set to indicate file type */
	int		okalloc)	/* ok to allocate more space */
{
	xfs_buf_t	*agbp;		/* allocation group header buffer */
	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */
	xfs_agnumber_t	agno;		/* current ag number */
	int		flags;		/* alloc buffer locking flags */
	xfs_extlen_t	ineed;		/* blocks needed for inode allocation */
	xfs_extlen_t	longest = 0;	/* longest extent available */
	xfs_mount_t	*mp;		/* mount point structure */
	int		needspace;	/* file mode implies space allocated */
	xfs_perag_t	*pag;		/* per allocation group data */
	xfs_agnumber_t	pagno;		/* parent (starting) ag number */

	/*
	 * Files of these types need at least one block if length > 0
	 * (and they won't fit in the inode, but that's hard to figure out).
	 */
	needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
	mp = tp->t_mountp;
	agcount = mp->m_maxagi;
	if (S_ISDIR(mode))
		pagno = xfs_ialloc_next_ag(mp);
	else {
		pagno = XFS_INO_TO_AGNO(mp, parent);
		/* Parent may live beyond m_maxagi after a shrinkage; wrap. */
		if (pagno >= agcount)
			pagno = 0;
	}
	ASSERT(pagno < agcount);
	/*
	 * Loop through allocation groups, looking for one with a little
	 * free space in it.  Note we don't look for free inodes, exactly.
	 * Instead, we include whether there is a need to allocate inodes
	 * to mean that blocks must be allocated for them,
	 * if none are currently free.
	 *
	 * Two-pass scan: the first pass uses TRYLOCK when initializing
	 * pagf so we don't block on busy AGs; once we have wrapped all
	 * the way around, flags is cleared and the second pass may block.
	 */
	agno = pagno;
	flags = XFS_ALLOC_FLAG_TRYLOCK;
	down_read(&mp->m_peraglock);
	for (;;) {
		pag = &mp->m_perag[agno];
		if (!pag->pagi_init) {
			/* Reading the AGI also initializes pagi_* fields. */
			if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
				agbp = NULL;
				goto nextag;
			}
		} else
			agbp = NULL;

		if (!pag->pagi_inodeok) {
			/* Advance the rotor past an inode-ineligible AG. */
			xfs_ialloc_next_ag(mp);
			goto unlock_nextag;
		}

		/*
		 * Is there enough free space for the file plus a block
		 * of inodes (if we need to allocate some)?
		 */
		ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
		if (ineed && !pag->pagf_init) {
			if (agbp == NULL &&
			    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
				agbp = NULL;
				goto nextag;
			}
			/* Best effort: populate pagf_*; failure leaves
			 * pagf_init clear and the AG is skipped below. */
			(void)xfs_alloc_pagf_init(mp, tp, agno, flags);
		}
		if (!ineed || pag->pagf_init) {
			/* No single extent big enough?  A non-empty free
			 * list still counts as "one block available". */
			if (ineed && !(longest = pag->pagf_longest))
				longest = pag->pagf_flcount > 0;
			if (!ineed ||
			    (pag->pagf_freeblks >= needspace + ineed &&
			     longest >= ineed &&
			     okalloc)) {
				if (agbp == NULL &&
				    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
					agbp = NULL;
					goto nextag;
				}
				/* Success: return the locked AGI buffer. */
				up_read(&mp->m_peraglock);
				return agbp;
			}
		}
unlock_nextag:
		if (agbp)
			xfs_trans_brelse(tp, agbp);
nextag:
		/*
		 * No point in iterating over the rest, if we're shutting
		 * down.
		 */
		if (XFS_FORCED_SHUTDOWN(mp)) {
			up_read(&mp->m_peraglock);
			return NULL;
		}
		agno++;
		if (agno >= agcount)
			agno = 0;
		if (agno == pagno) {
			/* Wrapped around.  Give up after the second
			 * (blocking) pass; otherwise retry without
			 * TRYLOCK. */
			if (flags == 0) {
				up_read(&mp->m_peraglock);
				return NULL;
			}
			flags = 0;
		}
	}
}
541 | 540 | ||
542 | /* | 541 | /* |
543 | * Visible inode allocation functions. | 542 | * Visible inode allocation functions. |
544 | */ | 543 | */ |
545 | 544 | ||
546 | /* | 545 | /* |
547 | * Allocate an inode on disk. | 546 | * Allocate an inode on disk. |
548 | * Mode is used to tell whether the new inode will need space, and whether | 547 | * Mode is used to tell whether the new inode will need space, and whether |
549 | * it is a directory. | 548 | * it is a directory. |
550 | * | 549 | * |
551 | * The arguments IO_agbp and alloc_done are defined to work within | 550 | * The arguments IO_agbp and alloc_done are defined to work within |
552 | * the constraint of one allocation per transaction. | 551 | * the constraint of one allocation per transaction. |
553 | * xfs_dialloc() is designed to be called twice if it has to do an | 552 | * xfs_dialloc() is designed to be called twice if it has to do an |
554 | * allocation to make more free inodes. On the first call, | 553 | * allocation to make more free inodes. On the first call, |
555 | * IO_agbp should be set to NULL. If an inode is available, | 554 | * IO_agbp should be set to NULL. If an inode is available, |
556 | * i.e., xfs_dialloc() did not need to do an allocation, an inode | 555 | * i.e., xfs_dialloc() did not need to do an allocation, an inode |
557 | * number is returned. In this case, IO_agbp would be set to the | 556 | * number is returned. In this case, IO_agbp would be set to the |
558 | * current ag_buf and alloc_done set to false. | 557 | * current ag_buf and alloc_done set to false. |
559 | * If an allocation needed to be done, xfs_dialloc would return | 558 | * If an allocation needed to be done, xfs_dialloc would return |
560 | * the current ag_buf in IO_agbp and set alloc_done to true. | 559 | * the current ag_buf in IO_agbp and set alloc_done to true. |
561 | * The caller should then commit the current transaction, allocate a new | 560 | * The caller should then commit the current transaction, allocate a new |
562 | * transaction, and call xfs_dialloc() again, passing in the previous | 561 | * transaction, and call xfs_dialloc() again, passing in the previous |
563 | * value of IO_agbp. IO_agbp should be held across the transactions. | 562 | * value of IO_agbp. IO_agbp should be held across the transactions. |
564 | * Since the agbp is locked across the two calls, the second call is | 563 | * Since the agbp is locked across the two calls, the second call is |
565 | * guaranteed to have a free inode available. | 564 | * guaranteed to have a free inode available. |
566 | * | 565 | * |
567 | * Once we successfully pick an inode its number is returned and the | 566 | * Once we successfully pick an inode its number is returned and the |
568 | * on-disk data structures are updated. The inode itself is not read | 567 | * on-disk data structures are updated. The inode itself is not read |
569 | * in, since doing so would break ordering constraints with xfs_reclaim. | 568 | * in, since doing so would break ordering constraints with xfs_reclaim. |
570 | */ | 569 | */ |
571 | int | 570 | int |
572 | xfs_dialloc( | 571 | xfs_dialloc( |
573 | xfs_trans_t *tp, /* transaction pointer */ | 572 | xfs_trans_t *tp, /* transaction pointer */ |
574 | xfs_ino_t parent, /* parent inode (directory) */ | 573 | xfs_ino_t parent, /* parent inode (directory) */ |
575 | mode_t mode, /* mode bits for new inode */ | 574 | mode_t mode, /* mode bits for new inode */ |
576 | int okalloc, /* ok to allocate more space */ | 575 | int okalloc, /* ok to allocate more space */ |
577 | xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ | 576 | xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ |
578 | boolean_t *alloc_done, /* true if we needed to replenish | 577 | boolean_t *alloc_done, /* true if we needed to replenish |
579 | inode freelist */ | 578 | inode freelist */ |
580 | xfs_ino_t *inop) /* inode number allocated */ | 579 | xfs_ino_t *inop) /* inode number allocated */ |
581 | { | 580 | { |
582 | xfs_agnumber_t agcount; /* number of allocation groups */ | 581 | xfs_agnumber_t agcount; /* number of allocation groups */ |
583 | xfs_buf_t *agbp; /* allocation group header's buffer */ | 582 | xfs_buf_t *agbp; /* allocation group header's buffer */ |
584 | xfs_agnumber_t agno; /* allocation group number */ | 583 | xfs_agnumber_t agno; /* allocation group number */ |
585 | xfs_agi_t *agi; /* allocation group header structure */ | 584 | xfs_agi_t *agi; /* allocation group header structure */ |
586 | xfs_btree_cur_t *cur; /* inode allocation btree cursor */ | 585 | xfs_btree_cur_t *cur; /* inode allocation btree cursor */ |
587 | int error; /* error return value */ | 586 | int error; /* error return value */ |
588 | int i; /* result code */ | 587 | int i; /* result code */ |
589 | int ialloced; /* inode allocation status */ | 588 | int ialloced; /* inode allocation status */ |
590 | int noroom = 0; /* no space for inode blk allocation */ | 589 | int noroom = 0; /* no space for inode blk allocation */ |
591 | xfs_ino_t ino; /* fs-relative inode to be returned */ | 590 | xfs_ino_t ino; /* fs-relative inode to be returned */ |
592 | /* REFERENCED */ | 591 | /* REFERENCED */ |
593 | int j; /* result code */ | 592 | int j; /* result code */ |
594 | xfs_mount_t *mp; /* file system mount structure */ | 593 | xfs_mount_t *mp; /* file system mount structure */ |
595 | int offset; /* index of inode in chunk */ | 594 | int offset; /* index of inode in chunk */ |
596 | xfs_agino_t pagino; /* parent's a.g. relative inode # */ | 595 | xfs_agino_t pagino; /* parent's a.g. relative inode # */ |
597 | xfs_agnumber_t pagno; /* parent's allocation group number */ | 596 | xfs_agnumber_t pagno; /* parent's allocation group number */ |
598 | xfs_inobt_rec_incore_t rec; /* inode allocation record */ | 597 | xfs_inobt_rec_incore_t rec; /* inode allocation record */ |
599 | xfs_agnumber_t tagno; /* testing allocation group number */ | 598 | xfs_agnumber_t tagno; /* testing allocation group number */ |
600 | xfs_btree_cur_t *tcur; /* temp cursor */ | 599 | xfs_btree_cur_t *tcur; /* temp cursor */ |
601 | xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ | 600 | xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ |
602 | 601 | ||
603 | 602 | ||
604 | if (*IO_agbp == NULL) { | 603 | if (*IO_agbp == NULL) { |
605 | /* | 604 | /* |
606 | * We do not have an agbp, so select an initial allocation | 605 | * We do not have an agbp, so select an initial allocation |
607 | * group for inode allocation. | 606 | * group for inode allocation. |
608 | */ | 607 | */ |
609 | agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); | 608 | agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); |
610 | /* | 609 | /* |
611 | * Couldn't find an allocation group satisfying the | 610 | * Couldn't find an allocation group satisfying the |
612 | * criteria, give up. | 611 | * criteria, give up. |
613 | */ | 612 | */ |
614 | if (!agbp) { | 613 | if (!agbp) { |
615 | *inop = NULLFSINO; | 614 | *inop = NULLFSINO; |
616 | return 0; | 615 | return 0; |
617 | } | 616 | } |
618 | agi = XFS_BUF_TO_AGI(agbp); | 617 | agi = XFS_BUF_TO_AGI(agbp); |
619 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); | 618 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); |
620 | } else { | 619 | } else { |
621 | /* | 620 | /* |
622 | * Continue where we left off before. In this case, we | 621 | * Continue where we left off before. In this case, we |
623 | * know that the allocation group has free inodes. | 622 | * know that the allocation group has free inodes. |
624 | */ | 623 | */ |
625 | agbp = *IO_agbp; | 624 | agbp = *IO_agbp; |
626 | agi = XFS_BUF_TO_AGI(agbp); | 625 | agi = XFS_BUF_TO_AGI(agbp); |
627 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); | 626 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); |
628 | ASSERT(be32_to_cpu(agi->agi_freecount) > 0); | 627 | ASSERT(be32_to_cpu(agi->agi_freecount) > 0); |
629 | } | 628 | } |
630 | mp = tp->t_mountp; | 629 | mp = tp->t_mountp; |
631 | agcount = mp->m_sb.sb_agcount; | 630 | agcount = mp->m_sb.sb_agcount; |
632 | agno = be32_to_cpu(agi->agi_seqno); | 631 | agno = be32_to_cpu(agi->agi_seqno); |
633 | tagno = agno; | 632 | tagno = agno; |
634 | pagno = XFS_INO_TO_AGNO(mp, parent); | 633 | pagno = XFS_INO_TO_AGNO(mp, parent); |
635 | pagino = XFS_INO_TO_AGINO(mp, parent); | 634 | pagino = XFS_INO_TO_AGINO(mp, parent); |
636 | 635 | ||
637 | /* | 636 | /* |
638 | * If we have already hit the ceiling of inode blocks then clear | 637 | * If we have already hit the ceiling of inode blocks then clear |
639 | * okalloc so we scan all available agi structures for a free | 638 | * okalloc so we scan all available agi structures for a free |
640 | * inode. | 639 | * inode. |
641 | */ | 640 | */ |
642 | 641 | ||
643 | if (mp->m_maxicount && | 642 | if (mp->m_maxicount && |
644 | mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { | 643 | mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { |
645 | noroom = 1; | 644 | noroom = 1; |
646 | okalloc = 0; | 645 | okalloc = 0; |
647 | } | 646 | } |
648 | 647 | ||
649 | /* | 648 | /* |
650 | * Loop until we find an allocation group that either has free inodes | 649 | * Loop until we find an allocation group that either has free inodes |
651 | * or in which we can allocate some inodes. Iterate through the | 650 | * or in which we can allocate some inodes. Iterate through the |
652 | * allocation groups upward, wrapping at the end. | 651 | * allocation groups upward, wrapping at the end. |
653 | */ | 652 | */ |
654 | *alloc_done = B_FALSE; | 653 | *alloc_done = B_FALSE; |
655 | while (!agi->agi_freecount) { | 654 | while (!agi->agi_freecount) { |
656 | /* | 655 | /* |
657 | * Don't do anything if we're not supposed to allocate | 656 | * Don't do anything if we're not supposed to allocate |
658 | * any blocks, just go on to the next ag. | 657 | * any blocks, just go on to the next ag. |
659 | */ | 658 | */ |
660 | if (okalloc) { | 659 | if (okalloc) { |
661 | /* | 660 | /* |
662 | * Try to allocate some new inodes in the allocation | 661 | * Try to allocate some new inodes in the allocation |
663 | * group. | 662 | * group. |
664 | */ | 663 | */ |
665 | if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { | 664 | if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { |
666 | xfs_trans_brelse(tp, agbp); | 665 | xfs_trans_brelse(tp, agbp); |
667 | if (error == ENOSPC) { | 666 | if (error == ENOSPC) { |
668 | *inop = NULLFSINO; | 667 | *inop = NULLFSINO; |
669 | return 0; | 668 | return 0; |
670 | } else | 669 | } else |
671 | return error; | 670 | return error; |
672 | } | 671 | } |
673 | if (ialloced) { | 672 | if (ialloced) { |
674 | /* | 673 | /* |
675 | * We successfully allocated some inodes, return | 674 | * We successfully allocated some inodes, return |
676 | * the current context to the caller so that it | 675 | * the current context to the caller so that it |
677 | * can commit the current transaction and call | 676 | * can commit the current transaction and call |
678 | * us again where we left off. | 677 | * us again where we left off. |
679 | */ | 678 | */ |
680 | ASSERT(be32_to_cpu(agi->agi_freecount) > 0); | 679 | ASSERT(be32_to_cpu(agi->agi_freecount) > 0); |
681 | *alloc_done = B_TRUE; | 680 | *alloc_done = B_TRUE; |
682 | *IO_agbp = agbp; | 681 | *IO_agbp = agbp; |
683 | *inop = NULLFSINO; | 682 | *inop = NULLFSINO; |
684 | return 0; | 683 | return 0; |
685 | } | 684 | } |
686 | } | 685 | } |
687 | /* | 686 | /* |
688 | * If it failed, give up on this ag. | 687 | * If it failed, give up on this ag. |
689 | */ | 688 | */ |
690 | xfs_trans_brelse(tp, agbp); | 689 | xfs_trans_brelse(tp, agbp); |
691 | /* | 690 | /* |
692 | * Go on to the next ag: get its ag header. | 691 | * Go on to the next ag: get its ag header. |
693 | */ | 692 | */ |
694 | nextag: | 693 | nextag: |
695 | if (++tagno == agcount) | 694 | if (++tagno == agcount) |
696 | tagno = 0; | 695 | tagno = 0; |
697 | if (tagno == agno) { | 696 | if (tagno == agno) { |
698 | *inop = NULLFSINO; | 697 | *inop = NULLFSINO; |
699 | return noroom ? ENOSPC : 0; | 698 | return noroom ? ENOSPC : 0; |
700 | } | 699 | } |
701 | down_read(&mp->m_peraglock); | 700 | down_read(&mp->m_peraglock); |
702 | if (mp->m_perag[tagno].pagi_inodeok == 0) { | 701 | if (mp->m_perag[tagno].pagi_inodeok == 0) { |
703 | up_read(&mp->m_peraglock); | 702 | up_read(&mp->m_peraglock); |
704 | goto nextag; | 703 | goto nextag; |
705 | } | 704 | } |
706 | error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); | 705 | error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); |
707 | up_read(&mp->m_peraglock); | 706 | up_read(&mp->m_peraglock); |
708 | if (error) | 707 | if (error) |
709 | goto nextag; | 708 | goto nextag; |
710 | agi = XFS_BUF_TO_AGI(agbp); | 709 | agi = XFS_BUF_TO_AGI(agbp); |
711 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); | 710 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); |
712 | } | 711 | } |
713 | /* | 712 | /* |
714 | * Here with an allocation group that has a free inode. | 713 | * Here with an allocation group that has a free inode. |
715 | * Reset agno since we may have chosen a new ag in the | 714 | * Reset agno since we may have chosen a new ag in the |
716 | * loop above. | 715 | * loop above. |
717 | */ | 716 | */ |
718 | agno = tagno; | 717 | agno = tagno; |
719 | *IO_agbp = NULL; | 718 | *IO_agbp = NULL; |
720 | cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); | 719 | cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); |
721 | /* | 720 | /* |
722 | * If pagino is 0 (this is the root inode allocation) use newino. | 721 | * If pagino is 0 (this is the root inode allocation) use newino. |
723 | * This must work because we've just allocated some. | 722 | * This must work because we've just allocated some. |
724 | */ | 723 | */ |
725 | if (!pagino) | 724 | if (!pagino) |
726 | pagino = be32_to_cpu(agi->agi_newino); | 725 | pagino = be32_to_cpu(agi->agi_newino); |
727 | #ifdef DEBUG | 726 | #ifdef DEBUG |
728 | if (cur->bc_nlevels == 1) { | 727 | if (cur->bc_nlevels == 1) { |
729 | int freecount = 0; | 728 | int freecount = 0; |
730 | 729 | ||
731 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 730 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) |
732 | goto error0; | 731 | goto error0; |
733 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 732 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
734 | do { | 733 | do { |
735 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 734 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, |
736 | &rec.ir_freecount, &rec.ir_free, &i))) | 735 | &rec.ir_freecount, &rec.ir_free, &i))) |
737 | goto error0; | 736 | goto error0; |
738 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 737 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
739 | freecount += rec.ir_freecount; | 738 | freecount += rec.ir_freecount; |
740 | if ((error = xfs_btree_increment(cur, 0, &i))) | 739 | if ((error = xfs_btree_increment(cur, 0, &i))) |
741 | goto error0; | 740 | goto error0; |
742 | } while (i == 1); | 741 | } while (i == 1); |
743 | 742 | ||
744 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | 743 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || |
745 | XFS_FORCED_SHUTDOWN(mp)); | 744 | XFS_FORCED_SHUTDOWN(mp)); |
746 | } | 745 | } |
747 | #endif | 746 | #endif |
748 | /* | 747 | /* |
749 | * If in the same a.g. as the parent, try to get near the parent. | 748 | * If in the same a.g. as the parent, try to get near the parent. |
750 | */ | 749 | */ |
751 | if (pagno == agno) { | 750 | if (pagno == agno) { |
752 | if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) | 751 | if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) |
753 | goto error0; | 752 | goto error0; |
754 | if (i != 0 && | 753 | if (i != 0 && |
755 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 754 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, |
756 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && | 755 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && |
757 | j == 1 && | 756 | j == 1 && |
758 | rec.ir_freecount > 0) { | 757 | rec.ir_freecount > 0) { |
759 | /* | 758 | /* |
760 | * Found a free inode in the same chunk | 759 | * Found a free inode in the same chunk |
761 | * as parent, done. | 760 | * as parent, done. |
762 | */ | 761 | */ |
763 | } | 762 | } |
764 | /* | 763 | /* |
765 | * In the same a.g. as parent, but parent's chunk is full. | 764 | * In the same a.g. as parent, but parent's chunk is full. |
766 | */ | 765 | */ |
767 | else { | 766 | else { |
768 | int doneleft; /* done, to the left */ | 767 | int doneleft; /* done, to the left */ |
769 | int doneright; /* done, to the right */ | 768 | int doneright; /* done, to the right */ |
770 | 769 | ||
771 | if (error) | 770 | if (error) |
772 | goto error0; | 771 | goto error0; |
773 | ASSERT(i == 1); | 772 | ASSERT(i == 1); |
774 | ASSERT(j == 1); | 773 | ASSERT(j == 1); |
775 | /* | 774 | /* |
776 | * Duplicate the cursor, search left & right | 775 | * Duplicate the cursor, search left & right |
777 | * simultaneously. | 776 | * simultaneously. |
778 | */ | 777 | */ |
779 | if ((error = xfs_btree_dup_cursor(cur, &tcur))) | 778 | if ((error = xfs_btree_dup_cursor(cur, &tcur))) |
780 | goto error0; | 779 | goto error0; |
781 | /* | 780 | /* |
782 | * Search left with tcur, back up 1 record. | 781 | * Search left with tcur, back up 1 record. |
783 | */ | 782 | */ |
784 | if ((error = xfs_btree_decrement(tcur, 0, &i))) | 783 | if ((error = xfs_btree_decrement(tcur, 0, &i))) |
785 | goto error1; | 784 | goto error1; |
786 | doneleft = !i; | 785 | doneleft = !i; |
787 | if (!doneleft) { | 786 | if (!doneleft) { |
788 | if ((error = xfs_inobt_get_rec(tcur, | 787 | if ((error = xfs_inobt_get_rec(tcur, |
789 | &trec.ir_startino, | 788 | &trec.ir_startino, |
790 | &trec.ir_freecount, | 789 | &trec.ir_freecount, |
791 | &trec.ir_free, &i))) | 790 | &trec.ir_free, &i))) |
792 | goto error1; | 791 | goto error1; |
793 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); | 792 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); |
794 | } | 793 | } |
795 | /* | 794 | /* |
796 | * Search right with cur, go forward 1 record. | 795 | * Search right with cur, go forward 1 record. |
797 | */ | 796 | */ |
798 | if ((error = xfs_btree_increment(cur, 0, &i))) | 797 | if ((error = xfs_btree_increment(cur, 0, &i))) |
799 | goto error1; | 798 | goto error1; |
800 | doneright = !i; | 799 | doneright = !i; |
801 | if (!doneright) { | 800 | if (!doneright) { |
802 | if ((error = xfs_inobt_get_rec(cur, | 801 | if ((error = xfs_inobt_get_rec(cur, |
803 | &rec.ir_startino, | 802 | &rec.ir_startino, |
804 | &rec.ir_freecount, | 803 | &rec.ir_freecount, |
805 | &rec.ir_free, &i))) | 804 | &rec.ir_free, &i))) |
806 | goto error1; | 805 | goto error1; |
807 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); | 806 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); |
808 | } | 807 | } |
809 | /* | 808 | /* |
810 | * Loop until we find the closest inode chunk | 809 | * Loop until we find the closest inode chunk |
811 | * with a free one. | 810 | * with a free one. |
812 | */ | 811 | */ |
813 | while (!doneleft || !doneright) { | 812 | while (!doneleft || !doneright) { |
814 | int useleft; /* using left inode | 813 | int useleft; /* using left inode |
815 | chunk this time */ | 814 | chunk this time */ |
816 | 815 | ||
817 | /* | 816 | /* |
818 | * Figure out which block is closer, | 817 | * Figure out which block is closer, |
819 | * if both are valid. | 818 | * if both are valid. |
820 | */ | 819 | */ |
821 | if (!doneleft && !doneright) | 820 | if (!doneleft && !doneright) |
822 | useleft = | 821 | useleft = |
823 | pagino - | 822 | pagino - |
824 | (trec.ir_startino + | 823 | (trec.ir_startino + |
825 | XFS_INODES_PER_CHUNK - 1) < | 824 | XFS_INODES_PER_CHUNK - 1) < |
826 | rec.ir_startino - pagino; | 825 | rec.ir_startino - pagino; |
827 | else | 826 | else |
828 | useleft = !doneleft; | 827 | useleft = !doneleft; |
829 | /* | 828 | /* |
830 | * If checking the left, does it have | 829 | * If checking the left, does it have |
831 | * free inodes? | 830 | * free inodes? |
832 | */ | 831 | */ |
833 | if (useleft && trec.ir_freecount) { | 832 | if (useleft && trec.ir_freecount) { |
834 | /* | 833 | /* |
835 | * Yes, set it up as the chunk to use. | 834 | * Yes, set it up as the chunk to use. |
836 | */ | 835 | */ |
837 | rec = trec; | 836 | rec = trec; |
838 | xfs_btree_del_cursor(cur, | 837 | xfs_btree_del_cursor(cur, |
839 | XFS_BTREE_NOERROR); | 838 | XFS_BTREE_NOERROR); |
840 | cur = tcur; | 839 | cur = tcur; |
841 | break; | 840 | break; |
842 | } | 841 | } |
843 | /* | 842 | /* |
844 | * If checking the right, does it have | 843 | * If checking the right, does it have |
845 | * free inodes? | 844 | * free inodes? |
846 | */ | 845 | */ |
847 | if (!useleft && rec.ir_freecount) { | 846 | if (!useleft && rec.ir_freecount) { |
848 | /* | 847 | /* |
849 | * Yes, it's already set up. | 848 | * Yes, it's already set up. |
850 | */ | 849 | */ |
851 | xfs_btree_del_cursor(tcur, | 850 | xfs_btree_del_cursor(tcur, |
852 | XFS_BTREE_NOERROR); | 851 | XFS_BTREE_NOERROR); |
853 | break; | 852 | break; |
854 | } | 853 | } |
855 | /* | 854 | /* |
856 | * If used the left, get another one | 855 | * If used the left, get another one |
857 | * further left. | 856 | * further left. |
858 | */ | 857 | */ |
859 | if (useleft) { | 858 | if (useleft) { |
860 | if ((error = xfs_btree_decrement(tcur, 0, | 859 | if ((error = xfs_btree_decrement(tcur, 0, |
861 | &i))) | 860 | &i))) |
862 | goto error1; | 861 | goto error1; |
863 | doneleft = !i; | 862 | doneleft = !i; |
864 | if (!doneleft) { | 863 | if (!doneleft) { |
865 | if ((error = xfs_inobt_get_rec( | 864 | if ((error = xfs_inobt_get_rec( |
866 | tcur, | 865 | tcur, |
867 | &trec.ir_startino, | 866 | &trec.ir_startino, |
868 | &trec.ir_freecount, | 867 | &trec.ir_freecount, |
869 | &trec.ir_free, &i))) | 868 | &trec.ir_free, &i))) |
870 | goto error1; | 869 | goto error1; |
871 | XFS_WANT_CORRUPTED_GOTO(i == 1, | 870 | XFS_WANT_CORRUPTED_GOTO(i == 1, |
872 | error1); | 871 | error1); |
873 | } | 872 | } |
874 | } | 873 | } |
875 | /* | 874 | /* |
876 | * If used the right, get another one | 875 | * If used the right, get another one |
877 | * further right. | 876 | * further right. |
878 | */ | 877 | */ |
879 | else { | 878 | else { |
880 | if ((error = xfs_btree_increment(cur, 0, | 879 | if ((error = xfs_btree_increment(cur, 0, |
881 | &i))) | 880 | &i))) |
882 | goto error1; | 881 | goto error1; |
883 | doneright = !i; | 882 | doneright = !i; |
884 | if (!doneright) { | 883 | if (!doneright) { |
885 | if ((error = xfs_inobt_get_rec( | 884 | if ((error = xfs_inobt_get_rec( |
886 | cur, | 885 | cur, |
887 | &rec.ir_startino, | 886 | &rec.ir_startino, |
888 | &rec.ir_freecount, | 887 | &rec.ir_freecount, |
889 | &rec.ir_free, &i))) | 888 | &rec.ir_free, &i))) |
890 | goto error1; | 889 | goto error1; |
891 | XFS_WANT_CORRUPTED_GOTO(i == 1, | 890 | XFS_WANT_CORRUPTED_GOTO(i == 1, |
892 | error1); | 891 | error1); |
893 | } | 892 | } |
894 | } | 893 | } |
895 | } | 894 | } |
896 | ASSERT(!doneleft || !doneright); | 895 | ASSERT(!doneleft || !doneright); |
897 | } | 896 | } |
898 | } | 897 | } |
899 | /* | 898 | /* |
900 | * In a different a.g. from the parent. | 899 | * In a different a.g. from the parent. |
901 | * See if the most recently allocated block has any free. | 900 | * See if the most recently allocated block has any free. |
902 | */ | 901 | */ |
903 | else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { | 902 | else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { |
904 | if ((error = xfs_inobt_lookup_eq(cur, | 903 | if ((error = xfs_inobt_lookup_eq(cur, |
905 | be32_to_cpu(agi->agi_newino), 0, 0, &i))) | 904 | be32_to_cpu(agi->agi_newino), 0, 0, &i))) |
906 | goto error0; | 905 | goto error0; |
907 | if (i == 1 && | 906 | if (i == 1 && |
908 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 907 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, |
909 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && | 908 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && |
910 | j == 1 && | 909 | j == 1 && |
911 | rec.ir_freecount > 0) { | 910 | rec.ir_freecount > 0) { |
912 | /* | 911 | /* |
913 | * The last chunk allocated in the group still has | 912 | * The last chunk allocated in the group still has |
914 | * a free inode. | 913 | * a free inode. |
915 | */ | 914 | */ |
916 | } | 915 | } |
917 | /* | 916 | /* |
918 | * None left in the last group, search the whole a.g. | 917 | * None left in the last group, search the whole a.g. |
919 | */ | 918 | */ |
920 | else { | 919 | else { |
921 | if (error) | 920 | if (error) |
922 | goto error0; | 921 | goto error0; |
923 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 922 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) |
924 | goto error0; | 923 | goto error0; |
925 | ASSERT(i == 1); | 924 | ASSERT(i == 1); |
926 | for (;;) { | 925 | for (;;) { |
927 | if ((error = xfs_inobt_get_rec(cur, | 926 | if ((error = xfs_inobt_get_rec(cur, |
928 | &rec.ir_startino, | 927 | &rec.ir_startino, |
929 | &rec.ir_freecount, &rec.ir_free, | 928 | &rec.ir_freecount, &rec.ir_free, |
930 | &i))) | 929 | &i))) |
931 | goto error0; | 930 | goto error0; |
932 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 931 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
933 | if (rec.ir_freecount > 0) | 932 | if (rec.ir_freecount > 0) |
934 | break; | 933 | break; |
935 | if ((error = xfs_btree_increment(cur, 0, &i))) | 934 | if ((error = xfs_btree_increment(cur, 0, &i))) |
936 | goto error0; | 935 | goto error0; |
937 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 936 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
938 | } | 937 | } |
939 | } | 938 | } |
940 | } | 939 | } |
941 | offset = XFS_IALLOC_FIND_FREE(&rec.ir_free); | 940 | offset = XFS_IALLOC_FIND_FREE(&rec.ir_free); |
942 | ASSERT(offset >= 0); | 941 | ASSERT(offset >= 0); |
943 | ASSERT(offset < XFS_INODES_PER_CHUNK); | 942 | ASSERT(offset < XFS_INODES_PER_CHUNK); |
944 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % | 943 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % |
945 | XFS_INODES_PER_CHUNK) == 0); | 944 | XFS_INODES_PER_CHUNK) == 0); |
946 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | 945 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); |
947 | XFS_INOBT_CLR_FREE(&rec, offset); | 946 | XFS_INOBT_CLR_FREE(&rec, offset); |
948 | rec.ir_freecount--; | 947 | rec.ir_freecount--; |
949 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, | 948 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, |
950 | rec.ir_free))) | 949 | rec.ir_free))) |
951 | goto error0; | 950 | goto error0; |
952 | be32_add_cpu(&agi->agi_freecount, -1); | 951 | be32_add_cpu(&agi->agi_freecount, -1); |
953 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | 952 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); |
954 | down_read(&mp->m_peraglock); | 953 | down_read(&mp->m_peraglock); |
955 | mp->m_perag[tagno].pagi_freecount--; | 954 | mp->m_perag[tagno].pagi_freecount--; |
956 | up_read(&mp->m_peraglock); | 955 | up_read(&mp->m_peraglock); |
957 | #ifdef DEBUG | 956 | #ifdef DEBUG |
958 | if (cur->bc_nlevels == 1) { | 957 | if (cur->bc_nlevels == 1) { |
959 | int freecount = 0; | 958 | int freecount = 0; |
960 | 959 | ||
961 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 960 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) |
962 | goto error0; | 961 | goto error0; |
963 | do { | 962 | do { |
964 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 963 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, |
965 | &rec.ir_freecount, &rec.ir_free, &i))) | 964 | &rec.ir_freecount, &rec.ir_free, &i))) |
966 | goto error0; | 965 | goto error0; |
967 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 966 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
968 | freecount += rec.ir_freecount; | 967 | freecount += rec.ir_freecount; |
969 | if ((error = xfs_btree_increment(cur, 0, &i))) | 968 | if ((error = xfs_btree_increment(cur, 0, &i))) |
970 | goto error0; | 969 | goto error0; |
971 | } while (i == 1); | 970 | } while (i == 1); |
972 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | 971 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || |
973 | XFS_FORCED_SHUTDOWN(mp)); | 972 | XFS_FORCED_SHUTDOWN(mp)); |
974 | } | 973 | } |
975 | #endif | 974 | #endif |
976 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 975 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
977 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); | 976 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); |
978 | *inop = ino; | 977 | *inop = ino; |
979 | return 0; | 978 | return 0; |
980 | error1: | 979 | error1: |
981 | xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); | 980 | xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); |
982 | error0: | 981 | error0: |
983 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 982 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
984 | return error; | 983 | return error; |
985 | } | 984 | } |
986 | 985 | ||
987 | /* | 986 | /* |
988 | * Free disk inode. Carefully avoids touching the incore inode, all | 987 | * Free disk inode. Carefully avoids touching the incore inode, all |
989 | * manipulations incore are the caller's responsibility. | 988 | * manipulations incore are the caller's responsibility. |
990 | * The on-disk inode is not changed by this operation, only the | 989 | * The on-disk inode is not changed by this operation, only the |
991 | * btree (free inode mask) is changed. | 990 | * btree (free inode mask) is changed. |
992 | */ | 991 | */ |
993 | int | 992 | int |
994 | xfs_difree( | 993 | xfs_difree( |
995 | xfs_trans_t *tp, /* transaction pointer */ | 994 | xfs_trans_t *tp, /* transaction pointer */ |
996 | xfs_ino_t inode, /* inode to be freed */ | 995 | xfs_ino_t inode, /* inode to be freed */ |
997 | xfs_bmap_free_t *flist, /* extents to free */ | 996 | xfs_bmap_free_t *flist, /* extents to free */ |
998 | int *delete, /* set if inode cluster was deleted */ | 997 | int *delete, /* set if inode cluster was deleted */ |
999 | xfs_ino_t *first_ino) /* first inode in deleted cluster */ | 998 | xfs_ino_t *first_ino) /* first inode in deleted cluster */ |
1000 | { | 999 | { |
1001 | /* REFERENCED */ | 1000 | /* REFERENCED */ |
1002 | xfs_agblock_t agbno; /* block number containing inode */ | 1001 | xfs_agblock_t agbno; /* block number containing inode */ |
1003 | xfs_buf_t *agbp; /* buffer containing allocation group header */ | 1002 | xfs_buf_t *agbp; /* buffer containing allocation group header */ |
1004 | xfs_agino_t agino; /* inode number relative to allocation group */ | 1003 | xfs_agino_t agino; /* inode number relative to allocation group */ |
1005 | xfs_agnumber_t agno; /* allocation group number */ | 1004 | xfs_agnumber_t agno; /* allocation group number */ |
1006 | xfs_agi_t *agi; /* allocation group header */ | 1005 | xfs_agi_t *agi; /* allocation group header */ |
1007 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 1006 | xfs_btree_cur_t *cur; /* inode btree cursor */ |
1008 | int error; /* error return value */ | 1007 | int error; /* error return value */ |
1009 | int i; /* result code */ | 1008 | int i; /* result code */ |
1010 | int ilen; /* inodes in an inode cluster */ | 1009 | int ilen; /* inodes in an inode cluster */ |
1011 | xfs_mount_t *mp; /* mount structure for filesystem */ | 1010 | xfs_mount_t *mp; /* mount structure for filesystem */ |
1012 | int off; /* offset of inode in inode chunk */ | 1011 | int off; /* offset of inode in inode chunk */ |
1013 | xfs_inobt_rec_incore_t rec; /* btree record */ | 1012 | xfs_inobt_rec_incore_t rec; /* btree record */ |
1014 | 1013 | ||
1015 | mp = tp->t_mountp; | 1014 | mp = tp->t_mountp; |
1016 | 1015 | ||
1017 | /* | 1016 | /* |
1018 | * Break up inode number into its components. | 1017 | * Break up inode number into its components. |
1019 | */ | 1018 | */ |
1020 | agno = XFS_INO_TO_AGNO(mp, inode); | 1019 | agno = XFS_INO_TO_AGNO(mp, inode); |
1021 | if (agno >= mp->m_sb.sb_agcount) { | 1020 | if (agno >= mp->m_sb.sb_agcount) { |
1022 | cmn_err(CE_WARN, | 1021 | cmn_err(CE_WARN, |
1023 | "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", | 1022 | "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", |
1024 | agno, mp->m_sb.sb_agcount, mp->m_fsname); | 1023 | agno, mp->m_sb.sb_agcount, mp->m_fsname); |
1025 | ASSERT(0); | 1024 | ASSERT(0); |
1026 | return XFS_ERROR(EINVAL); | 1025 | return XFS_ERROR(EINVAL); |
1027 | } | 1026 | } |
1028 | agino = XFS_INO_TO_AGINO(mp, inode); | 1027 | agino = XFS_INO_TO_AGINO(mp, inode); |
1029 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { | 1028 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { |
1030 | cmn_err(CE_WARN, | 1029 | cmn_err(CE_WARN, |
1031 | "xfs_difree: inode != XFS_AGINO_TO_INO() " | 1030 | "xfs_difree: inode != XFS_AGINO_TO_INO() " |
1032 | "(%llu != %llu) on %s. Returning EINVAL.", | 1031 | "(%llu != %llu) on %s. Returning EINVAL.", |
1033 | (unsigned long long)inode, | 1032 | (unsigned long long)inode, |
1034 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino), | 1033 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino), |
1035 | mp->m_fsname); | 1034 | mp->m_fsname); |
1036 | ASSERT(0); | 1035 | ASSERT(0); |
1037 | return XFS_ERROR(EINVAL); | 1036 | return XFS_ERROR(EINVAL); |
1038 | } | 1037 | } |
1039 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); | 1038 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); |
1040 | if (agbno >= mp->m_sb.sb_agblocks) { | 1039 | if (agbno >= mp->m_sb.sb_agblocks) { |
1041 | cmn_err(CE_WARN, | 1040 | cmn_err(CE_WARN, |
1042 | "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", | 1041 | "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", |
1043 | agbno, mp->m_sb.sb_agblocks, mp->m_fsname); | 1042 | agbno, mp->m_sb.sb_agblocks, mp->m_fsname); |
1044 | ASSERT(0); | 1043 | ASSERT(0); |
1045 | return XFS_ERROR(EINVAL); | 1044 | return XFS_ERROR(EINVAL); |
1046 | } | 1045 | } |
1047 | /* | 1046 | /* |
1048 | * Get the allocation group header. | 1047 | * Get the allocation group header. |
1049 | */ | 1048 | */ |
1050 | down_read(&mp->m_peraglock); | 1049 | down_read(&mp->m_peraglock); |
1051 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | 1050 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); |
1052 | up_read(&mp->m_peraglock); | 1051 | up_read(&mp->m_peraglock); |
1053 | if (error) { | 1052 | if (error) { |
1054 | cmn_err(CE_WARN, | 1053 | cmn_err(CE_WARN, |
1055 | "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", | 1054 | "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", |
1056 | error, mp->m_fsname); | 1055 | error, mp->m_fsname); |
1057 | return error; | 1056 | return error; |
1058 | } | 1057 | } |
1059 | agi = XFS_BUF_TO_AGI(agbp); | 1058 | agi = XFS_BUF_TO_AGI(agbp); |
1060 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); | 1059 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); |
1061 | ASSERT(agbno < be32_to_cpu(agi->agi_length)); | 1060 | ASSERT(agbno < be32_to_cpu(agi->agi_length)); |
1062 | /* | 1061 | /* |
1063 | * Initialize the cursor. | 1062 | * Initialize the cursor. |
1064 | */ | 1063 | */ |
1065 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1064 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1066 | #ifdef DEBUG | 1065 | #ifdef DEBUG |
1067 | if (cur->bc_nlevels == 1) { | 1066 | if (cur->bc_nlevels == 1) { |
1068 | int freecount = 0; | 1067 | int freecount = 0; |
1069 | 1068 | ||
1070 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 1069 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) |
1071 | goto error0; | 1070 | goto error0; |
1072 | do { | 1071 | do { |
1073 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 1072 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, |
1074 | &rec.ir_freecount, &rec.ir_free, &i))) | 1073 | &rec.ir_freecount, &rec.ir_free, &i))) |
1075 | goto error0; | 1074 | goto error0; |
1076 | if (i) { | 1075 | if (i) { |
1077 | freecount += rec.ir_freecount; | 1076 | freecount += rec.ir_freecount; |
1078 | if ((error = xfs_btree_increment(cur, 0, &i))) | 1077 | if ((error = xfs_btree_increment(cur, 0, &i))) |
1079 | goto error0; | 1078 | goto error0; |
1080 | } | 1079 | } |
1081 | } while (i == 1); | 1080 | } while (i == 1); |
1082 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | 1081 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || |
1083 | XFS_FORCED_SHUTDOWN(mp)); | 1082 | XFS_FORCED_SHUTDOWN(mp)); |
1084 | } | 1083 | } |
1085 | #endif | 1084 | #endif |
1086 | /* | 1085 | /* |
1087 | * Look for the entry describing this inode. | 1086 | * Look for the entry describing this inode. |
1088 | */ | 1087 | */ |
1089 | if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { | 1088 | if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { |
1090 | cmn_err(CE_WARN, | 1089 | cmn_err(CE_WARN, |
1091 | "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", | 1090 | "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", |
1092 | error, mp->m_fsname); | 1091 | error, mp->m_fsname); |
1093 | goto error0; | 1092 | goto error0; |
1094 | } | 1093 | } |
1095 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1094 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1096 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, | 1095 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, |
1097 | &rec.ir_free, &i))) { | 1096 | &rec.ir_free, &i))) { |
1098 | cmn_err(CE_WARN, | 1097 | cmn_err(CE_WARN, |
1099 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", | 1098 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", |
1100 | error, mp->m_fsname); | 1099 | error, mp->m_fsname); |
1101 | goto error0; | 1100 | goto error0; |
1102 | } | 1101 | } |
1103 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1102 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1104 | /* | 1103 | /* |
1105 | * Get the offset in the inode chunk. | 1104 | * Get the offset in the inode chunk. |
1106 | */ | 1105 | */ |
1107 | off = agino - rec.ir_startino; | 1106 | off = agino - rec.ir_startino; |
1108 | ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); | 1107 | ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); |
1109 | ASSERT(!XFS_INOBT_IS_FREE(&rec, off)); | 1108 | ASSERT(!XFS_INOBT_IS_FREE(&rec, off)); |
1110 | /* | 1109 | /* |
1111 | * Mark the inode free & increment the count. | 1110 | * Mark the inode free & increment the count. |
1112 | */ | 1111 | */ |
1113 | XFS_INOBT_SET_FREE(&rec, off); | 1112 | XFS_INOBT_SET_FREE(&rec, off); |
1114 | rec.ir_freecount++; | 1113 | rec.ir_freecount++; |
1115 | 1114 | ||
1116 | /* | 1115 | /* |
1117 | * When an inode cluster is free, it becomes eligible for removal | 1116 | * When an inode cluster is free, it becomes eligible for removal |
1118 | */ | 1117 | */ |
1119 | if (!(mp->m_flags & XFS_MOUNT_IKEEP) && | 1118 | if (!(mp->m_flags & XFS_MOUNT_IKEEP) && |
1120 | (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { | 1119 | (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { |
1121 | 1120 | ||
1122 | *delete = 1; | 1121 | *delete = 1; |
1123 | *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); | 1122 | *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); |
1124 | 1123 | ||
1125 | /* | 1124 | /* |
1126 | * Remove the inode cluster from the AGI B+Tree, adjust the | 1125 | * Remove the inode cluster from the AGI B+Tree, adjust the |
1127 | * AGI and Superblock inode counts, and mark the disk space | 1126 | * AGI and Superblock inode counts, and mark the disk space |
1128 | * to be freed when the transaction is committed. | 1127 | * to be freed when the transaction is committed. |
1129 | */ | 1128 | */ |
1130 | ilen = XFS_IALLOC_INODES(mp); | 1129 | ilen = XFS_IALLOC_INODES(mp); |
1131 | be32_add_cpu(&agi->agi_count, -ilen); | 1130 | be32_add_cpu(&agi->agi_count, -ilen); |
1132 | be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); | 1131 | be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); |
1133 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); | 1132 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); |
1134 | down_read(&mp->m_peraglock); | 1133 | down_read(&mp->m_peraglock); |
1135 | mp->m_perag[agno].pagi_freecount -= ilen - 1; | 1134 | mp->m_perag[agno].pagi_freecount -= ilen - 1; |
1136 | up_read(&mp->m_peraglock); | 1135 | up_read(&mp->m_peraglock); |
1137 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); | 1136 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); |
1138 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); | 1137 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); |
1139 | 1138 | ||
1140 | if ((error = xfs_btree_delete(cur, &i))) { | 1139 | if ((error = xfs_btree_delete(cur, &i))) { |
1141 | cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", | 1140 | cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", |
1142 | error, mp->m_fsname); | 1141 | error, mp->m_fsname); |
1143 | goto error0; | 1142 | goto error0; |
1144 | } | 1143 | } |
1145 | 1144 | ||
1146 | xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, | 1145 | xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, |
1147 | agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), | 1146 | agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), |
1148 | XFS_IALLOC_BLOCKS(mp), flist, mp); | 1147 | XFS_IALLOC_BLOCKS(mp), flist, mp); |
1149 | } else { | 1148 | } else { |
1150 | *delete = 0; | 1149 | *delete = 0; |
1151 | 1150 | ||
1152 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { | 1151 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { |
1153 | cmn_err(CE_WARN, | 1152 | cmn_err(CE_WARN, |
1154 | "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", | 1153 | "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", |
1155 | error, mp->m_fsname); | 1154 | error, mp->m_fsname); |
1156 | goto error0; | 1155 | goto error0; |
1157 | } | 1156 | } |
1158 | /* | 1157 | /* |
1159 | * Change the inode free counts and log the ag/sb changes. | 1158 | * Change the inode free counts and log the ag/sb changes. |
1160 | */ | 1159 | */ |
1161 | be32_add_cpu(&agi->agi_freecount, 1); | 1160 | be32_add_cpu(&agi->agi_freecount, 1); |
1162 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | 1161 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); |
1163 | down_read(&mp->m_peraglock); | 1162 | down_read(&mp->m_peraglock); |
1164 | mp->m_perag[agno].pagi_freecount++; | 1163 | mp->m_perag[agno].pagi_freecount++; |
1165 | up_read(&mp->m_peraglock); | 1164 | up_read(&mp->m_peraglock); |
1166 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); | 1165 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); |
1167 | } | 1166 | } |
1168 | 1167 | ||
1169 | #ifdef DEBUG | 1168 | #ifdef DEBUG |
1170 | if (cur->bc_nlevels == 1) { | 1169 | if (cur->bc_nlevels == 1) { |
1171 | int freecount = 0; | 1170 | int freecount = 0; |
1172 | 1171 | ||
1173 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 1172 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) |
1174 | goto error0; | 1173 | goto error0; |
1175 | do { | 1174 | do { |
1176 | if ((error = xfs_inobt_get_rec(cur, | 1175 | if ((error = xfs_inobt_get_rec(cur, |
1177 | &rec.ir_startino, | 1176 | &rec.ir_startino, |
1178 | &rec.ir_freecount, | 1177 | &rec.ir_freecount, |
1179 | &rec.ir_free, &i))) | 1178 | &rec.ir_free, &i))) |
1180 | goto error0; | 1179 | goto error0; |
1181 | if (i) { | 1180 | if (i) { |
1182 | freecount += rec.ir_freecount; | 1181 | freecount += rec.ir_freecount; |
1183 | if ((error = xfs_btree_increment(cur, 0, &i))) | 1182 | if ((error = xfs_btree_increment(cur, 0, &i))) |
1184 | goto error0; | 1183 | goto error0; |
1185 | } | 1184 | } |
1186 | } while (i == 1); | 1185 | } while (i == 1); |
1187 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | 1186 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || |
1188 | XFS_FORCED_SHUTDOWN(mp)); | 1187 | XFS_FORCED_SHUTDOWN(mp)); |
1189 | } | 1188 | } |
1190 | #endif | 1189 | #endif |
1191 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1190 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
1192 | return 0; | 1191 | return 0; |
1193 | 1192 | ||
1194 | error0: | 1193 | error0: |
1195 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 1194 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
1196 | return error; | 1195 | return error; |
1197 | } | 1196 | } |
1198 | 1197 | ||
1199 | /* | 1198 | /* |
1200 | * Return the location of the inode in imap, for mapping it into a buffer. | 1199 | * Return the location of the inode in imap, for mapping it into a buffer. |
1201 | */ | 1200 | */ |
1202 | int | 1201 | int |
1203 | xfs_imap( | 1202 | xfs_imap( |
1204 | xfs_mount_t *mp, /* file system mount structure */ | 1203 | xfs_mount_t *mp, /* file system mount structure */ |
1205 | xfs_trans_t *tp, /* transaction pointer */ | 1204 | xfs_trans_t *tp, /* transaction pointer */ |
1206 | xfs_ino_t ino, /* inode to locate */ | 1205 | xfs_ino_t ino, /* inode to locate */ |
1207 | struct xfs_imap *imap, /* location map structure */ | 1206 | struct xfs_imap *imap, /* location map structure */ |
1208 | uint flags) /* flags for inode btree lookup */ | 1207 | uint flags) /* flags for inode btree lookup */ |
1209 | { | 1208 | { |
1210 | xfs_agblock_t agbno; /* block number of inode in the alloc group */ | 1209 | xfs_agblock_t agbno; /* block number of inode in the alloc group */ |
1211 | xfs_agino_t agino; /* inode number within alloc group */ | 1210 | xfs_agino_t agino; /* inode number within alloc group */ |
1212 | xfs_agnumber_t agno; /* allocation group number */ | 1211 | xfs_agnumber_t agno; /* allocation group number */ |
1213 | int blks_per_cluster; /* num blocks per inode cluster */ | 1212 | int blks_per_cluster; /* num blocks per inode cluster */ |
1214 | xfs_agblock_t chunk_agbno; /* first block in inode chunk */ | 1213 | xfs_agblock_t chunk_agbno; /* first block in inode chunk */ |
1215 | xfs_agblock_t cluster_agbno; /* first block in inode cluster */ | 1214 | xfs_agblock_t cluster_agbno; /* first block in inode cluster */ |
1216 | int error; /* error code */ | 1215 | int error; /* error code */ |
1217 | int offset; /* index of inode in its buffer */ | 1216 | int offset; /* index of inode in its buffer */ |
1218 | int offset_agbno; /* blks from chunk start to inode */ | 1217 | int offset_agbno; /* blks from chunk start to inode */ |
1219 | 1218 | ||
1220 | ASSERT(ino != NULLFSINO); | 1219 | ASSERT(ino != NULLFSINO); |
1221 | 1220 | ||
1222 | /* | 1221 | /* |
1223 | * Split up the inode number into its parts. | 1222 | * Split up the inode number into its parts. |
1224 | */ | 1223 | */ |
1225 | agno = XFS_INO_TO_AGNO(mp, ino); | 1224 | agno = XFS_INO_TO_AGNO(mp, ino); |
1226 | agino = XFS_INO_TO_AGINO(mp, ino); | 1225 | agino = XFS_INO_TO_AGINO(mp, ino); |
1227 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); | 1226 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); |
1228 | if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || | 1227 | if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || |
1229 | ino != XFS_AGINO_TO_INO(mp, agno, agino)) { | 1228 | ino != XFS_AGINO_TO_INO(mp, agno, agino)) { |
1230 | #ifdef DEBUG | 1229 | #ifdef DEBUG |
1231 | /* no diagnostics for bulkstat, ino comes from userspace */ | 1230 | /* no diagnostics for bulkstat, ino comes from userspace */ |
1232 | if (flags & XFS_IMAP_BULKSTAT) | 1231 | if (flags & XFS_IMAP_BULKSTAT) |
1233 | return XFS_ERROR(EINVAL); | 1232 | return XFS_ERROR(EINVAL); |
1234 | if (agno >= mp->m_sb.sb_agcount) { | 1233 | if (agno >= mp->m_sb.sb_agcount) { |
1235 | xfs_fs_cmn_err(CE_ALERT, mp, | 1234 | xfs_fs_cmn_err(CE_ALERT, mp, |
1236 | "xfs_imap: agno (%d) >= " | 1235 | "xfs_imap: agno (%d) >= " |
1237 | "mp->m_sb.sb_agcount (%d)", | 1236 | "mp->m_sb.sb_agcount (%d)", |
1238 | agno, mp->m_sb.sb_agcount); | 1237 | agno, mp->m_sb.sb_agcount); |
1239 | } | 1238 | } |
1240 | if (agbno >= mp->m_sb.sb_agblocks) { | 1239 | if (agbno >= mp->m_sb.sb_agblocks) { |
1241 | xfs_fs_cmn_err(CE_ALERT, mp, | 1240 | xfs_fs_cmn_err(CE_ALERT, mp, |
1242 | "xfs_imap: agbno (0x%llx) >= " | 1241 | "xfs_imap: agbno (0x%llx) >= " |
1243 | "mp->m_sb.sb_agblocks (0x%lx)", | 1242 | "mp->m_sb.sb_agblocks (0x%lx)", |
1244 | (unsigned long long) agbno, | 1243 | (unsigned long long) agbno, |
1245 | (unsigned long) mp->m_sb.sb_agblocks); | 1244 | (unsigned long) mp->m_sb.sb_agblocks); |
1246 | } | 1245 | } |
1247 | if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { | 1246 | if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { |
1248 | xfs_fs_cmn_err(CE_ALERT, mp, | 1247 | xfs_fs_cmn_err(CE_ALERT, mp, |
1249 | "xfs_imap: ino (0x%llx) != " | 1248 | "xfs_imap: ino (0x%llx) != " |
1250 | "XFS_AGINO_TO_INO(mp, agno, agino) " | 1249 | "XFS_AGINO_TO_INO(mp, agno, agino) " |
1251 | "(0x%llx)", | 1250 | "(0x%llx)", |
1252 | ino, XFS_AGINO_TO_INO(mp, agno, agino)); | 1251 | ino, XFS_AGINO_TO_INO(mp, agno, agino)); |
1253 | } | 1252 | } |
1254 | xfs_stack_trace(); | 1253 | xfs_stack_trace(); |
1255 | #endif /* DEBUG */ | 1254 | #endif /* DEBUG */ |
1256 | return XFS_ERROR(EINVAL); | 1255 | return XFS_ERROR(EINVAL); |
1257 | } | 1256 | } |
1258 | 1257 | ||
1259 | /* | 1258 | /* |
1260 | * If the inode cluster size is the same as the blocksize or | 1259 | * If the inode cluster size is the same as the blocksize or |
1261 | * smaller we get to the buffer by simple arithmetics. | 1260 | * smaller we get to the buffer by simple arithmetics. |
1262 | */ | 1261 | */ |
1263 | if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { | 1262 | if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { |
1264 | offset = XFS_INO_TO_OFFSET(mp, ino); | 1263 | offset = XFS_INO_TO_OFFSET(mp, ino); |
1265 | ASSERT(offset < mp->m_sb.sb_inopblock); | 1264 | ASSERT(offset < mp->m_sb.sb_inopblock); |
1266 | 1265 | ||
1267 | imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); | 1266 | imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); |
1268 | imap->im_len = XFS_FSB_TO_BB(mp, 1); | 1267 | imap->im_len = XFS_FSB_TO_BB(mp, 1); |
1269 | imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); | 1268 | imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); |
1270 | return 0; | 1269 | return 0; |
1271 | } | 1270 | } |
1272 | 1271 | ||
1273 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; | 1272 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; |
1274 | 1273 | ||
1275 | /* | 1274 | /* |
1276 | * If we get a block number passed from bulkstat we can use it to | 1275 | * If we get a block number passed from bulkstat we can use it to |
1277 | * find the buffer easily. | 1276 | * find the buffer easily. |
1278 | */ | 1277 | */ |
1279 | if (imap->im_blkno) { | 1278 | if (imap->im_blkno) { |
1280 | offset = XFS_INO_TO_OFFSET(mp, ino); | 1279 | offset = XFS_INO_TO_OFFSET(mp, ino); |
1281 | ASSERT(offset < mp->m_sb.sb_inopblock); | 1280 | ASSERT(offset < mp->m_sb.sb_inopblock); |
1282 | 1281 | ||
1283 | cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno); | 1282 | cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno); |
1284 | offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; | 1283 | offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; |
1285 | 1284 | ||
1286 | imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); | 1285 | imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); |
1287 | imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); | 1286 | imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); |
1288 | return 0; | 1287 | return 0; |
1289 | } | 1288 | } |
1290 | 1289 | ||
1291 | /* | 1290 | /* |
1292 | * If the inode chunks are aligned then use simple maths to | 1291 | * If the inode chunks are aligned then use simple maths to |
1293 | * find the location. Otherwise we have to do a btree | 1292 | * find the location. Otherwise we have to do a btree |
1294 | * lookup to find the location. | 1293 | * lookup to find the location. |
1295 | */ | 1294 | */ |
1296 | if (mp->m_inoalign_mask) { | 1295 | if (mp->m_inoalign_mask) { |
1297 | offset_agbno = agbno & mp->m_inoalign_mask; | 1296 | offset_agbno = agbno & mp->m_inoalign_mask; |
1298 | chunk_agbno = agbno - offset_agbno; | 1297 | chunk_agbno = agbno - offset_agbno; |
1299 | } else { | 1298 | } else { |
1300 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 1299 | xfs_btree_cur_t *cur; /* inode btree cursor */ |
1301 | xfs_agino_t chunk_agino; /* first agino in inode chunk */ | 1300 | xfs_agino_t chunk_agino; /* first agino in inode chunk */ |
1302 | __int32_t chunk_cnt; /* count of free inodes in chunk */ | 1301 | __int32_t chunk_cnt; /* count of free inodes in chunk */ |
1303 | xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ | 1302 | xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ |
1304 | xfs_buf_t *agbp; /* agi buffer */ | 1303 | xfs_buf_t *agbp; /* agi buffer */ |
1305 | int i; /* temp state */ | 1304 | int i; /* temp state */ |
1306 | 1305 | ||
1307 | down_read(&mp->m_peraglock); | 1306 | down_read(&mp->m_peraglock); |
1308 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | 1307 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); |
1309 | up_read(&mp->m_peraglock); | 1308 | up_read(&mp->m_peraglock); |
1310 | if (error) { | 1309 | if (error) { |
1311 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1310 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1312 | "xfs_ialloc_read_agi() returned " | 1311 | "xfs_ialloc_read_agi() returned " |
1313 | "error %d, agno %d", | 1312 | "error %d, agno %d", |
1314 | error, agno); | 1313 | error, agno); |
1315 | return error; | 1314 | return error; |
1316 | } | 1315 | } |
1317 | 1316 | ||
1318 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1317 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1319 | error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); | 1318 | error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); |
1320 | if (error) { | 1319 | if (error) { |
1321 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1320 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1322 | "xfs_inobt_lookup_le() failed"); | 1321 | "xfs_inobt_lookup_le() failed"); |
1323 | goto error0; | 1322 | goto error0; |
1324 | } | 1323 | } |
1325 | 1324 | ||
1326 | error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, | 1325 | error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, |
1327 | &chunk_free, &i); | 1326 | &chunk_free, &i); |
1328 | if (error) { | 1327 | if (error) { |
1329 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1328 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1330 | "xfs_inobt_get_rec() failed"); | 1329 | "xfs_inobt_get_rec() failed"); |
1331 | goto error0; | 1330 | goto error0; |
1332 | } | 1331 | } |
1333 | if (i == 0) { | 1332 | if (i == 0) { |
1334 | #ifdef DEBUG | 1333 | #ifdef DEBUG |
1335 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1334 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1336 | "xfs_inobt_get_rec() failed"); | 1335 | "xfs_inobt_get_rec() failed"); |
1337 | #endif /* DEBUG */ | 1336 | #endif /* DEBUG */ |
1338 | error = XFS_ERROR(EINVAL); | 1337 | error = XFS_ERROR(EINVAL); |
1339 | } | 1338 | } |
1340 | error0: | 1339 | error0: |
1341 | xfs_trans_brelse(tp, agbp); | 1340 | xfs_trans_brelse(tp, agbp); |
1342 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1341 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
1343 | if (error) | 1342 | if (error) |
1344 | return error; | 1343 | return error; |
1345 | chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); | 1344 | chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); |
1346 | offset_agbno = agbno - chunk_agbno; | 1345 | offset_agbno = agbno - chunk_agbno; |
1347 | } | 1346 | } |
1348 | 1347 | ||
1349 | ASSERT(agbno >= chunk_agbno); | 1348 | ASSERT(agbno >= chunk_agbno); |
1350 | cluster_agbno = chunk_agbno + | 1349 | cluster_agbno = chunk_agbno + |
1351 | ((offset_agbno / blks_per_cluster) * blks_per_cluster); | 1350 | ((offset_agbno / blks_per_cluster) * blks_per_cluster); |
1352 | offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + | 1351 | offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + |
1353 | XFS_INO_TO_OFFSET(mp, ino); | 1352 | XFS_INO_TO_OFFSET(mp, ino); |
1354 | 1353 | ||
1355 | imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); | 1354 | imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); |
1356 | imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); | 1355 | imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); |
1357 | imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); | 1356 | imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); |
1358 | 1357 | ||
1359 | /* | 1358 | /* |
1360 | * If the inode number maps to a block outside the bounds | 1359 | * If the inode number maps to a block outside the bounds |
1361 | * of the file system then return NULL rather than calling | 1360 | * of the file system then return NULL rather than calling |
1362 | * read_buf and panicing when we get an error from the | 1361 | * read_buf and panicing when we get an error from the |
1363 | * driver. | 1362 | * driver. |
1364 | */ | 1363 | */ |
1365 | if ((imap->im_blkno + imap->im_len) > | 1364 | if ((imap->im_blkno + imap->im_len) > |
1366 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | 1365 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { |
1367 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1366 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1368 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " | 1367 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " |
1369 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", | 1368 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", |
1370 | (unsigned long long) imap->im_blkno, | 1369 | (unsigned long long) imap->im_blkno, |
1371 | (unsigned long long) imap->im_len, | 1370 | (unsigned long long) imap->im_len, |
1372 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | 1371 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); |
1373 | return XFS_ERROR(EINVAL); | 1372 | return XFS_ERROR(EINVAL); |
1374 | } | 1373 | } |
1375 | 1374 | ||
1376 | return 0; | 1375 | return 0; |
1377 | } | 1376 | } |
1378 | 1377 | ||
1379 | /* | 1378 | /* |
1380 | * Compute and fill in value of m_in_maxlevels. | 1379 | * Compute and fill in value of m_in_maxlevels. |
1381 | */ | 1380 | */ |
1382 | void | 1381 | void |
1383 | xfs_ialloc_compute_maxlevels( | 1382 | xfs_ialloc_compute_maxlevels( |
1384 | xfs_mount_t *mp) /* file system mount structure */ | 1383 | xfs_mount_t *mp) /* file system mount structure */ |
1385 | { | 1384 | { |
1386 | int level; | 1385 | int level; |
1387 | uint maxblocks; | 1386 | uint maxblocks; |
1388 | uint maxleafents; | 1387 | uint maxleafents; |
1389 | int minleafrecs; | 1388 | int minleafrecs; |
1390 | int minnoderecs; | 1389 | int minnoderecs; |
1391 | 1390 | ||
1392 | maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> | 1391 | maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> |
1393 | XFS_INODES_PER_CHUNK_LOG; | 1392 | XFS_INODES_PER_CHUNK_LOG; |
1394 | minleafrecs = mp->m_alloc_mnr[0]; | 1393 | minleafrecs = mp->m_alloc_mnr[0]; |
1395 | minnoderecs = mp->m_alloc_mnr[1]; | 1394 | minnoderecs = mp->m_alloc_mnr[1]; |
1396 | maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; | 1395 | maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; |
1397 | for (level = 1; maxblocks > 1; level++) | 1396 | for (level = 1; maxblocks > 1; level++) |
1398 | maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; | 1397 | maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; |
1399 | mp->m_in_maxlevels = level; | 1398 | mp->m_in_maxlevels = level; |
1400 | } | 1399 | } |
1401 | 1400 | ||
1402 | /* | 1401 | /* |
1403 | * Log specified fields for the ag hdr (inode section) | 1402 | * Log specified fields for the ag hdr (inode section) |
1404 | */ | 1403 | */ |
1405 | void | 1404 | void |
1406 | xfs_ialloc_log_agi( | 1405 | xfs_ialloc_log_agi( |
1407 | xfs_trans_t *tp, /* transaction pointer */ | 1406 | xfs_trans_t *tp, /* transaction pointer */ |
1408 | xfs_buf_t *bp, /* allocation group header buffer */ | 1407 | xfs_buf_t *bp, /* allocation group header buffer */ |
1409 | int fields) /* bitmask of fields to log */ | 1408 | int fields) /* bitmask of fields to log */ |
1410 | { | 1409 | { |
1411 | int first; /* first byte number */ | 1410 | int first; /* first byte number */ |
1412 | int last; /* last byte number */ | 1411 | int last; /* last byte number */ |
1413 | static const short offsets[] = { /* field starting offsets */ | 1412 | static const short offsets[] = { /* field starting offsets */ |
1414 | /* keep in sync with bit definitions */ | 1413 | /* keep in sync with bit definitions */ |
1415 | offsetof(xfs_agi_t, agi_magicnum), | 1414 | offsetof(xfs_agi_t, agi_magicnum), |
1416 | offsetof(xfs_agi_t, agi_versionnum), | 1415 | offsetof(xfs_agi_t, agi_versionnum), |
1417 | offsetof(xfs_agi_t, agi_seqno), | 1416 | offsetof(xfs_agi_t, agi_seqno), |
1418 | offsetof(xfs_agi_t, agi_length), | 1417 | offsetof(xfs_agi_t, agi_length), |
1419 | offsetof(xfs_agi_t, agi_count), | 1418 | offsetof(xfs_agi_t, agi_count), |
1420 | offsetof(xfs_agi_t, agi_root), | 1419 | offsetof(xfs_agi_t, agi_root), |
1421 | offsetof(xfs_agi_t, agi_level), | 1420 | offsetof(xfs_agi_t, agi_level), |
1422 | offsetof(xfs_agi_t, agi_freecount), | 1421 | offsetof(xfs_agi_t, agi_freecount), |
1423 | offsetof(xfs_agi_t, agi_newino), | 1422 | offsetof(xfs_agi_t, agi_newino), |
1424 | offsetof(xfs_agi_t, agi_dirino), | 1423 | offsetof(xfs_agi_t, agi_dirino), |
1425 | offsetof(xfs_agi_t, agi_unlinked), | 1424 | offsetof(xfs_agi_t, agi_unlinked), |
1426 | sizeof(xfs_agi_t) | 1425 | sizeof(xfs_agi_t) |
1427 | }; | 1426 | }; |
1428 | #ifdef DEBUG | 1427 | #ifdef DEBUG |
1429 | xfs_agi_t *agi; /* allocation group header */ | 1428 | xfs_agi_t *agi; /* allocation group header */ |
1430 | 1429 | ||
1431 | agi = XFS_BUF_TO_AGI(bp); | 1430 | agi = XFS_BUF_TO_AGI(bp); |
1432 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); | 1431 | ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); |
1433 | #endif | 1432 | #endif |
1434 | /* | 1433 | /* |
1435 | * Compute byte offsets for the first and last fields. | 1434 | * Compute byte offsets for the first and last fields. |
1436 | */ | 1435 | */ |
1437 | xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); | 1436 | xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); |
1438 | /* | 1437 | /* |
1439 | * Log the allocation group inode header buffer. | 1438 | * Log the allocation group inode header buffer. |
1440 | */ | 1439 | */ |
1441 | xfs_trans_log_buf(tp, bp, first, last); | 1440 | xfs_trans_log_buf(tp, bp, first, last); |
1442 | } | 1441 | } |
1443 | 1442 | ||
1444 | #ifdef DEBUG | 1443 | #ifdef DEBUG |
1445 | STATIC void | 1444 | STATIC void |
1446 | xfs_check_agi_unlinked( | 1445 | xfs_check_agi_unlinked( |
1447 | struct xfs_agi *agi) | 1446 | struct xfs_agi *agi) |
1448 | { | 1447 | { |
1449 | int i; | 1448 | int i; |
1450 | 1449 | ||
1451 | for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) | 1450 | for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) |
1452 | ASSERT(agi->agi_unlinked[i]); | 1451 | ASSERT(agi->agi_unlinked[i]); |
1453 | } | 1452 | } |
1454 | #else | 1453 | #else |
1455 | #define xfs_check_agi_unlinked(agi) | 1454 | #define xfs_check_agi_unlinked(agi) |
1456 | #endif | 1455 | #endif |
1457 | 1456 | ||
1458 | /* | 1457 | /* |
1459 | * Read in the allocation group header (inode allocation section) | 1458 | * Read in the allocation group header (inode allocation section) |
1460 | */ | 1459 | */ |
1461 | int | 1460 | int |
1462 | xfs_read_agi( | 1461 | xfs_read_agi( |
1463 | struct xfs_mount *mp, /* file system mount structure */ | 1462 | struct xfs_mount *mp, /* file system mount structure */ |
1464 | struct xfs_trans *tp, /* transaction pointer */ | 1463 | struct xfs_trans *tp, /* transaction pointer */ |
1465 | xfs_agnumber_t agno, /* allocation group number */ | 1464 | xfs_agnumber_t agno, /* allocation group number */ |
1466 | struct xfs_buf **bpp) /* allocation group hdr buf */ | 1465 | struct xfs_buf **bpp) /* allocation group hdr buf */ |
1467 | { | 1466 | { |
1468 | struct xfs_agi *agi; /* allocation group header */ | 1467 | struct xfs_agi *agi; /* allocation group header */ |
1469 | int agi_ok; /* agi is consistent */ | 1468 | int agi_ok; /* agi is consistent */ |
1470 | int error; | 1469 | int error; |
1471 | 1470 | ||
1472 | ASSERT(agno != NULLAGNUMBER); | 1471 | ASSERT(agno != NULLAGNUMBER); |
1473 | 1472 | ||
1474 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 1473 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
1475 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 1474 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
1476 | XFS_FSS_TO_BB(mp, 1), 0, bpp); | 1475 | XFS_FSS_TO_BB(mp, 1), 0, bpp); |
1477 | if (error) | 1476 | if (error) |
1478 | return error; | 1477 | return error; |
1479 | 1478 | ||
1480 | ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); | 1479 | ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); |
1481 | agi = XFS_BUF_TO_AGI(*bpp); | 1480 | agi = XFS_BUF_TO_AGI(*bpp); |
1482 | 1481 | ||
1483 | /* | 1482 | /* |
1484 | * Validate the magic number of the agi block. | 1483 | * Validate the magic number of the agi block. |
1485 | */ | 1484 | */ |
1486 | agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && | 1485 | agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && |
1487 | XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && | 1486 | XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && |
1488 | be32_to_cpu(agi->agi_seqno) == agno; | 1487 | be32_to_cpu(agi->agi_seqno) == agno; |
1489 | if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, | 1488 | if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, |
1490 | XFS_RANDOM_IALLOC_READ_AGI))) { | 1489 | XFS_RANDOM_IALLOC_READ_AGI))) { |
1491 | XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, | 1490 | XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, |
1492 | mp, agi); | 1491 | mp, agi); |
1493 | xfs_trans_brelse(tp, *bpp); | 1492 | xfs_trans_brelse(tp, *bpp); |
1494 | return XFS_ERROR(EFSCORRUPTED); | 1493 | return XFS_ERROR(EFSCORRUPTED); |
1495 | } | 1494 | } |
1496 | 1495 | ||
1497 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF); | 1496 | XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF); |
1498 | 1497 | ||
1499 | xfs_check_agi_unlinked(agi); | 1498 | xfs_check_agi_unlinked(agi); |
1500 | return 0; | 1499 | return 0; |
1501 | } | 1500 | } |
1502 | 1501 | ||
1503 | int | 1502 | int |
1504 | xfs_ialloc_read_agi( | 1503 | xfs_ialloc_read_agi( |
1505 | struct xfs_mount *mp, /* file system mount structure */ | 1504 | struct xfs_mount *mp, /* file system mount structure */ |
1506 | struct xfs_trans *tp, /* transaction pointer */ | 1505 | struct xfs_trans *tp, /* transaction pointer */ |
1507 | xfs_agnumber_t agno, /* allocation group number */ | 1506 | xfs_agnumber_t agno, /* allocation group number */ |
1508 | struct xfs_buf **bpp) /* allocation group hdr buf */ | 1507 | struct xfs_buf **bpp) /* allocation group hdr buf */ |
1509 | { | 1508 | { |
1510 | struct xfs_agi *agi; /* allocation group header */ | 1509 | struct xfs_agi *agi; /* allocation group header */ |
1511 | struct xfs_perag *pag; /* per allocation group data */ | 1510 | struct xfs_perag *pag; /* per allocation group data */ |
1512 | int error; | 1511 | int error; |
1513 | 1512 | ||
1514 | error = xfs_read_agi(mp, tp, agno, bpp); | 1513 | error = xfs_read_agi(mp, tp, agno, bpp); |
1515 | if (error) | 1514 | if (error) |
1516 | return error; | 1515 | return error; |
1517 | 1516 | ||
1518 | agi = XFS_BUF_TO_AGI(*bpp); | 1517 | agi = XFS_BUF_TO_AGI(*bpp); |
1519 | pag = &mp->m_perag[agno]; | 1518 | pag = &mp->m_perag[agno]; |
1520 | 1519 | ||
1521 | if (!pag->pagi_init) { | 1520 | if (!pag->pagi_init) { |
1522 | pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); | 1521 | pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); |
1523 | pag->pagi_count = be32_to_cpu(agi->agi_count); | 1522 | pag->pagi_count = be32_to_cpu(agi->agi_count); |
1524 | pag->pagi_init = 1; | 1523 | pag->pagi_init = 1; |
1525 | } | 1524 | } |
1526 | 1525 | ||
1527 | /* | 1526 | /* |
1528 | * It's possible for these to be out of sync if | 1527 | * It's possible for these to be out of sync if |
1529 | * we are in the middle of a forced shutdown. | 1528 | * we are in the middle of a forced shutdown. |
1530 | */ | 1529 | */ |
1531 | ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || | 1530 | ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || |
1532 | XFS_FORCED_SHUTDOWN(mp)); | 1531 | XFS_FORCED_SHUTDOWN(mp)); |
1533 | return 0; | 1532 | return 0; |
1534 | } | 1533 | } |
1535 | 1534 | ||
1536 | /* | 1535 | /* |
1537 | * Read in the agi to initialise the per-ag data in the mount structure | 1536 | * Read in the agi to initialise the per-ag data in the mount structure |
1538 | */ | 1537 | */ |
1539 | int | 1538 | int |
1540 | xfs_ialloc_pagi_init( | 1539 | xfs_ialloc_pagi_init( |
1541 | xfs_mount_t *mp, /* file system mount structure */ | 1540 | xfs_mount_t *mp, /* file system mount structure */ |
1542 | xfs_trans_t *tp, /* transaction pointer */ | 1541 | xfs_trans_t *tp, /* transaction pointer */ |
1543 | xfs_agnumber_t agno) /* allocation group number */ | 1542 | xfs_agnumber_t agno) /* allocation group number */ |
1544 | { | 1543 | { |
1545 | xfs_buf_t *bp = NULL; | 1544 | xfs_buf_t *bp = NULL; |
1546 | int error; | 1545 | int error; |
1547 | 1546 | ||
1548 | error = xfs_ialloc_read_agi(mp, tp, agno, &bp); | 1547 | error = xfs_ialloc_read_agi(mp, tp, agno, &bp); |
1549 | if (error) | 1548 | if (error) |
1550 | return error; | 1549 | return error; |
1551 | if (bp) | 1550 | if (bp) |
1552 | xfs_trans_brelse(tp, bp); | 1551 | xfs_trans_brelse(tp, bp); |
1553 | return 0; | 1552 | return 0; |
1554 | } | 1553 | } |
1555 | 1554 |
fs/xfs/xfs_imap.h
1 | /* | File was deleted | |
2 | * Copyright (c) 2000,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_IMAP_H__ | ||
19 | #define __XFS_IMAP_H__ | ||
20 | |||
21 | /* | ||
22 | * This is the structure passed to xfs_imap() to map | ||
23 | * an inode number to its on disk location. | ||
24 | */ | ||
25 | typedef struct xfs_imap { | ||
26 | xfs_daddr_t im_blkno; /* starting BB of inode chunk */ | ||
27 | uint im_len; /* length in BBs of inode chunk */ | ||
28 | ushort im_boffset; /* inode offset in block in bytes */ | ||
29 | } xfs_imap_t; | ||
30 | |||
31 | #endif /* __XFS_IMAP_H__ */ | ||
32 | 1 | /* |
fs/xfs/xfs_inode.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include <linux/log2.h> | 18 | #include <linux/log2.h> |
19 | 19 | ||
20 | #include "xfs.h" | 20 | #include "xfs.h" |
21 | #include "xfs_fs.h" | 21 | #include "xfs_fs.h" |
22 | #include "xfs_types.h" | 22 | #include "xfs_types.h" |
23 | #include "xfs_bit.h" | 23 | #include "xfs_bit.h" |
24 | #include "xfs_log.h" | 24 | #include "xfs_log.h" |
25 | #include "xfs_inum.h" | 25 | #include "xfs_inum.h" |
26 | #include "xfs_imap.h" | ||
27 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
28 | #include "xfs_trans_priv.h" | 27 | #include "xfs_trans_priv.h" |
29 | #include "xfs_sb.h" | 28 | #include "xfs_sb.h" |
30 | #include "xfs_ag.h" | 29 | #include "xfs_ag.h" |
31 | #include "xfs_dir2.h" | 30 | #include "xfs_dir2.h" |
32 | #include "xfs_dmapi.h" | 31 | #include "xfs_dmapi.h" |
33 | #include "xfs_mount.h" | 32 | #include "xfs_mount.h" |
34 | #include "xfs_bmap_btree.h" | 33 | #include "xfs_bmap_btree.h" |
35 | #include "xfs_alloc_btree.h" | 34 | #include "xfs_alloc_btree.h" |
36 | #include "xfs_ialloc_btree.h" | 35 | #include "xfs_ialloc_btree.h" |
37 | #include "xfs_dir2_sf.h" | 36 | #include "xfs_dir2_sf.h" |
38 | #include "xfs_attr_sf.h" | 37 | #include "xfs_attr_sf.h" |
39 | #include "xfs_dinode.h" | 38 | #include "xfs_dinode.h" |
40 | #include "xfs_inode.h" | 39 | #include "xfs_inode.h" |
41 | #include "xfs_buf_item.h" | 40 | #include "xfs_buf_item.h" |
42 | #include "xfs_inode_item.h" | 41 | #include "xfs_inode_item.h" |
43 | #include "xfs_btree.h" | 42 | #include "xfs_btree.h" |
44 | #include "xfs_btree_trace.h" | 43 | #include "xfs_btree_trace.h" |
45 | #include "xfs_alloc.h" | 44 | #include "xfs_alloc.h" |
46 | #include "xfs_ialloc.h" | 45 | #include "xfs_ialloc.h" |
47 | #include "xfs_bmap.h" | 46 | #include "xfs_bmap.h" |
48 | #include "xfs_rw.h" | 47 | #include "xfs_rw.h" |
49 | #include "xfs_error.h" | 48 | #include "xfs_error.h" |
50 | #include "xfs_utils.h" | 49 | #include "xfs_utils.h" |
51 | #include "xfs_dir2_trace.h" | 50 | #include "xfs_dir2_trace.h" |
52 | #include "xfs_quota.h" | 51 | #include "xfs_quota.h" |
53 | #include "xfs_acl.h" | 52 | #include "xfs_acl.h" |
54 | #include "xfs_filestream.h" | 53 | #include "xfs_filestream.h" |
55 | #include "xfs_vnodeops.h" | 54 | #include "xfs_vnodeops.h" |
56 | 55 | ||
57 | kmem_zone_t *xfs_ifork_zone; | 56 | kmem_zone_t *xfs_ifork_zone; |
58 | kmem_zone_t *xfs_inode_zone; | 57 | kmem_zone_t *xfs_inode_zone; |
59 | 58 | ||
60 | /* | 59 | /* |
61 | * Used in xfs_itruncate(). This is the maximum number of extents | 60 | * Used in xfs_itruncate(). This is the maximum number of extents |
62 | * freed from a file in a single transaction. | 61 | * freed from a file in a single transaction. |
63 | */ | 62 | */ |
64 | #define XFS_ITRUNC_MAX_EXTENTS 2 | 63 | #define XFS_ITRUNC_MAX_EXTENTS 2 |
65 | 64 | ||
66 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); | 65 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); |
67 | STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); | 66 | STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); |
68 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); | 67 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); |
69 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); | 68 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); |
70 | 69 | ||
71 | #ifdef DEBUG | 70 | #ifdef DEBUG |
72 | /* | 71 | /* |
73 | * Make sure that the extents in the given memory buffer | 72 | * Make sure that the extents in the given memory buffer |
74 | * are valid. | 73 | * are valid. |
75 | */ | 74 | */ |
76 | STATIC void | 75 | STATIC void |
77 | xfs_validate_extents( | 76 | xfs_validate_extents( |
78 | xfs_ifork_t *ifp, | 77 | xfs_ifork_t *ifp, |
79 | int nrecs, | 78 | int nrecs, |
80 | xfs_exntfmt_t fmt) | 79 | xfs_exntfmt_t fmt) |
81 | { | 80 | { |
82 | xfs_bmbt_irec_t irec; | 81 | xfs_bmbt_irec_t irec; |
83 | xfs_bmbt_rec_host_t rec; | 82 | xfs_bmbt_rec_host_t rec; |
84 | int i; | 83 | int i; |
85 | 84 | ||
86 | for (i = 0; i < nrecs; i++) { | 85 | for (i = 0; i < nrecs; i++) { |
87 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | 86 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); |
88 | rec.l0 = get_unaligned(&ep->l0); | 87 | rec.l0 = get_unaligned(&ep->l0); |
89 | rec.l1 = get_unaligned(&ep->l1); | 88 | rec.l1 = get_unaligned(&ep->l1); |
90 | xfs_bmbt_get_all(&rec, &irec); | 89 | xfs_bmbt_get_all(&rec, &irec); |
91 | if (fmt == XFS_EXTFMT_NOSTATE) | 90 | if (fmt == XFS_EXTFMT_NOSTATE) |
92 | ASSERT(irec.br_state == XFS_EXT_NORM); | 91 | ASSERT(irec.br_state == XFS_EXT_NORM); |
93 | } | 92 | } |
94 | } | 93 | } |
95 | #else /* DEBUG */ | 94 | #else /* DEBUG */ |
96 | #define xfs_validate_extents(ifp, nrecs, fmt) | 95 | #define xfs_validate_extents(ifp, nrecs, fmt) |
97 | #endif /* DEBUG */ | 96 | #endif /* DEBUG */ |
98 | 97 | ||
99 | /* | 98 | /* |
100 | * Check that none of the inode's in the buffer have a next | 99 | * Check that none of the inode's in the buffer have a next |
101 | * unlinked field of 0. | 100 | * unlinked field of 0. |
102 | */ | 101 | */ |
103 | #if defined(DEBUG) | 102 | #if defined(DEBUG) |
104 | void | 103 | void |
105 | xfs_inobp_check( | 104 | xfs_inobp_check( |
106 | xfs_mount_t *mp, | 105 | xfs_mount_t *mp, |
107 | xfs_buf_t *bp) | 106 | xfs_buf_t *bp) |
108 | { | 107 | { |
109 | int i; | 108 | int i; |
110 | int j; | 109 | int j; |
111 | xfs_dinode_t *dip; | 110 | xfs_dinode_t *dip; |
112 | 111 | ||
113 | j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; | 112 | j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; |
114 | 113 | ||
115 | for (i = 0; i < j; i++) { | 114 | for (i = 0; i < j; i++) { |
116 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 115 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, |
117 | i * mp->m_sb.sb_inodesize); | 116 | i * mp->m_sb.sb_inodesize); |
118 | if (!dip->di_next_unlinked) { | 117 | if (!dip->di_next_unlinked) { |
119 | xfs_fs_cmn_err(CE_ALERT, mp, | 118 | xfs_fs_cmn_err(CE_ALERT, mp, |
120 | "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", | 119 | "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", |
121 | bp); | 120 | bp); |
122 | ASSERT(dip->di_next_unlinked); | 121 | ASSERT(dip->di_next_unlinked); |
123 | } | 122 | } |
124 | } | 123 | } |
125 | } | 124 | } |
126 | #endif | 125 | #endif |
127 | 126 | ||
128 | /* | 127 | /* |
129 | * Find the buffer associated with the given inode map | 128 | * Find the buffer associated with the given inode map |
130 | * We do basic validation checks on the buffer once it has been | 129 | * We do basic validation checks on the buffer once it has been |
131 | * retrieved from disk. | 130 | * retrieved from disk. |
132 | */ | 131 | */ |
133 | STATIC int | 132 | STATIC int |
134 | xfs_imap_to_bp( | 133 | xfs_imap_to_bp( |
135 | xfs_mount_t *mp, | 134 | xfs_mount_t *mp, |
136 | xfs_trans_t *tp, | 135 | xfs_trans_t *tp, |
137 | xfs_imap_t *imap, | 136 | struct xfs_imap *imap, |
138 | xfs_buf_t **bpp, | 137 | xfs_buf_t **bpp, |
139 | uint buf_flags, | 138 | uint buf_flags, |
140 | uint imap_flags) | 139 | uint imap_flags) |
141 | { | 140 | { |
142 | int error; | 141 | int error; |
143 | int i; | 142 | int i; |
144 | int ni; | 143 | int ni; |
145 | xfs_buf_t *bp; | 144 | xfs_buf_t *bp; |
146 | 145 | ||
147 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | 146 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, |
148 | (int)imap->im_len, buf_flags, &bp); | 147 | (int)imap->im_len, buf_flags, &bp); |
149 | if (error) { | 148 | if (error) { |
150 | if (error != EAGAIN) { | 149 | if (error != EAGAIN) { |
151 | cmn_err(CE_WARN, | 150 | cmn_err(CE_WARN, |
152 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " | 151 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " |
153 | "an error %d on %s. Returning error.", | 152 | "an error %d on %s. Returning error.", |
154 | error, mp->m_fsname); | 153 | error, mp->m_fsname); |
155 | } else { | 154 | } else { |
156 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); | 155 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); |
157 | } | 156 | } |
158 | return error; | 157 | return error; |
159 | } | 158 | } |
160 | 159 | ||
161 | /* | 160 | /* |
162 | * Validate the magic number and version of every inode in the buffer | 161 | * Validate the magic number and version of every inode in the buffer |
163 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | 162 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. |
164 | */ | 163 | */ |
165 | #ifdef DEBUG | 164 | #ifdef DEBUG |
166 | ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; | 165 | ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; |
167 | #else /* usual case */ | 166 | #else /* usual case */ |
168 | ni = 1; | 167 | ni = 1; |
169 | #endif | 168 | #endif |
170 | 169 | ||
171 | for (i = 0; i < ni; i++) { | 170 | for (i = 0; i < ni; i++) { |
172 | int di_ok; | 171 | int di_ok; |
173 | xfs_dinode_t *dip; | 172 | xfs_dinode_t *dip; |
174 | 173 | ||
175 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 174 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, |
176 | (i << mp->m_sb.sb_inodelog)); | 175 | (i << mp->m_sb.sb_inodelog)); |
177 | di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && | 176 | di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && |
178 | XFS_DINODE_GOOD_VERSION(dip->di_version); | 177 | XFS_DINODE_GOOD_VERSION(dip->di_version); |
179 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | 178 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, |
180 | XFS_ERRTAG_ITOBP_INOTOBP, | 179 | XFS_ERRTAG_ITOBP_INOTOBP, |
181 | XFS_RANDOM_ITOBP_INOTOBP))) { | 180 | XFS_RANDOM_ITOBP_INOTOBP))) { |
182 | if (imap_flags & XFS_IMAP_BULKSTAT) { | 181 | if (imap_flags & XFS_IMAP_BULKSTAT) { |
183 | xfs_trans_brelse(tp, bp); | 182 | xfs_trans_brelse(tp, bp); |
184 | return XFS_ERROR(EINVAL); | 183 | return XFS_ERROR(EINVAL); |
185 | } | 184 | } |
186 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", | 185 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", |
187 | XFS_ERRLEVEL_HIGH, mp, dip); | 186 | XFS_ERRLEVEL_HIGH, mp, dip); |
188 | #ifdef DEBUG | 187 | #ifdef DEBUG |
189 | cmn_err(CE_PANIC, | 188 | cmn_err(CE_PANIC, |
190 | "Device %s - bad inode magic/vsn " | 189 | "Device %s - bad inode magic/vsn " |
191 | "daddr %lld #%d (magic=%x)", | 190 | "daddr %lld #%d (magic=%x)", |
192 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | 191 | XFS_BUFTARG_NAME(mp->m_ddev_targp), |
193 | (unsigned long long)imap->im_blkno, i, | 192 | (unsigned long long)imap->im_blkno, i, |
194 | be16_to_cpu(dip->di_magic)); | 193 | be16_to_cpu(dip->di_magic)); |
195 | #endif | 194 | #endif |
196 | xfs_trans_brelse(tp, bp); | 195 | xfs_trans_brelse(tp, bp); |
197 | return XFS_ERROR(EFSCORRUPTED); | 196 | return XFS_ERROR(EFSCORRUPTED); |
198 | } | 197 | } |
199 | } | 198 | } |
200 | 199 | ||
201 | xfs_inobp_check(mp, bp); | 200 | xfs_inobp_check(mp, bp); |
202 | 201 | ||
203 | /* | 202 | /* |
204 | * Mark the buffer as an inode buffer now that it looks good | 203 | * Mark the buffer as an inode buffer now that it looks good |
205 | */ | 204 | */ |
206 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | 205 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); |
207 | 206 | ||
208 | *bpp = bp; | 207 | *bpp = bp; |
209 | return 0; | 208 | return 0; |
210 | } | 209 | } |
211 | 210 | ||
212 | /* | 211 | /* |
213 | * This routine is called to map an inode number within a file | 212 | * This routine is called to map an inode number within a file |
214 | * system to the buffer containing the on-disk version of the | 213 | * system to the buffer containing the on-disk version of the |
215 | * inode. It returns a pointer to the buffer containing the | 214 | * inode. It returns a pointer to the buffer containing the |
216 | * on-disk inode in the bpp parameter, and in the dip parameter | 215 | * on-disk inode in the bpp parameter, and in the dip parameter |
217 | * it returns a pointer to the on-disk inode within that buffer. | 216 | * it returns a pointer to the on-disk inode within that buffer. |
218 | * | 217 | * |
219 | * If a non-zero error is returned, then the contents of bpp and | 218 | * If a non-zero error is returned, then the contents of bpp and |
220 | * dipp are undefined. | 219 | * dipp are undefined. |
221 | * | 220 | * |
222 | * Use xfs_imap() to determine the size and location of the | 221 | * Use xfs_imap() to determine the size and location of the |
223 | * buffer to read from disk. | 222 | * buffer to read from disk. |
224 | */ | 223 | */ |
225 | int | 224 | int |
226 | xfs_inotobp( | 225 | xfs_inotobp( |
227 | xfs_mount_t *mp, | 226 | xfs_mount_t *mp, |
228 | xfs_trans_t *tp, | 227 | xfs_trans_t *tp, |
229 | xfs_ino_t ino, | 228 | xfs_ino_t ino, |
230 | xfs_dinode_t **dipp, | 229 | xfs_dinode_t **dipp, |
231 | xfs_buf_t **bpp, | 230 | xfs_buf_t **bpp, |
232 | int *offset, | 231 | int *offset, |
233 | uint imap_flags) | 232 | uint imap_flags) |
234 | { | 233 | { |
235 | xfs_imap_t imap; | 234 | struct xfs_imap imap; |
236 | xfs_buf_t *bp; | 235 | xfs_buf_t *bp; |
237 | int error; | 236 | int error; |
238 | 237 | ||
239 | imap.im_blkno = 0; | 238 | imap.im_blkno = 0; |
240 | error = xfs_imap(mp, tp, ino, &imap, imap_flags); | 239 | error = xfs_imap(mp, tp, ino, &imap, imap_flags); |
241 | if (error) | 240 | if (error) |
242 | return error; | 241 | return error; |
243 | 242 | ||
244 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); | 243 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); |
245 | if (error) | 244 | if (error) |
246 | return error; | 245 | return error; |
247 | 246 | ||
248 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 247 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
249 | *bpp = bp; | 248 | *bpp = bp; |
250 | *offset = imap.im_boffset; | 249 | *offset = imap.im_boffset; |
251 | return 0; | 250 | return 0; |
252 | } | 251 | } |
253 | 252 | ||
254 | 253 | ||
255 | /* | 254 | /* |
256 | * This routine is called to map an inode to the buffer containing | 255 | * This routine is called to map an inode to the buffer containing |
257 | * the on-disk version of the inode. It returns a pointer to the | 256 | * the on-disk version of the inode. It returns a pointer to the |
258 | * buffer containing the on-disk inode in the bpp parameter, and in | 257 | * buffer containing the on-disk inode in the bpp parameter, and in |
259 | * the dip parameter it returns a pointer to the on-disk inode within | 258 | * the dip parameter it returns a pointer to the on-disk inode within |
260 | * that buffer. | 259 | * that buffer. |
261 | * | 260 | * |
262 | * If a non-zero error is returned, then the contents of bpp and | 261 | * If a non-zero error is returned, then the contents of bpp and |
263 | * dipp are undefined. | 262 | * dipp are undefined. |
264 | * | 263 | * |
265 | * The inode is expected to already been mapped to its buffer and read | 264 | * The inode is expected to already been mapped to its buffer and read |
266 | * in once, thus we can use the mapping information stored in the inode | 265 | * in once, thus we can use the mapping information stored in the inode |
267 | * rather than calling xfs_imap(). This allows us to avoid the overhead | 266 | * rather than calling xfs_imap(). This allows us to avoid the overhead |
268 | * of looking at the inode btree for small block file systems | 267 | * of looking at the inode btree for small block file systems |
269 | * (see xfs_imap()). | 268 | * (see xfs_imap()). |
270 | */ | 269 | */ |
271 | int | 270 | int |
272 | xfs_itobp( | 271 | xfs_itobp( |
273 | xfs_mount_t *mp, | 272 | xfs_mount_t *mp, |
274 | xfs_trans_t *tp, | 273 | xfs_trans_t *tp, |
275 | xfs_inode_t *ip, | 274 | xfs_inode_t *ip, |
276 | xfs_dinode_t **dipp, | 275 | xfs_dinode_t **dipp, |
277 | xfs_buf_t **bpp, | 276 | xfs_buf_t **bpp, |
278 | uint buf_flags) | 277 | uint buf_flags) |
279 | { | 278 | { |
280 | xfs_imap_t imap; | ||
281 | xfs_buf_t *bp; | 279 | xfs_buf_t *bp; |
282 | int error; | 280 | int error; |
283 | 281 | ||
284 | ASSERT(ip->i_blkno != 0); | 282 | ASSERT(ip->i_imap.im_blkno != 0); |
285 | 283 | ||
286 | imap.im_blkno = ip->i_blkno; | 284 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0); |
287 | imap.im_len = ip->i_len; | ||
288 | imap.im_boffset = ip->i_boffset; | ||
289 | |||
290 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, 0); | ||
291 | if (error) | 285 | if (error) |
292 | return error; | 286 | return error; |
293 | 287 | ||
294 | if (!bp) { | 288 | if (!bp) { |
295 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); | 289 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); |
296 | ASSERT(tp == NULL); | 290 | ASSERT(tp == NULL); |
297 | *bpp = NULL; | 291 | *bpp = NULL; |
298 | return EAGAIN; | 292 | return EAGAIN; |
299 | } | 293 | } |
300 | 294 | ||
301 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 295 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); |
302 | *bpp = bp; | 296 | *bpp = bp; |
303 | return 0; | 297 | return 0; |
304 | } | 298 | } |
305 | 299 | ||
306 | /* | 300 | /* |
307 | * Move inode type and inode format specific information from the | 301 | * Move inode type and inode format specific information from the |
308 | * on-disk inode to the in-core inode. For fifos, devs, and sockets | 302 | * on-disk inode to the in-core inode. For fifos, devs, and sockets |
309 | * this means set if_rdev to the proper value. For files, directories, | 303 | * this means set if_rdev to the proper value. For files, directories, |
310 | * and symlinks this means to bring in the in-line data or extent | 304 | * and symlinks this means to bring in the in-line data or extent |
311 | * pointers. For a file in B-tree format, only the root is immediately | 305 | * pointers. For a file in B-tree format, only the root is immediately |
312 | * brought in-core. The rest will be in-lined in if_extents when it | 306 | * brought in-core. The rest will be in-lined in if_extents when it |
313 | * is first referenced (see xfs_iread_extents()). | 307 | * is first referenced (see xfs_iread_extents()). |
314 | */ | 308 | */ |
315 | STATIC int | 309 | STATIC int |
316 | xfs_iformat( | 310 | xfs_iformat( |
317 | xfs_inode_t *ip, | 311 | xfs_inode_t *ip, |
318 | xfs_dinode_t *dip) | 312 | xfs_dinode_t *dip) |
319 | { | 313 | { |
320 | xfs_attr_shortform_t *atp; | 314 | xfs_attr_shortform_t *atp; |
321 | int size; | 315 | int size; |
322 | int error; | 316 | int error; |
323 | xfs_fsize_t di_size; | 317 | xfs_fsize_t di_size; |
324 | ip->i_df.if_ext_max = | 318 | ip->i_df.if_ext_max = |
325 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | 319 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); |
326 | error = 0; | 320 | error = 0; |
327 | 321 | ||
328 | if (unlikely(be32_to_cpu(dip->di_nextents) + | 322 | if (unlikely(be32_to_cpu(dip->di_nextents) + |
329 | be16_to_cpu(dip->di_anextents) > | 323 | be16_to_cpu(dip->di_anextents) > |
330 | be64_to_cpu(dip->di_nblocks))) { | 324 | be64_to_cpu(dip->di_nblocks))) { |
331 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 325 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, |
332 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", | 326 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", |
333 | (unsigned long long)ip->i_ino, | 327 | (unsigned long long)ip->i_ino, |
334 | (int)(be32_to_cpu(dip->di_nextents) + | 328 | (int)(be32_to_cpu(dip->di_nextents) + |
335 | be16_to_cpu(dip->di_anextents)), | 329 | be16_to_cpu(dip->di_anextents)), |
336 | (unsigned long long) | 330 | (unsigned long long) |
337 | be64_to_cpu(dip->di_nblocks)); | 331 | be64_to_cpu(dip->di_nblocks)); |
338 | XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, | 332 | XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, |
339 | ip->i_mount, dip); | 333 | ip->i_mount, dip); |
340 | return XFS_ERROR(EFSCORRUPTED); | 334 | return XFS_ERROR(EFSCORRUPTED); |
341 | } | 335 | } |
342 | 336 | ||
343 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { | 337 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { |
344 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 338 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, |
345 | "corrupt dinode %Lu, forkoff = 0x%x.", | 339 | "corrupt dinode %Lu, forkoff = 0x%x.", |
346 | (unsigned long long)ip->i_ino, | 340 | (unsigned long long)ip->i_ino, |
347 | dip->di_forkoff); | 341 | dip->di_forkoff); |
348 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, | 342 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, |
349 | ip->i_mount, dip); | 343 | ip->i_mount, dip); |
350 | return XFS_ERROR(EFSCORRUPTED); | 344 | return XFS_ERROR(EFSCORRUPTED); |
351 | } | 345 | } |
352 | 346 | ||
353 | switch (ip->i_d.di_mode & S_IFMT) { | 347 | switch (ip->i_d.di_mode & S_IFMT) { |
354 | case S_IFIFO: | 348 | case S_IFIFO: |
355 | case S_IFCHR: | 349 | case S_IFCHR: |
356 | case S_IFBLK: | 350 | case S_IFBLK: |
357 | case S_IFSOCK: | 351 | case S_IFSOCK: |
358 | if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { | 352 | if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { |
359 | XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, | 353 | XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, |
360 | ip->i_mount, dip); | 354 | ip->i_mount, dip); |
361 | return XFS_ERROR(EFSCORRUPTED); | 355 | return XFS_ERROR(EFSCORRUPTED); |
362 | } | 356 | } |
363 | ip->i_d.di_size = 0; | 357 | ip->i_d.di_size = 0; |
364 | ip->i_size = 0; | 358 | ip->i_size = 0; |
365 | ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); | 359 | ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); |
366 | break; | 360 | break; |
367 | 361 | ||
368 | case S_IFREG: | 362 | case S_IFREG: |
369 | case S_IFLNK: | 363 | case S_IFLNK: |
370 | case S_IFDIR: | 364 | case S_IFDIR: |
371 | switch (dip->di_format) { | 365 | switch (dip->di_format) { |
372 | case XFS_DINODE_FMT_LOCAL: | 366 | case XFS_DINODE_FMT_LOCAL: |
373 | /* | 367 | /* |
374 | * no local regular files yet | 368 | * no local regular files yet |
375 | */ | 369 | */ |
376 | if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { | 370 | if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { |
377 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 371 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, |
378 | "corrupt inode %Lu " | 372 | "corrupt inode %Lu " |
379 | "(local format for regular file).", | 373 | "(local format for regular file).", |
380 | (unsigned long long) ip->i_ino); | 374 | (unsigned long long) ip->i_ino); |
381 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", | 375 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", |
382 | XFS_ERRLEVEL_LOW, | 376 | XFS_ERRLEVEL_LOW, |
383 | ip->i_mount, dip); | 377 | ip->i_mount, dip); |
384 | return XFS_ERROR(EFSCORRUPTED); | 378 | return XFS_ERROR(EFSCORRUPTED); |
385 | } | 379 | } |
386 | 380 | ||
387 | di_size = be64_to_cpu(dip->di_size); | 381 | di_size = be64_to_cpu(dip->di_size); |
388 | if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { | 382 | if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { |
389 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 383 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, |
390 | "corrupt inode %Lu " | 384 | "corrupt inode %Lu " |
391 | "(bad size %Ld for local inode).", | 385 | "(bad size %Ld for local inode).", |
392 | (unsigned long long) ip->i_ino, | 386 | (unsigned long long) ip->i_ino, |
393 | (long long) di_size); | 387 | (long long) di_size); |
394 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", | 388 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", |
395 | XFS_ERRLEVEL_LOW, | 389 | XFS_ERRLEVEL_LOW, |
396 | ip->i_mount, dip); | 390 | ip->i_mount, dip); |
397 | return XFS_ERROR(EFSCORRUPTED); | 391 | return XFS_ERROR(EFSCORRUPTED); |
398 | } | 392 | } |
399 | 393 | ||
400 | size = (int)di_size; | 394 | size = (int)di_size; |
401 | error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); | 395 | error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); |
402 | break; | 396 | break; |
403 | case XFS_DINODE_FMT_EXTENTS: | 397 | case XFS_DINODE_FMT_EXTENTS: |
404 | error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); | 398 | error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); |
405 | break; | 399 | break; |
406 | case XFS_DINODE_FMT_BTREE: | 400 | case XFS_DINODE_FMT_BTREE: |
407 | error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); | 401 | error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); |
408 | break; | 402 | break; |
409 | default: | 403 | default: |
410 | XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, | 404 | XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, |
411 | ip->i_mount); | 405 | ip->i_mount); |
412 | return XFS_ERROR(EFSCORRUPTED); | 406 | return XFS_ERROR(EFSCORRUPTED); |
413 | } | 407 | } |
414 | break; | 408 | break; |
415 | 409 | ||
416 | default: | 410 | default: |
417 | XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); | 411 | XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); |
418 | return XFS_ERROR(EFSCORRUPTED); | 412 | return XFS_ERROR(EFSCORRUPTED); |
419 | } | 413 | } |
420 | if (error) { | 414 | if (error) { |
421 | return error; | 415 | return error; |
422 | } | 416 | } |
423 | if (!XFS_DFORK_Q(dip)) | 417 | if (!XFS_DFORK_Q(dip)) |
424 | return 0; | 418 | return 0; |
425 | ASSERT(ip->i_afp == NULL); | 419 | ASSERT(ip->i_afp == NULL); |
426 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); | 420 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); |
427 | ip->i_afp->if_ext_max = | 421 | ip->i_afp->if_ext_max = |
428 | XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | 422 | XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); |
429 | switch (dip->di_aformat) { | 423 | switch (dip->di_aformat) { |
430 | case XFS_DINODE_FMT_LOCAL: | 424 | case XFS_DINODE_FMT_LOCAL: |
431 | atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); | 425 | atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); |
432 | size = be16_to_cpu(atp->hdr.totsize); | 426 | size = be16_to_cpu(atp->hdr.totsize); |
433 | error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); | 427 | error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); |
434 | break; | 428 | break; |
435 | case XFS_DINODE_FMT_EXTENTS: | 429 | case XFS_DINODE_FMT_EXTENTS: |
436 | error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); | 430 | error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); |
437 | break; | 431 | break; |
438 | case XFS_DINODE_FMT_BTREE: | 432 | case XFS_DINODE_FMT_BTREE: |
439 | error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); | 433 | error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); |
440 | break; | 434 | break; |
441 | default: | 435 | default: |
442 | error = XFS_ERROR(EFSCORRUPTED); | 436 | error = XFS_ERROR(EFSCORRUPTED); |
443 | break; | 437 | break; |
444 | } | 438 | } |
445 | if (error) { | 439 | if (error) { |
446 | kmem_zone_free(xfs_ifork_zone, ip->i_afp); | 440 | kmem_zone_free(xfs_ifork_zone, ip->i_afp); |
447 | ip->i_afp = NULL; | 441 | ip->i_afp = NULL; |
448 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | 442 | xfs_idestroy_fork(ip, XFS_DATA_FORK); |
449 | } | 443 | } |
450 | return error; | 444 | return error; |
451 | } | 445 | } |
452 | 446 | ||
453 | /* | 447 | /* |
454 | * The file is in-lined in the on-disk inode. | 448 | * The file is in-lined in the on-disk inode. |
455 | * If it fits into if_inline_data, then copy | 449 | * If it fits into if_inline_data, then copy |
456 | * it there, otherwise allocate a buffer for it | 450 | * it there, otherwise allocate a buffer for it |
457 | * and copy the data there. Either way, set | 451 | * and copy the data there. Either way, set |
458 | * if_data to point at the data. | 452 | * if_data to point at the data. |
459 | * If we allocate a buffer for the data, make | 453 | * If we allocate a buffer for the data, make |
460 | * sure that its size is a multiple of 4 and | 454 | * sure that its size is a multiple of 4 and |
461 | * record the real size in i_real_bytes. | 455 | * record the real size in i_real_bytes. |
462 | */ | 456 | */ |
463 | STATIC int | 457 | STATIC int |
464 | xfs_iformat_local( | 458 | xfs_iformat_local( |
465 | xfs_inode_t *ip, | 459 | xfs_inode_t *ip, |
466 | xfs_dinode_t *dip, | 460 | xfs_dinode_t *dip, |
467 | int whichfork, | 461 | int whichfork, |
468 | int size) | 462 | int size) |
469 | { | 463 | { |
470 | xfs_ifork_t *ifp; | 464 | xfs_ifork_t *ifp; |
471 | int real_size; | 465 | int real_size; |
472 | 466 | ||
473 | /* | 467 | /* |
474 | * If the size is unreasonable, then something | 468 | * If the size is unreasonable, then something |
475 | * is wrong and we just bail out rather than crash in | 469 | * is wrong and we just bail out rather than crash in |
476 | * kmem_alloc() or memcpy() below. | 470 | * kmem_alloc() or memcpy() below. |
477 | */ | 471 | */ |
478 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | 472 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { |
479 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 473 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, |
480 | "corrupt inode %Lu " | 474 | "corrupt inode %Lu " |
481 | "(bad size %d for local fork, size = %d).", | 475 | "(bad size %d for local fork, size = %d).", |
482 | (unsigned long long) ip->i_ino, size, | 476 | (unsigned long long) ip->i_ino, size, |
483 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); | 477 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); |
484 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, | 478 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, |
485 | ip->i_mount, dip); | 479 | ip->i_mount, dip); |
486 | return XFS_ERROR(EFSCORRUPTED); | 480 | return XFS_ERROR(EFSCORRUPTED); |
487 | } | 481 | } |
488 | ifp = XFS_IFORK_PTR(ip, whichfork); | 482 | ifp = XFS_IFORK_PTR(ip, whichfork); |
489 | real_size = 0; | 483 | real_size = 0; |
490 | if (size == 0) | 484 | if (size == 0) |
491 | ifp->if_u1.if_data = NULL; | 485 | ifp->if_u1.if_data = NULL; |
492 | else if (size <= sizeof(ifp->if_u2.if_inline_data)) | 486 | else if (size <= sizeof(ifp->if_u2.if_inline_data)) |
493 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; | 487 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; |
494 | else { | 488 | else { |
495 | real_size = roundup(size, 4); | 489 | real_size = roundup(size, 4); |
496 | ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); | 490 | ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); |
497 | } | 491 | } |
498 | ifp->if_bytes = size; | 492 | ifp->if_bytes = size; |
499 | ifp->if_real_bytes = real_size; | 493 | ifp->if_real_bytes = real_size; |
500 | if (size) | 494 | if (size) |
501 | memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); | 495 | memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); |
502 | ifp->if_flags &= ~XFS_IFEXTENTS; | 496 | ifp->if_flags &= ~XFS_IFEXTENTS; |
503 | ifp->if_flags |= XFS_IFINLINE; | 497 | ifp->if_flags |= XFS_IFINLINE; |
504 | return 0; | 498 | return 0; |
505 | } | 499 | } |
506 | 500 | ||
507 | /* | 501 | /* |
508 | * The file consists of a set of extents all | 502 | * The file consists of a set of extents all |
509 | * of which fit into the on-disk inode. | 503 | * of which fit into the on-disk inode. |
510 | * If there are few enough extents to fit into | 504 | * If there are few enough extents to fit into |
511 | * the if_inline_ext, then copy them there. | 505 | * the if_inline_ext, then copy them there. |
512 | * Otherwise allocate a buffer for them and copy | 506 | * Otherwise allocate a buffer for them and copy |
513 | * them into it. Either way, set if_extents | 507 | * them into it. Either way, set if_extents |
514 | * to point at the extents. | 508 | * to point at the extents. |
515 | */ | 509 | */ |
516 | STATIC int | 510 | STATIC int |
517 | xfs_iformat_extents( | 511 | xfs_iformat_extents( |
518 | xfs_inode_t *ip, | 512 | xfs_inode_t *ip, |
519 | xfs_dinode_t *dip, | 513 | xfs_dinode_t *dip, |
520 | int whichfork) | 514 | int whichfork) |
521 | { | 515 | { |
522 | xfs_bmbt_rec_t *dp; | 516 | xfs_bmbt_rec_t *dp; |
523 | xfs_ifork_t *ifp; | 517 | xfs_ifork_t *ifp; |
524 | int nex; | 518 | int nex; |
525 | int size; | 519 | int size; |
526 | int i; | 520 | int i; |
527 | 521 | ||
528 | ifp = XFS_IFORK_PTR(ip, whichfork); | 522 | ifp = XFS_IFORK_PTR(ip, whichfork); |
529 | nex = XFS_DFORK_NEXTENTS(dip, whichfork); | 523 | nex = XFS_DFORK_NEXTENTS(dip, whichfork); |
530 | size = nex * (uint)sizeof(xfs_bmbt_rec_t); | 524 | size = nex * (uint)sizeof(xfs_bmbt_rec_t); |
531 | 525 | ||
532 | /* | 526 | /* |
533 | * If the number of extents is unreasonable, then something | 527 | * If the number of extents is unreasonable, then something |
534 | * is wrong and we just bail out rather than crash in | 528 | * is wrong and we just bail out rather than crash in |
535 | * kmem_alloc() or memcpy() below. | 529 | * kmem_alloc() or memcpy() below. |
536 | */ | 530 | */ |
537 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | 531 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { |
538 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 532 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, |
539 | "corrupt inode %Lu ((a)extents = %d).", | 533 | "corrupt inode %Lu ((a)extents = %d).", |
540 | (unsigned long long) ip->i_ino, nex); | 534 | (unsigned long long) ip->i_ino, nex); |
541 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, | 535 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, |
542 | ip->i_mount, dip); | 536 | ip->i_mount, dip); |
543 | return XFS_ERROR(EFSCORRUPTED); | 537 | return XFS_ERROR(EFSCORRUPTED); |
544 | } | 538 | } |
545 | 539 | ||
546 | ifp->if_real_bytes = 0; | 540 | ifp->if_real_bytes = 0; |
547 | if (nex == 0) | 541 | if (nex == 0) |
548 | ifp->if_u1.if_extents = NULL; | 542 | ifp->if_u1.if_extents = NULL; |
549 | else if (nex <= XFS_INLINE_EXTS) | 543 | else if (nex <= XFS_INLINE_EXTS) |
550 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | 544 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; |
551 | else | 545 | else |
552 | xfs_iext_add(ifp, 0, nex); | 546 | xfs_iext_add(ifp, 0, nex); |
553 | 547 | ||
554 | ifp->if_bytes = size; | 548 | ifp->if_bytes = size; |
555 | if (size) { | 549 | if (size) { |
556 | dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); | 550 | dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); |
557 | xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); | 551 | xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); |
558 | for (i = 0; i < nex; i++, dp++) { | 552 | for (i = 0; i < nex; i++, dp++) { |
559 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | 553 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); |
560 | ep->l0 = get_unaligned_be64(&dp->l0); | 554 | ep->l0 = get_unaligned_be64(&dp->l0); |
561 | ep->l1 = get_unaligned_be64(&dp->l1); | 555 | ep->l1 = get_unaligned_be64(&dp->l1); |
562 | } | 556 | } |
563 | XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); | 557 | XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); |
564 | if (whichfork != XFS_DATA_FORK || | 558 | if (whichfork != XFS_DATA_FORK || |
565 | XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) | 559 | XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) |
566 | if (unlikely(xfs_check_nostate_extents( | 560 | if (unlikely(xfs_check_nostate_extents( |
567 | ifp, 0, nex))) { | 561 | ifp, 0, nex))) { |
568 | XFS_ERROR_REPORT("xfs_iformat_extents(2)", | 562 | XFS_ERROR_REPORT("xfs_iformat_extents(2)", |
569 | XFS_ERRLEVEL_LOW, | 563 | XFS_ERRLEVEL_LOW, |
570 | ip->i_mount); | 564 | ip->i_mount); |
571 | return XFS_ERROR(EFSCORRUPTED); | 565 | return XFS_ERROR(EFSCORRUPTED); |
572 | } | 566 | } |
573 | } | 567 | } |
574 | ifp->if_flags |= XFS_IFEXTENTS; | 568 | ifp->if_flags |= XFS_IFEXTENTS; |
575 | return 0; | 569 | return 0; |
576 | } | 570 | } |
577 | 571 | ||
578 | /* | 572 | /* |
579 | * The file has too many extents to fit into | 573 | * The file has too many extents to fit into |
580 | * the inode, so they are in B-tree format. | 574 | * the inode, so they are in B-tree format. |
581 | * Allocate a buffer for the root of the B-tree | 575 | * Allocate a buffer for the root of the B-tree |
582 | * and copy the root into it. The i_extents | 576 | * and copy the root into it. The i_extents |
583 | * field will remain NULL until all of the | 577 | * field will remain NULL until all of the |
584 | * extents are read in (when they are needed). | 578 | * extents are read in (when they are needed). |
585 | */ | 579 | */ |
586 | STATIC int | 580 | STATIC int |
587 | xfs_iformat_btree( | 581 | xfs_iformat_btree( |
588 | xfs_inode_t *ip, | 582 | xfs_inode_t *ip, |
589 | xfs_dinode_t *dip, | 583 | xfs_dinode_t *dip, |
590 | int whichfork) | 584 | int whichfork) |
591 | { | 585 | { |
592 | xfs_bmdr_block_t *dfp; | 586 | xfs_bmdr_block_t *dfp; |
593 | xfs_ifork_t *ifp; | 587 | xfs_ifork_t *ifp; |
594 | /* REFERENCED */ | 588 | /* REFERENCED */ |
595 | int nrecs; | 589 | int nrecs; |
596 | int size; | 590 | int size; |
597 | 591 | ||
598 | ifp = XFS_IFORK_PTR(ip, whichfork); | 592 | ifp = XFS_IFORK_PTR(ip, whichfork); |
599 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); | 593 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); |
600 | size = XFS_BMAP_BROOT_SPACE(dfp); | 594 | size = XFS_BMAP_BROOT_SPACE(dfp); |
601 | nrecs = be16_to_cpu(dfp->bb_numrecs); | 595 | nrecs = be16_to_cpu(dfp->bb_numrecs); |
602 | 596 | ||
603 | /* | 597 | /* |
604 | * blow out if -- fork has less extents than can fit in | 598 | * blow out if -- fork has less extents than can fit in |
605 | * fork (fork shouldn't be a btree format), root btree | 599 | * fork (fork shouldn't be a btree format), root btree |
606 | * block has more records than can fit into the fork, | 600 | * block has more records than can fit into the fork, |
607 | * or the number of extents is greater than the number of | 601 | * or the number of extents is greater than the number of |
608 | * blocks. | 602 | * blocks. |
609 | */ | 603 | */ |
610 | if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max | 604 | if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max |
611 | || XFS_BMDR_SPACE_CALC(nrecs) > | 605 | || XFS_BMDR_SPACE_CALC(nrecs) > |
612 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) | 606 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) |
613 | || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | 607 | || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { |
614 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, | 608 | xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, |
615 | "corrupt inode %Lu (btree).", | 609 | "corrupt inode %Lu (btree).", |
616 | (unsigned long long) ip->i_ino); | 610 | (unsigned long long) ip->i_ino); |
617 | XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | 611 | XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, |
618 | ip->i_mount); | 612 | ip->i_mount); |
619 | return XFS_ERROR(EFSCORRUPTED); | 613 | return XFS_ERROR(EFSCORRUPTED); |
620 | } | 614 | } |
621 | 615 | ||
622 | ifp->if_broot_bytes = size; | 616 | ifp->if_broot_bytes = size; |
623 | ifp->if_broot = kmem_alloc(size, KM_SLEEP); | 617 | ifp->if_broot = kmem_alloc(size, KM_SLEEP); |
624 | ASSERT(ifp->if_broot != NULL); | 618 | ASSERT(ifp->if_broot != NULL); |
625 | /* | 619 | /* |
626 | * Copy and convert from the on-disk structure | 620 | * Copy and convert from the on-disk structure |
627 | * to the in-memory structure. | 621 | * to the in-memory structure. |
628 | */ | 622 | */ |
629 | xfs_bmdr_to_bmbt(ip->i_mount, dfp, | 623 | xfs_bmdr_to_bmbt(ip->i_mount, dfp, |
630 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), | 624 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), |
631 | ifp->if_broot, size); | 625 | ifp->if_broot, size); |
632 | ifp->if_flags &= ~XFS_IFEXTENTS; | 626 | ifp->if_flags &= ~XFS_IFEXTENTS; |
633 | ifp->if_flags |= XFS_IFBROOT; | 627 | ifp->if_flags |= XFS_IFBROOT; |
634 | 628 | ||
635 | return 0; | 629 | return 0; |
636 | } | 630 | } |
637 | 631 | ||
638 | void | 632 | void |
639 | xfs_dinode_from_disk( | 633 | xfs_dinode_from_disk( |
640 | xfs_icdinode_t *to, | 634 | xfs_icdinode_t *to, |
641 | xfs_dinode_t *from) | 635 | xfs_dinode_t *from) |
642 | { | 636 | { |
643 | to->di_magic = be16_to_cpu(from->di_magic); | 637 | to->di_magic = be16_to_cpu(from->di_magic); |
644 | to->di_mode = be16_to_cpu(from->di_mode); | 638 | to->di_mode = be16_to_cpu(from->di_mode); |
645 | to->di_version = from ->di_version; | 639 | to->di_version = from ->di_version; |
646 | to->di_format = from->di_format; | 640 | to->di_format = from->di_format; |
647 | to->di_onlink = be16_to_cpu(from->di_onlink); | 641 | to->di_onlink = be16_to_cpu(from->di_onlink); |
648 | to->di_uid = be32_to_cpu(from->di_uid); | 642 | to->di_uid = be32_to_cpu(from->di_uid); |
649 | to->di_gid = be32_to_cpu(from->di_gid); | 643 | to->di_gid = be32_to_cpu(from->di_gid); |
650 | to->di_nlink = be32_to_cpu(from->di_nlink); | 644 | to->di_nlink = be32_to_cpu(from->di_nlink); |
651 | to->di_projid = be16_to_cpu(from->di_projid); | 645 | to->di_projid = be16_to_cpu(from->di_projid); |
652 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 646 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
653 | to->di_flushiter = be16_to_cpu(from->di_flushiter); | 647 | to->di_flushiter = be16_to_cpu(from->di_flushiter); |
654 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); | 648 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); |
655 | to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); | 649 | to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); |
656 | to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); | 650 | to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); |
657 | to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); | 651 | to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); |
658 | to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); | 652 | to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); |
659 | to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); | 653 | to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); |
660 | to->di_size = be64_to_cpu(from->di_size); | 654 | to->di_size = be64_to_cpu(from->di_size); |
661 | to->di_nblocks = be64_to_cpu(from->di_nblocks); | 655 | to->di_nblocks = be64_to_cpu(from->di_nblocks); |
662 | to->di_extsize = be32_to_cpu(from->di_extsize); | 656 | to->di_extsize = be32_to_cpu(from->di_extsize); |
663 | to->di_nextents = be32_to_cpu(from->di_nextents); | 657 | to->di_nextents = be32_to_cpu(from->di_nextents); |
664 | to->di_anextents = be16_to_cpu(from->di_anextents); | 658 | to->di_anextents = be16_to_cpu(from->di_anextents); |
665 | to->di_forkoff = from->di_forkoff; | 659 | to->di_forkoff = from->di_forkoff; |
666 | to->di_aformat = from->di_aformat; | 660 | to->di_aformat = from->di_aformat; |
667 | to->di_dmevmask = be32_to_cpu(from->di_dmevmask); | 661 | to->di_dmevmask = be32_to_cpu(from->di_dmevmask); |
668 | to->di_dmstate = be16_to_cpu(from->di_dmstate); | 662 | to->di_dmstate = be16_to_cpu(from->di_dmstate); |
669 | to->di_flags = be16_to_cpu(from->di_flags); | 663 | to->di_flags = be16_to_cpu(from->di_flags); |
670 | to->di_gen = be32_to_cpu(from->di_gen); | 664 | to->di_gen = be32_to_cpu(from->di_gen); |
671 | } | 665 | } |
672 | 666 | ||
673 | void | 667 | void |
674 | xfs_dinode_to_disk( | 668 | xfs_dinode_to_disk( |
675 | xfs_dinode_t *to, | 669 | xfs_dinode_t *to, |
676 | xfs_icdinode_t *from) | 670 | xfs_icdinode_t *from) |
677 | { | 671 | { |
678 | to->di_magic = cpu_to_be16(from->di_magic); | 672 | to->di_magic = cpu_to_be16(from->di_magic); |
679 | to->di_mode = cpu_to_be16(from->di_mode); | 673 | to->di_mode = cpu_to_be16(from->di_mode); |
680 | to->di_version = from ->di_version; | 674 | to->di_version = from ->di_version; |
681 | to->di_format = from->di_format; | 675 | to->di_format = from->di_format; |
682 | to->di_onlink = cpu_to_be16(from->di_onlink); | 676 | to->di_onlink = cpu_to_be16(from->di_onlink); |
683 | to->di_uid = cpu_to_be32(from->di_uid); | 677 | to->di_uid = cpu_to_be32(from->di_uid); |
684 | to->di_gid = cpu_to_be32(from->di_gid); | 678 | to->di_gid = cpu_to_be32(from->di_gid); |
685 | to->di_nlink = cpu_to_be32(from->di_nlink); | 679 | to->di_nlink = cpu_to_be32(from->di_nlink); |
686 | to->di_projid = cpu_to_be16(from->di_projid); | 680 | to->di_projid = cpu_to_be16(from->di_projid); |
687 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 681 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
688 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | 682 | to->di_flushiter = cpu_to_be16(from->di_flushiter); |
689 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | 683 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); |
690 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); | 684 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); |
691 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); | 685 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); |
692 | to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); | 686 | to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); |
693 | to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); | 687 | to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); |
694 | to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); | 688 | to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); |
695 | to->di_size = cpu_to_be64(from->di_size); | 689 | to->di_size = cpu_to_be64(from->di_size); |
696 | to->di_nblocks = cpu_to_be64(from->di_nblocks); | 690 | to->di_nblocks = cpu_to_be64(from->di_nblocks); |
697 | to->di_extsize = cpu_to_be32(from->di_extsize); | 691 | to->di_extsize = cpu_to_be32(from->di_extsize); |
698 | to->di_nextents = cpu_to_be32(from->di_nextents); | 692 | to->di_nextents = cpu_to_be32(from->di_nextents); |
699 | to->di_anextents = cpu_to_be16(from->di_anextents); | 693 | to->di_anextents = cpu_to_be16(from->di_anextents); |
700 | to->di_forkoff = from->di_forkoff; | 694 | to->di_forkoff = from->di_forkoff; |
701 | to->di_aformat = from->di_aformat; | 695 | to->di_aformat = from->di_aformat; |
702 | to->di_dmevmask = cpu_to_be32(from->di_dmevmask); | 696 | to->di_dmevmask = cpu_to_be32(from->di_dmevmask); |
703 | to->di_dmstate = cpu_to_be16(from->di_dmstate); | 697 | to->di_dmstate = cpu_to_be16(from->di_dmstate); |
704 | to->di_flags = cpu_to_be16(from->di_flags); | 698 | to->di_flags = cpu_to_be16(from->di_flags); |
705 | to->di_gen = cpu_to_be32(from->di_gen); | 699 | to->di_gen = cpu_to_be32(from->di_gen); |
706 | } | 700 | } |
707 | 701 | ||
708 | STATIC uint | 702 | STATIC uint |
709 | _xfs_dic2xflags( | 703 | _xfs_dic2xflags( |
710 | __uint16_t di_flags) | 704 | __uint16_t di_flags) |
711 | { | 705 | { |
712 | uint flags = 0; | 706 | uint flags = 0; |
713 | 707 | ||
714 | if (di_flags & XFS_DIFLAG_ANY) { | 708 | if (di_flags & XFS_DIFLAG_ANY) { |
715 | if (di_flags & XFS_DIFLAG_REALTIME) | 709 | if (di_flags & XFS_DIFLAG_REALTIME) |
716 | flags |= XFS_XFLAG_REALTIME; | 710 | flags |= XFS_XFLAG_REALTIME; |
717 | if (di_flags & XFS_DIFLAG_PREALLOC) | 711 | if (di_flags & XFS_DIFLAG_PREALLOC) |
718 | flags |= XFS_XFLAG_PREALLOC; | 712 | flags |= XFS_XFLAG_PREALLOC; |
719 | if (di_flags & XFS_DIFLAG_IMMUTABLE) | 713 | if (di_flags & XFS_DIFLAG_IMMUTABLE) |
720 | flags |= XFS_XFLAG_IMMUTABLE; | 714 | flags |= XFS_XFLAG_IMMUTABLE; |
721 | if (di_flags & XFS_DIFLAG_APPEND) | 715 | if (di_flags & XFS_DIFLAG_APPEND) |
722 | flags |= XFS_XFLAG_APPEND; | 716 | flags |= XFS_XFLAG_APPEND; |
723 | if (di_flags & XFS_DIFLAG_SYNC) | 717 | if (di_flags & XFS_DIFLAG_SYNC) |
724 | flags |= XFS_XFLAG_SYNC; | 718 | flags |= XFS_XFLAG_SYNC; |
725 | if (di_flags & XFS_DIFLAG_NOATIME) | 719 | if (di_flags & XFS_DIFLAG_NOATIME) |
726 | flags |= XFS_XFLAG_NOATIME; | 720 | flags |= XFS_XFLAG_NOATIME; |
727 | if (di_flags & XFS_DIFLAG_NODUMP) | 721 | if (di_flags & XFS_DIFLAG_NODUMP) |
728 | flags |= XFS_XFLAG_NODUMP; | 722 | flags |= XFS_XFLAG_NODUMP; |
729 | if (di_flags & XFS_DIFLAG_RTINHERIT) | 723 | if (di_flags & XFS_DIFLAG_RTINHERIT) |
730 | flags |= XFS_XFLAG_RTINHERIT; | 724 | flags |= XFS_XFLAG_RTINHERIT; |
731 | if (di_flags & XFS_DIFLAG_PROJINHERIT) | 725 | if (di_flags & XFS_DIFLAG_PROJINHERIT) |
732 | flags |= XFS_XFLAG_PROJINHERIT; | 726 | flags |= XFS_XFLAG_PROJINHERIT; |
733 | if (di_flags & XFS_DIFLAG_NOSYMLINKS) | 727 | if (di_flags & XFS_DIFLAG_NOSYMLINKS) |
734 | flags |= XFS_XFLAG_NOSYMLINKS; | 728 | flags |= XFS_XFLAG_NOSYMLINKS; |
735 | if (di_flags & XFS_DIFLAG_EXTSIZE) | 729 | if (di_flags & XFS_DIFLAG_EXTSIZE) |
736 | flags |= XFS_XFLAG_EXTSIZE; | 730 | flags |= XFS_XFLAG_EXTSIZE; |
737 | if (di_flags & XFS_DIFLAG_EXTSZINHERIT) | 731 | if (di_flags & XFS_DIFLAG_EXTSZINHERIT) |
738 | flags |= XFS_XFLAG_EXTSZINHERIT; | 732 | flags |= XFS_XFLAG_EXTSZINHERIT; |
739 | if (di_flags & XFS_DIFLAG_NODEFRAG) | 733 | if (di_flags & XFS_DIFLAG_NODEFRAG) |
740 | flags |= XFS_XFLAG_NODEFRAG; | 734 | flags |= XFS_XFLAG_NODEFRAG; |
741 | if (di_flags & XFS_DIFLAG_FILESTREAM) | 735 | if (di_flags & XFS_DIFLAG_FILESTREAM) |
742 | flags |= XFS_XFLAG_FILESTREAM; | 736 | flags |= XFS_XFLAG_FILESTREAM; |
743 | } | 737 | } |
744 | 738 | ||
745 | return flags; | 739 | return flags; |
746 | } | 740 | } |
747 | 741 | ||
748 | uint | 742 | uint |
749 | xfs_ip2xflags( | 743 | xfs_ip2xflags( |
750 | xfs_inode_t *ip) | 744 | xfs_inode_t *ip) |
751 | { | 745 | { |
752 | xfs_icdinode_t *dic = &ip->i_d; | 746 | xfs_icdinode_t *dic = &ip->i_d; |
753 | 747 | ||
754 | return _xfs_dic2xflags(dic->di_flags) | | 748 | return _xfs_dic2xflags(dic->di_flags) | |
755 | (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); | 749 | (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); |
756 | } | 750 | } |
757 | 751 | ||
758 | uint | 752 | uint |
759 | xfs_dic2xflags( | 753 | xfs_dic2xflags( |
760 | xfs_dinode_t *dip) | 754 | xfs_dinode_t *dip) |
761 | { | 755 | { |
762 | return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | | 756 | return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | |
763 | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); | 757 | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); |
764 | } | 758 | } |
765 | 759 | ||
/*
 * Allocate and initialise an xfs_inode.
 *
 * Returns the new inode with every in-core field reset to a known
 * state, or NULL if the VFS inode could not be initialised.
 */
STATIC struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * if this didn't occur in transactions, we could use
	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
	 * code up to do this anyway.
	 */
	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;

	/* A zone-recycled inode must come back in a fully quiesced state. */
	ASSERT(atomic_read(&ip->i_iocount) == 0);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(completion_done(&ip->i_flush));

	/*
	 * initialise the VFS inode here to get failures
	 * out of the way early.
	 */
	if (!inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_zone_free(xfs_inode_zone, ip);
		return NULL;
	}

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	/* Clear the embedded disk-location map; xfs_imap() fills it in later. */
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
	ip->i_flags = 0;
	ip->i_update_core = 0;
	ip->i_update_size = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
	ip->i_size = 0;
	ip->i_new_size = 0;

	/*
	 * Initialize inode's trace buffers.
	 * NOTE(review): allocation results are not checked here --
	 * presumably tracing is best-effort and tolerates a NULL buffer.
	 */
#ifdef XFS_INODE_TRACE
	ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_BMAP_TRACE
	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_BTREE_TRACE
	ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_RW_TRACE
	ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_ILOCK_TRACE
	ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_DIR2_TRACE
	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
#endif

	return ip;
}
839 | 831 | ||
/*
 * Given a mount structure and an inode number, return a pointer
 * to a newly allocated in-core inode corresponding to the given
 * inode number.
 *
 * Initialize the inode's attributes and extent pointers if it
 * already has them (it will not if the inode has no links).
 *
 * Returns 0 and sets *ipp on success; on failure the partially
 * built inode is destroyed and a positive XFS error code is
 * returned.
 */
int
xfs_iread(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,		/* may be NULL (untransactional read) */
	xfs_ino_t	ino,
	xfs_inode_t	**ipp,
	xfs_daddr_t	bno,		/* 0, or the known disk block of the inode */
	uint		imap_flags)
{
	xfs_buf_t	*bp;
	xfs_dinode_t	*dip;
	xfs_inode_t	*ip;
	int		error;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;	/* positive errno, per XFS convention */

	/*
	 * Fill in the location information in the in-core inode.
	 */
	ip->i_imap.im_blkno = bno;
	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, imap_flags);
	if (error)
		goto out_destroy_inode;
	/* A caller-supplied bno must agree with the mapping we computed. */
	ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);

	/*
	 * Get pointers to the on-disk inode and the buffer containing it.
	 */
	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
			       XFS_BUF_LOCK, imap_flags);
	if (error)
		goto out_destroy_inode;
	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);

	/*
	 * If we got something that isn't an inode it means someone
	 * (nfs or dmi) has a stale handle.
	 */
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
#ifdef DEBUG
		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
				"dip->di_magic (0x%x) != "
				"XFS_DINODE_MAGIC (0x%x)",
				be16_to_cpu(dip->di_magic),
				XFS_DINODE_MAGIC);
#endif /* DEBUG */
		error = XFS_ERROR(EINVAL);
		goto out_brelse;
	}

	/*
	 * If the on-disk inode is already linked to a directory
	 * entry, copy all of the inode into the in-core inode.
	 * xfs_iformat() handles copying in the inode format
	 * specific information.
	 * Otherwise, just get the truly permanent information.
	 */
	if (dip->di_mode) {
		xfs_dinode_from_disk(&ip->i_d, dip);
		error = xfs_iformat(ip, dip);
		if (error) {
#ifdef DEBUG
			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
					"xfs_iformat() returned error %d",
					error);
#endif /* DEBUG */
			goto out_brelse;
		}
	} else {
		/* Unlinked (free) inode: copy only the permanent fields. */
		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
		ip->i_d.di_version = dip->di_version;
		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
		/*
		 * Make sure to pull in the mode here as well in
		 * case the inode is released without being used.
		 * This ensures that xfs_inactive() will see that
		 * the inode is already free and not try to mess
		 * with the uninitialized part of it.
		 */
		ip->i_d.di_mode = 0;
		/*
		 * Initialize the per-fork minima and maxima for a new
		 * inode here.  xfs_iformat will do it for old inodes.
		 */
		ip->i_df.if_ext_max =
			XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	}

	/*
	 * The inode format changed when we moved the link count and
	 * made it 32 bits long.  If this is an old format inode,
	 * convert it in memory to look like a new one.  If it gets
	 * flushed to disk we will convert back before flushing or
	 * logging it.  We zero out the new projid field and the old link
	 * count field.  We'll handle clearing the pad field (the remains
	 * of the old uuid field) when we actually convert the inode to
	 * the new format. We don't change the version number so that we
	 * can distinguish this from a real new format inode.
	 */
	if (ip->i_d.di_version == 1) {
		ip->i_d.di_nlink = ip->i_d.di_onlink;
		ip->i_d.di_onlink = 0;
		ip->i_d.di_projid = 0;
	}

	ip->i_delayed_blks = 0;
	ip->i_size = ip->i_d.di_size;

	/*
	 * Mark the buffer containing the inode as something to keep
	 * around for a while.  This helps to keep recently accessed
	 * meta-data in-core longer.
	 */
	XFS_BUF_SET_REF(bp, XFS_INO_REF);

	/*
	 * Use xfs_trans_brelse() to release the buffer containing the
	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
	 * in xfs_itobp() above.  If tp is NULL, this is just a normal
	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
	 * will only release the buffer if it is not dirty within the
	 * transaction.  It will be OK to release the buffer in this case,
	 * because inodes on disk are never destroyed and we will be
	 * locking the new in-core inode before putting it in the hash
	 * table where other processes can find it.  Thus we don't have
	 * to worry about the inode being changed just because we released
	 * the buffer.
	 */
	xfs_trans_brelse(tp, bp);
	*ipp = ip;
	return 0;

out_brelse:
	xfs_trans_brelse(tp, bp);
out_destroy_inode:
	xfs_destroy_inode(ip);
	return error;
}
994 | 981 | ||
995 | /* | 982 | /* |
996 | * Read in extents from a btree-format inode. | 983 | * Read in extents from a btree-format inode. |
997 | * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. | 984 | * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. |
998 | */ | 985 | */ |
999 | int | 986 | int |
1000 | xfs_iread_extents( | 987 | xfs_iread_extents( |
1001 | xfs_trans_t *tp, | 988 | xfs_trans_t *tp, |
1002 | xfs_inode_t *ip, | 989 | xfs_inode_t *ip, |
1003 | int whichfork) | 990 | int whichfork) |
1004 | { | 991 | { |
1005 | int error; | 992 | int error; |
1006 | xfs_ifork_t *ifp; | 993 | xfs_ifork_t *ifp; |
1007 | xfs_extnum_t nextents; | 994 | xfs_extnum_t nextents; |
1008 | size_t size; | 995 | size_t size; |
1009 | 996 | ||
1010 | if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { | 997 | if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { |
1011 | XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, | 998 | XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, |
1012 | ip->i_mount); | 999 | ip->i_mount); |
1013 | return XFS_ERROR(EFSCORRUPTED); | 1000 | return XFS_ERROR(EFSCORRUPTED); |
1014 | } | 1001 | } |
1015 | nextents = XFS_IFORK_NEXTENTS(ip, whichfork); | 1002 | nextents = XFS_IFORK_NEXTENTS(ip, whichfork); |
1016 | size = nextents * sizeof(xfs_bmbt_rec_t); | 1003 | size = nextents * sizeof(xfs_bmbt_rec_t); |
1017 | ifp = XFS_IFORK_PTR(ip, whichfork); | 1004 | ifp = XFS_IFORK_PTR(ip, whichfork); |
1018 | 1005 | ||
1019 | /* | 1006 | /* |
1020 | * We know that the size is valid (it's checked in iformat_btree) | 1007 | * We know that the size is valid (it's checked in iformat_btree) |
1021 | */ | 1008 | */ |
1022 | ifp->if_lastex = NULLEXTNUM; | 1009 | ifp->if_lastex = NULLEXTNUM; |
1023 | ifp->if_bytes = ifp->if_real_bytes = 0; | 1010 | ifp->if_bytes = ifp->if_real_bytes = 0; |
1024 | ifp->if_flags |= XFS_IFEXTENTS; | 1011 | ifp->if_flags |= XFS_IFEXTENTS; |
1025 | xfs_iext_add(ifp, 0, nextents); | 1012 | xfs_iext_add(ifp, 0, nextents); |
1026 | error = xfs_bmap_read_extents(tp, ip, whichfork); | 1013 | error = xfs_bmap_read_extents(tp, ip, whichfork); |
1027 | if (error) { | 1014 | if (error) { |
1028 | xfs_iext_destroy(ifp); | 1015 | xfs_iext_destroy(ifp); |
1029 | ifp->if_flags &= ~XFS_IFEXTENTS; | 1016 | ifp->if_flags &= ~XFS_IFEXTENTS; |
1030 | return error; | 1017 | return error; |
1031 | } | 1018 | } |
1032 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); | 1019 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); |
1033 | return 0; | 1020 | return 0; |
1034 | } | 1021 | } |
1035 | 1022 | ||
1036 | /* | 1023 | /* |
1037 | * Allocate an inode on disk and return a copy of its in-core version. | 1024 | * Allocate an inode on disk and return a copy of its in-core version. |
1038 | * The in-core inode is locked exclusively. Set mode, nlink, and rdev | 1025 | * The in-core inode is locked exclusively. Set mode, nlink, and rdev |
1039 | * appropriately within the inode. The uid and gid for the inode are | 1026 | * appropriately within the inode. The uid and gid for the inode are |
1040 | * set according to the contents of the given cred structure. | 1027 | * set according to the contents of the given cred structure. |
1041 | * | 1028 | * |
1042 | * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() | 1029 | * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() |
1043 | * has a free inode available, call xfs_iget() | 1030 | * has a free inode available, call xfs_iget() |
1044 | * to obtain the in-core version of the allocated inode. Finally, | 1031 | * to obtain the in-core version of the allocated inode. Finally, |
1045 | * fill in the inode and log its initial contents. In this case, | 1032 | * fill in the inode and log its initial contents. In this case, |
1046 | * ialloc_context would be set to NULL and call_again set to false. | 1033 | * ialloc_context would be set to NULL and call_again set to false. |
1047 | * | 1034 | * |
1048 | * If xfs_dialloc() does not have an available inode, | 1035 | * If xfs_dialloc() does not have an available inode, |
1049 | * it will replenish its supply by doing an allocation. Since we can | 1036 | * it will replenish its supply by doing an allocation. Since we can |
1050 | * only do one allocation within a transaction without deadlocks, we | 1037 | * only do one allocation within a transaction without deadlocks, we |
1051 | * must commit the current transaction before returning the inode itself. | 1038 | * must commit the current transaction before returning the inode itself. |
1052 | * In this case, therefore, we will set call_again to true and return. | 1039 | * In this case, therefore, we will set call_again to true and return. |
1053 | * The caller should then commit the current transaction, start a new | 1040 | * The caller should then commit the current transaction, start a new |
1054 | * transaction, and call xfs_ialloc() again to actually get the inode. | 1041 | * transaction, and call xfs_ialloc() again to actually get the inode. |
1055 | * | 1042 | * |
1056 | * To ensure that some other process does not grab the inode that | 1043 | * To ensure that some other process does not grab the inode that |
1057 | * was allocated during the first call to xfs_ialloc(), this routine | 1044 | * was allocated during the first call to xfs_ialloc(), this routine |
1058 | * also returns the [locked] bp pointing to the head of the freelist | 1045 | * also returns the [locked] bp pointing to the head of the freelist |
1059 | * as ialloc_context. The caller should hold this buffer across | 1046 | * as ialloc_context. The caller should hold this buffer across |
1060 | * the commit and pass it back into this routine on the second call. | 1047 | * the commit and pass it back into this routine on the second call. |
1061 | * | 1048 | * |
1062 | * If we are allocating quota inodes, we do not have a parent inode | 1049 | * If we are allocating quota inodes, we do not have a parent inode |
1063 | * to attach to or associate with (i.e. pip == NULL) because they | 1050 | * to attach to or associate with (i.e. pip == NULL) because they |
1064 | * are not linked into the directory structure - they are attached | 1051 | * are not linked into the directory structure - they are attached |
1065 | * directly to the superblock - and so have no parent. | 1052 | * directly to the superblock - and so have no parent. |
1066 | */ | 1053 | */ |
1067 | int | 1054 | int |
1068 | xfs_ialloc( | 1055 | xfs_ialloc( |
1069 | xfs_trans_t *tp, | 1056 | xfs_trans_t *tp, |
1070 | xfs_inode_t *pip, | 1057 | xfs_inode_t *pip, |
1071 | mode_t mode, | 1058 | mode_t mode, |
1072 | xfs_nlink_t nlink, | 1059 | xfs_nlink_t nlink, |
1073 | xfs_dev_t rdev, | 1060 | xfs_dev_t rdev, |
1074 | cred_t *cr, | 1061 | cred_t *cr, |
1075 | xfs_prid_t prid, | 1062 | xfs_prid_t prid, |
1076 | int okalloc, | 1063 | int okalloc, |
1077 | xfs_buf_t **ialloc_context, | 1064 | xfs_buf_t **ialloc_context, |
1078 | boolean_t *call_again, | 1065 | boolean_t *call_again, |
1079 | xfs_inode_t **ipp) | 1066 | xfs_inode_t **ipp) |
1080 | { | 1067 | { |
1081 | xfs_ino_t ino; | 1068 | xfs_ino_t ino; |
1082 | xfs_inode_t *ip; | 1069 | xfs_inode_t *ip; |
1083 | uint flags; | 1070 | uint flags; |
1084 | int error; | 1071 | int error; |
1085 | timespec_t tv; | 1072 | timespec_t tv; |
1086 | int filestreams = 0; | 1073 | int filestreams = 0; |
1087 | 1074 | ||
1088 | /* | 1075 | /* |
1089 | * Call the space management code to pick | 1076 | * Call the space management code to pick |
1090 | * the on-disk inode to be allocated. | 1077 | * the on-disk inode to be allocated. |
1091 | */ | 1078 | */ |
1092 | error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, | 1079 | error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, |
1093 | ialloc_context, call_again, &ino); | 1080 | ialloc_context, call_again, &ino); |
1094 | if (error) | 1081 | if (error) |
1095 | return error; | 1082 | return error; |
1096 | if (*call_again || ino == NULLFSINO) { | 1083 | if (*call_again || ino == NULLFSINO) { |
1097 | *ipp = NULL; | 1084 | *ipp = NULL; |
1098 | return 0; | 1085 | return 0; |
1099 | } | 1086 | } |
1100 | ASSERT(*ialloc_context == NULL); | 1087 | ASSERT(*ialloc_context == NULL); |
1101 | 1088 | ||
1102 | /* | 1089 | /* |
1103 | * Get the in-core inode with the lock held exclusively. | 1090 | * Get the in-core inode with the lock held exclusively. |
1104 | * This is because we're setting fields here we need | 1091 | * This is because we're setting fields here we need |
1105 | * to prevent others from looking at until we're done. | 1092 | * to prevent others from looking at until we're done. |
1106 | */ | 1093 | */ |
1107 | error = xfs_trans_iget(tp->t_mountp, tp, ino, | 1094 | error = xfs_trans_iget(tp->t_mountp, tp, ino, |
1108 | XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); | 1095 | XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); |
1109 | if (error) | 1096 | if (error) |
1110 | return error; | 1097 | return error; |
1111 | ASSERT(ip != NULL); | 1098 | ASSERT(ip != NULL); |
1112 | 1099 | ||
1113 | ip->i_d.di_mode = (__uint16_t)mode; | 1100 | ip->i_d.di_mode = (__uint16_t)mode; |
1114 | ip->i_d.di_onlink = 0; | 1101 | ip->i_d.di_onlink = 0; |
1115 | ip->i_d.di_nlink = nlink; | 1102 | ip->i_d.di_nlink = nlink; |
1116 | ASSERT(ip->i_d.di_nlink == nlink); | 1103 | ASSERT(ip->i_d.di_nlink == nlink); |
1117 | ip->i_d.di_uid = current_fsuid(); | 1104 | ip->i_d.di_uid = current_fsuid(); |
1118 | ip->i_d.di_gid = current_fsgid(); | 1105 | ip->i_d.di_gid = current_fsgid(); |
1119 | ip->i_d.di_projid = prid; | 1106 | ip->i_d.di_projid = prid; |
1120 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 1107 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
1121 | 1108 | ||
1122 | /* | 1109 | /* |
1123 | * If the superblock version is up to where we support new format | 1110 | * If the superblock version is up to where we support new format |
1124 | * inodes and this is currently an old format inode, then change | 1111 | * inodes and this is currently an old format inode, then change |
1125 | * the inode version number now. This way we only do the conversion | 1112 | * the inode version number now. This way we only do the conversion |
1126 | * here rather than here and in the flush/logging code. | 1113 | * here rather than here and in the flush/logging code. |
1127 | */ | 1114 | */ |
1128 | if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && | 1115 | if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && |
1129 | ip->i_d.di_version == 1) { | 1116 | ip->i_d.di_version == 1) { |
1130 | ip->i_d.di_version = 2; | 1117 | ip->i_d.di_version = 2; |
1131 | /* | 1118 | /* |
1132 | * We've already zeroed the old link count, the projid field, | 1119 | * We've already zeroed the old link count, the projid field, |
1133 | * and the pad field. | 1120 | * and the pad field. |
1134 | */ | 1121 | */ |
1135 | } | 1122 | } |
1136 | 1123 | ||
1137 | /* | 1124 | /* |
1138 | * Project ids won't be stored on disk if we are using a version 1 inode. | 1125 | * Project ids won't be stored on disk if we are using a version 1 inode. |
1139 | */ | 1126 | */ |
1140 | if ((prid != 0) && (ip->i_d.di_version == 1)) | 1127 | if ((prid != 0) && (ip->i_d.di_version == 1)) |
1141 | xfs_bump_ino_vers2(tp, ip); | 1128 | xfs_bump_ino_vers2(tp, ip); |
1142 | 1129 | ||
1143 | if (pip && XFS_INHERIT_GID(pip)) { | 1130 | if (pip && XFS_INHERIT_GID(pip)) { |
1144 | ip->i_d.di_gid = pip->i_d.di_gid; | 1131 | ip->i_d.di_gid = pip->i_d.di_gid; |
1145 | if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { | 1132 | if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { |
1146 | ip->i_d.di_mode |= S_ISGID; | 1133 | ip->i_d.di_mode |= S_ISGID; |
1147 | } | 1134 | } |
1148 | } | 1135 | } |
1149 | 1136 | ||
1150 | /* | 1137 | /* |
1151 | * If the group ID of the new file does not match the effective group | 1138 | * If the group ID of the new file does not match the effective group |
1152 | * ID or one of the supplementary group IDs, the S_ISGID bit is cleared | 1139 | * ID or one of the supplementary group IDs, the S_ISGID bit is cleared |
1153 | * (and only if the irix_sgid_inherit compatibility variable is set). | 1140 | * (and only if the irix_sgid_inherit compatibility variable is set). |
1154 | */ | 1141 | */ |
1155 | if ((irix_sgid_inherit) && | 1142 | if ((irix_sgid_inherit) && |
1156 | (ip->i_d.di_mode & S_ISGID) && | 1143 | (ip->i_d.di_mode & S_ISGID) && |
1157 | (!in_group_p((gid_t)ip->i_d.di_gid))) { | 1144 | (!in_group_p((gid_t)ip->i_d.di_gid))) { |
1158 | ip->i_d.di_mode &= ~S_ISGID; | 1145 | ip->i_d.di_mode &= ~S_ISGID; |
1159 | } | 1146 | } |
1160 | 1147 | ||
1161 | ip->i_d.di_size = 0; | 1148 | ip->i_d.di_size = 0; |
1162 | ip->i_size = 0; | 1149 | ip->i_size = 0; |
1163 | ip->i_d.di_nextents = 0; | 1150 | ip->i_d.di_nextents = 0; |
1164 | ASSERT(ip->i_d.di_nblocks == 0); | 1151 | ASSERT(ip->i_d.di_nblocks == 0); |
1165 | 1152 | ||
1166 | nanotime(&tv); | 1153 | nanotime(&tv); |
1167 | ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; | 1154 | ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; |
1168 | ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; | 1155 | ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; |
1169 | ip->i_d.di_atime = ip->i_d.di_mtime; | 1156 | ip->i_d.di_atime = ip->i_d.di_mtime; |
1170 | ip->i_d.di_ctime = ip->i_d.di_mtime; | 1157 | ip->i_d.di_ctime = ip->i_d.di_mtime; |
1171 | 1158 | ||
1172 | /* | 1159 | /* |
1173 | * di_gen will have been taken care of in xfs_iread. | 1160 | * di_gen will have been taken care of in xfs_iread. |
1174 | */ | 1161 | */ |
1175 | ip->i_d.di_extsize = 0; | 1162 | ip->i_d.di_extsize = 0; |
1176 | ip->i_d.di_dmevmask = 0; | 1163 | ip->i_d.di_dmevmask = 0; |
1177 | ip->i_d.di_dmstate = 0; | 1164 | ip->i_d.di_dmstate = 0; |
1178 | ip->i_d.di_flags = 0; | 1165 | ip->i_d.di_flags = 0; |
1179 | flags = XFS_ILOG_CORE; | 1166 | flags = XFS_ILOG_CORE; |
1180 | switch (mode & S_IFMT) { | 1167 | switch (mode & S_IFMT) { |
1181 | case S_IFIFO: | 1168 | case S_IFIFO: |
1182 | case S_IFCHR: | 1169 | case S_IFCHR: |
1183 | case S_IFBLK: | 1170 | case S_IFBLK: |
1184 | case S_IFSOCK: | 1171 | case S_IFSOCK: |
1185 | ip->i_d.di_format = XFS_DINODE_FMT_DEV; | 1172 | ip->i_d.di_format = XFS_DINODE_FMT_DEV; |
1186 | ip->i_df.if_u2.if_rdev = rdev; | 1173 | ip->i_df.if_u2.if_rdev = rdev; |
1187 | ip->i_df.if_flags = 0; | 1174 | ip->i_df.if_flags = 0; |
1188 | flags |= XFS_ILOG_DEV; | 1175 | flags |= XFS_ILOG_DEV; |
1189 | break; | 1176 | break; |
1190 | case S_IFREG: | 1177 | case S_IFREG: |
1191 | /* | 1178 | /* |
1192 | * we can't set up filestreams until after the VFS inode | 1179 | * we can't set up filestreams until after the VFS inode |
1193 | * is set up properly. | 1180 | * is set up properly. |
1194 | */ | 1181 | */ |
1195 | if (pip && xfs_inode_is_filestream(pip)) | 1182 | if (pip && xfs_inode_is_filestream(pip)) |
1196 | filestreams = 1; | 1183 | filestreams = 1; |
1197 | /* fall through */ | 1184 | /* fall through */ |
1198 | case S_IFDIR: | 1185 | case S_IFDIR: |
1199 | if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { | 1186 | if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { |
1200 | uint di_flags = 0; | 1187 | uint di_flags = 0; |
1201 | 1188 | ||
1202 | if ((mode & S_IFMT) == S_IFDIR) { | 1189 | if ((mode & S_IFMT) == S_IFDIR) { |
1203 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) | 1190 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) |
1204 | di_flags |= XFS_DIFLAG_RTINHERIT; | 1191 | di_flags |= XFS_DIFLAG_RTINHERIT; |
1205 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { | 1192 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { |
1206 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; | 1193 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; |
1207 | ip->i_d.di_extsize = pip->i_d.di_extsize; | 1194 | ip->i_d.di_extsize = pip->i_d.di_extsize; |
1208 | } | 1195 | } |
1209 | } else if ((mode & S_IFMT) == S_IFREG) { | 1196 | } else if ((mode & S_IFMT) == S_IFREG) { |
1210 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) | 1197 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) |
1211 | di_flags |= XFS_DIFLAG_REALTIME; | 1198 | di_flags |= XFS_DIFLAG_REALTIME; |
1212 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { | 1199 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { |
1213 | di_flags |= XFS_DIFLAG_EXTSIZE; | 1200 | di_flags |= XFS_DIFLAG_EXTSIZE; |
1214 | ip->i_d.di_extsize = pip->i_d.di_extsize; | 1201 | ip->i_d.di_extsize = pip->i_d.di_extsize; |
1215 | } | 1202 | } |
1216 | } | 1203 | } |
1217 | if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && | 1204 | if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && |
1218 | xfs_inherit_noatime) | 1205 | xfs_inherit_noatime) |
1219 | di_flags |= XFS_DIFLAG_NOATIME; | 1206 | di_flags |= XFS_DIFLAG_NOATIME; |
1220 | if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && | 1207 | if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && |
1221 | xfs_inherit_nodump) | 1208 | xfs_inherit_nodump) |
1222 | di_flags |= XFS_DIFLAG_NODUMP; | 1209 | di_flags |= XFS_DIFLAG_NODUMP; |
1223 | if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && | 1210 | if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && |
1224 | xfs_inherit_sync) | 1211 | xfs_inherit_sync) |
1225 | di_flags |= XFS_DIFLAG_SYNC; | 1212 | di_flags |= XFS_DIFLAG_SYNC; |
1226 | if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && | 1213 | if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && |
1227 | xfs_inherit_nosymlinks) | 1214 | xfs_inherit_nosymlinks) |
1228 | di_flags |= XFS_DIFLAG_NOSYMLINKS; | 1215 | di_flags |= XFS_DIFLAG_NOSYMLINKS; |
1229 | if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1216 | if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) |
1230 | di_flags |= XFS_DIFLAG_PROJINHERIT; | 1217 | di_flags |= XFS_DIFLAG_PROJINHERIT; |
1231 | if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && | 1218 | if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && |
1232 | xfs_inherit_nodefrag) | 1219 | xfs_inherit_nodefrag) |
1233 | di_flags |= XFS_DIFLAG_NODEFRAG; | 1220 | di_flags |= XFS_DIFLAG_NODEFRAG; |
1234 | if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) | 1221 | if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) |
1235 | di_flags |= XFS_DIFLAG_FILESTREAM; | 1222 | di_flags |= XFS_DIFLAG_FILESTREAM; |
1236 | ip->i_d.di_flags |= di_flags; | 1223 | ip->i_d.di_flags |= di_flags; |
1237 | } | 1224 | } |
1238 | /* FALLTHROUGH */ | 1225 | /* FALLTHROUGH */ |
1239 | case S_IFLNK: | 1226 | case S_IFLNK: |
1240 | ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; | 1227 | ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; |
1241 | ip->i_df.if_flags = XFS_IFEXTENTS; | 1228 | ip->i_df.if_flags = XFS_IFEXTENTS; |
1242 | ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; | 1229 | ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; |
1243 | ip->i_df.if_u1.if_extents = NULL; | 1230 | ip->i_df.if_u1.if_extents = NULL; |
1244 | break; | 1231 | break; |
1245 | default: | 1232 | default: |
1246 | ASSERT(0); | 1233 | ASSERT(0); |
1247 | } | 1234 | } |
1248 | /* | 1235 | /* |
1249 | * Attribute fork settings for new inode. | 1236 | * Attribute fork settings for new inode. |
1250 | */ | 1237 | */ |
1251 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | 1238 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; |
1252 | ip->i_d.di_anextents = 0; | 1239 | ip->i_d.di_anextents = 0; |
1253 | 1240 | ||
1254 | /* | 1241 | /* |
1255 | * Log the new values stuffed into the inode. | 1242 | * Log the new values stuffed into the inode. |
1256 | */ | 1243 | */ |
1257 | xfs_trans_log_inode(tp, ip, flags); | 1244 | xfs_trans_log_inode(tp, ip, flags); |
1258 | 1245 | ||
1259 | /* now that we have an i_mode we can setup inode ops and unlock */ | 1246 | /* now that we have an i_mode we can setup inode ops and unlock */ |
1260 | xfs_setup_inode(ip); | 1247 | xfs_setup_inode(ip); |
1261 | 1248 | ||
1262 | /* now we have set up the vfs inode we can associate the filestream */ | 1249 | /* now we have set up the vfs inode we can associate the filestream */ |
1263 | if (filestreams) { | 1250 | if (filestreams) { |
1264 | error = xfs_filestream_associate(pip, ip); | 1251 | error = xfs_filestream_associate(pip, ip); |
1265 | if (error < 0) | 1252 | if (error < 0) |
1266 | return -error; | 1253 | return -error; |
1267 | if (!error) | 1254 | if (!error) |
1268 | xfs_iflags_set(ip, XFS_IFILESTREAM); | 1255 | xfs_iflags_set(ip, XFS_IFILESTREAM); |
1269 | } | 1256 | } |
1270 | 1257 | ||
1271 | *ipp = ip; | 1258 | *ipp = ip; |
1272 | return 0; | 1259 | return 0; |
1273 | } | 1260 | } |
1274 | 1261 | ||
1275 | /* | 1262 | /* |
1276 | * Check to make sure that there are no blocks allocated to the | 1263 | * Check to make sure that there are no blocks allocated to the |
1277 | * file beyond the size of the file. We don't check this for | 1264 | * file beyond the size of the file. We don't check this for |
1278 | * files with fixed size extents or real time extents, but we | 1265 | * files with fixed size extents or real time extents, but we |
1279 | * at least do it for regular files. | 1266 | * at least do it for regular files. |
1280 | */ | 1267 | */ |
#ifdef DEBUG
/*
 * Debug-only sanity check: verify that no blocks are allocated to the
 * file beyond its size.  Skipped for realtime inodes and inodes with
 * fixed-size extents, where blocks past EOF are legitimate.
 */
void
xfs_isize_check(
	xfs_mount_t	*mp,		/* filesystem mount structure */
	xfs_inode_t	*ip,		/* inode to check */
	xfs_fsize_t	isize)		/* size to check against */
{
	xfs_fileoff_t	map_first;	/* first file block past isize */
	int		nimaps;		/* in: map slots, out: maps found */
	xfs_bmbt_irec_t	imaps[2];	/* two slots so a hole followed by
					 * an extent would be detectable */

	/* only regular files are checked */
	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
		return;

	/* realtime files may have preallocated blocks past EOF */
	if (XFS_IS_REALTIME_INODE(ip))
		return;

	/* fixed extent size files may round allocations past EOF */
	if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
		return;

	nimaps = 2;
	map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	/*
	 * The filesystem could be shutting down, so bmapi may return
	 * an error; in that case there is nothing to assert about.
	 */
	if (xfs_bmapi(NULL, ip, map_first,
			 (XFS_B_TO_FSB(mp,
				       (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
			  map_first),
			 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
			 NULL, NULL))
	    return;
	/* everything past isize must map to a single hole */
	ASSERT(nimaps == 1);
	ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
}
#endif	/* DEBUG */
1318 | 1305 | ||
1319 | /* | 1306 | /* |
1320 | * Calculate the last possible buffered byte in a file. This must | 1307 | * Calculate the last possible buffered byte in a file. This must |
1321 | * include data that was buffered beyond the EOF by the write code. | 1308 | * include data that was buffered beyond the EOF by the write code. |
1322 | * This also needs to deal with overflowing the xfs_fsize_t type | 1309 | * This also needs to deal with overflowing the xfs_fsize_t type |
1323 | * which can happen for sizes near the limit. | 1310 | * which can happen for sizes near the limit. |
1324 | * | 1311 | * |
1325 | * We also need to take into account any blocks beyond the EOF. It | 1312 | * We also need to take into account any blocks beyond the EOF. It |
1326 | * may be the case that they were buffered by a write which failed. | 1313 | * may be the case that they were buffered by a write which failed. |
1327 | * In that case the pages will still be in memory, but the inode size | 1314 | * In that case the pages will still be in memory, but the inode size |
1328 | * will never have been updated. | 1315 | * will never have been updated. |
1329 | */ | 1316 | */ |
1330 | xfs_fsize_t | 1317 | xfs_fsize_t |
1331 | xfs_file_last_byte( | 1318 | xfs_file_last_byte( |
1332 | xfs_inode_t *ip) | 1319 | xfs_inode_t *ip) |
1333 | { | 1320 | { |
1334 | xfs_mount_t *mp; | 1321 | xfs_mount_t *mp; |
1335 | xfs_fsize_t last_byte; | 1322 | xfs_fsize_t last_byte; |
1336 | xfs_fileoff_t last_block; | 1323 | xfs_fileoff_t last_block; |
1337 | xfs_fileoff_t size_last_block; | 1324 | xfs_fileoff_t size_last_block; |
1338 | int error; | 1325 | int error; |
1339 | 1326 | ||
1340 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); | 1327 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); |
1341 | 1328 | ||
1342 | mp = ip->i_mount; | 1329 | mp = ip->i_mount; |
1343 | /* | 1330 | /* |
1344 | * Only check for blocks beyond the EOF if the extents have | 1331 | * Only check for blocks beyond the EOF if the extents have |
1345 | * been read in. This eliminates the need for the inode lock, | 1332 | * been read in. This eliminates the need for the inode lock, |
1346 | * and it also saves us from looking when it really isn't | 1333 | * and it also saves us from looking when it really isn't |
1347 | * necessary. | 1334 | * necessary. |
1348 | */ | 1335 | */ |
1349 | if (ip->i_df.if_flags & XFS_IFEXTENTS) { | 1336 | if (ip->i_df.if_flags & XFS_IFEXTENTS) { |
1350 | error = xfs_bmap_last_offset(NULL, ip, &last_block, | 1337 | error = xfs_bmap_last_offset(NULL, ip, &last_block, |
1351 | XFS_DATA_FORK); | 1338 | XFS_DATA_FORK); |
1352 | if (error) { | 1339 | if (error) { |
1353 | last_block = 0; | 1340 | last_block = 0; |
1354 | } | 1341 | } |
1355 | } else { | 1342 | } else { |
1356 | last_block = 0; | 1343 | last_block = 0; |
1357 | } | 1344 | } |
1358 | size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); | 1345 | size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); |
1359 | last_block = XFS_FILEOFF_MAX(last_block, size_last_block); | 1346 | last_block = XFS_FILEOFF_MAX(last_block, size_last_block); |
1360 | 1347 | ||
1361 | last_byte = XFS_FSB_TO_B(mp, last_block); | 1348 | last_byte = XFS_FSB_TO_B(mp, last_block); |
1362 | if (last_byte < 0) { | 1349 | if (last_byte < 0) { |
1363 | return XFS_MAXIOFFSET(mp); | 1350 | return XFS_MAXIOFFSET(mp); |
1364 | } | 1351 | } |
1365 | last_byte += (1 << mp->m_writeio_log); | 1352 | last_byte += (1 << mp->m_writeio_log); |
1366 | if (last_byte < 0) { | 1353 | if (last_byte < 0) { |
1367 | return XFS_MAXIOFFSET(mp); | 1354 | return XFS_MAXIOFFSET(mp); |
1368 | } | 1355 | } |
1369 | return last_byte; | 1356 | return last_byte; |
1370 | } | 1357 | } |
1371 | 1358 | ||
#if defined(XFS_RW_TRACE)
/*
 * Record a truncate event in the per-inode rw trace buffer.  64-bit
 * quantities (di_size, new_size, toss range) are split into hi/lo
 * 32-bit halves because ktrace slots are pointer-sized.
 */
STATIC void
xfs_itrunc_trace(
	int		tag,		/* trace event tag */
	xfs_inode_t	*ip,		/* inode being truncated */
	int		flag,		/* XFS_ITRUNC_DEFINITE/MAYBE */
	xfs_fsize_t	new_size,	/* target size */
	xfs_off_t	toss_start,	/* first byte of tossed range */
	xfs_off_t	toss_finish)	/* last byte of tossed range */
{
	/* tracing not enabled for this inode */
	if (ip->i_rwtrace == NULL) {
		return;
	}

	ktrace_enter(ip->i_rwtrace,
		     (void*)((long)tag),
		     (void*)ip,
		     (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff),
		     (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff),
		     (void*)((long)flag),
		     (void*)(unsigned long)((new_size >> 32) & 0xffffffff),
		     (void*)(unsigned long)(new_size & 0xffffffff),
		     (void*)(unsigned long)((toss_start >> 32) & 0xffffffff),
		     (void*)(unsigned long)(toss_start & 0xffffffff),
		     (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff),
		     (void*)(unsigned long)(toss_finish & 0xffffffff),
		     (void*)(unsigned long)current_cpu(),
		     (void*)(unsigned long)current_pid(),
		     (void*)NULL,
		     (void*)NULL,
		     (void*)NULL);
}
#else
/* tracing compiled out: expands to nothing */
#define	xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish)
#endif
1407 | 1394 | ||
1408 | /* | 1395 | /* |
1409 | * Start the truncation of the file to new_size. The new size | 1396 | * Start the truncation of the file to new_size. The new size |
1410 | * must be smaller than the current size. This routine will | 1397 | * must be smaller than the current size. This routine will |
1411 | * clear the buffer and page caches of file data in the removed | 1398 | * clear the buffer and page caches of file data in the removed |
1412 | * range, and xfs_itruncate_finish() will remove the underlying | 1399 | * range, and xfs_itruncate_finish() will remove the underlying |
1413 | * disk blocks. | 1400 | * disk blocks. |
1414 | * | 1401 | * |
1415 | * The inode must have its I/O lock locked EXCLUSIVELY, and it | 1402 | * The inode must have its I/O lock locked EXCLUSIVELY, and it |
1416 | * must NOT have the inode lock held at all. This is because we're | 1403 | * must NOT have the inode lock held at all. This is because we're |
1417 | * calling into the buffer/page cache code and we can't hold the | 1404 | * calling into the buffer/page cache code and we can't hold the |
1418 | * inode lock when we do so. | 1405 | * inode lock when we do so. |
1419 | * | 1406 | * |
1420 | * We need to wait for any direct I/Os in flight to complete before we | 1407 | * We need to wait for any direct I/Os in flight to complete before we |
1421 | * proceed with the truncate. This is needed to prevent the extents | 1408 | * proceed with the truncate. This is needed to prevent the extents |
1422 | * being read or written by the direct I/Os from being removed while the | 1409 | * being read or written by the direct I/Os from being removed while the |
1423 | * I/O is in flight as there is no other method of synchronising | 1410 | * I/O is in flight as there is no other method of synchronising |
1424 | * direct I/O with the truncate operation. Also, because we hold | 1411 | * direct I/O with the truncate operation. Also, because we hold |
1425 | * the IOLOCK in exclusive mode, we prevent new direct I/Os from being | 1412 | * the IOLOCK in exclusive mode, we prevent new direct I/Os from being |
1426 | * started until the truncate completes and drops the lock. Essentially, | 1413 | * started until the truncate completes and drops the lock. Essentially, |
1427 | * the vn_iowait() call forms an I/O barrier that provides strict ordering | 1414 | * the vn_iowait() call forms an I/O barrier that provides strict ordering |
1428 | * between direct I/Os and the truncate operation. | 1415 | * between direct I/Os and the truncate operation. |
1429 | * | 1416 | * |
1430 | * The flags parameter can have either the value XFS_ITRUNC_DEFINITE | 1417 | * The flags parameter can have either the value XFS_ITRUNC_DEFINITE |
1431 | * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used | 1418 | * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used |
1432 | * in the case that the caller is locking things out of order and | 1419 | * in the case that the caller is locking things out of order and |
1433 | * may not be able to call xfs_itruncate_finish() with the inode lock | 1420 | * may not be able to call xfs_itruncate_finish() with the inode lock |
1434 | * held without dropping the I/O lock. If the caller must drop the | 1421 | * held without dropping the I/O lock. If the caller must drop the |
1435 | * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() | 1422 | * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() |
1436 | * must be called again with all the same restrictions as the initial | 1423 | * must be called again with all the same restrictions as the initial |
1437 | * call. | 1424 | * call. |
1438 | */ | 1425 | */ |
1439 | int | 1426 | int |
1440 | xfs_itruncate_start( | 1427 | xfs_itruncate_start( |
1441 | xfs_inode_t *ip, | 1428 | xfs_inode_t *ip, |
1442 | uint flags, | 1429 | uint flags, |
1443 | xfs_fsize_t new_size) | 1430 | xfs_fsize_t new_size) |
1444 | { | 1431 | { |
1445 | xfs_fsize_t last_byte; | 1432 | xfs_fsize_t last_byte; |
1446 | xfs_off_t toss_start; | 1433 | xfs_off_t toss_start; |
1447 | xfs_mount_t *mp; | 1434 | xfs_mount_t *mp; |
1448 | int error = 0; | 1435 | int error = 0; |
1449 | 1436 | ||
1450 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | 1437 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
1451 | ASSERT((new_size == 0) || (new_size <= ip->i_size)); | 1438 | ASSERT((new_size == 0) || (new_size <= ip->i_size)); |
1452 | ASSERT((flags == XFS_ITRUNC_DEFINITE) || | 1439 | ASSERT((flags == XFS_ITRUNC_DEFINITE) || |
1453 | (flags == XFS_ITRUNC_MAYBE)); | 1440 | (flags == XFS_ITRUNC_MAYBE)); |
1454 | 1441 | ||
1455 | mp = ip->i_mount; | 1442 | mp = ip->i_mount; |
1456 | 1443 | ||
1457 | /* wait for the completion of any pending DIOs */ | 1444 | /* wait for the completion of any pending DIOs */ |
1458 | if (new_size == 0 || new_size < ip->i_size) | 1445 | if (new_size == 0 || new_size < ip->i_size) |
1459 | vn_iowait(ip); | 1446 | vn_iowait(ip); |
1460 | 1447 | ||
1461 | /* | 1448 | /* |
1462 | * Call toss_pages or flushinval_pages to get rid of pages | 1449 | * Call toss_pages or flushinval_pages to get rid of pages |
1463 | * overlapping the region being removed. We have to use | 1450 | * overlapping the region being removed. We have to use |
1464 | * the less efficient flushinval_pages in the case that the | 1451 | * the less efficient flushinval_pages in the case that the |
1465 | * caller may not be able to finish the truncate without | 1452 | * caller may not be able to finish the truncate without |
1466 | * dropping the inode's I/O lock. Make sure | 1453 | * dropping the inode's I/O lock. Make sure |
1467 | * to catch any pages brought in by buffers overlapping | 1454 | * to catch any pages brought in by buffers overlapping |
1468 | * the EOF by searching out beyond the isize by our | 1455 | * the EOF by searching out beyond the isize by our |
1469 | * block size. We round new_size up to a block boundary | 1456 | * block size. We round new_size up to a block boundary |
1470 | * so that we don't toss things on the same block as | 1457 | * so that we don't toss things on the same block as |
1471 | * new_size but before it. | 1458 | * new_size but before it. |
1472 | * | 1459 | * |
1473 | * Before calling toss_page or flushinval_pages, make sure to | 1460 | * Before calling toss_page or flushinval_pages, make sure to |
1474 | * call remapf() over the same region if the file is mapped. | 1461 | * call remapf() over the same region if the file is mapped. |
1475 | * This frees up mapped file references to the pages in the | 1462 | * This frees up mapped file references to the pages in the |
1476 | * given range and for the flushinval_pages case it ensures | 1463 | * given range and for the flushinval_pages case it ensures |
1477 | * that we get the latest mapped changes flushed out. | 1464 | * that we get the latest mapped changes flushed out. |
1478 | */ | 1465 | */ |
1479 | toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); | 1466 | toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); |
1480 | toss_start = XFS_FSB_TO_B(mp, toss_start); | 1467 | toss_start = XFS_FSB_TO_B(mp, toss_start); |
1481 | if (toss_start < 0) { | 1468 | if (toss_start < 0) { |
1482 | /* | 1469 | /* |
1483 | * The place to start tossing is beyond our maximum | 1470 | * The place to start tossing is beyond our maximum |
1484 | * file size, so there is no way that the data extended | 1471 | * file size, so there is no way that the data extended |
1485 | * out there. | 1472 | * out there. |
1486 | */ | 1473 | */ |
1487 | return 0; | 1474 | return 0; |
1488 | } | 1475 | } |
1489 | last_byte = xfs_file_last_byte(ip); | 1476 | last_byte = xfs_file_last_byte(ip); |
1490 | xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, | 1477 | xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, |
1491 | last_byte); | 1478 | last_byte); |
1492 | if (last_byte > toss_start) { | 1479 | if (last_byte > toss_start) { |
1493 | if (flags & XFS_ITRUNC_DEFINITE) { | 1480 | if (flags & XFS_ITRUNC_DEFINITE) { |
1494 | xfs_tosspages(ip, toss_start, | 1481 | xfs_tosspages(ip, toss_start, |
1495 | -1, FI_REMAPF_LOCKED); | 1482 | -1, FI_REMAPF_LOCKED); |
1496 | } else { | 1483 | } else { |
1497 | error = xfs_flushinval_pages(ip, toss_start, | 1484 | error = xfs_flushinval_pages(ip, toss_start, |
1498 | -1, FI_REMAPF_LOCKED); | 1485 | -1, FI_REMAPF_LOCKED); |
1499 | } | 1486 | } |
1500 | } | 1487 | } |
1501 | 1488 | ||
1502 | #ifdef DEBUG | 1489 | #ifdef DEBUG |
1503 | if (new_size == 0) { | 1490 | if (new_size == 0) { |
1504 | ASSERT(VN_CACHED(VFS_I(ip)) == 0); | 1491 | ASSERT(VN_CACHED(VFS_I(ip)) == 0); |
1505 | } | 1492 | } |
1506 | #endif | 1493 | #endif |
1507 | return error; | 1494 | return error; |
1508 | } | 1495 | } |
1509 | 1496 | ||
1510 | /* | 1497 | /* |
1511 | * Shrink the file to the given new_size. The new size must be smaller than | 1498 | * Shrink the file to the given new_size. The new size must be smaller than |
1512 | * the current size. This will free up the underlying blocks in the removed | 1499 | * the current size. This will free up the underlying blocks in the removed |
1513 | * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). | 1500 | * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). |
1514 | * | 1501 | * |
1515 | * The transaction passed to this routine must have made a permanent log | 1502 | * The transaction passed to this routine must have made a permanent log |
1516 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the | 1503 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the |
1517 | * given transaction and start new ones, so make sure everything involved in | 1504 | * given transaction and start new ones, so make sure everything involved in |
1518 | * the transaction is tidy before calling here. Some transaction will be | 1505 | * the transaction is tidy before calling here. Some transaction will be |
1519 | * returned to the caller to be committed. The incoming transaction must | 1506 | * returned to the caller to be committed. The incoming transaction must |
1520 | * already include the inode, and both inode locks must be held exclusively. | 1507 | * already include the inode, and both inode locks must be held exclusively. |
1521 | * The inode must also be "held" within the transaction. On return the inode | 1508 | * The inode must also be "held" within the transaction. On return the inode |
1522 | * will be "held" within the returned transaction. This routine does NOT | 1509 | * will be "held" within the returned transaction. This routine does NOT |
1523 | * require any disk space to be reserved for it within the transaction. | 1510 | * require any disk space to be reserved for it within the transaction. |
1524 | * | 1511 | * |
1525 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it | 1512 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it |
1526 | * indicates the fork which is to be truncated. For the attribute fork we only | 1513 | * indicates the fork which is to be truncated. For the attribute fork we only |
1527 | * support truncation to size 0. | 1514 | * support truncation to size 0. |
1528 | * | 1515 | * |
1529 | * We use the sync parameter to indicate whether or not the first transaction | 1516 | * We use the sync parameter to indicate whether or not the first transaction |
1530 | * we perform might have to be synchronous. For the attr fork, it needs to be | 1517 | * we perform might have to be synchronous. For the attr fork, it needs to be |
1531 | * so if the unlink of the inode is not yet known to be permanent in the log. | 1518 | * so if the unlink of the inode is not yet known to be permanent in the log. |
1532 | * This keeps us from freeing and reusing the blocks of the attribute fork | 1519 | * This keeps us from freeing and reusing the blocks of the attribute fork |
1533 | * before the unlink of the inode becomes permanent. | 1520 | * before the unlink of the inode becomes permanent. |
1534 | * | 1521 | * |
1535 | * For the data fork, we normally have to run synchronously if we're being | 1522 | * For the data fork, we normally have to run synchronously if we're being |
1536 | * called out of the inactive path or we're being called out of the create path | 1523 | * called out of the inactive path or we're being called out of the create path |
1537 | * where we're truncating an existing file. Either way, the truncate needs to | 1524 | * where we're truncating an existing file. Either way, the truncate needs to |
1538 | * be sync so blocks don't reappear in the file with altered data in case of a | 1525 | * be sync so blocks don't reappear in the file with altered data in case of a |
1539 | * crash. wsync filesystems can run the first case async because anything that | 1526 | * crash. wsync filesystems can run the first case async because anything that |
1540 | * shrinks the inode has to run sync so by the time we're called here from | 1527 | * shrinks the inode has to run sync so by the time we're called here from |
1541 | * inactive, the inode size is permanently set to 0. | 1528 | * inactive, the inode size is permanently set to 0. |
1542 | * | 1529 | * |
1543 | * Calls from the truncate path always need to be sync unless we're in a wsync | 1530 | * Calls from the truncate path always need to be sync unless we're in a wsync |
1544 | * filesystem and the file has already been unlinked. | 1531 | * filesystem and the file has already been unlinked. |
1545 | * | 1532 | * |
1546 | * The caller is responsible for correctly setting the sync parameter. It gets | 1533 | * The caller is responsible for correctly setting the sync parameter. It gets |
1547 | * too hard for us to guess here which path we're being called out of just | 1534 | * too hard for us to guess here which path we're being called out of just |
1548 | * based on inode state. | 1535 | * based on inode state. |
1549 | * | 1536 | * |
1550 | * If we get an error, we must return with the inode locked and linked into the | 1537 | * If we get an error, we must return with the inode locked and linked into the |
1551 | * current transaction. This keeps things simple for the higher level code, | 1538 | * current transaction. This keeps things simple for the higher level code, |
1552 | * because it always knows that the inode is locked and held in the transaction | 1539 | * because it always knows that the inode is locked and held in the transaction |
1553 | * that returns to it whether errors occur or not. We don't mark the inode | 1540 | * that returns to it whether errors occur or not. We don't mark the inode |
1554 | * dirty on error so that transactions can be easily aborted if possible. | 1541 | * dirty on error so that transactions can be easily aborted if possible. |
1555 | */ | 1542 | */ |
1556 | int | 1543 | int |
1557 | xfs_itruncate_finish( | 1544 | xfs_itruncate_finish( |
1558 | xfs_trans_t **tp, | 1545 | xfs_trans_t **tp, |
1559 | xfs_inode_t *ip, | 1546 | xfs_inode_t *ip, |
1560 | xfs_fsize_t new_size, | 1547 | xfs_fsize_t new_size, |
1561 | int fork, | 1548 | int fork, |
1562 | int sync) | 1549 | int sync) |
1563 | { | 1550 | { |
1564 | xfs_fsblock_t first_block; | 1551 | xfs_fsblock_t first_block; |
1565 | xfs_fileoff_t first_unmap_block; | 1552 | xfs_fileoff_t first_unmap_block; |
1566 | xfs_fileoff_t last_block; | 1553 | xfs_fileoff_t last_block; |
1567 | xfs_filblks_t unmap_len=0; | 1554 | xfs_filblks_t unmap_len=0; |
1568 | xfs_mount_t *mp; | 1555 | xfs_mount_t *mp; |
1569 | xfs_trans_t *ntp; | 1556 | xfs_trans_t *ntp; |
1570 | int done; | 1557 | int done; |
1571 | int committed; | 1558 | int committed; |
1572 | xfs_bmap_free_t free_list; | 1559 | xfs_bmap_free_t free_list; |
1573 | int error; | 1560 | int error; |
1574 | 1561 | ||
1575 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 1562 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); |
1576 | ASSERT((new_size == 0) || (new_size <= ip->i_size)); | 1563 | ASSERT((new_size == 0) || (new_size <= ip->i_size)); |
1577 | ASSERT(*tp != NULL); | 1564 | ASSERT(*tp != NULL); |
1578 | ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); | 1565 | ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); |
1579 | ASSERT(ip->i_transp == *tp); | 1566 | ASSERT(ip->i_transp == *tp); |
1580 | ASSERT(ip->i_itemp != NULL); | 1567 | ASSERT(ip->i_itemp != NULL); |
1581 | ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); | 1568 | ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); |
1582 | 1569 | ||
1583 | 1570 | ||
1584 | ntp = *tp; | 1571 | ntp = *tp; |
1585 | mp = (ntp)->t_mountp; | 1572 | mp = (ntp)->t_mountp; |
1586 | ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); | 1573 | ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); |
1587 | 1574 | ||
1588 | /* | 1575 | /* |
1589 | * We only support truncating the entire attribute fork. | 1576 | * We only support truncating the entire attribute fork. |
1590 | */ | 1577 | */ |
1591 | if (fork == XFS_ATTR_FORK) { | 1578 | if (fork == XFS_ATTR_FORK) { |
1592 | new_size = 0LL; | 1579 | new_size = 0LL; |
1593 | } | 1580 | } |
1594 | first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); | 1581 | first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); |
1595 | xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); | 1582 | xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); |
1596 | /* | 1583 | /* |
1597 | * The first thing we do is set the size to new_size permanently | 1584 | * The first thing we do is set the size to new_size permanently |
1598 | * on disk. This way we don't have to worry about anyone ever | 1585 | * on disk. This way we don't have to worry about anyone ever |
1599 | * being able to look at the data being freed even in the face | 1586 | * being able to look at the data being freed even in the face |
1600 | * of a crash. What we're getting around here is the case where | 1587 | * of a crash. What we're getting around here is the case where |
1601 | * we free a block, it is allocated to another file, it is written | 1588 | * we free a block, it is allocated to another file, it is written |
1602 | * to, and then we crash. If the new data gets written to the | 1589 | * to, and then we crash. If the new data gets written to the |
1603 | * file but the log buffers containing the free and reallocation | 1590 | * file but the log buffers containing the free and reallocation |
1604 | * don't, then we'd end up with garbage in the blocks being freed. | 1591 | * don't, then we'd end up with garbage in the blocks being freed. |
1605 | * As long as we make the new_size permanent before actually | 1592 | * As long as we make the new_size permanent before actually |
1606 | * freeing any blocks it doesn't matter if they get writtten to. | 1593 | * freeing any blocks it doesn't matter if they get writtten to. |
1607 | * | 1594 | * |
1608 | * The callers must signal into us whether or not the size | 1595 | * The callers must signal into us whether or not the size |
1609 | * setting here must be synchronous. There are a few cases | 1596 | * setting here must be synchronous. There are a few cases |
1610 | * where it doesn't have to be synchronous. Those cases | 1597 | * where it doesn't have to be synchronous. Those cases |
1611 | * occur if the file is unlinked and we know the unlink is | 1598 | * occur if the file is unlinked and we know the unlink is |
1612 | * permanent or if the blocks being truncated are guaranteed | 1599 | * permanent or if the blocks being truncated are guaranteed |
1613 | * to be beyond the inode eof (regardless of the link count) | 1600 | * to be beyond the inode eof (regardless of the link count) |
1614 | * and the eof value is permanent. Both of these cases occur | 1601 | * and the eof value is permanent. Both of these cases occur |
1615 | * only on wsync-mounted filesystems. In those cases, we're | 1602 | * only on wsync-mounted filesystems. In those cases, we're |
1616 | * guaranteed that no user will ever see the data in the blocks | 1603 | * guaranteed that no user will ever see the data in the blocks |
1617 | * that are being truncated so the truncate can run async. | 1604 | * that are being truncated so the truncate can run async. |
1618 | * In the free beyond eof case, the file may wind up with | 1605 | * In the free beyond eof case, the file may wind up with |
1619 | * more blocks allocated to it than it needs if we crash | 1606 | * more blocks allocated to it than it needs if we crash |
1620 | * and that won't get fixed until the next time the file | 1607 | * and that won't get fixed until the next time the file |
1621 | * is re-opened and closed but that's ok as that shouldn't | 1608 | * is re-opened and closed but that's ok as that shouldn't |
1622 | * be too many blocks. | 1609 | * be too many blocks. |
1623 | * | 1610 | * |
1624 | * However, we can't just make all wsync xactions run async | 1611 | * However, we can't just make all wsync xactions run async |
1625 | * because there's one call out of the create path that needs | 1612 | * because there's one call out of the create path that needs |
1626 | * to run sync where it's truncating an existing file to size | 1613 | * to run sync where it's truncating an existing file to size |
1627 | * 0 whose size is > 0. | 1614 | * 0 whose size is > 0. |
1628 | * | 1615 | * |
1629 | * It's probably possible to come up with a test in this | 1616 | * It's probably possible to come up with a test in this |
1630 | * routine that would correctly distinguish all the above | 1617 | * routine that would correctly distinguish all the above |
1631 | * cases from the values of the function parameters and the | 1618 | * cases from the values of the function parameters and the |
1632 | * inode state but for sanity's sake, I've decided to let the | 1619 | * inode state but for sanity's sake, I've decided to let the |
1633 | * layers above just tell us. It's simpler to correctly figure | 1620 | * layers above just tell us. It's simpler to correctly figure |
1634 | * out in the layer above exactly under what conditions we | 1621 | * out in the layer above exactly under what conditions we |
1635 | * can run async and I think it's easier for others read and | 1622 | * can run async and I think it's easier for others read and |
1636 | * follow the logic in case something has to be changed. | 1623 | * follow the logic in case something has to be changed. |
1637 | * cscope is your friend -- rcc. | 1624 | * cscope is your friend -- rcc. |
1638 | * | 1625 | * |
1639 | * The attribute fork is much simpler. | 1626 | * The attribute fork is much simpler. |
1640 | * | 1627 | * |
1641 | * For the attribute fork we allow the caller to tell us whether | 1628 | * For the attribute fork we allow the caller to tell us whether |
1642 | * the unlink of the inode that led to this call is yet permanent | 1629 | * the unlink of the inode that led to this call is yet permanent |
1643 | * in the on disk log. If it is not and we will be freeing extents | 1630 | * in the on disk log. If it is not and we will be freeing extents |
1644 | * in this inode then we make the first transaction synchronous | 1631 | * in this inode then we make the first transaction synchronous |
1645 | * to make sure that the unlink is permanent by the time we free | 1632 | * to make sure that the unlink is permanent by the time we free |
1646 | * the blocks. | 1633 | * the blocks. |
1647 | */ | 1634 | */ |
1648 | if (fork == XFS_DATA_FORK) { | 1635 | if (fork == XFS_DATA_FORK) { |
1649 | if (ip->i_d.di_nextents > 0) { | 1636 | if (ip->i_d.di_nextents > 0) { |
1650 | /* | 1637 | /* |
1651 | * If we are not changing the file size then do | 1638 | * If we are not changing the file size then do |
1652 | * not update the on-disk file size - we may be | 1639 | * not update the on-disk file size - we may be |
1653 | * called from xfs_inactive_free_eofblocks(). If we | 1640 | * called from xfs_inactive_free_eofblocks(). If we |
1654 | * update the on-disk file size and then the system | 1641 | * update the on-disk file size and then the system |
1655 | * crashes before the contents of the file are | 1642 | * crashes before the contents of the file are |
1656 | * flushed to disk then the files may be full of | 1643 | * flushed to disk then the files may be full of |
1657 | * holes (ie NULL files bug). | 1644 | * holes (ie NULL files bug). |
1658 | */ | 1645 | */ |
1659 | if (ip->i_size != new_size) { | 1646 | if (ip->i_size != new_size) { |
1660 | ip->i_d.di_size = new_size; | 1647 | ip->i_d.di_size = new_size; |
1661 | ip->i_size = new_size; | 1648 | ip->i_size = new_size; |
1662 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1649 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); |
1663 | } | 1650 | } |
1664 | } | 1651 | } |
1665 | } else if (sync) { | 1652 | } else if (sync) { |
1666 | ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); | 1653 | ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); |
1667 | if (ip->i_d.di_anextents > 0) | 1654 | if (ip->i_d.di_anextents > 0) |
1668 | xfs_trans_set_sync(ntp); | 1655 | xfs_trans_set_sync(ntp); |
1669 | } | 1656 | } |
1670 | ASSERT(fork == XFS_DATA_FORK || | 1657 | ASSERT(fork == XFS_DATA_FORK || |
1671 | (fork == XFS_ATTR_FORK && | 1658 | (fork == XFS_ATTR_FORK && |
1672 | ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || | 1659 | ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || |
1673 | (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); | 1660 | (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); |
1674 | 1661 | ||
1675 | /* | 1662 | /* |
1676 | * Since it is possible for space to become allocated beyond | 1663 | * Since it is possible for space to become allocated beyond |
1677 | * the end of the file (in a crash where the space is allocated | 1664 | * the end of the file (in a crash where the space is allocated |
1678 | * but the inode size is not yet updated), simply remove any | 1665 | * but the inode size is not yet updated), simply remove any |
1679 | * blocks which show up between the new EOF and the maximum | 1666 | * blocks which show up between the new EOF and the maximum |
1680 | * possible file size. If the first block to be removed is | 1667 | * possible file size. If the first block to be removed is |
1681 | * beyond the maximum file size (ie it is the same as last_block), | 1668 | * beyond the maximum file size (ie it is the same as last_block), |
1682 | * then there is nothing to do. | 1669 | * then there is nothing to do. |
1683 | */ | 1670 | */ |
1684 | last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); | 1671 | last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); |
1685 | ASSERT(first_unmap_block <= last_block); | 1672 | ASSERT(first_unmap_block <= last_block); |
1686 | done = 0; | 1673 | done = 0; |
1687 | if (last_block == first_unmap_block) { | 1674 | if (last_block == first_unmap_block) { |
1688 | done = 1; | 1675 | done = 1; |
1689 | } else { | 1676 | } else { |
1690 | unmap_len = last_block - first_unmap_block + 1; | 1677 | unmap_len = last_block - first_unmap_block + 1; |
1691 | } | 1678 | } |
1692 | while (!done) { | 1679 | while (!done) { |
1693 | /* | 1680 | /* |
1694 | * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() | 1681 | * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() |
1695 | * will tell us whether it freed the entire range or | 1682 | * will tell us whether it freed the entire range or |
1696 | * not. If this is a synchronous mount (wsync), | 1683 | * not. If this is a synchronous mount (wsync), |
1697 | * then we can tell bunmapi to keep all the | 1684 | * then we can tell bunmapi to keep all the |
1698 | * transactions asynchronous since the unlink | 1685 | * transactions asynchronous since the unlink |
1699 | * transaction that made this inode inactive has | 1686 | * transaction that made this inode inactive has |
1700 | * already hit the disk. There's no danger of | 1687 | * already hit the disk. There's no danger of |
1701 | * the freed blocks being reused, there being a | 1688 | * the freed blocks being reused, there being a |
1702 | * crash, and the reused blocks suddenly reappearing | 1689 | * crash, and the reused blocks suddenly reappearing |
1703 | * in this file with garbage in them once recovery | 1690 | * in this file with garbage in them once recovery |
1704 | * runs. | 1691 | * runs. |
1705 | */ | 1692 | */ |
1706 | XFS_BMAP_INIT(&free_list, &first_block); | 1693 | XFS_BMAP_INIT(&free_list, &first_block); |
1707 | error = xfs_bunmapi(ntp, ip, | 1694 | error = xfs_bunmapi(ntp, ip, |
1708 | first_unmap_block, unmap_len, | 1695 | first_unmap_block, unmap_len, |
1709 | XFS_BMAPI_AFLAG(fork) | | 1696 | XFS_BMAPI_AFLAG(fork) | |
1710 | (sync ? 0 : XFS_BMAPI_ASYNC), | 1697 | (sync ? 0 : XFS_BMAPI_ASYNC), |
1711 | XFS_ITRUNC_MAX_EXTENTS, | 1698 | XFS_ITRUNC_MAX_EXTENTS, |
1712 | &first_block, &free_list, | 1699 | &first_block, &free_list, |
1713 | NULL, &done); | 1700 | NULL, &done); |
1714 | if (error) { | 1701 | if (error) { |
1715 | /* | 1702 | /* |
1716 | * If the bunmapi call encounters an error, | 1703 | * If the bunmapi call encounters an error, |
1717 | * return to the caller where the transaction | 1704 | * return to the caller where the transaction |
1718 | * can be properly aborted. We just need to | 1705 | * can be properly aborted. We just need to |
1719 | * make sure we're not holding any resources | 1706 | * make sure we're not holding any resources |
1720 | * that we were not when we came in. | 1707 | * that we were not when we came in. |
1721 | */ | 1708 | */ |
1722 | xfs_bmap_cancel(&free_list); | 1709 | xfs_bmap_cancel(&free_list); |
1723 | return error; | 1710 | return error; |
1724 | } | 1711 | } |
1725 | 1712 | ||
1726 | /* | 1713 | /* |
1727 | * Duplicate the transaction that has the permanent | 1714 | * Duplicate the transaction that has the permanent |
1728 | * reservation and commit the old transaction. | 1715 | * reservation and commit the old transaction. |
1729 | */ | 1716 | */ |
1730 | error = xfs_bmap_finish(tp, &free_list, &committed); | 1717 | error = xfs_bmap_finish(tp, &free_list, &committed); |
1731 | ntp = *tp; | 1718 | ntp = *tp; |
1732 | if (committed) { | 1719 | if (committed) { |
1733 | /* link the inode into the next xact in the chain */ | 1720 | /* link the inode into the next xact in the chain */ |
1734 | xfs_trans_ijoin(ntp, ip, | 1721 | xfs_trans_ijoin(ntp, ip, |
1735 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 1722 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
1736 | xfs_trans_ihold(ntp, ip); | 1723 | xfs_trans_ihold(ntp, ip); |
1737 | } | 1724 | } |
1738 | 1725 | ||
1739 | if (error) { | 1726 | if (error) { |
1740 | /* | 1727 | /* |
1741 | * If the bmap finish call encounters an error, return | 1728 | * If the bmap finish call encounters an error, return |
1742 | * to the caller where the transaction can be properly | 1729 | * to the caller where the transaction can be properly |
1743 | * aborted. We just need to make sure we're not | 1730 | * aborted. We just need to make sure we're not |
1744 | * holding any resources that we were not when we came | 1731 | * holding any resources that we were not when we came |
1745 | * in. | 1732 | * in. |
1746 | * | 1733 | * |
1747 | * Aborting from this point might lose some blocks in | 1734 | * Aborting from this point might lose some blocks in |
1748 | * the file system, but oh well. | 1735 | * the file system, but oh well. |
1749 | */ | 1736 | */ |
1750 | xfs_bmap_cancel(&free_list); | 1737 | xfs_bmap_cancel(&free_list); |
1751 | return error; | 1738 | return error; |
1752 | } | 1739 | } |
1753 | 1740 | ||
1754 | if (committed) { | 1741 | if (committed) { |
1755 | /* | 1742 | /* |
1756 | * Mark the inode dirty so it will be logged and | 1743 | * Mark the inode dirty so it will be logged and |
1757 | * moved forward in the log as part of every commit. | 1744 | * moved forward in the log as part of every commit. |
1758 | */ | 1745 | */ |
1759 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1746 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); |
1760 | } | 1747 | } |
1761 | 1748 | ||
1762 | ntp = xfs_trans_dup(ntp); | 1749 | ntp = xfs_trans_dup(ntp); |
1763 | error = xfs_trans_commit(*tp, 0); | 1750 | error = xfs_trans_commit(*tp, 0); |
1764 | *tp = ntp; | 1751 | *tp = ntp; |
1765 | 1752 | ||
1766 | /* link the inode into the next transaction in the chain */ | 1753 | /* link the inode into the next transaction in the chain */ |
1767 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 1754 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
1768 | xfs_trans_ihold(ntp, ip); | 1755 | xfs_trans_ihold(ntp, ip); |
1769 | 1756 | ||
1770 | if (error) | 1757 | if (error) |
1771 | return error; | 1758 | return error; |
1772 | /* | 1759 | /* |
1773 | * transaction commit worked ok so we can drop the extra ticket | 1760 | * transaction commit worked ok so we can drop the extra ticket |
1774 | * reference that we gained in xfs_trans_dup() | 1761 | * reference that we gained in xfs_trans_dup() |
1775 | */ | 1762 | */ |
1776 | xfs_log_ticket_put(ntp->t_ticket); | 1763 | xfs_log_ticket_put(ntp->t_ticket); |
1777 | error = xfs_trans_reserve(ntp, 0, | 1764 | error = xfs_trans_reserve(ntp, 0, |
1778 | XFS_ITRUNCATE_LOG_RES(mp), 0, | 1765 | XFS_ITRUNCATE_LOG_RES(mp), 0, |
1779 | XFS_TRANS_PERM_LOG_RES, | 1766 | XFS_TRANS_PERM_LOG_RES, |
1780 | XFS_ITRUNCATE_LOG_COUNT); | 1767 | XFS_ITRUNCATE_LOG_COUNT); |
1781 | if (error) | 1768 | if (error) |
1782 | return error; | 1769 | return error; |
1783 | } | 1770 | } |
1784 | /* | 1771 | /* |
1785 | * Only update the size in the case of the data fork, but | 1772 | * Only update the size in the case of the data fork, but |
1786 | * always re-log the inode so that our permanent transaction | 1773 | * always re-log the inode so that our permanent transaction |
1787 | * can keep on rolling it forward in the log. | 1774 | * can keep on rolling it forward in the log. |
1788 | */ | 1775 | */ |
1789 | if (fork == XFS_DATA_FORK) { | 1776 | if (fork == XFS_DATA_FORK) { |
1790 | xfs_isize_check(mp, ip, new_size); | 1777 | xfs_isize_check(mp, ip, new_size); |
1791 | /* | 1778 | /* |
1792 | * If we are not changing the file size then do | 1779 | * If we are not changing the file size then do |
1793 | * not update the on-disk file size - we may be | 1780 | * not update the on-disk file size - we may be |
1794 | * called from xfs_inactive_free_eofblocks(). If we | 1781 | * called from xfs_inactive_free_eofblocks(). If we |
1795 | * update the on-disk file size and then the system | 1782 | * update the on-disk file size and then the system |
1796 | * crashes before the contents of the file are | 1783 | * crashes before the contents of the file are |
1797 | * flushed to disk then the files may be full of | 1784 | * flushed to disk then the files may be full of |
1798 | * holes (ie NULL files bug). | 1785 | * holes (ie NULL files bug). |
1799 | */ | 1786 | */ |
1800 | if (ip->i_size != new_size) { | 1787 | if (ip->i_size != new_size) { |
1801 | ip->i_d.di_size = new_size; | 1788 | ip->i_d.di_size = new_size; |
1802 | ip->i_size = new_size; | 1789 | ip->i_size = new_size; |
1803 | } | 1790 | } |
1804 | } | 1791 | } |
1805 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1792 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); |
1806 | ASSERT((new_size != 0) || | 1793 | ASSERT((new_size != 0) || |
1807 | (fork == XFS_ATTR_FORK) || | 1794 | (fork == XFS_ATTR_FORK) || |
1808 | (ip->i_delayed_blks == 0)); | 1795 | (ip->i_delayed_blks == 0)); |
1809 | ASSERT((new_size != 0) || | 1796 | ASSERT((new_size != 0) || |
1810 | (fork == XFS_ATTR_FORK) || | 1797 | (fork == XFS_ATTR_FORK) || |
1811 | (ip->i_d.di_nextents == 0)); | 1798 | (ip->i_d.di_nextents == 0)); |
1812 | xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); | 1799 | xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); |
1813 | return 0; | 1800 | return 0; |
1814 | } | 1801 | } |
1815 | 1802 | ||
1816 | /* | 1803 | /* |
1817 | * This is called when the inode's link count goes to 0. | 1804 | * This is called when the inode's link count goes to 0. |
1818 | * We place the on-disk inode on a list in the AGI. It | 1805 | * We place the on-disk inode on a list in the AGI. It |
1819 | * will be pulled from this list when the inode is freed. | 1806 | * will be pulled from this list when the inode is freed. |
1820 | */ | 1807 | */ |
1821 | int | 1808 | int |
1822 | xfs_iunlink( | 1809 | xfs_iunlink( |
1823 | xfs_trans_t *tp, | 1810 | xfs_trans_t *tp, |
1824 | xfs_inode_t *ip) | 1811 | xfs_inode_t *ip) |
1825 | { | 1812 | { |
1826 | xfs_mount_t *mp; | 1813 | xfs_mount_t *mp; |
1827 | xfs_agi_t *agi; | 1814 | xfs_agi_t *agi; |
1828 | xfs_dinode_t *dip; | 1815 | xfs_dinode_t *dip; |
1829 | xfs_buf_t *agibp; | 1816 | xfs_buf_t *agibp; |
1830 | xfs_buf_t *ibp; | 1817 | xfs_buf_t *ibp; |
1831 | xfs_agino_t agino; | 1818 | xfs_agino_t agino; |
1832 | short bucket_index; | 1819 | short bucket_index; |
1833 | int offset; | 1820 | int offset; |
1834 | int error; | 1821 | int error; |
1835 | 1822 | ||
1836 | ASSERT(ip->i_d.di_nlink == 0); | 1823 | ASSERT(ip->i_d.di_nlink == 0); |
1837 | ASSERT(ip->i_d.di_mode != 0); | 1824 | ASSERT(ip->i_d.di_mode != 0); |
1838 | ASSERT(ip->i_transp == tp); | 1825 | ASSERT(ip->i_transp == tp); |
1839 | 1826 | ||
1840 | mp = tp->t_mountp; | 1827 | mp = tp->t_mountp; |
1841 | 1828 | ||
1842 | /* | 1829 | /* |
1843 | * Get the agi buffer first. It ensures lock ordering | 1830 | * Get the agi buffer first. It ensures lock ordering |
1844 | * on the list. | 1831 | * on the list. |
1845 | */ | 1832 | */ |
1846 | error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); | 1833 | error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); |
1847 | if (error) | 1834 | if (error) |
1848 | return error; | 1835 | return error; |
1849 | agi = XFS_BUF_TO_AGI(agibp); | 1836 | agi = XFS_BUF_TO_AGI(agibp); |
1850 | 1837 | ||
1851 | /* | 1838 | /* |
1852 | * Get the index into the agi hash table for the | 1839 | * Get the index into the agi hash table for the |
1853 | * list this inode will go on. | 1840 | * list this inode will go on. |
1854 | */ | 1841 | */ |
1855 | agino = XFS_INO_TO_AGINO(mp, ip->i_ino); | 1842 | agino = XFS_INO_TO_AGINO(mp, ip->i_ino); |
1856 | ASSERT(agino != 0); | 1843 | ASSERT(agino != 0); |
1857 | bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; | 1844 | bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; |
1858 | ASSERT(agi->agi_unlinked[bucket_index]); | 1845 | ASSERT(agi->agi_unlinked[bucket_index]); |
1859 | ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); | 1846 | ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); |
1860 | 1847 | ||
1861 | if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { | 1848 | if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { |
1862 | /* | 1849 | /* |
1863 | * There is already another inode in the bucket we need | 1850 | * There is already another inode in the bucket we need |
1864 | * to add ourselves to. Add us at the front of the list. | 1851 | * to add ourselves to. Add us at the front of the list. |
1865 | * Here we put the head pointer into our next pointer, | 1852 | * Here we put the head pointer into our next pointer, |
1866 | * and then we fall through to point the head at us. | 1853 | * and then we fall through to point the head at us. |
1867 | */ | 1854 | */ |
1868 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); | 1855 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); |
1869 | if (error) | 1856 | if (error) |
1870 | return error; | 1857 | return error; |
1871 | 1858 | ||
1872 | ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); | 1859 | ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); |
1873 | /* both on-disk, don't endian flip twice */ | 1860 | /* both on-disk, don't endian flip twice */ |
1874 | dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; | 1861 | dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; |
1875 | offset = ip->i_boffset + | 1862 | offset = ip->i_imap.im_boffset + |
1876 | offsetof(xfs_dinode_t, di_next_unlinked); | 1863 | offsetof(xfs_dinode_t, di_next_unlinked); |
1877 | xfs_trans_inode_buf(tp, ibp); | 1864 | xfs_trans_inode_buf(tp, ibp); |
1878 | xfs_trans_log_buf(tp, ibp, offset, | 1865 | xfs_trans_log_buf(tp, ibp, offset, |
1879 | (offset + sizeof(xfs_agino_t) - 1)); | 1866 | (offset + sizeof(xfs_agino_t) - 1)); |
1880 | xfs_inobp_check(mp, ibp); | 1867 | xfs_inobp_check(mp, ibp); |
1881 | } | 1868 | } |
1882 | 1869 | ||
1883 | /* | 1870 | /* |
1884 | * Point the bucket head pointer at the inode being inserted. | 1871 | * Point the bucket head pointer at the inode being inserted. |
1885 | */ | 1872 | */ |
1886 | ASSERT(agino != 0); | 1873 | ASSERT(agino != 0); |
1887 | agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); | 1874 | agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); |
1888 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 1875 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
1889 | (sizeof(xfs_agino_t) * bucket_index); | 1876 | (sizeof(xfs_agino_t) * bucket_index); |
1890 | xfs_trans_log_buf(tp, agibp, offset, | 1877 | xfs_trans_log_buf(tp, agibp, offset, |
1891 | (offset + sizeof(xfs_agino_t) - 1)); | 1878 | (offset + sizeof(xfs_agino_t) - 1)); |
1892 | return 0; | 1879 | return 0; |
1893 | } | 1880 | } |
1894 | 1881 | ||
1895 | /* | 1882 | /* |
1896 | * Pull the on-disk inode from the AGI unlinked list. | 1883 | * Pull the on-disk inode from the AGI unlinked list. |
1897 | */ | 1884 | */ |
1898 | STATIC int | 1885 | STATIC int |
1899 | xfs_iunlink_remove( | 1886 | xfs_iunlink_remove( |
1900 | xfs_trans_t *tp, | 1887 | xfs_trans_t *tp, |
1901 | xfs_inode_t *ip) | 1888 | xfs_inode_t *ip) |
1902 | { | 1889 | { |
1903 | xfs_ino_t next_ino; | 1890 | xfs_ino_t next_ino; |
1904 | xfs_mount_t *mp; | 1891 | xfs_mount_t *mp; |
1905 | xfs_agi_t *agi; | 1892 | xfs_agi_t *agi; |
1906 | xfs_dinode_t *dip; | 1893 | xfs_dinode_t *dip; |
1907 | xfs_buf_t *agibp; | 1894 | xfs_buf_t *agibp; |
1908 | xfs_buf_t *ibp; | 1895 | xfs_buf_t *ibp; |
1909 | xfs_agnumber_t agno; | 1896 | xfs_agnumber_t agno; |
1910 | xfs_agino_t agino; | 1897 | xfs_agino_t agino; |
1911 | xfs_agino_t next_agino; | 1898 | xfs_agino_t next_agino; |
1912 | xfs_buf_t *last_ibp; | 1899 | xfs_buf_t *last_ibp; |
1913 | xfs_dinode_t *last_dip = NULL; | 1900 | xfs_dinode_t *last_dip = NULL; |
1914 | short bucket_index; | 1901 | short bucket_index; |
1915 | int offset, last_offset = 0; | 1902 | int offset, last_offset = 0; |
1916 | int error; | 1903 | int error; |
1917 | 1904 | ||
1918 | mp = tp->t_mountp; | 1905 | mp = tp->t_mountp; |
1919 | agno = XFS_INO_TO_AGNO(mp, ip->i_ino); | 1906 | agno = XFS_INO_TO_AGNO(mp, ip->i_ino); |
1920 | 1907 | ||
1921 | /* | 1908 | /* |
1922 | * Get the agi buffer first. It ensures lock ordering | 1909 | * Get the agi buffer first. It ensures lock ordering |
1923 | * on the list. | 1910 | * on the list. |
1924 | */ | 1911 | */ |
1925 | error = xfs_read_agi(mp, tp, agno, &agibp); | 1912 | error = xfs_read_agi(mp, tp, agno, &agibp); |
1926 | if (error) | 1913 | if (error) |
1927 | return error; | 1914 | return error; |
1928 | 1915 | ||
1929 | agi = XFS_BUF_TO_AGI(agibp); | 1916 | agi = XFS_BUF_TO_AGI(agibp); |
1930 | 1917 | ||
1931 | /* | 1918 | /* |
1932 | * Get the index into the agi hash table for the | 1919 | * Get the index into the agi hash table for the |
1933 | * list this inode will go on. | 1920 | * list this inode will go on. |
1934 | */ | 1921 | */ |
1935 | agino = XFS_INO_TO_AGINO(mp, ip->i_ino); | 1922 | agino = XFS_INO_TO_AGINO(mp, ip->i_ino); |
1936 | ASSERT(agino != 0); | 1923 | ASSERT(agino != 0); |
1937 | bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; | 1924 | bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; |
1938 | ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); | 1925 | ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); |
1939 | ASSERT(agi->agi_unlinked[bucket_index]); | 1926 | ASSERT(agi->agi_unlinked[bucket_index]); |
1940 | 1927 | ||
1941 | if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { | 1928 | if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { |
1942 | /* | 1929 | /* |
1943 | * We're at the head of the list. Get the inode's | 1930 | * We're at the head of the list. Get the inode's |
1944 | * on-disk buffer to see if there is anyone after us | 1931 | * on-disk buffer to see if there is anyone after us |
1945 | * on the list. Only modify our next pointer if it | 1932 | * on the list. Only modify our next pointer if it |
1946 | * is not already NULLAGINO. This saves us the overhead | 1933 | * is not already NULLAGINO. This saves us the overhead |
1947 | * of dealing with the buffer when there is no need to | 1934 | * of dealing with the buffer when there is no need to |
1948 | * change it. | 1935 | * change it. |
1949 | */ | 1936 | */ |
1950 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); | 1937 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); |
1951 | if (error) { | 1938 | if (error) { |
1952 | cmn_err(CE_WARN, | 1939 | cmn_err(CE_WARN, |
1953 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 1940 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
1954 | error, mp->m_fsname); | 1941 | error, mp->m_fsname); |
1955 | return error; | 1942 | return error; |
1956 | } | 1943 | } |
1957 | next_agino = be32_to_cpu(dip->di_next_unlinked); | 1944 | next_agino = be32_to_cpu(dip->di_next_unlinked); |
1958 | ASSERT(next_agino != 0); | 1945 | ASSERT(next_agino != 0); |
1959 | if (next_agino != NULLAGINO) { | 1946 | if (next_agino != NULLAGINO) { |
1960 | dip->di_next_unlinked = cpu_to_be32(NULLAGINO); | 1947 | dip->di_next_unlinked = cpu_to_be32(NULLAGINO); |
1961 | offset = ip->i_boffset + | 1948 | offset = ip->i_imap.im_boffset + |
1962 | offsetof(xfs_dinode_t, di_next_unlinked); | 1949 | offsetof(xfs_dinode_t, di_next_unlinked); |
1963 | xfs_trans_inode_buf(tp, ibp); | 1950 | xfs_trans_inode_buf(tp, ibp); |
1964 | xfs_trans_log_buf(tp, ibp, offset, | 1951 | xfs_trans_log_buf(tp, ibp, offset, |
1965 | (offset + sizeof(xfs_agino_t) - 1)); | 1952 | (offset + sizeof(xfs_agino_t) - 1)); |
1966 | xfs_inobp_check(mp, ibp); | 1953 | xfs_inobp_check(mp, ibp); |
1967 | } else { | 1954 | } else { |
1968 | xfs_trans_brelse(tp, ibp); | 1955 | xfs_trans_brelse(tp, ibp); |
1969 | } | 1956 | } |
1970 | /* | 1957 | /* |
1971 | * Point the bucket head pointer at the next inode. | 1958 | * Point the bucket head pointer at the next inode. |
1972 | */ | 1959 | */ |
1973 | ASSERT(next_agino != 0); | 1960 | ASSERT(next_agino != 0); |
1974 | ASSERT(next_agino != agino); | 1961 | ASSERT(next_agino != agino); |
1975 | agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); | 1962 | agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); |
1976 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 1963 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
1977 | (sizeof(xfs_agino_t) * bucket_index); | 1964 | (sizeof(xfs_agino_t) * bucket_index); |
1978 | xfs_trans_log_buf(tp, agibp, offset, | 1965 | xfs_trans_log_buf(tp, agibp, offset, |
1979 | (offset + sizeof(xfs_agino_t) - 1)); | 1966 | (offset + sizeof(xfs_agino_t) - 1)); |
1980 | } else { | 1967 | } else { |
1981 | /* | 1968 | /* |
1982 | * We need to search the list for the inode being freed. | 1969 | * We need to search the list for the inode being freed. |
1983 | */ | 1970 | */ |
1984 | next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); | 1971 | next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); |
1985 | last_ibp = NULL; | 1972 | last_ibp = NULL; |
1986 | while (next_agino != agino) { | 1973 | while (next_agino != agino) { |
1987 | /* | 1974 | /* |
1988 | * If the last inode wasn't the one pointing to | 1975 | * If the last inode wasn't the one pointing to |
1989 | * us, then release its buffer since we're not | 1976 | * us, then release its buffer since we're not |
1990 | * going to do anything with it. | 1977 | * going to do anything with it. |
1991 | */ | 1978 | */ |
1992 | if (last_ibp != NULL) { | 1979 | if (last_ibp != NULL) { |
1993 | xfs_trans_brelse(tp, last_ibp); | 1980 | xfs_trans_brelse(tp, last_ibp); |
1994 | } | 1981 | } |
1995 | next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); | 1982 | next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); |
1996 | error = xfs_inotobp(mp, tp, next_ino, &last_dip, | 1983 | error = xfs_inotobp(mp, tp, next_ino, &last_dip, |
1997 | &last_ibp, &last_offset, 0); | 1984 | &last_ibp, &last_offset, 0); |
1998 | if (error) { | 1985 | if (error) { |
1999 | cmn_err(CE_WARN, | 1986 | cmn_err(CE_WARN, |
2000 | "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", | 1987 | "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", |
2001 | error, mp->m_fsname); | 1988 | error, mp->m_fsname); |
2002 | return error; | 1989 | return error; |
2003 | } | 1990 | } |
2004 | next_agino = be32_to_cpu(last_dip->di_next_unlinked); | 1991 | next_agino = be32_to_cpu(last_dip->di_next_unlinked); |
2005 | ASSERT(next_agino != NULLAGINO); | 1992 | ASSERT(next_agino != NULLAGINO); |
2006 | ASSERT(next_agino != 0); | 1993 | ASSERT(next_agino != 0); |
2007 | } | 1994 | } |
2008 | /* | 1995 | /* |
2009 | * Now last_ibp points to the buffer previous to us on | 1996 | * Now last_ibp points to the buffer previous to us on |
2010 | * the unlinked list. Pull us from the list. | 1997 | * the unlinked list. Pull us from the list. |
2011 | */ | 1998 | */ |
2012 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); | 1999 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); |
2013 | if (error) { | 2000 | if (error) { |
2014 | cmn_err(CE_WARN, | 2001 | cmn_err(CE_WARN, |
2015 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2002 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
2016 | error, mp->m_fsname); | 2003 | error, mp->m_fsname); |
2017 | return error; | 2004 | return error; |
2018 | } | 2005 | } |
2019 | next_agino = be32_to_cpu(dip->di_next_unlinked); | 2006 | next_agino = be32_to_cpu(dip->di_next_unlinked); |
2020 | ASSERT(next_agino != 0); | 2007 | ASSERT(next_agino != 0); |
2021 | ASSERT(next_agino != agino); | 2008 | ASSERT(next_agino != agino); |
2022 | if (next_agino != NULLAGINO) { | 2009 | if (next_agino != NULLAGINO) { |
2023 | dip->di_next_unlinked = cpu_to_be32(NULLAGINO); | 2010 | dip->di_next_unlinked = cpu_to_be32(NULLAGINO); |
2024 | offset = ip->i_boffset + | 2011 | offset = ip->i_imap.im_boffset + |
2025 | offsetof(xfs_dinode_t, di_next_unlinked); | 2012 | offsetof(xfs_dinode_t, di_next_unlinked); |
2026 | xfs_trans_inode_buf(tp, ibp); | 2013 | xfs_trans_inode_buf(tp, ibp); |
2027 | xfs_trans_log_buf(tp, ibp, offset, | 2014 | xfs_trans_log_buf(tp, ibp, offset, |
2028 | (offset + sizeof(xfs_agino_t) - 1)); | 2015 | (offset + sizeof(xfs_agino_t) - 1)); |
2029 | xfs_inobp_check(mp, ibp); | 2016 | xfs_inobp_check(mp, ibp); |
2030 | } else { | 2017 | } else { |
2031 | xfs_trans_brelse(tp, ibp); | 2018 | xfs_trans_brelse(tp, ibp); |
2032 | } | 2019 | } |
2033 | /* | 2020 | /* |
2034 | * Point the previous inode on the list to the next inode. | 2021 | * Point the previous inode on the list to the next inode. |
2035 | */ | 2022 | */ |
2036 | last_dip->di_next_unlinked = cpu_to_be32(next_agino); | 2023 | last_dip->di_next_unlinked = cpu_to_be32(next_agino); |
2037 | ASSERT(next_agino != 0); | 2024 | ASSERT(next_agino != 0); |
2038 | offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); | 2025 | offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); |
2039 | xfs_trans_inode_buf(tp, last_ibp); | 2026 | xfs_trans_inode_buf(tp, last_ibp); |
2040 | xfs_trans_log_buf(tp, last_ibp, offset, | 2027 | xfs_trans_log_buf(tp, last_ibp, offset, |
2041 | (offset + sizeof(xfs_agino_t) - 1)); | 2028 | (offset + sizeof(xfs_agino_t) - 1)); |
2042 | xfs_inobp_check(mp, last_ibp); | 2029 | xfs_inobp_check(mp, last_ibp); |
2043 | } | 2030 | } |
2044 | return 0; | 2031 | return 0; |
2045 | } | 2032 | } |
2046 | 2033 | ||
/*
 * Free an entire cluster of in-core inodes backing the chunk that starts
 * at @inum.  Called from xfs_ifree() when xfs_difree() reports that the
 * whole inode chunk became free, so the cluster buffers can be invalidated
 * (xfs_trans_binval) rather than written back.
 *
 * free_ip is the inode whose free triggered this; it is already locked
 * XFS_ILOCK_EXCL by the caller and must not be re-locked here.
 */
STATIC void
xfs_ifree_cluster(
	xfs_inode_t	*free_ip,
	xfs_trans_t	*tp,
	xfs_ino_t	inum)
{
	xfs_mount_t		*mp = free_ip->i_mount;
	int			blks_per_cluster;
	int			nbufs;
	int			ninodes;
	int			i, j, found, pre_flushed;
	xfs_daddr_t		blkno;
	xfs_buf_t		*bp;
	xfs_inode_t		*ip, **ip_found;
	xfs_inode_log_item_t	*iip;
	xfs_log_item_t		*lip;
	xfs_perag_t		*pag = xfs_get_perag(mp, inum);

	/*
	 * Work out the geometry: how many blocks make up one inode
	 * cluster, how many inodes that covers, and how many cluster
	 * buffers span the whole allocation chunk.
	 */
	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
		blks_per_cluster = 1;
		ninodes = mp->m_sb.sb_inopblock;
		nbufs = XFS_IALLOC_BLOCKS(mp);
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
					mp->m_sb.sb_blocksize;
		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
	}

	/* KM_NOFS: we are inside a transaction, reclaim must not recurse. */
	ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);

	for (j = 0; j < nbufs; j++, inum += ninodes) {
		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
					 XFS_INO_TO_AGBNO(mp, inum));


		/*
		 * Look for each inode in memory and attempt to lock it,
		 * we can be racing with flush and tail pushing here.
		 * any inode we get the locks on, add to an array of
		 * inode items to process later.
		 *
		 * The get the buffer lock, we could beat a flush
		 * or tail pushing thread to the lock here, in which
		 * case they will go looking for the inode buffer
		 * and fail, we need some other form of interlock
		 * here.
		 */
		found = 0;
		for (i = 0; i < ninodes; i++) {
			read_lock(&pag->pag_ici_lock);
			ip = radix_tree_lookup(&pag->pag_ici_root,
					XFS_INO_TO_AGINO(mp, (inum + i)));

			/* Inode not in memory or we found it already,
			 * nothing to do
			 */
			if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
				read_unlock(&pag->pag_ici_lock);
				continue;
			}

			if (xfs_inode_clean(ip)) {
				read_unlock(&pag->pag_ici_lock);
				continue;
			}

			/* If we can get the locks then add it to the
			 * list, otherwise by the time we get the bp lock
			 * below it will already be attached to the
			 * inode buffer.
			 */

			/* This inode will already be locked - by us, lets
			 * keep it that way.
			 */

			if (ip == free_ip) {
				if (xfs_iflock_nowait(ip)) {
					xfs_iflags_set(ip, XFS_ISTALE);
					if (xfs_inode_clean(ip)) {
						xfs_ifunlock(ip);
					} else {
						ip_found[found++] = ip;
					}
				}
				read_unlock(&pag->pag_ici_lock);
				continue;
			}

			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				if (xfs_iflock_nowait(ip)) {
					xfs_iflags_set(ip, XFS_ISTALE);

					if (xfs_inode_clean(ip)) {
						xfs_ifunlock(ip);
						xfs_iunlock(ip, XFS_ILOCK_EXCL);
					} else {
						ip_found[found++] = ip;
					}
				} else {
					xfs_iunlock(ip, XFS_ILOCK_EXCL);
				}
			}
			read_unlock(&pag->pag_ici_lock);
		}

		/* Get the cluster buffer itself, locked, into the transaction. */
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
					mp->m_bsize * blks_per_cluster,
					XFS_BUF_LOCK);

		/*
		 * Walk the inode log items already attached to the buffer
		 * (inodes that were flushed before we got the buffer lock)
		 * and mark them stale too, redirecting their I/O completion
		 * to xfs_istale_done.
		 */
		pre_flushed = 0;
		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
		while (lip) {
			if (lip->li_type == XFS_LI_INODE) {
				iip = (xfs_inode_log_item_t *)lip;
				ASSERT(iip->ili_logged == 1);
				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
				xfs_trans_ail_copy_lsn(mp->m_ail,
							&iip->ili_flush_lsn,
							&iip->ili_item.li_lsn);
				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
				pre_flushed++;
			}
			lip = lip->li_bio_list;
		}

		/*
		 * Attach the inodes we captured above to the buffer so
		 * xfs_istale_done runs for them when the (invalidated)
		 * buffer's I/O "completes".
		 */
		for (i = 0; i < found; i++) {
			ip = ip_found[i];
			iip = ip->i_itemp;

			if (!iip) {
				/* Inode dirty only in core: nothing logged,
				 * just drop the update and unlock. */
				ip->i_update_core = 0;
				xfs_ifunlock(ip);
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
				continue;
			}

			iip->ili_last_fields = iip->ili_format.ilf_fields;
			iip->ili_format.ilf_fields = 0;
			iip->ili_logged = 1;
			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
						&iip->ili_item.li_lsn);

			xfs_buf_attach_iodone(bp,
				(void(*)(xfs_buf_t*,xfs_log_item_t*))
				xfs_istale_done, (xfs_log_item_t *)iip);
			if (ip != free_ip) {
				/* free_ip stays locked for the caller. */
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
			}
		}

		if (found || pre_flushed)
			xfs_trans_stale_inode_buf(tp, bp);
		xfs_trans_binval(tp, bp);
	}

	kmem_free(ip_found);
	xfs_put_perag(mp, pag);
}
2207 | 2194 | ||
/*
 * This is called to return an inode to the inode free list.
 * The inode should already be truncated to 0 length and have
 * no pages associated with it.  This routine also assumes that
 * the inode is already a part of the transaction.
 *
 * The on-disk copy of the inode will have been added to the list
 * of unlinked inodes in the AGI. We need to remove the inode from
 * that list atomically with respect to freeing it here.
 *
 * Returns 0 on success or a positive errno-style error from
 * xfs_iunlink_remove(), xfs_difree() or xfs_itobp().
 */
int
xfs_ifree(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	xfs_bmap_free_t	*flist)
{
	int			error;
	int			delete;		/* whole chunk freed? */
	xfs_ino_t		first_ino;	/* first inode of freed chunk */
	xfs_dinode_t		*dip;
	xfs_buf_t		*ibp;

	/* Caller must hold the inode exclusively and have it joined to tp,
	 * fully truncated with no extents in either fork. */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(ip->i_transp == tp);
	ASSERT(ip->i_d.di_nlink == 0);
	ASSERT(ip->i_d.di_nextents == 0);
	ASSERT(ip->i_d.di_anextents == 0);
	ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
	       ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
	ASSERT(ip->i_d.di_nblocks == 0);

	/*
	 * Pull the on-disk inode from the AGI unlinked list.
	 */
	error = xfs_iunlink_remove(tp, ip);
	if (error != 0) {
		return error;
	}

	/* Mark the inode free in the inode btree; sets *delete if the
	 * whole chunk is now free. */
	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
	if (error != 0) {
		return error;
	}
	ip->i_d.di_mode = 0;		/* mark incore inode as free */
	ip->i_d.di_flags = 0;
	ip->i_d.di_dmevmask = 0;
	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
	ip->i_df.if_ext_max =
		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	/*
	 * Bump the generation count so no one will be confused
	 * by reincarnations of this inode.
	 */
	ip->i_d.di_gen++;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* Map the inode to its (locked) cluster buffer. */
	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
	if (error)
		return error;

        /*
	 * Clear the on-disk di_mode. This is to prevent xfs_bulkstat
	 * from picking up this inode when it is reclaimed (its incore state
	 * initialzed but not flushed to disk yet). The in-core di_mode is
	 * already cleared  and a corresponding transaction logged.
	 * The hack here just synchronizes the in-core to on-disk
	 * di_mode value in advance before the actual inode sync to disk.
	 * This is OK because the inode is already unlinked and would never
	 * change its di_mode again for this inode generation.
	 * This is a temporary hack that would require a proper fix
	 * in the future.
	 */
	dip->di_mode = 0;

	if (delete) {
		/* Whole chunk freed: stale and invalidate its clusters. */
		xfs_ifree_cluster(ip, tp, first_ino);
	}

	return 0;
}
2291 | 2278 | ||
/*
 * Reallocate the space for if_broot based on the number of records
 * being added or deleted as indicated in rec_diff.  Move the records
 * and pointers in if_broot to fit the new size.  When shrinking this
 * will eliminate holes between the records and pointers created by
 * the caller.  When growing this will create holes to be filled in
 * by the caller.
 *
 * The caller must not request to add more records than would fit in
 * the on-disk inode root.  If the if_broot is currently NULL, then
 * if we adding records one will be allocated.  The caller must also
 * not request that the number of records go below zero, although
 * it can go to zero.
 *
 * ip -- the inode whose if_broot area is changing
 * ext_diff -- the change in the number of records, positive or negative,
 *	 requested for the if_broot array.
 */
void
xfs_iroot_realloc(
	xfs_inode_t		*ip,
	int			rec_diff,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			cur_max;
	xfs_ifork_t		*ifp;
	struct xfs_btree_block	*new_broot;
	int			new_max;
	size_t			new_size;
	char			*np;
	char			*op;

	/*
	 * Handle the degenerate case quietly.
	 */
	if (rec_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (rec_diff > 0) {
		/*
		 * If there wasn't any memory allocated before, just
		 * allocate it now and get out.
		 */
		if (ifp->if_broot_bytes == 0) {
			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
			ifp->if_broot_bytes = (int)new_size;
			return;
		}

		/*
		 * If there is already an existing if_broot, then we need
		 * to realloc() it and shift the pointers to their new
		 * location.  The records don't change location because
		 * they are kept butted up against the btree block header.
		 */
		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
		new_max = cur_max + rec_diff;
		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
				KM_SLEEP);
		/* Compute old pointer-area start BEFORE updating
		 * if_broot_bytes, new start AFTER, then memmove (the
		 * regions overlap within the same buffer). */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     (int)new_size);
		ifp->if_broot_bytes = (int)new_size;
		ASSERT(ifp->if_broot_bytes <=
			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
		return;
	}

	/*
	 * rec_diff is less than 0.  In this case, we are shrinking the
	 * if_broot buffer.  It must already exist.  If we go to zero
	 * records, just get rid of the root and clear the status bit.
	 */
	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
	new_max = cur_max + rec_diff;
	ASSERT(new_max >= 0);
	if (new_max > 0)
		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
	else
		new_size = 0;
	if (new_size > 0) {
		new_broot = kmem_alloc(new_size, KM_SLEEP);
		/*
		 * First copy over the btree block header.
		 */
		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
	} else {
		new_broot = NULL;
		ifp->if_flags &= ~XFS_IFBROOT;
	}

	/*
	 * Only copy the records and pointers if there are any.
	 */
	if (new_max > 0) {
		/*
		 * First copy the records.
		 */
		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));

		/*
		 * Then copy the pointers.
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
						     (int)new_size);
		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
	}
	/* Swap in the shrunken root and free the old one. */
	kmem_free(ifp->if_broot);
	ifp->if_broot = new_broot;
	ifp->if_broot_bytes = (int)new_size;
	ASSERT(ifp->if_broot_bytes <=
		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
	return;
}
2419 | 2406 | ||
2420 | 2407 | ||
2421 | /* | 2408 | /* |
2422 | * This is called when the amount of space needed for if_data | 2409 | * This is called when the amount of space needed for if_data |
2423 | * is increased or decreased. The change in size is indicated by | 2410 | * is increased or decreased. The change in size is indicated by |
2424 | * the number of bytes that need to be added or deleted in the | 2411 | * the number of bytes that need to be added or deleted in the |
2425 | * byte_diff parameter. | 2412 | * byte_diff parameter. |
2426 | * | 2413 | * |
2427 | * If the amount of space needed has decreased below the size of the | 2414 | * If the amount of space needed has decreased below the size of the |
2428 | * inline buffer, then switch to using the inline buffer. Otherwise, | 2415 | * inline buffer, then switch to using the inline buffer. Otherwise, |
2429 | * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer | 2416 | * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer |
2430 | * to what is needed. | 2417 | * to what is needed. |
2431 | * | 2418 | * |
2432 | * ip -- the inode whose if_data area is changing | 2419 | * ip -- the inode whose if_data area is changing |
2433 | * byte_diff -- the change in the number of bytes, positive or negative, | 2420 | * byte_diff -- the change in the number of bytes, positive or negative, |
2434 | * requested for the if_data array. | 2421 | * requested for the if_data array. |
2435 | */ | 2422 | */ |
/*
 * Resize the if_data area of an inode fork by byte_diff bytes
 * (positive to grow, negative to shrink).
 *
 * If the new size fits in the fork's inline buffer we switch to (or
 * stay on) the inline buffer; otherwise the data lives in a
 * kmem_alloc()/kmem_realloc()'d buffer whose size is rounded up to a
 * multiple of 4 bytes so the region can be logged on word boundaries.
 *
 * ip        -- the inode whose if_data area is changing
 * byte_diff -- the change in the number of bytes, positive or negative,
 *              requested for the if_data array
 * whichfork -- XFS_DATA_FORK or XFS_ATTR_FORK
 */
void
xfs_idata_realloc(
	xfs_inode_t	*ip,
	int		byte_diff,
	int		whichfork)
{
	xfs_ifork_t	*ifp;
	int		new_size;
	int		real_size;

	if (byte_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	new_size = (int)ifp->if_bytes + byte_diff;
	ASSERT(new_size >= 0);

	if (new_size == 0) {
		/*
		 * Fork data is going away entirely: free any heap buffer
		 * (the inline buffer is part of the fork and is not freed).
		 */
		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			kmem_free(ifp->if_u1.if_data);
		}
		ifp->if_u1.if_data = NULL;
		real_size = 0;
	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
		/*
		 * If the valid extents/data can fit in if_inline_ext/data,
		 * copy them from the malloc'd vector and free it.
		 */
		if (ifp->if_u1.if_data == NULL) {
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			ASSERT(ifp->if_real_bytes != 0);
			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
				new_size);
			kmem_free(ifp->if_u1.if_data);
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		}
		real_size = 0;
	} else {
		/*
		 * Stuck with malloc/realloc.
		 * For inline data, the underlying buffer must be
		 * a multiple of 4 bytes in size so that it can be
		 * logged and stay on word boundaries.  We enforce
		 * that here.
		 */
		real_size = roundup(new_size, 4);
		if (ifp->if_u1.if_data == NULL) {
			/* No buffer yet: allocate a fresh one. */
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			/*
			 * Only do the realloc if the underlying size
			 * is really changing.
			 */
			if (ifp->if_real_bytes != real_size) {
				ifp->if_u1.if_data =
					kmem_realloc(ifp->if_u1.if_data,
							real_size,
							ifp->if_real_bytes,
							KM_SLEEP);
			}
		} else {
			/*
			 * Growing out of the inline buffer: allocate a heap
			 * buffer and copy the existing inline bytes over.
			 */
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
			      ifp->if_bytes);
		}
	}
	/* if_real_bytes is 0 whenever the data lives in the inline buffer. */
	ifp->if_real_bytes = real_size;
	ifp->if_bytes = new_size;
	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
}
2510 | 2497 | ||
/*
 * Free all memory held by the given inode fork: the btree root
 * (if_broot), any heap-allocated inline-data buffer, and any extent
 * list (direct or indirect).  For the attribute fork the xfs_ifork
 * structure itself is also returned to its zone.
 */
void
xfs_idestroy_fork(
	xfs_inode_t	*ip,
	int		whichfork)
{
	xfs_ifork_t	*ifp;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (ifp->if_broot != NULL) {
		kmem_free(ifp->if_broot);
		ifp->if_broot = NULL;
	}

	/*
	 * If the format is local, then we can't have an extents
	 * array so just look for an inline data array.  If we're
	 * not local then we may or may not have an extents list,
	 * so check and free it up if we do.
	 */
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		/* Only free if_data when it is a heap buffer, not inline. */
		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
		    (ifp->if_u1.if_data != NULL)) {
			ASSERT(ifp->if_real_bytes != 0);
			kmem_free(ifp->if_u1.if_data);
			ifp->if_u1.if_data = NULL;
			ifp->if_real_bytes = 0;
		}
	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
		   ((ifp->if_flags & XFS_IFEXTIREC) ||
		    ((ifp->if_u1.if_extents != NULL) &&
		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
		/* Indirect extent tree, or an out-of-line extent array. */
		ASSERT(ifp->if_real_bytes != 0);
		xfs_iext_destroy(ifp);
	}
	/* By now only the (unfreed) inline extent array may remain. */
	ASSERT(ifp->if_u1.if_extents == NULL ||
	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
	ASSERT(ifp->if_real_bytes == 0);
	if (whichfork == XFS_ATTR_FORK) {
		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
		ip->i_afp = NULL;
	}
}
2553 | 2540 | ||
/*
 * This is called to free all the memory associated with an inode.
 * It must free the inode itself and any buffers allocated for
 * if_extents/if_data and if_broot.  It must also free the lock
 * associated with the inode.
 *
 * Note: because we don't initialise everything on reallocation out
 * of the zone, we must ensure we nullify everything correctly before
 * freeing the structure.
 */
void
xfs_idestroy(
	xfs_inode_t	*ip)
{
	/* Only regular files, directories and symlinks carry a data fork
	 * worth tearing down here. */
	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}
	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);

#ifdef XFS_INODE_TRACE
	ktrace_free(ip->i_trace);
#endif
#ifdef XFS_BMAP_TRACE
	ktrace_free(ip->i_xtrace);
#endif
#ifdef XFS_BTREE_TRACE
	ktrace_free(ip->i_btrace);
#endif
#ifdef XFS_RW_TRACE
	ktrace_free(ip->i_rwtrace);
#endif
#ifdef XFS_ILOCK_TRACE
	ktrace_free(ip->i_lock_trace);
#endif
#ifdef XFS_DIR2_TRACE
	ktrace_free(ip->i_dir_trace);
#endif
	if (ip->i_itemp) {
		/*
		 * Only if we are shutting down the fs will we see an
		 * inode still in the AIL. If it is there, we should remove
		 * it to prevent a use-after-free from occurring.
		 */
		xfs_log_item_t	*lip = &ip->i_itemp->ili_item;
		struct xfs_ail	*ailp = lip->li_ailp;

		ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
		       XFS_FORCED_SHUTDOWN(ip->i_mount));
		if (lip->li_flags & XFS_LI_IN_AIL) {
			spin_lock(&ailp->xa_lock);
			/*
			 * Re-check the flag under xa_lock: the unlocked test
			 * above was racy.  NOTE(review): the asymmetric
			 * unlock suggests xfs_trans_ail_delete() drops
			 * xa_lock itself — confirm against its definition.
			 */
			if (lip->li_flags & XFS_LI_IN_AIL)
				xfs_trans_ail_delete(ailp, lip);
			else
				spin_unlock(&ailp->xa_lock);
		}
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}
	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_iocount) == 0);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(completion_done(&ip->i_flush));
	kmem_zone_free(xfs_inode_zone, ip);
}
2624 | 2611 | ||
2625 | 2612 | ||
/*
 * Increment the pin count of the given inode.  The count is an
 * atomic_t, so no additional lock protects it; the caller must hold
 * the inode lock exclusively (asserted below).
 */
void
xfs_ipin(
	xfs_inode_t	*ip)
{
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	atomic_inc(&ip->i_pincount);
}
2638 | 2625 | ||
/*
 * Decrement the pin count of the given inode, and wake up anyone
 * sleeping on i_ipin_wait (see __xfs_iunpin_wait()) if the count
 * drops to 0.  The inode must have been previously pinned with a
 * call to xfs_ipin().
 */
void
xfs_iunpin(
	xfs_inode_t	*ip)
{
	ASSERT(atomic_read(&ip->i_pincount) > 0);

	if (atomic_dec_and_test(&ip->i_pincount))
		wake_up(&ip->i_ipin_wait);
}
2653 | 2640 | ||
/*
 * This is called to unpin an inode.  It can be directed to wait or to return
 * immediately without waiting for the inode to be unpinned.  The caller must
 * have the inode locked in at least shared mode so that the buffer cannot be
 * subsequently pinned once someone is waiting for it to be unpinned.
 */
STATIC void
__xfs_iunpin_wait(
	xfs_inode_t	*ip,
	int		wait)
{
	xfs_inode_log_item_t	*iip = ip->i_itemp;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	if (atomic_read(&ip->i_pincount) == 0)
		return;

	/*
	 * Give the log a push to start the unpinning I/O: force up to the
	 * LSN the inode was last logged at, or the whole log (lsn 0) if
	 * there is no log item / recorded LSN.
	 */
	xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
				iip->ili_last_lsn : 0, XFS_LOG_FORCE);
	if (wait)
		wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
}
2677 | 2664 | ||
/* Unpin the inode and block until the pin count reaches zero. */
static inline void
xfs_iunpin_wait(
	xfs_inode_t	*ip)
{
	__xfs_iunpin_wait(ip, 1);
}
2684 | 2671 | ||
/* Kick off unpinning (log force) but return without waiting. */
static inline void
xfs_iunpin_nowait(
	xfs_inode_t	*ip)
{
	__xfs_iunpin_wait(ip, 0);
}
2691 | 2678 | ||
2692 | 2679 | ||
2693 | /* | 2680 | /* |
2694 | * xfs_iextents_copy() | 2681 | * xfs_iextents_copy() |
2695 | * | 2682 | * |
2696 | * This is called to copy the REAL extents (as opposed to the delayed | 2683 | * This is called to copy the REAL extents (as opposed to the delayed |
2697 | * allocation extents) from the inode into the given buffer. It | 2684 | * allocation extents) from the inode into the given buffer. It |
2698 | * returns the number of bytes copied into the buffer. | 2685 | * returns the number of bytes copied into the buffer. |
2699 | * | 2686 | * |
2700 | * If there are no delayed allocation extents, then we can just | 2687 | * If there are no delayed allocation extents, then we can just |
2701 | * memcpy() the extents into the buffer. Otherwise, we need to | 2688 | * memcpy() the extents into the buffer. Otherwise, we need to |
2702 | * examine each extent in turn and skip those which are delayed. | 2689 | * examine each extent in turn and skip those which are delayed. |
2703 | */ | 2690 | */ |
2704 | int | 2691 | int |
2705 | xfs_iextents_copy( | 2692 | xfs_iextents_copy( |
2706 | xfs_inode_t *ip, | 2693 | xfs_inode_t *ip, |
2707 | xfs_bmbt_rec_t *dp, | 2694 | xfs_bmbt_rec_t *dp, |
2708 | int whichfork) | 2695 | int whichfork) |
2709 | { | 2696 | { |
2710 | int copied; | 2697 | int copied; |
2711 | int i; | 2698 | int i; |
2712 | xfs_ifork_t *ifp; | 2699 | xfs_ifork_t *ifp; |
2713 | int nrecs; | 2700 | int nrecs; |
2714 | xfs_fsblock_t start_block; | 2701 | xfs_fsblock_t start_block; |
2715 | 2702 | ||
2716 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2703 | ifp = XFS_IFORK_PTR(ip, whichfork); |
2717 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2704 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
2718 | ASSERT(ifp->if_bytes > 0); | 2705 | ASSERT(ifp->if_bytes > 0); |
2719 | 2706 | ||
2720 | nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 2707 | nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
2721 | XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); | 2708 | XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); |
2722 | ASSERT(nrecs > 0); | 2709 | ASSERT(nrecs > 0); |
2723 | 2710 | ||
2724 | /* | 2711 | /* |
2725 | * There are some delayed allocation extents in the | 2712 | * There are some delayed allocation extents in the |
2726 | * inode, so copy the extents one at a time and skip | 2713 | * inode, so copy the extents one at a time and skip |
2727 | * the delayed ones. There must be at least one | 2714 | * the delayed ones. There must be at least one |
2728 | * non-delayed extent. | 2715 | * non-delayed extent. |
2729 | */ | 2716 | */ |
2730 | copied = 0; | 2717 | copied = 0; |
2731 | for (i = 0; i < nrecs; i++) { | 2718 | for (i = 0; i < nrecs; i++) { |
2732 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | 2719 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); |
2733 | start_block = xfs_bmbt_get_startblock(ep); | 2720 | start_block = xfs_bmbt_get_startblock(ep); |
2734 | if (ISNULLSTARTBLOCK(start_block)) { | 2721 | if (ISNULLSTARTBLOCK(start_block)) { |
2735 | /* | 2722 | /* |
2736 | * It's a delayed allocation extent, so skip it. | 2723 | * It's a delayed allocation extent, so skip it. |
2737 | */ | 2724 | */ |
2738 | continue; | 2725 | continue; |
2739 | } | 2726 | } |
2740 | 2727 | ||
2741 | /* Translate to on disk format */ | 2728 | /* Translate to on disk format */ |
2742 | put_unaligned(cpu_to_be64(ep->l0), &dp->l0); | 2729 | put_unaligned(cpu_to_be64(ep->l0), &dp->l0); |
2743 | put_unaligned(cpu_to_be64(ep->l1), &dp->l1); | 2730 | put_unaligned(cpu_to_be64(ep->l1), &dp->l1); |
2744 | dp++; | 2731 | dp++; |
2745 | copied++; | 2732 | copied++; |
2746 | } | 2733 | } |
2747 | ASSERT(copied != 0); | 2734 | ASSERT(copied != 0); |
2748 | xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); | 2735 | xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); |
2749 | 2736 | ||
2750 | return (copied * (uint)sizeof(xfs_bmbt_rec_t)); | 2737 | return (copied * (uint)sizeof(xfs_bmbt_rec_t)); |
2751 | } | 2738 | } |
2752 | 2739 | ||
/*
 * Each of the following cases stores data into the same region
 * of the on-disk inode, so only one of them can be valid at
 * any given time.  While it is possible to have conflicting formats
 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
 * in EXTENTS format, this can only happen when the fork has
 * changed formats after being modified but before being flushed.
 * In these cases, the format always takes precedence, because the
 * format indicates the current state of the fork.
 */
/*ARGSUSED*/
STATIC void
xfs_iflush_fork(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip,
	xfs_inode_log_item_t	*iip,
	int			whichfork,
	xfs_buf_t		*bp)
{
	char			*cp;	/* destination: fork area in dinode */
	xfs_ifork_t		*ifp;
	xfs_mount_t		*mp;
#ifdef XFS_TRANS_DEBUG
	int			first;
#endif
	/* Per-fork (data=0, attr=1) log flag lookup tables. */
	static const short	brootflag[2] =
		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
	static const short	dataflag[2] =
		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
	static const short	extflag[2] =
		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };

	/* Nothing to flush if the inode was never logged. */
	if (!iip)
		return;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/*
	 * This can happen if we gave up in iformat in an error path,
	 * for the attribute fork.
	 */
	if (!ifp) {
		ASSERT(whichfork == XFS_ATTR_FORK);
		return;
	}
	cp = XFS_DFORK_PTR(dip, whichfork);
	mp = ip->i_mount;
	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		/* Inline data: straight byte copy into the dinode. */
		if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
		    (ifp->if_bytes > 0)) {
			ASSERT(ifp->if_u1.if_data != NULL);
			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
		}
		break;

	case XFS_DINODE_FMT_EXTENTS:
		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
		       !(iip->ili_format.ilf_fields & extflag[whichfork]));
		ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) ||
			(ifp->if_bytes == 0));
		ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) ||
			(ifp->if_bytes > 0));
		/* Copy real extents in on-disk format, skipping delalloc. */
		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
		    (ifp->if_bytes > 0)) {
			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
				whichfork);
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		/* Convert the incore broot to on-disk bmdr format. */
		if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
		    (ifp->if_broot_bytes > 0)) {
			ASSERT(ifp->if_broot != NULL);
			ASSERT(ifp->if_broot_bytes <=
			       (XFS_IFORK_SIZE(ip, whichfork) +
				XFS_BROOT_SIZE_ADJ));
			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
				(xfs_bmdr_block_t *)cp,
				XFS_DFORK_SIZE(dip, mp, whichfork));
		}
		break;

	case XFS_DINODE_FMT_DEV:
		/* Device inodes store the rdev in the data fork area. */
		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
			ASSERT(whichfork == XFS_DATA_FORK);
			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
		}
		break;

	case XFS_DINODE_FMT_UUID:
		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
			ASSERT(whichfork == XFS_DATA_FORK);
			memcpy(XFS_DFORK_DPTR(dip),
				&ip->i_df.if_u2.if_uuid,
				sizeof(uuid_t));
		}
		break;

	default:
		ASSERT(0);
		break;
	}
}
2857 | 2844 | ||
2858 | STATIC int | 2845 | STATIC int |
2859 | xfs_iflush_cluster( | 2846 | xfs_iflush_cluster( |
2860 | xfs_inode_t *ip, | 2847 | xfs_inode_t *ip, |
2861 | xfs_buf_t *bp) | 2848 | xfs_buf_t *bp) |
2862 | { | 2849 | { |
2863 | xfs_mount_t *mp = ip->i_mount; | 2850 | xfs_mount_t *mp = ip->i_mount; |
2864 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | 2851 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); |
2865 | unsigned long first_index, mask; | 2852 | unsigned long first_index, mask; |
2866 | unsigned long inodes_per_cluster; | 2853 | unsigned long inodes_per_cluster; |
2867 | int ilist_size; | 2854 | int ilist_size; |
2868 | xfs_inode_t **ilist; | 2855 | xfs_inode_t **ilist; |
2869 | xfs_inode_t *iq; | 2856 | xfs_inode_t *iq; |
2870 | int nr_found; | 2857 | int nr_found; |
2871 | int clcount = 0; | 2858 | int clcount = 0; |
2872 | int bufwasdelwri; | 2859 | int bufwasdelwri; |
2873 | int i; | 2860 | int i; |
2874 | 2861 | ||
2875 | ASSERT(pag->pagi_inodeok); | 2862 | ASSERT(pag->pagi_inodeok); |
2876 | ASSERT(pag->pag_ici_init); | 2863 | ASSERT(pag->pag_ici_init); |
2877 | 2864 | ||
2878 | inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; | 2865 | inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; |
2879 | ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); | 2866 | ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); |
2880 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); | 2867 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); |
2881 | if (!ilist) | 2868 | if (!ilist) |
2882 | return 0; | 2869 | return 0; |
2883 | 2870 | ||
2884 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 2871 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
2885 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | 2872 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
2886 | read_lock(&pag->pag_ici_lock); | 2873 | read_lock(&pag->pag_ici_lock); |
2887 | /* really need a gang lookup range call here */ | 2874 | /* really need a gang lookup range call here */ |
2888 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | 2875 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, |
2889 | first_index, inodes_per_cluster); | 2876 | first_index, inodes_per_cluster); |
2890 | if (nr_found == 0) | 2877 | if (nr_found == 0) |
2891 | goto out_free; | 2878 | goto out_free; |
2892 | 2879 | ||
2893 | for (i = 0; i < nr_found; i++) { | 2880 | for (i = 0; i < nr_found; i++) { |
2894 | iq = ilist[i]; | 2881 | iq = ilist[i]; |
2895 | if (iq == ip) | 2882 | if (iq == ip) |
2896 | continue; | 2883 | continue; |
2897 | /* if the inode lies outside this cluster, we're done. */ | 2884 | /* if the inode lies outside this cluster, we're done. */ |
2898 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | 2885 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) |
2899 | break; | 2886 | break; |
2900 | /* | 2887 | /* |
2901 | * Do an un-protected check to see if the inode is dirty and | 2888 | * Do an un-protected check to see if the inode is dirty and |
2902 | * is a candidate for flushing. These checks will be repeated | 2889 | * is a candidate for flushing. These checks will be repeated |
2903 | * later after the appropriate locks are acquired. | 2890 | * later after the appropriate locks are acquired. |
2904 | */ | 2891 | */ |
2905 | if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) | 2892 | if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) |
2906 | continue; | 2893 | continue; |
2907 | 2894 | ||
2908 | /* | 2895 | /* |
2909 | * Try to get locks. If any are unavailable or it is pinned, | 2896 | * Try to get locks. If any are unavailable or it is pinned, |
2910 | * then this inode cannot be flushed and is skipped. | 2897 | * then this inode cannot be flushed and is skipped. |
2911 | */ | 2898 | */ |
2912 | 2899 | ||
2913 | if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) | 2900 | if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) |
2914 | continue; | 2901 | continue; |
2915 | if (!xfs_iflock_nowait(iq)) { | 2902 | if (!xfs_iflock_nowait(iq)) { |
2916 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | 2903 | xfs_iunlock(iq, XFS_ILOCK_SHARED); |
2917 | continue; | 2904 | continue; |
2918 | } | 2905 | } |
2919 | if (xfs_ipincount(iq)) { | 2906 | if (xfs_ipincount(iq)) { |
2920 | xfs_ifunlock(iq); | 2907 | xfs_ifunlock(iq); |
2921 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | 2908 | xfs_iunlock(iq, XFS_ILOCK_SHARED); |
2922 | continue; | 2909 | continue; |
2923 | } | 2910 | } |
2924 | 2911 | ||
2925 | /* | 2912 | /* |
2926 | * arriving here means that this inode can be flushed. First | 2913 | * arriving here means that this inode can be flushed. First |
2927 | * re-check that it's dirty before flushing. | 2914 | * re-check that it's dirty before flushing. |
2928 | */ | 2915 | */ |
2929 | if (!xfs_inode_clean(iq)) { | 2916 | if (!xfs_inode_clean(iq)) { |
2930 | int error; | 2917 | int error; |
2931 | error = xfs_iflush_int(iq, bp); | 2918 | error = xfs_iflush_int(iq, bp); |
2932 | if (error) { | 2919 | if (error) { |
2933 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | 2920 | xfs_iunlock(iq, XFS_ILOCK_SHARED); |
2934 | goto cluster_corrupt_out; | 2921 | goto cluster_corrupt_out; |
2935 | } | 2922 | } |
2936 | clcount++; | 2923 | clcount++; |
2937 | } else { | 2924 | } else { |
2938 | xfs_ifunlock(iq); | 2925 | xfs_ifunlock(iq); |
2939 | } | 2926 | } |
2940 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | 2927 | xfs_iunlock(iq, XFS_ILOCK_SHARED); |
2941 | } | 2928 | } |
2942 | 2929 | ||
2943 | if (clcount) { | 2930 | if (clcount) { |
2944 | XFS_STATS_INC(xs_icluster_flushcnt); | 2931 | XFS_STATS_INC(xs_icluster_flushcnt); |
2945 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | 2932 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); |
2946 | } | 2933 | } |
2947 | 2934 | ||
2948 | out_free: | 2935 | out_free: |
2949 | read_unlock(&pag->pag_ici_lock); | 2936 | read_unlock(&pag->pag_ici_lock); |
2950 | kmem_free(ilist); | 2937 | kmem_free(ilist); |
2951 | return 0; | 2938 | return 0; |
2952 | 2939 | ||
2953 | 2940 | ||
2954 | cluster_corrupt_out: | 2941 | cluster_corrupt_out: |
2955 | /* | 2942 | /* |
2956 | * Corruption detected in the clustering loop. Invalidate the | 2943 | * Corruption detected in the clustering loop. Invalidate the |
2957 | * inode buffer and shut down the filesystem. | 2944 | * inode buffer and shut down the filesystem. |
2958 | */ | 2945 | */ |
2959 | read_unlock(&pag->pag_ici_lock); | 2946 | read_unlock(&pag->pag_ici_lock); |
2960 | /* | 2947 | /* |
2961 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2948 | * Clean up the buffer. If it was B_DELWRI, just release it -- |
2962 | * brelse can handle it with no problems. If not, shut down the | 2949 | * brelse can handle it with no problems. If not, shut down the |
2963 | * filesystem before releasing the buffer. | 2950 | * filesystem before releasing the buffer. |
2964 | */ | 2951 | */ |
2965 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | 2952 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); |
2966 | if (bufwasdelwri) | 2953 | if (bufwasdelwri) |
2967 | xfs_buf_relse(bp); | 2954 | xfs_buf_relse(bp); |
2968 | 2955 | ||
2969 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 2956 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
2970 | 2957 | ||
2971 | if (!bufwasdelwri) { | 2958 | if (!bufwasdelwri) { |
2972 | /* | 2959 | /* |
2973 | * Just like incore_relse: if we have b_iodone functions, | 2960 | * Just like incore_relse: if we have b_iodone functions, |
2974 | * mark the buffer as an error and call them. Otherwise | 2961 | * mark the buffer as an error and call them. Otherwise |
2975 | * mark it as stale and brelse. | 2962 | * mark it as stale and brelse. |
2976 | */ | 2963 | */ |
2977 | if (XFS_BUF_IODONE_FUNC(bp)) { | 2964 | if (XFS_BUF_IODONE_FUNC(bp)) { |
2978 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | 2965 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); |
2979 | XFS_BUF_UNDONE(bp); | 2966 | XFS_BUF_UNDONE(bp); |
2980 | XFS_BUF_STALE(bp); | 2967 | XFS_BUF_STALE(bp); |
2981 | XFS_BUF_SHUT(bp); | 2968 | XFS_BUF_SHUT(bp); |
2982 | XFS_BUF_ERROR(bp,EIO); | 2969 | XFS_BUF_ERROR(bp,EIO); |
2983 | xfs_biodone(bp); | 2970 | xfs_biodone(bp); |
2984 | } else { | 2971 | } else { |
2985 | XFS_BUF_STALE(bp); | 2972 | XFS_BUF_STALE(bp); |
2986 | xfs_buf_relse(bp); | 2973 | xfs_buf_relse(bp); |
2987 | } | 2974 | } |
2988 | } | 2975 | } |
2989 | 2976 | ||
2990 | /* | 2977 | /* |
2991 | * Unlocks the flush lock | 2978 | * Unlocks the flush lock |
2992 | */ | 2979 | */ |
2993 | xfs_iflush_abort(iq); | 2980 | xfs_iflush_abort(iq); |
2994 | kmem_free(ilist); | 2981 | kmem_free(ilist); |
2995 | return XFS_ERROR(EFSCORRUPTED); | 2982 | return XFS_ERROR(EFSCORRUPTED); |
2996 | } | 2983 | } |
2997 | 2984 | ||
2998 | /* | 2985 | /* |
2999 | * xfs_iflush() will write a modified inode's changes out to the | 2986 | * xfs_iflush() will write a modified inode's changes out to the |
3000 | * inode's on disk home. The caller must have the inode lock held | 2987 | * inode's on disk home. The caller must have the inode lock held |
3001 | * in at least shared mode and the inode flush completion must be | 2988 | * in at least shared mode and the inode flush completion must be |
3002 | * active as well. The inode lock will still be held upon return from | 2989 | * active as well. The inode lock will still be held upon return from |
3003 | * the call and the caller is free to unlock it. | 2990 | * the call and the caller is free to unlock it. |
3004 | * The inode flush will be completed when the inode reaches the disk. | 2991 | * The inode flush will be completed when the inode reaches the disk. |
3005 | * The flags indicate how the inode's buffer should be written out. | 2992 | * The flags indicate how the inode's buffer should be written out. |
3006 | */ | 2993 | */ |
3007 | int | 2994 | int |
3008 | xfs_iflush( | 2995 | xfs_iflush( |
3009 | xfs_inode_t *ip, | 2996 | xfs_inode_t *ip, |
3010 | uint flags) | 2997 | uint flags) |
3011 | { | 2998 | { |
3012 | xfs_inode_log_item_t *iip; | 2999 | xfs_inode_log_item_t *iip; |
3013 | xfs_buf_t *bp; | 3000 | xfs_buf_t *bp; |
3014 | xfs_dinode_t *dip; | 3001 | xfs_dinode_t *dip; |
3015 | xfs_mount_t *mp; | 3002 | xfs_mount_t *mp; |
3016 | int error; | 3003 | int error; |
3017 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); | 3004 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); |
3018 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | 3005 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; |
3019 | 3006 | ||
3020 | XFS_STATS_INC(xs_iflush_count); | 3007 | XFS_STATS_INC(xs_iflush_count); |
3021 | 3008 | ||
3022 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 3009 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
3023 | ASSERT(!completion_done(&ip->i_flush)); | 3010 | ASSERT(!completion_done(&ip->i_flush)); |
3024 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 3011 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
3025 | ip->i_d.di_nextents > ip->i_df.if_ext_max); | 3012 | ip->i_d.di_nextents > ip->i_df.if_ext_max); |
3026 | 3013 | ||
3027 | iip = ip->i_itemp; | 3014 | iip = ip->i_itemp; |
3028 | mp = ip->i_mount; | 3015 | mp = ip->i_mount; |
3029 | 3016 | ||
3030 | /* | 3017 | /* |
3031 | * If the inode isn't dirty, then just release the inode | 3018 | * If the inode isn't dirty, then just release the inode |
3032 | * flush lock and do nothing. | 3019 | * flush lock and do nothing. |
3033 | */ | 3020 | */ |
3034 | if (xfs_inode_clean(ip)) { | 3021 | if (xfs_inode_clean(ip)) { |
3035 | xfs_ifunlock(ip); | 3022 | xfs_ifunlock(ip); |
3036 | return 0; | 3023 | return 0; |
3037 | } | 3024 | } |
3038 | 3025 | ||
3039 | /* | 3026 | /* |
3040 | * We can't flush the inode until it is unpinned, so wait for it if we | 3027 | * We can't flush the inode until it is unpinned, so wait for it if we |
3041 | * are allowed to block. We know noone new can pin it, because we are | 3028 | * are allowed to block. We know noone new can pin it, because we are |
3042 | * holding the inode lock shared and you need to hold it exclusively to | 3029 | * holding the inode lock shared and you need to hold it exclusively to |
3043 | * pin the inode. | 3030 | * pin the inode. |
3044 | * | 3031 | * |
3045 | * If we are not allowed to block, force the log out asynchronously so | 3032 | * If we are not allowed to block, force the log out asynchronously so |
3046 | * that when we come back the inode will be unpinned. If other inodes | 3033 | * that when we come back the inode will be unpinned. If other inodes |
3047 | * in the same cluster are dirty, they will probably write the inode | 3034 | * in the same cluster are dirty, they will probably write the inode |
3048 | * out for us if they occur after the log force completes. | 3035 | * out for us if they occur after the log force completes. |
3049 | */ | 3036 | */ |
3050 | if (noblock && xfs_ipincount(ip)) { | 3037 | if (noblock && xfs_ipincount(ip)) { |
3051 | xfs_iunpin_nowait(ip); | 3038 | xfs_iunpin_nowait(ip); |
3052 | xfs_ifunlock(ip); | 3039 | xfs_ifunlock(ip); |
3053 | return EAGAIN; | 3040 | return EAGAIN; |
3054 | } | 3041 | } |
3055 | xfs_iunpin_wait(ip); | 3042 | xfs_iunpin_wait(ip); |
3056 | 3043 | ||
3057 | /* | 3044 | /* |
3058 | * This may have been unpinned because the filesystem is shutting | 3045 | * This may have been unpinned because the filesystem is shutting |
3059 | * down forcibly. If that's the case we must not write this inode | 3046 | * down forcibly. If that's the case we must not write this inode |
3060 | * to disk, because the log record didn't make it to disk! | 3047 | * to disk, because the log record didn't make it to disk! |
3061 | */ | 3048 | */ |
3062 | if (XFS_FORCED_SHUTDOWN(mp)) { | 3049 | if (XFS_FORCED_SHUTDOWN(mp)) { |
3063 | ip->i_update_core = 0; | 3050 | ip->i_update_core = 0; |
3064 | if (iip) | 3051 | if (iip) |
3065 | iip->ili_format.ilf_fields = 0; | 3052 | iip->ili_format.ilf_fields = 0; |
3066 | xfs_ifunlock(ip); | 3053 | xfs_ifunlock(ip); |
3067 | return XFS_ERROR(EIO); | 3054 | return XFS_ERROR(EIO); |
3068 | } | 3055 | } |
3069 | 3056 | ||
3070 | /* | 3057 | /* |
3071 | * Decide how buffer will be flushed out. This is done before | 3058 | * Decide how buffer will be flushed out. This is done before |
3072 | * the call to xfs_iflush_int because this field is zeroed by it. | 3059 | * the call to xfs_iflush_int because this field is zeroed by it. |
3073 | */ | 3060 | */ |
3074 | if (iip != NULL && iip->ili_format.ilf_fields != 0) { | 3061 | if (iip != NULL && iip->ili_format.ilf_fields != 0) { |
3075 | /* | 3062 | /* |
3076 | * Flush out the inode buffer according to the directions | 3063 | * Flush out the inode buffer according to the directions |
3077 | * of the caller. In the cases where the caller has given | 3064 | * of the caller. In the cases where the caller has given |
3078 | * us a choice choose the non-delwri case. This is because | 3065 | * us a choice choose the non-delwri case. This is because |
3079 | * the inode is in the AIL and we need to get it out soon. | 3066 | * the inode is in the AIL and we need to get it out soon. |
3080 | */ | 3067 | */ |
3081 | switch (flags) { | 3068 | switch (flags) { |
3082 | case XFS_IFLUSH_SYNC: | 3069 | case XFS_IFLUSH_SYNC: |
3083 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: | 3070 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: |
3084 | flags = 0; | 3071 | flags = 0; |
3085 | break; | 3072 | break; |
3086 | case XFS_IFLUSH_ASYNC_NOBLOCK: | 3073 | case XFS_IFLUSH_ASYNC_NOBLOCK: |
3087 | case XFS_IFLUSH_ASYNC: | 3074 | case XFS_IFLUSH_ASYNC: |
3088 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: | 3075 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: |
3089 | flags = INT_ASYNC; | 3076 | flags = INT_ASYNC; |
3090 | break; | 3077 | break; |
3091 | case XFS_IFLUSH_DELWRI: | 3078 | case XFS_IFLUSH_DELWRI: |
3092 | flags = INT_DELWRI; | 3079 | flags = INT_DELWRI; |
3093 | break; | 3080 | break; |
3094 | default: | 3081 | default: |
3095 | ASSERT(0); | 3082 | ASSERT(0); |
3096 | flags = 0; | 3083 | flags = 0; |
3097 | break; | 3084 | break; |
3098 | } | 3085 | } |
3099 | } else { | 3086 | } else { |
3100 | switch (flags) { | 3087 | switch (flags) { |
3101 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: | 3088 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: |
3102 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: | 3089 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: |
3103 | case XFS_IFLUSH_DELWRI: | 3090 | case XFS_IFLUSH_DELWRI: |
3104 | flags = INT_DELWRI; | 3091 | flags = INT_DELWRI; |
3105 | break; | 3092 | break; |
3106 | case XFS_IFLUSH_ASYNC_NOBLOCK: | 3093 | case XFS_IFLUSH_ASYNC_NOBLOCK: |
3107 | case XFS_IFLUSH_ASYNC: | 3094 | case XFS_IFLUSH_ASYNC: |
3108 | flags = INT_ASYNC; | 3095 | flags = INT_ASYNC; |
3109 | break; | 3096 | break; |
3110 | case XFS_IFLUSH_SYNC: | 3097 | case XFS_IFLUSH_SYNC: |
3111 | flags = 0; | 3098 | flags = 0; |
3112 | break; | 3099 | break; |
3113 | default: | 3100 | default: |
3114 | ASSERT(0); | 3101 | ASSERT(0); |
3115 | flags = 0; | 3102 | flags = 0; |
3116 | break; | 3103 | break; |
3117 | } | 3104 | } |
3118 | } | 3105 | } |
3119 | 3106 | ||
3120 | /* | 3107 | /* |
3121 | * Get the buffer containing the on-disk inode. | 3108 | * Get the buffer containing the on-disk inode. |
3122 | */ | 3109 | */ |
3123 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, | 3110 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, |
3124 | noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); | 3111 | noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); |
3125 | if (error || !bp) { | 3112 | if (error || !bp) { |
3126 | xfs_ifunlock(ip); | 3113 | xfs_ifunlock(ip); |
3127 | return error; | 3114 | return error; |
3128 | } | 3115 | } |
3129 | 3116 | ||
3130 | /* | 3117 | /* |
3131 | * First flush out the inode that xfs_iflush was called with. | 3118 | * First flush out the inode that xfs_iflush was called with. |
3132 | */ | 3119 | */ |
3133 | error = xfs_iflush_int(ip, bp); | 3120 | error = xfs_iflush_int(ip, bp); |
3134 | if (error) | 3121 | if (error) |
3135 | goto corrupt_out; | 3122 | goto corrupt_out; |
3136 | 3123 | ||
3137 | /* | 3124 | /* |
3138 | * If the buffer is pinned then push on the log now so we won't | 3125 | * If the buffer is pinned then push on the log now so we won't |
3139 | * get stuck waiting in the write for too long. | 3126 | * get stuck waiting in the write for too long. |
3140 | */ | 3127 | */ |
3141 | if (XFS_BUF_ISPINNED(bp)) | 3128 | if (XFS_BUF_ISPINNED(bp)) |
3142 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); | 3129 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); |
3143 | 3130 | ||
3144 | /* | 3131 | /* |
3145 | * inode clustering: | 3132 | * inode clustering: |
3146 | * see if other inodes can be gathered into this write | 3133 | * see if other inodes can be gathered into this write |
3147 | */ | 3134 | */ |
3148 | error = xfs_iflush_cluster(ip, bp); | 3135 | error = xfs_iflush_cluster(ip, bp); |
3149 | if (error) | 3136 | if (error) |
3150 | goto cluster_corrupt_out; | 3137 | goto cluster_corrupt_out; |
3151 | 3138 | ||
3152 | if (flags & INT_DELWRI) { | 3139 | if (flags & INT_DELWRI) { |
3153 | xfs_bdwrite(mp, bp); | 3140 | xfs_bdwrite(mp, bp); |
3154 | } else if (flags & INT_ASYNC) { | 3141 | } else if (flags & INT_ASYNC) { |
3155 | error = xfs_bawrite(mp, bp); | 3142 | error = xfs_bawrite(mp, bp); |
3156 | } else { | 3143 | } else { |
3157 | error = xfs_bwrite(mp, bp); | 3144 | error = xfs_bwrite(mp, bp); |
3158 | } | 3145 | } |
3159 | return error; | 3146 | return error; |
3160 | 3147 | ||
3161 | corrupt_out: | 3148 | corrupt_out: |
3162 | xfs_buf_relse(bp); | 3149 | xfs_buf_relse(bp); |
3163 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 3150 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
3164 | cluster_corrupt_out: | 3151 | cluster_corrupt_out: |
3165 | /* | 3152 | /* |
3166 | * Unlocks the flush lock | 3153 | * Unlocks the flush lock |
3167 | */ | 3154 | */ |
3168 | xfs_iflush_abort(ip); | 3155 | xfs_iflush_abort(ip); |
3169 | return XFS_ERROR(EFSCORRUPTED); | 3156 | return XFS_ERROR(EFSCORRUPTED); |
3170 | } | 3157 | } |
3171 | 3158 | ||
3172 | 3159 | ||
3173 | STATIC int | 3160 | STATIC int |
3174 | xfs_iflush_int( | 3161 | xfs_iflush_int( |
3175 | xfs_inode_t *ip, | 3162 | xfs_inode_t *ip, |
3176 | xfs_buf_t *bp) | 3163 | xfs_buf_t *bp) |
3177 | { | 3164 | { |
3178 | xfs_inode_log_item_t *iip; | 3165 | xfs_inode_log_item_t *iip; |
3179 | xfs_dinode_t *dip; | 3166 | xfs_dinode_t *dip; |
3180 | xfs_mount_t *mp; | 3167 | xfs_mount_t *mp; |
3181 | #ifdef XFS_TRANS_DEBUG | 3168 | #ifdef XFS_TRANS_DEBUG |
3182 | int first; | 3169 | int first; |
3183 | #endif | 3170 | #endif |
3184 | 3171 | ||
3185 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 3172 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
3186 | ASSERT(!completion_done(&ip->i_flush)); | 3173 | ASSERT(!completion_done(&ip->i_flush)); |
3187 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 3174 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
3188 | ip->i_d.di_nextents > ip->i_df.if_ext_max); | 3175 | ip->i_d.di_nextents > ip->i_df.if_ext_max); |
3189 | 3176 | ||
3190 | iip = ip->i_itemp; | 3177 | iip = ip->i_itemp; |
3191 | mp = ip->i_mount; | 3178 | mp = ip->i_mount; |
3192 | 3179 | ||
3193 | 3180 | ||
3194 | /* | 3181 | /* |
3195 | * If the inode isn't dirty, then just release the inode | 3182 | * If the inode isn't dirty, then just release the inode |
3196 | * flush lock and do nothing. | 3183 | * flush lock and do nothing. |
3197 | */ | 3184 | */ |
3198 | if (xfs_inode_clean(ip)) { | 3185 | if (xfs_inode_clean(ip)) { |
3199 | xfs_ifunlock(ip); | 3186 | xfs_ifunlock(ip); |
3200 | return 0; | 3187 | return 0; |
3201 | } | 3188 | } |
3202 | 3189 | ||
3203 | /* set *dip = inode's place in the buffer */ | 3190 | /* set *dip = inode's place in the buffer */ |
3204 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); | 3191 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); |
3205 | 3192 | ||
3206 | /* | 3193 | /* |
3207 | * Clear i_update_core before copying out the data. | 3194 | * Clear i_update_core before copying out the data. |
3208 | * This is for coordination with our timestamp updates | 3195 | * This is for coordination with our timestamp updates |
3209 | * that don't hold the inode lock. They will always | 3196 | * that don't hold the inode lock. They will always |
3210 | * update the timestamps BEFORE setting i_update_core, | 3197 | * update the timestamps BEFORE setting i_update_core, |
3211 | * so if we clear i_update_core after they set it we | 3198 | * so if we clear i_update_core after they set it we |
3212 | * are guaranteed to see their updates to the timestamps. | 3199 | * are guaranteed to see their updates to the timestamps. |
3213 | * I believe that this depends on strongly ordered memory | 3200 | * I believe that this depends on strongly ordered memory |
3214 | * semantics, but we have that. We use the SYNCHRONIZE | 3201 | * semantics, but we have that. We use the SYNCHRONIZE |
3215 | * macro to make sure that the compiler does not reorder | 3202 | * macro to make sure that the compiler does not reorder |
3216 | * the i_update_core access below the data copy below. | 3203 | * the i_update_core access below the data copy below. |
3217 | */ | 3204 | */ |
3218 | ip->i_update_core = 0; | 3205 | ip->i_update_core = 0; |
3219 | SYNCHRONIZE(); | 3206 | SYNCHRONIZE(); |
3220 | 3207 | ||
3221 | /* | 3208 | /* |
3222 | * Make sure to get the latest atime from the Linux inode. | 3209 | * Make sure to get the latest atime from the Linux inode. |
3223 | */ | 3210 | */ |
3224 | xfs_synchronize_atime(ip); | 3211 | xfs_synchronize_atime(ip); |
3225 | 3212 | ||
3226 | if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, | 3213 | if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, |
3227 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { | 3214 | mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { |
3228 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 3215 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, |
3229 | "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", | 3216 | "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", |
3230 | ip->i_ino, be16_to_cpu(dip->di_magic), dip); | 3217 | ip->i_ino, be16_to_cpu(dip->di_magic), dip); |
3231 | goto corrupt_out; | 3218 | goto corrupt_out; |
3232 | } | 3219 | } |
3233 | if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, | 3220 | if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, |
3234 | mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { | 3221 | mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { |
3235 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 3222 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, |
3236 | "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", | 3223 | "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", |
3237 | ip->i_ino, ip, ip->i_d.di_magic); | 3224 | ip->i_ino, ip, ip->i_d.di_magic); |
3238 | goto corrupt_out; | 3225 | goto corrupt_out; |
3239 | } | 3226 | } |
3240 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { | 3227 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { |
3241 | if (XFS_TEST_ERROR( | 3228 | if (XFS_TEST_ERROR( |
3242 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && | 3229 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
3243 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), | 3230 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), |
3244 | mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { | 3231 | mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { |
3245 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 3232 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, |
3246 | "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", | 3233 | "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", |
3247 | ip->i_ino, ip); | 3234 | ip->i_ino, ip); |
3248 | goto corrupt_out; | 3235 | goto corrupt_out; |
3249 | } | 3236 | } |
3250 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { | 3237 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { |
3251 | if (XFS_TEST_ERROR( | 3238 | if (XFS_TEST_ERROR( |
3252 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && | 3239 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
3253 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && | 3240 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && |
3254 | (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), | 3241 | (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), |
3255 | mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { | 3242 | mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { |
3256 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 3243 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, |
3257 | "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", | 3244 | "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", |
3258 | ip->i_ino, ip); | 3245 | ip->i_ino, ip); |
3259 | goto corrupt_out; | 3246 | goto corrupt_out; |
3260 | } | 3247 | } |
3261 | } | 3248 | } |
3262 | if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > | 3249 | if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > |
3263 | ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, | 3250 | ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, |
3264 | XFS_RANDOM_IFLUSH_5)) { | 3251 | XFS_RANDOM_IFLUSH_5)) { |
3265 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 3252 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, |
3266 | "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", | 3253 | "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", |
3267 | ip->i_ino, | 3254 | ip->i_ino, |
3268 | ip->i_d.di_nextents + ip->i_d.di_anextents, | 3255 | ip->i_d.di_nextents + ip->i_d.di_anextents, |
3269 | ip->i_d.di_nblocks, | 3256 | ip->i_d.di_nblocks, |
3270 | ip); | 3257 | ip); |
3271 | goto corrupt_out; | 3258 | goto corrupt_out; |
3272 | } | 3259 | } |
3273 | if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, | 3260 | if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, |
3274 | mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { | 3261 | mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { |
3275 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, | 3262 | xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, |
3276 | "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", | 3263 | "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", |
3277 | ip->i_ino, ip->i_d.di_forkoff, ip); | 3264 | ip->i_ino, ip->i_d.di_forkoff, ip); |
3278 | goto corrupt_out; | 3265 | goto corrupt_out; |
3279 | } | 3266 | } |
3280 | /* | 3267 | /* |
3281 | * bump the flush iteration count, used to detect flushes which | 3268 | * bump the flush iteration count, used to detect flushes which |
3282 | * postdate a log record during recovery. | 3269 | * postdate a log record during recovery. |
3283 | */ | 3270 | */ |
3284 | 3271 | ||
3285 | ip->i_d.di_flushiter++; | 3272 | ip->i_d.di_flushiter++; |
3286 | 3273 | ||
3287 | /* | 3274 | /* |
3288 | * Copy the dirty parts of the inode into the on-disk | 3275 | * Copy the dirty parts of the inode into the on-disk |
3289 | * inode. We always copy out the core of the inode, | 3276 | * inode. We always copy out the core of the inode, |
3290 | * because if the inode is dirty at all the core must | 3277 | * because if the inode is dirty at all the core must |
3291 | * be. | 3278 | * be. |
3292 | */ | 3279 | */ |
3293 | xfs_dinode_to_disk(dip, &ip->i_d); | 3280 | xfs_dinode_to_disk(dip, &ip->i_d); |
3294 | 3281 | ||
3295 | /* Wrap, we never let the log put out DI_MAX_FLUSH */ | 3282 | /* Wrap, we never let the log put out DI_MAX_FLUSH */ |
3296 | if (ip->i_d.di_flushiter == DI_MAX_FLUSH) | 3283 | if (ip->i_d.di_flushiter == DI_MAX_FLUSH) |
3297 | ip->i_d.di_flushiter = 0; | 3284 | ip->i_d.di_flushiter = 0; |
3298 | 3285 | ||
3299 | /* | 3286 | /* |
3300 | * If this is really an old format inode and the superblock version | 3287 | * If this is really an old format inode and the superblock version |
3301 | * has not been updated to support only new format inodes, then | 3288 | * has not been updated to support only new format inodes, then |
3302 | * convert back to the old inode format. If the superblock version | 3289 | * convert back to the old inode format. If the superblock version |
3303 | * has been updated, then make the conversion permanent. | 3290 | * has been updated, then make the conversion permanent. |
3304 | */ | 3291 | */ |
3305 | ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); | 3292 | ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); |
3306 | if (ip->i_d.di_version == 1) { | 3293 | if (ip->i_d.di_version == 1) { |
3307 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { | 3294 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { |
3308 | /* | 3295 | /* |
3309 | * Convert it back. | 3296 | * Convert it back. |
3310 | */ | 3297 | */ |
3311 | ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); | 3298 | ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); |
3312 | dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); | 3299 | dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); |
3313 | } else { | 3300 | } else { |
3314 | /* | 3301 | /* |
3315 | * The superblock version has already been bumped, | 3302 | * The superblock version has already been bumped, |
3316 | * so just make the conversion to the new inode | 3303 | * so just make the conversion to the new inode |
3317 | * format permanent. | 3304 | * format permanent. |
3318 | */ | 3305 | */ |
3319 | ip->i_d.di_version = 2; | 3306 | ip->i_d.di_version = 2; |
3320 | dip->di_version = 2; | 3307 | dip->di_version = 2; |
3321 | ip->i_d.di_onlink = 0; | 3308 | ip->i_d.di_onlink = 0; |
3322 | dip->di_onlink = 0; | 3309 | dip->di_onlink = 0; |
3323 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 3310 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
3324 | memset(&(dip->di_pad[0]), 0, | 3311 | memset(&(dip->di_pad[0]), 0, |
3325 | sizeof(dip->di_pad)); | 3312 | sizeof(dip->di_pad)); |
3326 | ASSERT(ip->i_d.di_projid == 0); | 3313 | ASSERT(ip->i_d.di_projid == 0); |
3327 | } | 3314 | } |
3328 | } | 3315 | } |
3329 | 3316 | ||
3330 | xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); | 3317 | xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); |
3331 | if (XFS_IFORK_Q(ip)) | 3318 | if (XFS_IFORK_Q(ip)) |
3332 | xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); | 3319 | xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); |
3333 | xfs_inobp_check(mp, bp); | 3320 | xfs_inobp_check(mp, bp); |
3334 | 3321 | ||
3335 | /* | 3322 | /* |
3336 | * We've recorded everything logged in the inode, so we'd | 3323 | * We've recorded everything logged in the inode, so we'd |
3337 | * like to clear the ilf_fields bits so we don't log and | 3324 | * like to clear the ilf_fields bits so we don't log and |
3338 | * flush things unnecessarily. However, we can't stop | 3325 | * flush things unnecessarily. However, we can't stop |
3339 | * logging all this information until the data we've copied | 3326 | * logging all this information until the data we've copied |
3340 | * into the disk buffer is written to disk. If we did we might | 3327 | * into the disk buffer is written to disk. If we did we might |
3341 | * overwrite the copy of the inode in the log with all the | 3328 | * overwrite the copy of the inode in the log with all the |
3342 | * data after re-logging only part of it, and in the face of | 3329 | * data after re-logging only part of it, and in the face of |
3343 | * a crash we wouldn't have all the data we need to recover. | 3330 | * a crash we wouldn't have all the data we need to recover. |
3344 | * | 3331 | * |
3345 | * What we do is move the bits to the ili_last_fields field. | 3332 | * What we do is move the bits to the ili_last_fields field. |
3346 | * When logging the inode, these bits are moved back to the | 3333 | * When logging the inode, these bits are moved back to the |
3347 | * ilf_fields field. In the xfs_iflush_done() routine we | 3334 | * ilf_fields field. In the xfs_iflush_done() routine we |
3348 | * clear ili_last_fields, since we know that the information | 3335 | * clear ili_last_fields, since we know that the information |
3349 | * those bits represent is permanently on disk. As long as | 3336 | * those bits represent is permanently on disk. As long as |
3350 | * the flush completes before the inode is logged again, then | 3337 | * the flush completes before the inode is logged again, then |
3351 | * both ilf_fields and ili_last_fields will be cleared. | 3338 | * both ilf_fields and ili_last_fields will be cleared. |
3352 | * | 3339 | * |
3353 | * We can play with the ilf_fields bits here, because the inode | 3340 | * We can play with the ilf_fields bits here, because the inode |
3354 | * lock must be held exclusively in order to set bits there | 3341 | * lock must be held exclusively in order to set bits there |
3355 | * and the flush lock protects the ili_last_fields bits. | 3342 | * and the flush lock protects the ili_last_fields bits. |
3356 | * Set ili_logged so the flush done | 3343 | * Set ili_logged so the flush done |
3357 | * routine can tell whether or not to look in the AIL. | 3344 | * routine can tell whether or not to look in the AIL. |
3358 | * Also, store the current LSN of the inode so that we can tell | 3345 | * Also, store the current LSN of the inode so that we can tell |
3359 | * whether the item has moved in the AIL from xfs_iflush_done(). | 3346 | * whether the item has moved in the AIL from xfs_iflush_done(). |
3360 | * In order to read the lsn we need the AIL lock, because | 3347 | * In order to read the lsn we need the AIL lock, because |
3361 | * it is a 64 bit value that cannot be read atomically. | 3348 | * it is a 64 bit value that cannot be read atomically. |
3362 | */ | 3349 | */ |
3363 | if (iip != NULL && iip->ili_format.ilf_fields != 0) { | 3350 | if (iip != NULL && iip->ili_format.ilf_fields != 0) { |
3364 | iip->ili_last_fields = iip->ili_format.ilf_fields; | 3351 | iip->ili_last_fields = iip->ili_format.ilf_fields; |
3365 | iip->ili_format.ilf_fields = 0; | 3352 | iip->ili_format.ilf_fields = 0; |
3366 | iip->ili_logged = 1; | 3353 | iip->ili_logged = 1; |
3367 | 3354 | ||
3368 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, | 3355 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, |
3369 | &iip->ili_item.li_lsn); | 3356 | &iip->ili_item.li_lsn); |
3370 | 3357 | ||
3371 | /* | 3358 | /* |
3372 | * Attach the function xfs_iflush_done to the inode's | 3359 | * Attach the function xfs_iflush_done to the inode's |
3373 | * buffer. This will remove the inode from the AIL | 3360 | * buffer. This will remove the inode from the AIL |
3374 | * and unlock the inode's flush lock when the inode is | 3361 | * and unlock the inode's flush lock when the inode is |
3375 | * completely written to disk. | 3362 | * completely written to disk. |
3376 | */ | 3363 | */ |
3377 | xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) | 3364 | xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) |
3378 | xfs_iflush_done, (xfs_log_item_t *)iip); | 3365 | xfs_iflush_done, (xfs_log_item_t *)iip); |
3379 | 3366 | ||
3380 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 3367 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); |
3381 | ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); | 3368 | ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); |
3382 | } else { | 3369 | } else { |
3383 | /* | 3370 | /* |
3384 | * We're flushing an inode which is not in the AIL and has | 3371 | * We're flushing an inode which is not in the AIL and has |
3385 | * not been logged but has i_update_core set. For this | 3372 | * not been logged but has i_update_core set. For this |
3386 | * case we can use a B_DELWRI flush and immediately drop | 3373 | * case we can use a B_DELWRI flush and immediately drop |
3387 | * the inode flush lock because we can avoid the whole | 3374 | * the inode flush lock because we can avoid the whole |
3388 | * AIL state thing. It's OK to drop the flush lock now, | 3375 | * AIL state thing. It's OK to drop the flush lock now, |
3389 | * because we've already locked the buffer and to do anything | 3376 | * because we've already locked the buffer and to do anything |
3390 | * you really need both. | 3377 | * you really need both. |
3391 | */ | 3378 | */ |
3392 | if (iip != NULL) { | 3379 | if (iip != NULL) { |
3393 | ASSERT(iip->ili_logged == 0); | 3380 | ASSERT(iip->ili_logged == 0); |
3394 | ASSERT(iip->ili_last_fields == 0); | 3381 | ASSERT(iip->ili_last_fields == 0); |
3395 | ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); | 3382 | ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); |
3396 | } | 3383 | } |
3397 | xfs_ifunlock(ip); | 3384 | xfs_ifunlock(ip); |
3398 | } | 3385 | } |
3399 | 3386 | ||
3400 | return 0; | 3387 | return 0; |
3401 | 3388 | ||
3402 | corrupt_out: | 3389 | corrupt_out: |
3403 | return XFS_ERROR(EFSCORRUPTED); | 3390 | return XFS_ERROR(EFSCORRUPTED); |
3404 | } | 3391 | } |
3405 | 3392 | ||
3406 | 3393 | ||
3407 | 3394 | ||
3408 | #ifdef XFS_ILOCK_TRACE | 3395 | #ifdef XFS_ILOCK_TRACE |
3409 | ktrace_t *xfs_ilock_trace_buf; | 3396 | ktrace_t *xfs_ilock_trace_buf; |
3410 | 3397 | ||
/*
 * Record an inode lock event in the inode's ktrace buffer.  Captures the
 * operation code, the lock flags, the caller's return address and the
 * current cpu/pid so lock ordering problems can be reconstructed later.
 * Compiled only when XFS_ILOCK_TRACE is configured.
 */
void
xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
{
	ktrace_enter(ip->i_lock_trace,
		     (void *)ip,
		     (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */
		     (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */
		     (void *)ra, /* caller of ilock */
		     (void *)(unsigned long)current_cpu(),
		     (void *)(unsigned long)current_pid(),
		     NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
}
3423 | #endif | 3410 | #endif |
3424 | 3411 | ||
3425 | /* | 3412 | /* |
3426 | * Return a pointer to the extent record at file index idx. | 3413 | * Return a pointer to the extent record at file index idx. |
3427 | */ | 3414 | */ |
3428 | xfs_bmbt_rec_host_t * | 3415 | xfs_bmbt_rec_host_t * |
3429 | xfs_iext_get_ext( | 3416 | xfs_iext_get_ext( |
3430 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3417 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3431 | xfs_extnum_t idx) /* index of target extent */ | 3418 | xfs_extnum_t idx) /* index of target extent */ |
3432 | { | 3419 | { |
3433 | ASSERT(idx >= 0); | 3420 | ASSERT(idx >= 0); |
3434 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { | 3421 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { |
3435 | return ifp->if_u1.if_ext_irec->er_extbuf; | 3422 | return ifp->if_u1.if_ext_irec->er_extbuf; |
3436 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | 3423 | } else if (ifp->if_flags & XFS_IFEXTIREC) { |
3437 | xfs_ext_irec_t *erp; /* irec pointer */ | 3424 | xfs_ext_irec_t *erp; /* irec pointer */ |
3438 | int erp_idx = 0; /* irec index */ | 3425 | int erp_idx = 0; /* irec index */ |
3439 | xfs_extnum_t page_idx = idx; /* ext index in target list */ | 3426 | xfs_extnum_t page_idx = idx; /* ext index in target list */ |
3440 | 3427 | ||
3441 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); | 3428 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); |
3442 | return &erp->er_extbuf[page_idx]; | 3429 | return &erp->er_extbuf[page_idx]; |
3443 | } else if (ifp->if_bytes) { | 3430 | } else if (ifp->if_bytes) { |
3444 | return &ifp->if_u1.if_extents[idx]; | 3431 | return &ifp->if_u1.if_extents[idx]; |
3445 | } else { | 3432 | } else { |
3446 | return NULL; | 3433 | return NULL; |
3447 | } | 3434 | } |
3448 | } | 3435 | } |
3449 | 3436 | ||
3450 | /* | 3437 | /* |
3451 | * Insert new item(s) into the extent records for incore inode | 3438 | * Insert new item(s) into the extent records for incore inode |
3452 | * fork 'ifp'. 'count' new items are inserted at index 'idx'. | 3439 | * fork 'ifp'. 'count' new items are inserted at index 'idx'. |
3453 | */ | 3440 | */ |
3454 | void | 3441 | void |
3455 | xfs_iext_insert( | 3442 | xfs_iext_insert( |
3456 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3443 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3457 | xfs_extnum_t idx, /* starting index of new items */ | 3444 | xfs_extnum_t idx, /* starting index of new items */ |
3458 | xfs_extnum_t count, /* number of inserted items */ | 3445 | xfs_extnum_t count, /* number of inserted items */ |
3459 | xfs_bmbt_irec_t *new) /* items to insert */ | 3446 | xfs_bmbt_irec_t *new) /* items to insert */ |
3460 | { | 3447 | { |
3461 | xfs_extnum_t i; /* extent record index */ | 3448 | xfs_extnum_t i; /* extent record index */ |
3462 | 3449 | ||
3463 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | 3450 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); |
3464 | xfs_iext_add(ifp, idx, count); | 3451 | xfs_iext_add(ifp, idx, count); |
3465 | for (i = idx; i < idx + count; i++, new++) | 3452 | for (i = idx; i < idx + count; i++, new++) |
3466 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); | 3453 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); |
3467 | } | 3454 | } |
3468 | 3455 | ||
/*
 * This is called when the amount of space required for incore file
 * extents needs to be increased. The ext_diff parameter stores the
 * number of new extents being added and the idx parameter contains
 * the extent index where the new extents will be added. If the new
 * extents are being appended, then we just need to (re)allocate and
 * initialize the space. Otherwise, if the new extents are being
 * inserted into the middle of the existing entries, a bit more work
 * is required to make room for the new extents to be inserted. The
 * caller is responsible for filling in the new extent entries upon
 * return.
 */
void
xfs_iext_add(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin adding exts */
	int		ext_diff)	/* number of extents to add */
{
	int		byte_diff;	/* new bytes being added */
	int		new_size;	/* size of extents after adding */
	xfs_extnum_t	nextents;	/* number of extents in file */

	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	ASSERT((idx >= 0) && (idx <= nextents));
	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
	new_size = ifp->if_bytes + byte_diff;
	/*
	 * If the new number of extents (nextents + ext_diff)
	 * fits inside the inode, then continue to use the inline
	 * extent buffer.
	 */
	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
		if (idx < nextents) {
			/*
			 * Inserting in the middle: shift the tail up and
			 * zero the hole that the caller will fill in.
			 */
			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
				&ifp->if_u2.if_inline_ext[idx],
				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
		}
		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
		ifp->if_real_bytes = 0;
		ifp->if_lastex = nextents + ext_diff;
	}
	/*
	 * Otherwise use a linear (direct) extent list.
	 * If the extents are currently inside the inode,
	 * xfs_iext_realloc_direct will switch us from
	 * inline to direct extent allocation mode.
	 */
	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
		xfs_iext_realloc_direct(ifp, new_size);
		if (idx < nextents) {
			/* As above: open a zeroed hole at idx. */
			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
				&ifp->if_u1.if_extents[idx],
				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
		}
	}
	/* Indirection array */
	else {
		xfs_ext_irec_t	*erp;
		int		erp_idx = 0;
		int		page_idx = idx;

		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
		if (ifp->if_flags & XFS_IFEXTIREC) {
			/* Translate the file extent index to page+offset. */
			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
		} else {
			/*
			 * First time past the linear limit: convert the
			 * direct list into an indirection array.
			 */
			xfs_iext_irec_init(ifp);
			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			erp = ifp->if_u1.if_ext_irec;
		}
		/* Extents fit in target extent page */
		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
			if (page_idx < erp->er_extcount) {
				memmove(&erp->er_extbuf[page_idx + ext_diff],
					&erp->er_extbuf[page_idx],
					(erp->er_extcount - page_idx) *
					sizeof(xfs_bmbt_rec_t));
				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
			}
			erp->er_extcount += ext_diff;
			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		}
		/* Insert a new extent page */
		else if (erp) {
			xfs_iext_add_indirect_multi(ifp,
				erp_idx, page_idx, ext_diff);
		}
		/*
		 * If extent(s) are being appended to the last page in
		 * the indirection array and the new extent(s) don't fit
		 * in the page, then erp is NULL and erp_idx is set to
		 * the next index needed in the indirection array.
		 */
		else {
			int	count = ext_diff;

			while (count) {
				erp = xfs_iext_irec_new(ifp, erp_idx);
				erp->er_extcount = count;
				count -= MIN(count, (int)XFS_LINEAR_EXTS);
				if (count) {
					erp_idx++;
				}
			}
		}
	}
	ifp->if_bytes = new_size;
}
3578 | 3565 | ||
/*
 * This is called when incore extents are being added to the indirection
 * array and the new extents do not fit in the target extent list. The
 * erp_idx parameter contains the irec index for the target extent list
 * in the indirection array, and the idx parameter contains the extent
 * index within the list. The number of extents being added is stored
 * in the count parameter.
 *
 *    |-------|   |-------|
 *    |       |   |       |    idx - number of extents before idx
 *    |  idx  |   | count |
 *    |       |   |       |    count - number of extents being inserted at idx
 *    |-------|   |-------|
 *    | count |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 */
void
xfs_iext_add_indirect_multi(
	xfs_ifork_t	*ifp,			/* inode fork pointer */
	int		erp_idx,		/* target extent irec index */
	xfs_extnum_t	idx,			/* index within target list */
	int		count)			/* new extents being added */
{
	int		byte_diff;	/* new bytes being added */
	xfs_ext_irec_t	*erp;		/* pointer to irec entry */
	xfs_extnum_t	ext_diff;	/* number of extents to add */
	xfs_extnum_t	ext_cnt;	/* new extents still needed */
	xfs_extnum_t	nex2;		/* extents after idx + count */
	xfs_bmbt_rec_t	*nex2_ep = NULL; /* temp list for nex2 extents */
	int		nlists;		/* number of irec's (lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	nex2 = erp->er_extcount - idx;
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;

	/*
	 * Save second part of target extent list (all extents past idx)
	 * in a temporary buffer, so the new extents can be inserted in
	 * front of them afterwards.
	 */
	if (nex2) {
		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
		erp->er_extcount -= nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
		memset(&erp->er_extbuf[idx], 0, byte_diff);
	}

	/*
	 * Add the new extents to the end of the target
	 * list, then allocate new irec record(s) and
	 * extent buffer(s) as needed to store the rest
	 * of the new extents.
	 */
	ext_cnt = count;
	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
	if (ext_diff) {
		erp->er_extcount += ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}
	while (ext_cnt) {
		erp_idx++;
		erp = xfs_iext_irec_new(ifp, erp_idx);
		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
		erp->er_extcount = ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}

	/* Add nex2 extents back to indirection array */
	if (nex2) {
		xfs_extnum_t	ext_avail;
		int		i;

		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
		i = 0;
		/*
		 * If nex2 extents fit in the current page, append
		 * nex2_ep after the new extents.
		 */
		if (nex2 <= ext_avail) {
			i = erp->er_extcount;
		}
		/*
		 * Otherwise, check if space is available in the
		 * next page.
		 */
		else if ((erp_idx < nlists - 1) &&
			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
			erp_idx++;
			erp++;
			/* Create a hole for nex2 extents */
			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
		}
		/*
		 * Final choice, create a new extent page for
		 * nex2 extents.
		 */
		else {
			erp_idx++;
			erp = xfs_iext_irec_new(ifp, erp_idx);
		}
		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
		kmem_free(nex2_ep);
		erp->er_extcount += nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
	}
}
3691 | 3678 | ||
3692 | /* | 3679 | /* |
3693 | * This is called when the amount of space required for incore file | 3680 | * This is called when the amount of space required for incore file |
3694 | * extents needs to be decreased. The ext_diff parameter stores the | 3681 | * extents needs to be decreased. The ext_diff parameter stores the |
3695 | * number of extents to be removed and the idx parameter contains | 3682 | * number of extents to be removed and the idx parameter contains |
3696 | * the extent index where the extents will be removed from. | 3683 | * the extent index where the extents will be removed from. |
3697 | * | 3684 | * |
3698 | * If the amount of space needed has decreased below the linear | 3685 | * If the amount of space needed has decreased below the linear |
3699 | * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous | 3686 | * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous |
3700 | * extent array. Otherwise, use kmem_realloc() to adjust the | 3687 | * extent array. Otherwise, use kmem_realloc() to adjust the |
3701 | * size to what is needed. | 3688 | * size to what is needed. |
3702 | */ | 3689 | */ |
3703 | void | 3690 | void |
3704 | xfs_iext_remove( | 3691 | xfs_iext_remove( |
3705 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3692 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3706 | xfs_extnum_t idx, /* index to begin removing exts */ | 3693 | xfs_extnum_t idx, /* index to begin removing exts */ |
3707 | int ext_diff) /* number of extents to remove */ | 3694 | int ext_diff) /* number of extents to remove */ |
3708 | { | 3695 | { |
3709 | xfs_extnum_t nextents; /* number of extents in file */ | 3696 | xfs_extnum_t nextents; /* number of extents in file */ |
3710 | int new_size; /* size of extents after removal */ | 3697 | int new_size; /* size of extents after removal */ |
3711 | 3698 | ||
3712 | ASSERT(ext_diff > 0); | 3699 | ASSERT(ext_diff > 0); |
3713 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 3700 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
3714 | new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); | 3701 | new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); |
3715 | 3702 | ||
3716 | if (new_size == 0) { | 3703 | if (new_size == 0) { |
3717 | xfs_iext_destroy(ifp); | 3704 | xfs_iext_destroy(ifp); |
3718 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | 3705 | } else if (ifp->if_flags & XFS_IFEXTIREC) { |
3719 | xfs_iext_remove_indirect(ifp, idx, ext_diff); | 3706 | xfs_iext_remove_indirect(ifp, idx, ext_diff); |
3720 | } else if (ifp->if_real_bytes) { | 3707 | } else if (ifp->if_real_bytes) { |
3721 | xfs_iext_remove_direct(ifp, idx, ext_diff); | 3708 | xfs_iext_remove_direct(ifp, idx, ext_diff); |
3722 | } else { | 3709 | } else { |
3723 | xfs_iext_remove_inline(ifp, idx, ext_diff); | 3710 | xfs_iext_remove_inline(ifp, idx, ext_diff); |
3724 | } | 3711 | } |
3725 | ifp->if_bytes = new_size; | 3712 | ifp->if_bytes = new_size; |
3726 | } | 3713 | } |
3727 | 3714 | ||
/*
 * This removes ext_diff extents from the inline buffer, beginning
 * at extent index idx.  The caller (xfs_iext_remove) updates
 * ifp->if_bytes afterwards.
 */
void
xfs_iext_remove_inline(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin removing exts */
	int		ext_diff)	/* number of extents to remove */
{
	int		nextents;	/* number of extents in file */

	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
	ASSERT(idx < XFS_INLINE_EXTS);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	ASSERT(((nextents - ext_diff) > 0) &&
		(nextents - ext_diff) < XFS_INLINE_EXTS);

	if (idx + ext_diff < nextents) {
		/*
		 * Removing from the middle: slide the tail down over the
		 * removed records, then zero the now-unused slots at the end.
		 */
		memmove(&ifp->if_u2.if_inline_ext[idx],
			&ifp->if_u2.if_inline_ext[idx + ext_diff],
			(nextents - (idx + ext_diff)) *
			sizeof(xfs_bmbt_rec_t));
		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
			0, ext_diff * sizeof(xfs_bmbt_rec_t));
	} else {
		/* Removing the tail: just zero the removed slots. */
		memset(&ifp->if_u2.if_inline_ext[idx], 0,
			ext_diff * sizeof(xfs_bmbt_rec_t));
	}
}
3758 | 3745 | ||
/*
 * This removes ext_diff extents from a linear (direct) extent list,
 * beginning at extent index idx. If the extents are being removed
 * from the end of the list (ie. truncate) then we just need to re-
 * allocate the list to remove the extra space. Otherwise, if the
 * extents are being removed from the middle of the existing extent
 * entries, then we first need to move the extent records beginning
 * at idx + ext_diff up in the list to overwrite the records being
 * removed, then remove the extra space via kmem_realloc.
 */
void
xfs_iext_remove_direct(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin removing exts */
	int		ext_diff)	/* number of extents to remove */
{
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		new_size;	/* size of extents after removal */

	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
	new_size = ifp->if_bytes -
		(ext_diff * sizeof(xfs_bmbt_rec_t));
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);

	if (new_size == 0) {
		/* Nothing left: tear down the extent space entirely. */
		xfs_iext_destroy(ifp);
		return;
	}
	/* Move extents up in the list (if needed) */
	if (idx + ext_diff < nextents) {
		memmove(&ifp->if_u1.if_extents[idx],
			&ifp->if_u1.if_extents[idx + ext_diff],
			(nextents - (idx + ext_diff)) *
			sizeof(xfs_bmbt_rec_t));
	}
	/* Zero the now-unused slots at the tail before shrinking. */
	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
		0, ext_diff * sizeof(xfs_bmbt_rec_t));
	/*
	 * Reallocate the direct extent list. If the extents
	 * will fit inside the inode then xfs_iext_realloc_direct
	 * will switch from direct to inline extent allocation
	 * mode for us.
	 */
	xfs_iext_realloc_direct(ifp, new_size);
	ifp->if_bytes = new_size;
}
3805 | 3792 | ||
3806 | /* | 3793 | /* |
3807 | * This is called when incore extents are being removed from the | 3794 | * This is called when incore extents are being removed from the |
3808 | * indirection array and the extents being removed span multiple extent | 3795 | * indirection array and the extents being removed span multiple extent |
3809 | * buffers. The idx parameter contains the file extent index where we | 3796 | * buffers. The idx parameter contains the file extent index where we |
3810 | * want to begin removing extents, and the count parameter contains | 3797 | * want to begin removing extents, and the count parameter contains |
3811 | * how many extents need to be removed. | 3798 | * how many extents need to be removed. |
3812 | * | 3799 | * |
3813 | * |-------| |-------| | 3800 | * |-------| |-------| |
3814 | * | nex1 | | | nex1 - number of extents before idx | 3801 | * | nex1 | | | nex1 - number of extents before idx |
3815 | * |-------| | count | | 3802 | * |-------| | count | |
3816 | * | | | | count - number of extents being removed at idx | 3803 | * | | | | count - number of extents being removed at idx |
3817 | * | count | |-------| | 3804 | * | count | |-------| |
3818 | * | | | nex2 | nex2 - number of extents after idx + count | 3805 | * | | | nex2 | nex2 - number of extents after idx + count |
3819 | * |-------| |-------| | 3806 | * |-------| |-------| |
3820 | */ | 3807 | */ |
3821 | void | 3808 | void |
3822 | xfs_iext_remove_indirect( | 3809 | xfs_iext_remove_indirect( |
3823 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3810 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3824 | xfs_extnum_t idx, /* index to begin removing extents */ | 3811 | xfs_extnum_t idx, /* index to begin removing extents */ |
3825 | int count) /* number of extents to remove */ | 3812 | int count) /* number of extents to remove */ |
3826 | { | 3813 | { |
3827 | xfs_ext_irec_t *erp; /* indirection array pointer */ | 3814 | xfs_ext_irec_t *erp; /* indirection array pointer */ |
3828 | int erp_idx = 0; /* indirection array index */ | 3815 | int erp_idx = 0; /* indirection array index */ |
3829 | xfs_extnum_t ext_cnt; /* extents left to remove */ | 3816 | xfs_extnum_t ext_cnt; /* extents left to remove */ |
3830 | xfs_extnum_t ext_diff; /* extents to remove in current list */ | 3817 | xfs_extnum_t ext_diff; /* extents to remove in current list */ |
3831 | xfs_extnum_t nex1; /* number of extents before idx */ | 3818 | xfs_extnum_t nex1; /* number of extents before idx */ |
3832 | xfs_extnum_t nex2; /* extents after idx + count */ | 3819 | xfs_extnum_t nex2; /* extents after idx + count */ |
3833 | int nlists; /* entries in indirection array */ | 3820 | int nlists; /* entries in indirection array */ |
3834 | int page_idx = idx; /* index in target extent list */ | 3821 | int page_idx = idx; /* index in target extent list */ |
3835 | 3822 | ||
3836 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 3823 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
3837 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); | 3824 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); |
3838 | ASSERT(erp != NULL); | 3825 | ASSERT(erp != NULL); |
3839 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 3826 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
3840 | nex1 = page_idx; | 3827 | nex1 = page_idx; |
3841 | ext_cnt = count; | 3828 | ext_cnt = count; |
3842 | while (ext_cnt) { | 3829 | while (ext_cnt) { |
3843 | nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); | 3830 | nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); |
3844 | ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); | 3831 | ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); |
3845 | /* | 3832 | /* |
3846 | * Check for deletion of entire list; | 3833 | * Check for deletion of entire list; |
3847 | * xfs_iext_irec_remove() updates extent offsets. | 3834 | * xfs_iext_irec_remove() updates extent offsets. |
3848 | */ | 3835 | */ |
3849 | if (ext_diff == erp->er_extcount) { | 3836 | if (ext_diff == erp->er_extcount) { |
3850 | xfs_iext_irec_remove(ifp, erp_idx); | 3837 | xfs_iext_irec_remove(ifp, erp_idx); |
3851 | ext_cnt -= ext_diff; | 3838 | ext_cnt -= ext_diff; |
3852 | nex1 = 0; | 3839 | nex1 = 0; |
3853 | if (ext_cnt) { | 3840 | if (ext_cnt) { |
3854 | ASSERT(erp_idx < ifp->if_real_bytes / | 3841 | ASSERT(erp_idx < ifp->if_real_bytes / |
3855 | XFS_IEXT_BUFSZ); | 3842 | XFS_IEXT_BUFSZ); |
3856 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | 3843 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; |
3857 | nex1 = 0; | 3844 | nex1 = 0; |
3858 | continue; | 3845 | continue; |
3859 | } else { | 3846 | } else { |
3860 | break; | 3847 | break; |
3861 | } | 3848 | } |
3862 | } | 3849 | } |
3863 | /* Move extents up (if needed) */ | 3850 | /* Move extents up (if needed) */ |
3864 | if (nex2) { | 3851 | if (nex2) { |
3865 | memmove(&erp->er_extbuf[nex1], | 3852 | memmove(&erp->er_extbuf[nex1], |
3866 | &erp->er_extbuf[nex1 + ext_diff], | 3853 | &erp->er_extbuf[nex1 + ext_diff], |
3867 | nex2 * sizeof(xfs_bmbt_rec_t)); | 3854 | nex2 * sizeof(xfs_bmbt_rec_t)); |
3868 | } | 3855 | } |
3869 | /* Zero out rest of page */ | 3856 | /* Zero out rest of page */ |
3870 | memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - | 3857 | memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - |
3871 | ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); | 3858 | ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); |
3872 | /* Update remaining counters */ | 3859 | /* Update remaining counters */ |
3873 | erp->er_extcount -= ext_diff; | 3860 | erp->er_extcount -= ext_diff; |
3874 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); | 3861 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); |
3875 | ext_cnt -= ext_diff; | 3862 | ext_cnt -= ext_diff; |
3876 | nex1 = 0; | 3863 | nex1 = 0; |
3877 | erp_idx++; | 3864 | erp_idx++; |
3878 | erp++; | 3865 | erp++; |
3879 | } | 3866 | } |
3880 | ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); | 3867 | ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); |
3881 | xfs_iext_irec_compact(ifp); | 3868 | xfs_iext_irec_compact(ifp); |
3882 | } | 3869 | } |
3883 | 3870 | ||
3884 | /* | 3871 | /* |
3885 | * Create, destroy, or resize a linear (direct) block of extents. | 3872 | * Create, destroy, or resize a linear (direct) block of extents. |
3886 | */ | 3873 | */ |
3887 | void | 3874 | void |
3888 | xfs_iext_realloc_direct( | 3875 | xfs_iext_realloc_direct( |
3889 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3876 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3890 | int new_size) /* new size of extents */ | 3877 | int new_size) /* new size of extents */ |
3891 | { | 3878 | { |
3892 | int rnew_size; /* real new size of extents */ | 3879 | int rnew_size; /* real new size of extents */ |
3893 | 3880 | ||
3894 | rnew_size = new_size; | 3881 | rnew_size = new_size; |
3895 | 3882 | ||
3896 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || | 3883 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || |
3897 | ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && | 3884 | ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && |
3898 | (new_size != ifp->if_real_bytes))); | 3885 | (new_size != ifp->if_real_bytes))); |
3899 | 3886 | ||
3900 | /* Free extent records */ | 3887 | /* Free extent records */ |
3901 | if (new_size == 0) { | 3888 | if (new_size == 0) { |
3902 | xfs_iext_destroy(ifp); | 3889 | xfs_iext_destroy(ifp); |
3903 | } | 3890 | } |
3904 | /* Resize direct extent list and zero any new bytes */ | 3891 | /* Resize direct extent list and zero any new bytes */ |
3905 | else if (ifp->if_real_bytes) { | 3892 | else if (ifp->if_real_bytes) { |
3906 | /* Check if extents will fit inside the inode */ | 3893 | /* Check if extents will fit inside the inode */ |
3907 | if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { | 3894 | if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { |
3908 | xfs_iext_direct_to_inline(ifp, new_size / | 3895 | xfs_iext_direct_to_inline(ifp, new_size / |
3909 | (uint)sizeof(xfs_bmbt_rec_t)); | 3896 | (uint)sizeof(xfs_bmbt_rec_t)); |
3910 | ifp->if_bytes = new_size; | 3897 | ifp->if_bytes = new_size; |
3911 | return; | 3898 | return; |
3912 | } | 3899 | } |
3913 | if (!is_power_of_2(new_size)){ | 3900 | if (!is_power_of_2(new_size)){ |
3914 | rnew_size = roundup_pow_of_two(new_size); | 3901 | rnew_size = roundup_pow_of_two(new_size); |
3915 | } | 3902 | } |
3916 | if (rnew_size != ifp->if_real_bytes) { | 3903 | if (rnew_size != ifp->if_real_bytes) { |
3917 | ifp->if_u1.if_extents = | 3904 | ifp->if_u1.if_extents = |
3918 | kmem_realloc(ifp->if_u1.if_extents, | 3905 | kmem_realloc(ifp->if_u1.if_extents, |
3919 | rnew_size, | 3906 | rnew_size, |
3920 | ifp->if_real_bytes, KM_NOFS); | 3907 | ifp->if_real_bytes, KM_NOFS); |
3921 | } | 3908 | } |
3922 | if (rnew_size > ifp->if_real_bytes) { | 3909 | if (rnew_size > ifp->if_real_bytes) { |
3923 | memset(&ifp->if_u1.if_extents[ifp->if_bytes / | 3910 | memset(&ifp->if_u1.if_extents[ifp->if_bytes / |
3924 | (uint)sizeof(xfs_bmbt_rec_t)], 0, | 3911 | (uint)sizeof(xfs_bmbt_rec_t)], 0, |
3925 | rnew_size - ifp->if_real_bytes); | 3912 | rnew_size - ifp->if_real_bytes); |
3926 | } | 3913 | } |
3927 | } | 3914 | } |
3928 | /* | 3915 | /* |
3929 | * Switch from the inline extent buffer to a direct | 3916 | * Switch from the inline extent buffer to a direct |
3930 | * extent list. Be sure to include the inline extent | 3917 | * extent list. Be sure to include the inline extent |
3931 | * bytes in new_size. | 3918 | * bytes in new_size. |
3932 | */ | 3919 | */ |
3933 | else { | 3920 | else { |
3934 | new_size += ifp->if_bytes; | 3921 | new_size += ifp->if_bytes; |
3935 | if (!is_power_of_2(new_size)) { | 3922 | if (!is_power_of_2(new_size)) { |
3936 | rnew_size = roundup_pow_of_two(new_size); | 3923 | rnew_size = roundup_pow_of_two(new_size); |
3937 | } | 3924 | } |
3938 | xfs_iext_inline_to_direct(ifp, rnew_size); | 3925 | xfs_iext_inline_to_direct(ifp, rnew_size); |
3939 | } | 3926 | } |
3940 | ifp->if_real_bytes = rnew_size; | 3927 | ifp->if_real_bytes = rnew_size; |
3941 | ifp->if_bytes = new_size; | 3928 | ifp->if_bytes = new_size; |
3942 | } | 3929 | } |
3943 | 3930 | ||
3944 | /* | 3931 | /* |
3945 | * Switch from linear (direct) extent records to inline buffer. | 3932 | * Switch from linear (direct) extent records to inline buffer. |
3946 | */ | 3933 | */ |
3947 | void | 3934 | void |
3948 | xfs_iext_direct_to_inline( | 3935 | xfs_iext_direct_to_inline( |
3949 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3936 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3950 | xfs_extnum_t nextents) /* number of extents in file */ | 3937 | xfs_extnum_t nextents) /* number of extents in file */ |
3951 | { | 3938 | { |
3952 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | 3939 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); |
3953 | ASSERT(nextents <= XFS_INLINE_EXTS); | 3940 | ASSERT(nextents <= XFS_INLINE_EXTS); |
3954 | /* | 3941 | /* |
3955 | * The inline buffer was zeroed when we switched | 3942 | * The inline buffer was zeroed when we switched |
3956 | * from inline to direct extent allocation mode, | 3943 | * from inline to direct extent allocation mode, |
3957 | * so we don't need to clear it here. | 3944 | * so we don't need to clear it here. |
3958 | */ | 3945 | */ |
3959 | memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, | 3946 | memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, |
3960 | nextents * sizeof(xfs_bmbt_rec_t)); | 3947 | nextents * sizeof(xfs_bmbt_rec_t)); |
3961 | kmem_free(ifp->if_u1.if_extents); | 3948 | kmem_free(ifp->if_u1.if_extents); |
3962 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | 3949 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; |
3963 | ifp->if_real_bytes = 0; | 3950 | ifp->if_real_bytes = 0; |
3964 | } | 3951 | } |
3965 | 3952 | ||
3966 | /* | 3953 | /* |
3967 | * Switch from inline buffer to linear (direct) extent records. | 3954 | * Switch from inline buffer to linear (direct) extent records. |
3968 | * new_size should already be rounded up to the next power of 2 | 3955 | * new_size should already be rounded up to the next power of 2 |
3969 | * by the caller (when appropriate), so use new_size as it is. | 3956 | * by the caller (when appropriate), so use new_size as it is. |
3970 | * However, since new_size may be rounded up, we can't update | 3957 | * However, since new_size may be rounded up, we can't update |
3971 | * if_bytes here. It is the caller's responsibility to update | 3958 | * if_bytes here. It is the caller's responsibility to update |
3972 | * if_bytes upon return. | 3959 | * if_bytes upon return. |
3973 | */ | 3960 | */ |
3974 | void | 3961 | void |
3975 | xfs_iext_inline_to_direct( | 3962 | xfs_iext_inline_to_direct( |
3976 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3963 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3977 | int new_size) /* number of extents in file */ | 3964 | int new_size) /* number of extents in file */ |
3978 | { | 3965 | { |
3979 | ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); | 3966 | ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); |
3980 | memset(ifp->if_u1.if_extents, 0, new_size); | 3967 | memset(ifp->if_u1.if_extents, 0, new_size); |
3981 | if (ifp->if_bytes) { | 3968 | if (ifp->if_bytes) { |
3982 | memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, | 3969 | memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, |
3983 | ifp->if_bytes); | 3970 | ifp->if_bytes); |
3984 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * | 3971 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * |
3985 | sizeof(xfs_bmbt_rec_t)); | 3972 | sizeof(xfs_bmbt_rec_t)); |
3986 | } | 3973 | } |
3987 | ifp->if_real_bytes = new_size; | 3974 | ifp->if_real_bytes = new_size; |
3988 | } | 3975 | } |
3989 | 3976 | ||
3990 | /* | 3977 | /* |
3991 | * Resize an extent indirection array to new_size bytes. | 3978 | * Resize an extent indirection array to new_size bytes. |
3992 | */ | 3979 | */ |
3993 | void | 3980 | void |
3994 | xfs_iext_realloc_indirect( | 3981 | xfs_iext_realloc_indirect( |
3995 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3982 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3996 | int new_size) /* new indirection array size */ | 3983 | int new_size) /* new indirection array size */ |
3997 | { | 3984 | { |
3998 | int nlists; /* number of irec's (ex lists) */ | 3985 | int nlists; /* number of irec's (ex lists) */ |
3999 | int size; /* current indirection array size */ | 3986 | int size; /* current indirection array size */ |
4000 | 3987 | ||
4001 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 3988 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4002 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 3989 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4003 | size = nlists * sizeof(xfs_ext_irec_t); | 3990 | size = nlists * sizeof(xfs_ext_irec_t); |
4004 | ASSERT(ifp->if_real_bytes); | 3991 | ASSERT(ifp->if_real_bytes); |
4005 | ASSERT((new_size >= 0) && (new_size != size)); | 3992 | ASSERT((new_size >= 0) && (new_size != size)); |
4006 | if (new_size == 0) { | 3993 | if (new_size == 0) { |
4007 | xfs_iext_destroy(ifp); | 3994 | xfs_iext_destroy(ifp); |
4008 | } else { | 3995 | } else { |
4009 | ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) | 3996 | ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) |
4010 | kmem_realloc(ifp->if_u1.if_ext_irec, | 3997 | kmem_realloc(ifp->if_u1.if_ext_irec, |
4011 | new_size, size, KM_NOFS); | 3998 | new_size, size, KM_NOFS); |
4012 | } | 3999 | } |
4013 | } | 4000 | } |
4014 | 4001 | ||
4015 | /* | 4002 | /* |
4016 | * Switch from indirection array to linear (direct) extent allocations. | 4003 | * Switch from indirection array to linear (direct) extent allocations. |
4017 | */ | 4004 | */ |
4018 | void | 4005 | void |
4019 | xfs_iext_indirect_to_direct( | 4006 | xfs_iext_indirect_to_direct( |
4020 | xfs_ifork_t *ifp) /* inode fork pointer */ | 4007 | xfs_ifork_t *ifp) /* inode fork pointer */ |
4021 | { | 4008 | { |
4022 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | 4009 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ |
4023 | xfs_extnum_t nextents; /* number of extents in file */ | 4010 | xfs_extnum_t nextents; /* number of extents in file */ |
4024 | int size; /* size of file extents */ | 4011 | int size; /* size of file extents */ |
4025 | 4012 | ||
4026 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4013 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4027 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4014 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4028 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4015 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
4029 | size = nextents * sizeof(xfs_bmbt_rec_t); | 4016 | size = nextents * sizeof(xfs_bmbt_rec_t); |
4030 | 4017 | ||
4031 | xfs_iext_irec_compact_pages(ifp); | 4018 | xfs_iext_irec_compact_pages(ifp); |
4032 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | 4019 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); |
4033 | 4020 | ||
4034 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | 4021 | ep = ifp->if_u1.if_ext_irec->er_extbuf; |
4035 | kmem_free(ifp->if_u1.if_ext_irec); | 4022 | kmem_free(ifp->if_u1.if_ext_irec); |
4036 | ifp->if_flags &= ~XFS_IFEXTIREC; | 4023 | ifp->if_flags &= ~XFS_IFEXTIREC; |
4037 | ifp->if_u1.if_extents = ep; | 4024 | ifp->if_u1.if_extents = ep; |
4038 | ifp->if_bytes = size; | 4025 | ifp->if_bytes = size; |
4039 | if (nextents < XFS_LINEAR_EXTS) { | 4026 | if (nextents < XFS_LINEAR_EXTS) { |
4040 | xfs_iext_realloc_direct(ifp, size); | 4027 | xfs_iext_realloc_direct(ifp, size); |
4041 | } | 4028 | } |
4042 | } | 4029 | } |
4043 | 4030 | ||
4044 | /* | 4031 | /* |
4045 | * Free incore file extents. | 4032 | * Free incore file extents. |
4046 | */ | 4033 | */ |
4047 | void | 4034 | void |
4048 | xfs_iext_destroy( | 4035 | xfs_iext_destroy( |
4049 | xfs_ifork_t *ifp) /* inode fork pointer */ | 4036 | xfs_ifork_t *ifp) /* inode fork pointer */ |
4050 | { | 4037 | { |
4051 | if (ifp->if_flags & XFS_IFEXTIREC) { | 4038 | if (ifp->if_flags & XFS_IFEXTIREC) { |
4052 | int erp_idx; | 4039 | int erp_idx; |
4053 | int nlists; | 4040 | int nlists; |
4054 | 4041 | ||
4055 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4042 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4056 | for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { | 4043 | for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { |
4057 | xfs_iext_irec_remove(ifp, erp_idx); | 4044 | xfs_iext_irec_remove(ifp, erp_idx); |
4058 | } | 4045 | } |
4059 | ifp->if_flags &= ~XFS_IFEXTIREC; | 4046 | ifp->if_flags &= ~XFS_IFEXTIREC; |
4060 | } else if (ifp->if_real_bytes) { | 4047 | } else if (ifp->if_real_bytes) { |
4061 | kmem_free(ifp->if_u1.if_extents); | 4048 | kmem_free(ifp->if_u1.if_extents); |
4062 | } else if (ifp->if_bytes) { | 4049 | } else if (ifp->if_bytes) { |
4063 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * | 4050 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * |
4064 | sizeof(xfs_bmbt_rec_t)); | 4051 | sizeof(xfs_bmbt_rec_t)); |
4065 | } | 4052 | } |
4066 | ifp->if_u1.if_extents = NULL; | 4053 | ifp->if_u1.if_extents = NULL; |
4067 | ifp->if_real_bytes = 0; | 4054 | ifp->if_real_bytes = 0; |
4068 | ifp->if_bytes = 0; | 4055 | ifp->if_bytes = 0; |
4069 | } | 4056 | } |
4070 | 4057 | ||
4071 | /* | 4058 | /* |
4072 | * Return a pointer to the extent record for file system block bno. | 4059 | * Return a pointer to the extent record for file system block bno. |
4073 | */ | 4060 | */ |
4074 | xfs_bmbt_rec_host_t * /* pointer to found extent record */ | 4061 | xfs_bmbt_rec_host_t * /* pointer to found extent record */ |
4075 | xfs_iext_bno_to_ext( | 4062 | xfs_iext_bno_to_ext( |
4076 | xfs_ifork_t *ifp, /* inode fork pointer */ | 4063 | xfs_ifork_t *ifp, /* inode fork pointer */ |
4077 | xfs_fileoff_t bno, /* block number to search for */ | 4064 | xfs_fileoff_t bno, /* block number to search for */ |
4078 | xfs_extnum_t *idxp) /* index of target extent */ | 4065 | xfs_extnum_t *idxp) /* index of target extent */ |
4079 | { | 4066 | { |
4080 | xfs_bmbt_rec_host_t *base; /* pointer to first extent */ | 4067 | xfs_bmbt_rec_host_t *base; /* pointer to first extent */ |
4081 | xfs_filblks_t blockcount = 0; /* number of blocks in extent */ | 4068 | xfs_filblks_t blockcount = 0; /* number of blocks in extent */ |
4082 | xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ | 4069 | xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ |
4083 | xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ | 4070 | xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ |
4084 | int high; /* upper boundary in search */ | 4071 | int high; /* upper boundary in search */ |
4085 | xfs_extnum_t idx = 0; /* index of target extent */ | 4072 | xfs_extnum_t idx = 0; /* index of target extent */ |
4086 | int low; /* lower boundary in search */ | 4073 | int low; /* lower boundary in search */ |
4087 | xfs_extnum_t nextents; /* number of file extents */ | 4074 | xfs_extnum_t nextents; /* number of file extents */ |
4088 | xfs_fileoff_t startoff = 0; /* start offset of extent */ | 4075 | xfs_fileoff_t startoff = 0; /* start offset of extent */ |
4089 | 4076 | ||
4090 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4077 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4091 | if (nextents == 0) { | 4078 | if (nextents == 0) { |
4092 | *idxp = 0; | 4079 | *idxp = 0; |
4093 | return NULL; | 4080 | return NULL; |
4094 | } | 4081 | } |
4095 | low = 0; | 4082 | low = 0; |
4096 | if (ifp->if_flags & XFS_IFEXTIREC) { | 4083 | if (ifp->if_flags & XFS_IFEXTIREC) { |
4097 | /* Find target extent list */ | 4084 | /* Find target extent list */ |
4098 | int erp_idx = 0; | 4085 | int erp_idx = 0; |
4099 | erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); | 4086 | erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); |
4100 | base = erp->er_extbuf; | 4087 | base = erp->er_extbuf; |
4101 | high = erp->er_extcount - 1; | 4088 | high = erp->er_extcount - 1; |
4102 | } else { | 4089 | } else { |
4103 | base = ifp->if_u1.if_extents; | 4090 | base = ifp->if_u1.if_extents; |
4104 | high = nextents - 1; | 4091 | high = nextents - 1; |
4105 | } | 4092 | } |
4106 | /* Binary search extent records */ | 4093 | /* Binary search extent records */ |
4107 | while (low <= high) { | 4094 | while (low <= high) { |
4108 | idx = (low + high) >> 1; | 4095 | idx = (low + high) >> 1; |
4109 | ep = base + idx; | 4096 | ep = base + idx; |
4110 | startoff = xfs_bmbt_get_startoff(ep); | 4097 | startoff = xfs_bmbt_get_startoff(ep); |
4111 | blockcount = xfs_bmbt_get_blockcount(ep); | 4098 | blockcount = xfs_bmbt_get_blockcount(ep); |
4112 | if (bno < startoff) { | 4099 | if (bno < startoff) { |
4113 | high = idx - 1; | 4100 | high = idx - 1; |
4114 | } else if (bno >= startoff + blockcount) { | 4101 | } else if (bno >= startoff + blockcount) { |
4115 | low = idx + 1; | 4102 | low = idx + 1; |
4116 | } else { | 4103 | } else { |
4117 | /* Convert back to file-based extent index */ | 4104 | /* Convert back to file-based extent index */ |
4118 | if (ifp->if_flags & XFS_IFEXTIREC) { | 4105 | if (ifp->if_flags & XFS_IFEXTIREC) { |
4119 | idx += erp->er_extoff; | 4106 | idx += erp->er_extoff; |
4120 | } | 4107 | } |
4121 | *idxp = idx; | 4108 | *idxp = idx; |
4122 | return ep; | 4109 | return ep; |
4123 | } | 4110 | } |
4124 | } | 4111 | } |
4125 | /* Convert back to file-based extent index */ | 4112 | /* Convert back to file-based extent index */ |
4126 | if (ifp->if_flags & XFS_IFEXTIREC) { | 4113 | if (ifp->if_flags & XFS_IFEXTIREC) { |
4127 | idx += erp->er_extoff; | 4114 | idx += erp->er_extoff; |
4128 | } | 4115 | } |
4129 | if (bno >= startoff + blockcount) { | 4116 | if (bno >= startoff + blockcount) { |
4130 | if (++idx == nextents) { | 4117 | if (++idx == nextents) { |
4131 | ep = NULL; | 4118 | ep = NULL; |
4132 | } else { | 4119 | } else { |
4133 | ep = xfs_iext_get_ext(ifp, idx); | 4120 | ep = xfs_iext_get_ext(ifp, idx); |
4134 | } | 4121 | } |
4135 | } | 4122 | } |
4136 | *idxp = idx; | 4123 | *idxp = idx; |
4137 | return ep; | 4124 | return ep; |
4138 | } | 4125 | } |
4139 | 4126 | ||
4140 | /* | 4127 | /* |
4141 | * Return a pointer to the indirection array entry containing the | 4128 | * Return a pointer to the indirection array entry containing the |
4142 | * extent record for filesystem block bno. Store the index of the | 4129 | * extent record for filesystem block bno. Store the index of the |
4143 | * target irec in *erp_idxp. | 4130 | * target irec in *erp_idxp. |
4144 | */ | 4131 | */ |
4145 | xfs_ext_irec_t * /* pointer to found extent record */ | 4132 | xfs_ext_irec_t * /* pointer to found extent record */ |
4146 | xfs_iext_bno_to_irec( | 4133 | xfs_iext_bno_to_irec( |
4147 | xfs_ifork_t *ifp, /* inode fork pointer */ | 4134 | xfs_ifork_t *ifp, /* inode fork pointer */ |
4148 | xfs_fileoff_t bno, /* block number to search for */ | 4135 | xfs_fileoff_t bno, /* block number to search for */ |
4149 | int *erp_idxp) /* irec index of target ext list */ | 4136 | int *erp_idxp) /* irec index of target ext list */ |
4150 | { | 4137 | { |
4151 | xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ | 4138 | xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ |
4152 | xfs_ext_irec_t *erp_next; /* next indirection array entry */ | 4139 | xfs_ext_irec_t *erp_next; /* next indirection array entry */ |
4153 | int erp_idx; /* indirection array index */ | 4140 | int erp_idx; /* indirection array index */ |
4154 | int nlists; /* number of extent irec's (lists) */ | 4141 | int nlists; /* number of extent irec's (lists) */ |
4155 | int high; /* binary search upper limit */ | 4142 | int high; /* binary search upper limit */ |
4156 | int low; /* binary search lower limit */ | 4143 | int low; /* binary search lower limit */ |
4157 | 4144 | ||
4158 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4145 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4159 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4146 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4160 | erp_idx = 0; | 4147 | erp_idx = 0; |
4161 | low = 0; | 4148 | low = 0; |
4162 | high = nlists - 1; | 4149 | high = nlists - 1; |
4163 | while (low <= high) { | 4150 | while (low <= high) { |
4164 | erp_idx = (low + high) >> 1; | 4151 | erp_idx = (low + high) >> 1; |
4165 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | 4152 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; |
4166 | erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; | 4153 | erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; |
4167 | if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { | 4154 | if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { |
4168 | high = erp_idx - 1; | 4155 | high = erp_idx - 1; |
4169 | } else if (erp_next && bno >= | 4156 | } else if (erp_next && bno >= |
4170 | xfs_bmbt_get_startoff(erp_next->er_extbuf)) { | 4157 | xfs_bmbt_get_startoff(erp_next->er_extbuf)) { |
4171 | low = erp_idx + 1; | 4158 | low = erp_idx + 1; |
4172 | } else { | 4159 | } else { |
4173 | break; | 4160 | break; |
4174 | } | 4161 | } |
4175 | } | 4162 | } |
4176 | *erp_idxp = erp_idx; | 4163 | *erp_idxp = erp_idx; |
4177 | return erp; | 4164 | return erp; |
4178 | } | 4165 | } |
4179 | 4166 | ||
4180 | /* | 4167 | /* |
4181 | * Return a pointer to the indirection array entry containing the | 4168 | * Return a pointer to the indirection array entry containing the |
4182 | * extent record at file extent index *idxp. Store the index of the | 4169 | * extent record at file extent index *idxp. Store the index of the |
4183 | * target irec in *erp_idxp and store the page index of the target | 4170 | * target irec in *erp_idxp and store the page index of the target |
4184 | * extent record in *idxp. | 4171 | * extent record in *idxp. |
4185 | */ | 4172 | */ |
4186 | xfs_ext_irec_t * | 4173 | xfs_ext_irec_t * |
4187 | xfs_iext_idx_to_irec( | 4174 | xfs_iext_idx_to_irec( |
4188 | xfs_ifork_t *ifp, /* inode fork pointer */ | 4175 | xfs_ifork_t *ifp, /* inode fork pointer */ |
4189 | xfs_extnum_t *idxp, /* extent index (file -> page) */ | 4176 | xfs_extnum_t *idxp, /* extent index (file -> page) */ |
4190 | int *erp_idxp, /* pointer to target irec */ | 4177 | int *erp_idxp, /* pointer to target irec */ |
4191 | int realloc) /* new bytes were just added */ | 4178 | int realloc) /* new bytes were just added */ |
4192 | { | 4179 | { |
4193 | xfs_ext_irec_t *prev; /* pointer to previous irec */ | 4180 | xfs_ext_irec_t *prev; /* pointer to previous irec */ |
4194 | xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ | 4181 | xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ |
4195 | int erp_idx; /* indirection array index */ | 4182 | int erp_idx; /* indirection array index */ |
4196 | int nlists; /* number of irec's (ex lists) */ | 4183 | int nlists; /* number of irec's (ex lists) */ |
4197 | int high; /* binary search upper limit */ | 4184 | int high; /* binary search upper limit */ |
4198 | int low; /* binary search lower limit */ | 4185 | int low; /* binary search lower limit */ |
4199 | xfs_extnum_t page_idx = *idxp; /* extent index in target list */ | 4186 | xfs_extnum_t page_idx = *idxp; /* extent index in target list */ |
4200 | 4187 | ||
4201 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4188 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4202 | ASSERT(page_idx >= 0 && page_idx <= | 4189 | ASSERT(page_idx >= 0 && page_idx <= |
4203 | ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); | 4190 | ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); |
4204 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4191 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4205 | erp_idx = 0; | 4192 | erp_idx = 0; |
4206 | low = 0; | 4193 | low = 0; |
4207 | high = nlists - 1; | 4194 | high = nlists - 1; |
4208 | 4195 | ||
4209 | /* Binary search extent irec's */ | 4196 | /* Binary search extent irec's */ |
4210 | while (low <= high) { | 4197 | while (low <= high) { |
4211 | erp_idx = (low + high) >> 1; | 4198 | erp_idx = (low + high) >> 1; |
4212 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | 4199 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; |
4213 | prev = erp_idx > 0 ? erp - 1 : NULL; | 4200 | prev = erp_idx > 0 ? erp - 1 : NULL; |
4214 | if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && | 4201 | if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && |
4215 | realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { | 4202 | realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { |
4216 | high = erp_idx - 1; | 4203 | high = erp_idx - 1; |
4217 | } else if (page_idx > erp->er_extoff + erp->er_extcount || | 4204 | } else if (page_idx > erp->er_extoff + erp->er_extcount || |
4218 | (page_idx == erp->er_extoff + erp->er_extcount && | 4205 | (page_idx == erp->er_extoff + erp->er_extcount && |
4219 | !realloc)) { | 4206 | !realloc)) { |
4220 | low = erp_idx + 1; | 4207 | low = erp_idx + 1; |
4221 | } else if (page_idx == erp->er_extoff + erp->er_extcount && | 4208 | } else if (page_idx == erp->er_extoff + erp->er_extcount && |
4222 | erp->er_extcount == XFS_LINEAR_EXTS) { | 4209 | erp->er_extcount == XFS_LINEAR_EXTS) { |
4223 | ASSERT(realloc); | 4210 | ASSERT(realloc); |
4224 | page_idx = 0; | 4211 | page_idx = 0; |
4225 | erp_idx++; | 4212 | erp_idx++; |
4226 | erp = erp_idx < nlists ? erp + 1 : NULL; | 4213 | erp = erp_idx < nlists ? erp + 1 : NULL; |
4227 | break; | 4214 | break; |
4228 | } else { | 4215 | } else { |
4229 | page_idx -= erp->er_extoff; | 4216 | page_idx -= erp->er_extoff; |
4230 | break; | 4217 | break; |
4231 | } | 4218 | } |
4232 | } | 4219 | } |
4233 | *idxp = page_idx; | 4220 | *idxp = page_idx; |
4234 | *erp_idxp = erp_idx; | 4221 | *erp_idxp = erp_idx; |
4235 | return(erp); | 4222 | return(erp); |
4236 | } | 4223 | } |
4237 | 4224 | ||
4238 | /* | 4225 | /* |
4239 | * Allocate and initialize an indirection array once the space needed | 4226 | * Allocate and initialize an indirection array once the space needed |
4240 | * for incore extents increases above XFS_IEXT_BUFSZ. | 4227 | * for incore extents increases above XFS_IEXT_BUFSZ. |
4241 | */ | 4228 | */ |
4242 | void | 4229 | void |
4243 | xfs_iext_irec_init( | 4230 | xfs_iext_irec_init( |
4244 | xfs_ifork_t *ifp) /* inode fork pointer */ | 4231 | xfs_ifork_t *ifp) /* inode fork pointer */ |
4245 | { | 4232 | { |
4246 | xfs_ext_irec_t *erp; /* indirection array pointer */ | 4233 | xfs_ext_irec_t *erp; /* indirection array pointer */ |
4247 | xfs_extnum_t nextents; /* number of extents in file */ | 4234 | xfs_extnum_t nextents; /* number of extents in file */ |
4248 | 4235 | ||
4249 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); | 4236 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); |
4250 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4237 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4251 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4238 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
4252 | 4239 | ||
4253 | erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); | 4240 | erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); |
4254 | 4241 | ||
4255 | if (nextents == 0) { | 4242 | if (nextents == 0) { |
4256 | ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); | 4243 | ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); |
4257 | } else if (!ifp->if_real_bytes) { | 4244 | } else if (!ifp->if_real_bytes) { |
4258 | xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); | 4245 | xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); |
4259 | } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { | 4246 | } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { |
4260 | xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); | 4247 | xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); |
4261 | } | 4248 | } |
4262 | erp->er_extbuf = ifp->if_u1.if_extents; | 4249 | erp->er_extbuf = ifp->if_u1.if_extents; |
4263 | erp->er_extcount = nextents; | 4250 | erp->er_extcount = nextents; |
4264 | erp->er_extoff = 0; | 4251 | erp->er_extoff = 0; |
4265 | 4252 | ||
4266 | ifp->if_flags |= XFS_IFEXTIREC; | 4253 | ifp->if_flags |= XFS_IFEXTIREC; |
4267 | ifp->if_real_bytes = XFS_IEXT_BUFSZ; | 4254 | ifp->if_real_bytes = XFS_IEXT_BUFSZ; |
4268 | ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); | 4255 | ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); |
4269 | ifp->if_u1.if_ext_irec = erp; | 4256 | ifp->if_u1.if_ext_irec = erp; |
4270 | 4257 | ||
4271 | return; | 4258 | return; |
4272 | } | 4259 | } |
4273 | 4260 | ||
4274 | /* | 4261 | /* |
4275 | * Allocate and initialize a new entry in the indirection array. | 4262 | * Allocate and initialize a new entry in the indirection array. |
4276 | */ | 4263 | */ |
4277 | xfs_ext_irec_t * | 4264 | xfs_ext_irec_t * |
4278 | xfs_iext_irec_new( | 4265 | xfs_iext_irec_new( |
4279 | xfs_ifork_t *ifp, /* inode fork pointer */ | 4266 | xfs_ifork_t *ifp, /* inode fork pointer */ |
4280 | int erp_idx) /* index for new irec */ | 4267 | int erp_idx) /* index for new irec */ |
4281 | { | 4268 | { |
4282 | xfs_ext_irec_t *erp; /* indirection array pointer */ | 4269 | xfs_ext_irec_t *erp; /* indirection array pointer */ |
4283 | int i; /* loop counter */ | 4270 | int i; /* loop counter */ |
4284 | int nlists; /* number of irec's (ex lists) */ | 4271 | int nlists; /* number of irec's (ex lists) */ |
4285 | 4272 | ||
4286 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4273 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4287 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4274 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4288 | 4275 | ||
4289 | /* Resize indirection array */ | 4276 | /* Resize indirection array */ |
4290 | xfs_iext_realloc_indirect(ifp, ++nlists * | 4277 | xfs_iext_realloc_indirect(ifp, ++nlists * |
4291 | sizeof(xfs_ext_irec_t)); | 4278 | sizeof(xfs_ext_irec_t)); |
4292 | /* | 4279 | /* |
4293 | * Move records down in the array so the | 4280 | * Move records down in the array so the |
4294 | * new page can use erp_idx. | 4281 | * new page can use erp_idx. |
4295 | */ | 4282 | */ |
4296 | erp = ifp->if_u1.if_ext_irec; | 4283 | erp = ifp->if_u1.if_ext_irec; |
4297 | for (i = nlists - 1; i > erp_idx; i--) { | 4284 | for (i = nlists - 1; i > erp_idx; i--) { |
4298 | memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); | 4285 | memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); |
4299 | } | 4286 | } |
4300 | ASSERT(i == erp_idx); | 4287 | ASSERT(i == erp_idx); |
4301 | 4288 | ||
4302 | /* Initialize new extent record */ | 4289 | /* Initialize new extent record */ |
4303 | erp = ifp->if_u1.if_ext_irec; | 4290 | erp = ifp->if_u1.if_ext_irec; |
4304 | erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); | 4291 | erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); |
4305 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; | 4292 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; |
4306 | memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); | 4293 | memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); |
4307 | erp[erp_idx].er_extcount = 0; | 4294 | erp[erp_idx].er_extcount = 0; |
4308 | erp[erp_idx].er_extoff = erp_idx > 0 ? | 4295 | erp[erp_idx].er_extoff = erp_idx > 0 ? |
4309 | erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; | 4296 | erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; |
4310 | return (&erp[erp_idx]); | 4297 | return (&erp[erp_idx]); |
4311 | } | 4298 | } |
4312 | 4299 | ||
4313 | /* | 4300 | /* |
4314 | * Remove a record from the indirection array. | 4301 | * Remove a record from the indirection array. |
4315 | */ | 4302 | */ |
4316 | void | 4303 | void |
4317 | xfs_iext_irec_remove( | 4304 | xfs_iext_irec_remove( |
4318 | xfs_ifork_t *ifp, /* inode fork pointer */ | 4305 | xfs_ifork_t *ifp, /* inode fork pointer */ |
4319 | int erp_idx) /* irec index to remove */ | 4306 | int erp_idx) /* irec index to remove */ |
4320 | { | 4307 | { |
4321 | xfs_ext_irec_t *erp; /* indirection array pointer */ | 4308 | xfs_ext_irec_t *erp; /* indirection array pointer */ |
4322 | int i; /* loop counter */ | 4309 | int i; /* loop counter */ |
4323 | int nlists; /* number of irec's (ex lists) */ | 4310 | int nlists; /* number of irec's (ex lists) */ |
4324 | 4311 | ||
4325 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4312 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4326 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4313 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4327 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | 4314 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; |
4328 | if (erp->er_extbuf) { | 4315 | if (erp->er_extbuf) { |
4329 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, | 4316 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, |
4330 | -erp->er_extcount); | 4317 | -erp->er_extcount); |
4331 | kmem_free(erp->er_extbuf); | 4318 | kmem_free(erp->er_extbuf); |
4332 | } | 4319 | } |
4333 | /* Compact extent records */ | 4320 | /* Compact extent records */ |
4334 | erp = ifp->if_u1.if_ext_irec; | 4321 | erp = ifp->if_u1.if_ext_irec; |
4335 | for (i = erp_idx; i < nlists - 1; i++) { | 4322 | for (i = erp_idx; i < nlists - 1; i++) { |
4336 | memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); | 4323 | memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); |
4337 | } | 4324 | } |
4338 | /* | 4325 | /* |
4339 | * Manually free the last extent record from the indirection | 4326 | * Manually free the last extent record from the indirection |
4340 | * array. A call to xfs_iext_realloc_indirect() with a size | 4327 | * array. A call to xfs_iext_realloc_indirect() with a size |
4341 | * of zero would result in a call to xfs_iext_destroy() which | 4328 | * of zero would result in a call to xfs_iext_destroy() which |
4342 | * would in turn call this function again, creating a nasty | 4329 | * would in turn call this function again, creating a nasty |
4343 | * infinite loop. | 4330 | * infinite loop. |
4344 | */ | 4331 | */ |
4345 | if (--nlists) { | 4332 | if (--nlists) { |
4346 | xfs_iext_realloc_indirect(ifp, | 4333 | xfs_iext_realloc_indirect(ifp, |
4347 | nlists * sizeof(xfs_ext_irec_t)); | 4334 | nlists * sizeof(xfs_ext_irec_t)); |
4348 | } else { | 4335 | } else { |
4349 | kmem_free(ifp->if_u1.if_ext_irec); | 4336 | kmem_free(ifp->if_u1.if_ext_irec); |
4350 | } | 4337 | } |
4351 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; | 4338 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; |
4352 | } | 4339 | } |
4353 | 4340 | ||
4354 | /* | 4341 | /* |
4355 | * This is called to clean up large amounts of unused memory allocated | 4342 | * This is called to clean up large amounts of unused memory allocated |
4356 | * by the indirection array. Before compacting anything though, verify | 4343 | * by the indirection array. Before compacting anything though, verify |
4357 | * that the indirection array is still needed and switch back to the | 4344 | * that the indirection array is still needed and switch back to the |
4358 | * linear extent list (or even the inline buffer) if possible. The | 4345 | * linear extent list (or even the inline buffer) if possible. The |
4359 | * compaction policy is as follows: | 4346 | * compaction policy is as follows: |
4360 | * | 4347 | * |
4361 | * Full Compaction: Extents fit into a single page (or inline buffer) | 4348 | * Full Compaction: Extents fit into a single page (or inline buffer) |
4362 | * Partial Compaction: Extents occupy less than 50% of allocated space | 4349 | * Partial Compaction: Extents occupy less than 50% of allocated space |
4363 | * No Compaction: Extents occupy at least 50% of allocated space | 4350 | * No Compaction: Extents occupy at least 50% of allocated space |
4364 | */ | 4351 | */ |
4365 | void | 4352 | void |
4366 | xfs_iext_irec_compact( | 4353 | xfs_iext_irec_compact( |
4367 | xfs_ifork_t *ifp) /* inode fork pointer */ | 4354 | xfs_ifork_t *ifp) /* inode fork pointer */ |
4368 | { | 4355 | { |
4369 | xfs_extnum_t nextents; /* number of extents in file */ | 4356 | xfs_extnum_t nextents; /* number of extents in file */ |
4370 | int nlists; /* number of irec's (ex lists) */ | 4357 | int nlists; /* number of irec's (ex lists) */ |
4371 | 4358 | ||
4372 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4359 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4373 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4360 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4374 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4361 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4375 | 4362 | ||
4376 | if (nextents == 0) { | 4363 | if (nextents == 0) { |
4377 | xfs_iext_destroy(ifp); | 4364 | xfs_iext_destroy(ifp); |
4378 | } else if (nextents <= XFS_INLINE_EXTS) { | 4365 | } else if (nextents <= XFS_INLINE_EXTS) { |
4379 | xfs_iext_indirect_to_direct(ifp); | 4366 | xfs_iext_indirect_to_direct(ifp); |
4380 | xfs_iext_direct_to_inline(ifp, nextents); | 4367 | xfs_iext_direct_to_inline(ifp, nextents); |
4381 | } else if (nextents <= XFS_LINEAR_EXTS) { | 4368 | } else if (nextents <= XFS_LINEAR_EXTS) { |
4382 | xfs_iext_indirect_to_direct(ifp); | 4369 | xfs_iext_indirect_to_direct(ifp); |
4383 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { | 4370 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { |
4384 | xfs_iext_irec_compact_pages(ifp); | 4371 | xfs_iext_irec_compact_pages(ifp); |
4385 | } | 4372 | } |
4386 | } | 4373 | } |
4387 | 4374 | ||
4388 | /* | 4375 | /* |
4389 | * Combine extents from neighboring extent pages. | 4376 | * Combine extents from neighboring extent pages. |
4390 | */ | 4377 | */ |
4391 | void | 4378 | void |
4392 | xfs_iext_irec_compact_pages( | 4379 | xfs_iext_irec_compact_pages( |
4393 | xfs_ifork_t *ifp) /* inode fork pointer */ | 4380 | xfs_ifork_t *ifp) /* inode fork pointer */ |
4394 | { | 4381 | { |
4395 | xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ | 4382 | xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ |
4396 | int erp_idx = 0; /* indirection array index */ | 4383 | int erp_idx = 0; /* indirection array index */ |
4397 | int nlists; /* number of irec's (ex lists) */ | 4384 | int nlists; /* number of irec's (ex lists) */ |
4398 | 4385 | ||
4399 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4386 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4400 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4387 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4401 | while (erp_idx < nlists - 1) { | 4388 | while (erp_idx < nlists - 1) { |
4402 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | 4389 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; |
4403 | erp_next = erp + 1; | 4390 | erp_next = erp + 1; |
4404 | if (erp_next->er_extcount <= | 4391 | if (erp_next->er_extcount <= |
4405 | (XFS_LINEAR_EXTS - erp->er_extcount)) { | 4392 | (XFS_LINEAR_EXTS - erp->er_extcount)) { |
4406 | memcpy(&erp->er_extbuf[erp->er_extcount], | 4393 | memcpy(&erp->er_extbuf[erp->er_extcount], |
4407 | erp_next->er_extbuf, erp_next->er_extcount * | 4394 | erp_next->er_extbuf, erp_next->er_extcount * |
4408 | sizeof(xfs_bmbt_rec_t)); | 4395 | sizeof(xfs_bmbt_rec_t)); |
4409 | erp->er_extcount += erp_next->er_extcount; | 4396 | erp->er_extcount += erp_next->er_extcount; |
4410 | /* | 4397 | /* |
4411 | * Free page before removing extent record | 4398 | * Free page before removing extent record |
4412 | * so er_extoffs don't get modified in | 4399 | * so er_extoffs don't get modified in |
4413 | * xfs_iext_irec_remove. | 4400 | * xfs_iext_irec_remove. |
4414 | */ | 4401 | */ |
4415 | kmem_free(erp_next->er_extbuf); | 4402 | kmem_free(erp_next->er_extbuf); |
4416 | erp_next->er_extbuf = NULL; | 4403 | erp_next->er_extbuf = NULL; |
4417 | xfs_iext_irec_remove(ifp, erp_idx + 1); | 4404 | xfs_iext_irec_remove(ifp, erp_idx + 1); |
4418 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4405 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4419 | } else { | 4406 | } else { |
4420 | erp_idx++; | 4407 | erp_idx++; |
4421 | } | 4408 | } |
4422 | } | 4409 | } |
4423 | } | 4410 | } |
4424 | 4411 | ||
4425 | /* | 4412 | /* |
4426 | * This is called to update the er_extoff field in the indirection | 4413 | * This is called to update the er_extoff field in the indirection |
4427 | * array when extents have been added or removed from one of the | 4414 | * array when extents have been added or removed from one of the |
4428 | * extent lists. erp_idx contains the irec index to begin updating | 4415 | * extent lists. erp_idx contains the irec index to begin updating |
4429 | * at and ext_diff contains the number of extents that were added | 4416 | * at and ext_diff contains the number of extents that were added |
4430 | * or removed. | 4417 | * or removed. |
4431 | */ | 4418 | */ |
4432 | void | 4419 | void |
4433 | xfs_iext_irec_update_extoffs( | 4420 | xfs_iext_irec_update_extoffs( |
4434 | xfs_ifork_t *ifp, /* inode fork pointer */ | 4421 | xfs_ifork_t *ifp, /* inode fork pointer */ |
4435 | int erp_idx, /* irec index to update */ | 4422 | int erp_idx, /* irec index to update */ |
4436 | int ext_diff) /* number of new extents */ | 4423 | int ext_diff) /* number of new extents */ |
4437 | { | 4424 | { |
4438 | int i; /* loop counter */ | 4425 | int i; /* loop counter */ |
4439 | int nlists; /* number of irec's (ex lists */ | 4426 | int nlists; /* number of irec's (ex lists */ |
4440 | 4427 | ||
4441 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 4428 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
4442 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 4429 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
4443 | for (i = erp_idx; i < nlists; i++) { | 4430 | for (i = erp_idx; i < nlists; i++) { |
4444 | ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; | 4431 | ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; |
4445 | } | 4432 | } |
4446 | } | 4433 | } |
fs/xfs/xfs_inode.h
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #ifndef __XFS_INODE_H__ | 18 | #ifndef __XFS_INODE_H__ |
19 | #define __XFS_INODE_H__ | 19 | #define __XFS_INODE_H__ |
20 | 20 | ||
21 | struct xfs_dinode; | 21 | struct xfs_dinode; |
22 | struct xfs_inode; | 22 | struct xfs_inode; |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Fork identifiers. | 25 | * Fork identifiers. |
26 | */ | 26 | */ |
27 | #define XFS_DATA_FORK 0 | 27 | #define XFS_DATA_FORK 0 |
28 | #define XFS_ATTR_FORK 1 | 28 | #define XFS_ATTR_FORK 1 |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * The following xfs_ext_irec_t struct introduces a second (top) level | 31 | * The following xfs_ext_irec_t struct introduces a second (top) level |
32 | * to the in-core extent allocation scheme. These structs are allocated | 32 | * to the in-core extent allocation scheme. These structs are allocated |
33 | * in a contiguous block, creating an indirection array where each entry | 33 | * in a contiguous block, creating an indirection array where each entry |
34 | * (irec) contains a pointer to a buffer of in-core extent records which | 34 | * (irec) contains a pointer to a buffer of in-core extent records which |
35 | * it manages. Each extent buffer is 4k in size, since 4k is the system | 35 | * it manages. Each extent buffer is 4k in size, since 4k is the system |
36 | * page size on Linux i386 and systems with larger page sizes don't seem | 36 | * page size on Linux i386 and systems with larger page sizes don't seem |
37 | * to gain much, if anything, by using their native page size as the | 37 | * to gain much, if anything, by using their native page size as the |
38 | * extent buffer size. Also, using 4k extent buffers everywhere provides | 38 | * extent buffer size. Also, using 4k extent buffers everywhere provides |
39 | * a consistent interface for CXFS across different platforms. | 39 | * a consistent interface for CXFS across different platforms. |
40 | * | 40 | * |
41 | * There is currently no limit on the number of irec's (extent lists) | 41 | * There is currently no limit on the number of irec's (extent lists) |
42 | * allowed, so heavily fragmented files may require an indirection array | 42 | * allowed, so heavily fragmented files may require an indirection array |
43 | * which spans multiple system pages of memory. The number of extents | 43 | * which spans multiple system pages of memory. The number of extents |
44 | * which would require this amount of contiguous memory is very large | 44 | * which would require this amount of contiguous memory is very large |
45 | * and should not cause problems in the foreseeable future. However, | 45 | * and should not cause problems in the foreseeable future. However, |
46 | * if the memory needed for the contiguous array ever becomes a problem, | 46 | * if the memory needed for the contiguous array ever becomes a problem, |
47 | * it is possible that a third level of indirection may be required. | 47 | * it is possible that a third level of indirection may be required. |
48 | */ | 48 | */ |
/* One entry in the two-level in-core extent indirection array. */
typedef struct xfs_ext_irec {
	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
	xfs_extnum_t	er_extoff;	/* extent offset in file */
	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
} xfs_ext_irec_t;
54 | 54 | ||
/*
 * File incore extent information, present for each of data & attr forks.
 */
#define	XFS_IEXT_BUFSZ	4096	/* bytes per in-core extent buffer */
#define	XFS_LINEAR_EXTS	(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
				/* extent records per buffer */
#define	XFS_INLINE_EXTS	2	/* extents stored inline in if_u2 */
#define	XFS_INLINE_DATA	32	/* bytes of inline data in if_u2 */
typedef struct xfs_ifork {
	int			if_bytes;	/* bytes in if_u1 */
	int			if_real_bytes;	/* bytes allocated in if_u1 */
	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
	short			if_broot_bytes;	/* bytes allocated for root */
	unsigned char		if_flags;	/* per-fork flags */
	unsigned char		if_ext_max;	/* max # of extent records */
	xfs_extnum_t		if_lastex;	/* last if_extents used */
	union {
		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
		char		*if_data;	/* inline file data */
	} if_u1;
	union {
		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
					/* very small file extents */
		char		if_inline_data[XFS_INLINE_DATA];
					/* very small file data */
		xfs_dev_t	if_rdev;	/* dev number if special */
		uuid_t		if_uuid;	/* mount point value */
	} if_u2;
} xfs_ifork_t;
84 | 84 | ||
/*
 * Inode location information.  Stored in the inode and passed to
 * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
 *
 * im_len is deliberately a ushort so that embedding this struct in
 * struct xfs_inode does not bloat the in-core inode.
 */
struct xfs_imap {
	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
	ushort		im_len;		/* length in BBs of inode chunk */
	ushort		im_boffset;	/* inode offset in block in bytes */
};
94 | |||
95 | /* | ||
86 | * This is the xfs in-core inode structure. | 96 | * This is the xfs in-core inode structure. |
87 | * Most of the on-disk inode is embedded in the i_d field. | 97 | * Most of the on-disk inode is embedded in the i_d field. |
88 | * | 98 | * |
89 | * The extent pointers/inline file space, however, are managed | 99 | * The extent pointers/inline file space, however, are managed |
90 | * separately. The memory for this information is pointed to by | 100 | * separately. The memory for this information is pointed to by |
91 | * the if_u1 unions depending on the type of the data. | 101 | * the if_u1 unions depending on the type of the data. |
92 | * This is used to linearize the array of extents for fast in-core | 102 | * This is used to linearize the array of extents for fast in-core |
93 | * access. This is used until the file's number of extents | 103 | * access. This is used until the file's number of extents |
94 | * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers | 104 | * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers |
95 | * are accessed through the buffer cache. | 105 | * are accessed through the buffer cache. |
96 | * | 106 | * |
97 | * Other state kept in the in-core inode is used for identification, | 107 | * Other state kept in the in-core inode is used for identification, |
98 | * locking, transactional updating, etc of the inode. | 108 | * locking, transactional updating, etc of the inode. |
99 | * | 109 | * |
100 | * Generally, we do not want to hold the i_rlock while holding the | 110 | * Generally, we do not want to hold the i_rlock while holding the |
101 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. | 111 | * i_ilock. Hierarchy is i_iolock followed by i_rlock. |
102 | * | 112 | * |
103 | * xfs_iptr_t contains all the inode fields upto and including the | 113 | * xfs_iptr_t contains all the inode fields upto and including the |
104 | * i_mnext and i_mprev fields, it is used as a marker in the inode | 114 | * i_mnext and i_mprev fields, it is used as a marker in the inode |
105 | * chain off the mount structure by xfs_sync calls. | 115 | * chain off the mount structure by xfs_sync calls. |
106 | */ | 116 | */ |
107 | 117 | ||
/* In-core timestamp: split into whole seconds plus nanoseconds. */
typedef struct xfs_ictimestamp {
	__int32_t	t_sec;		/* timestamp seconds */
	__int32_t	t_nsec;		/* timestamp nanoseconds */
} xfs_ictimestamp_t;
112 | 122 | ||
/*
 * NOTE: This structure must be kept identical to struct xfs_dinode
 * in xfs_dinode.h except for the endianess annotations.  Changing the
 * order or size of any field here breaks the in-core/on-disk mapping.
 */
typedef struct xfs_icdinode {
	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
	__uint16_t	di_mode;	/* mode and type of file */
	__int8_t	di_version;	/* inode version */
	__int8_t	di_format;	/* format of di_c data */
	__uint16_t	di_onlink;	/* old number of links to file */
	__uint32_t	di_uid;		/* owner's user id */
	__uint32_t	di_gid;		/* owner's group id */
	__uint32_t	di_nlink;	/* number of links to file */
	__uint16_t	di_projid;	/* owner's project id */
	__uint8_t	di_pad[8];	/* unused, zeroed space */
	__uint16_t	di_flushiter;	/* incremented on flush */
	xfs_ictimestamp_t di_atime;	/* time last accessed */
	xfs_ictimestamp_t di_mtime;	/* time last modified */
	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
	xfs_fsize_t	di_size;	/* number of bytes in file */
	xfs_drfsbno_t	di_nblocks;	/* # of direct & btree blocks used */
	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
	__int8_t	di_aformat;	/* format of attr fork's data */
	__uint32_t	di_dmevmask;	/* DMIG event mask */
	__uint16_t	di_dmstate;	/* DMIG state info */
	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
	__uint32_t	di_gen;		/* generation number */
} xfs_icdinode_t;
144 | 154 | ||
145 | /* | 155 | /* |
146 | * Flags for xfs_ichgtime(). | 156 | * Flags for xfs_ichgtime(). |
147 | */ | 157 | */ |
148 | #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ | 158 | #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ |
149 | #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ | 159 | #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ |
150 | 160 | ||
151 | /* | 161 | /* |
152 | * Per-fork incore inode flags. | 162 | * Per-fork incore inode flags. |
153 | */ | 163 | */ |
154 | #define XFS_IFINLINE 0x01 /* Inline data is read in */ | 164 | #define XFS_IFINLINE 0x01 /* Inline data is read in */ |
155 | #define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ | 165 | #define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ |
156 | #define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ | 166 | #define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ |
157 | #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ | 167 | #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ |
158 | 168 | ||
159 | /* | 169 | /* |
160 | * Flags for xfs_inotobp and xfs_imap(). | 170 | * Flags for xfs_inotobp and xfs_imap(). |
161 | */ | 171 | */ |
162 | #define XFS_IMAP_BULKSTAT 0x1 | 172 | #define XFS_IMAP_BULKSTAT 0x1 |
163 | 173 | ||
/*
 * Fork handling.
 *
 * An inode always has a data fork (i_df) and may have an attribute
 * fork (i_afp).  di_forkoff gives the offset of the attribute fork
 * within the inode's literal area in 8-byte units (hence the <<3
 * below); a value of zero means there is no attribute fork.
 */

/* Does this inode have an attribute fork? */
#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
/* Byte offset of the attribute fork in the literal area. */
#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))

/* Pointer to the in-core fork structure for fork (w). */
#define XFS_IFORK_PTR(ip,w)		\
	((w) == XFS_DATA_FORK ? \
		&(ip)->i_df : \
		(ip)->i_afp)
/* Bytes of the literal area available to the data fork. */
#define XFS_IFORK_DSIZE(ip) \
	(XFS_IFORK_Q(ip) ? \
		XFS_IFORK_BOFF(ip) : \
		XFS_LITINO((ip)->i_mount))
/* Bytes of the literal area available to the attribute fork. */
#define XFS_IFORK_ASIZE(ip) \
	(XFS_IFORK_Q(ip) ? \
		XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
		0)
/* Size in bytes of fork (w). */
#define XFS_IFORK_SIZE(ip,w) \
	((w) == XFS_DATA_FORK ? \
		XFS_IFORK_DSIZE(ip) : \
		XFS_IFORK_ASIZE(ip))
/* Format code (di_format/di_aformat) of fork (w). */
#define XFS_IFORK_FORMAT(ip,w) \
	((w) == XFS_DATA_FORK ? \
		(ip)->i_d.di_format : \
		(ip)->i_d.di_aformat)
/* Set the format code of fork (w) to (n). */
#define XFS_IFORK_FMT_SET(ip,w,n) \
	((w) == XFS_DATA_FORK ? \
		((ip)->i_d.di_format = (n)) : \
		((ip)->i_d.di_aformat = (n)))
/* Number of extents in fork (w). */
#define XFS_IFORK_NEXTENTS(ip,w) \
	((w) == XFS_DATA_FORK ? \
		(ip)->i_d.di_nextents : \
		(ip)->i_d.di_anextents)
/* Set the extent count of fork (w) to (n). */
#define XFS_IFORK_NEXT_SET(ip,w,n) \
	((w) == XFS_DATA_FORK ? \
		((ip)->i_d.di_nextents = (n)) : \
		((ip)->i_d.di_anextents = (n)))
203 | 213 | ||
204 | 214 | ||
205 | 215 | ||
206 | #ifdef __KERNEL__ | 216 | #ifdef __KERNEL__ |
207 | 217 | ||
208 | struct bhv_desc; | 218 | struct bhv_desc; |
209 | struct cred; | 219 | struct cred; |
210 | struct ktrace; | 220 | struct ktrace; |
211 | struct xfs_buf; | 221 | struct xfs_buf; |
212 | struct xfs_bmap_free; | 222 | struct xfs_bmap_free; |
213 | struct xfs_bmbt_irec; | 223 | struct xfs_bmbt_irec; |
214 | struct xfs_inode_log_item; | 224 | struct xfs_inode_log_item; |
215 | struct xfs_mount; | 225 | struct xfs_mount; |
216 | struct xfs_trans; | 226 | struct xfs_trans; |
217 | struct xfs_dquot; | 227 | struct xfs_dquot; |
218 | 228 | ||
219 | #if defined(XFS_ILOCK_TRACE) | 229 | #if defined(XFS_ILOCK_TRACE) |
220 | #define XFS_ILOCK_KTRACE_SIZE 32 | 230 | #define XFS_ILOCK_KTRACE_SIZE 32 |
221 | extern ktrace_t *xfs_ilock_trace_buf; | 231 | extern ktrace_t *xfs_ilock_trace_buf; |
222 | extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); | 232 | extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); |
223 | #else | 233 | #else |
224 | #define xfs_ilock_trace(i,n,f,ra) | 234 | #define xfs_ilock_trace(i,n,f,ra) |
225 | #endif | 235 | #endif |
226 | 236 | ||
227 | typedef struct dm_attrs_s { | 237 | typedef struct dm_attrs_s { |
228 | __uint32_t da_dmevmask; /* DMIG event mask */ | 238 | __uint32_t da_dmevmask; /* DMIG event mask */ |
229 | __uint16_t da_dmstate; /* DMIG state info */ | 239 | __uint16_t da_dmstate; /* DMIG state info */ |
230 | __uint16_t da_pad; /* DMIG extra padding */ | 240 | __uint16_t da_pad; /* DMIG extra padding */ |
231 | } dm_attrs_t; | 241 | } dm_attrs_t; |
232 | 242 | ||
/*
 * In-core XFS inode.
 *
 * Embeds the VFS struct inode (i_vnode) and caches a copy of the
 * on-disk inode core in i_d.  The location of the backing inode
 * buffer is kept in the embedded i_imap (replacing the former
 * separate i_blkno/i_len/i_boffset fields).
 */
typedef struct xfs_inode {
	/* Inode linking and identification information. */
	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
	struct xfs_dquot	*i_udquot;	/* user dquot */
	struct xfs_dquot	*i_gdquot;	/* group dquot */

	/* Inode location stuff */
	xfs_ino_t		i_ino;		/* inode number (agno/agino)*/
	struct xfs_imap		i_imap;		/* location for xfs_imap() */

	/* Extent information. */
	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
	xfs_ifork_t		i_df;		/* data fork */

	/* Transaction and locking information. */
	struct xfs_trans	*i_transp;	/* ptr to owning transaction*/
	struct xfs_inode_log_item *i_itemp;	/* logging information */
	mrlock_t		i_lock;		/* inode lock */
	mrlock_t		i_iolock;	/* inode IO lock */
	struct completion	i_flush;	/* inode flush completion q */
	atomic_t		i_pincount;	/* inode pin count */
	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
	spinlock_t		i_flags_lock;	/* inode i_flags lock */
	/* Miscellaneous state. */
	unsigned short		i_flags;	/* see defined flags below */
	unsigned char		i_update_core;	/* timestamps/size is dirty */
	unsigned char		i_update_size;	/* di_size field is dirty */
	unsigned int		i_delayed_blks;	/* count of delay alloc blks */

	xfs_icdinode_t		i_d;		/* most of ondisk inode */

	xfs_fsize_t		i_size;		/* in-memory size */
	xfs_fsize_t		i_new_size;	/* size when write completes */
	atomic_t		i_iocount;	/* outstanding I/O count */

	/* VFS inode */
	struct inode		i_vnode;	/* embedded VFS inode */

	/* Trace buffers per inode, only present in tracing builds. */
#ifdef XFS_INODE_TRACE
	struct ktrace		*i_trace;	/* general inode trace */
#endif
#ifdef XFS_BMAP_TRACE
	struct ktrace		*i_xtrace;	/* inode extent list trace */
#endif
#ifdef XFS_BTREE_TRACE
	struct ktrace		*i_btrace;	/* inode bmap btree trace */
#endif
#ifdef XFS_RW_TRACE
	struct ktrace		*i_rwtrace;	/* inode read/write trace */
#endif
#ifdef XFS_ILOCK_TRACE
	struct ktrace		*i_lock_trace;	/* inode lock/unlock trace */
#endif
#ifdef XFS_DIR2_TRACE
	struct ktrace		*i_dir_trace;	/* inode directory trace */
#endif
} xfs_inode_t;
293 | 301 | ||
/*
 * Current logical size of the inode: for regular files use the
 * in-memory size (i_size), for everything else the on-disk di_size.
 *
 * The previous definition expanded to an unparenthesized ternary with
 * a stray trailing semicolon, which broke precedence when used inside
 * a larger expression and made the macro unusable anywhere but as a
 * full statement tail.  Fully parenthesize the expansion and drop the
 * semicolon.
 */
#define XFS_ISIZE(ip)	((((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
				(ip)->i_size : (ip)->i_d.di_size)
296 | 304 | ||
297 | /* Convert from vfs inode to xfs inode */ | 305 | /* Convert from vfs inode to xfs inode */ |
298 | static inline struct xfs_inode *XFS_I(struct inode *inode) | 306 | static inline struct xfs_inode *XFS_I(struct inode *inode) |
299 | { | 307 | { |
300 | return container_of(inode, struct xfs_inode, i_vnode); | 308 | return container_of(inode, struct xfs_inode, i_vnode); |
301 | } | 309 | } |
302 | 310 | ||
303 | /* convert from xfs inode to vfs inode */ | 311 | /* convert from xfs inode to vfs inode */ |
304 | static inline struct inode *VFS_I(struct xfs_inode *ip) | 312 | static inline struct inode *VFS_I(struct xfs_inode *ip) |
305 | { | 313 | { |
306 | return &ip->i_vnode; | 314 | return &ip->i_vnode; |
307 | } | 315 | } |
308 | 316 | ||
/*
 * Get rid of a partially initialized inode.
 *
 * We have to go through destroy_inode to make sure allocations
 * from init_inode_always like the security data are undone.
 *
 * We mark the inode bad so that it takes the short cut in
 * the reclaim path instead of going through the flush path
 * which doesn't make sense for an inode that has never seen the
 * light of day.
 */
static inline void xfs_destroy_inode(struct xfs_inode *ip)
{
	make_bad_inode(VFS_I(ip));
	/*
	 * destroy_inode() has no return value; "return destroy_inode(...)"
	 * from a void function is an ISO C constraint violation (C11
	 * 6.8.6.4) that gcc merely tolerates.  Call it as a plain
	 * statement instead.
	 */
	destroy_inode(VFS_I(ip));
}
325 | 333 | ||
326 | /* | 334 | /* |
327 | * i_flags helper functions | 335 | * i_flags helper functions |
328 | */ | 336 | */ |
329 | static inline void | 337 | static inline void |
330 | __xfs_iflags_set(xfs_inode_t *ip, unsigned short flags) | 338 | __xfs_iflags_set(xfs_inode_t *ip, unsigned short flags) |
331 | { | 339 | { |
332 | ip->i_flags |= flags; | 340 | ip->i_flags |= flags; |
333 | } | 341 | } |
334 | 342 | ||
335 | static inline void | 343 | static inline void |
336 | xfs_iflags_set(xfs_inode_t *ip, unsigned short flags) | 344 | xfs_iflags_set(xfs_inode_t *ip, unsigned short flags) |
337 | { | 345 | { |
338 | spin_lock(&ip->i_flags_lock); | 346 | spin_lock(&ip->i_flags_lock); |
339 | __xfs_iflags_set(ip, flags); | 347 | __xfs_iflags_set(ip, flags); |
340 | spin_unlock(&ip->i_flags_lock); | 348 | spin_unlock(&ip->i_flags_lock); |
341 | } | 349 | } |
342 | 350 | ||
343 | static inline void | 351 | static inline void |
344 | xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags) | 352 | xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags) |
345 | { | 353 | { |
346 | spin_lock(&ip->i_flags_lock); | 354 | spin_lock(&ip->i_flags_lock); |
347 | ip->i_flags &= ~flags; | 355 | ip->i_flags &= ~flags; |
348 | spin_unlock(&ip->i_flags_lock); | 356 | spin_unlock(&ip->i_flags_lock); |
349 | } | 357 | } |
350 | 358 | ||
351 | static inline int | 359 | static inline int |
352 | __xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) | 360 | __xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) |
353 | { | 361 | { |
354 | return (ip->i_flags & flags); | 362 | return (ip->i_flags & flags); |
355 | } | 363 | } |
356 | 364 | ||
357 | static inline int | 365 | static inline int |
358 | xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) | 366 | xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) |
359 | { | 367 | { |
360 | int ret; | 368 | int ret; |
361 | spin_lock(&ip->i_flags_lock); | 369 | spin_lock(&ip->i_flags_lock); |
362 | ret = __xfs_iflags_test(ip, flags); | 370 | ret = __xfs_iflags_test(ip, flags); |
363 | spin_unlock(&ip->i_flags_lock); | 371 | spin_unlock(&ip->i_flags_lock); |
364 | return ret; | 372 | return ret; |
365 | } | 373 | } |
366 | 374 | ||
367 | static inline int | 375 | static inline int |
368 | xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | 376 | xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) |
369 | { | 377 | { |
370 | int ret; | 378 | int ret; |
371 | 379 | ||
372 | spin_lock(&ip->i_flags_lock); | 380 | spin_lock(&ip->i_flags_lock); |
373 | ret = ip->i_flags & flags; | 381 | ret = ip->i_flags & flags; |
374 | if (ret) | 382 | if (ret) |
375 | ip->i_flags &= ~flags; | 383 | ip->i_flags &= ~flags; |
376 | spin_unlock(&ip->i_flags_lock); | 384 | spin_unlock(&ip->i_flags_lock); |
377 | return ret; | 385 | return ret; |
378 | } | 386 | } |
379 | 387 | ||
380 | /* | 388 | /* |
381 | * Manage the i_flush queue embedded in the inode. This completion | 389 | * Manage the i_flush queue embedded in the inode. This completion |
382 | * queue synchronizes processes attempting to flush the in-core | 390 | * queue synchronizes processes attempting to flush the in-core |
383 | * inode back to disk. | 391 | * inode back to disk. |
384 | */ | 392 | */ |
385 | static inline void xfs_iflock(xfs_inode_t *ip) | 393 | static inline void xfs_iflock(xfs_inode_t *ip) |
386 | { | 394 | { |
387 | wait_for_completion(&ip->i_flush); | 395 | wait_for_completion(&ip->i_flush); |
388 | } | 396 | } |
389 | 397 | ||
390 | static inline int xfs_iflock_nowait(xfs_inode_t *ip) | 398 | static inline int xfs_iflock_nowait(xfs_inode_t *ip) |
391 | { | 399 | { |
392 | return try_wait_for_completion(&ip->i_flush); | 400 | return try_wait_for_completion(&ip->i_flush); |
393 | } | 401 | } |
394 | 402 | ||
395 | static inline void xfs_ifunlock(xfs_inode_t *ip) | 403 | static inline void xfs_ifunlock(xfs_inode_t *ip) |
396 | { | 404 | { |
397 | complete(&ip->i_flush); | 405 | complete(&ip->i_flush); |
398 | } | 406 | } |
399 | 407 | ||
400 | /* | 408 | /* |
401 | * In-core inode flags. | 409 | * In-core inode flags. |
402 | */ | 410 | */ |
403 | #define XFS_IGRIO 0x0001 /* inode used for guaranteed rate i/o */ | 411 | #define XFS_IGRIO 0x0001 /* inode used for guaranteed rate i/o */ |
404 | #define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ | 412 | #define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ |
405 | #define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ | 413 | #define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ |
406 | #define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ | 414 | #define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ |
407 | #define XFS_ISTALE 0x0010 /* inode has been staled */ | 415 | #define XFS_ISTALE 0x0010 /* inode has been staled */ |
408 | #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ | 416 | #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ |
409 | #define XFS_INEW 0x0040 | 417 | #define XFS_INEW 0x0040 |
410 | #define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */ | 418 | #define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */ |
411 | #define XFS_IMODIFIED 0x0100 /* XFS inode state possibly differs */ | 419 | #define XFS_IMODIFIED 0x0100 /* XFS inode state possibly differs */ |
412 | /* to the Linux inode state. */ | 420 | /* to the Linux inode state. */ |
413 | #define XFS_ITRUNCATED 0x0200 /* truncated down so flush-on-close */ | 421 | #define XFS_ITRUNCATED 0x0200 /* truncated down so flush-on-close */ |
414 | 422 | ||
415 | /* | 423 | /* |
416 | * Flags for inode locking. | 424 | * Flags for inode locking. |
417 | * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) | 425 | * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) |
418 | * 1<<16 - 1<<32-1 -- lockdep annotation (integers) | 426 | * 1<<16 - 1<<32-1 -- lockdep annotation (integers) |
419 | */ | 427 | */ |
420 | #define XFS_IOLOCK_EXCL (1<<0) | 428 | #define XFS_IOLOCK_EXCL (1<<0) |
421 | #define XFS_IOLOCK_SHARED (1<<1) | 429 | #define XFS_IOLOCK_SHARED (1<<1) |
422 | #define XFS_ILOCK_EXCL (1<<2) | 430 | #define XFS_ILOCK_EXCL (1<<2) |
423 | #define XFS_ILOCK_SHARED (1<<3) | 431 | #define XFS_ILOCK_SHARED (1<<3) |
424 | #define XFS_IUNLOCK_NONOTIFY (1<<4) | 432 | #define XFS_IUNLOCK_NONOTIFY (1<<4) |
425 | 433 | ||
426 | #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | 434 | #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ |
427 | | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) | 435 | | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) |
428 | 436 | ||
429 | /* | 437 | /* |
430 | * Flags for lockdep annotations. | 438 | * Flags for lockdep annotations. |
431 | * | 439 | * |
432 | * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes | 440 | * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes |
433 | * (ie directory operations that require locking a directory inode and | 441 | * (ie directory operations that require locking a directory inode and |
434 | * an entry inode). The first inode gets locked with this flag so it | 442 | * an entry inode). The first inode gets locked with this flag so it |
435 | * gets a lockdep subclass of 1 and the second lock will have a lockdep | 443 | * gets a lockdep subclass of 1 and the second lock will have a lockdep |
436 | * subclass of 0. | 444 | * subclass of 0. |
437 | * | 445 | * |
438 | * XFS_LOCK_INUMORDER - for locking several inodes at the some time | 446 | * XFS_LOCK_INUMORDER - for locking several inodes at the some time |
439 | * with xfs_lock_inodes(). This flag is used as the starting subclass | 447 | * with xfs_lock_inodes(). This flag is used as the starting subclass |
440 | * and each subsequent lock acquired will increment the subclass by one. | 448 | * and each subsequent lock acquired will increment the subclass by one. |
441 | * So the first lock acquired will have a lockdep subclass of 2, the | 449 | * So the first lock acquired will have a lockdep subclass of 2, the |
442 | * second lock will have a lockdep subclass of 3, and so on. It is | 450 | * second lock will have a lockdep subclass of 3, and so on. It is |
443 | * the responsibility of the class builder to shift this to the correct | 451 | * the responsibility of the class builder to shift this to the correct |
444 | * portion of the lock_mode lockdep mask. | 452 | * portion of the lock_mode lockdep mask. |
445 | */ | 453 | */ |
446 | #define XFS_LOCK_PARENT 1 | 454 | #define XFS_LOCK_PARENT 1 |
447 | #define XFS_LOCK_INUMORDER 2 | 455 | #define XFS_LOCK_INUMORDER 2 |
448 | 456 | ||
449 | #define XFS_IOLOCK_SHIFT 16 | 457 | #define XFS_IOLOCK_SHIFT 16 |
450 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) | 458 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) |
451 | 459 | ||
452 | #define XFS_ILOCK_SHIFT 24 | 460 | #define XFS_ILOCK_SHIFT 24 |
453 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) | 461 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) |
454 | 462 | ||
455 | #define XFS_IOLOCK_DEP_MASK 0x00ff0000 | 463 | #define XFS_IOLOCK_DEP_MASK 0x00ff0000 |
456 | #define XFS_ILOCK_DEP_MASK 0xff000000 | 464 | #define XFS_ILOCK_DEP_MASK 0xff000000 |
457 | #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) | 465 | #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) |
458 | 466 | ||
459 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) | 467 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) |
460 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) | 468 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) |
461 | 469 | ||
462 | /* | 470 | /* |
463 | * Flags for xfs_iflush() | 471 | * Flags for xfs_iflush() |
464 | */ | 472 | */ |
465 | #define XFS_IFLUSH_DELWRI_ELSE_SYNC 1 | 473 | #define XFS_IFLUSH_DELWRI_ELSE_SYNC 1 |
466 | #define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2 | 474 | #define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2 |
467 | #define XFS_IFLUSH_SYNC 3 | 475 | #define XFS_IFLUSH_SYNC 3 |
468 | #define XFS_IFLUSH_ASYNC 4 | 476 | #define XFS_IFLUSH_ASYNC 4 |
469 | #define XFS_IFLUSH_DELWRI 5 | 477 | #define XFS_IFLUSH_DELWRI 5 |
470 | #define XFS_IFLUSH_ASYNC_NOBLOCK 6 | 478 | #define XFS_IFLUSH_ASYNC_NOBLOCK 6 |
471 | 479 | ||
472 | /* | 480 | /* |
473 | * Flags for xfs_itruncate_start(). | 481 | * Flags for xfs_itruncate_start(). |
474 | */ | 482 | */ |
475 | #define XFS_ITRUNC_DEFINITE 0x1 | 483 | #define XFS_ITRUNC_DEFINITE 0x1 |
476 | #define XFS_ITRUNC_MAYBE 0x2 | 484 | #define XFS_ITRUNC_MAYBE 0x2 |
477 | 485 | ||
478 | /* | 486 | /* |
479 | * For multiple groups support: if S_ISGID bit is set in the parent | 487 | * For multiple groups support: if S_ISGID bit is set in the parent |
480 | * directory, group of new file is set to that of the parent, and | 488 | * directory, group of new file is set to that of the parent, and |
481 | * new subdirectory gets S_ISGID bit from parent. | 489 | * new subdirectory gets S_ISGID bit from parent. |
482 | */ | 490 | */ |
483 | #define XFS_INHERIT_GID(pip) \ | 491 | #define XFS_INHERIT_GID(pip) \ |
484 | (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ | 492 | (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ |
485 | ((pip)->i_d.di_mode & S_ISGID)) | 493 | ((pip)->i_d.di_mode & S_ISGID)) |
486 | 494 | ||
487 | /* | 495 | /* |
488 | * Flags for xfs_iget() | 496 | * Flags for xfs_iget() |
489 | */ | 497 | */ |
490 | #define XFS_IGET_CREATE 0x1 | 498 | #define XFS_IGET_CREATE 0x1 |
491 | #define XFS_IGET_BULKSTAT 0x2 | 499 | #define XFS_IGET_BULKSTAT 0x2 |
492 | 500 | ||
493 | /* | 501 | /* |
494 | * xfs_iget.c prototypes. | 502 | * xfs_iget.c prototypes. |
495 | */ | 503 | */ |
496 | void xfs_ihash_init(struct xfs_mount *); | 504 | void xfs_ihash_init(struct xfs_mount *); |
497 | void xfs_ihash_free(struct xfs_mount *); | 505 | void xfs_ihash_free(struct xfs_mount *); |
498 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, | 506 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, |
499 | struct xfs_trans *); | 507 | struct xfs_trans *); |
500 | int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, | 508 | int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, |
501 | uint, uint, xfs_inode_t **, xfs_daddr_t); | 509 | uint, uint, xfs_inode_t **, xfs_daddr_t); |
502 | void xfs_iput(xfs_inode_t *, uint); | 510 | void xfs_iput(xfs_inode_t *, uint); |
503 | void xfs_iput_new(xfs_inode_t *, uint); | 511 | void xfs_iput_new(xfs_inode_t *, uint); |
504 | void xfs_ilock(xfs_inode_t *, uint); | 512 | void xfs_ilock(xfs_inode_t *, uint); |
505 | int xfs_ilock_nowait(xfs_inode_t *, uint); | 513 | int xfs_ilock_nowait(xfs_inode_t *, uint); |
506 | void xfs_iunlock(xfs_inode_t *, uint); | 514 | void xfs_iunlock(xfs_inode_t *, uint); |
507 | void xfs_ilock_demote(xfs_inode_t *, uint); | 515 | void xfs_ilock_demote(xfs_inode_t *, uint); |
508 | int xfs_isilocked(xfs_inode_t *, uint); | 516 | int xfs_isilocked(xfs_inode_t *, uint); |
509 | uint xfs_ilock_map_shared(xfs_inode_t *); | 517 | uint xfs_ilock_map_shared(xfs_inode_t *); |
510 | void xfs_iunlock_map_shared(xfs_inode_t *, uint); | 518 | void xfs_iunlock_map_shared(xfs_inode_t *, uint); |
511 | void xfs_ireclaim(xfs_inode_t *); | 519 | void xfs_ireclaim(xfs_inode_t *); |
512 | 520 | ||
513 | /* | 521 | /* |
514 | * xfs_inode.c prototypes. | 522 | * xfs_inode.c prototypes. |
515 | */ | 523 | */ |
516 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, | 524 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, |
517 | xfs_inode_t **, xfs_daddr_t, uint); | 525 | xfs_inode_t **, xfs_daddr_t, uint); |
518 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, | 526 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, |
519 | xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, | 527 | xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, |
520 | int, struct xfs_buf **, boolean_t *, xfs_inode_t **); | 528 | int, struct xfs_buf **, boolean_t *, xfs_inode_t **); |
521 | 529 | ||
522 | uint xfs_ip2xflags(struct xfs_inode *); | 530 | uint xfs_ip2xflags(struct xfs_inode *); |
523 | uint xfs_dic2xflags(struct xfs_dinode *); | 531 | uint xfs_dic2xflags(struct xfs_dinode *); |
524 | int xfs_ifree(struct xfs_trans *, xfs_inode_t *, | 532 | int xfs_ifree(struct xfs_trans *, xfs_inode_t *, |
525 | struct xfs_bmap_free *); | 533 | struct xfs_bmap_free *); |
526 | int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); | 534 | int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); |
527 | int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, | 535 | int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, |
528 | xfs_fsize_t, int, int); | 536 | xfs_fsize_t, int, int); |
529 | int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | 537 | int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); |
530 | 538 | ||
531 | void xfs_idestroy(xfs_inode_t *); | 539 | void xfs_idestroy(xfs_inode_t *); |
532 | void xfs_iextract(xfs_inode_t *); | 540 | void xfs_iextract(xfs_inode_t *); |
533 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 541 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
534 | void xfs_ipin(xfs_inode_t *); | 542 | void xfs_ipin(xfs_inode_t *); |
535 | void xfs_iunpin(xfs_inode_t *); | 543 | void xfs_iunpin(xfs_inode_t *); |
536 | int xfs_iflush(xfs_inode_t *, uint); | 544 | int xfs_iflush(xfs_inode_t *, uint); |
537 | void xfs_ichgtime(xfs_inode_t *, int); | 545 | void xfs_ichgtime(xfs_inode_t *, int); |
538 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); | 546 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); |
539 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 547 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
540 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 548 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
541 | 549 | ||
542 | void xfs_synchronize_atime(xfs_inode_t *); | 550 | void xfs_synchronize_atime(xfs_inode_t *); |
543 | void xfs_mark_inode_dirty_sync(xfs_inode_t *); | 551 | void xfs_mark_inode_dirty_sync(xfs_inode_t *); |
544 | 552 | ||
545 | #endif /* __KERNEL__ */ | 553 | #endif /* __KERNEL__ */ |
546 | 554 | ||
547 | int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, | 555 | int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, |
548 | xfs_ino_t, struct xfs_dinode **, | 556 | xfs_ino_t, struct xfs_dinode **, |
549 | struct xfs_buf **, int *, uint); | 557 | struct xfs_buf **, int *, uint); |
550 | int xfs_itobp(struct xfs_mount *, struct xfs_trans *, | 558 | int xfs_itobp(struct xfs_mount *, struct xfs_trans *, |
551 | struct xfs_inode *, struct xfs_dinode **, | 559 | struct xfs_inode *, struct xfs_dinode **, |
552 | struct xfs_buf **, uint); | 560 | struct xfs_buf **, uint); |
553 | void xfs_dinode_from_disk(struct xfs_icdinode *, | 561 | void xfs_dinode_from_disk(struct xfs_icdinode *, |
554 | struct xfs_dinode *); | 562 | struct xfs_dinode *); |
555 | void xfs_dinode_to_disk(struct xfs_dinode *, | 563 | void xfs_dinode_to_disk(struct xfs_dinode *, |
556 | struct xfs_icdinode *); | 564 | struct xfs_icdinode *); |
557 | void xfs_idestroy_fork(struct xfs_inode *, int); | 565 | void xfs_idestroy_fork(struct xfs_inode *, int); |
558 | void xfs_idata_realloc(struct xfs_inode *, int, int); | 566 | void xfs_idata_realloc(struct xfs_inode *, int, int); |
559 | void xfs_iroot_realloc(struct xfs_inode *, int, int); | 567 | void xfs_iroot_realloc(struct xfs_inode *, int, int); |
560 | int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); | 568 | int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); |
561 | int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); | 569 | int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); |
562 | 570 | ||
563 | xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); | 571 | xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); |
564 | void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, | 572 | void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, |
565 | xfs_bmbt_irec_t *); | 573 | xfs_bmbt_irec_t *); |
566 | void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); | 574 | void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); |
567 | void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); | 575 | void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); |
568 | void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); | 576 | void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); |
569 | void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); | 577 | void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); |
570 | void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); | 578 | void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); |
571 | void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); | 579 | void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); |
572 | void xfs_iext_realloc_direct(xfs_ifork_t *, int); | 580 | void xfs_iext_realloc_direct(xfs_ifork_t *, int); |
573 | void xfs_iext_realloc_indirect(xfs_ifork_t *, int); | 581 | void xfs_iext_realloc_indirect(xfs_ifork_t *, int); |
574 | void xfs_iext_indirect_to_direct(xfs_ifork_t *); | 582 | void xfs_iext_indirect_to_direct(xfs_ifork_t *); |
575 | void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); | 583 | void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); |
576 | void xfs_iext_inline_to_direct(xfs_ifork_t *, int); | 584 | void xfs_iext_inline_to_direct(xfs_ifork_t *, int); |
577 | void xfs_iext_destroy(xfs_ifork_t *); | 585 | void xfs_iext_destroy(xfs_ifork_t *); |
578 | xfs_bmbt_rec_host_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *); | 586 | xfs_bmbt_rec_host_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *); |
579 | xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *); | 587 | xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *); |
580 | xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int); | 588 | xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int); |
581 | void xfs_iext_irec_init(xfs_ifork_t *); | 589 | void xfs_iext_irec_init(xfs_ifork_t *); |
582 | xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int); | 590 | xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int); |
583 | void xfs_iext_irec_remove(xfs_ifork_t *, int); | 591 | void xfs_iext_irec_remove(xfs_ifork_t *, int); |
584 | void xfs_iext_irec_compact(xfs_ifork_t *); | 592 | void xfs_iext_irec_compact(xfs_ifork_t *); |
585 | void xfs_iext_irec_compact_pages(xfs_ifork_t *); | 593 | void xfs_iext_irec_compact_pages(xfs_ifork_t *); |
586 | void xfs_iext_irec_compact_full(xfs_ifork_t *); | 594 | void xfs_iext_irec_compact_full(xfs_ifork_t *); |
587 | void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); | 595 | void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); |
588 | 596 | ||
589 | #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) | 597 | #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) |
590 | 598 | ||
591 | #ifdef DEBUG | 599 | #ifdef DEBUG |
592 | void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, | 600 | void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, |
593 | xfs_fsize_t); | 601 | xfs_fsize_t); |
594 | #else /* DEBUG */ | 602 | #else /* DEBUG */ |
595 | #define xfs_isize_check(mp, ip, isize) | 603 | #define xfs_isize_check(mp, ip, isize) |
596 | #endif /* DEBUG */ | 604 | #endif /* DEBUG */ |
597 | 605 | ||
598 | #if defined(DEBUG) | 606 | #if defined(DEBUG) |
599 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | 607 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); |
600 | #else | 608 | #else |
601 | #define xfs_inobp_check(mp, bp) | 609 | #define xfs_inobp_check(mp, bp) |
602 | #endif /* DEBUG */ | 610 | #endif /* DEBUG */ |
603 | 611 | ||
604 | extern struct kmem_zone *xfs_ifork_zone; | 612 | extern struct kmem_zone *xfs_ifork_zone; |
605 | extern struct kmem_zone *xfs_inode_zone; | 613 | extern struct kmem_zone *xfs_inode_zone; |
606 | extern struct kmem_zone *xfs_ili_zone; | 614 | extern struct kmem_zone *xfs_ili_zone; |
607 | 615 |
fs/xfs/xfs_inode_item.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_buf_item.h" | 25 | #include "xfs_buf_item.h" |
26 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
28 | #include "xfs_dir2.h" | 28 | #include "xfs_dir2.h" |
29 | #include "xfs_dmapi.h" | 29 | #include "xfs_dmapi.h" |
30 | #include "xfs_mount.h" | 30 | #include "xfs_mount.h" |
31 | #include "xfs_trans_priv.h" | 31 | #include "xfs_trans_priv.h" |
32 | #include "xfs_bmap_btree.h" | 32 | #include "xfs_bmap_btree.h" |
33 | #include "xfs_alloc_btree.h" | 33 | #include "xfs_alloc_btree.h" |
34 | #include "xfs_ialloc_btree.h" | 34 | #include "xfs_ialloc_btree.h" |
35 | #include "xfs_dir2_sf.h" | 35 | #include "xfs_dir2_sf.h" |
36 | #include "xfs_attr_sf.h" | 36 | #include "xfs_attr_sf.h" |
37 | #include "xfs_dinode.h" | 37 | #include "xfs_dinode.h" |
38 | #include "xfs_inode.h" | 38 | #include "xfs_inode.h" |
39 | #include "xfs_inode_item.h" | 39 | #include "xfs_inode_item.h" |
40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
41 | #include "xfs_ialloc.h" | 41 | #include "xfs_ialloc.h" |
42 | #include "xfs_rw.h" | 42 | #include "xfs_rw.h" |
43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
44 | 44 | ||
45 | 45 | ||
46 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ | 46 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * This returns the number of iovecs needed to log the given inode item. | 49 | * This returns the number of iovecs needed to log the given inode item. |
50 | * | 50 | * |
51 | * We need one iovec for the inode log format structure, one for the | 51 | * We need one iovec for the inode log format structure, one for the |
52 | * inode core, and possibly one for the inode data/extents/b-tree root | 52 | * inode core, and possibly one for the inode data/extents/b-tree root |
53 | * and one for the inode attribute data/extents/b-tree root. | 53 | * and one for the inode attribute data/extents/b-tree root. |
54 | */ | 54 | */ |
55 | STATIC uint | 55 | STATIC uint |
56 | xfs_inode_item_size( | 56 | xfs_inode_item_size( |
57 | xfs_inode_log_item_t *iip) | 57 | xfs_inode_log_item_t *iip) |
58 | { | 58 | { |
59 | uint nvecs; | 59 | uint nvecs; |
60 | xfs_inode_t *ip; | 60 | xfs_inode_t *ip; |
61 | 61 | ||
62 | ip = iip->ili_inode; | 62 | ip = iip->ili_inode; |
63 | nvecs = 2; | 63 | nvecs = 2; |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Only log the data/extents/b-tree root if there is something | 66 | * Only log the data/extents/b-tree root if there is something |
67 | * left to log. | 67 | * left to log. |
68 | */ | 68 | */ |
69 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; | 69 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; |
70 | 70 | ||
71 | switch (ip->i_d.di_format) { | 71 | switch (ip->i_d.di_format) { |
72 | case XFS_DINODE_FMT_EXTENTS: | 72 | case XFS_DINODE_FMT_EXTENTS: |
73 | iip->ili_format.ilf_fields &= | 73 | iip->ili_format.ilf_fields &= |
74 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 74 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
75 | XFS_ILOG_DEV | XFS_ILOG_UUID); | 75 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
76 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) && | 76 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) && |
77 | (ip->i_d.di_nextents > 0) && | 77 | (ip->i_d.di_nextents > 0) && |
78 | (ip->i_df.if_bytes > 0)) { | 78 | (ip->i_df.if_bytes > 0)) { |
79 | ASSERT(ip->i_df.if_u1.if_extents != NULL); | 79 | ASSERT(ip->i_df.if_u1.if_extents != NULL); |
80 | nvecs++; | 80 | nvecs++; |
81 | } else { | 81 | } else { |
82 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT; | 82 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT; |
83 | } | 83 | } |
84 | break; | 84 | break; |
85 | 85 | ||
86 | case XFS_DINODE_FMT_BTREE: | 86 | case XFS_DINODE_FMT_BTREE: |
87 | ASSERT(ip->i_df.if_ext_max == | 87 | ASSERT(ip->i_df.if_ext_max == |
88 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); | 88 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); |
89 | iip->ili_format.ilf_fields &= | 89 | iip->ili_format.ilf_fields &= |
90 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | | 90 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | |
91 | XFS_ILOG_DEV | XFS_ILOG_UUID); | 91 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
92 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) && | 92 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) && |
93 | (ip->i_df.if_broot_bytes > 0)) { | 93 | (ip->i_df.if_broot_bytes > 0)) { |
94 | ASSERT(ip->i_df.if_broot != NULL); | 94 | ASSERT(ip->i_df.if_broot != NULL); |
95 | nvecs++; | 95 | nvecs++; |
96 | } else { | 96 | } else { |
97 | ASSERT(!(iip->ili_format.ilf_fields & | 97 | ASSERT(!(iip->ili_format.ilf_fields & |
98 | XFS_ILOG_DBROOT)); | 98 | XFS_ILOG_DBROOT)); |
99 | #ifdef XFS_TRANS_DEBUG | 99 | #ifdef XFS_TRANS_DEBUG |
100 | if (iip->ili_root_size > 0) { | 100 | if (iip->ili_root_size > 0) { |
101 | ASSERT(iip->ili_root_size == | 101 | ASSERT(iip->ili_root_size == |
102 | ip->i_df.if_broot_bytes); | 102 | ip->i_df.if_broot_bytes); |
103 | ASSERT(memcmp(iip->ili_orig_root, | 103 | ASSERT(memcmp(iip->ili_orig_root, |
104 | ip->i_df.if_broot, | 104 | ip->i_df.if_broot, |
105 | iip->ili_root_size) == 0); | 105 | iip->ili_root_size) == 0); |
106 | } else { | 106 | } else { |
107 | ASSERT(ip->i_df.if_broot_bytes == 0); | 107 | ASSERT(ip->i_df.if_broot_bytes == 0); |
108 | } | 108 | } |
109 | #endif | 109 | #endif |
110 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT; | 110 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT; |
111 | } | 111 | } |
112 | break; | 112 | break; |
113 | 113 | ||
114 | case XFS_DINODE_FMT_LOCAL: | 114 | case XFS_DINODE_FMT_LOCAL: |
115 | iip->ili_format.ilf_fields &= | 115 | iip->ili_format.ilf_fields &= |
116 | ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | | 116 | ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | |
117 | XFS_ILOG_DEV | XFS_ILOG_UUID); | 117 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
118 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) && | 118 | if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) && |
119 | (ip->i_df.if_bytes > 0)) { | 119 | (ip->i_df.if_bytes > 0)) { |
120 | ASSERT(ip->i_df.if_u1.if_data != NULL); | 120 | ASSERT(ip->i_df.if_u1.if_data != NULL); |
121 | ASSERT(ip->i_d.di_size > 0); | 121 | ASSERT(ip->i_d.di_size > 0); |
122 | nvecs++; | 122 | nvecs++; |
123 | } else { | 123 | } else { |
124 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA; | 124 | iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA; |
125 | } | 125 | } |
126 | break; | 126 | break; |
127 | 127 | ||
128 | case XFS_DINODE_FMT_DEV: | 128 | case XFS_DINODE_FMT_DEV: |
129 | iip->ili_format.ilf_fields &= | 129 | iip->ili_format.ilf_fields &= |
130 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 130 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
131 | XFS_ILOG_DEXT | XFS_ILOG_UUID); | 131 | XFS_ILOG_DEXT | XFS_ILOG_UUID); |
132 | break; | 132 | break; |
133 | 133 | ||
134 | case XFS_DINODE_FMT_UUID: | 134 | case XFS_DINODE_FMT_UUID: |
135 | iip->ili_format.ilf_fields &= | 135 | iip->ili_format.ilf_fields &= |
136 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 136 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
137 | XFS_ILOG_DEXT | XFS_ILOG_DEV); | 137 | XFS_ILOG_DEXT | XFS_ILOG_DEV); |
138 | break; | 138 | break; |
139 | 139 | ||
140 | default: | 140 | default: |
141 | ASSERT(0); | 141 | ASSERT(0); |
142 | break; | 142 | break; |
143 | } | 143 | } |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * If there are no attributes associated with this file, | 146 | * If there are no attributes associated with this file, |
147 | * then there cannot be anything more to log. | 147 | * then there cannot be anything more to log. |
148 | * Clear all attribute-related log flags. | 148 | * Clear all attribute-related log flags. |
149 | */ | 149 | */ |
150 | if (!XFS_IFORK_Q(ip)) { | 150 | if (!XFS_IFORK_Q(ip)) { |
151 | iip->ili_format.ilf_fields &= | 151 | iip->ili_format.ilf_fields &= |
152 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); | 152 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); |
153 | return nvecs; | 153 | return nvecs; |
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * Log any necessary attribute data. | 157 | * Log any necessary attribute data. |
158 | */ | 158 | */ |
159 | switch (ip->i_d.di_aformat) { | 159 | switch (ip->i_d.di_aformat) { |
160 | case XFS_DINODE_FMT_EXTENTS: | 160 | case XFS_DINODE_FMT_EXTENTS: |
161 | iip->ili_format.ilf_fields &= | 161 | iip->ili_format.ilf_fields &= |
162 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); | 162 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); |
163 | if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && | 163 | if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && |
164 | (ip->i_d.di_anextents > 0) && | 164 | (ip->i_d.di_anextents > 0) && |
165 | (ip->i_afp->if_bytes > 0)) { | 165 | (ip->i_afp->if_bytes > 0)) { |
166 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); | 166 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); |
167 | nvecs++; | 167 | nvecs++; |
168 | } else { | 168 | } else { |
169 | iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT; | 169 | iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT; |
170 | } | 170 | } |
171 | break; | 171 | break; |
172 | 172 | ||
173 | case XFS_DINODE_FMT_BTREE: | 173 | case XFS_DINODE_FMT_BTREE: |
174 | iip->ili_format.ilf_fields &= | 174 | iip->ili_format.ilf_fields &= |
175 | ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); | 175 | ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); |
176 | if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) && | 176 | if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) && |
177 | (ip->i_afp->if_broot_bytes > 0)) { | 177 | (ip->i_afp->if_broot_bytes > 0)) { |
178 | ASSERT(ip->i_afp->if_broot != NULL); | 178 | ASSERT(ip->i_afp->if_broot != NULL); |
179 | nvecs++; | 179 | nvecs++; |
180 | } else { | 180 | } else { |
181 | iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT; | 181 | iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT; |
182 | } | 182 | } |
183 | break; | 183 | break; |
184 | 184 | ||
185 | case XFS_DINODE_FMT_LOCAL: | 185 | case XFS_DINODE_FMT_LOCAL: |
186 | iip->ili_format.ilf_fields &= | 186 | iip->ili_format.ilf_fields &= |
187 | ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); | 187 | ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); |
188 | if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) && | 188 | if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) && |
189 | (ip->i_afp->if_bytes > 0)) { | 189 | (ip->i_afp->if_bytes > 0)) { |
190 | ASSERT(ip->i_afp->if_u1.if_data != NULL); | 190 | ASSERT(ip->i_afp->if_u1.if_data != NULL); |
191 | nvecs++; | 191 | nvecs++; |
192 | } else { | 192 | } else { |
193 | iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA; | 193 | iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA; |
194 | } | 194 | } |
195 | break; | 195 | break; |
196 | 196 | ||
197 | default: | 197 | default: |
198 | ASSERT(0); | 198 | ASSERT(0); |
199 | break; | 199 | break; |
200 | } | 200 | } |
201 | 201 | ||
202 | return nvecs; | 202 | return nvecs; |
203 | } | 203 | } |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * This is called to fill in the vector of log iovecs for the | 206 | * This is called to fill in the vector of log iovecs for the |
207 | * given inode log item. It fills the first item with an inode | 207 | * given inode log item. It fills the first item with an inode |
208 | * log format structure, the second with the on-disk inode structure, | 208 | * log format structure, the second with the on-disk inode structure, |
209 | * and a possible third and/or fourth with the inode data/extents/b-tree | 209 | * and a possible third and/or fourth with the inode data/extents/b-tree |
210 | * root and inode attributes data/extents/b-tree root. | 210 | * root and inode attributes data/extents/b-tree root. |
211 | */ | 211 | */ |
212 | STATIC void | 212 | STATIC void |
213 | xfs_inode_item_format( | 213 | xfs_inode_item_format( |
214 | xfs_inode_log_item_t *iip, | 214 | xfs_inode_log_item_t *iip, |
215 | xfs_log_iovec_t *log_vector) | 215 | xfs_log_iovec_t *log_vector) |
216 | { | 216 | { |
217 | uint nvecs; | 217 | uint nvecs; |
218 | xfs_log_iovec_t *vecp; | 218 | xfs_log_iovec_t *vecp; |
219 | xfs_inode_t *ip; | 219 | xfs_inode_t *ip; |
220 | size_t data_bytes; | 220 | size_t data_bytes; |
221 | xfs_bmbt_rec_t *ext_buffer; | 221 | xfs_bmbt_rec_t *ext_buffer; |
222 | int nrecs; | 222 | int nrecs; |
223 | xfs_mount_t *mp; | 223 | xfs_mount_t *mp; |
224 | 224 | ||
225 | ip = iip->ili_inode; | 225 | ip = iip->ili_inode; |
226 | vecp = log_vector; | 226 | vecp = log_vector; |
227 | 227 | ||
228 | vecp->i_addr = (xfs_caddr_t)&iip->ili_format; | 228 | vecp->i_addr = (xfs_caddr_t)&iip->ili_format; |
229 | vecp->i_len = sizeof(xfs_inode_log_format_t); | 229 | vecp->i_len = sizeof(xfs_inode_log_format_t); |
230 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); | 230 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); |
231 | vecp++; | 231 | vecp++; |
232 | nvecs = 1; | 232 | nvecs = 1; |
233 | 233 | ||
234 | /* | 234 | /* |
235 | * Clear i_update_core if the timestamps (or any other | 235 | * Clear i_update_core if the timestamps (or any other |
236 | * non-transactional modification) need flushing/logging | 236 | * non-transactional modification) need flushing/logging |
237 | * and we're about to log them with the rest of the core. | 237 | * and we're about to log them with the rest of the core. |
238 | * | 238 | * |
239 | * This is the same logic as xfs_iflush() but this code can't | 239 | * This is the same logic as xfs_iflush() but this code can't |
240 | * run at the same time as xfs_iflush because we're in commit | 240 | * run at the same time as xfs_iflush because we're in commit |
241 | * processing here and so we have the inode lock held in | 241 | * processing here and so we have the inode lock held in |
242 | * exclusive mode. Although it doesn't really matter | 242 | * exclusive mode. Although it doesn't really matter |
243 | * for the timestamps if both routines were to grab the | 243 | * for the timestamps if both routines were to grab the |
244 | * timestamps or not. That would be ok. | 244 | * timestamps or not. That would be ok. |
245 | * | 245 | * |
246 | * We clear i_update_core before copying out the data. | 246 | * We clear i_update_core before copying out the data. |
247 | * This is for coordination with our timestamp updates | 247 | * This is for coordination with our timestamp updates |
248 | * that don't hold the inode lock. They will always | 248 | * that don't hold the inode lock. They will always |
249 | * update the timestamps BEFORE setting i_update_core, | 249 | * update the timestamps BEFORE setting i_update_core, |
250 | * so if we clear i_update_core after they set it we | 250 | * so if we clear i_update_core after they set it we |
251 | * are guaranteed to see their updates to the timestamps | 251 | * are guaranteed to see their updates to the timestamps |
252 | * either here. Likewise, if they set it after we clear it | 252 | * either here. Likewise, if they set it after we clear it |
253 | * here, we'll see it either on the next commit of this | 253 | * here, we'll see it either on the next commit of this |
254 | * inode or the next time the inode gets flushed via | 254 | * inode or the next time the inode gets flushed via |
255 | * xfs_iflush(). This depends on strongly ordered memory | 255 | * xfs_iflush(). This depends on strongly ordered memory |
256 | * semantics, but we have that. We use the SYNCHRONIZE | 256 | * semantics, but we have that. We use the SYNCHRONIZE |
257 | * macro to make sure that the compiler does not reorder | 257 | * macro to make sure that the compiler does not reorder |
258 | * the i_update_core access below the data copy below. | 258 | * the i_update_core access below the data copy below. |
259 | */ | 259 | */ |
260 | if (ip->i_update_core) { | 260 | if (ip->i_update_core) { |
261 | ip->i_update_core = 0; | 261 | ip->i_update_core = 0; |
262 | SYNCHRONIZE(); | 262 | SYNCHRONIZE(); |
263 | } | 263 | } |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * We don't have to worry about re-ordering here because | 266 | * We don't have to worry about re-ordering here because |
267 | * the update_size field is protected by the inode lock | 267 | * the update_size field is protected by the inode lock |
268 | * and we have that held in exclusive mode. | 268 | * and we have that held in exclusive mode. |
269 | */ | 269 | */ |
270 | if (ip->i_update_size) | 270 | if (ip->i_update_size) |
271 | ip->i_update_size = 0; | 271 | ip->i_update_size = 0; |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * Make sure to get the latest atime from the Linux inode. | 274 | * Make sure to get the latest atime from the Linux inode. |
275 | */ | 275 | */ |
276 | xfs_synchronize_atime(ip); | 276 | xfs_synchronize_atime(ip); |
277 | 277 | ||
278 | /* | 278 | /* |
279 | * make sure the linux inode is dirty | 279 | * make sure the linux inode is dirty |
280 | */ | 280 | */ |
281 | xfs_mark_inode_dirty_sync(ip); | 281 | xfs_mark_inode_dirty_sync(ip); |
282 | 282 | ||
283 | vecp->i_addr = (xfs_caddr_t)&ip->i_d; | 283 | vecp->i_addr = (xfs_caddr_t)&ip->i_d; |
284 | vecp->i_len = sizeof(struct xfs_icdinode); | 284 | vecp->i_len = sizeof(struct xfs_icdinode); |
285 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); | 285 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); |
286 | vecp++; | 286 | vecp++; |
287 | nvecs++; | 287 | nvecs++; |
288 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; | 288 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; |
289 | 289 | ||
290 | /* | 290 | /* |
291 | * If this is really an old format inode, then we need to | 291 | * If this is really an old format inode, then we need to |
292 | * log it as such. This means that we have to copy the link | 292 | * log it as such. This means that we have to copy the link |
293 | * count from the new field to the old. We don't have to worry | 293 | * count from the new field to the old. We don't have to worry |
294 | * about the new fields, because nothing trusts them as long as | 294 | * about the new fields, because nothing trusts them as long as |
295 | * the old inode version number is there. If the superblock already | 295 | * the old inode version number is there. If the superblock already |
296 | * has a new version number, then we don't bother converting back. | 296 | * has a new version number, then we don't bother converting back. |
297 | */ | 297 | */ |
298 | mp = ip->i_mount; | 298 | mp = ip->i_mount; |
299 | ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); | 299 | ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); |
300 | if (ip->i_d.di_version == 1) { | 300 | if (ip->i_d.di_version == 1) { |
301 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { | 301 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { |
302 | /* | 302 | /* |
303 | * Convert it back. | 303 | * Convert it back. |
304 | */ | 304 | */ |
305 | ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); | 305 | ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); |
306 | ip->i_d.di_onlink = ip->i_d.di_nlink; | 306 | ip->i_d.di_onlink = ip->i_d.di_nlink; |
307 | } else { | 307 | } else { |
308 | /* | 308 | /* |
309 | * The superblock version has already been bumped, | 309 | * The superblock version has already been bumped, |
310 | * so just make the conversion to the new inode | 310 | * so just make the conversion to the new inode |
311 | * format permanent. | 311 | * format permanent. |
312 | */ | 312 | */ |
313 | ip->i_d.di_version = 2; | 313 | ip->i_d.di_version = 2; |
314 | ip->i_d.di_onlink = 0; | 314 | ip->i_d.di_onlink = 0; |
315 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 315 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
316 | } | 316 | } |
317 | } | 317 | } |
318 | 318 | ||
319 | switch (ip->i_d.di_format) { | 319 | switch (ip->i_d.di_format) { |
320 | case XFS_DINODE_FMT_EXTENTS: | 320 | case XFS_DINODE_FMT_EXTENTS: |
321 | ASSERT(!(iip->ili_format.ilf_fields & | 321 | ASSERT(!(iip->ili_format.ilf_fields & |
322 | (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | | 322 | (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | |
323 | XFS_ILOG_DEV | XFS_ILOG_UUID))); | 323 | XFS_ILOG_DEV | XFS_ILOG_UUID))); |
324 | if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { | 324 | if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { |
325 | ASSERT(ip->i_df.if_bytes > 0); | 325 | ASSERT(ip->i_df.if_bytes > 0); |
326 | ASSERT(ip->i_df.if_u1.if_extents != NULL); | 326 | ASSERT(ip->i_df.if_u1.if_extents != NULL); |
327 | ASSERT(ip->i_d.di_nextents > 0); | 327 | ASSERT(ip->i_d.di_nextents > 0); |
328 | ASSERT(iip->ili_extents_buf == NULL); | 328 | ASSERT(iip->ili_extents_buf == NULL); |
329 | nrecs = ip->i_df.if_bytes / | 329 | nrecs = ip->i_df.if_bytes / |
330 | (uint)sizeof(xfs_bmbt_rec_t); | 330 | (uint)sizeof(xfs_bmbt_rec_t); |
331 | ASSERT(nrecs > 0); | 331 | ASSERT(nrecs > 0); |
332 | #ifdef XFS_NATIVE_HOST | 332 | #ifdef XFS_NATIVE_HOST |
333 | if (nrecs == ip->i_d.di_nextents) { | 333 | if (nrecs == ip->i_d.di_nextents) { |
334 | /* | 334 | /* |
335 | * There are no delayed allocation | 335 | * There are no delayed allocation |
336 | * extents, so just point to the | 336 | * extents, so just point to the |
337 | * real extents array. | 337 | * real extents array. |
338 | */ | 338 | */ |
339 | vecp->i_addr = | 339 | vecp->i_addr = |
340 | (char *)(ip->i_df.if_u1.if_extents); | 340 | (char *)(ip->i_df.if_u1.if_extents); |
341 | vecp->i_len = ip->i_df.if_bytes; | 341 | vecp->i_len = ip->i_df.if_bytes; |
342 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); | 342 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); |
343 | } else | 343 | } else |
344 | #endif | 344 | #endif |
345 | { | 345 | { |
346 | /* | 346 | /* |
347 | * There are delayed allocation extents | 347 | * There are delayed allocation extents |
348 | * in the inode, or we need to convert | 348 | * in the inode, or we need to convert |
349 | * the extents to on disk format. | 349 | * the extents to on disk format. |
350 | * Use xfs_iextents_copy() | 350 | * Use xfs_iextents_copy() |
351 | * to copy only the real extents into | 351 | * to copy only the real extents into |
352 | * a separate buffer. We'll free the | 352 | * a separate buffer. We'll free the |
353 | * buffer in the unlock routine. | 353 | * buffer in the unlock routine. |
354 | */ | 354 | */ |
355 | ext_buffer = kmem_alloc(ip->i_df.if_bytes, | 355 | ext_buffer = kmem_alloc(ip->i_df.if_bytes, |
356 | KM_SLEEP); | 356 | KM_SLEEP); |
357 | iip->ili_extents_buf = ext_buffer; | 357 | iip->ili_extents_buf = ext_buffer; |
358 | vecp->i_addr = (xfs_caddr_t)ext_buffer; | 358 | vecp->i_addr = (xfs_caddr_t)ext_buffer; |
359 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | 359 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, |
360 | XFS_DATA_FORK); | 360 | XFS_DATA_FORK); |
361 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); | 361 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); |
362 | } | 362 | } |
363 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); | 363 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); |
364 | iip->ili_format.ilf_dsize = vecp->i_len; | 364 | iip->ili_format.ilf_dsize = vecp->i_len; |
365 | vecp++; | 365 | vecp++; |
366 | nvecs++; | 366 | nvecs++; |
367 | } | 367 | } |
368 | break; | 368 | break; |
369 | 369 | ||
370 | case XFS_DINODE_FMT_BTREE: | 370 | case XFS_DINODE_FMT_BTREE: |
371 | ASSERT(!(iip->ili_format.ilf_fields & | 371 | ASSERT(!(iip->ili_format.ilf_fields & |
372 | (XFS_ILOG_DDATA | XFS_ILOG_DEXT | | 372 | (XFS_ILOG_DDATA | XFS_ILOG_DEXT | |
373 | XFS_ILOG_DEV | XFS_ILOG_UUID))); | 373 | XFS_ILOG_DEV | XFS_ILOG_UUID))); |
374 | if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { | 374 | if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { |
375 | ASSERT(ip->i_df.if_broot_bytes > 0); | 375 | ASSERT(ip->i_df.if_broot_bytes > 0); |
376 | ASSERT(ip->i_df.if_broot != NULL); | 376 | ASSERT(ip->i_df.if_broot != NULL); |
377 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; | 377 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; |
378 | vecp->i_len = ip->i_df.if_broot_bytes; | 378 | vecp->i_len = ip->i_df.if_broot_bytes; |
379 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); | 379 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); |
380 | vecp++; | 380 | vecp++; |
381 | nvecs++; | 381 | nvecs++; |
382 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; | 382 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; |
383 | } | 383 | } |
384 | break; | 384 | break; |
385 | 385 | ||
386 | case XFS_DINODE_FMT_LOCAL: | 386 | case XFS_DINODE_FMT_LOCAL: |
387 | ASSERT(!(iip->ili_format.ilf_fields & | 387 | ASSERT(!(iip->ili_format.ilf_fields & |
388 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | | 388 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | |
389 | XFS_ILOG_DEV | XFS_ILOG_UUID))); | 389 | XFS_ILOG_DEV | XFS_ILOG_UUID))); |
390 | if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { | 390 | if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { |
391 | ASSERT(ip->i_df.if_bytes > 0); | 391 | ASSERT(ip->i_df.if_bytes > 0); |
392 | ASSERT(ip->i_df.if_u1.if_data != NULL); | 392 | ASSERT(ip->i_df.if_u1.if_data != NULL); |
393 | ASSERT(ip->i_d.di_size > 0); | 393 | ASSERT(ip->i_d.di_size > 0); |
394 | 394 | ||
395 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; | 395 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; |
396 | /* | 396 | /* |
397 | * Round i_bytes up to a word boundary. | 397 | * Round i_bytes up to a word boundary. |
398 | * The underlying memory is guaranteed to | 398 | * The underlying memory is guaranteed to |
399 | * to be there by xfs_idata_realloc(). | 399 | * to be there by xfs_idata_realloc(). |
400 | */ | 400 | */ |
401 | data_bytes = roundup(ip->i_df.if_bytes, 4); | 401 | data_bytes = roundup(ip->i_df.if_bytes, 4); |
402 | ASSERT((ip->i_df.if_real_bytes == 0) || | 402 | ASSERT((ip->i_df.if_real_bytes == 0) || |
403 | (ip->i_df.if_real_bytes == data_bytes)); | 403 | (ip->i_df.if_real_bytes == data_bytes)); |
404 | vecp->i_len = (int)data_bytes; | 404 | vecp->i_len = (int)data_bytes; |
405 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); | 405 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); |
406 | vecp++; | 406 | vecp++; |
407 | nvecs++; | 407 | nvecs++; |
408 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; | 408 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; |
409 | } | 409 | } |
410 | break; | 410 | break; |
411 | 411 | ||
412 | case XFS_DINODE_FMT_DEV: | 412 | case XFS_DINODE_FMT_DEV: |
413 | ASSERT(!(iip->ili_format.ilf_fields & | 413 | ASSERT(!(iip->ili_format.ilf_fields & |
414 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | | 414 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | |
415 | XFS_ILOG_DDATA | XFS_ILOG_UUID))); | 415 | XFS_ILOG_DDATA | XFS_ILOG_UUID))); |
416 | if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { | 416 | if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { |
417 | iip->ili_format.ilf_u.ilfu_rdev = | 417 | iip->ili_format.ilf_u.ilfu_rdev = |
418 | ip->i_df.if_u2.if_rdev; | 418 | ip->i_df.if_u2.if_rdev; |
419 | } | 419 | } |
420 | break; | 420 | break; |
421 | 421 | ||
422 | case XFS_DINODE_FMT_UUID: | 422 | case XFS_DINODE_FMT_UUID: |
423 | ASSERT(!(iip->ili_format.ilf_fields & | 423 | ASSERT(!(iip->ili_format.ilf_fields & |
424 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | | 424 | (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | |
425 | XFS_ILOG_DDATA | XFS_ILOG_DEV))); | 425 | XFS_ILOG_DDATA | XFS_ILOG_DEV))); |
426 | if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { | 426 | if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { |
427 | iip->ili_format.ilf_u.ilfu_uuid = | 427 | iip->ili_format.ilf_u.ilfu_uuid = |
428 | ip->i_df.if_u2.if_uuid; | 428 | ip->i_df.if_u2.if_uuid; |
429 | } | 429 | } |
430 | break; | 430 | break; |
431 | 431 | ||
432 | default: | 432 | default: |
433 | ASSERT(0); | 433 | ASSERT(0); |
434 | break; | 434 | break; |
435 | } | 435 | } |
436 | 436 | ||
437 | /* | 437 | /* |
438 | * If there are no attributes associated with the file, | 438 | * If there are no attributes associated with the file, |
439 | * then we're done. | 439 | * then we're done. |
440 | * Assert that no attribute-related log flags are set. | 440 | * Assert that no attribute-related log flags are set. |
441 | */ | 441 | */ |
442 | if (!XFS_IFORK_Q(ip)) { | 442 | if (!XFS_IFORK_Q(ip)) { |
443 | ASSERT(nvecs == iip->ili_item.li_desc->lid_size); | 443 | ASSERT(nvecs == iip->ili_item.li_desc->lid_size); |
444 | iip->ili_format.ilf_size = nvecs; | 444 | iip->ili_format.ilf_size = nvecs; |
445 | ASSERT(!(iip->ili_format.ilf_fields & | 445 | ASSERT(!(iip->ili_format.ilf_fields & |
446 | (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); | 446 | (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); |
447 | return; | 447 | return; |
448 | } | 448 | } |
449 | 449 | ||
450 | switch (ip->i_d.di_aformat) { | 450 | switch (ip->i_d.di_aformat) { |
451 | case XFS_DINODE_FMT_EXTENTS: | 451 | case XFS_DINODE_FMT_EXTENTS: |
452 | ASSERT(!(iip->ili_format.ilf_fields & | 452 | ASSERT(!(iip->ili_format.ilf_fields & |
453 | (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); | 453 | (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); |
454 | if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { | 454 | if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { |
455 | ASSERT(ip->i_afp->if_bytes > 0); | 455 | ASSERT(ip->i_afp->if_bytes > 0); |
456 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); | 456 | ASSERT(ip->i_afp->if_u1.if_extents != NULL); |
457 | ASSERT(ip->i_d.di_anextents > 0); | 457 | ASSERT(ip->i_d.di_anextents > 0); |
458 | #ifdef DEBUG | 458 | #ifdef DEBUG |
459 | nrecs = ip->i_afp->if_bytes / | 459 | nrecs = ip->i_afp->if_bytes / |
460 | (uint)sizeof(xfs_bmbt_rec_t); | 460 | (uint)sizeof(xfs_bmbt_rec_t); |
461 | #endif | 461 | #endif |
462 | ASSERT(nrecs > 0); | 462 | ASSERT(nrecs > 0); |
463 | ASSERT(nrecs == ip->i_d.di_anextents); | 463 | ASSERT(nrecs == ip->i_d.di_anextents); |
464 | #ifdef XFS_NATIVE_HOST | 464 | #ifdef XFS_NATIVE_HOST |
465 | /* | 465 | /* |
466 | * There are not delayed allocation extents | 466 | * There are not delayed allocation extents |
467 | * for attributes, so just point at the array. | 467 | * for attributes, so just point at the array. |
468 | */ | 468 | */ |
469 | vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); | 469 | vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); |
470 | vecp->i_len = ip->i_afp->if_bytes; | 470 | vecp->i_len = ip->i_afp->if_bytes; |
471 | #else | 471 | #else |
472 | ASSERT(iip->ili_aextents_buf == NULL); | 472 | ASSERT(iip->ili_aextents_buf == NULL); |
473 | /* | 473 | /* |
474 | * Need to endian flip before logging | 474 | * Need to endian flip before logging |
475 | */ | 475 | */ |
476 | ext_buffer = kmem_alloc(ip->i_afp->if_bytes, | 476 | ext_buffer = kmem_alloc(ip->i_afp->if_bytes, |
477 | KM_SLEEP); | 477 | KM_SLEEP); |
478 | iip->ili_aextents_buf = ext_buffer; | 478 | iip->ili_aextents_buf = ext_buffer; |
479 | vecp->i_addr = (xfs_caddr_t)ext_buffer; | 479 | vecp->i_addr = (xfs_caddr_t)ext_buffer; |
480 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | 480 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, |
481 | XFS_ATTR_FORK); | 481 | XFS_ATTR_FORK); |
482 | #endif | 482 | #endif |
483 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); | 483 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); |
484 | iip->ili_format.ilf_asize = vecp->i_len; | 484 | iip->ili_format.ilf_asize = vecp->i_len; |
485 | vecp++; | 485 | vecp++; |
486 | nvecs++; | 486 | nvecs++; |
487 | } | 487 | } |
488 | break; | 488 | break; |
489 | 489 | ||
490 | case XFS_DINODE_FMT_BTREE: | 490 | case XFS_DINODE_FMT_BTREE: |
491 | ASSERT(!(iip->ili_format.ilf_fields & | 491 | ASSERT(!(iip->ili_format.ilf_fields & |
492 | (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); | 492 | (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); |
493 | if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { | 493 | if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { |
494 | ASSERT(ip->i_afp->if_broot_bytes > 0); | 494 | ASSERT(ip->i_afp->if_broot_bytes > 0); |
495 | ASSERT(ip->i_afp->if_broot != NULL); | 495 | ASSERT(ip->i_afp->if_broot != NULL); |
496 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; | 496 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; |
497 | vecp->i_len = ip->i_afp->if_broot_bytes; | 497 | vecp->i_len = ip->i_afp->if_broot_bytes; |
498 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); | 498 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); |
499 | vecp++; | 499 | vecp++; |
500 | nvecs++; | 500 | nvecs++; |
501 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; | 501 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; |
502 | } | 502 | } |
503 | break; | 503 | break; |
504 | 504 | ||
505 | case XFS_DINODE_FMT_LOCAL: | 505 | case XFS_DINODE_FMT_LOCAL: |
506 | ASSERT(!(iip->ili_format.ilf_fields & | 506 | ASSERT(!(iip->ili_format.ilf_fields & |
507 | (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); | 507 | (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); |
508 | if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { | 508 | if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { |
509 | ASSERT(ip->i_afp->if_bytes > 0); | 509 | ASSERT(ip->i_afp->if_bytes > 0); |
510 | ASSERT(ip->i_afp->if_u1.if_data != NULL); | 510 | ASSERT(ip->i_afp->if_u1.if_data != NULL); |
511 | 511 | ||
512 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; | 512 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; |
513 | /* | 513 | /* |
514 | * Round i_bytes up to a word boundary. | 514 | * Round i_bytes up to a word boundary. |
515 | * The underlying memory is guaranteed to | 515 | * The underlying memory is guaranteed to |
516 | * to be there by xfs_idata_realloc(). | 516 | * to be there by xfs_idata_realloc(). |
517 | */ | 517 | */ |
518 | data_bytes = roundup(ip->i_afp->if_bytes, 4); | 518 | data_bytes = roundup(ip->i_afp->if_bytes, 4); |
519 | ASSERT((ip->i_afp->if_real_bytes == 0) || | 519 | ASSERT((ip->i_afp->if_real_bytes == 0) || |
520 | (ip->i_afp->if_real_bytes == data_bytes)); | 520 | (ip->i_afp->if_real_bytes == data_bytes)); |
521 | vecp->i_len = (int)data_bytes; | 521 | vecp->i_len = (int)data_bytes; |
522 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); | 522 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); |
523 | vecp++; | 523 | vecp++; |
524 | nvecs++; | 524 | nvecs++; |
525 | iip->ili_format.ilf_asize = (unsigned)data_bytes; | 525 | iip->ili_format.ilf_asize = (unsigned)data_bytes; |
526 | } | 526 | } |
527 | break; | 527 | break; |
528 | 528 | ||
529 | default: | 529 | default: |
530 | ASSERT(0); | 530 | ASSERT(0); |
531 | break; | 531 | break; |
532 | } | 532 | } |
533 | 533 | ||
534 | ASSERT(nvecs == iip->ili_item.li_desc->lid_size); | 534 | ASSERT(nvecs == iip->ili_item.li_desc->lid_size); |
535 | iip->ili_format.ilf_size = nvecs; | 535 | iip->ili_format.ilf_size = nvecs; |
536 | } | 536 | } |
537 | 537 | ||
538 | 538 | ||
539 | /* | 539 | /* |
540 | * This is called to pin the inode associated with the inode log | 540 | * This is called to pin the inode associated with the inode log |
541 | * item in memory so it cannot be written out. Do this by calling | 541 | * item in memory so it cannot be written out. Do this by calling |
542 | * xfs_ipin() to bump the pin count in the inode while holding the | 542 | * xfs_ipin() to bump the pin count in the inode while holding the |
543 | * inode pin lock. | 543 | * inode pin lock. |
544 | */ | 544 | */ |
545 | STATIC void | 545 | STATIC void |
546 | xfs_inode_item_pin( | 546 | xfs_inode_item_pin( |
547 | xfs_inode_log_item_t *iip) | 547 | xfs_inode_log_item_t *iip) |
548 | { | 548 | { |
549 | ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); | 549 | ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); |
550 | xfs_ipin(iip->ili_inode); | 550 | xfs_ipin(iip->ili_inode); |
551 | } | 551 | } |
552 | 552 | ||
553 | 553 | ||
554 | /* | 554 | /* |
555 | * This is called to unpin the inode associated with the inode log | 555 | * This is called to unpin the inode associated with the inode log |
556 | * item which was previously pinned with a call to xfs_inode_item_pin(). | 556 | * item which was previously pinned with a call to xfs_inode_item_pin(). |
557 | * Just call xfs_iunpin() on the inode to do this. | 557 | * Just call xfs_iunpin() on the inode to do this. |
558 | */ | 558 | */ |
559 | /* ARGSUSED */ | 559 | /* ARGSUSED */ |
560 | STATIC void | 560 | STATIC void |
561 | xfs_inode_item_unpin( | 561 | xfs_inode_item_unpin( |
562 | xfs_inode_log_item_t *iip, | 562 | xfs_inode_log_item_t *iip, |
563 | int stale) | 563 | int stale) |
564 | { | 564 | { |
565 | xfs_iunpin(iip->ili_inode); | 565 | xfs_iunpin(iip->ili_inode); |
566 | } | 566 | } |
567 | 567 | ||
568 | /* ARGSUSED */ | 568 | /* ARGSUSED */ |
569 | STATIC void | 569 | STATIC void |
570 | xfs_inode_item_unpin_remove( | 570 | xfs_inode_item_unpin_remove( |
571 | xfs_inode_log_item_t *iip, | 571 | xfs_inode_log_item_t *iip, |
572 | xfs_trans_t *tp) | 572 | xfs_trans_t *tp) |
573 | { | 573 | { |
574 | xfs_iunpin(iip->ili_inode); | 574 | xfs_iunpin(iip->ili_inode); |
575 | } | 575 | } |
576 | 576 | ||
/*
 * This is called to attempt to lock the inode associated with this
 * inode log item, in preparation for the push routine which does the actual
 * iflush.  Don't sleep on the inode lock or the flush lock.
 *
 * If the flush lock is already held, indicating that the inode has
 * been or is in the process of being flushed, then (ideally) we'd like to
 * see if the inode's buffer is still incore, and if so give it a nudge.
 * We delay doing so until the pushbuf routine, though, to avoid holding
 * the AIL lock across a call to the blackhole which is the buffer cache.
 * Also we don't want to sleep in any device strategy routines, which can happen
 * if we do the subsequent bawrite in here.
 *
 * Returns one of XFS_ITEM_PINNED, XFS_ITEM_LOCKED, XFS_ITEM_PUSHBUF,
 * XFS_ITEM_FLUSHING or XFS_ITEM_SUCCESS.  On XFS_ITEM_SUCCESS and
 * XFS_ITEM_PUSHBUF the inode is returned locked shared; on all other
 * returns no locks are held by the caller afterwards.
 */
STATIC uint
xfs_inode_item_trylock(
	xfs_inode_log_item_t	*iip)
{
	register xfs_inode_t	*ip;

	ip = iip->ili_inode;

	/* A pinned inode cannot be flushed; the log must be forced first. */
	if (xfs_ipincount(ip) > 0) {
		return XFS_ITEM_PINNED;
	}

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
		return XFS_ITEM_LOCKED;
	}

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * If someone else isn't already trying to push the inode
		 * buffer, we get to do it.
		 */
		if (iip->ili_pushbuf_flag == 0) {
			iip->ili_pushbuf_flag = 1;
#ifdef DEBUG
			iip->ili_push_owner = current_pid();
#endif
			/*
			 * Inode is left locked in shared mode.
			 * Pushbuf routine gets to unlock it.
			 */
			return XFS_ITEM_PUSHBUF;
		} else {
			/*
			 * We hold the AIL lock, so we must specify the
			 * NONOTIFY flag so that we won't double trip.
			 */
			xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
			return XFS_ITEM_FLUSHING;
		}
		/* NOTREACHED */
	}

	/* Stale items should force out the iclog */
	if (ip->i_flags & XFS_ISTALE) {
		xfs_ifunlock(ip);
		/* Still holding the AIL lock here, so avoid the notify. */
		xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
		return XFS_ITEM_PINNED;
	}

#ifdef DEBUG
	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		ASSERT(iip->ili_format.ilf_fields != 0);
		ASSERT(iip->ili_logged == 0);
		ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL);
	}
#endif
	return XFS_ITEM_SUCCESS;
}
648 | 648 | ||
/*
 * Unlock the inode associated with the inode log item.
 * Clear the fields of the inode and inode log item that
 * are specific to the current transaction.  If the
 * hold flags is set, do not unlock the inode.
 */
STATIC void
xfs_inode_item_unlock(
	xfs_inode_log_item_t	*iip)
{
	uint		hold;
	uint		iolocked;
	uint		lock_flags;
	xfs_inode_t	*ip;

	ASSERT(iip != NULL);
	ASSERT(iip->ili_inode->i_itemp != NULL);
	ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
	/* If the item claims an iolock, the caller really must hold it. */
	ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
		  XFS_ILI_IOLOCKED_EXCL)) ||
	       xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
	ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
		  XFS_ILI_IOLOCKED_SHARED)) ||
	       xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
	/*
	 * Clear the transaction pointer in the inode.
	 */
	ip = iip->ili_inode;
	ip->i_transp = NULL;

	/*
	 * If the inode needed a separate buffer with which to log
	 * its extents, then free it now.
	 */
	if (iip->ili_extents_buf != NULL) {
		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
		ASSERT(ip->i_d.di_nextents > 0);
		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
		ASSERT(ip->i_df.if_bytes > 0);
		kmem_free(iip->ili_extents_buf);
		iip->ili_extents_buf = NULL;
	}
	/* Same for the attribute-fork extent copy, if one was made. */
	if (iip->ili_aextents_buf != NULL) {
		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
		ASSERT(ip->i_d.di_anextents > 0);
		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
		ASSERT(ip->i_afp->if_bytes > 0);
		kmem_free(iip->ili_aextents_buf);
		iip->ili_aextents_buf = NULL;
	}

	/*
	 * Figure out if we should unlock the inode or not.
	 */
	hold = iip->ili_flags & XFS_ILI_HOLD;

	/*
	 * Before clearing out the flags, remember whether we
	 * are holding the inode's IO lock.
	 */
	iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY;

	/*
	 * Clear out the fields of the inode log item particular
	 * to the current transaction.
	 * NB: must happen after hold/iolocked were sampled above.
	 */
	iip->ili_ilock_recur = 0;
	iip->ili_iolock_recur = 0;
	iip->ili_flags = 0;

	/*
	 * Unlock the inode if XFS_ILI_HOLD was not set.
	 */
	if (!hold) {
		lock_flags = XFS_ILOCK_EXCL;
		if (iolocked & XFS_ILI_IOLOCKED_EXCL) {
			lock_flags |= XFS_IOLOCK_EXCL;
		} else if (iolocked & XFS_ILI_IOLOCKED_SHARED) {
			lock_flags |= XFS_IOLOCK_SHARED;
		}
		/* xfs_iput drops the locks and the inode reference. */
		xfs_iput(iip->ili_inode, lock_flags);
	}
}
732 | 732 | ||
733 | /* | 733 | /* |
734 | * This is called to find out where the oldest active copy of the | 734 | * This is called to find out where the oldest active copy of the |
735 | * inode log item in the on disk log resides now that the last log | 735 | * inode log item in the on disk log resides now that the last log |
736 | * write of it completed at the given lsn. Since we always re-log | 736 | * write of it completed at the given lsn. Since we always re-log |
737 | * all dirty data in an inode, the latest copy in the on disk log | 737 | * all dirty data in an inode, the latest copy in the on disk log |
738 | * is the only one that matters. Therefore, simply return the | 738 | * is the only one that matters. Therefore, simply return the |
739 | * given lsn. | 739 | * given lsn. |
740 | */ | 740 | */ |
741 | /*ARGSUSED*/ | 741 | /*ARGSUSED*/ |
742 | STATIC xfs_lsn_t | 742 | STATIC xfs_lsn_t |
743 | xfs_inode_item_committed( | 743 | xfs_inode_item_committed( |
744 | xfs_inode_log_item_t *iip, | 744 | xfs_inode_log_item_t *iip, |
745 | xfs_lsn_t lsn) | 745 | xfs_lsn_t lsn) |
746 | { | 746 | { |
747 | return (lsn); | 747 | return (lsn); |
748 | } | 748 | } |
749 | 749 | ||
/*
 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
 * failed to get the inode flush lock but did get the inode locked SHARED.
 * Here we're trying to see if the inode buffer is incore, and if so whether it's
 * marked delayed write. If that's the case, we'll initiate a bawrite on that
 * buffer to expedite the process.
 *
 * We aren't holding the AIL lock (or the flush lock) when this gets called,
 * so it is inherently race-y.
 *
 * On entry we own ili_pushbuf_flag and hold the ilock shared; both are
 * always released before returning.
 */
STATIC void
xfs_inode_item_pushbuf(
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t	*ip;
	xfs_mount_t	*mp;
	xfs_buf_t	*bp;
	uint		dopush;

	ip = iip->ili_inode;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));

	/*
	 * The ili_pushbuf_flag keeps others from
	 * trying to duplicate our effort.
	 */
	ASSERT(iip->ili_pushbuf_flag != 0);
	ASSERT(iip->ili_push_owner == current_pid());

	/*
	 * If a flush is not in progress anymore, chances are that the
	 * inode was taken off the AIL. So, just get out.
	 */
	if (completion_done(&ip->i_flush) ||
	    ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return;
	}

	/* Look up the inode cluster buffer without blocking on its lock. */
	mp = ip->i_mount;
	bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
			iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK);

	if (bp != NULL) {
		if (XFS_BUF_ISDELAYWRITE(bp)) {
			/*
			 * We were racing with iflush because we don't hold
			 * the AIL lock or the flush lock. However, at this point,
			 * we have the buffer, and we know that it's dirty.
			 * So, it's possible that iflush raced with us, and
			 * this item is already taken off the AIL.
			 * If not, we can flush it async.
			 */
			dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
				  !completion_done(&ip->i_flush));
			iip->ili_pushbuf_flag = 0;
			xfs_iunlock(ip, XFS_ILOCK_SHARED);
			xfs_buftrace("INODE ITEM PUSH", bp);
			/* A pinned buffer needs the log forced before I/O. */
			if (XFS_BUF_ISPINNED(bp)) {
				xfs_log_force(mp, (xfs_lsn_t)0,
					      XFS_LOG_FORCE);
			}
			if (dopush) {
				int	error;
				/* bawrite consumes the buffer reference. */
				error = xfs_bawrite(mp, bp);
				if (error)
					xfs_fs_cmn_err(CE_WARN, mp,
		"xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
							error, iip, bp);
			} else {
				xfs_buf_relse(bp);
			}
		} else {
			/* Buffer is clean; nothing to push. */
			iip->ili_pushbuf_flag = 0;
			xfs_iunlock(ip, XFS_ILOCK_SHARED);
			xfs_buf_relse(bp);
		}
		return;
	}
	/*
	 * We have to be careful about resetting pushbuf flag too early (above).
	 * Even though in theory we can do it as soon as we have the buflock,
	 * we don't want others to be doing work needlessly. They'll come to
	 * this function thinking that pushing the buffer is their
	 * responsibility only to find that the buffer is still locked by
	 * another doing the same thing
	 */
	iip->ili_pushbuf_flag = 0;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return;
}
843 | 843 | ||
844 | 844 | ||
845 | /* | 845 | /* |
846 | * This is called to asynchronously write the inode associated with this | 846 | * This is called to asynchronously write the inode associated with this |
847 | * inode log item out to disk. The inode will already have been locked by | 847 | * inode log item out to disk. The inode will already have been locked by |
848 | * a successful call to xfs_inode_item_trylock(). | 848 | * a successful call to xfs_inode_item_trylock(). |
849 | */ | 849 | */ |
850 | STATIC void | 850 | STATIC void |
851 | xfs_inode_item_push( | 851 | xfs_inode_item_push( |
852 | xfs_inode_log_item_t *iip) | 852 | xfs_inode_log_item_t *iip) |
853 | { | 853 | { |
854 | xfs_inode_t *ip; | 854 | xfs_inode_t *ip; |
855 | 855 | ||
856 | ip = iip->ili_inode; | 856 | ip = iip->ili_inode; |
857 | 857 | ||
858 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 858 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
859 | ASSERT(!completion_done(&ip->i_flush)); | 859 | ASSERT(!completion_done(&ip->i_flush)); |
860 | /* | 860 | /* |
861 | * Since we were able to lock the inode's flush lock and | 861 | * Since we were able to lock the inode's flush lock and |
862 | * we found it on the AIL, the inode must be dirty. This | 862 | * we found it on the AIL, the inode must be dirty. This |
863 | * is because the inode is removed from the AIL while still | 863 | * is because the inode is removed from the AIL while still |
864 | * holding the flush lock in xfs_iflush_done(). Thus, if | 864 | * holding the flush lock in xfs_iflush_done(). Thus, if |
865 | * we found it in the AIL and were able to obtain the flush | 865 | * we found it in the AIL and were able to obtain the flush |
866 | * lock without sleeping, then there must not have been | 866 | * lock without sleeping, then there must not have been |
867 | * anyone in the process of flushing the inode. | 867 | * anyone in the process of flushing the inode. |
868 | */ | 868 | */ |
869 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || | 869 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || |
870 | iip->ili_format.ilf_fields != 0); | 870 | iip->ili_format.ilf_fields != 0); |
871 | 871 | ||
872 | /* | 872 | /* |
873 | * Write out the inode. The completion routine ('iflush_done') will | 873 | * Write out the inode. The completion routine ('iflush_done') will |
874 | * pull it from the AIL, mark it clean, unlock the flush lock. | 874 | * pull it from the AIL, mark it clean, unlock the flush lock. |
875 | */ | 875 | */ |
876 | (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); | 876 | (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); |
877 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 877 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
878 | 878 | ||
879 | return; | 879 | return; |
880 | } | 880 | } |
881 | 881 | ||
882 | /* | 882 | /* |
883 | * XXX rcc - this one really has to do something. Probably needs | 883 | * XXX rcc - this one really has to do something. Probably needs |
884 | * to stamp in a new field in the incore inode. | 884 | * to stamp in a new field in the incore inode. |
885 | */ | 885 | */ |
886 | /* ARGSUSED */ | 886 | /* ARGSUSED */ |
887 | STATIC void | 887 | STATIC void |
888 | xfs_inode_item_committing( | 888 | xfs_inode_item_committing( |
889 | xfs_inode_log_item_t *iip, | 889 | xfs_inode_log_item_t *iip, |
890 | xfs_lsn_t lsn) | 890 | xfs_lsn_t lsn) |
891 | { | 891 | { |
892 | iip->ili_last_lsn = lsn; | 892 | iip->ili_last_lsn = lsn; |
893 | return; | 893 | return; |
894 | } | 894 | } |
895 | 895 | ||
/*
 * This is the ops vector shared by all inode log items.
 * It maps the generic xfs_log_item operations onto the
 * xfs_inode_item_* implementations above via function-pointer casts.
 */
static struct xfs_item_ops xfs_inode_item_ops = {
	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_inode_item_size,
	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
					xfs_inode_item_format,
	.iop_pin	= (void(*)(xfs_log_item_t*))xfs_inode_item_pin,
	.iop_unpin	= (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin,
	.iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
					xfs_inode_item_unpin_remove,
	.iop_trylock	= (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock,
	.iop_unlock	= (void(*)(xfs_log_item_t*))xfs_inode_item_unlock,
	.iop_committed	= (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
					xfs_inode_item_committed,
	.iop_push	= (void(*)(xfs_log_item_t*))xfs_inode_item_push,
	.iop_pushbuf	= (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
	.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
					xfs_inode_item_committing
};
916 | 916 | ||
917 | 917 | ||
918 | /* | 918 | /* |
919 | * Initialize the inode log item for a newly allocated (in-core) inode. | 919 | * Initialize the inode log item for a newly allocated (in-core) inode. |
920 | */ | 920 | */ |
921 | void | 921 | void |
922 | xfs_inode_item_init( | 922 | xfs_inode_item_init( |
923 | xfs_inode_t *ip, | 923 | xfs_inode_t *ip, |
924 | xfs_mount_t *mp) | 924 | xfs_mount_t *mp) |
925 | { | 925 | { |
926 | xfs_inode_log_item_t *iip; | 926 | xfs_inode_log_item_t *iip; |
927 | 927 | ||
928 | ASSERT(ip->i_itemp == NULL); | 928 | ASSERT(ip->i_itemp == NULL); |
929 | iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); | 929 | iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); |
930 | 930 | ||
931 | iip->ili_item.li_type = XFS_LI_INODE; | 931 | iip->ili_item.li_type = XFS_LI_INODE; |
932 | iip->ili_item.li_ops = &xfs_inode_item_ops; | 932 | iip->ili_item.li_ops = &xfs_inode_item_ops; |
933 | iip->ili_item.li_mountp = mp; | 933 | iip->ili_item.li_mountp = mp; |
934 | iip->ili_item.li_ailp = mp->m_ail; | 934 | iip->ili_item.li_ailp = mp->m_ail; |
935 | iip->ili_inode = ip; | 935 | iip->ili_inode = ip; |
936 | 936 | ||
937 | /* | 937 | /* |
938 | We have zeroed memory. No need ... | 938 | We have zeroed memory. No need ... |
939 | iip->ili_extents_buf = NULL; | 939 | iip->ili_extents_buf = NULL; |
940 | iip->ili_pushbuf_flag = 0; | 940 | iip->ili_pushbuf_flag = 0; |
941 | */ | 941 | */ |
942 | 942 | ||
943 | iip->ili_format.ilf_type = XFS_LI_INODE; | 943 | iip->ili_format.ilf_type = XFS_LI_INODE; |
944 | iip->ili_format.ilf_ino = ip->i_ino; | 944 | iip->ili_format.ilf_ino = ip->i_ino; |
945 | iip->ili_format.ilf_blkno = ip->i_blkno; | 945 | iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; |
946 | iip->ili_format.ilf_len = ip->i_len; | 946 | iip->ili_format.ilf_len = ip->i_imap.im_len; |
947 | iip->ili_format.ilf_boffset = ip->i_boffset; | 947 | iip->ili_format.ilf_boffset = ip->i_imap.im_boffset; |
948 | } | 948 | } |
949 | 949 | ||
950 | /* | 950 | /* |
951 | * Free the inode log item and any memory hanging off of it. | 951 | * Free the inode log item and any memory hanging off of it. |
952 | */ | 952 | */ |
953 | void | 953 | void |
954 | xfs_inode_item_destroy( | 954 | xfs_inode_item_destroy( |
955 | xfs_inode_t *ip) | 955 | xfs_inode_t *ip) |
956 | { | 956 | { |
957 | #ifdef XFS_TRANS_DEBUG | 957 | #ifdef XFS_TRANS_DEBUG |
958 | if (ip->i_itemp->ili_root_size != 0) { | 958 | if (ip->i_itemp->ili_root_size != 0) { |
959 | kmem_free(ip->i_itemp->ili_orig_root); | 959 | kmem_free(ip->i_itemp->ili_orig_root); |
960 | } | 960 | } |
961 | #endif | 961 | #endif |
962 | kmem_zone_free(xfs_ili_zone, ip->i_itemp); | 962 | kmem_zone_free(xfs_ili_zone, ip->i_itemp); |
963 | } | 963 | } |
964 | 964 | ||
965 | 965 | ||
966 | /* | 966 | /* |
967 | * This is the inode flushing I/O completion routine. It is called | 967 | * This is the inode flushing I/O completion routine. It is called |
968 | * from interrupt level when the buffer containing the inode is | 968 | * from interrupt level when the buffer containing the inode is |
969 | * flushed to disk. It is responsible for removing the inode item | 969 | * flushed to disk. It is responsible for removing the inode item |
970 | * from the AIL if it has not been re-logged, and unlocking the inode's | 970 | * from the AIL if it has not been re-logged, and unlocking the inode's |
971 | * flush lock. | 971 | * flush lock. |
972 | */ | 972 | */ |
973 | /*ARGSUSED*/ | 973 | /*ARGSUSED*/ |
974 | void | 974 | void |
975 | xfs_iflush_done( | 975 | xfs_iflush_done( |
976 | xfs_buf_t *bp, | 976 | xfs_buf_t *bp, |
977 | xfs_inode_log_item_t *iip) | 977 | xfs_inode_log_item_t *iip) |
978 | { | 978 | { |
979 | xfs_inode_t *ip = iip->ili_inode; | 979 | xfs_inode_t *ip = iip->ili_inode; |
980 | struct xfs_ail *ailp = iip->ili_item.li_ailp; | 980 | struct xfs_ail *ailp = iip->ili_item.li_ailp; |
981 | 981 | ||
982 | /* | 982 | /* |
983 | * We only want to pull the item from the AIL if it is | 983 | * We only want to pull the item from the AIL if it is |
984 | * actually there and its location in the log has not | 984 | * actually there and its location in the log has not |
985 | * changed since we started the flush. Thus, we only bother | 985 | * changed since we started the flush. Thus, we only bother |
986 | * if the ili_logged flag is set and the inode's lsn has not | 986 | * if the ili_logged flag is set and the inode's lsn has not |
987 | * changed. First we check the lsn outside | 987 | * changed. First we check the lsn outside |
988 | * the lock since it's cheaper, and then we recheck while | 988 | * the lock since it's cheaper, and then we recheck while |
989 | * holding the lock before removing the inode from the AIL. | 989 | * holding the lock before removing the inode from the AIL. |
990 | */ | 990 | */ |
991 | if (iip->ili_logged && | 991 | if (iip->ili_logged && |
992 | (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { | 992 | (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { |
993 | spin_lock(&ailp->xa_lock); | 993 | spin_lock(&ailp->xa_lock); |
994 | if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { | 994 | if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { |
995 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 995 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
996 | xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); | 996 | xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); |
997 | } else { | 997 | } else { |
998 | spin_unlock(&ailp->xa_lock); | 998 | spin_unlock(&ailp->xa_lock); |
999 | } | 999 | } |
1000 | } | 1000 | } |
1001 | 1001 | ||
1002 | iip->ili_logged = 0; | 1002 | iip->ili_logged = 0; |
1003 | 1003 | ||
1004 | /* | 1004 | /* |
1005 | * Clear the ili_last_fields bits now that we know that the | 1005 | * Clear the ili_last_fields bits now that we know that the |
1006 | * data corresponding to them is safely on disk. | 1006 | * data corresponding to them is safely on disk. |
1007 | */ | 1007 | */ |
1008 | iip->ili_last_fields = 0; | 1008 | iip->ili_last_fields = 0; |
1009 | 1009 | ||
1010 | /* | 1010 | /* |
1011 | * Release the inode's flush lock since we're done with it. | 1011 | * Release the inode's flush lock since we're done with it. |
1012 | */ | 1012 | */ |
1013 | xfs_ifunlock(ip); | 1013 | xfs_ifunlock(ip); |
1014 | 1014 | ||
1015 | return; | 1015 | return; |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | /* | 1018 | /* |
1019 | * This is the inode flushing abort routine. It is called | 1019 | * This is the inode flushing abort routine. It is called |
1020 | * from xfs_iflush when the filesystem is shutting down to clean | 1020 | * from xfs_iflush when the filesystem is shutting down to clean |
1021 | * up the inode state. | 1021 | * up the inode state. |
1022 | * It is responsible for removing the inode item | 1022 | * It is responsible for removing the inode item |
1023 | * from the AIL if it has not been re-logged, and unlocking the inode's | 1023 | * from the AIL if it has not been re-logged, and unlocking the inode's |
1024 | * flush lock. | 1024 | * flush lock. |
1025 | */ | 1025 | */ |
1026 | void | 1026 | void |
1027 | xfs_iflush_abort( | 1027 | xfs_iflush_abort( |
1028 | xfs_inode_t *ip) | 1028 | xfs_inode_t *ip) |
1029 | { | 1029 | { |
1030 | xfs_inode_log_item_t *iip = ip->i_itemp; | 1030 | xfs_inode_log_item_t *iip = ip->i_itemp; |
1031 | xfs_mount_t *mp; | 1031 | xfs_mount_t *mp; |
1032 | 1032 | ||
1033 | iip = ip->i_itemp; | 1033 | iip = ip->i_itemp; |
1034 | mp = ip->i_mount; | 1034 | mp = ip->i_mount; |
1035 | if (iip) { | 1035 | if (iip) { |
1036 | struct xfs_ail *ailp = iip->ili_item.li_ailp; | 1036 | struct xfs_ail *ailp = iip->ili_item.li_ailp; |
1037 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { | 1037 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { |
1038 | spin_lock(&ailp->xa_lock); | 1038 | spin_lock(&ailp->xa_lock); |
1039 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { | 1039 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { |
1040 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 1040 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
1041 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip); | 1041 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip); |
1042 | } else | 1042 | } else |
1043 | spin_unlock(&ailp->xa_lock); | 1043 | spin_unlock(&ailp->xa_lock); |
1044 | } | 1044 | } |
1045 | iip->ili_logged = 0; | 1045 | iip->ili_logged = 0; |
1046 | /* | 1046 | /* |
1047 | * Clear the ili_last_fields bits now that we know that the | 1047 | * Clear the ili_last_fields bits now that we know that the |
1048 | * data corresponding to them is safely on disk. | 1048 | * data corresponding to them is safely on disk. |
1049 | */ | 1049 | */ |
1050 | iip->ili_last_fields = 0; | 1050 | iip->ili_last_fields = 0; |
1051 | /* | 1051 | /* |
1052 | * Clear the inode logging fields so no more flushes are | 1052 | * Clear the inode logging fields so no more flushes are |
1053 | * attempted. | 1053 | * attempted. |
1054 | */ | 1054 | */ |
1055 | iip->ili_format.ilf_fields = 0; | 1055 | iip->ili_format.ilf_fields = 0; |
1056 | } | 1056 | } |
1057 | /* | 1057 | /* |
1058 | * Release the inode's flush lock since we're done with it. | 1058 | * Release the inode's flush lock since we're done with it. |
1059 | */ | 1059 | */ |
1060 | xfs_ifunlock(ip); | 1060 | xfs_ifunlock(ip); |
1061 | } | 1061 | } |
1062 | 1062 | ||
1063 | void | 1063 | void |
1064 | xfs_istale_done( | 1064 | xfs_istale_done( |
1065 | xfs_buf_t *bp, | 1065 | xfs_buf_t *bp, |
1066 | xfs_inode_log_item_t *iip) | 1066 | xfs_inode_log_item_t *iip) |
1067 | { | 1067 | { |
1068 | xfs_iflush_abort(iip->ili_inode); | 1068 | xfs_iflush_abort(iip->ili_inode); |
1069 | } | 1069 | } |
1070 | 1070 | ||
1071 | /* | 1071 | /* |
1072 | * convert an xfs_inode_log_format struct from either 32 or 64 bit versions | 1072 | * convert an xfs_inode_log_format struct from either 32 or 64 bit versions |
1073 | * (which can have different field alignments) to the native version | 1073 | * (which can have different field alignments) to the native version |
1074 | */ | 1074 | */ |
1075 | int | 1075 | int |
1076 | xfs_inode_item_format_convert( | 1076 | xfs_inode_item_format_convert( |
1077 | xfs_log_iovec_t *buf, | 1077 | xfs_log_iovec_t *buf, |
1078 | xfs_inode_log_format_t *in_f) | 1078 | xfs_inode_log_format_t *in_f) |
1079 | { | 1079 | { |
1080 | if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { | 1080 | if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { |
1081 | xfs_inode_log_format_32_t *in_f32; | 1081 | xfs_inode_log_format_32_t *in_f32; |
1082 | 1082 | ||
1083 | in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr; | 1083 | in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr; |
1084 | in_f->ilf_type = in_f32->ilf_type; | 1084 | in_f->ilf_type = in_f32->ilf_type; |
1085 | in_f->ilf_size = in_f32->ilf_size; | 1085 | in_f->ilf_size = in_f32->ilf_size; |
1086 | in_f->ilf_fields = in_f32->ilf_fields; | 1086 | in_f->ilf_fields = in_f32->ilf_fields; |
1087 | in_f->ilf_asize = in_f32->ilf_asize; | 1087 | in_f->ilf_asize = in_f32->ilf_asize; |
1088 | in_f->ilf_dsize = in_f32->ilf_dsize; | 1088 | in_f->ilf_dsize = in_f32->ilf_dsize; |
1089 | in_f->ilf_ino = in_f32->ilf_ino; | 1089 | in_f->ilf_ino = in_f32->ilf_ino; |
1090 | /* copy biggest field of ilf_u */ | 1090 | /* copy biggest field of ilf_u */ |
1091 | memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, | 1091 | memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, |
1092 | in_f32->ilf_u.ilfu_uuid.__u_bits, | 1092 | in_f32->ilf_u.ilfu_uuid.__u_bits, |
1093 | sizeof(uuid_t)); | 1093 | sizeof(uuid_t)); |
1094 | in_f->ilf_blkno = in_f32->ilf_blkno; | 1094 | in_f->ilf_blkno = in_f32->ilf_blkno; |
1095 | in_f->ilf_len = in_f32->ilf_len; | 1095 | in_f->ilf_len = in_f32->ilf_len; |
1096 | in_f->ilf_boffset = in_f32->ilf_boffset; | 1096 | in_f->ilf_boffset = in_f32->ilf_boffset; |
1097 | return 0; | 1097 | return 0; |
1098 | } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ | 1098 | } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ |
1099 | xfs_inode_log_format_64_t *in_f64; | 1099 | xfs_inode_log_format_64_t *in_f64; |
1100 | 1100 | ||
1101 | in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr; | 1101 | in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr; |
1102 | in_f->ilf_type = in_f64->ilf_type; | 1102 | in_f->ilf_type = in_f64->ilf_type; |
1103 | in_f->ilf_size = in_f64->ilf_size; | 1103 | in_f->ilf_size = in_f64->ilf_size; |
1104 | in_f->ilf_fields = in_f64->ilf_fields; | 1104 | in_f->ilf_fields = in_f64->ilf_fields; |
1105 | in_f->ilf_asize = in_f64->ilf_asize; | 1105 | in_f->ilf_asize = in_f64->ilf_asize; |
1106 | in_f->ilf_dsize = in_f64->ilf_dsize; | 1106 | in_f->ilf_dsize = in_f64->ilf_dsize; |
1107 | in_f->ilf_ino = in_f64->ilf_ino; | 1107 | in_f->ilf_ino = in_f64->ilf_ino; |
1108 | /* copy biggest field of ilf_u */ | 1108 | /* copy biggest field of ilf_u */ |
1109 | memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, | 1109 | memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, |
1110 | in_f64->ilf_u.ilfu_uuid.__u_bits, | 1110 | in_f64->ilf_u.ilfu_uuid.__u_bits, |
1111 | sizeof(uuid_t)); | 1111 | sizeof(uuid_t)); |
1112 | in_f->ilf_blkno = in_f64->ilf_blkno; | 1112 | in_f->ilf_blkno = in_f64->ilf_blkno; |
1113 | in_f->ilf_len = in_f64->ilf_len; | 1113 | in_f->ilf_len = in_f64->ilf_len; |
1114 | in_f->ilf_boffset = in_f64->ilf_boffset; | 1114 | in_f->ilf_boffset = in_f64->ilf_boffset; |
1115 | return 0; | 1115 | return 0; |
1116 | } | 1116 | } |
1117 | return EFSCORRUPTED; | 1117 | return EFSCORRUPTED; |
1118 | } | 1118 | } |
1119 | 1119 |
fs/xfs/xfs_itable.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_dir2.h" | 27 | #include "xfs_dir2.h" |
28 | #include "xfs_dmapi.h" | 28 | #include "xfs_dmapi.h" |
29 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
30 | #include "xfs_bmap_btree.h" | 30 | #include "xfs_bmap_btree.h" |
31 | #include "xfs_alloc_btree.h" | 31 | #include "xfs_alloc_btree.h" |
32 | #include "xfs_ialloc_btree.h" | 32 | #include "xfs_ialloc_btree.h" |
33 | #include "xfs_dir2_sf.h" | 33 | #include "xfs_dir2_sf.h" |
34 | #include "xfs_attr_sf.h" | 34 | #include "xfs_attr_sf.h" |
35 | #include "xfs_dinode.h" | 35 | #include "xfs_dinode.h" |
36 | #include "xfs_inode.h" | 36 | #include "xfs_inode.h" |
37 | #include "xfs_ialloc.h" | 37 | #include "xfs_ialloc.h" |
38 | #include "xfs_itable.h" | 38 | #include "xfs_itable.h" |
39 | #include "xfs_error.h" | 39 | #include "xfs_error.h" |
40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
41 | 41 | ||
42 | int | 42 | int |
43 | xfs_internal_inum( | 43 | xfs_internal_inum( |
44 | xfs_mount_t *mp, | 44 | xfs_mount_t *mp, |
45 | xfs_ino_t ino) | 45 | xfs_ino_t ino) |
46 | { | 46 | { |
47 | return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || | 47 | return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || |
48 | (xfs_sb_version_hasquota(&mp->m_sb) && | 48 | (xfs_sb_version_hasquota(&mp->m_sb) && |
49 | (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); | 49 | (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); |
50 | } | 50 | } |
51 | 51 | ||
52 | STATIC int | 52 | STATIC int |
53 | xfs_bulkstat_one_iget( | 53 | xfs_bulkstat_one_iget( |
54 | xfs_mount_t *mp, /* mount point for filesystem */ | 54 | xfs_mount_t *mp, /* mount point for filesystem */ |
55 | xfs_ino_t ino, /* inode number to get data for */ | 55 | xfs_ino_t ino, /* inode number to get data for */ |
56 | xfs_daddr_t bno, /* starting bno of inode cluster */ | 56 | xfs_daddr_t bno, /* starting bno of inode cluster */ |
57 | xfs_bstat_t *buf, /* return buffer */ | 57 | xfs_bstat_t *buf, /* return buffer */ |
58 | int *stat) /* BULKSTAT_RV_... */ | 58 | int *stat) /* BULKSTAT_RV_... */ |
59 | { | 59 | { |
60 | xfs_icdinode_t *dic; /* dinode core info pointer */ | 60 | xfs_icdinode_t *dic; /* dinode core info pointer */ |
61 | xfs_inode_t *ip; /* incore inode pointer */ | 61 | xfs_inode_t *ip; /* incore inode pointer */ |
62 | int error; | 62 | int error; |
63 | 63 | ||
64 | error = xfs_iget(mp, NULL, ino, | 64 | error = xfs_iget(mp, NULL, ino, |
65 | XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); | 65 | XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); |
66 | if (error) { | 66 | if (error) { |
67 | *stat = BULKSTAT_RV_NOTHING; | 67 | *stat = BULKSTAT_RV_NOTHING; |
68 | return error; | 68 | return error; |
69 | } | 69 | } |
70 | 70 | ||
71 | ASSERT(ip != NULL); | 71 | ASSERT(ip != NULL); |
72 | ASSERT(ip->i_blkno != (xfs_daddr_t)0); | 72 | ASSERT(ip->i_imap.im_blkno != 0); |
73 | 73 | ||
74 | dic = &ip->i_d; | 74 | dic = &ip->i_d; |
75 | 75 | ||
76 | /* xfs_iget returns the following without needing | 76 | /* xfs_iget returns the following without needing |
77 | * further change. | 77 | * further change. |
78 | */ | 78 | */ |
79 | buf->bs_nlink = dic->di_nlink; | 79 | buf->bs_nlink = dic->di_nlink; |
80 | buf->bs_projid = dic->di_projid; | 80 | buf->bs_projid = dic->di_projid; |
81 | buf->bs_ino = ino; | 81 | buf->bs_ino = ino; |
82 | buf->bs_mode = dic->di_mode; | 82 | buf->bs_mode = dic->di_mode; |
83 | buf->bs_uid = dic->di_uid; | 83 | buf->bs_uid = dic->di_uid; |
84 | buf->bs_gid = dic->di_gid; | 84 | buf->bs_gid = dic->di_gid; |
85 | buf->bs_size = dic->di_size; | 85 | buf->bs_size = dic->di_size; |
86 | vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime); | 86 | vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime); |
87 | buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; | 87 | buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; |
88 | buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; | 88 | buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; |
89 | buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; | 89 | buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; |
90 | buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec; | 90 | buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec; |
91 | buf->bs_xflags = xfs_ip2xflags(ip); | 91 | buf->bs_xflags = xfs_ip2xflags(ip); |
92 | buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; | 92 | buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; |
93 | buf->bs_extents = dic->di_nextents; | 93 | buf->bs_extents = dic->di_nextents; |
94 | buf->bs_gen = dic->di_gen; | 94 | buf->bs_gen = dic->di_gen; |
95 | memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); | 95 | memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); |
96 | buf->bs_dmevmask = dic->di_dmevmask; | 96 | buf->bs_dmevmask = dic->di_dmevmask; |
97 | buf->bs_dmstate = dic->di_dmstate; | 97 | buf->bs_dmstate = dic->di_dmstate; |
98 | buf->bs_aextents = dic->di_anextents; | 98 | buf->bs_aextents = dic->di_anextents; |
99 | 99 | ||
100 | switch (dic->di_format) { | 100 | switch (dic->di_format) { |
101 | case XFS_DINODE_FMT_DEV: | 101 | case XFS_DINODE_FMT_DEV: |
102 | buf->bs_rdev = ip->i_df.if_u2.if_rdev; | 102 | buf->bs_rdev = ip->i_df.if_u2.if_rdev; |
103 | buf->bs_blksize = BLKDEV_IOSIZE; | 103 | buf->bs_blksize = BLKDEV_IOSIZE; |
104 | buf->bs_blocks = 0; | 104 | buf->bs_blocks = 0; |
105 | break; | 105 | break; |
106 | case XFS_DINODE_FMT_LOCAL: | 106 | case XFS_DINODE_FMT_LOCAL: |
107 | case XFS_DINODE_FMT_UUID: | 107 | case XFS_DINODE_FMT_UUID: |
108 | buf->bs_rdev = 0; | 108 | buf->bs_rdev = 0; |
109 | buf->bs_blksize = mp->m_sb.sb_blocksize; | 109 | buf->bs_blksize = mp->m_sb.sb_blocksize; |
110 | buf->bs_blocks = 0; | 110 | buf->bs_blocks = 0; |
111 | break; | 111 | break; |
112 | case XFS_DINODE_FMT_EXTENTS: | 112 | case XFS_DINODE_FMT_EXTENTS: |
113 | case XFS_DINODE_FMT_BTREE: | 113 | case XFS_DINODE_FMT_BTREE: |
114 | buf->bs_rdev = 0; | 114 | buf->bs_rdev = 0; |
115 | buf->bs_blksize = mp->m_sb.sb_blocksize; | 115 | buf->bs_blksize = mp->m_sb.sb_blocksize; |
116 | buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; | 116 | buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; |
117 | break; | 117 | break; |
118 | } | 118 | } |
119 | 119 | ||
120 | xfs_iput(ip, XFS_ILOCK_SHARED); | 120 | xfs_iput(ip, XFS_ILOCK_SHARED); |
121 | return error; | 121 | return error; |
122 | } | 122 | } |
123 | 123 | ||
124 | STATIC void | 124 | STATIC void |
125 | xfs_bulkstat_one_dinode( | 125 | xfs_bulkstat_one_dinode( |
126 | xfs_mount_t *mp, /* mount point for filesystem */ | 126 | xfs_mount_t *mp, /* mount point for filesystem */ |
127 | xfs_ino_t ino, /* inode number to get data for */ | 127 | xfs_ino_t ino, /* inode number to get data for */ |
128 | xfs_dinode_t *dic, /* dinode inode pointer */ | 128 | xfs_dinode_t *dic, /* dinode inode pointer */ |
129 | xfs_bstat_t *buf) /* return buffer */ | 129 | xfs_bstat_t *buf) /* return buffer */ |
130 | { | 130 | { |
131 | /* | 131 | /* |
132 | * The inode format changed when we moved the link count and | 132 | * The inode format changed when we moved the link count and |
133 | * made it 32 bits long. If this is an old format inode, | 133 | * made it 32 bits long. If this is an old format inode, |
134 | * convert it in memory to look like a new one. If it gets | 134 | * convert it in memory to look like a new one. If it gets |
135 | * flushed to disk we will convert back before flushing or | 135 | * flushed to disk we will convert back before flushing or |
136 | * logging it. We zero out the new projid field and the old link | 136 | * logging it. We zero out the new projid field and the old link |
137 | * count field. We'll handle clearing the pad field (the remains | 137 | * count field. We'll handle clearing the pad field (the remains |
138 | * of the old uuid field) when we actually convert the inode to | 138 | * of the old uuid field) when we actually convert the inode to |
139 | * the new format. We don't change the version number so that we | 139 | * the new format. We don't change the version number so that we |
140 | * can distinguish this from a real new format inode. | 140 | * can distinguish this from a real new format inode. |
141 | */ | 141 | */ |
142 | if (dic->di_version == 1) { | 142 | if (dic->di_version == 1) { |
143 | buf->bs_nlink = be16_to_cpu(dic->di_onlink); | 143 | buf->bs_nlink = be16_to_cpu(dic->di_onlink); |
144 | buf->bs_projid = 0; | 144 | buf->bs_projid = 0; |
145 | } else { | 145 | } else { |
146 | buf->bs_nlink = be32_to_cpu(dic->di_nlink); | 146 | buf->bs_nlink = be32_to_cpu(dic->di_nlink); |
147 | buf->bs_projid = be16_to_cpu(dic->di_projid); | 147 | buf->bs_projid = be16_to_cpu(dic->di_projid); |
148 | } | 148 | } |
149 | 149 | ||
150 | buf->bs_ino = ino; | 150 | buf->bs_ino = ino; |
151 | buf->bs_mode = be16_to_cpu(dic->di_mode); | 151 | buf->bs_mode = be16_to_cpu(dic->di_mode); |
152 | buf->bs_uid = be32_to_cpu(dic->di_uid); | 152 | buf->bs_uid = be32_to_cpu(dic->di_uid); |
153 | buf->bs_gid = be32_to_cpu(dic->di_gid); | 153 | buf->bs_gid = be32_to_cpu(dic->di_gid); |
154 | buf->bs_size = be64_to_cpu(dic->di_size); | 154 | buf->bs_size = be64_to_cpu(dic->di_size); |
155 | buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec); | 155 | buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec); |
156 | buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec); | 156 | buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec); |
157 | buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec); | 157 | buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec); |
158 | buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); | 158 | buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); |
159 | buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); | 159 | buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); |
160 | buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); | 160 | buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); |
161 | buf->bs_xflags = xfs_dic2xflags(dic); | 161 | buf->bs_xflags = xfs_dic2xflags(dic); |
162 | buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; | 162 | buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; |
163 | buf->bs_extents = be32_to_cpu(dic->di_nextents); | 163 | buf->bs_extents = be32_to_cpu(dic->di_nextents); |
164 | buf->bs_gen = be32_to_cpu(dic->di_gen); | 164 | buf->bs_gen = be32_to_cpu(dic->di_gen); |
165 | memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); | 165 | memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); |
166 | buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); | 166 | buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); |
167 | buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); | 167 | buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); |
168 | buf->bs_aextents = be16_to_cpu(dic->di_anextents); | 168 | buf->bs_aextents = be16_to_cpu(dic->di_anextents); |
169 | 169 | ||
170 | switch (dic->di_format) { | 170 | switch (dic->di_format) { |
171 | case XFS_DINODE_FMT_DEV: | 171 | case XFS_DINODE_FMT_DEV: |
172 | buf->bs_rdev = xfs_dinode_get_rdev(dic); | 172 | buf->bs_rdev = xfs_dinode_get_rdev(dic); |
173 | buf->bs_blksize = BLKDEV_IOSIZE; | 173 | buf->bs_blksize = BLKDEV_IOSIZE; |
174 | buf->bs_blocks = 0; | 174 | buf->bs_blocks = 0; |
175 | break; | 175 | break; |
176 | case XFS_DINODE_FMT_LOCAL: | 176 | case XFS_DINODE_FMT_LOCAL: |
177 | case XFS_DINODE_FMT_UUID: | 177 | case XFS_DINODE_FMT_UUID: |
178 | buf->bs_rdev = 0; | 178 | buf->bs_rdev = 0; |
179 | buf->bs_blksize = mp->m_sb.sb_blocksize; | 179 | buf->bs_blksize = mp->m_sb.sb_blocksize; |
180 | buf->bs_blocks = 0; | 180 | buf->bs_blocks = 0; |
181 | break; | 181 | break; |
182 | case XFS_DINODE_FMT_EXTENTS: | 182 | case XFS_DINODE_FMT_EXTENTS: |
183 | case XFS_DINODE_FMT_BTREE: | 183 | case XFS_DINODE_FMT_BTREE: |
184 | buf->bs_rdev = 0; | 184 | buf->bs_rdev = 0; |
185 | buf->bs_blksize = mp->m_sb.sb_blocksize; | 185 | buf->bs_blksize = mp->m_sb.sb_blocksize; |
186 | buf->bs_blocks = be64_to_cpu(dic->di_nblocks); | 186 | buf->bs_blocks = be64_to_cpu(dic->di_nblocks); |
187 | break; | 187 | break; |
188 | } | 188 | } |
189 | } | 189 | } |
190 | 190 | ||
191 | STATIC int | 191 | STATIC int |
192 | xfs_bulkstat_one_fmt( | 192 | xfs_bulkstat_one_fmt( |
193 | void __user *ubuffer, | 193 | void __user *ubuffer, |
194 | const xfs_bstat_t *buffer) | 194 | const xfs_bstat_t *buffer) |
195 | { | 195 | { |
196 | if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) | 196 | if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) |
197 | return -EFAULT; | 197 | return -EFAULT; |
198 | return sizeof(*buffer); | 198 | return sizeof(*buffer); |
199 | } | 199 | } |
200 | 200 | ||
201 | /* | 201 | /* |
202 | * Return stat information for one inode. | 202 | * Return stat information for one inode. |
203 | * Return 0 if ok, else errno. | 203 | * Return 0 if ok, else errno. |
204 | */ | 204 | */ |
205 | int /* error status */ | 205 | int /* error status */ |
206 | xfs_bulkstat_one( | 206 | xfs_bulkstat_one( |
207 | xfs_mount_t *mp, /* mount point for filesystem */ | 207 | xfs_mount_t *mp, /* mount point for filesystem */ |
208 | xfs_ino_t ino, /* inode number to get data for */ | 208 | xfs_ino_t ino, /* inode number to get data for */ |
209 | void __user *buffer, /* buffer to place output in */ | 209 | void __user *buffer, /* buffer to place output in */ |
210 | int ubsize, /* size of buffer */ | 210 | int ubsize, /* size of buffer */ |
211 | void *private_data, /* my private data */ | 211 | void *private_data, /* my private data */ |
212 | xfs_daddr_t bno, /* starting bno of inode cluster */ | 212 | xfs_daddr_t bno, /* starting bno of inode cluster */ |
213 | int *ubused, /* bytes used by me */ | 213 | int *ubused, /* bytes used by me */ |
214 | void *dibuff, /* on-disk inode buffer */ | 214 | void *dibuff, /* on-disk inode buffer */ |
215 | int *stat) /* BULKSTAT_RV_... */ | 215 | int *stat) /* BULKSTAT_RV_... */ |
216 | { | 216 | { |
217 | xfs_bstat_t *buf; /* return buffer */ | 217 | xfs_bstat_t *buf; /* return buffer */ |
218 | int error = 0; /* error value */ | 218 | int error = 0; /* error value */ |
219 | xfs_dinode_t *dip; /* dinode inode pointer */ | 219 | xfs_dinode_t *dip; /* dinode inode pointer */ |
220 | bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt; | 220 | bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt; |
221 | 221 | ||
222 | dip = (xfs_dinode_t *)dibuff; | 222 | dip = (xfs_dinode_t *)dibuff; |
223 | *stat = BULKSTAT_RV_NOTHING; | 223 | *stat = BULKSTAT_RV_NOTHING; |
224 | 224 | ||
225 | if (!buffer || xfs_internal_inum(mp, ino)) | 225 | if (!buffer || xfs_internal_inum(mp, ino)) |
226 | return XFS_ERROR(EINVAL); | 226 | return XFS_ERROR(EINVAL); |
227 | if (ubsize < sizeof(*buf)) | 227 | if (ubsize < sizeof(*buf)) |
228 | return XFS_ERROR(ENOMEM); | 228 | return XFS_ERROR(ENOMEM); |
229 | 229 | ||
230 | buf = kmem_alloc(sizeof(*buf), KM_SLEEP); | 230 | buf = kmem_alloc(sizeof(*buf), KM_SLEEP); |
231 | 231 | ||
232 | if (dip == NULL) { | 232 | if (dip == NULL) { |
233 | /* We're not being passed a pointer to a dinode. This happens | 233 | /* We're not being passed a pointer to a dinode. This happens |
234 | * if BULKSTAT_FG_IGET is selected. Do the iget. | 234 | * if BULKSTAT_FG_IGET is selected. Do the iget. |
235 | */ | 235 | */ |
236 | error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat); | 236 | error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat); |
237 | if (error) | 237 | if (error) |
238 | goto out_free; | 238 | goto out_free; |
239 | } else { | 239 | } else { |
240 | xfs_bulkstat_one_dinode(mp, ino, dip, buf); | 240 | xfs_bulkstat_one_dinode(mp, ino, dip, buf); |
241 | } | 241 | } |
242 | 242 | ||
243 | error = formatter(buffer, buf); | 243 | error = formatter(buffer, buf); |
244 | if (error < 0) { | 244 | if (error < 0) { |
245 | error = EFAULT; | 245 | error = EFAULT; |
246 | goto out_free; | 246 | goto out_free; |
247 | } | 247 | } |
248 | 248 | ||
249 | *stat = BULKSTAT_RV_DIDONE; | 249 | *stat = BULKSTAT_RV_DIDONE; |
250 | if (ubused) | 250 | if (ubused) |
251 | *ubused = error; | 251 | *ubused = error; |
252 | 252 | ||
253 | out_free: | 253 | out_free: |
254 | kmem_free(buf); | 254 | kmem_free(buf); |
255 | return error; | 255 | return error; |
256 | } | 256 | } |
257 | 257 | ||
258 | /* | 258 | /* |
259 | * Test to see whether we can use the ondisk inode directly, based | 259 | * Test to see whether we can use the ondisk inode directly, based |
260 | * on the given bulkstat flags, filling in dipp accordingly. | 260 | * on the given bulkstat flags, filling in dipp accordingly. |
261 | * Returns zero if the inode is dodgey. | 261 | * Returns zero if the inode is dodgey. |
262 | */ | 262 | */ |
263 | STATIC int | 263 | STATIC int |
264 | xfs_bulkstat_use_dinode( | 264 | xfs_bulkstat_use_dinode( |
265 | xfs_mount_t *mp, | 265 | xfs_mount_t *mp, |
266 | int flags, | 266 | int flags, |
267 | xfs_buf_t *bp, | 267 | xfs_buf_t *bp, |
268 | int clustidx, | 268 | int clustidx, |
269 | xfs_dinode_t **dipp) | 269 | xfs_dinode_t **dipp) |
270 | { | 270 | { |
271 | xfs_dinode_t *dip; | 271 | xfs_dinode_t *dip; |
272 | unsigned int aformat; | 272 | unsigned int aformat; |
273 | 273 | ||
274 | *dipp = NULL; | 274 | *dipp = NULL; |
275 | if (!bp || (flags & BULKSTAT_FG_IGET)) | 275 | if (!bp || (flags & BULKSTAT_FG_IGET)) |
276 | return 1; | 276 | return 1; |
277 | dip = (xfs_dinode_t *) | 277 | dip = (xfs_dinode_t *) |
278 | xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); | 278 | xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); |
279 | /* | 279 | /* |
280 | * Check the buffer containing the on-disk inode for di_mode == 0. | 280 | * Check the buffer containing the on-disk inode for di_mode == 0. |
281 | * This is to prevent xfs_bulkstat from picking up just reclaimed | 281 | * This is to prevent xfs_bulkstat from picking up just reclaimed |
282 | * inodes that have their in-core state initialized but not flushed | 282 | * inodes that have their in-core state initialized but not flushed |
283 | * to disk yet. This is a temporary hack that would require a proper | 283 | * to disk yet. This is a temporary hack that would require a proper |
284 | * fix in the future. | 284 | * fix in the future. |
285 | */ | 285 | */ |
286 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || | 286 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || |
287 | !XFS_DINODE_GOOD_VERSION(dip->di_version) || | 287 | !XFS_DINODE_GOOD_VERSION(dip->di_version) || |
288 | !dip->di_mode) | 288 | !dip->di_mode) |
289 | return 0; | 289 | return 0; |
290 | if (flags & BULKSTAT_FG_QUICK) { | 290 | if (flags & BULKSTAT_FG_QUICK) { |
291 | *dipp = dip; | 291 | *dipp = dip; |
292 | return 1; | 292 | return 1; |
293 | } | 293 | } |
294 | /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ | 294 | /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ |
295 | aformat = dip->di_aformat; | 295 | aformat = dip->di_aformat; |
296 | if ((XFS_DFORK_Q(dip) == 0) || | 296 | if ((XFS_DFORK_Q(dip) == 0) || |
297 | (aformat == XFS_DINODE_FMT_LOCAL) || | 297 | (aformat == XFS_DINODE_FMT_LOCAL) || |
298 | (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { | 298 | (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { |
299 | *dipp = dip; | 299 | *dipp = dip; |
300 | return 1; | 300 | return 1; |
301 | } | 301 | } |
302 | return 1; | 302 | return 1; |
303 | } | 303 | } |
304 | 304 | ||
/*
 * True if at least one more stat record fits in the remaining user
 * buffer space.  NOTE: this expands against a local variable named
 * statstruct_size, so it is only meaningful inside xfs_bulkstat().
 */
#define XFS_BULKSTAT_UBLEFT(ubleft)	((ubleft) >= statstruct_size)
306 | 306 | ||
/*
 * Return stat information in bulk (by-inode) for the filesystem.
 *
 * Walks the inode btree of each allocation group starting at *lastinop,
 * collecting chunks of allocated inodes into an in-memory record buffer,
 * then calls @formatter on each allocated inode to copy stat data into
 * the user buffer.  On return *lastinop is the resume cookie for the
 * next call, *ubcountp is the number of records written, and *done is
 * set once the end of the filesystem has been reached.
 */
int					/* error status */
xfs_bulkstat(
	xfs_mount_t	*mp,	/* mount point for filesystem */
	xfs_ino_t	*lastinop, /* last inode returned */
	int		*ubcountp, /* size of buffer/count returned */
	bulkstat_one_pf formatter,	/* func that'd fill a single buf */
	void		*private_data,/* private data for formatter */
	size_t		statstruct_size, /* sizeof struct filling */
	char		__user *ubuffer, /* buffer with inode stats */
	int		flags,	/* defined in xfs_itable.h */
	int		*done)	/* set to 1 when end of fs is reached */
{
	xfs_agblock_t	agbno=0;/* allocation group block number */
	xfs_buf_t	*agbp;	/* agi header buffer */
	xfs_agi_t	*agi;	/* agi header data */
	xfs_agino_t	agino;	/* inode # in allocation group */
	xfs_agnumber_t	agno;	/* allocation group number */
	xfs_daddr_t	bno;	/* inode cluster start daddr */
	int		chunkidx; /* current index into inode chunk */
	int		clustidx; /* current index into inode cluster */
	xfs_btree_cur_t	*cur;	/* btree cursor for ialloc btree */
	int		end_of_ag; /* set if we've seen the ag end */
	int		error;	/* error code */
	int		fmterror;/* bulkstat formatter result */
	__int32_t	gcnt;	/* current btree rec's count */
	xfs_inofree_t	gfree;	/* current btree rec's free mask */
	xfs_agino_t	gino;	/* current btree rec's start inode */
	int		i;	/* loop index */
	int		icount;	/* count of inodes good in irbuf */
	size_t		irbsize; /* size of irec buffer in bytes */
	xfs_ino_t	ino;	/* inode number (filesystem) */
	xfs_inobt_rec_incore_t *irbp;	/* current irec buffer pointer */
	xfs_inobt_rec_incore_t *irbuf;	/* start of irec buffer */
	xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
	xfs_ino_t	lastino; /* last inode number returned */
	int		nbcluster; /* # of blocks in a cluster */
	int		nicluster; /* # of inodes in a cluster */
	int		nimask;	/* mask for inode clusters */
	int		nirbuf;	/* size of irbuf */
	int		rval;	/* return value error code */
	int		tmp;	/* result value from btree calls */
	int		ubcount; /* size of user's buffer */
	int		ubleft;	/* bytes left in user's buffer */
	char		__user *ubufp;	/* pointer into user's buffer */
	int		ubelem;	/* spaces used in user's buffer */
	int		ubused;	/* bytes used by formatter */
	xfs_buf_t	*bp;	/* ptr to on-disk inode cluster buf */
	xfs_dinode_t	*dip;	/* ptr into bp for specific inode */

	/*
	 * Get the last inode value, see if there's nothing to do.
	 * A cookie that doesn't map back to itself means a bogus AG or
	 * a non-canonical inode number: report "done" with zero records.
	 */
	ino = (xfs_ino_t)*lastinop;
	lastino = ino;
	dip = NULL;
	agno = XFS_INO_TO_AGNO(mp, ino);
	agino = XFS_INO_TO_AGINO(mp, ino);
	if (agno >= mp->m_sb.sb_agcount ||
	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
		*done = 1;
		*ubcountp = 0;
		return 0;
	}
	if (!ubcountp || *ubcountp <= 0) {
		return EINVAL;
	}
	ubcount = *ubcountp; /* statstruct's */
	ubleft = ubcount * statstruct_size; /* bytes */
	*ubcountp = ubelem = 0;
	*done = 0;
	fmterror = 0;
	ubufp = ubuffer;
	/* inodes per cluster, capped by what fits in one fs block */
	nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ?
		mp->m_sb.sb_inopblock :
		(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
	nimask = ~(nicluster - 1);
	nbcluster = nicluster >> mp->m_sb.sb_inopblog;
	/*
	 * NOTE(review): the allocation below passes KM_MAYFAIL but the
	 * result is never checked for NULL before use — verify that
	 * kmem_zalloc_greedy cannot actually fail at its minimum size.
	 */
	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4,
				   KM_SLEEP | KM_MAYFAIL | KM_LARGE);
	nirbuf = irbsize / sizeof(*irbuf);

	/*
	 * Loop over the allocation groups, starting from the last
	 * inode returned; 0 means start of the allocation group.
	 */
	rval = 0;
	while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
		cond_resched();
		bp = NULL;
		down_read(&mp->m_peraglock);
		error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
		up_read(&mp->m_peraglock);
		if (error) {
			/*
			 * Skip this allocation group and go to the next one.
			 */
			agno++;
			agino = 0;
			continue;
		}
		agi = XFS_BUF_TO_AGI(agbp);
		/*
		 * Allocate and initialize a btree cursor for ialloc btree.
		 */
		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
		irbp = irbuf;
		irbufend = irbuf + nirbuf;
		end_of_ag = 0;
		/*
		 * If we're returning in the middle of an allocation group,
		 * we need to get the remainder of the chunk we're in.
		 */
		if (agino > 0) {
			/*
			 * Lookup the inode chunk that this inode lives in.
			 */
			error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp);
			if (!error &&	/* no I/O error */
			    tmp &&	/* lookup succeeded */
			    /* got the record, should always work */
			    !(error = xfs_inobt_get_rec(cur, &gino, &gcnt,
				    &gfree, &i)) &&
			    i == 1 &&
			    /* this is the right chunk */
			    agino < gino + XFS_INODES_PER_CHUNK &&
			    /* lastino was not last in chunk */
			    (chunkidx = agino - gino + 1) <
				    XFS_INODES_PER_CHUNK &&
			    /* there are some left allocated */
			    XFS_INOBT_MASKN(chunkidx,
				    XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) {
				/*
				 * Grab the chunk record.  Mark all the
				 * uninteresting inodes (because they're
				 * before our start point) free.
				 */
				for (i = 0; i < chunkidx; i++) {
					if (XFS_INOBT_MASK(i) & ~gfree)
						gcnt++;
				}
				gfree |= XFS_INOBT_MASKN(0, chunkidx);
				irbp->ir_startino = gino;
				irbp->ir_freecount = gcnt;
				irbp->ir_free = gfree;
				irbp++;
				agino = gino + XFS_INODES_PER_CHUNK;
				icount = XFS_INODES_PER_CHUNK - gcnt;
			} else {
				/*
				 * If any of those tests failed, bump the
				 * inode number (just in case).
				 */
				agino++;
				icount = 0;
			}
			/*
			 * In any case, increment to the next record.
			 */
			if (!error)
				error = xfs_btree_increment(cur, 0, &tmp);
		} else {
			/*
			 * Start of ag.  Lookup the first inode chunk.
			 */
			error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp);
			icount = 0;
		}
		/*
		 * Loop through inode btree records in this ag,
		 * until we run out of inodes or space in the buffer.
		 */
		while (irbp < irbufend && icount < ubcount) {
			/*
			 * Loop as long as we're unable to read the
			 * inode btree: skip ahead chunk by chunk until a
			 * lookup succeeds or we fall off the end of the ag.
			 */
			while (error) {
				agino += XFS_INODES_PER_CHUNK;
				if (XFS_AGINO_TO_AGBNO(mp, agino) >=
						be32_to_cpu(agi->agi_length))
					break;
				error = xfs_inobt_lookup_ge(cur, agino, 0, 0,
							    &tmp);
				cond_resched();
			}
			/*
			 * If ran off the end of the ag either with an error,
			 * or the normal way, set end and stop collecting.
			 */
			if (error ||
			    (error = xfs_inobt_get_rec(cur, &gino, &gcnt,
				    &gfree, &i)) ||
			    i == 0) {
				end_of_ag = 1;
				break;
			}
			/*
			 * If this chunk has any allocated inodes, save it.
			 * Also start read-ahead now for this chunk.
			 */
			if (gcnt < XFS_INODES_PER_CHUNK) {
				/*
				 * Loop over all clusters in the next chunk.
				 * Do a readahead if there are any allocated
				 * inodes in that cluster.
				 */
				for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
				     chunkidx = 0;
				     chunkidx < XFS_INODES_PER_CHUNK;
				     chunkidx += nicluster,
				     agbno += nbcluster) {
					if (XFS_INOBT_MASKN(chunkidx,
							    nicluster) & ~gfree)
						xfs_btree_reada_bufs(mp, agno,
							agbno, nbcluster);
				}
				irbp->ir_startino = gino;
				irbp->ir_freecount = gcnt;
				irbp->ir_free = gfree;
				irbp++;
				icount += XFS_INODES_PER_CHUNK - gcnt;
			}
			/*
			 * Set agino to after this chunk and bump the cursor.
			 */
			agino = gino + XFS_INODES_PER_CHUNK;
			error = xfs_btree_increment(cur, 0, &tmp);
			cond_resched();
		}
		/*
		 * Drop the btree buffers and the agi buffer.
		 * We can't hold any of the locks these represent
		 * when calling iget.
		 */
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
		xfs_buf_relse(agbp);
		/*
		 * Now format all the good inodes into the user's buffer.
		 */
		irbufend = irbp;
		for (irbp = irbuf;
		     irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
			/*
			 * Now process this chunk of inodes.  ir_freecount is
			 * bumped for every used inode below, so the loop ends
			 * once every allocated inode has been consumed.
			 */
			for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
			     XFS_BULKSTAT_UBLEFT(ubleft) &&
				irbp->ir_freecount < XFS_INODES_PER_CHUNK;
			     chunkidx++, clustidx++, agino++) {
				ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
				/*
				 * Recompute agbno if this is the
				 * first inode of the cluster.
				 *
				 * Careful with clustidx.   There can be
				 * multple clusters per chunk, a single
				 * cluster per chunk or a cluster that has
				 * inodes represented from several different
				 * chunks (if blocksize is large).
				 *
				 * Because of this, the starting clustidx is
				 * initialized to zero in this loop but must
				 * later be reset after reading in the cluster
				 * buffer.
				 */
				if ((chunkidx & (nicluster - 1)) == 0) {
					agbno = XFS_AGINO_TO_AGBNO(mp,
							irbp->ir_startino) +
						((chunkidx & nimask) >>
						 mp->m_sb.sb_inopblog);

					if (flags & (BULKSTAT_FG_QUICK |
						     BULKSTAT_FG_INLINE)) {
						int offset;

						ino = XFS_AGINO_TO_INO(mp, agno,
								       agino);
						bno = XFS_AGB_TO_DADDR(mp, agno,
								       agbno);

						/*
						 * Get the inode cluster buffer
						 */
						if (bp)
							xfs_buf_relse(bp);

						error = xfs_inotobp(mp, NULL, ino, &dip,
								    &bp, &offset,
								    XFS_IMAP_BULKSTAT);

						if (!error)
							clustidx = offset / mp->m_sb.sb_inodesize;
						if (XFS_TEST_ERROR(error != 0,
								   mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
								   XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
							bp = NULL;
							ubleft = 0;
							rval = error;
							break;
						}
					}
				}
				ino = XFS_AGINO_TO_INO(mp, agno, agino);
				bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
				/*
				 * Skip if this inode is free.
				 */
				if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
					lastino = ino;
					continue;
				}
				/*
				 * Count used inodes as free so we can tell
				 * when the chunk is used up.
				 */
				irbp->ir_freecount++;
				if (!xfs_bulkstat_use_dinode(mp, flags, bp,
							     clustidx, &dip)) {
					lastino = ino;
					continue;
				}
				/*
				 * If we need to do an iget, cannot hold bp.
				 * Drop it, until starting the next cluster.
				 */
				if ((flags & BULKSTAT_FG_INLINE) && !dip) {
					if (bp)
						xfs_buf_relse(bp);
					bp = NULL;
				}

				/*
				 * Get the inode and fill in a single buffer.
				 * BULKSTAT_FG_QUICK uses dip to fill it in.
				 * BULKSTAT_FG_IGET uses igets.
				 * BULKSTAT_FG_INLINE uses dip if we have an
				 * inline attr fork, else igets.
				 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
				 * This is also used to count inodes/blks, etc
				 * in xfs_qm_quotacheck.
				 */
				ubused = statstruct_size;
				error = formatter(mp, ino, ubufp,
						ubleft, private_data,
						bno, &ubused, dip, &fmterror);
				if (fmterror == BULKSTAT_RV_NOTHING) {
					if (error && error != ENOENT &&
						error != EINVAL) {
						ubleft = 0;
						rval = error;
						break;
					}
					lastino = ino;
					continue;
				}
				if (fmterror == BULKSTAT_RV_GIVEUP) {
					ubleft = 0;
					ASSERT(error);
					rval = error;
					break;
				}
				if (ubufp)
					ubufp += ubused;
				ubleft -= ubused;
				ubelem++;
				lastino = ino;
			}

			cond_resched();
		}

		if (bp)
			xfs_buf_relse(bp);

		/*
		 * Set up for the next loop iteration.
		 */
		if (XFS_BULKSTAT_UBLEFT(ubleft)) {
			if (end_of_ag) {
				agno++;
				agino = 0;
			} else
				agino = XFS_INO_TO_AGINO(mp, lastino);
		} else
			break;
	}
	/*
	 * Done, we're either out of filesystem or space to put the data.
	 */
	kmem_free(irbuf);
	*ubcountp = ubelem;
	/*
	 * Found some inodes, return them now and return the error next time.
	 */
	if (ubelem)
		rval = 0;
	if (agno >= mp->m_sb.sb_agcount) {
		/*
		 * If we ran out of filesystem, mark lastino as off
		 * the end of the filesystem, so the next call
		 * will return immediately.
		 */
		*lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
		*done = 1;
	} else
		*lastinop = (xfs_ino_t)lastino;

	return rval;
}
719 | 719 | ||
720 | /* | 720 | /* |
721 | * Return stat information in bulk (by-inode) for the filesystem. | 721 | * Return stat information in bulk (by-inode) for the filesystem. |
722 | * Special case for non-sequential one inode bulkstat. | 722 | * Special case for non-sequential one inode bulkstat. |
723 | */ | 723 | */ |
724 | int /* error status */ | 724 | int /* error status */ |
725 | xfs_bulkstat_single( | 725 | xfs_bulkstat_single( |
726 | xfs_mount_t *mp, /* mount point for filesystem */ | 726 | xfs_mount_t *mp, /* mount point for filesystem */ |
727 | xfs_ino_t *lastinop, /* inode to return */ | 727 | xfs_ino_t *lastinop, /* inode to return */ |
728 | char __user *buffer, /* buffer with inode stats */ | 728 | char __user *buffer, /* buffer with inode stats */ |
729 | int *done) /* 1 if there are more stats to get */ | 729 | int *done) /* 1 if there are more stats to get */ |
730 | { | 730 | { |
731 | int count; /* count value for bulkstat call */ | 731 | int count; /* count value for bulkstat call */ |
732 | int error; /* return value */ | 732 | int error; /* return value */ |
733 | xfs_ino_t ino; /* filesystem inode number */ | 733 | xfs_ino_t ino; /* filesystem inode number */ |
734 | int res; /* result from bs1 */ | 734 | int res; /* result from bs1 */ |
735 | 735 | ||
736 | /* | 736 | /* |
737 | * note that requesting valid inode numbers which are not allocated | 737 | * note that requesting valid inode numbers which are not allocated |
738 | * to inodes will most likely cause xfs_itobp to generate warning | 738 | * to inodes will most likely cause xfs_itobp to generate warning |
739 | * messages about bad magic numbers. This is ok. The fact that | 739 | * messages about bad magic numbers. This is ok. The fact that |
740 | * the inode isn't actually an inode is handled by the | 740 | * the inode isn't actually an inode is handled by the |
741 | * error check below. Done this way to make the usual case faster | 741 | * error check below. Done this way to make the usual case faster |
742 | * at the expense of the error case. | 742 | * at the expense of the error case. |
743 | */ | 743 | */ |
744 | 744 | ||
745 | ino = (xfs_ino_t)*lastinop; | 745 | ino = (xfs_ino_t)*lastinop; |
746 | error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), | 746 | error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), |
747 | NULL, 0, NULL, NULL, &res); | 747 | NULL, 0, NULL, NULL, &res); |
748 | if (error) { | 748 | if (error) { |
749 | /* | 749 | /* |
750 | * Special case way failed, do it the "long" way | 750 | * Special case way failed, do it the "long" way |
751 | * to see if that works. | 751 | * to see if that works. |
752 | */ | 752 | */ |
753 | (*lastinop)--; | 753 | (*lastinop)--; |
754 | count = 1; | 754 | count = 1; |
755 | if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, | 755 | if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, |
756 | NULL, sizeof(xfs_bstat_t), buffer, | 756 | NULL, sizeof(xfs_bstat_t), buffer, |
757 | BULKSTAT_FG_IGET, done)) | 757 | BULKSTAT_FG_IGET, done)) |
758 | return error; | 758 | return error; |
759 | if (count == 0 || (xfs_ino_t)*lastinop != ino) | 759 | if (count == 0 || (xfs_ino_t)*lastinop != ino) |
760 | return error == EFSCORRUPTED ? | 760 | return error == EFSCORRUPTED ? |
761 | XFS_ERROR(EINVAL) : error; | 761 | XFS_ERROR(EINVAL) : error; |
762 | else | 762 | else |
763 | return 0; | 763 | return 0; |
764 | } | 764 | } |
765 | *done = 0; | 765 | *done = 0; |
766 | return 0; | 766 | return 0; |
767 | } | 767 | } |
768 | 768 | ||
769 | int | 769 | int |
770 | xfs_inumbers_fmt( | 770 | xfs_inumbers_fmt( |
771 | void __user *ubuffer, /* buffer to write to */ | 771 | void __user *ubuffer, /* buffer to write to */ |
772 | const xfs_inogrp_t *buffer, /* buffer to read from */ | 772 | const xfs_inogrp_t *buffer, /* buffer to read from */ |
773 | long count, /* # of elements to read */ | 773 | long count, /* # of elements to read */ |
774 | long *written) /* # of bytes written */ | 774 | long *written) /* # of bytes written */ |
775 | { | 775 | { |
776 | if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) | 776 | if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) |
777 | return -EFAULT; | 777 | return -EFAULT; |
778 | *written = count * sizeof(*buffer); | 778 | *written = count * sizeof(*buffer); |
779 | return 0; | 779 | return 0; |
780 | } | 780 | } |
781 | 781 | ||
782 | /* | 782 | /* |
783 | * Return inode number table for the filesystem. | 783 | * Return inode number table for the filesystem. |
784 | */ | 784 | */ |
785 | int /* error status */ | 785 | int /* error status */ |
786 | xfs_inumbers( | 786 | xfs_inumbers( |
787 | xfs_mount_t *mp, /* mount point for filesystem */ | 787 | xfs_mount_t *mp, /* mount point for filesystem */ |
788 | xfs_ino_t *lastino, /* last inode returned */ | 788 | xfs_ino_t *lastino, /* last inode returned */ |
789 | int *count, /* size of buffer/count returned */ | 789 | int *count, /* size of buffer/count returned */ |
790 | void __user *ubuffer,/* buffer with inode descriptions */ | 790 | void __user *ubuffer,/* buffer with inode descriptions */ |
791 | inumbers_fmt_pf formatter) | 791 | inumbers_fmt_pf formatter) |
792 | { | 792 | { |
793 | xfs_buf_t *agbp; | 793 | xfs_buf_t *agbp; |
794 | xfs_agino_t agino; | 794 | xfs_agino_t agino; |
795 | xfs_agnumber_t agno; | 795 | xfs_agnumber_t agno; |
796 | int bcount; | 796 | int bcount; |
797 | xfs_inogrp_t *buffer; | 797 | xfs_inogrp_t *buffer; |
798 | int bufidx; | 798 | int bufidx; |
799 | xfs_btree_cur_t *cur; | 799 | xfs_btree_cur_t *cur; |
800 | int error; | 800 | int error; |
801 | __int32_t gcnt; | 801 | __int32_t gcnt; |
802 | xfs_inofree_t gfree; | 802 | xfs_inofree_t gfree; |
803 | xfs_agino_t gino; | 803 | xfs_agino_t gino; |
804 | int i; | 804 | int i; |
805 | xfs_ino_t ino; | 805 | xfs_ino_t ino; |
806 | int left; | 806 | int left; |
807 | int tmp; | 807 | int tmp; |
808 | 808 | ||
809 | ino = (xfs_ino_t)*lastino; | 809 | ino = (xfs_ino_t)*lastino; |
810 | agno = XFS_INO_TO_AGNO(mp, ino); | 810 | agno = XFS_INO_TO_AGNO(mp, ino); |
811 | agino = XFS_INO_TO_AGINO(mp, ino); | 811 | agino = XFS_INO_TO_AGINO(mp, ino); |
812 | left = *count; | 812 | left = *count; |
813 | *count = 0; | 813 | *count = 0; |
814 | bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); | 814 | bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); |
815 | buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); | 815 | buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); |
816 | error = bufidx = 0; | 816 | error = bufidx = 0; |
817 | cur = NULL; | 817 | cur = NULL; |
818 | agbp = NULL; | 818 | agbp = NULL; |
819 | while (left > 0 && agno < mp->m_sb.sb_agcount) { | 819 | while (left > 0 && agno < mp->m_sb.sb_agcount) { |
820 | if (agbp == NULL) { | 820 | if (agbp == NULL) { |
821 | down_read(&mp->m_peraglock); | 821 | down_read(&mp->m_peraglock); |
822 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | 822 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); |
823 | up_read(&mp->m_peraglock); | 823 | up_read(&mp->m_peraglock); |
824 | if (error) { | 824 | if (error) { |
825 | /* | 825 | /* |
826 | * If we can't read the AGI of this ag, | 826 | * If we can't read the AGI of this ag, |
827 | * then just skip to the next one. | 827 | * then just skip to the next one. |
828 | */ | 828 | */ |
829 | ASSERT(cur == NULL); | 829 | ASSERT(cur == NULL); |
830 | agbp = NULL; | 830 | agbp = NULL; |
831 | agno++; | 831 | agno++; |
832 | agino = 0; | 832 | agino = 0; |
833 | continue; | 833 | continue; |
834 | } | 834 | } |
835 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); | 835 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); |
836 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); | 836 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); |
837 | if (error) { | 837 | if (error) { |
838 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 838 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
839 | cur = NULL; | 839 | cur = NULL; |
840 | xfs_buf_relse(agbp); | 840 | xfs_buf_relse(agbp); |
841 | agbp = NULL; | 841 | agbp = NULL; |
842 | /* | 842 | /* |
843 | * Move up the last inode in the current | 843 | * Move up the last inode in the current |
844 | * chunk. The lookup_ge will always get | 844 | * chunk. The lookup_ge will always get |
845 | * us the first inode in the next chunk. | 845 | * us the first inode in the next chunk. |
846 | */ | 846 | */ |
847 | agino += XFS_INODES_PER_CHUNK - 1; | 847 | agino += XFS_INODES_PER_CHUNK - 1; |
848 | continue; | 848 | continue; |
849 | } | 849 | } |
850 | } | 850 | } |
851 | if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, | 851 | if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, |
852 | &i)) || | 852 | &i)) || |
853 | i == 0) { | 853 | i == 0) { |
854 | xfs_buf_relse(agbp); | 854 | xfs_buf_relse(agbp); |
855 | agbp = NULL; | 855 | agbp = NULL; |
856 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 856 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
857 | cur = NULL; | 857 | cur = NULL; |
858 | agno++; | 858 | agno++; |
859 | agino = 0; | 859 | agino = 0; |
860 | continue; | 860 | continue; |
861 | } | 861 | } |
862 | agino = gino + XFS_INODES_PER_CHUNK - 1; | 862 | agino = gino + XFS_INODES_PER_CHUNK - 1; |
863 | buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); | 863 | buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); |
864 | buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; | 864 | buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; |
865 | buffer[bufidx].xi_allocmask = ~gfree; | 865 | buffer[bufidx].xi_allocmask = ~gfree; |
866 | bufidx++; | 866 | bufidx++; |
867 | left--; | 867 | left--; |
868 | if (bufidx == bcount) { | 868 | if (bufidx == bcount) { |
869 | long written; | 869 | long written; |
870 | if (formatter(ubuffer, buffer, bufidx, &written)) { | 870 | if (formatter(ubuffer, buffer, bufidx, &written)) { |
871 | error = XFS_ERROR(EFAULT); | 871 | error = XFS_ERROR(EFAULT); |
872 | break; | 872 | break; |
873 | } | 873 | } |
874 | ubuffer += written; | 874 | ubuffer += written; |
875 | *count += bufidx; | 875 | *count += bufidx; |
876 | bufidx = 0; | 876 | bufidx = 0; |
877 | } | 877 | } |
878 | if (left) { | 878 | if (left) { |
879 | error = xfs_btree_increment(cur, 0, &tmp); | 879 | error = xfs_btree_increment(cur, 0, &tmp); |
880 | if (error) { | 880 | if (error) { |
881 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 881 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
882 | cur = NULL; | 882 | cur = NULL; |
883 | xfs_buf_relse(agbp); | 883 | xfs_buf_relse(agbp); |
884 | agbp = NULL; | 884 | agbp = NULL; |
885 | /* | 885 | /* |
886 | * The agino value has already been bumped. | 886 | * The agino value has already been bumped. |
887 | * Just try to skip up to it. | 887 | * Just try to skip up to it. |
888 | */ | 888 | */ |
889 | agino += XFS_INODES_PER_CHUNK; | 889 | agino += XFS_INODES_PER_CHUNK; |
890 | continue; | 890 | continue; |
891 | } | 891 | } |
892 | } | 892 | } |
893 | } | 893 | } |
894 | if (!error) { | 894 | if (!error) { |
895 | if (bufidx) { | 895 | if (bufidx) { |
896 | long written; | 896 | long written; |
897 | if (formatter(ubuffer, buffer, bufidx, &written)) | 897 | if (formatter(ubuffer, buffer, bufidx, &written)) |
898 | error = XFS_ERROR(EFAULT); | 898 | error = XFS_ERROR(EFAULT); |
899 | else | 899 | else |
900 | *count += bufidx; | 900 | *count += bufidx; |
901 | } | 901 | } |
902 | *lastino = XFS_AGINO_TO_INO(mp, agno, agino); | 902 | *lastino = XFS_AGINO_TO_INO(mp, agno, agino); |
903 | } | 903 | } |
904 | kmem_free(buffer); | 904 | kmem_free(buffer); |
905 | if (cur) | 905 | if (cur) |
906 | xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : | 906 | xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : |
907 | XFS_BTREE_NOERROR)); | 907 | XFS_BTREE_NOERROR)); |
908 | if (agbp) | 908 | if (agbp) |
909 | xfs_buf_relse(agbp); | 909 | xfs_buf_relse(agbp); |
910 | return error; | 910 | return error; |
911 | } | 911 | } |
912 | 912 |
fs/xfs/xfs_log_recover.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_dir2.h" | 27 | #include "xfs_dir2.h" |
28 | #include "xfs_dmapi.h" | 28 | #include "xfs_dmapi.h" |
29 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
30 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
31 | #include "xfs_bmap_btree.h" | 31 | #include "xfs_bmap_btree.h" |
32 | #include "xfs_alloc_btree.h" | 32 | #include "xfs_alloc_btree.h" |
33 | #include "xfs_ialloc_btree.h" | 33 | #include "xfs_ialloc_btree.h" |
34 | #include "xfs_dir2_sf.h" | 34 | #include "xfs_dir2_sf.h" |
35 | #include "xfs_attr_sf.h" | 35 | #include "xfs_attr_sf.h" |
36 | #include "xfs_dinode.h" | 36 | #include "xfs_dinode.h" |
37 | #include "xfs_inode.h" | 37 | #include "xfs_inode.h" |
38 | #include "xfs_inode_item.h" | 38 | #include "xfs_inode_item.h" |
39 | #include "xfs_imap.h" | ||
40 | #include "xfs_alloc.h" | 39 | #include "xfs_alloc.h" |
41 | #include "xfs_ialloc.h" | 40 | #include "xfs_ialloc.h" |
42 | #include "xfs_log_priv.h" | 41 | #include "xfs_log_priv.h" |
43 | #include "xfs_buf_item.h" | 42 | #include "xfs_buf_item.h" |
44 | #include "xfs_log_recover.h" | 43 | #include "xfs_log_recover.h" |
45 | #include "xfs_extfree_item.h" | 44 | #include "xfs_extfree_item.h" |
46 | #include "xfs_trans_priv.h" | 45 | #include "xfs_trans_priv.h" |
47 | #include "xfs_quota.h" | 46 | #include "xfs_quota.h" |
48 | #include "xfs_rw.h" | 47 | #include "xfs_rw.h" |
49 | #include "xfs_utils.h" | 48 | #include "xfs_utils.h" |
50 | 49 | ||
51 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); | 50 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); |
52 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); | 51 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); |
53 | STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, | 52 | STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, |
54 | xlog_recover_item_t *item); | 53 | xlog_recover_item_t *item); |
55 | #if defined(DEBUG) | 54 | #if defined(DEBUG) |
56 | STATIC void xlog_recover_check_summary(xlog_t *); | 55 | STATIC void xlog_recover_check_summary(xlog_t *); |
57 | #else | 56 | #else |
58 | #define xlog_recover_check_summary(log) | 57 | #define xlog_recover_check_summary(log) |
59 | #endif | 58 | #endif |
60 | 59 | ||
61 | 60 | ||
62 | /* | 61 | /* |
63 | * Sector aligned buffer routines for buffer create/read/write/access | 62 | * Sector aligned buffer routines for buffer create/read/write/access |
64 | */ | 63 | */ |
65 | 64 | ||
66 | #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ | 65 | #define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ |
67 | ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ | 66 | ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ |
68 | ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) | 67 | ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) |
69 | #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) | 68 | #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) |
70 | 69 | ||
71 | xfs_buf_t * | 70 | xfs_buf_t * |
72 | xlog_get_bp( | 71 | xlog_get_bp( |
73 | xlog_t *log, | 72 | xlog_t *log, |
74 | int num_bblks) | 73 | int num_bblks) |
75 | { | 74 | { |
76 | ASSERT(num_bblks > 0); | 75 | ASSERT(num_bblks > 0); |
77 | 76 | ||
78 | if (log->l_sectbb_log) { | 77 | if (log->l_sectbb_log) { |
79 | if (num_bblks > 1) | 78 | if (num_bblks > 1) |
80 | num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); | 79 | num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); |
81 | num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); | 80 | num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); |
82 | } | 81 | } |
83 | return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); | 82 | return xfs_buf_get_noaddr(BBTOB(num_bblks), log->l_mp->m_logdev_targp); |
84 | } | 83 | } |
85 | 84 | ||
86 | void | 85 | void |
87 | xlog_put_bp( | 86 | xlog_put_bp( |
88 | xfs_buf_t *bp) | 87 | xfs_buf_t *bp) |
89 | { | 88 | { |
90 | xfs_buf_free(bp); | 89 | xfs_buf_free(bp); |
91 | } | 90 | } |
92 | 91 | ||
93 | 92 | ||
94 | /* | 93 | /* |
95 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. | 94 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. |
96 | */ | 95 | */ |
97 | int | 96 | int |
98 | xlog_bread( | 97 | xlog_bread( |
99 | xlog_t *log, | 98 | xlog_t *log, |
100 | xfs_daddr_t blk_no, | 99 | xfs_daddr_t blk_no, |
101 | int nbblks, | 100 | int nbblks, |
102 | xfs_buf_t *bp) | 101 | xfs_buf_t *bp) |
103 | { | 102 | { |
104 | int error; | 103 | int error; |
105 | 104 | ||
106 | if (log->l_sectbb_log) { | 105 | if (log->l_sectbb_log) { |
107 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); | 106 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); |
108 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); | 107 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); |
109 | } | 108 | } |
110 | 109 | ||
111 | ASSERT(nbblks > 0); | 110 | ASSERT(nbblks > 0); |
112 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 111 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); |
113 | ASSERT(bp); | 112 | ASSERT(bp); |
114 | 113 | ||
115 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 114 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
116 | XFS_BUF_READ(bp); | 115 | XFS_BUF_READ(bp); |
117 | XFS_BUF_BUSY(bp); | 116 | XFS_BUF_BUSY(bp); |
118 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 117 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); |
119 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 118 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
120 | 119 | ||
121 | xfsbdstrat(log->l_mp, bp); | 120 | xfsbdstrat(log->l_mp, bp); |
122 | error = xfs_iowait(bp); | 121 | error = xfs_iowait(bp); |
123 | if (error) | 122 | if (error) |
124 | xfs_ioerror_alert("xlog_bread", log->l_mp, | 123 | xfs_ioerror_alert("xlog_bread", log->l_mp, |
125 | bp, XFS_BUF_ADDR(bp)); | 124 | bp, XFS_BUF_ADDR(bp)); |
126 | return error; | 125 | return error; |
127 | } | 126 | } |
128 | 127 | ||
129 | /* | 128 | /* |
130 | * Write out the buffer at the given block for the given number of blocks. | 129 | * Write out the buffer at the given block for the given number of blocks. |
131 | * The buffer is kept locked across the write and is returned locked. | 130 | * The buffer is kept locked across the write and is returned locked. |
132 | * This can only be used for synchronous log writes. | 131 | * This can only be used for synchronous log writes. |
133 | */ | 132 | */ |
134 | STATIC int | 133 | STATIC int |
135 | xlog_bwrite( | 134 | xlog_bwrite( |
136 | xlog_t *log, | 135 | xlog_t *log, |
137 | xfs_daddr_t blk_no, | 136 | xfs_daddr_t blk_no, |
138 | int nbblks, | 137 | int nbblks, |
139 | xfs_buf_t *bp) | 138 | xfs_buf_t *bp) |
140 | { | 139 | { |
141 | int error; | 140 | int error; |
142 | 141 | ||
143 | if (log->l_sectbb_log) { | 142 | if (log->l_sectbb_log) { |
144 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); | 143 | blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); |
145 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); | 144 | nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); |
146 | } | 145 | } |
147 | 146 | ||
148 | ASSERT(nbblks > 0); | 147 | ASSERT(nbblks > 0); |
149 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 148 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); |
150 | 149 | ||
151 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 150 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
152 | XFS_BUF_ZEROFLAGS(bp); | 151 | XFS_BUF_ZEROFLAGS(bp); |
153 | XFS_BUF_BUSY(bp); | 152 | XFS_BUF_BUSY(bp); |
154 | XFS_BUF_HOLD(bp); | 153 | XFS_BUF_HOLD(bp); |
155 | XFS_BUF_PSEMA(bp, PRIBIO); | 154 | XFS_BUF_PSEMA(bp, PRIBIO); |
156 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 155 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); |
157 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 156 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
158 | 157 | ||
159 | if ((error = xfs_bwrite(log->l_mp, bp))) | 158 | if ((error = xfs_bwrite(log->l_mp, bp))) |
160 | xfs_ioerror_alert("xlog_bwrite", log->l_mp, | 159 | xfs_ioerror_alert("xlog_bwrite", log->l_mp, |
161 | bp, XFS_BUF_ADDR(bp)); | 160 | bp, XFS_BUF_ADDR(bp)); |
162 | return error; | 161 | return error; |
163 | } | 162 | } |
164 | 163 | ||
165 | STATIC xfs_caddr_t | 164 | STATIC xfs_caddr_t |
166 | xlog_align( | 165 | xlog_align( |
167 | xlog_t *log, | 166 | xlog_t *log, |
168 | xfs_daddr_t blk_no, | 167 | xfs_daddr_t blk_no, |
169 | int nbblks, | 168 | int nbblks, |
170 | xfs_buf_t *bp) | 169 | xfs_buf_t *bp) |
171 | { | 170 | { |
172 | xfs_caddr_t ptr; | 171 | xfs_caddr_t ptr; |
173 | 172 | ||
174 | if (!log->l_sectbb_log) | 173 | if (!log->l_sectbb_log) |
175 | return XFS_BUF_PTR(bp); | 174 | return XFS_BUF_PTR(bp); |
176 | 175 | ||
177 | ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); | 176 | ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); |
178 | ASSERT(XFS_BUF_SIZE(bp) >= | 177 | ASSERT(XFS_BUF_SIZE(bp) >= |
179 | BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); | 178 | BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); |
180 | return ptr; | 179 | return ptr; |
181 | } | 180 | } |
182 | 181 | ||
183 | #ifdef DEBUG | 182 | #ifdef DEBUG |
184 | /* | 183 | /* |
185 | * dump debug superblock and log record information | 184 | * dump debug superblock and log record information |
186 | */ | 185 | */ |
187 | STATIC void | 186 | STATIC void |
188 | xlog_header_check_dump( | 187 | xlog_header_check_dump( |
189 | xfs_mount_t *mp, | 188 | xfs_mount_t *mp, |
190 | xlog_rec_header_t *head) | 189 | xlog_rec_header_t *head) |
191 | { | 190 | { |
192 | int b; | 191 | int b; |
193 | 192 | ||
194 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); | 193 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); |
195 | for (b = 0; b < 16; b++) | 194 | for (b = 0; b < 16; b++) |
196 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); | 195 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); |
197 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); | 196 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); |
198 | cmn_err(CE_DEBUG, " log : uuid = "); | 197 | cmn_err(CE_DEBUG, " log : uuid = "); |
199 | for (b = 0; b < 16; b++) | 198 | for (b = 0; b < 16; b++) |
200 | cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); | 199 | cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]); |
201 | cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); | 200 | cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt)); |
202 | } | 201 | } |
203 | #else | 202 | #else |
204 | #define xlog_header_check_dump(mp, head) | 203 | #define xlog_header_check_dump(mp, head) |
205 | #endif | 204 | #endif |
206 | 205 | ||
207 | /* | 206 | /* |
208 | * check log record header for recovery | 207 | * check log record header for recovery |
209 | */ | 208 | */ |
210 | STATIC int | 209 | STATIC int |
211 | xlog_header_check_recover( | 210 | xlog_header_check_recover( |
212 | xfs_mount_t *mp, | 211 | xfs_mount_t *mp, |
213 | xlog_rec_header_t *head) | 212 | xlog_rec_header_t *head) |
214 | { | 213 | { |
215 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); | 214 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); |
216 | 215 | ||
217 | /* | 216 | /* |
218 | * IRIX doesn't write the h_fmt field and leaves it zeroed | 217 | * IRIX doesn't write the h_fmt field and leaves it zeroed |
219 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover | 218 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover |
220 | * a dirty log created in IRIX. | 219 | * a dirty log created in IRIX. |
221 | */ | 220 | */ |
222 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { | 221 | if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { |
223 | xlog_warn( | 222 | xlog_warn( |
224 | "XFS: dirty log written in incompatible format - can't recover"); | 223 | "XFS: dirty log written in incompatible format - can't recover"); |
225 | xlog_header_check_dump(mp, head); | 224 | xlog_header_check_dump(mp, head); |
226 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", | 225 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", |
227 | XFS_ERRLEVEL_HIGH, mp); | 226 | XFS_ERRLEVEL_HIGH, mp); |
228 | return XFS_ERROR(EFSCORRUPTED); | 227 | return XFS_ERROR(EFSCORRUPTED); |
229 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 228 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
230 | xlog_warn( | 229 | xlog_warn( |
231 | "XFS: dirty log entry has mismatched uuid - can't recover"); | 230 | "XFS: dirty log entry has mismatched uuid - can't recover"); |
232 | xlog_header_check_dump(mp, head); | 231 | xlog_header_check_dump(mp, head); |
233 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", | 232 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", |
234 | XFS_ERRLEVEL_HIGH, mp); | 233 | XFS_ERRLEVEL_HIGH, mp); |
235 | return XFS_ERROR(EFSCORRUPTED); | 234 | return XFS_ERROR(EFSCORRUPTED); |
236 | } | 235 | } |
237 | return 0; | 236 | return 0; |
238 | } | 237 | } |
239 | 238 | ||
240 | /* | 239 | /* |
241 | * read the head block of the log and check the header | 240 | * read the head block of the log and check the header |
242 | */ | 241 | */ |
243 | STATIC int | 242 | STATIC int |
244 | xlog_header_check_mount( | 243 | xlog_header_check_mount( |
245 | xfs_mount_t *mp, | 244 | xfs_mount_t *mp, |
246 | xlog_rec_header_t *head) | 245 | xlog_rec_header_t *head) |
247 | { | 246 | { |
248 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); | 247 | ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); |
249 | 248 | ||
250 | if (uuid_is_nil(&head->h_fs_uuid)) { | 249 | if (uuid_is_nil(&head->h_fs_uuid)) { |
251 | /* | 250 | /* |
252 | * IRIX doesn't write the h_fs_uuid or h_fmt fields. If | 251 | * IRIX doesn't write the h_fs_uuid or h_fmt fields. If |
253 | * h_fs_uuid is nil, we assume this log was last mounted | 252 | * h_fs_uuid is nil, we assume this log was last mounted |
254 | * by IRIX and continue. | 253 | * by IRIX and continue. |
255 | */ | 254 | */ |
256 | xlog_warn("XFS: nil uuid in log - IRIX style log"); | 255 | xlog_warn("XFS: nil uuid in log - IRIX style log"); |
257 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 256 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
258 | xlog_warn("XFS: log has mismatched uuid - can't recover"); | 257 | xlog_warn("XFS: log has mismatched uuid - can't recover"); |
259 | xlog_header_check_dump(mp, head); | 258 | xlog_header_check_dump(mp, head); |
260 | XFS_ERROR_REPORT("xlog_header_check_mount", | 259 | XFS_ERROR_REPORT("xlog_header_check_mount", |
261 | XFS_ERRLEVEL_HIGH, mp); | 260 | XFS_ERRLEVEL_HIGH, mp); |
262 | return XFS_ERROR(EFSCORRUPTED); | 261 | return XFS_ERROR(EFSCORRUPTED); |
263 | } | 262 | } |
264 | return 0; | 263 | return 0; |
265 | } | 264 | } |
266 | 265 | ||
267 | STATIC void | 266 | STATIC void |
268 | xlog_recover_iodone( | 267 | xlog_recover_iodone( |
269 | struct xfs_buf *bp) | 268 | struct xfs_buf *bp) |
270 | { | 269 | { |
271 | xfs_mount_t *mp; | 270 | xfs_mount_t *mp; |
272 | 271 | ||
273 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); | 272 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); |
274 | 273 | ||
275 | if (XFS_BUF_GETERROR(bp)) { | 274 | if (XFS_BUF_GETERROR(bp)) { |
276 | /* | 275 | /* |
277 | * We're not going to bother about retrying | 276 | * We're not going to bother about retrying |
278 | * this during recovery. One strike! | 277 | * this during recovery. One strike! |
279 | */ | 278 | */ |
280 | mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); | 279 | mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); |
281 | xfs_ioerror_alert("xlog_recover_iodone", | 280 | xfs_ioerror_alert("xlog_recover_iodone", |
282 | mp, bp, XFS_BUF_ADDR(bp)); | 281 | mp, bp, XFS_BUF_ADDR(bp)); |
283 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | 282 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); |
284 | } | 283 | } |
285 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 284 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
286 | XFS_BUF_CLR_IODONE_FUNC(bp); | 285 | XFS_BUF_CLR_IODONE_FUNC(bp); |
287 | xfs_biodone(bp); | 286 | xfs_biodone(bp); |
288 | } | 287 | } |
289 | 288 | ||
290 | /* | 289 | /* |
291 | * This routine finds (to an approximation) the first block in the physical | 290 | * This routine finds (to an approximation) the first block in the physical |
292 | * log which contains the given cycle. It uses a binary search algorithm. | 291 | * log which contains the given cycle. It uses a binary search algorithm. |
293 | * Note that the algorithm can not be perfect because the disk will not | 292 | * Note that the algorithm can not be perfect because the disk will not |
294 | * necessarily be perfect. | 293 | * necessarily be perfect. |
295 | */ | 294 | */ |
296 | STATIC int | 295 | STATIC int |
297 | xlog_find_cycle_start( | 296 | xlog_find_cycle_start( |
298 | xlog_t *log, | 297 | xlog_t *log, |
299 | xfs_buf_t *bp, | 298 | xfs_buf_t *bp, |
300 | xfs_daddr_t first_blk, | 299 | xfs_daddr_t first_blk, |
301 | xfs_daddr_t *last_blk, | 300 | xfs_daddr_t *last_blk, |
302 | uint cycle) | 301 | uint cycle) |
303 | { | 302 | { |
304 | xfs_caddr_t offset; | 303 | xfs_caddr_t offset; |
305 | xfs_daddr_t mid_blk; | 304 | xfs_daddr_t mid_blk; |
306 | uint mid_cycle; | 305 | uint mid_cycle; |
307 | int error; | 306 | int error; |
308 | 307 | ||
309 | mid_blk = BLK_AVG(first_blk, *last_blk); | 308 | mid_blk = BLK_AVG(first_blk, *last_blk); |
310 | while (mid_blk != first_blk && mid_blk != *last_blk) { | 309 | while (mid_blk != first_blk && mid_blk != *last_blk) { |
311 | if ((error = xlog_bread(log, mid_blk, 1, bp))) | 310 | if ((error = xlog_bread(log, mid_blk, 1, bp))) |
312 | return error; | 311 | return error; |
313 | offset = xlog_align(log, mid_blk, 1, bp); | 312 | offset = xlog_align(log, mid_blk, 1, bp); |
314 | mid_cycle = xlog_get_cycle(offset); | 313 | mid_cycle = xlog_get_cycle(offset); |
315 | if (mid_cycle == cycle) { | 314 | if (mid_cycle == cycle) { |
316 | *last_blk = mid_blk; | 315 | *last_blk = mid_blk; |
317 | /* last_half_cycle == mid_cycle */ | 316 | /* last_half_cycle == mid_cycle */ |
318 | } else { | 317 | } else { |
319 | first_blk = mid_blk; | 318 | first_blk = mid_blk; |
320 | /* first_half_cycle == mid_cycle */ | 319 | /* first_half_cycle == mid_cycle */ |
321 | } | 320 | } |
322 | mid_blk = BLK_AVG(first_blk, *last_blk); | 321 | mid_blk = BLK_AVG(first_blk, *last_blk); |
323 | } | 322 | } |
324 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || | 323 | ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || |
325 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); | 324 | (mid_blk == *last_blk && mid_blk-1 == first_blk)); |
326 | 325 | ||
327 | return 0; | 326 | return 0; |
328 | } | 327 | } |
329 | 328 | ||
330 | /* | 329 | /* |
331 | * Check that the range of blocks does not contain the cycle number | 330 | * Check that the range of blocks does not contain the cycle number |
332 | * given. The scan needs to occur from front to back and the ptr into the | 331 | * given. The scan needs to occur from front to back and the ptr into the |
333 | * region must be updated since a later routine will need to perform another | 332 | * region must be updated since a later routine will need to perform another |
334 | * test. If the region is completely good, we end up returning the same | 333 | * test. If the region is completely good, we end up returning the same |
335 | * last block number. | 334 | * last block number. |
336 | * | 335 | * |
337 | * Set blkno to -1 if we encounter no errors. This is an invalid block number | 336 | * Set blkno to -1 if we encounter no errors. This is an invalid block number |
338 | * since we don't ever expect logs to get this large. | 337 | * since we don't ever expect logs to get this large. |
339 | */ | 338 | */ |
/*
 * Scan the [start_blk, start_blk + nbblks) range front-to-back looking for
 * any block whose header carries cycle number stop_on_cycle_no.  On success
 * *new_blk is the first such block, or -1 if the whole range is clean.
 * Returns 0 unless a read fails or no scratch buffer can be allocated.
 */
STATIC int
xlog_find_verify_cycle(
	xlog_t		*log,
	xfs_daddr_t	start_blk,
	int		nbblks,
	uint		stop_on_cycle_no,
	xfs_daddr_t	*new_blk)
{
	xfs_daddr_t	i, j;
	uint		cycle;
	xfs_buf_t	*bp;
	xfs_daddr_t	bufblks;
	xfs_caddr_t	buf = NULL;
	int		error = 0;

	/* round the scan-buffer size up to the next power of two */
	bufblks = 1 << ffs(nbblks);

	while (!(bp = xlog_get_bp(log, bufblks))) {
		/* can't get enough memory to do everything in one big buffer */
		bufblks >>= 1;
		/*
		 * NOTE(review): this compares a block count against
		 * l_sectbb_log, which looks like a log2 (shift) value, not a
		 * count of basic blocks.  It works for 512-byte sectors
		 * (l_sectbb_log == 0, loop ends once bufblks reaches 0) but
		 * the intent is presumably "don't shrink below one sector" —
		 * verify against the definition of l_sectbb_log.
		 */
		if (bufblks <= log->l_sectbb_log)
			return ENOMEM;
	}

	/* read the range in bufblks-sized chunks */
	for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
		int	bcount;

		/* last chunk may be short */
		bcount = min(bufblks, (start_blk + nbblks - i));

		if ((error = xlog_bread(log, i, bcount, bp)))
			goto out;

		buf = xlog_align(log, i, bcount, bp);
		/* walk each 512-byte basic block in the chunk */
		for (j = 0; j < bcount; j++) {
			cycle = xlog_get_cycle(buf);
			if (cycle == stop_on_cycle_no) {
				/* offending block found - report it */
				*new_blk = i+j;
				goto out;
			}

			buf += BBSIZE;
		}
	}

	/* entire range is free of stop_on_cycle_no */
	*new_blk = -1;

out:
	xlog_put_bp(bp);
	return error;
}
390 | 389 | ||
391 | /* | 390 | /* |
392 | * Potentially backup over partial log record write. | 391 | * Potentially backup over partial log record write. |
393 | * | 392 | * |
394 | * In the typical case, last_blk is the number of the block directly after | 393 | * In the typical case, last_blk is the number of the block directly after |
395 | * a good log record. Therefore, we subtract one to get the block number | 394 | * a good log record. Therefore, we subtract one to get the block number |
396 | * of the last block in the given buffer. extra_bblks contains the number | 395 | * of the last block in the given buffer. extra_bblks contains the number |
397 | * of blocks we would have read on a previous read. This happens when the | 396 | * of blocks we would have read on a previous read. This happens when the |
398 | * last log record is split over the end of the physical log. | 397 | * last log record is split over the end of the physical log. |
399 | * | 398 | * |
400 | * extra_bblks is the number of blocks potentially verified on a previous | 399 | * extra_bblks is the number of blocks potentially verified on a previous |
401 | * call to this routine. | 400 | * call to this routine. |
402 | */ | 401 | */ |
/*
 * Walk backwards from *last_blk looking for the most recent log record
 * header, verify it belongs to this filesystem, and pull *last_blk back if
 * it points into the middle of that record (a torn write).  Returns 0 on
 * success, -1 if the scan ran off the front of the range (caller retries at
 * the physical end of the log), or a positive errno-style error.
 */
STATIC int
xlog_find_verify_log_record(
	xlog_t			*log,
	xfs_daddr_t		start_blk,
	xfs_daddr_t		*last_blk,
	int			extra_bblks)
{
	xfs_daddr_t		i;
	xfs_buf_t		*bp;
	xfs_caddr_t		offset = NULL;
	xlog_rec_header_t	*head = NULL;
	int			error = 0;
	int			smallmem = 0;	/* 1 == read one block at a time */
	int			num_blks = *last_blk - start_blk;
	int			xhdrs;		/* record header block count */

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	if (!(bp = xlog_get_bp(log, num_blks))) {
		/* fall back to a single-block buffer and per-block reads */
		if (!(bp = xlog_get_bp(log, 1)))
			return ENOMEM;
		smallmem = 1;
	} else {
		/* read the whole range once; offset tracks the current block */
		if ((error = xlog_bread(log, start_blk, num_blks, bp)))
			goto out;
		offset = xlog_align(log, start_blk, num_blks, bp);
		/* start at the last block of the range */
		offset += ((num_blks - 1) << BBSHIFT);
	}

	/* scan backwards for a block starting with the record magic */
	for (i = (*last_blk) - 1; i >= 0; i--) {
		if (i < start_blk) {
			/* valid log record not found */
			xlog_warn(
		"XFS: Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		if (smallmem) {
			if ((error = xlog_bread(log, i, 1, bp)))
				goto out;
			offset = xlog_align(log, i, 1, bp);
		}

		head = (xlog_rec_header_t *)offset;

		if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno))
			break;

		/* big-buffer case: step back one basic block in memory */
		if (!smallmem)
			offset -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header. Return
	 * to caller. If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/*
	 * We have the final block of the good log (the first block
	 * of the log record _before_ the head. So we check the uuid.
	 */
	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #. We may end
	 * up reading an entire log record. In this case, we don't want to
	 * reset last_blk. Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		uint	h_size = be32_to_cpu(head->h_size);

		/* v2 records may carry extra headers, one per cycle-size unit */
		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	/* record is torn if its stated length doesn't reach *last_blk */
	if (*last_blk - i + extra_bblks !=
	    BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);
	return error;
}
499 | 498 | ||
500 | /* | 499 | /* |
501 | * Head is defined to be the point of the log where the next log write | 500 | * Head is defined to be the point of the log where the next log write |
502 | * write could go. This means that incomplete LR writes at the end are | 501 | * write could go. This means that incomplete LR writes at the end are |
503 | * eliminated when calculating the head. We aren't guaranteed that previous | 502 | * eliminated when calculating the head. We aren't guaranteed that previous |
504 | * LR have complete transactions. We only know that a cycle number of | 503 | * LR have complete transactions. We only know that a cycle number of |
505 | * current cycle number -1 won't be present in the log if we start writing | 504 | * current cycle number -1 won't be present in the log if we start writing |
506 | * from our current block number. | 505 | * from our current block number. |
507 | * | 506 | * |
508 | * last_blk contains the block number of the first block with a given | 507 | * last_blk contains the block number of the first block with a given |
509 | * cycle number. | 508 | * cycle number. |
510 | * | 509 | * |
511 | * Return: zero if normal, non-zero if error. | 510 | * Return: zero if normal, non-zero if error. |
512 | */ | 511 | */ |
/*
 * Locate the head of the log (the first block the next log write may use)
 * and store it in *return_head_blk.  Returns zero if normal, non-zero on
 * error.  Internally head_blk == log_bbnum is used as a sentinel for
 * "block zero" so the wrap-around arithmetic below works; it is translated
 * back to 0 just before returning.
 */
STATIC int
xlog_find_head(
	xlog_t 		*log,
	xfs_daddr_t	*return_head_blk)
{
	xfs_buf_t	*bp;
	xfs_caddr_t	offset;
	xfs_daddr_t	new_blk, first_blk, start_blk, last_blk, head_blk;
	int		num_scan_bblks;
	uint		first_half_cycle, last_half_cycle;
	uint		stop_on_cycle;
	int		error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		/* -1 means "yes": head is the first zeroed block */
		*return_head_blk = first_blk;

		/* Is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xlog_warn("XFS: totally zeroed log");
		}

		return 0;
	} else if (error) {
		xlog_warn("XFS: empty log check failed");
		return error;
	}

	first_blk = 0;			/* get cycle # of 1st block */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;
	if ((error = xlog_bread(log, 0, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, 0, 1, bp);
	first_half_cycle = xlog_get_cycle(offset);

	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
	if ((error = xlog_bread(log, last_blk, 1, bp)))
		goto bp_err;
	offset = xlog_align(log, last_blk, 1, bp);
	last_half_cycle = xlog_get_cycle(offset);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number. In this
	 * case, head_blk can't be set to zero (which makes sense). The below
	 * math doesn't work out properly with head_blk equal to zero. Instead,
	 * we set it to log_bbnum which is an invalid block number, but this
	 * value makes the math correct. If head_blk doesn't changed through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum. In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have
		 * cycle number last_half_cycle. We need to scan backwards
		 * from the end verifying that there are no holes still
		 * containing last_half_cycle - 1. If we find such a hole,
		 * then the start of that hole will be the new head. The
		 * simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somewhere at the end of the
		 * log, as one of the latest writes at the beginning was
		 * incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and
		 * the head has to end up at the start of the x-1 hole at the
		 * end of the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.
		 * We don't worry about the x+1 blocks that we encounter,
		 * because we know that they cannot be the head since the log
		 * started with x.
		 */
		head_blk = log_bbnum;	/* sentinel for "block 0" */
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle
		 * number matching last_half_cycle. We expect the log to be
		 * some variation on
		 *        x + 1 ... | x ...
		 * The first block with cycle number x (last_half_cycle) will
		 * be where the new head belongs. First we do a binary search
		 * for the first occurrence of last_half_cycle. The binary
		 * search may not be totally accurate, so then we scan back
		 * from there looking for occurrences of last_half_cycle before
		 * us. If that backwards scan wraps around the beginning of
		 * the log, then we look for occurrences of last_half_cycle - 1
		 * at the end of the log. The cases we're looking for look
		 * like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * or
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						&head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer. Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number. The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log. The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log,
						start_blk, num_scan_bblks,
						stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.
		 * First we scan the physical end of the log. In this part
		 * of the log, we are looking for blocks with cycle number
		 * last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as
		 * we've found a hole that didn't get written in going around
		 * the end of the physical log. The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of
		 * the log looking for occurrences of last_half_cycle. If we
		 * find one, then our current estimate for the location of the
		 * first occurrence of last_half_cycle is wrong and we move
		 * back to the hole we've found. This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                               ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k
		 * logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the
		 * x + 1 blocks. We need to skip past those since that is
		 * certainly not the head of the log. By searching for
		 * last_half_cycle-1 we accomplish that.
		 */
		start_blk = log_bbnum - num_scan_bblks + head_blk;
		ASSERT(head_blk <= INT_MAX &&
			(xfs_daddr_t) num_scan_bblks - head_blk >= 0);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks - (int)head_blk,
					(stop_on_cycle - 1), &new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto bad_blk;
		}

		/*
		 * Scan beginning of log now. The last part of the physical
		 * log is good. This scan needs to verify that it doesn't find
		 * the last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log,
					start_blk, (int)head_blk,
					stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

 bad_blk:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = XLOG_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks; /* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			/* -1 here means no record header found at all: fatal */
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - num_scan_bblks + head_blk;
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX &&
				(xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
							start_blk, &new_blk,
							(int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	/* translate the log_bbnum sentinel back to block zero */
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number. Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1. In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

 bp_err:
	xlog_put_bp(bp);

	if (error)
	    xlog_warn("XFS: failed to find log head");
	return error;
}
762 | 761 | ||
763 | /* | 762 | /* |
764 | * Find the sync block number or the tail of the log. | 763 | * Find the sync block number or the tail of the log. |
765 | * | 764 | * |
766 | * This will be the block number of the last record to have its | 765 | * This will be the block number of the last record to have its |
767 | * associated buffers synced to disk. Every log record header has | 766 | * associated buffers synced to disk. Every log record header has |
768 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy | 767 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy |
769 | * to get a sync block number. The only concern is to figure out which | 768 | * to get a sync block number. The only concern is to figure out which |
770 | * log record header to believe. | 769 | * log record header to believe. |
771 | * | 770 | * |
772 | * The following algorithm uses the log record header with the largest | 771 | * The following algorithm uses the log record header with the largest |
773 | * lsn. The entire log record does not need to be valid. We only care | 772 | * lsn. The entire log record does not need to be valid. We only care |
774 | * that the header is valid. | 773 | * that the header is valid. |
775 | * | 774 | * |
776 | * We could speed up search by using current head_blk buffer, but it is not | 775 | * We could speed up search by using current head_blk buffer, but it is not |
777 | * available. | 776 | * available. |
778 | */ | 777 | */ |
779 | int | 778 | int |
780 | xlog_find_tail( | 779 | xlog_find_tail( |
781 | xlog_t *log, | 780 | xlog_t *log, |
782 | xfs_daddr_t *head_blk, | 781 | xfs_daddr_t *head_blk, |
783 | xfs_daddr_t *tail_blk) | 782 | xfs_daddr_t *tail_blk) |
784 | { | 783 | { |
785 | xlog_rec_header_t *rhead; | 784 | xlog_rec_header_t *rhead; |
786 | xlog_op_header_t *op_head; | 785 | xlog_op_header_t *op_head; |
787 | xfs_caddr_t offset = NULL; | 786 | xfs_caddr_t offset = NULL; |
788 | xfs_buf_t *bp; | 787 | xfs_buf_t *bp; |
789 | int error, i, found; | 788 | int error, i, found; |
790 | xfs_daddr_t umount_data_blk; | 789 | xfs_daddr_t umount_data_blk; |
791 | xfs_daddr_t after_umount_blk; | 790 | xfs_daddr_t after_umount_blk; |
792 | xfs_lsn_t tail_lsn; | 791 | xfs_lsn_t tail_lsn; |
793 | int hblks; | 792 | int hblks; |
794 | 793 | ||
795 | found = 0; | 794 | found = 0; |
796 | 795 | ||
797 | /* | 796 | /* |
798 | * Find previous log record | 797 | * Find previous log record |
799 | */ | 798 | */ |
800 | if ((error = xlog_find_head(log, head_blk))) | 799 | if ((error = xlog_find_head(log, head_blk))) |
801 | return error; | 800 | return error; |
802 | 801 | ||
803 | bp = xlog_get_bp(log, 1); | 802 | bp = xlog_get_bp(log, 1); |
804 | if (!bp) | 803 | if (!bp) |
805 | return ENOMEM; | 804 | return ENOMEM; |
806 | if (*head_blk == 0) { /* special case */ | 805 | if (*head_blk == 0) { /* special case */ |
807 | if ((error = xlog_bread(log, 0, 1, bp))) | 806 | if ((error = xlog_bread(log, 0, 1, bp))) |
808 | goto bread_err; | 807 | goto bread_err; |
809 | offset = xlog_align(log, 0, 1, bp); | 808 | offset = xlog_align(log, 0, 1, bp); |
810 | if (xlog_get_cycle(offset) == 0) { | 809 | if (xlog_get_cycle(offset) == 0) { |
811 | *tail_blk = 0; | 810 | *tail_blk = 0; |
812 | /* leave all other log inited values alone */ | 811 | /* leave all other log inited values alone */ |
813 | goto exit; | 812 | goto exit; |
814 | } | 813 | } |
815 | } | 814 | } |
816 | 815 | ||
817 | /* | 816 | /* |
818 | * Search backwards looking for log record header block | 817 | * Search backwards looking for log record header block |
819 | */ | 818 | */ |
820 | ASSERT(*head_blk < INT_MAX); | 819 | ASSERT(*head_blk < INT_MAX); |
821 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { | 820 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { |
822 | if ((error = xlog_bread(log, i, 1, bp))) | 821 | if ((error = xlog_bread(log, i, 1, bp))) |
823 | goto bread_err; | 822 | goto bread_err; |
824 | offset = xlog_align(log, i, 1, bp); | 823 | offset = xlog_align(log, i, 1, bp); |
825 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { | 824 | if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { |
826 | found = 1; | 825 | found = 1; |
827 | break; | 826 | break; |
828 | } | 827 | } |
829 | } | 828 | } |
830 | /* | 829 | /* |
831 | * If we haven't found the log record header block, start looking | 830 | * If we haven't found the log record header block, start looking |
832 | * again from the end of the physical log. XXXmiken: There should be | 831 | * again from the end of the physical log. XXXmiken: There should be |
833 | * a check here to make sure we didn't search more than N blocks in | 832 | * a check here to make sure we didn't search more than N blocks in |
834 | * the previous code. | 833 | * the previous code. |
835 | */ | 834 | */ |
836 | if (!found) { | 835 | if (!found) { |
837 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { | 836 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { |
838 | if ((error = xlog_bread(log, i, 1, bp))) | 837 | if ((error = xlog_bread(log, i, 1, bp))) |
839 | goto bread_err; | 838 | goto bread_err; |
840 | offset = xlog_align(log, i, 1, bp); | 839 | offset = xlog_align(log, i, 1, bp); |
841 | if (XLOG_HEADER_MAGIC_NUM == | 840 | if (XLOG_HEADER_MAGIC_NUM == |
842 | be32_to_cpu(*(__be32 *)offset)) { | 841 | be32_to_cpu(*(__be32 *)offset)) { |
843 | found = 2; | 842 | found = 2; |
844 | break; | 843 | break; |
845 | } | 844 | } |
846 | } | 845 | } |
847 | } | 846 | } |
848 | if (!found) { | 847 | if (!found) { |
849 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); | 848 | xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); |
850 | ASSERT(0); | 849 | ASSERT(0); |
851 | return XFS_ERROR(EIO); | 850 | return XFS_ERROR(EIO); |
852 | } | 851 | } |
853 | 852 | ||
854 | /* find blk_no of tail of log */ | 853 | /* find blk_no of tail of log */ |
855 | rhead = (xlog_rec_header_t *)offset; | 854 | rhead = (xlog_rec_header_t *)offset; |
856 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); | 855 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); |
857 | 856 | ||
858 | /* | 857 | /* |
859 | * Reset log values according to the state of the log when we | 858 | * Reset log values according to the state of the log when we |
860 | * crashed. In the case where head_blk == 0, we bump curr_cycle | 859 | * crashed. In the case where head_blk == 0, we bump curr_cycle |
861 | * one because the next write starts a new cycle rather than | 860 | * one because the next write starts a new cycle rather than |
862 | * continuing the cycle of the last good log record. At this | 861 | * continuing the cycle of the last good log record. At this |
863 | * point we have guaranteed that all partial log records have been | 862 | * point we have guaranteed that all partial log records have been |
864 | * accounted for. Therefore, we know that the last good log record | 863 | * accounted for. Therefore, we know that the last good log record |
865 | * written was complete and ended exactly on the end boundary | 864 | * written was complete and ended exactly on the end boundary |
866 | * of the physical log. | 865 | * of the physical log. |
867 | */ | 866 | */ |
868 | log->l_prev_block = i; | 867 | log->l_prev_block = i; |
869 | log->l_curr_block = (int)*head_blk; | 868 | log->l_curr_block = (int)*head_blk; |
870 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); | 869 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); |
871 | if (found == 2) | 870 | if (found == 2) |
872 | log->l_curr_cycle++; | 871 | log->l_curr_cycle++; |
873 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); | 872 | log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn); |
874 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); | 873 | log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn); |
875 | log->l_grant_reserve_cycle = log->l_curr_cycle; | 874 | log->l_grant_reserve_cycle = log->l_curr_cycle; |
876 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); | 875 | log->l_grant_reserve_bytes = BBTOB(log->l_curr_block); |
877 | log->l_grant_write_cycle = log->l_curr_cycle; | 876 | log->l_grant_write_cycle = log->l_curr_cycle; |
878 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); | 877 | log->l_grant_write_bytes = BBTOB(log->l_curr_block); |
879 | 878 | ||
880 | /* | 879 | /* |
881 | * Look for unmount record. If we find it, then we know there | 880 | * Look for unmount record. If we find it, then we know there |
882 | * was a clean unmount. Since 'i' could be the last block in | 881 | * was a clean unmount. Since 'i' could be the last block in |
883 | * the physical log, we convert to a log block before comparing | 882 | * the physical log, we convert to a log block before comparing |
884 | * to the head_blk. | 883 | * to the head_blk. |
885 | * | 884 | * |
886 | * Save the current tail lsn to use to pass to | 885 | * Save the current tail lsn to use to pass to |
887 | * xlog_clear_stale_blocks() below. We won't want to clear the | 886 | * xlog_clear_stale_blocks() below. We won't want to clear the |
888 | * unmount record if there is one, so we pass the lsn of the | 887 | * unmount record if there is one, so we pass the lsn of the |
889 | * unmount record rather than the block after it. | 888 | * unmount record rather than the block after it. |
890 | */ | 889 | */ |
891 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 890 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
892 | int h_size = be32_to_cpu(rhead->h_size); | 891 | int h_size = be32_to_cpu(rhead->h_size); |
893 | int h_version = be32_to_cpu(rhead->h_version); | 892 | int h_version = be32_to_cpu(rhead->h_version); |
894 | 893 | ||
895 | if ((h_version & XLOG_VERSION_2) && | 894 | if ((h_version & XLOG_VERSION_2) && |
896 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | 895 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
897 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | 896 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
898 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 897 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
899 | hblks++; | 898 | hblks++; |
900 | } else { | 899 | } else { |
901 | hblks = 1; | 900 | hblks = 1; |
902 | } | 901 | } |
903 | } else { | 902 | } else { |
904 | hblks = 1; | 903 | hblks = 1; |
905 | } | 904 | } |
906 | after_umount_blk = (i + hblks + (int) | 905 | after_umount_blk = (i + hblks + (int) |
907 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; | 906 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; |
908 | tail_lsn = log->l_tail_lsn; | 907 | tail_lsn = log->l_tail_lsn; |
909 | if (*head_blk == after_umount_blk && | 908 | if (*head_blk == after_umount_blk && |
910 | be32_to_cpu(rhead->h_num_logops) == 1) { | 909 | be32_to_cpu(rhead->h_num_logops) == 1) { |
911 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 910 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
912 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { | 911 | if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { |
913 | goto bread_err; | 912 | goto bread_err; |
914 | } | 913 | } |
915 | offset = xlog_align(log, umount_data_blk, 1, bp); | 914 | offset = xlog_align(log, umount_data_blk, 1, bp); |
916 | op_head = (xlog_op_header_t *)offset; | 915 | op_head = (xlog_op_header_t *)offset; |
917 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { | 916 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { |
918 | /* | 917 | /* |
919 | * Set tail and last sync so that newly written | 918 | * Set tail and last sync so that newly written |
920 | * log records will point recovery to after the | 919 | * log records will point recovery to after the |
921 | * current unmount record. | 920 | * current unmount record. |
922 | */ | 921 | */ |
923 | log->l_tail_lsn = | 922 | log->l_tail_lsn = |
924 | xlog_assign_lsn(log->l_curr_cycle, | 923 | xlog_assign_lsn(log->l_curr_cycle, |
925 | after_umount_blk); | 924 | after_umount_blk); |
926 | log->l_last_sync_lsn = | 925 | log->l_last_sync_lsn = |
927 | xlog_assign_lsn(log->l_curr_cycle, | 926 | xlog_assign_lsn(log->l_curr_cycle, |
928 | after_umount_blk); | 927 | after_umount_blk); |
929 | *tail_blk = after_umount_blk; | 928 | *tail_blk = after_umount_blk; |
930 | 929 | ||
931 | /* | 930 | /* |
932 | * Note that the unmount was clean. If the unmount | 931 | * Note that the unmount was clean. If the unmount |
933 | * was not clean, we need to know this to rebuild the | 932 | * was not clean, we need to know this to rebuild the |
934 | * superblock counters from the perag headers if we | 933 | * superblock counters from the perag headers if we |
935 | * have a filesystem using non-persistent counters. | 934 | * have a filesystem using non-persistent counters. |
936 | */ | 935 | */ |
937 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; | 936 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; |
938 | } | 937 | } |
939 | } | 938 | } |
940 | 939 | ||
941 | /* | 940 | /* |
942 | * Make sure that there are no blocks in front of the head | 941 | * Make sure that there are no blocks in front of the head |
943 | * with the same cycle number as the head. This can happen | 942 | * with the same cycle number as the head. This can happen |
944 | * because we allow multiple outstanding log writes concurrently, | 943 | * because we allow multiple outstanding log writes concurrently, |
945 | * and the later writes might make it out before earlier ones. | 944 | * and the later writes might make it out before earlier ones. |
946 | * | 945 | * |
947 | * We use the lsn from before modifying it so that we'll never | 946 | * We use the lsn from before modifying it so that we'll never |
948 | * overwrite the unmount record after a clean unmount. | 947 | * overwrite the unmount record after a clean unmount. |
949 | * | 948 | * |
950 | * Do this only if we are going to recover the filesystem | 949 | * Do this only if we are going to recover the filesystem |
951 | * | 950 | * |
952 | * NOTE: This used to say "if (!readonly)" | 951 | * NOTE: This used to say "if (!readonly)" |
953 | * However on Linux, we can & do recover a read-only filesystem. | 952 | * However on Linux, we can & do recover a read-only filesystem. |
954 | * We only skip recovery if NORECOVERY is specified on mount, | 953 | * We only skip recovery if NORECOVERY is specified on mount, |
955 | * in which case we would not be here. | 954 | * in which case we would not be here. |
956 | * | 955 | * |
957 | * But... if the -device- itself is readonly, just skip this. | 956 | * But... if the -device- itself is readonly, just skip this. |
958 | * We can't recover this device anyway, so it won't matter. | 957 | * We can't recover this device anyway, so it won't matter. |
959 | */ | 958 | */ |
960 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { | 959 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { |
961 | error = xlog_clear_stale_blocks(log, tail_lsn); | 960 | error = xlog_clear_stale_blocks(log, tail_lsn); |
962 | } | 961 | } |
963 | 962 | ||
964 | bread_err: | 963 | bread_err: |
965 | exit: | 964 | exit: |
966 | xlog_put_bp(bp); | 965 | xlog_put_bp(bp); |
967 | 966 | ||
968 | if (error) | 967 | if (error) |
969 | xlog_warn("XFS: failed to locate log tail"); | 968 | xlog_warn("XFS: failed to locate log tail"); |
970 | return error; | 969 | return error; |
971 | } | 970 | } |
972 | 971 | ||
973 | /* | 972 | /* |
974 | * Is the log zeroed at all? | 973 | * Is the log zeroed at all? |
975 | * | 974 | * |
976 | * The last binary search should be changed to perform an X block read | 975 | * The last binary search should be changed to perform an X block read |
977 | * once X becomes small enough. You can then search linearly through | 976 | * once X becomes small enough. You can then search linearly through |
978 | * the X blocks. This will cut down on the number of reads we need to do. | 977 | * the X blocks. This will cut down on the number of reads we need to do. |
979 | * | 978 | * |
980 | * If the log is partially zeroed, this routine will pass back the blkno | 979 | * If the log is partially zeroed, this routine will pass back the blkno |
981 | * of the first block with cycle number 0. It won't have a complete LR | 980 | * of the first block with cycle number 0. It won't have a complete LR |
982 | * preceding it. | 981 | * preceding it. |
983 | * | 982 | * |
984 | * Return: | 983 | * Return: |
985 | * 0 => the log is completely written to | 984 | * 0 => the log is completely written to |
986 | * -1 => use *blk_no as the first block of the log | 985 | * -1 => use *blk_no as the first block of the log |
987 | * >0 => error has occurred | 986 | * >0 => error has occurred |
988 | */ | 987 | */ |
989 | STATIC int | 988 | STATIC int |
990 | xlog_find_zeroed( | 989 | xlog_find_zeroed( |
991 | xlog_t *log, | 990 | xlog_t *log, |
992 | xfs_daddr_t *blk_no) | 991 | xfs_daddr_t *blk_no) |
993 | { | 992 | { |
994 | xfs_buf_t *bp; | 993 | xfs_buf_t *bp; |
995 | xfs_caddr_t offset; | 994 | xfs_caddr_t offset; |
996 | uint first_cycle, last_cycle; | 995 | uint first_cycle, last_cycle; |
997 | xfs_daddr_t new_blk, last_blk, start_blk; | 996 | xfs_daddr_t new_blk, last_blk, start_blk; |
998 | xfs_daddr_t num_scan_bblks; | 997 | xfs_daddr_t num_scan_bblks; |
999 | int error, log_bbnum = log->l_logBBsize; | 998 | int error, log_bbnum = log->l_logBBsize; |
1000 | 999 | ||
1001 | *blk_no = 0; | 1000 | *blk_no = 0; |
1002 | 1001 | ||
1003 | /* check totally zeroed log */ | 1002 | /* check totally zeroed log */ |
1004 | bp = xlog_get_bp(log, 1); | 1003 | bp = xlog_get_bp(log, 1); |
1005 | if (!bp) | 1004 | if (!bp) |
1006 | return ENOMEM; | 1005 | return ENOMEM; |
1007 | if ((error = xlog_bread(log, 0, 1, bp))) | 1006 | if ((error = xlog_bread(log, 0, 1, bp))) |
1008 | goto bp_err; | 1007 | goto bp_err; |
1009 | offset = xlog_align(log, 0, 1, bp); | 1008 | offset = xlog_align(log, 0, 1, bp); |
1010 | first_cycle = xlog_get_cycle(offset); | 1009 | first_cycle = xlog_get_cycle(offset); |
1011 | if (first_cycle == 0) { /* completely zeroed log */ | 1010 | if (first_cycle == 0) { /* completely zeroed log */ |
1012 | *blk_no = 0; | 1011 | *blk_no = 0; |
1013 | xlog_put_bp(bp); | 1012 | xlog_put_bp(bp); |
1014 | return -1; | 1013 | return -1; |
1015 | } | 1014 | } |
1016 | 1015 | ||
1017 | /* check partially zeroed log */ | 1016 | /* check partially zeroed log */ |
1018 | if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) | 1017 | if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) |
1019 | goto bp_err; | 1018 | goto bp_err; |
1020 | offset = xlog_align(log, log_bbnum-1, 1, bp); | 1019 | offset = xlog_align(log, log_bbnum-1, 1, bp); |
1021 | last_cycle = xlog_get_cycle(offset); | 1020 | last_cycle = xlog_get_cycle(offset); |
1022 | if (last_cycle != 0) { /* log completely written to */ | 1021 | if (last_cycle != 0) { /* log completely written to */ |
1023 | xlog_put_bp(bp); | 1022 | xlog_put_bp(bp); |
1024 | return 0; | 1023 | return 0; |
1025 | } else if (first_cycle != 1) { | 1024 | } else if (first_cycle != 1) { |
1026 | /* | 1025 | /* |
1027 | * If the cycle of the last block is zero, the cycle of | 1026 | * If the cycle of the last block is zero, the cycle of |
1028 | * the first block must be 1. If it's not, maybe we're | 1027 | * the first block must be 1. If it's not, maybe we're |
1029 | * not looking at a log... Bail out. | 1028 | * not looking at a log... Bail out. |
1030 | */ | 1029 | */ |
1031 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); | 1030 | xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); |
1032 | return XFS_ERROR(EINVAL); | 1031 | return XFS_ERROR(EINVAL); |
1033 | } | 1032 | } |
1034 | 1033 | ||
1035 | /* we have a partially zeroed log */ | 1034 | /* we have a partially zeroed log */ |
1036 | last_blk = log_bbnum-1; | 1035 | last_blk = log_bbnum-1; |
1037 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) | 1036 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) |
1038 | goto bp_err; | 1037 | goto bp_err; |
1039 | 1038 | ||
1040 | /* | 1039 | /* |
1041 | * Validate the answer. Because there is no way to guarantee that | 1040 | * Validate the answer. Because there is no way to guarantee that |
1042 | * the entire log is made up of log records which are the same size, | 1041 | * the entire log is made up of log records which are the same size, |
1043 | * we scan over the defined maximum blocks. At this point, the maximum | 1042 | * we scan over the defined maximum blocks. At this point, the maximum |
1044 | * is not chosen to mean anything special. XXXmiken | 1043 | * is not chosen to mean anything special. XXXmiken |
1045 | */ | 1044 | */ |
1046 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); | 1045 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
1047 | ASSERT(num_scan_bblks <= INT_MAX); | 1046 | ASSERT(num_scan_bblks <= INT_MAX); |
1048 | 1047 | ||
1049 | if (last_blk < num_scan_bblks) | 1048 | if (last_blk < num_scan_bblks) |
1050 | num_scan_bblks = last_blk; | 1049 | num_scan_bblks = last_blk; |
1051 | start_blk = last_blk - num_scan_bblks; | 1050 | start_blk = last_blk - num_scan_bblks; |
1052 | 1051 | ||
1053 | /* | 1052 | /* |
1054 | * We search for any instances of cycle number 0 that occur before | 1053 | * We search for any instances of cycle number 0 that occur before |
1055 | * our current estimate of the head. What we're trying to detect is | 1054 | * our current estimate of the head. What we're trying to detect is |
1056 | * 1 ... | 0 | 1 | 0... | 1055 | * 1 ... | 0 | 1 | 0... |
1057 | * ^ binary search ends here | 1056 | * ^ binary search ends here |
1058 | */ | 1057 | */ |
1059 | if ((error = xlog_find_verify_cycle(log, start_blk, | 1058 | if ((error = xlog_find_verify_cycle(log, start_blk, |
1060 | (int)num_scan_bblks, 0, &new_blk))) | 1059 | (int)num_scan_bblks, 0, &new_blk))) |
1061 | goto bp_err; | 1060 | goto bp_err; |
1062 | if (new_blk != -1) | 1061 | if (new_blk != -1) |
1063 | last_blk = new_blk; | 1062 | last_blk = new_blk; |
1064 | 1063 | ||
1065 | /* | 1064 | /* |
1066 | * Potentially backup over partial log record write. We don't need | 1065 | * Potentially backup over partial log record write. We don't need |
1067 | * to search the end of the log because we know it is zero. | 1066 | * to search the end of the log because we know it is zero. |
1068 | */ | 1067 | */ |
1069 | if ((error = xlog_find_verify_log_record(log, start_blk, | 1068 | if ((error = xlog_find_verify_log_record(log, start_blk, |
1070 | &last_blk, 0)) == -1) { | 1069 | &last_blk, 0)) == -1) { |
1071 | error = XFS_ERROR(EIO); | 1070 | error = XFS_ERROR(EIO); |
1072 | goto bp_err; | 1071 | goto bp_err; |
1073 | } else if (error) | 1072 | } else if (error) |
1074 | goto bp_err; | 1073 | goto bp_err; |
1075 | 1074 | ||
1076 | *blk_no = last_blk; | 1075 | *blk_no = last_blk; |
1077 | bp_err: | 1076 | bp_err: |
1078 | xlog_put_bp(bp); | 1077 | xlog_put_bp(bp); |
1079 | if (error) | 1078 | if (error) |
1080 | return error; | 1079 | return error; |
1081 | return -1; | 1080 | return -1; |
1082 | } | 1081 | } |
1083 | 1082 | ||
1084 | /* | 1083 | /* |
1085 | * These are simple subroutines used by xlog_clear_stale_blocks() below | 1084 | * These are simple subroutines used by xlog_clear_stale_blocks() below |
1086 | * to initialize a buffer full of empty log record headers and write | 1085 | * to initialize a buffer full of empty log record headers and write |
1087 | * them into the log. | 1086 | * them into the log. |
1088 | */ | 1087 | */ |
1089 | STATIC void | 1088 | STATIC void |
1090 | xlog_add_record( | 1089 | xlog_add_record( |
1091 | xlog_t *log, | 1090 | xlog_t *log, |
1092 | xfs_caddr_t buf, | 1091 | xfs_caddr_t buf, |
1093 | int cycle, | 1092 | int cycle, |
1094 | int block, | 1093 | int block, |
1095 | int tail_cycle, | 1094 | int tail_cycle, |
1096 | int tail_block) | 1095 | int tail_block) |
1097 | { | 1096 | { |
1098 | xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; | 1097 | xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; |
1099 | 1098 | ||
1100 | memset(buf, 0, BBSIZE); | 1099 | memset(buf, 0, BBSIZE); |
1101 | recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); | 1100 | recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); |
1102 | recp->h_cycle = cpu_to_be32(cycle); | 1101 | recp->h_cycle = cpu_to_be32(cycle); |
1103 | recp->h_version = cpu_to_be32( | 1102 | recp->h_version = cpu_to_be32( |
1104 | xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); | 1103 | xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); |
1105 | recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); | 1104 | recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); |
1106 | recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); | 1105 | recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); |
1107 | recp->h_fmt = cpu_to_be32(XLOG_FMT); | 1106 | recp->h_fmt = cpu_to_be32(XLOG_FMT); |
1108 | memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); | 1107 | memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); |
1109 | } | 1108 | } |
1110 | 1109 | ||
1111 | STATIC int | 1110 | STATIC int |
1112 | xlog_write_log_records( | 1111 | xlog_write_log_records( |
1113 | xlog_t *log, | 1112 | xlog_t *log, |
1114 | int cycle, | 1113 | int cycle, |
1115 | int start_block, | 1114 | int start_block, |
1116 | int blocks, | 1115 | int blocks, |
1117 | int tail_cycle, | 1116 | int tail_cycle, |
1118 | int tail_block) | 1117 | int tail_block) |
1119 | { | 1118 | { |
1120 | xfs_caddr_t offset; | 1119 | xfs_caddr_t offset; |
1121 | xfs_buf_t *bp; | 1120 | xfs_buf_t *bp; |
1122 | int balign, ealign; | 1121 | int balign, ealign; |
1123 | int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); | 1122 | int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); |
1124 | int end_block = start_block + blocks; | 1123 | int end_block = start_block + blocks; |
1125 | int bufblks; | 1124 | int bufblks; |
1126 | int error = 0; | 1125 | int error = 0; |
1127 | int i, j = 0; | 1126 | int i, j = 0; |
1128 | 1127 | ||
1129 | bufblks = 1 << ffs(blocks); | 1128 | bufblks = 1 << ffs(blocks); |
1130 | while (!(bp = xlog_get_bp(log, bufblks))) { | 1129 | while (!(bp = xlog_get_bp(log, bufblks))) { |
1131 | bufblks >>= 1; | 1130 | bufblks >>= 1; |
1132 | if (bufblks <= log->l_sectbb_log) | 1131 | if (bufblks <= log->l_sectbb_log) |
1133 | return ENOMEM; | 1132 | return ENOMEM; |
1134 | } | 1133 | } |
1135 | 1134 | ||
1136 | /* We may need to do a read at the start to fill in part of | 1135 | /* We may need to do a read at the start to fill in part of |
1137 | * the buffer in the starting sector not covered by the first | 1136 | * the buffer in the starting sector not covered by the first |
1138 | * write below. | 1137 | * write below. |
1139 | */ | 1138 | */ |
1140 | balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); | 1139 | balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); |
1141 | if (balign != start_block) { | 1140 | if (balign != start_block) { |
1142 | if ((error = xlog_bread(log, start_block, 1, bp))) { | 1141 | if ((error = xlog_bread(log, start_block, 1, bp))) { |
1143 | xlog_put_bp(bp); | 1142 | xlog_put_bp(bp); |
1144 | return error; | 1143 | return error; |
1145 | } | 1144 | } |
1146 | j = start_block - balign; | 1145 | j = start_block - balign; |
1147 | } | 1146 | } |
1148 | 1147 | ||
1149 | for (i = start_block; i < end_block; i += bufblks) { | 1148 | for (i = start_block; i < end_block; i += bufblks) { |
1150 | int bcount, endcount; | 1149 | int bcount, endcount; |
1151 | 1150 | ||
1152 | bcount = min(bufblks, end_block - start_block); | 1151 | bcount = min(bufblks, end_block - start_block); |
1153 | endcount = bcount - j; | 1152 | endcount = bcount - j; |
1154 | 1153 | ||
1155 | /* We may need to do a read at the end to fill in part of | 1154 | /* We may need to do a read at the end to fill in part of |
1156 | * the buffer in the final sector not covered by the write. | 1155 | * the buffer in the final sector not covered by the write. |
1157 | * If this is the same sector as the above read, skip it. | 1156 | * If this is the same sector as the above read, skip it. |
1158 | */ | 1157 | */ |
1159 | ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); | 1158 | ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); |
1160 | if (j == 0 && (start_block + endcount > ealign)) { | 1159 | if (j == 0 && (start_block + endcount > ealign)) { |
1161 | offset = XFS_BUF_PTR(bp); | 1160 | offset = XFS_BUF_PTR(bp); |
1162 | balign = BBTOB(ealign - start_block); | 1161 | balign = BBTOB(ealign - start_block); |
1163 | error = XFS_BUF_SET_PTR(bp, offset + balign, | 1162 | error = XFS_BUF_SET_PTR(bp, offset + balign, |
1164 | BBTOB(sectbb)); | 1163 | BBTOB(sectbb)); |
1165 | if (!error) | 1164 | if (!error) |
1166 | error = xlog_bread(log, ealign, sectbb, bp); | 1165 | error = xlog_bread(log, ealign, sectbb, bp); |
1167 | if (!error) | 1166 | if (!error) |
1168 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | 1167 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); |
1169 | if (error) | 1168 | if (error) |
1170 | break; | 1169 | break; |
1171 | } | 1170 | } |
1172 | 1171 | ||
1173 | offset = xlog_align(log, start_block, endcount, bp); | 1172 | offset = xlog_align(log, start_block, endcount, bp); |
1174 | for (; j < endcount; j++) { | 1173 | for (; j < endcount; j++) { |
1175 | xlog_add_record(log, offset, cycle, i+j, | 1174 | xlog_add_record(log, offset, cycle, i+j, |
1176 | tail_cycle, tail_block); | 1175 | tail_cycle, tail_block); |
1177 | offset += BBSIZE; | 1176 | offset += BBSIZE; |
1178 | } | 1177 | } |
1179 | error = xlog_bwrite(log, start_block, endcount, bp); | 1178 | error = xlog_bwrite(log, start_block, endcount, bp); |
1180 | if (error) | 1179 | if (error) |
1181 | break; | 1180 | break; |
1182 | start_block += endcount; | 1181 | start_block += endcount; |
1183 | j = 0; | 1182 | j = 0; |
1184 | } | 1183 | } |
1185 | xlog_put_bp(bp); | 1184 | xlog_put_bp(bp); |
1186 | return error; | 1185 | return error; |
1187 | } | 1186 | } |
1188 | 1187 | ||
1189 | /* | 1188 | /* |
1190 | * This routine is called to blow away any incomplete log writes out | 1189 | * This routine is called to blow away any incomplete log writes out |
1191 | * in front of the log head. We do this so that we won't become confused | 1190 | * in front of the log head. We do this so that we won't become confused |
1192 | * if we come up, write only a little bit more, and then crash again. | 1191 | * if we come up, write only a little bit more, and then crash again. |
1193 | * If we leave the partial log records out there, this situation could | 1192 | * If we leave the partial log records out there, this situation could |
1194 | * cause us to think those partial writes are valid blocks since they | 1193 | * cause us to think those partial writes are valid blocks since they |
1195 | * have the current cycle number. We get rid of them by overwriting them | 1194 | * have the current cycle number. We get rid of them by overwriting them |
1196 | * with empty log records with the old cycle number rather than the | 1195 | * with empty log records with the old cycle number rather than the |
1197 | * current one. | 1196 | * current one. |
1198 | * | 1197 | * |
1199 | * The tail lsn is passed in rather than taken from | 1198 | * The tail lsn is passed in rather than taken from |
1200 | * the log so that we will not write over the unmount record after a | 1199 | * the log so that we will not write over the unmount record after a |
1201 | * clean unmount in a 512 block log. Doing so would leave the log without | 1200 | * clean unmount in a 512 block log. Doing so would leave the log without |
1202 | * any valid log records in it until a new one was written. If we crashed | 1201 | * any valid log records in it until a new one was written. If we crashed |
1203 | * during that time we would not be able to recover. | 1202 | * during that time we would not be able to recover. |
1204 | */ | 1203 | */ |
STATIC int
xlog_clear_stale_blocks(
	xlog_t		*log,
	xfs_lsn_t	tail_lsn)
{
	int		tail_cycle, head_cycle;
	int		tail_block, head_block;
	int		tail_distance, max_distance;
	int		distance;
	int		error;

	/* Tail comes from the caller; head is the in-core log position. */
	tail_cycle = CYCLE_LSN(tail_lsn);
	tail_block = BLOCK_LSN(tail_lsn);
	head_cycle = log->l_curr_cycle;
	head_block = log->l_curr_block;

	/*
	 * Figure out the distance between the new head of the log
	 * and the tail.  We want to write over any blocks beyond the
	 * head that we may have written just before the crash, but
	 * we don't want to overwrite the tail of the log.
	 */
	if (head_cycle == tail_cycle) {
		/*
		 * The tail is behind the head in the physical log,
		 * so the distance from the head to the tail is the
		 * distance from the head to the end of the log plus
		 * the distance from the beginning of the log to the
		 * tail.
		 */
		if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
					XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block + (log->l_logBBsize - head_block);
	} else {
		/*
		 * The head is behind the tail in the physical log,
		 * so the distance from the head to the tail is just
		 * the tail block minus the head block.
		 */
		if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
					XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block - head_block;
	}

	/*
	 * If the head is right up against the tail, we can't clear
	 * anything.
	 */
	if (tail_distance <= 0) {
		ASSERT(tail_distance == 0);
		return 0;
	}

	/*
	 * NOTE(review): presumably the maximum amount of log I/O (in basic
	 * blocks) that could have been in flight at the crash — confirm
	 * against the XLOG_TOTAL_REC_SHIFT definition.
	 */
	max_distance = XLOG_TOTAL_REC_SHIFT(log);
	/*
	 * Take the smaller of the maximum amount of outstanding I/O
	 * we could have and the distance to the tail to clear out.
	 * We take the smaller so that we don't overwrite the tail and
	 * we don't waste all day writing from the head to the tail
	 * for no reason.
	 */
	max_distance = MIN(max_distance, tail_distance);

	if ((head_block + max_distance) <= log->l_logBBsize) {
		/*
		 * We can stomp all the blocks we need to without
		 * wrapping around the end of the log.  Just do it
		 * in a single write.  Use the cycle number of the
		 * current cycle minus one so that the log will look like:
		 *     n ... | n - 1 ...
		 */
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, max_distance, tail_cycle,
				tail_block);
		if (error)
			return error;
	} else {
		/*
		 * We need to wrap around the end of the physical log in
		 * order to clear all the blocks.  Do it in two separate
		 * I/Os.  The first write should be from the head to the
		 * end of the physical log, and it should use the current
		 * cycle number minus one just like above.
		 */
		distance = log->l_logBBsize - head_block;
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, distance, tail_cycle,
				tail_block);

		if (error)
			return error;

		/*
		 * Now write the blocks at the start of the physical log.
		 * This writes the remainder of the blocks we want to clear.
		 * It uses the current cycle number since we're now on the
		 * same cycle as the head so that we get:
		 *    n ... n ... | n - 1 ...
		 *    ^^^^^ blocks we're writing
		 */
		distance = max_distance - (log->l_logBBsize - head_block);
		error = xlog_write_log_records(log, head_cycle, 0, distance,
				tail_cycle, tail_block);
		if (error)
			return error;
	}

	return 0;
}
1320 | 1319 | ||
1321 | /****************************************************************************** | 1320 | /****************************************************************************** |
1322 | * | 1321 | * |
1323 | * Log recover routines | 1322 | * Log recover routines |
1324 | * | 1323 | * |
1325 | ****************************************************************************** | 1324 | ****************************************************************************** |
1326 | */ | 1325 | */ |
1327 | 1326 | ||
1328 | STATIC xlog_recover_t * | 1327 | STATIC xlog_recover_t * |
1329 | xlog_recover_find_tid( | 1328 | xlog_recover_find_tid( |
1330 | xlog_recover_t *q, | 1329 | xlog_recover_t *q, |
1331 | xlog_tid_t tid) | 1330 | xlog_tid_t tid) |
1332 | { | 1331 | { |
1333 | xlog_recover_t *p = q; | 1332 | xlog_recover_t *p = q; |
1334 | 1333 | ||
1335 | while (p != NULL) { | 1334 | while (p != NULL) { |
1336 | if (p->r_log_tid == tid) | 1335 | if (p->r_log_tid == tid) |
1337 | break; | 1336 | break; |
1338 | p = p->r_next; | 1337 | p = p->r_next; |
1339 | } | 1338 | } |
1340 | return p; | 1339 | return p; |
1341 | } | 1340 | } |
1342 | 1341 | ||
1343 | STATIC void | 1342 | STATIC void |
1344 | xlog_recover_put_hashq( | 1343 | xlog_recover_put_hashq( |
1345 | xlog_recover_t **q, | 1344 | xlog_recover_t **q, |
1346 | xlog_recover_t *trans) | 1345 | xlog_recover_t *trans) |
1347 | { | 1346 | { |
1348 | trans->r_next = *q; | 1347 | trans->r_next = *q; |
1349 | *q = trans; | 1348 | *q = trans; |
1350 | } | 1349 | } |
1351 | 1350 | ||
1352 | STATIC void | 1351 | STATIC void |
1353 | xlog_recover_add_item( | 1352 | xlog_recover_add_item( |
1354 | xlog_recover_item_t **itemq) | 1353 | xlog_recover_item_t **itemq) |
1355 | { | 1354 | { |
1356 | xlog_recover_item_t *item; | 1355 | xlog_recover_item_t *item; |
1357 | 1356 | ||
1358 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); | 1357 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); |
1359 | xlog_recover_insert_item_backq(itemq, item); | 1358 | xlog_recover_insert_item_backq(itemq, item); |
1360 | } | 1359 | } |
1361 | 1360 | ||
/*
 * Append a "continuation" region -- log data that was split across log
 * records -- to the transaction being assembled.  Always returns 0.
 */
STATIC int
xlog_recover_add_to_cont_trans(
	xlog_recover_t		*trans,
	xfs_caddr_t		dp,
	int			len)
{
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr, old_ptr;
	int			old_len;

	item = trans->r_itemq;
	if (item == NULL) {
		/* finish copying rest of trans header */
		xlog_recover_add_item(&trans->r_itemq);
		/*
		 * This continuation supplies the final 'len' bytes of the
		 * transaction header; the leading bytes were already copied
		 * into r_theader by xlog_recover_add_to_trans().
		 */
		ptr = (xfs_caddr_t) &trans->r_theader +
				sizeof(xfs_trans_header_t) - len;
		memcpy(ptr, dp, len); /* d, s, l */
		return 0;
	}
	/* The continuation extends the most recently added item. */
	item = item->ri_prev;

	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
	old_len = item->ri_buf[item->ri_cnt-1].i_len;

	/* Grow the last region's buffer and append the new bytes to it. */
	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
	memcpy(&ptr[old_len], dp, len); /* d, s, l */
	item->ri_buf[item->ri_cnt-1].i_len += len;
	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
	return 0;
}
1392 | 1391 | ||
1393 | /* | 1392 | /* |
1394 | * The next region to add is the start of a new region. It could be | 1393 | * The next region to add is the start of a new region. It could be |
1395 | * a whole region or it could be the first part of a new region. Because | 1394 | * a whole region or it could be the first part of a new region. Because |
1396 | * of this, the assumption here is that the type and size fields of all | 1395 | * of this, the assumption here is that the type and size fields of all |
1397 | * format structures fit into the first 32 bits of the structure. | 1396 | * format structures fit into the first 32 bits of the structure. |
1398 | * | 1397 | * |
1399 | * This works because all regions must be 32 bit aligned. Therefore, we | 1398 | * This works because all regions must be 32 bit aligned. Therefore, we |
1400 | * either have both fields or we have neither field. In the case we have | 1399 | * either have both fields or we have neither field. In the case we have |
1401 | * neither field, the data part of the region is zero length. We only have | 1400 | * neither field, the data part of the region is zero length. We only have |
1402 | * a log_op_header and can throw away the header since a new one will appear | 1401 | * a log_op_header and can throw away the header since a new one will appear |
1403 | * later. If we have at least 4 bytes, then we can determine how many regions | 1402 | * later. If we have at least 4 bytes, then we can determine how many regions |
1404 | * will appear in the current log item. | 1403 | * will appear in the current log item. |
1405 | */ | 1404 | */ |
STATIC int
xlog_recover_add_to_trans(
	xlog_recover_t		*trans,
	xfs_caddr_t		dp,
	int			len)
{
	xfs_inode_log_format_t	*in_f;			/* any will do */
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr;

	/* A zero-length region carries no data; nothing to record. */
	if (!len)
		return 0;
	item = trans->r_itemq;
	if (item == NULL) {
		/* we need to catch log corruptions here */
		if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
			xlog_warn("XFS: xlog_recover_add_to_trans: "
				  "bad header magic number");
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
		/*
		 * The first region is the transaction header.  Only queue
		 * an (empty) item now if the header arrived in one piece;
		 * a partial header is completed later by
		 * xlog_recover_add_to_cont_trans().
		 */
		if (len == sizeof(xfs_trans_header_t))
			xlog_recover_add_item(&trans->r_itemq);
		memcpy(&trans->r_theader, dp, len); /* d, s, l */
		return 0;
	}

	ptr = kmem_alloc(len, KM_SLEEP);
	memcpy(ptr, dp, len);
	in_f = (xfs_inode_log_format_t *)ptr;

	/*
	 * If the most recent item is complete (all of its expected
	 * regions have been seen), this region starts a new item.
	 */
	if (item->ri_prev->ri_total != 0 &&
	     item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
		xlog_recover_add_item(&trans->r_itemq);
	}
	item = trans->r_itemq;
	item = item->ri_prev;	/* work on the most recently added item */

	if (item->ri_total == 0) {		/* first region to be added */
		/*
		 * The size field of the first region tells us how many
		 * regions this item will have in total (see the block
		 * comment above about type/size fitting in 32 bits).
		 */
		item->ri_total	= in_f->ilf_size;
		ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
		item->ri_buf = kmem_zalloc((item->ri_total *
					    sizeof(xfs_log_iovec_t)), KM_SLEEP);
	}
	ASSERT(item->ri_total > item->ri_cnt);
	/* Description region is ri_buf[0] */
	item->ri_buf[item->ri_cnt].i_addr = ptr;
	item->ri_buf[item->ri_cnt].i_len  = len;
	item->ri_cnt++;
	return 0;
}
1457 | 1456 | ||
1458 | STATIC void | 1457 | STATIC void |
1459 | xlog_recover_new_tid( | 1458 | xlog_recover_new_tid( |
1460 | xlog_recover_t **q, | 1459 | xlog_recover_t **q, |
1461 | xlog_tid_t tid, | 1460 | xlog_tid_t tid, |
1462 | xfs_lsn_t lsn) | 1461 | xfs_lsn_t lsn) |
1463 | { | 1462 | { |
1464 | xlog_recover_t *trans; | 1463 | xlog_recover_t *trans; |
1465 | 1464 | ||
1466 | trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); | 1465 | trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); |
1467 | trans->r_log_tid = tid; | 1466 | trans->r_log_tid = tid; |
1468 | trans->r_lsn = lsn; | 1467 | trans->r_lsn = lsn; |
1469 | xlog_recover_put_hashq(q, trans); | 1468 | xlog_recover_put_hashq(q, trans); |
1470 | } | 1469 | } |
1471 | 1470 | ||
1472 | STATIC int | 1471 | STATIC int |
1473 | xlog_recover_unlink_tid( | 1472 | xlog_recover_unlink_tid( |
1474 | xlog_recover_t **q, | 1473 | xlog_recover_t **q, |
1475 | xlog_recover_t *trans) | 1474 | xlog_recover_t *trans) |
1476 | { | 1475 | { |
1477 | xlog_recover_t *tp; | 1476 | xlog_recover_t *tp; |
1478 | int found = 0; | 1477 | int found = 0; |
1479 | 1478 | ||
1480 | ASSERT(trans != NULL); | 1479 | ASSERT(trans != NULL); |
1481 | if (trans == *q) { | 1480 | if (trans == *q) { |
1482 | *q = (*q)->r_next; | 1481 | *q = (*q)->r_next; |
1483 | } else { | 1482 | } else { |
1484 | tp = *q; | 1483 | tp = *q; |
1485 | while (tp) { | 1484 | while (tp) { |
1486 | if (tp->r_next == trans) { | 1485 | if (tp->r_next == trans) { |
1487 | found = 1; | 1486 | found = 1; |
1488 | break; | 1487 | break; |
1489 | } | 1488 | } |
1490 | tp = tp->r_next; | 1489 | tp = tp->r_next; |
1491 | } | 1490 | } |
1492 | if (!found) { | 1491 | if (!found) { |
1493 | xlog_warn( | 1492 | xlog_warn( |
1494 | "XFS: xlog_recover_unlink_tid: trans not found"); | 1493 | "XFS: xlog_recover_unlink_tid: trans not found"); |
1495 | ASSERT(0); | 1494 | ASSERT(0); |
1496 | return XFS_ERROR(EIO); | 1495 | return XFS_ERROR(EIO); |
1497 | } | 1496 | } |
1498 | tp->r_next = tp->r_next->r_next; | 1497 | tp->r_next = tp->r_next->r_next; |
1499 | } | 1498 | } |
1500 | return 0; | 1499 | return 0; |
1501 | } | 1500 | } |
1502 | 1501 | ||
/*
 * Insert "item" at the tail of the circular, doubly linked item queue
 * anchored at *q.  An empty queue makes the item its own predecessor
 * and successor; otherwise the item is linked in just before the
 * anchor, i.e. at the back of the ring.
 */
STATIC void
xlog_recover_insert_item_backq(
	xlog_recover_item_t	**q,
	xlog_recover_item_t	*item)
{
	if (*q == NULL) {
		item->ri_prev = item->ri_next = item;
		*q = item;
	} else {
		item->ri_next = *q;
		item->ri_prev = (*q)->ri_prev;
		(*q)->ri_prev = item;
		item->ri_prev->ri_next = item;
	}
}
1518 | 1517 | ||
/*
 * Insert "item" at the head of the circular item queue.  Because the
 * queue is a ring, inserting at the back and then moving the anchor to
 * the new item is equivalent to a head insertion.
 */
STATIC void
xlog_recover_insert_item_frontq(
	xlog_recover_item_t	**q,
	xlog_recover_item_t	*item)
{
	xlog_recover_insert_item_backq(q, item);
	*q = item;
}
1527 | 1526 | ||
/*
 * Reorder the items of a transaction for replay: non-cancelled buffer
 * items are moved to the front of the queue, everything else keeps its
 * relative order at the back.  Returns 0, or EIO on an unrecognized
 * item type.
 */
STATIC int
xlog_recover_reorder_trans(
	xlog_recover_t		*trans)
{
	xlog_recover_item_t	*first_item, *itemq, *itemq_next;
	xfs_buf_log_format_t	*buf_f;
	ushort			flags = 0;

	first_item = itemq = trans->r_itemq;
	trans->r_itemq = NULL;
	do {
		itemq_next = itemq->ri_next;
		buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;

		switch (ITEM_TYPE(itemq)) {
		case XFS_LI_BUF:
			flags = buf_f->blf_flags;
			if (!(flags & XFS_BLI_CANCEL)) {
				xlog_recover_insert_item_frontq(&trans->r_itemq,
								itemq);
				break;
			}
			/* fallthrough: cancelled buffers go to the back */
		case XFS_LI_INODE:
		case XFS_LI_DQUOT:
		case XFS_LI_QUOTAOFF:
		case XFS_LI_EFD:
		case XFS_LI_EFI:
			xlog_recover_insert_item_backq(&trans->r_itemq, itemq);
			break;
		default:
			xlog_warn(
	"XFS: xlog_recover_reorder_trans: unrecognized type of log operation");
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
		itemq = itemq_next;
	} while (first_item != itemq);
	return 0;
}
1567 | 1566 | ||
1568 | /* | 1567 | /* |
1569 | * Build up the table of buf cancel records so that we don't replay | 1568 | * Build up the table of buf cancel records so that we don't replay |
1570 | * cancelled data in the second pass. For buffer records that are | 1569 | * cancelled data in the second pass. For buffer records that are |
1571 | * not cancel records, there is nothing to do here so we just return. | 1570 | * not cancel records, there is nothing to do here so we just return. |
1572 | * | 1571 | * |
1573 | * If we get a cancel record which is already in the table, this indicates | 1572 | * If we get a cancel record which is already in the table, this indicates |
1574 | * that the buffer was cancelled multiple times. In order to ensure | 1573 | * that the buffer was cancelled multiple times. In order to ensure |
1575 | * that during pass 2 we keep the record in the table until we reach its | 1574 | * that during pass 2 we keep the record in the table until we reach its |
1576 | * last occurrence in the log, we keep a reference count in the cancel | 1575 | * last occurrence in the log, we keep a reference count in the cancel |
1577 | * record in the table to tell us how many times we expect to see this | 1576 | * record in the table to tell us how many times we expect to see this |
1578 | * record during the second pass. | 1577 | * record during the second pass. |
1579 | */ | 1578 | */ |
1580 | STATIC void | 1579 | STATIC void |
1581 | xlog_recover_do_buffer_pass1( | 1580 | xlog_recover_do_buffer_pass1( |
1582 | xlog_t *log, | 1581 | xlog_t *log, |
1583 | xfs_buf_log_format_t *buf_f) | 1582 | xfs_buf_log_format_t *buf_f) |
1584 | { | 1583 | { |
1585 | xfs_buf_cancel_t *bcp; | 1584 | xfs_buf_cancel_t *bcp; |
1586 | xfs_buf_cancel_t *nextp; | 1585 | xfs_buf_cancel_t *nextp; |
1587 | xfs_buf_cancel_t *prevp; | 1586 | xfs_buf_cancel_t *prevp; |
1588 | xfs_buf_cancel_t **bucket; | 1587 | xfs_buf_cancel_t **bucket; |
1589 | xfs_daddr_t blkno = 0; | 1588 | xfs_daddr_t blkno = 0; |
1590 | uint len = 0; | 1589 | uint len = 0; |
1591 | ushort flags = 0; | 1590 | ushort flags = 0; |
1592 | 1591 | ||
1593 | switch (buf_f->blf_type) { | 1592 | switch (buf_f->blf_type) { |
1594 | case XFS_LI_BUF: | 1593 | case XFS_LI_BUF: |
1595 | blkno = buf_f->blf_blkno; | 1594 | blkno = buf_f->blf_blkno; |
1596 | len = buf_f->blf_len; | 1595 | len = buf_f->blf_len; |
1597 | flags = buf_f->blf_flags; | 1596 | flags = buf_f->blf_flags; |
1598 | break; | 1597 | break; |
1599 | } | 1598 | } |
1600 | 1599 | ||
1601 | /* | 1600 | /* |
1602 | * If this isn't a cancel buffer item, then just return. | 1601 | * If this isn't a cancel buffer item, then just return. |
1603 | */ | 1602 | */ |
1604 | if (!(flags & XFS_BLI_CANCEL)) | 1603 | if (!(flags & XFS_BLI_CANCEL)) |
1605 | return; | 1604 | return; |
1606 | 1605 | ||
1607 | /* | 1606 | /* |
1608 | * Insert an xfs_buf_cancel record into the hash table of | 1607 | * Insert an xfs_buf_cancel record into the hash table of |
1609 | * them. If there is already an identical record, bump | 1608 | * them. If there is already an identical record, bump |
1610 | * its reference count. | 1609 | * its reference count. |
1611 | */ | 1610 | */ |
1612 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | 1611 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % |
1613 | XLOG_BC_TABLE_SIZE]; | 1612 | XLOG_BC_TABLE_SIZE]; |
1614 | /* | 1613 | /* |
1615 | * If the hash bucket is empty then just insert a new record into | 1614 | * If the hash bucket is empty then just insert a new record into |
1616 | * the bucket. | 1615 | * the bucket. |
1617 | */ | 1616 | */ |
1618 | if (*bucket == NULL) { | 1617 | if (*bucket == NULL) { |
1619 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | 1618 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), |
1620 | KM_SLEEP); | 1619 | KM_SLEEP); |
1621 | bcp->bc_blkno = blkno; | 1620 | bcp->bc_blkno = blkno; |
1622 | bcp->bc_len = len; | 1621 | bcp->bc_len = len; |
1623 | bcp->bc_refcount = 1; | 1622 | bcp->bc_refcount = 1; |
1624 | bcp->bc_next = NULL; | 1623 | bcp->bc_next = NULL; |
1625 | *bucket = bcp; | 1624 | *bucket = bcp; |
1626 | return; | 1625 | return; |
1627 | } | 1626 | } |
1628 | 1627 | ||
1629 | /* | 1628 | /* |
1630 | * The hash bucket is not empty, so search for duplicates of our | 1629 | * The hash bucket is not empty, so search for duplicates of our |
1631 | * record. If we find one them just bump its refcount. If not | 1630 | * record. If we find one them just bump its refcount. If not |
1632 | * then add us at the end of the list. | 1631 | * then add us at the end of the list. |
1633 | */ | 1632 | */ |
1634 | prevp = NULL; | 1633 | prevp = NULL; |
1635 | nextp = *bucket; | 1634 | nextp = *bucket; |
1636 | while (nextp != NULL) { | 1635 | while (nextp != NULL) { |
1637 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { | 1636 | if (nextp->bc_blkno == blkno && nextp->bc_len == len) { |
1638 | nextp->bc_refcount++; | 1637 | nextp->bc_refcount++; |
1639 | return; | 1638 | return; |
1640 | } | 1639 | } |
1641 | prevp = nextp; | 1640 | prevp = nextp; |
1642 | nextp = nextp->bc_next; | 1641 | nextp = nextp->bc_next; |
1643 | } | 1642 | } |
1644 | ASSERT(prevp != NULL); | 1643 | ASSERT(prevp != NULL); |
1645 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), | 1644 | bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), |
1646 | KM_SLEEP); | 1645 | KM_SLEEP); |
1647 | bcp->bc_blkno = blkno; | 1646 | bcp->bc_blkno = blkno; |
1648 | bcp->bc_len = len; | 1647 | bcp->bc_len = len; |
1649 | bcp->bc_refcount = 1; | 1648 | bcp->bc_refcount = 1; |
1650 | bcp->bc_next = NULL; | 1649 | bcp->bc_next = NULL; |
1651 | prevp->bc_next = bcp; | 1650 | prevp->bc_next = bcp; |
1652 | } | 1651 | } |
1653 | 1652 | ||
1654 | /* | 1653 | /* |
1655 | * Check to see whether the buffer being recovered has a corresponding | 1654 | * Check to see whether the buffer being recovered has a corresponding |
1656 | * entry in the buffer cancel record table. If it does then return 1 | 1655 | * entry in the buffer cancel record table. If it does then return 1 |
1657 | * so that it will be cancelled, otherwise return 0. If the buffer is | 1656 | * so that it will be cancelled, otherwise return 0. If the buffer is |
1658 | * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement | 1657 | * actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement |
1659 | * the refcount on the entry in the table and remove it from the table | 1658 | * the refcount on the entry in the table and remove it from the table |
1660 | * if this is the last reference. | 1659 | * if this is the last reference. |
1661 | * | 1660 | * |
1662 | * We remove the cancel record from the table when we encounter its | 1661 | * We remove the cancel record from the table when we encounter its |
1663 | * last occurrence in the log so that if the same buffer is re-used | 1662 | * last occurrence in the log so that if the same buffer is re-used |
1664 | * again after its last cancellation we actually replay the changes | 1663 | * again after its last cancellation we actually replay the changes |
1665 | * made at that point. | 1664 | * made at that point. |
1666 | */ | 1665 | */ |
1667 | STATIC int | 1666 | STATIC int |
1668 | xlog_check_buffer_cancelled( | 1667 | xlog_check_buffer_cancelled( |
1669 | xlog_t *log, | 1668 | xlog_t *log, |
1670 | xfs_daddr_t blkno, | 1669 | xfs_daddr_t blkno, |
1671 | uint len, | 1670 | uint len, |
1672 | ushort flags) | 1671 | ushort flags) |
1673 | { | 1672 | { |
1674 | xfs_buf_cancel_t *bcp; | 1673 | xfs_buf_cancel_t *bcp; |
1675 | xfs_buf_cancel_t *prevp; | 1674 | xfs_buf_cancel_t *prevp; |
1676 | xfs_buf_cancel_t **bucket; | 1675 | xfs_buf_cancel_t **bucket; |
1677 | 1676 | ||
1678 | if (log->l_buf_cancel_table == NULL) { | 1677 | if (log->l_buf_cancel_table == NULL) { |
1679 | /* | 1678 | /* |
1680 | * There is nothing in the table built in pass one, | 1679 | * There is nothing in the table built in pass one, |
1681 | * so this buffer must not be cancelled. | 1680 | * so this buffer must not be cancelled. |
1682 | */ | 1681 | */ |
1683 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1682 | ASSERT(!(flags & XFS_BLI_CANCEL)); |
1684 | return 0; | 1683 | return 0; |
1685 | } | 1684 | } |
1686 | 1685 | ||
1687 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % | 1686 | bucket = &log->l_buf_cancel_table[(__uint64_t)blkno % |
1688 | XLOG_BC_TABLE_SIZE]; | 1687 | XLOG_BC_TABLE_SIZE]; |
1689 | bcp = *bucket; | 1688 | bcp = *bucket; |
1690 | if (bcp == NULL) { | 1689 | if (bcp == NULL) { |
1691 | /* | 1690 | /* |
1692 | * There is no corresponding entry in the table built | 1691 | * There is no corresponding entry in the table built |
1693 | * in pass one, so this buffer has not been cancelled. | 1692 | * in pass one, so this buffer has not been cancelled. |
1694 | */ | 1693 | */ |
1695 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1694 | ASSERT(!(flags & XFS_BLI_CANCEL)); |
1696 | return 0; | 1695 | return 0; |
1697 | } | 1696 | } |
1698 | 1697 | ||
1699 | /* | 1698 | /* |
1700 | * Search for an entry in the buffer cancel table that | 1699 | * Search for an entry in the buffer cancel table that |
1701 | * matches our buffer. | 1700 | * matches our buffer. |
1702 | */ | 1701 | */ |
1703 | prevp = NULL; | 1702 | prevp = NULL; |
1704 | while (bcp != NULL) { | 1703 | while (bcp != NULL) { |
1705 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) { | 1704 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) { |
1706 | /* | 1705 | /* |
1707 | * We've go a match, so return 1 so that the | 1706 | * We've go a match, so return 1 so that the |
1708 | * recovery of this buffer is cancelled. | 1707 | * recovery of this buffer is cancelled. |
1709 | * If this buffer is actually a buffer cancel | 1708 | * If this buffer is actually a buffer cancel |
1710 | * log item, then decrement the refcount on the | 1709 | * log item, then decrement the refcount on the |
1711 | * one in the table and remove it if this is the | 1710 | * one in the table and remove it if this is the |
1712 | * last reference. | 1711 | * last reference. |
1713 | */ | 1712 | */ |
1714 | if (flags & XFS_BLI_CANCEL) { | 1713 | if (flags & XFS_BLI_CANCEL) { |
1715 | bcp->bc_refcount--; | 1714 | bcp->bc_refcount--; |
1716 | if (bcp->bc_refcount == 0) { | 1715 | if (bcp->bc_refcount == 0) { |
1717 | if (prevp == NULL) { | 1716 | if (prevp == NULL) { |
1718 | *bucket = bcp->bc_next; | 1717 | *bucket = bcp->bc_next; |
1719 | } else { | 1718 | } else { |
1720 | prevp->bc_next = bcp->bc_next; | 1719 | prevp->bc_next = bcp->bc_next; |
1721 | } | 1720 | } |
1722 | kmem_free(bcp); | 1721 | kmem_free(bcp); |
1723 | } | 1722 | } |
1724 | } | 1723 | } |
1725 | return 1; | 1724 | return 1; |
1726 | } | 1725 | } |
1727 | prevp = bcp; | 1726 | prevp = bcp; |
1728 | bcp = bcp->bc_next; | 1727 | bcp = bcp->bc_next; |
1729 | } | 1728 | } |
1730 | /* | 1729 | /* |
1731 | * We didn't find a corresponding entry in the table, so | 1730 | * We didn't find a corresponding entry in the table, so |
1732 | * return 0 so that the buffer is NOT cancelled. | 1731 | * return 0 so that the buffer is NOT cancelled. |
1733 | */ | 1732 | */ |
1734 | ASSERT(!(flags & XFS_BLI_CANCEL)); | 1733 | ASSERT(!(flags & XFS_BLI_CANCEL)); |
1735 | return 0; | 1734 | return 0; |
1736 | } | 1735 | } |
1737 | 1736 | ||
1738 | STATIC int | 1737 | STATIC int |
1739 | xlog_recover_do_buffer_pass2( | 1738 | xlog_recover_do_buffer_pass2( |
1740 | xlog_t *log, | 1739 | xlog_t *log, |
1741 | xfs_buf_log_format_t *buf_f) | 1740 | xfs_buf_log_format_t *buf_f) |
1742 | { | 1741 | { |
1743 | xfs_daddr_t blkno = 0; | 1742 | xfs_daddr_t blkno = 0; |
1744 | ushort flags = 0; | 1743 | ushort flags = 0; |
1745 | uint len = 0; | 1744 | uint len = 0; |
1746 | 1745 | ||
1747 | switch (buf_f->blf_type) { | 1746 | switch (buf_f->blf_type) { |
1748 | case XFS_LI_BUF: | 1747 | case XFS_LI_BUF: |
1749 | blkno = buf_f->blf_blkno; | 1748 | blkno = buf_f->blf_blkno; |
1750 | flags = buf_f->blf_flags; | 1749 | flags = buf_f->blf_flags; |
1751 | len = buf_f->blf_len; | 1750 | len = buf_f->blf_len; |
1752 | break; | 1751 | break; |
1753 | } | 1752 | } |
1754 | 1753 | ||
1755 | return xlog_check_buffer_cancelled(log, blkno, len, flags); | 1754 | return xlog_check_buffer_cancelled(log, blkno, len, flags); |
1756 | } | 1755 | } |
1757 | 1756 | ||
1758 | /* | 1757 | /* |
1759 | * Perform recovery for a buffer full of inodes. In these buffers, | 1758 | * Perform recovery for a buffer full of inodes. In these buffers, |
1760 | * the only data which should be recovered is that which corresponds | 1759 | * the only data which should be recovered is that which corresponds |
1761 | * to the di_next_unlinked pointers in the on disk inode structures. | 1760 | * to the di_next_unlinked pointers in the on disk inode structures. |
1762 | * The rest of the data for the inodes is always logged through the | 1761 | * The rest of the data for the inodes is always logged through the |
1763 | * inodes themselves rather than the inode buffer and is recovered | 1762 | * inodes themselves rather than the inode buffer and is recovered |
1764 | * in xlog_recover_do_inode_trans(). | 1763 | * in xlog_recover_do_inode_trans(). |
1765 | * | 1764 | * |
1766 | * The only time when buffers full of inodes are fully recovered is | 1765 | * The only time when buffers full of inodes are fully recovered is |
1767 | * when the buffer is full of newly allocated inodes. In this case | 1766 | * when the buffer is full of newly allocated inodes. In this case |
1768 | * the buffer will not be marked as an inode buffer and so will be | 1767 | * the buffer will not be marked as an inode buffer and so will be |
1769 | * sent to xlog_recover_do_reg_buffer() below during recovery. | 1768 | * sent to xlog_recover_do_reg_buffer() below during recovery. |
1770 | */ | 1769 | */ |
STATIC int
xlog_recover_do_inode_buffer(
	xfs_mount_t		*mp,
	xlog_recover_item_t	*item,
	xfs_buf_t		*bp,
	xfs_buf_log_format_t	*buf_f)
{
	int			i;
	int			item_index;
	int			bit;
	int			nbits;
	int			reg_buf_offset;
	int			reg_buf_bytes;
	int			next_unlinked_offset;
	int			inodes_per_buf;
	xfs_agino_t		*logged_nextp;
	xfs_agino_t		*buffer_nextp;
	unsigned int		*data_map = NULL;
	unsigned int		map_size = 0;

	/* Only XFS_LI_BUF records carry a dirty-region bitmap. */
	switch (buf_f->blf_type) {
	case XFS_LI_BUF:
		data_map = buf_f->blf_data_map;
		map_size = buf_f->blf_map_size;
		break;
	}
	/*
	 * Set the variables corresponding to the current region to
	 * 0 so that we'll initialize them on the first pass through
	 * the loop.
	 */
	reg_buf_offset = 0;
	reg_buf_bytes = 0;
	bit = 0;
	nbits = 0;
	item_index = 0;
	inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
	/* Visit the di_next_unlinked field of every inode in the buffer. */
	for (i = 0; i < inodes_per_buf; i++) {
		next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
			offsetof(xfs_dinode_t, di_next_unlinked);

		while (next_unlinked_offset >=
		       (reg_buf_offset + reg_buf_bytes)) {
			/*
			 * The next di_next_unlinked field is beyond
			 * the current logged region. Find the next
			 * logged region that contains or is beyond
			 * the current di_next_unlinked field.
			 */
			bit += nbits;
			bit = xfs_next_bit(data_map, map_size, bit);

			/*
			 * If there are no more logged regions in the
			 * buffer, then we're done.
			 */
			if (bit == -1) {
				return 0;
			}

			nbits = xfs_contig_bits(data_map, map_size,
							 bit);
			ASSERT(nbits > 0);
			/*
			 * Bitmap bits describe XFS_BLI_CHUNK-sized units;
			 * convert to a byte offset/length into the buffer.
			 */
			reg_buf_offset = bit << XFS_BLI_SHIFT;
			reg_buf_bytes = nbits << XFS_BLI_SHIFT;
			item_index++;
		}

		/*
		 * If the current logged region starts after the current
		 * di_next_unlinked field, then move on to the next
		 * di_next_unlinked field.
		 */
		if (next_unlinked_offset < reg_buf_offset) {
			continue;
		}

		ASSERT(item->ri_buf[item_index].i_addr != NULL);
		ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0);
		ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp));

		/*
		 * The current logged region contains a copy of the
		 * current di_next_unlinked field. Extract its value
		 * and copy it to the buffer copy.
		 */
		logged_nextp = (xfs_agino_t *)
			       ((char *)(item->ri_buf[item_index].i_addr) +
				(next_unlinked_offset - reg_buf_offset));
		if (unlikely(*logged_nextp == 0)) {
			/*
			 * A zero di_next_unlinked should never have been
			 * logged; treat it as on-disk corruption rather
			 * than replaying it.
			 */
			xfs_fs_cmn_err(CE_ALERT, mp,
				"bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field",
				item, bp);
			XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
					 XFS_ERRLEVEL_LOW, mp);
			return XFS_ERROR(EFSCORRUPTED);
		}

		buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
					      next_unlinked_offset);
		*buffer_nextp = *logged_nextp;
	}

	return 0;
}
1876 | 1875 | ||
1877 | /* | 1876 | /* |
1878 | * Perform a 'normal' buffer recovery. Each logged region of the | 1877 | * Perform a 'normal' buffer recovery. Each logged region of the |
1879 | * buffer should be copied over the corresponding region in the | 1878 | * buffer should be copied over the corresponding region in the |
1880 | * given buffer. The bitmap in the buf log format structure indicates | 1879 | * given buffer. The bitmap in the buf log format structure indicates |
1881 | * where to place the logged data. | 1880 | * where to place the logged data. |
1882 | */ | 1881 | */ |
/*ARGSUSED*/
STATIC void
xlog_recover_do_reg_buffer(
	xlog_recover_item_t	*item,
	xfs_buf_t		*bp,
	xfs_buf_log_format_t	*buf_f)
{
	int			i;
	int			bit;
	int			nbits;
	unsigned int		*data_map = NULL;
	unsigned int		map_size = 0;
	int			error;

	/* Only XFS_LI_BUF records carry a dirty-region bitmap. */
	switch (buf_f->blf_type) {
	case XFS_LI_BUF:
		data_map = buf_f->blf_data_map;
		map_size = buf_f->blf_map_size;
		break;
	}
	bit = 0;
	i = 1;  /* 0 is the buf format structure */
	/*
	 * Each contiguous run of set bits in the bitmap is one logged
	 * region; region N's payload lives in item->ri_buf[N].
	 */
	while (1) {
		bit = xfs_next_bit(data_map, map_size, bit);
		if (bit == -1)
			break;
		nbits = xfs_contig_bits(data_map, map_size, bit);
		ASSERT(nbits > 0);
		ASSERT(item->ri_buf[i].i_addr != NULL);
		ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0);
		ASSERT(XFS_BUF_COUNT(bp) >=
		       ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT));

		/*
		 * Do a sanity check if this is a dquot buffer. Just checking
		 * the first dquot in the buffer should do. XXXThis is
		 * probably a good thing to do for other buf types also.
		 */
		error = 0;
		if (buf_f->blf_flags &
			(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
			error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
					       item->ri_buf[i].i_addr,
					       -1, 0, XFS_QMOPT_DOWARN,
					       "dquot_buf_recover");
		}
		/* Skip the copy if the dquot failed validation. */
		if (!error)
			memcpy(xfs_buf_offset(bp,
				(uint)bit << XFS_BLI_SHIFT),	/* dest */
				item->ri_buf[i].i_addr,		/* source */
				nbits<<XFS_BLI_SHIFT);		/* length */
		i++;
		bit += nbits;
	}

	/* Shouldn't be any more regions */
	ASSERT(i == item->ri_total);
}
1941 | 1940 | ||
1942 | /* | 1941 | /* |
1943 | * Do some primitive error checking on ondisk dquot data structures. | 1942 | * Do some primitive error checking on ondisk dquot data structures. |
1944 | */ | 1943 | */ |
int
xfs_qm_dqcheck(
	xfs_disk_dquot_t *ddq,
	xfs_dqid_t	 id,
	uint		 type,	  /* used only when IO_dorepair is true */
	uint		 flags,
	char		 *str)
{
	xfs_dqblk_t	 *d = (xfs_dqblk_t *)ddq;
	int		errs = 0;

	/*
	 * We can encounter an uninitialized dquot buffer for 2 reasons:
	 * 1. If we crash while deleting the quotainode(s), and those blks got
	 *    used for user data. This is because we take the path of regular
	 *    file deletion; however, the size field of quotainodes is never
	 *    updated, so all the tricks that we play in itruncate_finish
	 *    don't quite matter.
	 *
	 * 2. We don't play the quota buffers when there's a quotaoff logitem.
	 *    But the allocation will be replayed so we'll end up with an
	 *    uninitialized quota block.
	 *
	 * This is all fine; things are still consistent, and we haven't lost
	 * any quota information. Just don't complain about bad dquot blks.
	 */
	if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
		if (flags & XFS_QMOPT_DOWARN)
			cmn_err(CE_ALERT,
			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
			str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
		errs++;
	}
	if (ddq->d_version != XFS_DQUOT_VERSION) {
		if (flags & XFS_QMOPT_DOWARN)
			cmn_err(CE_ALERT,
			"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
			str, id, ddq->d_version, XFS_DQUOT_VERSION);
		errs++;
	}

	/* d_flags must name exactly one dquot type. */
	if (ddq->d_flags != XFS_DQ_USER &&
	    ddq->d_flags != XFS_DQ_PROJ &&
	    ddq->d_flags != XFS_DQ_GROUP) {
		if (flags & XFS_QMOPT_DOWARN)
			cmn_err(CE_ALERT,
			"%s : XFS dquot ID 0x%x, unknown flags 0x%x",
			str, id, ddq->d_flags);
		errs++;
	}

	/* id == -1 means "any ID": the caller is not checking identity. */
	if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
		if (flags & XFS_QMOPT_DOWARN)
			cmn_err(CE_ALERT,
			"%s : ondisk-dquot 0x%p, ID mismatch: "
			"0x%x expected, found id 0x%x",
			str, ddq, id, be32_to_cpu(ddq->d_id));
		errs++;
	}

	/*
	 * For a structurally sound non-root dquot, each enforcement
	 * timer must have been started once usage meets or exceeds
	 * the corresponding soft limit.
	 */
	if (!errs && ddq->d_id) {
		if (ddq->d_blk_softlimit &&
		    be64_to_cpu(ddq->d_bcount) >=
				be64_to_cpu(ddq->d_blk_softlimit)) {
			if (!ddq->d_btimer) {
				if (flags & XFS_QMOPT_DOWARN)
					cmn_err(CE_ALERT,
					"%s : Dquot ID 0x%x (0x%p) "
					"BLK TIMER NOT STARTED",
					str, (int)be32_to_cpu(ddq->d_id), ddq);
				errs++;
			}
		}
		if (ddq->d_ino_softlimit &&
		    be64_to_cpu(ddq->d_icount) >=
				be64_to_cpu(ddq->d_ino_softlimit)) {
			if (!ddq->d_itimer) {
				if (flags & XFS_QMOPT_DOWARN)
					cmn_err(CE_ALERT,
					"%s : Dquot ID 0x%x (0x%p) "
					"INODE TIMER NOT STARTED",
					str, (int)be32_to_cpu(ddq->d_id), ddq);
				errs++;
			}
		}
		if (ddq->d_rtb_softlimit &&
		    be64_to_cpu(ddq->d_rtbcount) >=
				be64_to_cpu(ddq->d_rtb_softlimit)) {
			if (!ddq->d_rtbtimer) {
				if (flags & XFS_QMOPT_DOWARN)
					cmn_err(CE_ALERT,
					"%s : Dquot ID 0x%x (0x%p) "
					"RTBLK TIMER NOT STARTED",
					str, (int)be32_to_cpu(ddq->d_id), ddq);
				errs++;
			}
		}
	}

	/* Without DQREPAIR we only report; never modify the dquot. */
	if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
		return errs;

	if (flags & XFS_QMOPT_DOWARN)
		cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id);

	/*
	 * Typically, a repair is only requested by quotacheck.
	 */
	ASSERT(id != -1);
	ASSERT(flags & XFS_QMOPT_DQREPAIR);
	memset(d, 0, sizeof(xfs_dqblk_t));

	/* Rebuild a minimal valid on-disk dquot from the caller's id/type. */
	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
	d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
	d->dd_diskdq.d_flags = type;
	d->dd_diskdq.d_id = cpu_to_be32(id);

	return errs;
}
2064 | 2063 | ||
2065 | /* | 2064 | /* |
2066 | * Perform a dquot buffer recovery. | 2065 | * Perform a dquot buffer recovery. |
2067 | * Simple algorithm: if we have found a QUOTAOFF logitem of the same type | 2066 | * Simple algorithm: if we have found a QUOTAOFF logitem of the same type |
2068 | * (ie. USR or GRP), then just toss this buffer away; don't recover it. | 2067 | * (ie. USR or GRP), then just toss this buffer away; don't recover it. |
2069 | * Else, treat it as a regular buffer and do recovery. | 2068 | * Else, treat it as a regular buffer and do recovery. |
2070 | */ | 2069 | */ |
2071 | STATIC void | 2070 | STATIC void |
2072 | xlog_recover_do_dquot_buffer( | 2071 | xlog_recover_do_dquot_buffer( |
2073 | xfs_mount_t *mp, | 2072 | xfs_mount_t *mp, |
2074 | xlog_t *log, | 2073 | xlog_t *log, |
2075 | xlog_recover_item_t *item, | 2074 | xlog_recover_item_t *item, |
2076 | xfs_buf_t *bp, | 2075 | xfs_buf_t *bp, |
2077 | xfs_buf_log_format_t *buf_f) | 2076 | xfs_buf_log_format_t *buf_f) |
2078 | { | 2077 | { |
2079 | uint type; | 2078 | uint type; |
2080 | 2079 | ||
2081 | /* | 2080 | /* |
2082 | * Filesystems are required to send in quota flags at mount time. | 2081 | * Filesystems are required to send in quota flags at mount time. |
2083 | */ | 2082 | */ |
2084 | if (mp->m_qflags == 0) { | 2083 | if (mp->m_qflags == 0) { |
2085 | return; | 2084 | return; |
2086 | } | 2085 | } |
2087 | 2086 | ||
2088 | type = 0; | 2087 | type = 0; |
2089 | if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) | 2088 | if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) |
2090 | type |= XFS_DQ_USER; | 2089 | type |= XFS_DQ_USER; |
2091 | if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) | 2090 | if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) |
2092 | type |= XFS_DQ_PROJ; | 2091 | type |= XFS_DQ_PROJ; |
2093 | if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) | 2092 | if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) |
2094 | type |= XFS_DQ_GROUP; | 2093 | type |= XFS_DQ_GROUP; |
2095 | /* | 2094 | /* |
2096 | * This type of quotas was turned off, so ignore this buffer | 2095 | * This type of quotas was turned off, so ignore this buffer |
2097 | */ | 2096 | */ |
2098 | if (log->l_quotaoffs_flag & type) | 2097 | if (log->l_quotaoffs_flag & type) |
2099 | return; | 2098 | return; |
2100 | 2099 | ||
2101 | xlog_recover_do_reg_buffer(item, bp, buf_f); | 2100 | xlog_recover_do_reg_buffer(item, bp, buf_f); |
2102 | } | 2101 | } |
2103 | 2102 | ||
2104 | /* | 2103 | /* |
2105 | * This routine replays a modification made to a buffer at runtime. | 2104 | * This routine replays a modification made to a buffer at runtime. |
2106 | * There are actually two types of buffer, regular and inode, which | 2105 | * There are actually two types of buffer, regular and inode, which |
2107 | * are handled differently. Inode buffers are handled differently | 2106 | * are handled differently. Inode buffers are handled differently |
2108 | * in that we only recover a specific set of data from them, namely | 2107 | * in that we only recover a specific set of data from them, namely |
2109 | * the inode di_next_unlinked fields. This is because all other inode | 2108 | * the inode di_next_unlinked fields. This is because all other inode |
2110 | * data is actually logged via inode records and any data we replay | 2109 | * data is actually logged via inode records and any data we replay |
2111 | * here which overlaps that may be stale. | 2110 | * here which overlaps that may be stale. |
2112 | * | 2111 | * |
2113 | * When meta-data buffers are freed at run time we log a buffer item | 2112 | * When meta-data buffers are freed at run time we log a buffer item |
2114 | * with the XFS_BLI_CANCEL bit set to indicate that previous copies | 2113 | * with the XFS_BLI_CANCEL bit set to indicate that previous copies |
2115 | * of the buffer in the log should not be replayed at recovery time. | 2114 | * of the buffer in the log should not be replayed at recovery time. |
2116 | * This is so that if the blocks covered by the buffer are reused for | 2115 | * This is so that if the blocks covered by the buffer are reused for |
2117 | * file data before we crash we don't end up replaying old, freed | 2116 | * file data before we crash we don't end up replaying old, freed |
2118 | * meta-data into a user's file. | 2117 | * meta-data into a user's file. |
2119 | * | 2118 | * |
2120 | * To handle the cancellation of buffer log items, we make two passes | 2119 | * To handle the cancellation of buffer log items, we make two passes |
 * over the log during recovery.  During the first we build a table of
 * those buffers which have been cancelled, and during the second we
 * only replay those buffers which do not have corresponding cancel
 * records in the table.  See xlog_recover_do_buffer_pass[1,2] above
 * for more details on the implementation of the table of cancel records.
 */
STATIC int
xlog_recover_do_buffer_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	int			pass)
{
	xfs_buf_log_format_t	*buf_f;
	xfs_mount_t		*mp;
	xfs_buf_t		*bp;
	int			error;
	int			cancel;
	xfs_daddr_t		blkno;
	int			len;
	ushort			flags;

	/* The first region of a buf item is always the log format struct. */
	buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;

	if (pass == XLOG_RECOVER_PASS1) {
		/*
		 * In this pass we're only looking for buf items
		 * with the XFS_BLI_CANCEL bit set.
		 */
		xlog_recover_do_buffer_pass1(log, buf_f);
		return 0;
	} else {
		/*
		 * In this pass we want to recover all the buffers
		 * which have not been cancelled and are not
		 * cancellation buffers themselves.  The routine
		 * we call here will tell us whether or not to
		 * continue with the replay of this buffer.
		 */
		cancel = xlog_recover_do_buffer_pass2(log, buf_f);
		if (cancel) {
			return 0;
		}
	}
	/* Pull the target block address/length/flags out of the format. */
	switch (buf_f->blf_type) {
	case XFS_LI_BUF:
		blkno = buf_f->blf_blkno;
		len = buf_f->blf_len;
		flags = buf_f->blf_flags;
		break;
	default:
		xfs_fs_cmn_err(CE_ALERT, log->l_mp,
			"xfs_log_recover: unknown buffer type 0x%x, logdev %s",
			buf_f->blf_type, log->l_mp->m_logname ?
			log->l_mp->m_logname : "internal");
		XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
				 XFS_ERRLEVEL_LOW, log->l_mp);
		return XFS_ERROR(EFSCORRUPTED);
	}

	mp = log->l_mp;
	/* Inode buffers are read locked; other buffers use the default read. */
	if (flags & XFS_BLI_INODE_BUF) {
		bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len,
								XFS_BUF_LOCK);
	} else {
		bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0);
	}
	if (XFS_BUF_ISERROR(bp)) {
		xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
				  bp, blkno);
		error = XFS_BUF_GETERROR(bp);
		xfs_buf_relse(bp);
		return error;
	}

	/* Dispatch on buffer type: inode, dquot, or regular data buffer. */
	error = 0;
	if (flags & XFS_BLI_INODE_BUF) {
		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
	} else if (flags &
		  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
	} else {
		xlog_recover_do_reg_buffer(item, bp, buf_f);
	}
	/*
	 * NOTE(review): on error from xlog_recover_do_inode_buffer we return
	 * without xfs_buf_relse(bp) — looks like the buffer is leaked/left
	 * locked on this path; confirm against the callee's contract.
	 */
	if (error)
		return XFS_ERROR(error);

	/*
	 * Perform delayed write on the buffer.  Asynchronous writes will be
	 * slower when taking into account all the buffers to be flushed.
	 *
	 * Also make sure that only inode buffers with good sizes stay in
	 * the buffer cache.  The kernel moves inodes in buffers of 1 block
	 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger.  The inode
	 * buffers in the log can be a different size if the log was generated
	 * by an older kernel using unclustered inode buffers or a newer kernel
	 * running with a different inode cluster size.  Regardless, if the
	 * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE)
	 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep
	 * the buffer out of the buffer cache so that the buffer won't
	 * overlap with future reads of those inodes.
	 */
	if (XFS_DINODE_MAGIC ==
	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
	    (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize,
			(__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
		/* Wrong-sized inode buffer: write synchronously and evict. */
		XFS_BUF_STALE(bp);
		error = xfs_bwrite(mp, bp);
	} else {
		/* Normal case: queue a delayed write with our iodone hook. */
		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
		       XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
		XFS_BUF_SET_FSPRIVATE(bp, mp);
		XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
		xfs_bdwrite(mp, bp);
	}

	return (error);
}
2238 | 2237 | ||
/*
 * Replay an inode log item: read the inode's buffer from disk, validate the
 * logged copy, and overwrite the on-disk inode core (plus any logged data
 * and/or attribute fork regions).  Runs only in pass 2; replay is skipped if
 * the containing inode buffer was cancelled, or if the on-disk inode is
 * already newer than the logged one (per the flush iteration counter).
 */
STATIC int
xlog_recover_do_inode_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	int			pass)
{
	xfs_inode_log_format_t	*in_f;
	xfs_mount_t		*mp;
	xfs_buf_t		*bp;
	xfs_dinode_t		*dip;
	xfs_ino_t		ino;
	int			len;
	xfs_caddr_t		src;
	xfs_caddr_t		dest;
	int			error;
	int			attr_index;
	uint			fields;
	xfs_icdinode_t		*dicp;
	int			need_free = 0;	/* did we kmem_alloc in_f? */

	if (pass == XLOG_RECOVER_PASS1) {
		return 0;
	}

	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
		in_f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
	} else {
		/*
		 * Region is not the native format size: convert it into a
		 * scratch copy (freed via need_free at the error label).
		 */
		in_f = (xfs_inode_log_format_t *)kmem_alloc(
			sizeof(xfs_inode_log_format_t), KM_SLEEP);
		need_free = 1;
		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
		if (error)
			goto error;
	}
	ino = in_f->ilf_ino;
	mp = log->l_mp;

	/*
	 * Inode buffers can be freed, look out for it,
	 * and do not replay the inode.
	 */
	if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
					in_f->ilf_len, 0)) {
		error = 0;
		goto error;
	}

	bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno,
				in_f->ilf_len, XFS_BUF_LOCK);
	if (XFS_BUF_ISERROR(bp)) {
		xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
				  bp, in_f->ilf_blkno);
		error = XFS_BUF_GETERROR(bp);
		xfs_buf_relse(bp);
		goto error;
	}
	error = 0;
	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
	dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);

	/*
	 * Make sure the place we're flushing out to really looks
	 * like an inode!
	 */
	if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
			dip, bp, ino);
		XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
				 XFS_ERRLEVEL_LOW, mp);
		error = EFSCORRUPTED;
		goto error;
	}
	/* Region 1 holds the logged in-core inode; sanity check it too. */
	dicp = (xfs_icdinode_t *)(item->ri_buf[1].i_addr);
	if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
			item, ino);
		XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
				 XFS_ERRLEVEL_LOW, mp);
		error = EFSCORRUPTED;
		goto error;
	}

	/* Skip replay when the on disk inode is newer than the log one */
	if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
		/*
		 * Deal with the wrap case, DI_MAX_FLUSH is less
		 * than smaller numbers
		 */
		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
		    dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
			/* do nothing */
		} else {
			xfs_buf_relse(bp);
			error = 0;
			goto error;
		}
	}
	/* Take the opportunity to reset the flush iteration count */
	dicp->di_flushiter = 0;

	/*
	 * Validate the logged inode's format against its mode: regular files
	 * must be extents/btree, directories may also be local format.
	 */
	if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
			XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)",
					 XFS_ERRLEVEL_LOW, mp, dicp);
			xfs_buf_relse(bp);
			xfs_fs_cmn_err(CE_ALERT, mp,
				"xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
				item, dip, bp, ino);
			error = EFSCORRUPTED;
			goto error;
		}
	} else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
		    (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
			XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)",
					     XFS_ERRLEVEL_LOW, mp, dicp);
			xfs_buf_relse(bp);
			xfs_fs_cmn_err(CE_ALERT, mp,
				"xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
				item, dip, bp, ino);
			error = EFSCORRUPTED;
			goto error;
		}
	}
	/* Extent counts can never exceed the block count. */
	if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)",
				     XFS_ERRLEVEL_LOW, mp, dicp);
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
			item, dip, bp, ino,
			dicp->di_nextents + dicp->di_anextents,
			dicp->di_nblocks);
		error = EFSCORRUPTED;
		goto error;
	}
	/* The attr fork offset must lie within the inode. */
	if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
				     XFS_ERRLEVEL_LOW, mp, dicp);
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
			item, dip, bp, ino, dicp->di_forkoff);
		error = EFSCORRUPTED;
		goto error;
	}
	if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
		XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
				     XFS_ERRLEVEL_LOW, mp, dicp);
		xfs_buf_relse(bp);
		xfs_fs_cmn_err(CE_ALERT, mp,
			"xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p",
			item->ri_buf[1].i_len, item);
		error = EFSCORRUPTED;
		goto error;
	}

	/* The core is in in-core format */
	xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);

	/*
	 * the rest is in on-disk format
	 *
	 * NOTE(review): check (7) above already rejected
	 * i_len > sizeof(struct xfs_icdinode), so this branch looks
	 * unreachable — confirm whether the check or the copy is intended.
	 */
	if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
		memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
			item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
			item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
	}

	/* Replay the logged rdev or uuid union member, if either was set. */
	fields = in_f->ilf_fields;
	switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
	case XFS_ILOG_DEV:
		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
		break;
	case XFS_ILOG_UUID:
		memcpy(XFS_DFORK_DPTR(dip),
		       &in_f->ilf_u.ilfu_uuid,
		       sizeof(uuid_t));
		break;
	}

	/* Two regions means core only — no fork data to replay. */
	if (in_f->ilf_size == 2)
		goto write_inode_buffer;
	len = item->ri_buf[2].i_len;
	src = item->ri_buf[2].i_addr;
	ASSERT(in_f->ilf_size <= 4);
	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
	ASSERT(!(fields & XFS_ILOG_DFORK) ||
	       (len == in_f->ilf_dsize));

	/* Replay the data fork (region 2), in whichever format was logged. */
	switch (fields & XFS_ILOG_DFORK) {
	case XFS_ILOG_DDATA:
	case XFS_ILOG_DEXT:
		memcpy(XFS_DFORK_DPTR(dip), src, len);
		break;

	case XFS_ILOG_DBROOT:
		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
				 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
				 XFS_DFORK_DSIZE(dip, mp));
		break;

	default:
		/*
		 * There are no data fork flags set.
		 */
		ASSERT((fields & XFS_ILOG_DFORK) == 0);
		break;
	}

	/*
	 * If we logged any attribute data, recover it.  There may or
	 * may not have been any other non-core data logged in this
	 * transaction.
	 */
	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
		/* Attr fork is region 3 if a data fork was logged, else 2. */
		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
			attr_index = 3;
		} else {
			attr_index = 2;
		}
		len = item->ri_buf[attr_index].i_len;
		src = item->ri_buf[attr_index].i_addr;
		ASSERT(len == in_f->ilf_asize);

		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
		case XFS_ILOG_ADATA:
		case XFS_ILOG_AEXT:
			dest = XFS_DFORK_APTR(dip);
			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
			memcpy(dest, src, len);
			break;

		case XFS_ILOG_ABROOT:
			dest = XFS_DFORK_APTR(dip);
			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
					 len, (xfs_bmdr_block_t*)dest,
					 XFS_DFORK_ASIZE(dip, mp));
			break;

		default:
			xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
			ASSERT(0);
			xfs_buf_relse(bp);
			error = EIO;
			goto error;
		}
	}

write_inode_buffer:
	/* Inode items get a delayed write; anything else is staled out. */
	if (ITEM_TYPE(item) == XFS_LI_INODE) {
		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
		       XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
		XFS_BUF_SET_FSPRIVATE(bp, mp);
		XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
		xfs_bdwrite(mp, bp);
	} else {
		XFS_BUF_STALE(bp);
		error = xfs_bwrite(mp, bp);
	}

error:
	if (need_free)
		kmem_free(in_f);
	return XFS_ERROR(error);
}
2509 | 2508 | ||
2510 | /* | 2509 | /* |
2511 | * Recover QUOTAOFF records. We simply make a note of it in the xlog_t | 2510 | * Recover QUOTAOFF records. We simply make a note of it in the xlog_t |
2512 | * structure, so that we know not to do any dquot item or dquot buffer recovery, | 2511 | * structure, so that we know not to do any dquot item or dquot buffer recovery, |
2513 | * of that type. | 2512 | * of that type. |
2514 | */ | 2513 | */ |
2515 | STATIC int | 2514 | STATIC int |
2516 | xlog_recover_do_quotaoff_trans( | 2515 | xlog_recover_do_quotaoff_trans( |
2517 | xlog_t *log, | 2516 | xlog_t *log, |
2518 | xlog_recover_item_t *item, | 2517 | xlog_recover_item_t *item, |
2519 | int pass) | 2518 | int pass) |
2520 | { | 2519 | { |
2521 | xfs_qoff_logformat_t *qoff_f; | 2520 | xfs_qoff_logformat_t *qoff_f; |
2522 | 2521 | ||
2523 | if (pass == XLOG_RECOVER_PASS2) { | 2522 | if (pass == XLOG_RECOVER_PASS2) { |
2524 | return (0); | 2523 | return (0); |
2525 | } | 2524 | } |
2526 | 2525 | ||
2527 | qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; | 2526 | qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; |
2528 | ASSERT(qoff_f); | 2527 | ASSERT(qoff_f); |
2529 | 2528 | ||
2530 | /* | 2529 | /* |
2531 | * The logitem format's flag tells us if this was user quotaoff, | 2530 | * The logitem format's flag tells us if this was user quotaoff, |
2532 | * group/project quotaoff or both. | 2531 | * group/project quotaoff or both. |
2533 | */ | 2532 | */ |
2534 | if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) | 2533 | if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) |
2535 | log->l_quotaoffs_flag |= XFS_DQ_USER; | 2534 | log->l_quotaoffs_flag |= XFS_DQ_USER; |
2536 | if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) | 2535 | if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) |
2537 | log->l_quotaoffs_flag |= XFS_DQ_PROJ; | 2536 | log->l_quotaoffs_flag |= XFS_DQ_PROJ; |
2538 | if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) | 2537 | if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) |
2539 | log->l_quotaoffs_flag |= XFS_DQ_GROUP; | 2538 | log->l_quotaoffs_flag |= XFS_DQ_GROUP; |
2540 | 2539 | ||
2541 | return (0); | 2540 | return (0); |
2542 | } | 2541 | } |
2543 | 2542 | ||
/*
 * Recover a dquot record: validate the logged copy, read the dquot's buffer
 * from disk, validate the on-disk copy, then overwrite it with the logged
 * data and queue a delayed write.  Runs only in pass 2, and only if quota
 * is enabled and this quota type was not turned off (see quotaoff above).
 */
STATIC int
xlog_recover_do_dquot_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	int			pass)
{
	xfs_mount_t		*mp;
	xfs_buf_t		*bp;
	struct xfs_disk_dquot	*ddq, *recddq;
	int			error;
	xfs_dq_logformat_t	*dq_f;
	uint			type;

	if (pass == XLOG_RECOVER_PASS1) {
		return 0;
	}
	mp = log->l_mp;

	/*
	 * Filesystems are required to send in quota flags at mount time.
	 */
	if (mp->m_qflags == 0)
		return (0);

	/* Region 1 carries the logged on-disk dquot image. */
	recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
	ASSERT(recddq);
	/*
	 * This type of quotas was turned off, so ignore this record.
	 */
	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
	ASSERT(type);
	if (log->l_quotaoffs_flag & type)
		return (0);

	/*
	 * At this point we know that quota was _not_ turned off.
	 * Since the mount flags are not indicating to us otherwise, this
	 * must mean that quota is on, and the dquot needs to be replayed.
	 * Remember that we may not have fully recovered the superblock yet,
	 * so we can't do the usual trick of looking at the SB quota bits.
	 *
	 * The other possibility, of course, is that the quota subsystem was
	 * removed since the last mount - ENOSYS.
	 */
	dq_f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr;
	ASSERT(dq_f);
	/* Sanity check the logged copy before trusting it. */
	if ((error = xfs_qm_dqcheck(recddq,
			   dq_f->qlf_id,
			   0, XFS_QMOPT_DOWARN,
			   "xlog_recover_do_dquot_trans (log copy)"))) {
		return XFS_ERROR(EIO);
	}
	ASSERT(dq_f->qlf_len == 1);

	error = xfs_read_buf(mp, mp->m_ddev_targp,
			     dq_f->qlf_blkno,
			     XFS_FSB_TO_BB(mp, dq_f->qlf_len),
			     0, &bp);
	if (error) {
		xfs_ioerror_alert("xlog_recover_do..(read#3)", mp,
				  bp, dq_f->qlf_blkno);
		return error;
	}
	ASSERT(bp);
	ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);

	/*
	 * At least the magic num portion should be on disk because this
	 * was among a chunk of dquots created earlier, and we did some
	 * minimal initialization then.
	 */
	if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
			   "xlog_recover_do_dquot_trans")) {
		xfs_buf_relse(bp);
		return XFS_ERROR(EIO);
	}

	/*
	 * NOTE(review): the copy length comes straight from the log region
	 * with no bound check against the dquot size within the buffer —
	 * confirm item->ri_buf[1].i_len is validated upstream.
	 */
	memcpy(ddq, recddq, item->ri_buf[1].i_len);

	/* Queue a delayed write with the recovery iodone hook attached. */
	ASSERT(dq_f->qlf_size == 2);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL ||
	       XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp);
	XFS_BUF_SET_FSPRIVATE(bp, mp);
	XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
	xfs_bdwrite(mp, bp);

	return (0);
}
2635 | 2634 | ||
2636 | /* | 2635 | /* |
2637 | * This routine is called to create an in-core extent free intent | 2636 | * This routine is called to create an in-core extent free intent |
2638 | * item from the efi format structure which was logged on disk. | 2637 | * item from the efi format structure which was logged on disk. |
2639 | * It allocates an in-core efi, copies the extents from the format | 2638 | * It allocates an in-core efi, copies the extents from the format |
2640 | * structure into it, and adds the efi to the AIL with the given | 2639 | * structure into it, and adds the efi to the AIL with the given |
2641 | * LSN. | 2640 | * LSN. |
2642 | */ | 2641 | */ |
2643 | STATIC int | 2642 | STATIC int |
2644 | xlog_recover_do_efi_trans( | 2643 | xlog_recover_do_efi_trans( |
2645 | xlog_t *log, | 2644 | xlog_t *log, |
2646 | xlog_recover_item_t *item, | 2645 | xlog_recover_item_t *item, |
2647 | xfs_lsn_t lsn, | 2646 | xfs_lsn_t lsn, |
2648 | int pass) | 2647 | int pass) |
2649 | { | 2648 | { |
2650 | int error; | 2649 | int error; |
2651 | xfs_mount_t *mp; | 2650 | xfs_mount_t *mp; |
2652 | xfs_efi_log_item_t *efip; | 2651 | xfs_efi_log_item_t *efip; |
2653 | xfs_efi_log_format_t *efi_formatp; | 2652 | xfs_efi_log_format_t *efi_formatp; |
2654 | 2653 | ||
2655 | if (pass == XLOG_RECOVER_PASS1) { | 2654 | if (pass == XLOG_RECOVER_PASS1) { |
2656 | return 0; | 2655 | return 0; |
2657 | } | 2656 | } |
2658 | 2657 | ||
2659 | efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; | 2658 | efi_formatp = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; |
2660 | 2659 | ||
2661 | mp = log->l_mp; | 2660 | mp = log->l_mp; |
2662 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); | 2661 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
2663 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), | 2662 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), |
2664 | &(efip->efi_format)))) { | 2663 | &(efip->efi_format)))) { |
2665 | xfs_efi_item_free(efip); | 2664 | xfs_efi_item_free(efip); |
2666 | return error; | 2665 | return error; |
2667 | } | 2666 | } |
2668 | efip->efi_next_extent = efi_formatp->efi_nextents; | 2667 | efip->efi_next_extent = efi_formatp->efi_nextents; |
2669 | efip->efi_flags |= XFS_EFI_COMMITTED; | 2668 | efip->efi_flags |= XFS_EFI_COMMITTED; |
2670 | 2669 | ||
2671 | spin_lock(&log->l_ailp->xa_lock); | 2670 | spin_lock(&log->l_ailp->xa_lock); |
2672 | /* | 2671 | /* |
2673 | * xfs_trans_ail_update() drops the AIL lock. | 2672 | * xfs_trans_ail_update() drops the AIL lock. |
2674 | */ | 2673 | */ |
2675 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); | 2674 | xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); |
2676 | return 0; | 2675 | return 0; |
2677 | } | 2676 | } |
2678 | 2677 | ||
2679 | 2678 | ||
/*
 * This routine is called when an efd format structure is found in
 * a committed transaction in the log. It's purpose is to cancel
 * the corresponding efi if it was still in the log. To do this
 * it searches the AIL for the efi with an id equal to that in the
 * efd format structure. If we find it, we remove the efi from the
 * AIL and free it.
 */
STATIC void
xlog_recover_do_efd_trans(
	xlog_t			*log,
	xlog_recover_item_t	*item,
	int			pass)
{
	xfs_efd_log_format_t	*efd_formatp;
	xfs_efi_log_item_t	*efip = NULL;
	xfs_log_item_t		*lip;
	__uint64_t		efi_id;
	struct xfs_ail_cursor	cur;
	struct xfs_ail		*ailp = log->l_ailp;

	/* EFDs are only acted upon in the second recovery pass. */
	if (pass == XLOG_RECOVER_PASS1) {
		return;
	}

	efd_formatp = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
	/*
	 * The logged region must be exactly the size of either the
	 * 32-bit or the 64-bit on-disk EFD format (nextents includes
	 * one extent embedded in the format struct, hence the - 1).
	 */
	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
	efi_id = efd_formatp->efd_efi_id;

	/*
	 * Search for the efi with the id in the efd format structure
	 * in the AIL.
	 */
	spin_lock(&ailp->xa_lock);
	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
	while (lip != NULL) {
		if (lip->li_type == XFS_LI_EFI) {
			efip = (xfs_efi_log_item_t *)lip;
			if (efip->efi_format.efi_id == efi_id) {
				/*
				 * xfs_trans_ail_delete() drops the
				 * AIL lock.
				 */
				xfs_trans_ail_delete(ailp, lip);
				xfs_efi_item_free(efip);
				/* retake the lock for cursor teardown */
				spin_lock(&ailp->xa_lock);
				break;
			}
		}
		lip = xfs_trans_ail_cursor_next(ailp, &cur);
	}
	/* cursor teardown must happen under the AIL lock */
	xfs_trans_ail_cursor_done(ailp, &cur);
	spin_unlock(&ailp->xa_lock);
}
2737 | 2736 | ||
2738 | /* | 2737 | /* |
2739 | * Perform the transaction | 2738 | * Perform the transaction |
2740 | * | 2739 | * |
2741 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | 2740 | * If the transaction modifies a buffer or inode, do it now. Otherwise, |
2742 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | 2741 | * EFIs and EFDs get queued up by adding entries into the AIL for them. |
2743 | */ | 2742 | */ |
2744 | STATIC int | 2743 | STATIC int |
2745 | xlog_recover_do_trans( | 2744 | xlog_recover_do_trans( |
2746 | xlog_t *log, | 2745 | xlog_t *log, |
2747 | xlog_recover_t *trans, | 2746 | xlog_recover_t *trans, |
2748 | int pass) | 2747 | int pass) |
2749 | { | 2748 | { |
2750 | int error = 0; | 2749 | int error = 0; |
2751 | xlog_recover_item_t *item, *first_item; | 2750 | xlog_recover_item_t *item, *first_item; |
2752 | 2751 | ||
2753 | if ((error = xlog_recover_reorder_trans(trans))) | 2752 | if ((error = xlog_recover_reorder_trans(trans))) |
2754 | return error; | 2753 | return error; |
2755 | first_item = item = trans->r_itemq; | 2754 | first_item = item = trans->r_itemq; |
2756 | do { | 2755 | do { |
2757 | /* | 2756 | /* |
2758 | * we don't need to worry about the block number being | 2757 | * we don't need to worry about the block number being |
2759 | * truncated in > 1 TB buffers because in user-land, | 2758 | * truncated in > 1 TB buffers because in user-land, |
2760 | * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so | 2759 | * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so |
2761 | * the blknos will get through the user-mode buffer | 2760 | * the blknos will get through the user-mode buffer |
2762 | * cache properly. The only bad case is o32 kernels | 2761 | * cache properly. The only bad case is o32 kernels |
2763 | * where xfs_daddr_t is 32-bits but mount will warn us | 2762 | * where xfs_daddr_t is 32-bits but mount will warn us |
2764 | * off a > 1 TB filesystem before we get here. | 2763 | * off a > 1 TB filesystem before we get here. |
2765 | */ | 2764 | */ |
2766 | if ((ITEM_TYPE(item) == XFS_LI_BUF)) { | 2765 | if ((ITEM_TYPE(item) == XFS_LI_BUF)) { |
2767 | if ((error = xlog_recover_do_buffer_trans(log, item, | 2766 | if ((error = xlog_recover_do_buffer_trans(log, item, |
2768 | pass))) | 2767 | pass))) |
2769 | break; | 2768 | break; |
2770 | } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { | 2769 | } else if ((ITEM_TYPE(item) == XFS_LI_INODE)) { |
2771 | if ((error = xlog_recover_do_inode_trans(log, item, | 2770 | if ((error = xlog_recover_do_inode_trans(log, item, |
2772 | pass))) | 2771 | pass))) |
2773 | break; | 2772 | break; |
2774 | } else if (ITEM_TYPE(item) == XFS_LI_EFI) { | 2773 | } else if (ITEM_TYPE(item) == XFS_LI_EFI) { |
2775 | if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, | 2774 | if ((error = xlog_recover_do_efi_trans(log, item, trans->r_lsn, |
2776 | pass))) | 2775 | pass))) |
2777 | break; | 2776 | break; |
2778 | } else if (ITEM_TYPE(item) == XFS_LI_EFD) { | 2777 | } else if (ITEM_TYPE(item) == XFS_LI_EFD) { |
2779 | xlog_recover_do_efd_trans(log, item, pass); | 2778 | xlog_recover_do_efd_trans(log, item, pass); |
2780 | } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { | 2779 | } else if (ITEM_TYPE(item) == XFS_LI_DQUOT) { |
2781 | if ((error = xlog_recover_do_dquot_trans(log, item, | 2780 | if ((error = xlog_recover_do_dquot_trans(log, item, |
2782 | pass))) | 2781 | pass))) |
2783 | break; | 2782 | break; |
2784 | } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { | 2783 | } else if ((ITEM_TYPE(item) == XFS_LI_QUOTAOFF)) { |
2785 | if ((error = xlog_recover_do_quotaoff_trans(log, item, | 2784 | if ((error = xlog_recover_do_quotaoff_trans(log, item, |
2786 | pass))) | 2785 | pass))) |
2787 | break; | 2786 | break; |
2788 | } else { | 2787 | } else { |
2789 | xlog_warn("XFS: xlog_recover_do_trans"); | 2788 | xlog_warn("XFS: xlog_recover_do_trans"); |
2790 | ASSERT(0); | 2789 | ASSERT(0); |
2791 | error = XFS_ERROR(EIO); | 2790 | error = XFS_ERROR(EIO); |
2792 | break; | 2791 | break; |
2793 | } | 2792 | } |
2794 | item = item->ri_next; | 2793 | item = item->ri_next; |
2795 | } while (first_item != item); | 2794 | } while (first_item != item); |
2796 | 2795 | ||
2797 | return error; | 2796 | return error; |
2798 | } | 2797 | } |
2799 | 2798 | ||
2800 | /* | 2799 | /* |
2801 | * Free up any resources allocated by the transaction | 2800 | * Free up any resources allocated by the transaction |
2802 | * | 2801 | * |
2803 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | 2802 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. |
2804 | */ | 2803 | */ |
2805 | STATIC void | 2804 | STATIC void |
2806 | xlog_recover_free_trans( | 2805 | xlog_recover_free_trans( |
2807 | xlog_recover_t *trans) | 2806 | xlog_recover_t *trans) |
2808 | { | 2807 | { |
2809 | xlog_recover_item_t *first_item, *item, *free_item; | 2808 | xlog_recover_item_t *first_item, *item, *free_item; |
2810 | int i; | 2809 | int i; |
2811 | 2810 | ||
2812 | item = first_item = trans->r_itemq; | 2811 | item = first_item = trans->r_itemq; |
2813 | do { | 2812 | do { |
2814 | free_item = item; | 2813 | free_item = item; |
2815 | item = item->ri_next; | 2814 | item = item->ri_next; |
2816 | /* Free the regions in the item. */ | 2815 | /* Free the regions in the item. */ |
2817 | for (i = 0; i < free_item->ri_cnt; i++) { | 2816 | for (i = 0; i < free_item->ri_cnt; i++) { |
2818 | kmem_free(free_item->ri_buf[i].i_addr); | 2817 | kmem_free(free_item->ri_buf[i].i_addr); |
2819 | } | 2818 | } |
2820 | /* Free the item itself */ | 2819 | /* Free the item itself */ |
2821 | kmem_free(free_item->ri_buf); | 2820 | kmem_free(free_item->ri_buf); |
2822 | kmem_free(free_item); | 2821 | kmem_free(free_item); |
2823 | } while (first_item != item); | 2822 | } while (first_item != item); |
2824 | /* Free the transaction recover structure */ | 2823 | /* Free the transaction recover structure */ |
2825 | kmem_free(trans); | 2824 | kmem_free(trans); |
2826 | } | 2825 | } |
2827 | 2826 | ||
2828 | STATIC int | 2827 | STATIC int |
2829 | xlog_recover_commit_trans( | 2828 | xlog_recover_commit_trans( |
2830 | xlog_t *log, | 2829 | xlog_t *log, |
2831 | xlog_recover_t **q, | 2830 | xlog_recover_t **q, |
2832 | xlog_recover_t *trans, | 2831 | xlog_recover_t *trans, |
2833 | int pass) | 2832 | int pass) |
2834 | { | 2833 | { |
2835 | int error; | 2834 | int error; |
2836 | 2835 | ||
2837 | if ((error = xlog_recover_unlink_tid(q, trans))) | 2836 | if ((error = xlog_recover_unlink_tid(q, trans))) |
2838 | return error; | 2837 | return error; |
2839 | if ((error = xlog_recover_do_trans(log, trans, pass))) | 2838 | if ((error = xlog_recover_do_trans(log, trans, pass))) |
2840 | return error; | 2839 | return error; |
2841 | xlog_recover_free_trans(trans); /* no error */ | 2840 | xlog_recover_free_trans(trans); /* no error */ |
2842 | return 0; | 2841 | return 0; |
2843 | } | 2842 | } |
2844 | 2843 | ||
2845 | STATIC int | 2844 | STATIC int |
2846 | xlog_recover_unmount_trans( | 2845 | xlog_recover_unmount_trans( |
2847 | xlog_recover_t *trans) | 2846 | xlog_recover_t *trans) |
2848 | { | 2847 | { |
2849 | /* Do nothing now */ | 2848 | /* Do nothing now */ |
2850 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); | 2849 | xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); |
2851 | return 0; | 2850 | return 0; |
2852 | } | 2851 | } |
2853 | 2852 | ||
/*
 * There are two valid states of the r_state field.  0 indicates that the
 * transaction structure is in a normal state.  We have either seen the
 * start of the transaction or the last operation we added was not a partial
 * operation.  If the last operation we added to the transaction was a
 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
 *
 * NOTE: skip LRs with 0 data length.
 */
STATIC int
xlog_recover_process_data(
	xlog_t			*log,
	xlog_recover_t		*rhash[],
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,
	int			pass)
{
	xfs_caddr_t		lp;		/* end of this log record's data */
	int			num_logops;
	xlog_op_header_t	*ohead;
	xlog_recover_t		*trans;
	xlog_tid_t		tid;
	int			error;
	unsigned long		hash;
	uint			flags;

	lp = dp + be32_to_cpu(rhead->h_len);
	num_logops = be32_to_cpu(rhead->h_num_logops);

	/* check the log format matches our own - else we can't recover */
	if (xlog_header_check_recover(log->l_mp, rhead))
		return (XFS_ERROR(EIO));

	/*
	 * Walk the log operations in this record.  Each iteration
	 * consumes one op header plus oh_len bytes of payload.
	 */
	while ((dp < lp) && num_logops) {
		ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
		ohead = (xlog_op_header_t *)dp;
		dp += sizeof(xlog_op_header_t);
		if (ohead->oh_clientid != XFS_TRANSACTION &&
		    ohead->oh_clientid != XFS_LOG) {
			xlog_warn(
		"XFS: xlog_recover_process_data: bad clientid");
			ASSERT(0);
			return (XFS_ERROR(EIO));
		}
		/* look up the in-progress transaction for this op's tid */
		tid = be32_to_cpu(ohead->oh_tid);
		hash = XLOG_RHASH(tid);
		trans = xlog_recover_find_tid(rhash[hash], tid);
		if (trans == NULL) {		   /* not found; add new tid */
			if (ohead->oh_flags & XLOG_START_TRANS)
				xlog_recover_new_tid(&rhash[hash], tid,
					be64_to_cpu(rhead->h_lsn));
		} else {
			if (dp + be32_to_cpu(ohead->oh_len) > lp) {
				xlog_warn(
			"XFS: xlog_recover_process_data: bad length");
				WARN_ON(1);
				return (XFS_ERROR(EIO));
			}
			/*
			 * Normalize the flags: END is irrelevant here, and
			 * WAS_CONT + CONTINUE collapses to WAS_CONT so the
			 * switch below sees a single continuation case.
			 */
			flags = ohead->oh_flags & ~XLOG_END_TRANS;
			if (flags & XLOG_WAS_CONT_TRANS)
				flags &= ~XLOG_CONTINUE_TRANS;
			switch (flags) {
			case XLOG_COMMIT_TRANS:
				error = xlog_recover_commit_trans(log,
						&rhash[hash], trans, pass);
				break;
			case XLOG_UNMOUNT_TRANS:
				error = xlog_recover_unmount_trans(trans);
				break;
			case XLOG_WAS_CONT_TRANS:
				error = xlog_recover_add_to_cont_trans(trans,
						dp, be32_to_cpu(ohead->oh_len));
				break;
			case XLOG_START_TRANS:
				/* a START for a tid we already track is bogus */
				xlog_warn(
			"XFS: xlog_recover_process_data: bad transaction");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			case 0:
			case XLOG_CONTINUE_TRANS:
				error = xlog_recover_add_to_trans(trans,
						dp, be32_to_cpu(ohead->oh_len));
				break;
			default:
				xlog_warn(
			"XFS: xlog_recover_process_data: bad flag");
				ASSERT(0);
				error = XFS_ERROR(EIO);
				break;
			}
			if (error)
				return error;
		}
		/* advance past this op's payload */
		dp += be32_to_cpu(ohead->oh_len);
		num_logops--;
	}
	return 0;
}
2953 | 2952 | ||
/*
 * Process an extent free intent item that was recovered from
 * the log.  We need to free the extents that it describes.
 */
STATIC int
xlog_recover_process_efi(
	xfs_mount_t		*mp,
	xfs_efi_log_item_t	*efip)
{
	xfs_efd_log_item_t	*efdp;
	xfs_trans_t		*tp;
	int			i;
	int			error = 0;
	xfs_extent_t		*extp;
	xfs_fsblock_t		startblock_fsb;

	ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED));

	/*
	 * First check the validity of the extents described by the
	 * EFI.  If any are bad, then assume that all are bad and
	 * just toss the EFI.
	 */
	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
		extp = &(efip->efi_format.efi_extents[i]);
		startblock_fsb = XFS_BB_TO_FSB(mp,
				   XFS_FSB_TO_DADDR(mp, extp->ext_start));
		/* reject zero-length extents and anything outside the fs */
		if ((startblock_fsb == 0) ||
		    (extp->ext_len == 0) ||
		    (startblock_fsb >= mp->m_sb.sb_dblocks) ||
		    (extp->ext_len >= mp->m_sb.sb_agblocks)) {
			/*
			 * This will pull the EFI from the AIL and
			 * free the memory associated with it.
			 */
			xfs_efi_release(efip, efip->efi_format.efi_nextents);
			return XFS_ERROR(EIO);
		}
	}

	tp = xfs_trans_alloc(mp, 0);
	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
	if (error)
		goto abort_error;
	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);

	/* free each extent and log the matching done (EFD) entry */
	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
		extp = &(efip->efi_format.efi_extents[i]);
		error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
		if (error)
			goto abort_error;
		xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
					 extp->ext_len);
	}

	/* mark it recovered so the AIL walk won't process it twice */
	efip->efi_flags |= XFS_EFI_RECOVERED;
	error = xfs_trans_commit(tp, 0);
	return error;

abort_error:
	xfs_trans_cancel(tp, XFS_TRANS_ABORT);
	return error;
}
3017 | 3016 | ||
/*
 * When this is called, all of the EFIs which did not have
 * corresponding EFDs should be in the AIL.  What we do now
 * is free the extents associated with each one.
 *
 * Since we process the EFIs in normal transactions, they
 * will be removed at some point after the commit.  This prevents
 * us from just walking down the list processing each one.
 * We'll use a flag in the EFI to skip those that we've already
 * processed and use the AIL iteration mechanism's generation
 * count to try to speed this up at least a bit.
 *
 * When we start, we know that the EFIs are the only things in
 * the AIL.  As we process them, however, other items are added
 * to the AIL.  Since everything added to the AIL must come after
 * everything already in the AIL, we stop processing as soon as
 * we see something other than an EFI in the AIL.
 */
STATIC int
xlog_recover_process_efis(
	xlog_t			*log)
{
	xfs_log_item_t		*lip;
	xfs_efi_log_item_t	*efip;
	int			error = 0;
	struct xfs_ail_cursor	cur;
	struct xfs_ail		*ailp;

	ailp = log->l_ailp;
	spin_lock(&ailp->xa_lock);
	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
	while (lip != NULL) {
		/*
		 * We're done when we see something other than an EFI.
		 * There should be no EFIs left in the AIL now.
		 */
		if (lip->li_type != XFS_LI_EFI) {
#ifdef DEBUG
			/* sanity check: no EFI may follow a non-EFI item */
			for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
				ASSERT(lip->li_type != XFS_LI_EFI);
#endif
			break;
		}

		/*
		 * Skip EFIs that we've already processed.
		 */
		efip = (xfs_efi_log_item_t *)lip;
		if (efip->efi_flags & XFS_EFI_RECOVERED) {
			lip = xfs_trans_ail_cursor_next(ailp, &cur);
			continue;
		}

		/* drop the AIL lock across the transactional processing */
		spin_unlock(&ailp->xa_lock);
		error = xlog_recover_process_efi(log->l_mp, efip);
		spin_lock(&ailp->xa_lock);
		if (error)
			goto out;
		lip = xfs_trans_ail_cursor_next(ailp, &cur);
	}
out:
	/* cursor teardown must happen under the AIL lock */
	xfs_trans_ail_cursor_done(ailp, &cur);
	spin_unlock(&ailp->xa_lock);
	return error;
}
3083 | 3082 | ||
3084 | /* | 3083 | /* |
3085 | * This routine performs a transaction to null out a bad inode pointer | 3084 | * This routine performs a transaction to null out a bad inode pointer |
3086 | * in an agi unlinked inode hash bucket. | 3085 | * in an agi unlinked inode hash bucket. |
3087 | */ | 3086 | */ |
3088 | STATIC void | 3087 | STATIC void |
3089 | xlog_recover_clear_agi_bucket( | 3088 | xlog_recover_clear_agi_bucket( |
3090 | xfs_mount_t *mp, | 3089 | xfs_mount_t *mp, |
3091 | xfs_agnumber_t agno, | 3090 | xfs_agnumber_t agno, |
3092 | int bucket) | 3091 | int bucket) |
3093 | { | 3092 | { |
3094 | xfs_trans_t *tp; | 3093 | xfs_trans_t *tp; |
3095 | xfs_agi_t *agi; | 3094 | xfs_agi_t *agi; |
3096 | xfs_buf_t *agibp; | 3095 | xfs_buf_t *agibp; |
3097 | int offset; | 3096 | int offset; |
3098 | int error; | 3097 | int error; |
3099 | 3098 | ||
3100 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); | 3099 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); |
3101 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), | 3100 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), |
3102 | 0, 0, 0); | 3101 | 0, 0, 0); |
3103 | if (error) | 3102 | if (error) |
3104 | goto out_abort; | 3103 | goto out_abort; |
3105 | 3104 | ||
3106 | error = xfs_read_agi(mp, tp, agno, &agibp); | 3105 | error = xfs_read_agi(mp, tp, agno, &agibp); |
3107 | if (error) | 3106 | if (error) |
3108 | goto out_abort; | 3107 | goto out_abort; |
3109 | 3108 | ||
3110 | agi = XFS_BUF_TO_AGI(agibp); | 3109 | agi = XFS_BUF_TO_AGI(agibp); |
3111 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | 3110 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
3112 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 3111 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
3113 | (sizeof(xfs_agino_t) * bucket); | 3112 | (sizeof(xfs_agino_t) * bucket); |
3114 | xfs_trans_log_buf(tp, agibp, offset, | 3113 | xfs_trans_log_buf(tp, agibp, offset, |
3115 | (offset + sizeof(xfs_agino_t) - 1)); | 3114 | (offset + sizeof(xfs_agino_t) - 1)); |
3116 | 3115 | ||
3117 | error = xfs_trans_commit(tp, 0); | 3116 | error = xfs_trans_commit(tp, 0); |
3118 | if (error) | 3117 | if (error) |
3119 | goto out_error; | 3118 | goto out_error; |
3120 | return; | 3119 | return; |
3121 | 3120 | ||
3122 | out_abort: | 3121 | out_abort: |
3123 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3122 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
3124 | out_error: | 3123 | out_error: |
3125 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " | 3124 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " |
3126 | "failed to clear agi %d. Continuing.", agno); | 3125 | "failed to clear agi %d. Continuing.", agno); |
3127 | return; | 3126 | return; |
3128 | } | 3127 | } |
3129 | 3128 | ||
3130 | STATIC xfs_agino_t | 3129 | STATIC xfs_agino_t |
3131 | xlog_recover_process_one_iunlink( | 3130 | xlog_recover_process_one_iunlink( |
3132 | struct xfs_mount *mp, | 3131 | struct xfs_mount *mp, |
3133 | xfs_agnumber_t agno, | 3132 | xfs_agnumber_t agno, |
3134 | xfs_agino_t agino, | 3133 | xfs_agino_t agino, |
3135 | int bucket) | 3134 | int bucket) |
3136 | { | 3135 | { |
3137 | struct xfs_buf *ibp; | 3136 | struct xfs_buf *ibp; |
3138 | struct xfs_dinode *dip; | 3137 | struct xfs_dinode *dip; |
3139 | struct xfs_inode *ip; | 3138 | struct xfs_inode *ip; |
3140 | xfs_ino_t ino; | 3139 | xfs_ino_t ino; |
3141 | int error; | 3140 | int error; |
3142 | 3141 | ||
3143 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 3142 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
3144 | error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); | 3143 | error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); |
3145 | if (error) | 3144 | if (error) |
3146 | goto fail; | 3145 | goto fail; |
3147 | 3146 | ||
3148 | /* | 3147 | /* |
3149 | * Get the on disk inode to find the next inode in the bucket. | 3148 | * Get the on disk inode to find the next inode in the bucket. |
3150 | */ | 3149 | */ |
3151 | ASSERT(ip != NULL); | 3150 | ASSERT(ip != NULL); |
3152 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); | 3151 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); |
3153 | if (error) | 3152 | if (error) |
3154 | goto fail; | 3153 | goto fail; |
3155 | 3154 | ||
3156 | ASSERT(dip != NULL); | 3155 | ASSERT(dip != NULL); |
3157 | ASSERT(ip->i_d.di_nlink == 0); | 3156 | ASSERT(ip->i_d.di_nlink == 0); |
3158 | 3157 | ||
3159 | /* setup for the next pass */ | 3158 | /* setup for the next pass */ |
3160 | agino = be32_to_cpu(dip->di_next_unlinked); | 3159 | agino = be32_to_cpu(dip->di_next_unlinked); |
3161 | xfs_buf_relse(ibp); | 3160 | xfs_buf_relse(ibp); |
3162 | 3161 | ||
3163 | /* | 3162 | /* |
3164 | * Prevent any DMAPI event from being sent when the reference on | 3163 | * Prevent any DMAPI event from being sent when the reference on |
3165 | * the inode is dropped. | 3164 | * the inode is dropped. |
3166 | */ | 3165 | */ |
3167 | ip->i_d.di_dmevmask = 0; | 3166 | ip->i_d.di_dmevmask = 0; |
3168 | 3167 | ||
3169 | /* | 3168 | /* |
3170 | * If this is a new inode, handle it specially. Otherwise, just | 3169 | * If this is a new inode, handle it specially. Otherwise, just |
3171 | * drop our reference to the inode. If there are no other | 3170 | * drop our reference to the inode. If there are no other |
3172 | * references, this will send the inode to xfs_inactive() which | 3171 | * references, this will send the inode to xfs_inactive() which |
3173 | * will truncate the file and free the inode. | 3172 | * will truncate the file and free the inode. |
3174 | */ | 3173 | */ |
3175 | if (ip->i_d.di_mode == 0) | 3174 | if (ip->i_d.di_mode == 0) |
3176 | xfs_iput_new(ip, 0); | 3175 | xfs_iput_new(ip, 0); |
3177 | else | 3176 | else |
3178 | IRELE(ip); | 3177 | IRELE(ip); |
3179 | return agino; | 3178 | return agino; |
3180 | 3179 | ||
3181 | fail: | 3180 | fail: |
3182 | /* | 3181 | /* |
3183 | * We can't read in the inode this bucket points to, or this inode | 3182 | * We can't read in the inode this bucket points to, or this inode |
3184 | * is messed up. Just ditch this bucket of inodes. We will lose | 3183 | * is messed up. Just ditch this bucket of inodes. We will lose |
3185 | * some inodes and space, but at least we won't hang. | 3184 | * some inodes and space, but at least we won't hang. |
3186 | * | 3185 | * |
3187 | * Call xlog_recover_clear_agi_bucket() to perform a transaction to | 3186 | * Call xlog_recover_clear_agi_bucket() to perform a transaction to |
3188 | * clear the inode pointer in the bucket. | 3187 | * clear the inode pointer in the bucket. |
3189 | */ | 3188 | */ |
3190 | xlog_recover_clear_agi_bucket(mp, agno, bucket); | 3189 | xlog_recover_clear_agi_bucket(mp, agno, bucket); |
3191 | return NULLAGINO; | 3190 | return NULLAGINO; |
3192 | } | 3191 | } |
3193 | 3192 | ||
/*
 * xlog_iunlink_recover
 *
 * This is called during recovery to process any inodes which
 * we unlinked but not freed when the system crashed.  These
 * inodes will be on the lists in the AGI blocks.  What we do
 * here is scan all the AGIs and fully truncate and free any
 * inodes found on the lists.  Each inode is removed from the
 * lists when it has been fully truncated and is freed.  The
 * freeing of the inode and its removal from the list must be
 * atomic.
 */
void
xlog_recover_process_iunlinks(
	xlog_t		*log)
{
	xfs_mount_t	*mp;
	xfs_agnumber_t	agno;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibp;
	xfs_agino_t	agino;
	int		bucket;
	int		error;
	uint		mp_dmevmask;	/* saved mount-wide DMAPI event mask */

	mp = log->l_mp;

	/*
	 * Prevent any DMAPI event from being sent while in this function.
	 * The saved mask is restored on the way out.
	 */
	mp_dmevmask = mp->m_dmevmask;
	mp->m_dmevmask = 0;

	/* Walk every allocation group and drain each unlinked bucket. */
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		/*
		 * Find the agi for this ag.
		 */
		error = xfs_read_agi(mp, NULL, agno, &agibp);
		if (error) {
			/*
			 * AGI is b0rked. Don't process it.
			 *
			 * We should probably mark the filesystem as corrupt
			 * after we've recovered all the ag's we can....
			 */
			continue;
		}
		agi = XFS_BUF_TO_AGI(agibp);

		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
			/* Head of this bucket's on-disk unlinked chain. */
			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
			while (agino != NULLAGINO) {
				/*
				 * Release the agi buffer so that it can
				 * be acquired in the normal course of the
				 * transaction to truncate and free the inode.
				 */
				xfs_buf_relse(agibp);

				/*
				 * Frees one inode and returns the next agino
				 * in the chain, or NULLAGINO on error (the
				 * bucket is then cleared) or at chain end.
				 */
				agino = xlog_recover_process_one_iunlink(mp,
							agno, agino, bucket);

				/*
				 * Reacquire the agibuffer and continue around
				 * the loop. This should never fail as we know
				 * the buffer was good earlier on.
				 *
				 * NOTE(review): on non-DEBUG builds the error
				 * is only ASSERTed, so a read failure here
				 * would leave agibp/agi stale — confirm this
				 * "cannot fail" assumption still holds.
				 */
				error = xfs_read_agi(mp, NULL, agno, &agibp);
				ASSERT(error == 0);
				agi = XFS_BUF_TO_AGI(agibp);
			}
		}

		/*
		 * Release the buffer for the current agi so we can
		 * go on to the next one.
		 */
		xfs_buf_relse(agibp);
	}

	/* Re-enable DMAPI events now that recovery of iunlinks is done. */
	mp->m_dmevmask = mp_dmevmask;
}
3276 | 3275 | ||
3277 | 3276 | ||
#ifdef DEBUG
/*
 * Debug builds only: XOR-fold the iclog data payload into a 32-bit
 * checksum and record it in the iclog header.  Non-DEBUG builds compile
 * this to nothing via the macro below.
 */
STATIC void
xlog_pack_data_checksum(
	xlog_t		*log,
	xlog_in_core_t	*iclog,
	int		size)
{
	__be32		*data = (__be32 *)iclog->ic_datap;
	int		nwords = size >> 2;	/* byte count -> 32-bit words */
	uint		sum = 0;
	int		i;

	for (i = 0; i < nwords; i++)
		sum ^= be32_to_cpu(data[i]);

	iclog->ic_header.h_chksum = cpu_to_be32(sum);
}
#else
#define xlog_pack_data_checksum(log, iclog, size)
#endif
3300 | 3299 | ||
/*
 * Stamp cycle number in every block
 *
 * The first 32-bit word of every basic block in the iclog data is
 * replaced with the record's cycle number; the displaced words are
 * saved in the record header (h_cycle_data) and, for v2 logs, in the
 * extended headers.  xlog_unpack_data() reverses this on recovery.
 */
void
xlog_pack_data(
	xlog_t		*log,
	xlog_in_core_t	*iclog,
	int		roundoff)	/* padding bytes beyond ic_offset */
{
	int		i, j, k;
	int		size = iclog->ic_offset + roundoff;
	__be32		cycle_lsn;
	xfs_caddr_t	dp;

	xlog_pack_data_checksum(log, iclog, size);

	cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);

	/*
	 * First XLOG_HEADER_CYCLE_SIZE bytes worth of blocks: stash the
	 * displaced words in the main header.
	 */
	dp = iclog->ic_datap;
	for (i = 0; i < BTOBB(size) &&
		i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
		*(__be32 *)dp = cycle_lsn;
		dp += BBSIZE;
	}

	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		xlog_in_core_2_t *xhdr = iclog->ic_data;

		/*
		 * Remaining blocks spill into the extended headers:
		 * j picks the header, k the slot within it.  Note i
		 * continues from the loop above.
		 */
		for ( ; i < BTOBB(size); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
			*(__be32 *)dp = cycle_lsn;
			dp += BBSIZE;
		}

		/* Every extended header also records the cycle itself. */
		for (i = 1; i < log->l_iclog_heads; i++) {
			xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
		}
	}
}
3343 | 3342 | ||
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
/*
 * Debug-only verification of the XOR checksum written by
 * xlog_pack_data_checksum().  A mismatch is reported via cmn_err but is
 * never treated as fatal; XLOG_CHKSUM_MISMATCH throttles repeat noise.
 */
STATIC void
xlog_unpack_data_checksum(
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,
	xlog_t			*log)
{
	__be32			*up = (__be32 *)dp;
	uint			chksum = 0;
	int			i;

	/* divide length by 4 to get # words */
	for (i = 0; i < be32_to_cpu(rhead->h_len) >> 2; i++)
		chksum ^= be32_to_cpu(up[i]);

	if (chksum == be32_to_cpu(rhead->h_chksum))
		return;

	/*
	 * If the record carries no checksum (written by a non-DEBUG
	 * kernel) and we have already complained once, stay quiet.
	 */
	if (!rhead->h_chksum && (log->l_flags & XLOG_CHKSUM_MISMATCH))
		return;

	cmn_err(CE_DEBUG,
	    "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
		be32_to_cpu(rhead->h_chksum), chksum);
	cmn_err(CE_DEBUG,
"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		cmn_err(CE_DEBUG,
			"XFS: LogR this is a LogV2 filesystem\n");
	}
	log->l_flags |= XLOG_CHKSUM_MISMATCH;
}
#else
#define xlog_unpack_data_checksum(rhead, dp, log)
#endif
3379 | 3378 | ||
/*
 * Undo xlog_pack_data(): restore the original first word of each basic
 * block in the record body from the cycle data stashed in the record
 * header (and, for v2 logs, in the extended headers that follow it).
 */
STATIC void
xlog_unpack_data(
	xlog_rec_header_t	*rhead,
	xfs_caddr_t		dp,	/* start of the record's data blocks */
	xlog_t			*log)
{
	int			i, j, k;

	/* Blocks covered by the main header's h_cycle_data array. */
	for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
		/*
		 * Remaining blocks come from the extended headers:
		 * j selects the header, k the slot within it.  i carries
		 * over from the loop above.
		 */
		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}

	/* No-op unless DEBUG && XFS_LOUD_RECOVERY. */
	xlog_unpack_data_checksum(rhead, dp, log);
}
3406 | 3405 | ||
3407 | STATIC int | 3406 | STATIC int |
3408 | xlog_valid_rec_header( | 3407 | xlog_valid_rec_header( |
3409 | xlog_t *log, | 3408 | xlog_t *log, |
3410 | xlog_rec_header_t *rhead, | 3409 | xlog_rec_header_t *rhead, |
3411 | xfs_daddr_t blkno) | 3410 | xfs_daddr_t blkno) |
3412 | { | 3411 | { |
3413 | int hlen; | 3412 | int hlen; |
3414 | 3413 | ||
3415 | if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { | 3414 | if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { |
3416 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", | 3415 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", |
3417 | XFS_ERRLEVEL_LOW, log->l_mp); | 3416 | XFS_ERRLEVEL_LOW, log->l_mp); |
3418 | return XFS_ERROR(EFSCORRUPTED); | 3417 | return XFS_ERROR(EFSCORRUPTED); |
3419 | } | 3418 | } |
3420 | if (unlikely( | 3419 | if (unlikely( |
3421 | (!rhead->h_version || | 3420 | (!rhead->h_version || |
3422 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { | 3421 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
3423 | xlog_warn("XFS: %s: unrecognised log version (%d).", | 3422 | xlog_warn("XFS: %s: unrecognised log version (%d).", |
3424 | __func__, be32_to_cpu(rhead->h_version)); | 3423 | __func__, be32_to_cpu(rhead->h_version)); |
3425 | return XFS_ERROR(EIO); | 3424 | return XFS_ERROR(EIO); |
3426 | } | 3425 | } |
3427 | 3426 | ||
3428 | /* LR body must have data or it wouldn't have been written */ | 3427 | /* LR body must have data or it wouldn't have been written */ |
3429 | hlen = be32_to_cpu(rhead->h_len); | 3428 | hlen = be32_to_cpu(rhead->h_len); |
3430 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { | 3429 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { |
3431 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", | 3430 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", |
3432 | XFS_ERRLEVEL_LOW, log->l_mp); | 3431 | XFS_ERRLEVEL_LOW, log->l_mp); |
3433 | return XFS_ERROR(EFSCORRUPTED); | 3432 | return XFS_ERROR(EFSCORRUPTED); |
3434 | } | 3433 | } |
3435 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { | 3434 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { |
3436 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", | 3435 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", |
3437 | XFS_ERRLEVEL_LOW, log->l_mp); | 3436 | XFS_ERRLEVEL_LOW, log->l_mp); |
3438 | return XFS_ERROR(EFSCORRUPTED); | 3437 | return XFS_ERROR(EFSCORRUPTED); |
3439 | } | 3438 | } |
3440 | return 0; | 3439 | return 0; |
3441 | } | 3440 | } |
3442 | 3441 | ||
3443 | /* | 3442 | /* |
3444 | * Read the log from tail to head and process the log records found. | 3443 | * Read the log from tail to head and process the log records found. |
3445 | * Handle the two cases where the tail and head are in the same cycle | 3444 | * Handle the two cases where the tail and head are in the same cycle |
3446 | * and where the active portion of the log wraps around the end of | 3445 | * and where the active portion of the log wraps around the end of |
3447 | * the physical log separately. The pass parameter is passed through | 3446 | * the physical log separately. The pass parameter is passed through |
3448 | * to the routines called to process the data and is not looked at | 3447 | * to the routines called to process the data and is not looked at |
3449 | * here. | 3448 | * here. |
3450 | */ | 3449 | */ |
3451 | STATIC int | 3450 | STATIC int |
3452 | xlog_do_recovery_pass( | 3451 | xlog_do_recovery_pass( |
3453 | xlog_t *log, | 3452 | xlog_t *log, |
3454 | xfs_daddr_t head_blk, | 3453 | xfs_daddr_t head_blk, |
3455 | xfs_daddr_t tail_blk, | 3454 | xfs_daddr_t tail_blk, |
3456 | int pass) | 3455 | int pass) |
3457 | { | 3456 | { |
3458 | xlog_rec_header_t *rhead; | 3457 | xlog_rec_header_t *rhead; |
3459 | xfs_daddr_t blk_no; | 3458 | xfs_daddr_t blk_no; |
3460 | xfs_caddr_t bufaddr, offset; | 3459 | xfs_caddr_t bufaddr, offset; |
3461 | xfs_buf_t *hbp, *dbp; | 3460 | xfs_buf_t *hbp, *dbp; |
3462 | int error = 0, h_size; | 3461 | int error = 0, h_size; |
3463 | int bblks, split_bblks; | 3462 | int bblks, split_bblks; |
3464 | int hblks, split_hblks, wrapped_hblks; | 3463 | int hblks, split_hblks, wrapped_hblks; |
3465 | xlog_recover_t *rhash[XLOG_RHASH_SIZE]; | 3464 | xlog_recover_t *rhash[XLOG_RHASH_SIZE]; |
3466 | 3465 | ||
3467 | ASSERT(head_blk != tail_blk); | 3466 | ASSERT(head_blk != tail_blk); |
3468 | 3467 | ||
3469 | /* | 3468 | /* |
3470 | * Read the header of the tail block and get the iclog buffer size from | 3469 | * Read the header of the tail block and get the iclog buffer size from |
3471 | * h_size. Use this to tell how many sectors make up the log header. | 3470 | * h_size. Use this to tell how many sectors make up the log header. |
3472 | */ | 3471 | */ |
3473 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 3472 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
3474 | /* | 3473 | /* |
3475 | * When using variable length iclogs, read first sector of | 3474 | * When using variable length iclogs, read first sector of |
3476 | * iclog header and extract the header size from it. Get a | 3475 | * iclog header and extract the header size from it. Get a |
3477 | * new hbp that is the correct size. | 3476 | * new hbp that is the correct size. |
3478 | */ | 3477 | */ |
3479 | hbp = xlog_get_bp(log, 1); | 3478 | hbp = xlog_get_bp(log, 1); |
3480 | if (!hbp) | 3479 | if (!hbp) |
3481 | return ENOMEM; | 3480 | return ENOMEM; |
3482 | if ((error = xlog_bread(log, tail_blk, 1, hbp))) | 3481 | if ((error = xlog_bread(log, tail_blk, 1, hbp))) |
3483 | goto bread_err1; | 3482 | goto bread_err1; |
3484 | offset = xlog_align(log, tail_blk, 1, hbp); | 3483 | offset = xlog_align(log, tail_blk, 1, hbp); |
3485 | rhead = (xlog_rec_header_t *)offset; | 3484 | rhead = (xlog_rec_header_t *)offset; |
3486 | error = xlog_valid_rec_header(log, rhead, tail_blk); | 3485 | error = xlog_valid_rec_header(log, rhead, tail_blk); |
3487 | if (error) | 3486 | if (error) |
3488 | goto bread_err1; | 3487 | goto bread_err1; |
3489 | h_size = be32_to_cpu(rhead->h_size); | 3488 | h_size = be32_to_cpu(rhead->h_size); |
3490 | if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && | 3489 | if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && |
3491 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | 3490 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
3492 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | 3491 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
3493 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 3492 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
3494 | hblks++; | 3493 | hblks++; |
3495 | xlog_put_bp(hbp); | 3494 | xlog_put_bp(hbp); |
3496 | hbp = xlog_get_bp(log, hblks); | 3495 | hbp = xlog_get_bp(log, hblks); |
3497 | } else { | 3496 | } else { |
3498 | hblks = 1; | 3497 | hblks = 1; |
3499 | } | 3498 | } |
3500 | } else { | 3499 | } else { |
3501 | ASSERT(log->l_sectbb_log == 0); | 3500 | ASSERT(log->l_sectbb_log == 0); |
3502 | hblks = 1; | 3501 | hblks = 1; |
3503 | hbp = xlog_get_bp(log, 1); | 3502 | hbp = xlog_get_bp(log, 1); |
3504 | h_size = XLOG_BIG_RECORD_BSIZE; | 3503 | h_size = XLOG_BIG_RECORD_BSIZE; |
3505 | } | 3504 | } |
3506 | 3505 | ||
3507 | if (!hbp) | 3506 | if (!hbp) |
3508 | return ENOMEM; | 3507 | return ENOMEM; |
3509 | dbp = xlog_get_bp(log, BTOBB(h_size)); | 3508 | dbp = xlog_get_bp(log, BTOBB(h_size)); |
3510 | if (!dbp) { | 3509 | if (!dbp) { |
3511 | xlog_put_bp(hbp); | 3510 | xlog_put_bp(hbp); |
3512 | return ENOMEM; | 3511 | return ENOMEM; |
3513 | } | 3512 | } |
3514 | 3513 | ||
3515 | memset(rhash, 0, sizeof(rhash)); | 3514 | memset(rhash, 0, sizeof(rhash)); |
3516 | if (tail_blk <= head_blk) { | 3515 | if (tail_blk <= head_blk) { |
3517 | for (blk_no = tail_blk; blk_no < head_blk; ) { | 3516 | for (blk_no = tail_blk; blk_no < head_blk; ) { |
3518 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | 3517 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) |
3519 | goto bread_err2; | 3518 | goto bread_err2; |
3520 | offset = xlog_align(log, blk_no, hblks, hbp); | 3519 | offset = xlog_align(log, blk_no, hblks, hbp); |
3521 | rhead = (xlog_rec_header_t *)offset; | 3520 | rhead = (xlog_rec_header_t *)offset; |
3522 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3521 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3523 | if (error) | 3522 | if (error) |
3524 | goto bread_err2; | 3523 | goto bread_err2; |
3525 | 3524 | ||
3526 | /* blocks in data section */ | 3525 | /* blocks in data section */ |
3527 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3526 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3528 | error = xlog_bread(log, blk_no + hblks, bblks, dbp); | 3527 | error = xlog_bread(log, blk_no + hblks, bblks, dbp); |
3529 | if (error) | 3528 | if (error) |
3530 | goto bread_err2; | 3529 | goto bread_err2; |
3531 | offset = xlog_align(log, blk_no + hblks, bblks, dbp); | 3530 | offset = xlog_align(log, blk_no + hblks, bblks, dbp); |
3532 | xlog_unpack_data(rhead, offset, log); | 3531 | xlog_unpack_data(rhead, offset, log); |
3533 | if ((error = xlog_recover_process_data(log, | 3532 | if ((error = xlog_recover_process_data(log, |
3534 | rhash, rhead, offset, pass))) | 3533 | rhash, rhead, offset, pass))) |
3535 | goto bread_err2; | 3534 | goto bread_err2; |
3536 | blk_no += bblks + hblks; | 3535 | blk_no += bblks + hblks; |
3537 | } | 3536 | } |
3538 | } else { | 3537 | } else { |
3539 | /* | 3538 | /* |
3540 | * Perform recovery around the end of the physical log. | 3539 | * Perform recovery around the end of the physical log. |
3541 | * When the head is not on the same cycle number as the tail, | 3540 | * When the head is not on the same cycle number as the tail, |
3542 | * we can't do a sequential recovery as above. | 3541 | * we can't do a sequential recovery as above. |
3543 | */ | 3542 | */ |
3544 | blk_no = tail_blk; | 3543 | blk_no = tail_blk; |
3545 | while (blk_no < log->l_logBBsize) { | 3544 | while (blk_no < log->l_logBBsize) { |
3546 | /* | 3545 | /* |
3547 | * Check for header wrapping around physical end-of-log | 3546 | * Check for header wrapping around physical end-of-log |
3548 | */ | 3547 | */ |
3549 | offset = NULL; | 3548 | offset = NULL; |
3550 | split_hblks = 0; | 3549 | split_hblks = 0; |
3551 | wrapped_hblks = 0; | 3550 | wrapped_hblks = 0; |
3552 | if (blk_no + hblks <= log->l_logBBsize) { | 3551 | if (blk_no + hblks <= log->l_logBBsize) { |
3553 | /* Read header in one read */ | 3552 | /* Read header in one read */ |
3554 | error = xlog_bread(log, blk_no, hblks, hbp); | 3553 | error = xlog_bread(log, blk_no, hblks, hbp); |
3555 | if (error) | 3554 | if (error) |
3556 | goto bread_err2; | 3555 | goto bread_err2; |
3557 | offset = xlog_align(log, blk_no, hblks, hbp); | 3556 | offset = xlog_align(log, blk_no, hblks, hbp); |
3558 | } else { | 3557 | } else { |
3559 | /* This LR is split across physical log end */ | 3558 | /* This LR is split across physical log end */ |
3560 | if (blk_no != log->l_logBBsize) { | 3559 | if (blk_no != log->l_logBBsize) { |
3561 | /* some data before physical log end */ | 3560 | /* some data before physical log end */ |
3562 | ASSERT(blk_no <= INT_MAX); | 3561 | ASSERT(blk_no <= INT_MAX); |
3563 | split_hblks = log->l_logBBsize - (int)blk_no; | 3562 | split_hblks = log->l_logBBsize - (int)blk_no; |
3564 | ASSERT(split_hblks > 0); | 3563 | ASSERT(split_hblks > 0); |
3565 | if ((error = xlog_bread(log, blk_no, | 3564 | if ((error = xlog_bread(log, blk_no, |
3566 | split_hblks, hbp))) | 3565 | split_hblks, hbp))) |
3567 | goto bread_err2; | 3566 | goto bread_err2; |
3568 | offset = xlog_align(log, blk_no, | 3567 | offset = xlog_align(log, blk_no, |
3569 | split_hblks, hbp); | 3568 | split_hblks, hbp); |
3570 | } | 3569 | } |
3571 | /* | 3570 | /* |
3572 | * Note: this black magic still works with | 3571 | * Note: this black magic still works with |
3573 | * large sector sizes (non-512) only because: | 3572 | * large sector sizes (non-512) only because: |
3574 | * - we increased the buffer size originally | 3573 | * - we increased the buffer size originally |
3575 | * by 1 sector giving us enough extra space | 3574 | * by 1 sector giving us enough extra space |
3576 | * for the second read; | 3575 | * for the second read; |
3577 | * - the log start is guaranteed to be sector | 3576 | * - the log start is guaranteed to be sector |
3578 | * aligned; | 3577 | * aligned; |
3579 | * - we read the log end (LR header start) | 3578 | * - we read the log end (LR header start) |
3580 | * _first_, then the log start (LR header end) | 3579 | * _first_, then the log start (LR header end) |
3581 | * - order is important. | 3580 | * - order is important. |
3582 | */ | 3581 | */ |
3583 | wrapped_hblks = hblks - split_hblks; | 3582 | wrapped_hblks = hblks - split_hblks; |
3584 | bufaddr = XFS_BUF_PTR(hbp); | 3583 | bufaddr = XFS_BUF_PTR(hbp); |
3585 | error = XFS_BUF_SET_PTR(hbp, | 3584 | error = XFS_BUF_SET_PTR(hbp, |
3586 | bufaddr + BBTOB(split_hblks), | 3585 | bufaddr + BBTOB(split_hblks), |
3587 | BBTOB(hblks - split_hblks)); | 3586 | BBTOB(hblks - split_hblks)); |
3588 | if (!error) | 3587 | if (!error) |
3589 | error = xlog_bread(log, 0, | 3588 | error = xlog_bread(log, 0, |
3590 | wrapped_hblks, hbp); | 3589 | wrapped_hblks, hbp); |
3591 | if (!error) | 3590 | if (!error) |
3592 | error = XFS_BUF_SET_PTR(hbp, bufaddr, | 3591 | error = XFS_BUF_SET_PTR(hbp, bufaddr, |
3593 | BBTOB(hblks)); | 3592 | BBTOB(hblks)); |
3594 | if (error) | 3593 | if (error) |
3595 | goto bread_err2; | 3594 | goto bread_err2; |
3596 | if (!offset) | 3595 | if (!offset) |
3597 | offset = xlog_align(log, 0, | 3596 | offset = xlog_align(log, 0, |
3598 | wrapped_hblks, hbp); | 3597 | wrapped_hblks, hbp); |
3599 | } | 3598 | } |
3600 | rhead = (xlog_rec_header_t *)offset; | 3599 | rhead = (xlog_rec_header_t *)offset; |
3601 | error = xlog_valid_rec_header(log, rhead, | 3600 | error = xlog_valid_rec_header(log, rhead, |
3602 | split_hblks ? blk_no : 0); | 3601 | split_hblks ? blk_no : 0); |
3603 | if (error) | 3602 | if (error) |
3604 | goto bread_err2; | 3603 | goto bread_err2; |
3605 | 3604 | ||
3606 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3605 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3607 | blk_no += hblks; | 3606 | blk_no += hblks; |
3608 | 3607 | ||
3609 | /* Read in data for log record */ | 3608 | /* Read in data for log record */ |
3610 | if (blk_no + bblks <= log->l_logBBsize) { | 3609 | if (blk_no + bblks <= log->l_logBBsize) { |
3611 | error = xlog_bread(log, blk_no, bblks, dbp); | 3610 | error = xlog_bread(log, blk_no, bblks, dbp); |
3612 | if (error) | 3611 | if (error) |
3613 | goto bread_err2; | 3612 | goto bread_err2; |
3614 | offset = xlog_align(log, blk_no, bblks, dbp); | 3613 | offset = xlog_align(log, blk_no, bblks, dbp); |
3615 | } else { | 3614 | } else { |
3616 | /* This log record is split across the | 3615 | /* This log record is split across the |
3617 | * physical end of log */ | 3616 | * physical end of log */ |
3618 | offset = NULL; | 3617 | offset = NULL; |
3619 | split_bblks = 0; | 3618 | split_bblks = 0; |
3620 | if (blk_no != log->l_logBBsize) { | 3619 | if (blk_no != log->l_logBBsize) { |
3621 | /* some data is before the physical | 3620 | /* some data is before the physical |
3622 | * end of log */ | 3621 | * end of log */ |
3623 | ASSERT(!wrapped_hblks); | 3622 | ASSERT(!wrapped_hblks); |
3624 | ASSERT(blk_no <= INT_MAX); | 3623 | ASSERT(blk_no <= INT_MAX); |
3625 | split_bblks = | 3624 | split_bblks = |
3626 | log->l_logBBsize - (int)blk_no; | 3625 | log->l_logBBsize - (int)blk_no; |
3627 | ASSERT(split_bblks > 0); | 3626 | ASSERT(split_bblks > 0); |
3628 | if ((error = xlog_bread(log, blk_no, | 3627 | if ((error = xlog_bread(log, blk_no, |
3629 | split_bblks, dbp))) | 3628 | split_bblks, dbp))) |
3630 | goto bread_err2; | 3629 | goto bread_err2; |
3631 | offset = xlog_align(log, blk_no, | 3630 | offset = xlog_align(log, blk_no, |
3632 | split_bblks, dbp); | 3631 | split_bblks, dbp); |
3633 | } | 3632 | } |
3634 | /* | 3633 | /* |
3635 | * Note: this black magic still works with | 3634 | * Note: this black magic still works with |
3636 | * large sector sizes (non-512) only because: | 3635 | * large sector sizes (non-512) only because: |
3637 | * - we increased the buffer size originally | 3636 | * - we increased the buffer size originally |
3638 | * by 1 sector giving us enough extra space | 3637 | * by 1 sector giving us enough extra space |
3639 | * for the second read; | 3638 | * for the second read; |
3640 | * - the log start is guaranteed to be sector | 3639 | * - the log start is guaranteed to be sector |
3641 | * aligned; | 3640 | * aligned; |
3642 | * - we read the log end (LR header start) | 3641 | * - we read the log end (LR header start) |
3643 | * _first_, then the log start (LR header end) | 3642 | * _first_, then the log start (LR header end) |
3644 | * - order is important. | 3643 | * - order is important. |
3645 | */ | 3644 | */ |
3646 | bufaddr = XFS_BUF_PTR(dbp); | 3645 | bufaddr = XFS_BUF_PTR(dbp); |
3647 | error = XFS_BUF_SET_PTR(dbp, | 3646 | error = XFS_BUF_SET_PTR(dbp, |
3648 | bufaddr + BBTOB(split_bblks), | 3647 | bufaddr + BBTOB(split_bblks), |
3649 | BBTOB(bblks - split_bblks)); | 3648 | BBTOB(bblks - split_bblks)); |
3650 | if (!error) | 3649 | if (!error) |
3651 | error = xlog_bread(log, wrapped_hblks, | 3650 | error = xlog_bread(log, wrapped_hblks, |
3652 | bblks - split_bblks, | 3651 | bblks - split_bblks, |
3653 | dbp); | 3652 | dbp); |
3654 | if (!error) | 3653 | if (!error) |
3655 | error = XFS_BUF_SET_PTR(dbp, bufaddr, | 3654 | error = XFS_BUF_SET_PTR(dbp, bufaddr, |
3656 | h_size); | 3655 | h_size); |
3657 | if (error) | 3656 | if (error) |
3658 | goto bread_err2; | 3657 | goto bread_err2; |
3659 | if (!offset) | 3658 | if (!offset) |
3660 | offset = xlog_align(log, wrapped_hblks, | 3659 | offset = xlog_align(log, wrapped_hblks, |
3661 | bblks - split_bblks, dbp); | 3660 | bblks - split_bblks, dbp); |
3662 | } | 3661 | } |
3663 | xlog_unpack_data(rhead, offset, log); | 3662 | xlog_unpack_data(rhead, offset, log); |
3664 | if ((error = xlog_recover_process_data(log, rhash, | 3663 | if ((error = xlog_recover_process_data(log, rhash, |
3665 | rhead, offset, pass))) | 3664 | rhead, offset, pass))) |
3666 | goto bread_err2; | 3665 | goto bread_err2; |
3667 | blk_no += bblks; | 3666 | blk_no += bblks; |
3668 | } | 3667 | } |
3669 | 3668 | ||
3670 | ASSERT(blk_no >= log->l_logBBsize); | 3669 | ASSERT(blk_no >= log->l_logBBsize); |
3671 | blk_no -= log->l_logBBsize; | 3670 | blk_no -= log->l_logBBsize; |
3672 | 3671 | ||
3673 | /* read first part of physical log */ | 3672 | /* read first part of physical log */ |
3674 | while (blk_no < head_blk) { | 3673 | while (blk_no < head_blk) { |
3675 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) | 3674 | if ((error = xlog_bread(log, blk_no, hblks, hbp))) |
3676 | goto bread_err2; | 3675 | goto bread_err2; |
3677 | offset = xlog_align(log, blk_no, hblks, hbp); | 3676 | offset = xlog_align(log, blk_no, hblks, hbp); |
3678 | rhead = (xlog_rec_header_t *)offset; | 3677 | rhead = (xlog_rec_header_t *)offset; |
3679 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3678 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3680 | if (error) | 3679 | if (error) |
3681 | goto bread_err2; | 3680 | goto bread_err2; |
3682 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3681 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3683 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) | 3682 | if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) |
3684 | goto bread_err2; | 3683 | goto bread_err2; |
3685 | offset = xlog_align(log, blk_no+hblks, bblks, dbp); | 3684 | offset = xlog_align(log, blk_no+hblks, bblks, dbp); |
3686 | xlog_unpack_data(rhead, offset, log); | 3685 | xlog_unpack_data(rhead, offset, log); |
3687 | if ((error = xlog_recover_process_data(log, rhash, | 3686 | if ((error = xlog_recover_process_data(log, rhash, |
3688 | rhead, offset, pass))) | 3687 | rhead, offset, pass))) |
3689 | goto bread_err2; | 3688 | goto bread_err2; |
3690 | blk_no += bblks + hblks; | 3689 | blk_no += bblks + hblks; |
3691 | } | 3690 | } |
3692 | } | 3691 | } |
3693 | 3692 | ||
3694 | bread_err2: | 3693 | bread_err2: |
3695 | xlog_put_bp(dbp); | 3694 | xlog_put_bp(dbp); |
3696 | bread_err1: | 3695 | bread_err1: |
3697 | xlog_put_bp(hbp); | 3696 | xlog_put_bp(hbp); |
3698 | return error; | 3697 | return error; |
3699 | } | 3698 | } |
3700 | 3699 | ||
3701 | /* | 3700 | /* |
3702 | * Do the recovery of the log. We actually do this in two phases. | 3701 | * Do the recovery of the log. We actually do this in two phases. |
3703 | * The two passes are necessary in order to implement the function | 3702 | * The two passes are necessary in order to implement the function |
3704 | * of cancelling a record written into the log. The first pass | 3703 | * of cancelling a record written into the log. The first pass |
3705 | * determines those things which have been cancelled, and the | 3704 | * determines those things which have been cancelled, and the |
3706 | * second pass replays log items normally except for those which | 3705 | * second pass replays log items normally except for those which |
3707 | * have been cancelled. The handling of the replay and cancellations | 3706 | * have been cancelled. The handling of the replay and cancellations |
3708 | * takes place in the log item type specific routines. | 3707 | * takes place in the log item type specific routines. |
3709 | * | 3708 | * |
3710 | * The table of items which have cancel records in the log is allocated | 3709 | * The table of items which have cancel records in the log is allocated |
3711 | * and freed at this level, since only here do we know when all of | 3710 | * and freed at this level, since only here do we know when all of |
3712 | * the log recovery has been completed. | 3711 | * the log recovery has been completed. |
3713 | */ | 3712 | */ |
3714 | STATIC int | 3713 | STATIC int |
3715 | xlog_do_log_recovery( | 3714 | xlog_do_log_recovery( |
3716 | xlog_t *log, | 3715 | xlog_t *log, |
3717 | xfs_daddr_t head_blk, | 3716 | xfs_daddr_t head_blk, |
3718 | xfs_daddr_t tail_blk) | 3717 | xfs_daddr_t tail_blk) |
3719 | { | 3718 | { |
3720 | int error; | 3719 | int error; |
3721 | 3720 | ||
3722 | ASSERT(head_blk != tail_blk); | 3721 | ASSERT(head_blk != tail_blk); |
3723 | 3722 | ||
3724 | /* | 3723 | /* |
3725 | * First do a pass to find all of the cancelled buf log items. | 3724 | * First do a pass to find all of the cancelled buf log items. |
3726 | * Store them in the buf_cancel_table for use in the second pass. | 3725 | * Store them in the buf_cancel_table for use in the second pass. |
3727 | */ | 3726 | */ |
3728 | log->l_buf_cancel_table = | 3727 | log->l_buf_cancel_table = |
3729 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * | 3728 | (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE * |
3730 | sizeof(xfs_buf_cancel_t*), | 3729 | sizeof(xfs_buf_cancel_t*), |
3731 | KM_SLEEP); | 3730 | KM_SLEEP); |
3732 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3731 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3733 | XLOG_RECOVER_PASS1); | 3732 | XLOG_RECOVER_PASS1); |
3734 | if (error != 0) { | 3733 | if (error != 0) { |
3735 | kmem_free(log->l_buf_cancel_table); | 3734 | kmem_free(log->l_buf_cancel_table); |
3736 | log->l_buf_cancel_table = NULL; | 3735 | log->l_buf_cancel_table = NULL; |
3737 | return error; | 3736 | return error; |
3738 | } | 3737 | } |
3739 | /* | 3738 | /* |
3740 | * Then do a second pass to actually recover the items in the log. | 3739 | * Then do a second pass to actually recover the items in the log. |
3741 | * When it is complete free the table of buf cancel items. | 3740 | * When it is complete free the table of buf cancel items. |
3742 | */ | 3741 | */ |
3743 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3742 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3744 | XLOG_RECOVER_PASS2); | 3743 | XLOG_RECOVER_PASS2); |
3745 | #ifdef DEBUG | 3744 | #ifdef DEBUG |
3746 | if (!error) { | 3745 | if (!error) { |
3747 | int i; | 3746 | int i; |
3748 | 3747 | ||
3749 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3748 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
3750 | ASSERT(log->l_buf_cancel_table[i] == NULL); | 3749 | ASSERT(log->l_buf_cancel_table[i] == NULL); |
3751 | } | 3750 | } |
3752 | #endif /* DEBUG */ | 3751 | #endif /* DEBUG */ |
3753 | 3752 | ||
3754 | kmem_free(log->l_buf_cancel_table); | 3753 | kmem_free(log->l_buf_cancel_table); |
3755 | log->l_buf_cancel_table = NULL; | 3754 | log->l_buf_cancel_table = NULL; |
3756 | 3755 | ||
3757 | return error; | 3756 | return error; |
3758 | } | 3757 | } |
3759 | 3758 | ||
/*
 * Do the actual recovery: replay the log contents, then re-read the
 * superblock so the in-core copy reflects the replayed state.
 *
 * Returns 0 on success, or an errno-style error code if replay or the
 * superblock re-read fails (EIO if the filesystem was shut down by I/O
 * errors during replay).
 */
STATIC int
xlog_do_recover(
	xlog_t		*log,		/* log being recovered */
	xfs_daddr_t	head_blk,	/* head of the dirty log region */
	xfs_daddr_t	tail_blk)	/* tail of the dirty log region */
{
	int		error;
	xfs_buf_t	*bp;
	xfs_sb_t	*sbp;

	/*
	 * First replay the images in the log.
	 */
	error = xlog_do_log_recovery(log, head_blk, tail_blk);
	if (error) {
		return error;
	}

	/* Push all delayed-write metadata buffers out to disk. */
	XFS_bflush(log->l_mp->m_ddev_targp);

	/*
	 * If IO errors happened during recovery, bail out.
	 */
	if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
		return (EIO);
	}

	/*
	 * We now update the tail_lsn since much of the recovery has completed
	 * and there may be space available to use.  If there were no extent
	 * or iunlinks, we can free up the entire log and set the tail_lsn to
	 * be the last_sync_lsn.  This was set in xlog_find_tail to be the
	 * lsn of the last known good LR on disk.  If there are extent frees
	 * or iunlinks they will have some entries in the AIL; so we look at
	 * the AIL to determine how to set the tail_lsn.
	 */
	xlog_assign_tail_lsn(log->l_mp);

	/*
	 * Now that we've finished replaying all buffer and inode
	 * updates, re-read in the superblock.
	 * The flag manipulation below converts the cached superblock
	 * buffer into a fresh synchronous read request before it is
	 * resubmitted via xfsbdstrat.
	 */
	bp = xfs_getsb(log->l_mp, 0);
	XFS_BUF_UNDONE(bp);
	ASSERT(!(XFS_BUF_ISWRITE(bp)));
	ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
	XFS_BUF_READ(bp);
	XFS_BUF_UNASYNC(bp);
	xfsbdstrat(log->l_mp, bp);
	error = xfs_iowait(bp);
	if (error) {
		xfs_ioerror_alert("xlog_do_recover",
				  log->l_mp, bp, XFS_BUF_ADDR(bp));
		ASSERT(0);
		xfs_buf_relse(bp);
		return error;
	}

	/* Convert superblock from on-disk format */
	sbp = &log->l_mp->m_sb;
	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
	ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
	ASSERT(xfs_sb_good_version(sbp));
	xfs_buf_relse(bp);

	/* We've re-read the superblock so re-initialize per-cpu counters */
	xfs_icsb_reinit_counters(log->l_mp);

	xlog_recover_check_summary(log);

	/* Normal transactions can now occur */
	log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
	return 0;
}
3837 | 3836 | ||
3838 | /* | 3837 | /* |
3839 | * Perform recovery and re-initialize some log variables in xlog_find_tail. | 3838 | * Perform recovery and re-initialize some log variables in xlog_find_tail. |
3840 | * | 3839 | * |
3841 | * Return error or zero. | 3840 | * Return error or zero. |
3842 | */ | 3841 | */ |
3843 | int | 3842 | int |
3844 | xlog_recover( | 3843 | xlog_recover( |
3845 | xlog_t *log) | 3844 | xlog_t *log) |
3846 | { | 3845 | { |
3847 | xfs_daddr_t head_blk, tail_blk; | 3846 | xfs_daddr_t head_blk, tail_blk; |
3848 | int error; | 3847 | int error; |
3849 | 3848 | ||
3850 | /* find the tail of the log */ | 3849 | /* find the tail of the log */ |
3851 | if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) | 3850 | if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) |
3852 | return error; | 3851 | return error; |
3853 | 3852 | ||
3854 | if (tail_blk != head_blk) { | 3853 | if (tail_blk != head_blk) { |
3855 | /* There used to be a comment here: | 3854 | /* There used to be a comment here: |
3856 | * | 3855 | * |
3857 | * disallow recovery on read-only mounts. note -- mount | 3856 | * disallow recovery on read-only mounts. note -- mount |
3858 | * checks for ENOSPC and turns it into an intelligent | 3857 | * checks for ENOSPC and turns it into an intelligent |
3859 | * error message. | 3858 | * error message. |
3860 | * ...but this is no longer true. Now, unless you specify | 3859 | * ...but this is no longer true. Now, unless you specify |
3861 | * NORECOVERY (in which case this function would never be | 3860 | * NORECOVERY (in which case this function would never be |
3862 | * called), we just go ahead and recover. We do this all | 3861 | * called), we just go ahead and recover. We do this all |
3863 | * under the vfs layer, so we can get away with it unless | 3862 | * under the vfs layer, so we can get away with it unless |
3864 | * the device itself is read-only, in which case we fail. | 3863 | * the device itself is read-only, in which case we fail. |
3865 | */ | 3864 | */ |
3866 | if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { | 3865 | if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { |
3867 | return error; | 3866 | return error; |
3868 | } | 3867 | } |
3869 | 3868 | ||
3870 | cmn_err(CE_NOTE, | 3869 | cmn_err(CE_NOTE, |
3871 | "Starting XFS recovery on filesystem: %s (logdev: %s)", | 3870 | "Starting XFS recovery on filesystem: %s (logdev: %s)", |
3872 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3871 | log->l_mp->m_fsname, log->l_mp->m_logname ? |
3873 | log->l_mp->m_logname : "internal"); | 3872 | log->l_mp->m_logname : "internal"); |
3874 | 3873 | ||
3875 | error = xlog_do_recover(log, head_blk, tail_blk); | 3874 | error = xlog_do_recover(log, head_blk, tail_blk); |
3876 | log->l_flags |= XLOG_RECOVERY_NEEDED; | 3875 | log->l_flags |= XLOG_RECOVERY_NEEDED; |
3877 | } | 3876 | } |
3878 | return error; | 3877 | return error; |
3879 | } | 3878 | } |
3880 | 3879 | ||
3881 | /* | 3880 | /* |
3882 | * In the first part of recovery we replay inodes and buffers and build | 3881 | * In the first part of recovery we replay inodes and buffers and build |
3883 | * up the list of extent free items which need to be processed. Here | 3882 | * up the list of extent free items which need to be processed. Here |
3884 | * we process the extent free items and clean up the on disk unlinked | 3883 | * we process the extent free items and clean up the on disk unlinked |
3885 | * inode lists. This is separated from the first part of recovery so | 3884 | * inode lists. This is separated from the first part of recovery so |
3886 | * that the root and real-time bitmap inodes can be read in from disk in | 3885 | * that the root and real-time bitmap inodes can be read in from disk in |
3887 | * between the two stages. This is necessary so that we can free space | 3886 | * between the two stages. This is necessary so that we can free space |
3888 | * in the real-time portion of the file system. | 3887 | * in the real-time portion of the file system. |
3889 | */ | 3888 | */ |
3890 | int | 3889 | int |
3891 | xlog_recover_finish( | 3890 | xlog_recover_finish( |
3892 | xlog_t *log) | 3891 | xlog_t *log) |
3893 | { | 3892 | { |
3894 | /* | 3893 | /* |
3895 | * Now we're ready to do the transactions needed for the | 3894 | * Now we're ready to do the transactions needed for the |
3896 | * rest of recovery. Start with completing all the extent | 3895 | * rest of recovery. Start with completing all the extent |
3897 | * free intent records and then process the unlinked inode | 3896 | * free intent records and then process the unlinked inode |
3898 | * lists. At this point, we essentially run in normal mode | 3897 | * lists. At this point, we essentially run in normal mode |
3899 | * except that we're still performing recovery actions | 3898 | * except that we're still performing recovery actions |
3900 | * rather than accepting new requests. | 3899 | * rather than accepting new requests. |
3901 | */ | 3900 | */ |
3902 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { | 3901 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { |
3903 | int error; | 3902 | int error; |
3904 | error = xlog_recover_process_efis(log); | 3903 | error = xlog_recover_process_efis(log); |
3905 | if (error) { | 3904 | if (error) { |
3906 | cmn_err(CE_ALERT, | 3905 | cmn_err(CE_ALERT, |
3907 | "Failed to recover EFIs on filesystem: %s", | 3906 | "Failed to recover EFIs on filesystem: %s", |
3908 | log->l_mp->m_fsname); | 3907 | log->l_mp->m_fsname); |
3909 | return error; | 3908 | return error; |
3910 | } | 3909 | } |
3911 | /* | 3910 | /* |
3912 | * Sync the log to get all the EFIs out of the AIL. | 3911 | * Sync the log to get all the EFIs out of the AIL. |
3913 | * This isn't absolutely necessary, but it helps in | 3912 | * This isn't absolutely necessary, but it helps in |
3914 | * case the unlink transactions would have problems | 3913 | * case the unlink transactions would have problems |
3915 | * pushing the EFIs out of the way. | 3914 | * pushing the EFIs out of the way. |
3916 | */ | 3915 | */ |
3917 | xfs_log_force(log->l_mp, (xfs_lsn_t)0, | 3916 | xfs_log_force(log->l_mp, (xfs_lsn_t)0, |
3918 | (XFS_LOG_FORCE | XFS_LOG_SYNC)); | 3917 | (XFS_LOG_FORCE | XFS_LOG_SYNC)); |
3919 | 3918 | ||
3920 | xlog_recover_process_iunlinks(log); | 3919 | xlog_recover_process_iunlinks(log); |
3921 | 3920 | ||
3922 | xlog_recover_check_summary(log); | 3921 | xlog_recover_check_summary(log); |
3923 | 3922 | ||
3924 | cmn_err(CE_NOTE, | 3923 | cmn_err(CE_NOTE, |
3925 | "Ending XFS recovery on filesystem: %s (logdev: %s)", | 3924 | "Ending XFS recovery on filesystem: %s (logdev: %s)", |
3926 | log->l_mp->m_fsname, log->l_mp->m_logname ? | 3925 | log->l_mp->m_fsname, log->l_mp->m_logname ? |
3927 | log->l_mp->m_logname : "internal"); | 3926 | log->l_mp->m_logname : "internal"); |
3928 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; | 3927 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; |
3929 | } else { | 3928 | } else { |
3930 | cmn_err(CE_DEBUG, | 3929 | cmn_err(CE_DEBUG, |
3931 | "!Ending clean XFS mount for filesystem: %s\n", | 3930 | "!Ending clean XFS mount for filesystem: %s\n", |
3932 | log->l_mp->m_fsname); | 3931 | log->l_mp->m_fsname); |
3933 | } | 3932 | } |
3934 | return 0; | 3933 | return 0; |
3935 | } | 3934 | } |
3936 | 3935 | ||
3937 | 3936 | ||
#if defined(DEBUG)
/*
 * Read all of the agf and agi counters and check that they
 * are consistent with the superblock counters.
 *
 * Per-AG read failures are reported but otherwise skipped, so the
 * totals below may undercount on a damaged filesystem; the actual
 * comparison against the superblock is compiled in only with
 * XFS_LOUD_RECOVERY (and even then the ASSERTs are disabled until
 * allocation btree blocks in free space are accounted for).
 */
void
xlog_recover_check_summary(
	xlog_t		*log)
{
	xfs_mount_t	*mp;
	xfs_agf_t	*agfp;
	xfs_buf_t	*agfbp;
	xfs_buf_t	*agibp;
	xfs_buf_t	*sbbp;
#ifdef XFS_LOUD_RECOVERY
	xfs_sb_t	*sbp;
#endif
	xfs_agnumber_t	agno;
	__uint64_t	freeblks;	/* free blocks summed over all AGFs */
	__uint64_t	itotal;		/* allocated inodes over all AGIs */
	__uint64_t	ifree;		/* free inodes over all AGIs */
	int		error;

	mp = log->l_mp;

	freeblks = 0LL;
	itotal = 0LL;
	ifree = 0LL;
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
		if (error) {
			xfs_fs_cmn_err(CE_ALERT, mp,
					"xlog_recover_check_summary(agf)"
					"agf read failed agno %d error %d",
							agno, error);
		} else {
			agfp = XFS_BUF_TO_AGF(agfbp);
			/* free extents plus the AGFL blocks */
			freeblks += be32_to_cpu(agfp->agf_freeblks) +
				    be32_to_cpu(agfp->agf_flcount);
			xfs_buf_relse(agfbp);
		}

		error = xfs_read_agi(mp, NULL, agno, &agibp);
		if (!error) {
			struct xfs_agi	*agi = XFS_BUF_TO_AGI(agibp);

			itotal += be32_to_cpu(agi->agi_count);
			ifree += be32_to_cpu(agi->agi_freecount);
			xfs_buf_relse(agibp);
		}
	}

	sbbp = xfs_getsb(mp, 0);
#ifdef XFS_LOUD_RECOVERY
	sbp = &mp->m_sb;
	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp));
	cmn_err(CE_NOTE,
		"xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
		sbp->sb_icount, itotal);
	/* Fixed label: this line prints ifree, not itotal. */
	cmn_err(CE_NOTE,
		"xlog_recover_check_summary: sb_ifree %Lu ifree %Lu",
		sbp->sb_ifree, ifree);
	cmn_err(CE_NOTE,
		"xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
		sbp->sb_fdblocks, freeblks);
#if 0
	/*
	 * This is turned off until I account for the allocation
	 * btree blocks which live in free space.
	 */
	ASSERT(sbp->sb_icount == itotal);
	ASSERT(sbp->sb_ifree == ifree);
	ASSERT(sbp->sb_fdblocks == freeblks);
#endif
#endif
	xfs_buf_relse(sbbp);
}
#endif /* DEBUG */
4016 | 4015 |