Commit 3fe58f30b4fc3f8a9084b035a02bc0c67bee8d00

Authored by Christoph Hellwig
Committed by Ben Myers
1 parent 983d09ffe3

xfs: add CRC checks for quota blocks

Use the reserved space in struct xfs_dqblk to store a UUID and a CRC
for the quota blocks.

[dchinner@redhat.com] Add an LSN field and update for the current verifier
infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
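
For context, the commit message refers to fields carved out of the previously
reserved tail of the on-disk dquot record. The sketch below is reconstructed
from that description (the actual definition is in the quota header changed by
this commit, presumably xfs_quota.h, which is not the file shown here), and the
kernel types xfs_disk_dquot_t, uuid_t, __be32 and __be64 are assumed from
context:

	/* Sketch: on-disk per-dquot record with the CRC-era fields. */
	typedef struct xfs_dqblk {
		xfs_disk_dquot_t  dd_diskdq;  /* portion also kept incore */
		char              dd_fill[4]; /* padding */

		/* Only written/verified on filesystems with the CRC feature bit: */
		__be32            dd_crc;     /* checksum of the whole record */
		__be64            dd_lsn;     /* LSN of the last modification */
		uuid_t            dd_uuid;    /* filesystem UUID, matched against sb_uuid */
	} xfs_dqblk_t;

This is consistent with the verifier hunks below, which checksum each record
over sizeof(struct xfs_dqblk) bytes with the CRC stored at
offsetof(struct xfs_dqblk, dd_crc) and compare dd_uuid against
mp->m_sb.sb_uuid.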

Showing 5 changed files with 141 additions and 17 deletions

1 /* 1 /*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_bit.h" 20 #include "xfs_bit.h"
21 #include "xfs_log.h" 21 #include "xfs_log.h"
22 #include "xfs_trans.h" 22 #include "xfs_trans.h"
23 #include "xfs_sb.h" 23 #include "xfs_sb.h"
24 #include "xfs_ag.h" 24 #include "xfs_ag.h"
25 #include "xfs_alloc.h" 25 #include "xfs_alloc.h"
26 #include "xfs_quota.h" 26 #include "xfs_quota.h"
27 #include "xfs_mount.h" 27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h" 28 #include "xfs_bmap_btree.h"
29 #include "xfs_inode.h" 29 #include "xfs_inode.h"
30 #include "xfs_bmap.h" 30 #include "xfs_bmap.h"
31 #include "xfs_rtalloc.h" 31 #include "xfs_rtalloc.h"
32 #include "xfs_error.h" 32 #include "xfs_error.h"
33 #include "xfs_itable.h" 33 #include "xfs_itable.h"
34 #include "xfs_attr.h" 34 #include "xfs_attr.h"
35 #include "xfs_buf_item.h" 35 #include "xfs_buf_item.h"
36 #include "xfs_trans_space.h" 36 #include "xfs_trans_space.h"
37 #include "xfs_trans_priv.h" 37 #include "xfs_trans_priv.h"
38 #include "xfs_qm.h" 38 #include "xfs_qm.h"
39 #include "xfs_cksum.h"
39 #include "xfs_trace.h" 40 #include "xfs_trace.h"
40 41
41 /* 42 /*
42 * Lock order: 43 * Lock order:
43 * 44 *
44 * ip->i_lock 45 * ip->i_lock
45 * qi->qi_tree_lock 46 * qi->qi_tree_lock
46 * dquot->q_qlock (xfs_dqlock() and friends) 47 * dquot->q_qlock (xfs_dqlock() and friends)
47 * dquot->q_flush (xfs_dqflock() and friends) 48 * dquot->q_flush (xfs_dqflock() and friends)
48 * qi->qi_lru_lock 49 * qi->qi_lru_lock
49 * 50 *
50 * If two dquots need to be locked the order is user before group/project, 51 * If two dquots need to be locked the order is user before group/project,
51 * otherwise by the lowest id first, see xfs_dqlock2. 52 * otherwise by the lowest id first, see xfs_dqlock2.
52 */ 53 */
53 54
54 #ifdef DEBUG 55 #ifdef DEBUG
55 xfs_buftarg_t *xfs_dqerror_target; 56 xfs_buftarg_t *xfs_dqerror_target;
56 int xfs_do_dqerror; 57 int xfs_do_dqerror;
57 int xfs_dqreq_num; 58 int xfs_dqreq_num;
58 int xfs_dqerror_mod = 33; 59 int xfs_dqerror_mod = 33;
59 #endif 60 #endif
60 61
61 struct kmem_zone *xfs_qm_dqtrxzone; 62 struct kmem_zone *xfs_qm_dqtrxzone;
62 static struct kmem_zone *xfs_qm_dqzone; 63 static struct kmem_zone *xfs_qm_dqzone;
63 64
64 static struct lock_class_key xfs_dquot_other_class; 65 static struct lock_class_key xfs_dquot_other_class;
65 66
66 /* 67 /*
67 * This is called to free all the memory associated with a dquot 68 * This is called to free all the memory associated with a dquot
68 */ 69 */
69 void 70 void
70 xfs_qm_dqdestroy( 71 xfs_qm_dqdestroy(
71 xfs_dquot_t *dqp) 72 xfs_dquot_t *dqp)
72 { 73 {
73 ASSERT(list_empty(&dqp->q_lru)); 74 ASSERT(list_empty(&dqp->q_lru));
74 75
75 mutex_destroy(&dqp->q_qlock); 76 mutex_destroy(&dqp->q_qlock);
76 kmem_zone_free(xfs_qm_dqzone, dqp); 77 kmem_zone_free(xfs_qm_dqzone, dqp);
77 78
78 XFS_STATS_DEC(xs_qm_dquot); 79 XFS_STATS_DEC(xs_qm_dquot);
79 } 80 }
80 81
81 /* 82 /*
82 * If default limits are in force, push them into the dquot now. 83 * If default limits are in force, push them into the dquot now.
83 * We overwrite the dquot limits only if they are zero and this 84 * We overwrite the dquot limits only if they are zero and this
84 * is not the root dquot. 85 * is not the root dquot.
85 */ 86 */
86 void 87 void
87 xfs_qm_adjust_dqlimits( 88 xfs_qm_adjust_dqlimits(
88 struct xfs_mount *mp, 89 struct xfs_mount *mp,
89 struct xfs_dquot *dq) 90 struct xfs_dquot *dq)
90 { 91 {
91 struct xfs_quotainfo *q = mp->m_quotainfo; 92 struct xfs_quotainfo *q = mp->m_quotainfo;
92 struct xfs_disk_dquot *d = &dq->q_core; 93 struct xfs_disk_dquot *d = &dq->q_core;
93 int prealloc = 0; 94 int prealloc = 0;
94 95
95 ASSERT(d->d_id); 96 ASSERT(d->d_id);
96 97
97 if (q->qi_bsoftlimit && !d->d_blk_softlimit) { 98 if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
98 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); 99 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
99 prealloc = 1; 100 prealloc = 1;
100 } 101 }
101 if (q->qi_bhardlimit && !d->d_blk_hardlimit) { 102 if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
102 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); 103 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
103 prealloc = 1; 104 prealloc = 1;
104 } 105 }
105 if (q->qi_isoftlimit && !d->d_ino_softlimit) 106 if (q->qi_isoftlimit && !d->d_ino_softlimit)
106 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); 107 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
107 if (q->qi_ihardlimit && !d->d_ino_hardlimit) 108 if (q->qi_ihardlimit && !d->d_ino_hardlimit)
108 d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); 109 d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
109 if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) 110 if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
110 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); 111 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
111 if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) 112 if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
112 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); 113 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
113 114
114 if (prealloc) 115 if (prealloc)
115 xfs_dquot_set_prealloc_limits(dq); 116 xfs_dquot_set_prealloc_limits(dq);
116 } 117 }
117 118
118 /* 119 /*
119 * Check the limits and timers of a dquot and start or reset timers 120 * Check the limits and timers of a dquot and start or reset timers
120 * if necessary. 121 * if necessary.
121 * This gets called even when quota enforcement is OFF, which makes our 122 * This gets called even when quota enforcement is OFF, which makes our
122 * life a little less complicated. (We just don't reject any quota 123 * life a little less complicated. (We just don't reject any quota
123 * reservations in that case, when enforcement is off). 124 * reservations in that case, when enforcement is off).
124 * We also return 0 as the values of the timers in Q_GETQUOTA calls, when 125 * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
125 * enforcement's off. 126 * enforcement's off.
126 * In contrast, warnings are a little different in that they don't 127 * In contrast, warnings are a little different in that they don't
127 * 'automatically' get started when limits get exceeded. They do 128 * 'automatically' get started when limits get exceeded. They do
128 * get reset to zero, however, when we find the count to be under 129 * get reset to zero, however, when we find the count to be under
129 * the soft limit (they are only ever set non-zero via userspace). 130 * the soft limit (they are only ever set non-zero via userspace).
130 */ 131 */
131 void 132 void
132 xfs_qm_adjust_dqtimers( 133 xfs_qm_adjust_dqtimers(
133 xfs_mount_t *mp, 134 xfs_mount_t *mp,
134 xfs_disk_dquot_t *d) 135 xfs_disk_dquot_t *d)
135 { 136 {
136 ASSERT(d->d_id); 137 ASSERT(d->d_id);
137 138
138 #ifdef DEBUG 139 #ifdef DEBUG
139 if (d->d_blk_hardlimit) 140 if (d->d_blk_hardlimit)
140 ASSERT(be64_to_cpu(d->d_blk_softlimit) <= 141 ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
141 be64_to_cpu(d->d_blk_hardlimit)); 142 be64_to_cpu(d->d_blk_hardlimit));
142 if (d->d_ino_hardlimit) 143 if (d->d_ino_hardlimit)
143 ASSERT(be64_to_cpu(d->d_ino_softlimit) <= 144 ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
144 be64_to_cpu(d->d_ino_hardlimit)); 145 be64_to_cpu(d->d_ino_hardlimit));
145 if (d->d_rtb_hardlimit) 146 if (d->d_rtb_hardlimit)
146 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= 147 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
147 be64_to_cpu(d->d_rtb_hardlimit)); 148 be64_to_cpu(d->d_rtb_hardlimit));
148 #endif 149 #endif
149 150
150 if (!d->d_btimer) { 151 if (!d->d_btimer) {
151 if ((d->d_blk_softlimit && 152 if ((d->d_blk_softlimit &&
152 (be64_to_cpu(d->d_bcount) > 153 (be64_to_cpu(d->d_bcount) >
153 be64_to_cpu(d->d_blk_softlimit))) || 154 be64_to_cpu(d->d_blk_softlimit))) ||
154 (d->d_blk_hardlimit && 155 (d->d_blk_hardlimit &&
155 (be64_to_cpu(d->d_bcount) > 156 (be64_to_cpu(d->d_bcount) >
156 be64_to_cpu(d->d_blk_hardlimit)))) { 157 be64_to_cpu(d->d_blk_hardlimit)))) {
157 d->d_btimer = cpu_to_be32(get_seconds() + 158 d->d_btimer = cpu_to_be32(get_seconds() +
158 mp->m_quotainfo->qi_btimelimit); 159 mp->m_quotainfo->qi_btimelimit);
159 } else { 160 } else {
160 d->d_bwarns = 0; 161 d->d_bwarns = 0;
161 } 162 }
162 } else { 163 } else {
163 if ((!d->d_blk_softlimit || 164 if ((!d->d_blk_softlimit ||
164 (be64_to_cpu(d->d_bcount) <= 165 (be64_to_cpu(d->d_bcount) <=
165 be64_to_cpu(d->d_blk_softlimit))) && 166 be64_to_cpu(d->d_blk_softlimit))) &&
166 (!d->d_blk_hardlimit || 167 (!d->d_blk_hardlimit ||
167 (be64_to_cpu(d->d_bcount) <= 168 (be64_to_cpu(d->d_bcount) <=
168 be64_to_cpu(d->d_blk_hardlimit)))) { 169 be64_to_cpu(d->d_blk_hardlimit)))) {
169 d->d_btimer = 0; 170 d->d_btimer = 0;
170 } 171 }
171 } 172 }
172 173
173 if (!d->d_itimer) { 174 if (!d->d_itimer) {
174 if ((d->d_ino_softlimit && 175 if ((d->d_ino_softlimit &&
175 (be64_to_cpu(d->d_icount) > 176 (be64_to_cpu(d->d_icount) >
176 be64_to_cpu(d->d_ino_softlimit))) || 177 be64_to_cpu(d->d_ino_softlimit))) ||
177 (d->d_ino_hardlimit && 178 (d->d_ino_hardlimit &&
178 (be64_to_cpu(d->d_icount) > 179 (be64_to_cpu(d->d_icount) >
179 be64_to_cpu(d->d_ino_hardlimit)))) { 180 be64_to_cpu(d->d_ino_hardlimit)))) {
180 d->d_itimer = cpu_to_be32(get_seconds() + 181 d->d_itimer = cpu_to_be32(get_seconds() +
181 mp->m_quotainfo->qi_itimelimit); 182 mp->m_quotainfo->qi_itimelimit);
182 } else { 183 } else {
183 d->d_iwarns = 0; 184 d->d_iwarns = 0;
184 } 185 }
185 } else { 186 } else {
186 if ((!d->d_ino_softlimit || 187 if ((!d->d_ino_softlimit ||
187 (be64_to_cpu(d->d_icount) <= 188 (be64_to_cpu(d->d_icount) <=
188 be64_to_cpu(d->d_ino_softlimit))) && 189 be64_to_cpu(d->d_ino_softlimit))) &&
189 (!d->d_ino_hardlimit || 190 (!d->d_ino_hardlimit ||
190 (be64_to_cpu(d->d_icount) <= 191 (be64_to_cpu(d->d_icount) <=
191 be64_to_cpu(d->d_ino_hardlimit)))) { 192 be64_to_cpu(d->d_ino_hardlimit)))) {
192 d->d_itimer = 0; 193 d->d_itimer = 0;
193 } 194 }
194 } 195 }
195 196
196 if (!d->d_rtbtimer) { 197 if (!d->d_rtbtimer) {
197 if ((d->d_rtb_softlimit && 198 if ((d->d_rtb_softlimit &&
198 (be64_to_cpu(d->d_rtbcount) > 199 (be64_to_cpu(d->d_rtbcount) >
199 be64_to_cpu(d->d_rtb_softlimit))) || 200 be64_to_cpu(d->d_rtb_softlimit))) ||
200 (d->d_rtb_hardlimit && 201 (d->d_rtb_hardlimit &&
201 (be64_to_cpu(d->d_rtbcount) > 202 (be64_to_cpu(d->d_rtbcount) >
202 be64_to_cpu(d->d_rtb_hardlimit)))) { 203 be64_to_cpu(d->d_rtb_hardlimit)))) {
203 d->d_rtbtimer = cpu_to_be32(get_seconds() + 204 d->d_rtbtimer = cpu_to_be32(get_seconds() +
204 mp->m_quotainfo->qi_rtbtimelimit); 205 mp->m_quotainfo->qi_rtbtimelimit);
205 } else { 206 } else {
206 d->d_rtbwarns = 0; 207 d->d_rtbwarns = 0;
207 } 208 }
208 } else { 209 } else {
209 if ((!d->d_rtb_softlimit || 210 if ((!d->d_rtb_softlimit ||
210 (be64_to_cpu(d->d_rtbcount) <= 211 (be64_to_cpu(d->d_rtbcount) <=
211 be64_to_cpu(d->d_rtb_softlimit))) && 212 be64_to_cpu(d->d_rtb_softlimit))) &&
212 (!d->d_rtb_hardlimit || 213 (!d->d_rtb_hardlimit ||
213 (be64_to_cpu(d->d_rtbcount) <= 214 (be64_to_cpu(d->d_rtbcount) <=
214 be64_to_cpu(d->d_rtb_hardlimit)))) { 215 be64_to_cpu(d->d_rtb_hardlimit)))) {
215 d->d_rtbtimer = 0; 216 d->d_rtbtimer = 0;
216 } 217 }
217 } 218 }
218 } 219 }
219 220
220 /* 221 /*
221 * initialize a buffer full of dquots and log the whole thing 222 * initialize a buffer full of dquots and log the whole thing
222 */ 223 */
223 STATIC void 224 STATIC void
224 xfs_qm_init_dquot_blk( 225 xfs_qm_init_dquot_blk(
225 xfs_trans_t *tp, 226 xfs_trans_t *tp,
226 xfs_mount_t *mp, 227 xfs_mount_t *mp,
227 xfs_dqid_t id, 228 xfs_dqid_t id,
228 uint type, 229 uint type,
229 xfs_buf_t *bp) 230 xfs_buf_t *bp)
230 { 231 {
231 struct xfs_quotainfo *q = mp->m_quotainfo; 232 struct xfs_quotainfo *q = mp->m_quotainfo;
232 xfs_dqblk_t *d; 233 xfs_dqblk_t *d;
233 int curid, i; 234 int curid, i;
234 235
235 ASSERT(tp); 236 ASSERT(tp);
236 ASSERT(xfs_buf_islocked(bp)); 237 ASSERT(xfs_buf_islocked(bp));
237 238
238 d = bp->b_addr; 239 d = bp->b_addr;
239 240
240 /* 241 /*
241 * ID of the first dquot in the block - id's are zero based. 242 * ID of the first dquot in the block - id's are zero based.
242 */ 243 */
243 curid = id - (id % q->qi_dqperchunk); 244 curid = id - (id % q->qi_dqperchunk);
244 ASSERT(curid >= 0); 245 ASSERT(curid >= 0);
245 memset(d, 0, BBTOB(q->qi_dqchunklen)); 246 memset(d, 0, BBTOB(q->qi_dqchunklen));
246 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { 247 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
247 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); 248 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
248 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 249 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
249 d->dd_diskdq.d_id = cpu_to_be32(curid); 250 d->dd_diskdq.d_id = cpu_to_be32(curid);
250 d->dd_diskdq.d_flags = type; 251 d->dd_diskdq.d_flags = type;
252 if (xfs_sb_version_hascrc(&mp->m_sb))
253 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
251 } 254 }
252 255
253 xfs_trans_dquot_buf(tp, bp, 256 xfs_trans_dquot_buf(tp, bp,
254 (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : 257 (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
255 ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : 258 ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
256 XFS_BLF_GDQUOT_BUF))); 259 XFS_BLF_GDQUOT_BUF)));
257 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); 260 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
258 } 261 }
259 262
260 /* 263 /*
261 * Initialize the dynamic speculative preallocation thresholds. The lo/hi 264 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
262 * watermarks correspond to the soft and hard limits by default. If a soft limit 265 * watermarks correspond to the soft and hard limits by default. If a soft limit
263 * is not specified, we use 95% of the hard limit. 266 * is not specified, we use 95% of the hard limit.
264 */ 267 */
265 void 268 void
266 xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) 269 xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
267 { 270 {
268 __uint64_t space; 271 __uint64_t space;
269 272
270 dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit); 273 dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
271 dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit); 274 dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
272 if (!dqp->q_prealloc_lo_wmark) { 275 if (!dqp->q_prealloc_lo_wmark) {
273 dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark; 276 dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
274 do_div(dqp->q_prealloc_lo_wmark, 100); 277 do_div(dqp->q_prealloc_lo_wmark, 100);
275 dqp->q_prealloc_lo_wmark *= 95; 278 dqp->q_prealloc_lo_wmark *= 95;
276 } 279 }
277 280
278 space = dqp->q_prealloc_hi_wmark; 281 space = dqp->q_prealloc_hi_wmark;
279 282
280 do_div(space, 100); 283 do_div(space, 100);
281 dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space; 284 dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
282 dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3; 285 dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
283 dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; 286 dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
284 } 287 }
285 288
286 static void 289 STATIC void
290 xfs_dquot_buf_calc_crc(
291 struct xfs_mount *mp,
292 struct xfs_buf *bp)
293 {
294 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
295 int i;
296
297 if (!xfs_sb_version_hascrc(&mp->m_sb))
298 return;
299
300 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
301 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
302 offsetof(struct xfs_dqblk, dd_crc));
303 }
304 }
305
306 STATIC bool
307 xfs_dquot_buf_verify_crc(
308 struct xfs_mount *mp,
309 struct xfs_buf *bp)
310 {
311 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
312 int ndquots;
313 int i;
314
315 if (!xfs_sb_version_hascrc(&mp->m_sb))
316 return true;
317
318 /*
319 * if we are in log recovery, the quota subsystem has not been
320 * initialised so we have no quotainfo structure. In that case, we need
321 * to manually calculate the number of dquots in the buffer.
322 */
323 if (mp->m_quotainfo)
324 ndquots = mp->m_quotainfo->qi_dqperchunk;
325 else
326 ndquots = xfs_qm_calc_dquots_per_chunk(mp,
327 XFS_BB_TO_FSB(mp, bp->b_length));
328
329 for (i = 0; i < ndquots; i++, d++) {
330 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
331 offsetof(struct xfs_dqblk, dd_crc)))
332 return false;
333 if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
334 return false;
335 }
336
337 return true;
338 }
339
340 STATIC bool
287 xfs_dquot_buf_verify( 341 xfs_dquot_buf_verify(
342 struct xfs_mount *mp,
288 struct xfs_buf *bp) 343 struct xfs_buf *bp)
289 { 344 {
290 struct xfs_mount *mp = bp->b_target->bt_mount;
291 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; 345 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
292 struct xfs_disk_dquot *ddq;
293 xfs_dqid_t id = 0; 346 xfs_dqid_t id = 0;
347 int ndquots;
294 int i; 348 int i;
295 349
296 /* 350 /*
351 * if we are in log recovery, the quota subsystem has not been
352 * initialised so we have no quotainfo structure. In that case, we need
353 * to manually calculate the number of dquots in the buffer.
354 */
355 if (mp->m_quotainfo)
356 ndquots = mp->m_quotainfo->qi_dqperchunk;
357 else
358 ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length);
359
360 /*
297 * On the first read of the buffer, verify that each dquot is valid. 361 * On the first read of the buffer, verify that each dquot is valid.
298 * We don't know what the id of the dquot is supposed to be, just that 362 * We don't know what the id of the dquot is supposed to be, just that
299 * they should be increasing monotonically within the buffer. If the 363 * they should be increasing monotonically within the buffer. If the
300 * first id is corrupt, then it will fail on the second dquot in the 364 * first id is corrupt, then it will fail on the second dquot in the
301 * buffer so corruptions could point to the wrong dquot in this case. 365 * buffer so corruptions could point to the wrong dquot in this case.
302 */ 366 */
303 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { 367 for (i = 0; i < ndquots; i++) {
304 int error; 368 struct xfs_disk_dquot *ddq;
369 int error;
305 370
306 ddq = &d[i].dd_diskdq; 371 ddq = &d[i].dd_diskdq;
307 372
308 if (i == 0) 373 if (i == 0)
309 id = be32_to_cpu(ddq->d_id); 374 id = be32_to_cpu(ddq->d_id);
310 375
311 error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, 376 error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
312 "xfs_dquot_read_verify"); 377 "xfs_dquot_buf_verify");
313 if (error) { 378 if (error)
314 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d); 379 return false;
315 xfs_buf_ioerror(bp, EFSCORRUPTED);
316 break;
317 }
318 } 380 }
381 return true;
319 } 382 }
320 383
321 static void 384 static void
322 xfs_dquot_buf_read_verify( 385 xfs_dquot_buf_read_verify(
323 struct xfs_buf *bp) 386 struct xfs_buf *bp)
324 { 387 {
325 xfs_dquot_buf_verify(bp); 388 struct xfs_mount *mp = bp->b_target->bt_mount;
389
390 if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
391 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
392 xfs_buf_ioerror(bp, EFSCORRUPTED);
393 }
326 } 394 }
327 395
328 void 396 void
329 xfs_dquot_buf_write_verify( 397 xfs_dquot_buf_write_verify(
330 struct xfs_buf *bp) 398 struct xfs_buf *bp)
331 { 399 {
332 xfs_dquot_buf_verify(bp); 400 struct xfs_mount *mp = bp->b_target->bt_mount;
401
402 if (!xfs_dquot_buf_verify(mp, bp)) {
403 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
404 xfs_buf_ioerror(bp, EFSCORRUPTED);
405 return;
406 }
407 xfs_dquot_buf_calc_crc(mp, bp);
333 } 408 }
334 409
335 const struct xfs_buf_ops xfs_dquot_buf_ops = { 410 const struct xfs_buf_ops xfs_dquot_buf_ops = {
336 .verify_read = xfs_dquot_buf_read_verify, 411 .verify_read = xfs_dquot_buf_read_verify,
337 .verify_write = xfs_dquot_buf_write_verify, 412 .verify_write = xfs_dquot_buf_write_verify,
338 }; 413 };
339 414
340 /* 415 /*
341 * Allocate a block and fill it with dquots. 416 * Allocate a block and fill it with dquots.
342 * This is called when the bmapi finds a hole. 417 * This is called when the bmapi finds a hole.
343 */ 418 */
344 STATIC int 419 STATIC int
345 xfs_qm_dqalloc( 420 xfs_qm_dqalloc(
346 xfs_trans_t **tpp, 421 xfs_trans_t **tpp,
347 xfs_mount_t *mp, 422 xfs_mount_t *mp,
348 xfs_dquot_t *dqp, 423 xfs_dquot_t *dqp,
349 xfs_inode_t *quotip, 424 xfs_inode_t *quotip,
350 xfs_fileoff_t offset_fsb, 425 xfs_fileoff_t offset_fsb,
351 xfs_buf_t **O_bpp) 426 xfs_buf_t **O_bpp)
352 { 427 {
353 xfs_fsblock_t firstblock; 428 xfs_fsblock_t firstblock;
354 xfs_bmap_free_t flist; 429 xfs_bmap_free_t flist;
355 xfs_bmbt_irec_t map; 430 xfs_bmbt_irec_t map;
356 int nmaps, error, committed; 431 int nmaps, error, committed;
357 xfs_buf_t *bp; 432 xfs_buf_t *bp;
358 xfs_trans_t *tp = *tpp; 433 xfs_trans_t *tp = *tpp;
359 434
360 ASSERT(tp != NULL); 435 ASSERT(tp != NULL);
361 436
362 trace_xfs_dqalloc(dqp); 437 trace_xfs_dqalloc(dqp);
363 438
364 /* 439 /*
365 * Initialize the bmap freelist prior to calling bmapi code. 440 * Initialize the bmap freelist prior to calling bmapi code.
366 */ 441 */
367 xfs_bmap_init(&flist, &firstblock); 442 xfs_bmap_init(&flist, &firstblock);
368 xfs_ilock(quotip, XFS_ILOCK_EXCL); 443 xfs_ilock(quotip, XFS_ILOCK_EXCL);
369 /* 444 /*
370 * Return if this type of quotas is turned off while we didn't 445 * Return if this type of quotas is turned off while we didn't
371 * have an inode lock 446 * have an inode lock
372 */ 447 */
373 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { 448 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
374 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 449 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
375 return (ESRCH); 450 return (ESRCH);
376 } 451 }
377 452
378 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); 453 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
379 nmaps = 1; 454 nmaps = 1;
380 error = xfs_bmapi_write(tp, quotip, offset_fsb, 455 error = xfs_bmapi_write(tp, quotip, offset_fsb,
381 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 456 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
382 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), 457 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
383 &map, &nmaps, &flist); 458 &map, &nmaps, &flist);
384 if (error) 459 if (error)
385 goto error0; 460 goto error0;
386 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 461 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
387 ASSERT(nmaps == 1); 462 ASSERT(nmaps == 1);
388 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 463 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
389 (map.br_startblock != HOLESTARTBLOCK)); 464 (map.br_startblock != HOLESTARTBLOCK));
390 465
391 /* 466 /*
392 * Keep track of the blkno to save a lookup later 467 * Keep track of the blkno to save a lookup later
393 */ 468 */
394 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); 469 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
395 470
396 /* now we can just get the buffer (there's nothing to read yet) */ 471 /* now we can just get the buffer (there's nothing to read yet) */
397 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 472 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
398 dqp->q_blkno, 473 dqp->q_blkno,
399 mp->m_quotainfo->qi_dqchunklen, 474 mp->m_quotainfo->qi_dqchunklen,
400 0); 475 0);
401 476
402 error = xfs_buf_geterror(bp); 477 error = xfs_buf_geterror(bp);
403 if (error) 478 if (error)
404 goto error1; 479 goto error1;
405 bp->b_ops = &xfs_dquot_buf_ops; 480 bp->b_ops = &xfs_dquot_buf_ops;
406 481
407 /* 482 /*
408 * Make a chunk of dquots out of this buffer and log 483 * Make a chunk of dquots out of this buffer and log
409 * the entire thing. 484 * the entire thing.
410 */ 485 */
411 xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), 486 xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
412 dqp->dq_flags & XFS_DQ_ALLTYPES, bp); 487 dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
413 488
414 /* 489 /*
415 * xfs_bmap_finish() may commit the current transaction and 490 * xfs_bmap_finish() may commit the current transaction and
416 * start a second transaction if the freelist is not empty. 491 * start a second transaction if the freelist is not empty.
417 * 492 *
418 * Since we still want to modify this buffer, we need to 493 * Since we still want to modify this buffer, we need to
419 * ensure that the buffer is not released on commit of 494 * ensure that the buffer is not released on commit of
420 * the first transaction and ensure the buffer is added to the 495 * the first transaction and ensure the buffer is added to the
421 * second transaction. 496 * second transaction.
422 * 497 *
423 * If there is only one transaction then don't stop the buffer 498 * If there is only one transaction then don't stop the buffer
424 * from being released when it commits later on. 499 * from being released when it commits later on.
425 */ 500 */
426 501
427 xfs_trans_bhold(tp, bp); 502 xfs_trans_bhold(tp, bp);
428 503
429 if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { 504 if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
430 goto error1; 505 goto error1;
431 } 506 }
432 507
433 if (committed) { 508 if (committed) {
434 tp = *tpp; 509 tp = *tpp;
435 xfs_trans_bjoin(tp, bp); 510 xfs_trans_bjoin(tp, bp);
436 } else { 511 } else {
437 xfs_trans_bhold_release(tp, bp); 512 xfs_trans_bhold_release(tp, bp);
438 } 513 }
439 514
440 *O_bpp = bp; 515 *O_bpp = bp;
441 return 0; 516 return 0;
442 517
443 error1: 518 error1:
444 xfs_bmap_cancel(&flist); 519 xfs_bmap_cancel(&flist);
445 error0: 520 error0:
446 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 521 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
447 522
448 return (error); 523 return (error);
449 } 524 }
450 STATIC int 525 STATIC int
451 xfs_qm_dqrepair( 526 xfs_qm_dqrepair(
452 struct xfs_mount *mp, 527 struct xfs_mount *mp,
453 struct xfs_trans *tp, 528 struct xfs_trans *tp,
454 struct xfs_dquot *dqp, 529 struct xfs_dquot *dqp,
455 xfs_dqid_t firstid, 530 xfs_dqid_t firstid,
456 struct xfs_buf **bpp) 531 struct xfs_buf **bpp)
457 { 532 {
458 int error; 533 int error;
459 struct xfs_disk_dquot *ddq; 534 struct xfs_disk_dquot *ddq;
460 struct xfs_dqblk *d; 535 struct xfs_dqblk *d;
461 int i; 536 int i;
462 537
463 /* 538 /*
464 * Read the buffer without verification so we get the corrupted 539 * Read the buffer without verification so we get the corrupted
465 * buffer returned to us. make sure we verify it on write, though. 540 * buffer returned to us. make sure we verify it on write, though.
466 */ 541 */
467 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, 542 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
468 mp->m_quotainfo->qi_dqchunklen, 543 mp->m_quotainfo->qi_dqchunklen,
469 0, bpp, NULL); 544 0, bpp, NULL);
470 545
471 if (error) { 546 if (error) {
472 ASSERT(*bpp == NULL); 547 ASSERT(*bpp == NULL);
473 return XFS_ERROR(error); 548 return XFS_ERROR(error);
474 } 549 }
475 (*bpp)->b_ops = &xfs_dquot_buf_ops; 550 (*bpp)->b_ops = &xfs_dquot_buf_ops;
476 551
477 ASSERT(xfs_buf_islocked(*bpp)); 552 ASSERT(xfs_buf_islocked(*bpp));
478 d = (struct xfs_dqblk *)(*bpp)->b_addr; 553 d = (struct xfs_dqblk *)(*bpp)->b_addr;
479 554
480 /* Do the actual repair of dquots in this buffer */ 555 /* Do the actual repair of dquots in this buffer */
481 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { 556 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
482 ddq = &d[i].dd_diskdq; 557 ddq = &d[i].dd_diskdq;
483 error = xfs_qm_dqcheck(mp, ddq, firstid + i, 558 error = xfs_qm_dqcheck(mp, ddq, firstid + i,
484 dqp->dq_flags & XFS_DQ_ALLTYPES, 559 dqp->dq_flags & XFS_DQ_ALLTYPES,
485 XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); 560 XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
486 if (error) { 561 if (error) {
487 /* repair failed, we're screwed */ 562 /* repair failed, we're screwed */
488 xfs_trans_brelse(tp, *bpp); 563 xfs_trans_brelse(tp, *bpp);
489 return XFS_ERROR(EIO); 564 return XFS_ERROR(EIO);
490 } 565 }
491 } 566 }
492 567
493 return 0; 568 return 0;
494 } 569 }
495 570
496 /* 571 /*
497 * Maps a dquot to the buffer containing its on-disk version. 572 * Maps a dquot to the buffer containing its on-disk version.
498 * This returns a ptr to the buffer containing the on-disk dquot 573 * This returns a ptr to the buffer containing the on-disk dquot
499 * in the bpp param, and a ptr to the on-disk dquot within that buffer 574 * in the bpp param, and a ptr to the on-disk dquot within that buffer
500 */ 575 */
501 STATIC int 576 STATIC int
502 xfs_qm_dqtobp( 577 xfs_qm_dqtobp(
503 xfs_trans_t **tpp, 578 xfs_trans_t **tpp,
504 xfs_dquot_t *dqp, 579 xfs_dquot_t *dqp,
505 xfs_disk_dquot_t **O_ddpp, 580 xfs_disk_dquot_t **O_ddpp,
506 xfs_buf_t **O_bpp, 581 xfs_buf_t **O_bpp,
507 uint flags) 582 uint flags)
508 { 583 {
509 xfs_bmbt_irec_t map; 584 xfs_bmbt_irec_t map;
510 int nmaps = 1, error; 585 int nmaps = 1, error;
511 xfs_buf_t *bp; 586 xfs_buf_t *bp;
512 xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); 587 xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp);
513 xfs_mount_t *mp = dqp->q_mount; 588 xfs_mount_t *mp = dqp->q_mount;
514 xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); 589 xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
515 xfs_trans_t *tp = (tpp ? *tpp : NULL); 590 xfs_trans_t *tp = (tpp ? *tpp : NULL);
516 591
517 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; 592 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
518 593
519 xfs_ilock(quotip, XFS_ILOCK_SHARED); 594 xfs_ilock(quotip, XFS_ILOCK_SHARED);
520 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { 595 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
521 /* 596 /*
522 * Return if this type of quotas is turned off while we 597 * Return if this type of quotas is turned off while we
523 * didn't have the quota inode lock. 598 * didn't have the quota inode lock.
524 */ 599 */
525 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 600 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
526 return ESRCH; 601 return ESRCH;
527 } 602 }
528 603
529 /* 604 /*
530 * Find the block map; no allocations yet 605 * Find the block map; no allocations yet
531 */ 606 */
532 error = xfs_bmapi_read(quotip, dqp->q_fileoffset, 607 error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
533 XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); 608 XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
534 609
535 xfs_iunlock(quotip, XFS_ILOCK_SHARED); 610 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
536 if (error) 611 if (error)
537 return error; 612 return error;
538 613
539 ASSERT(nmaps == 1); 614 ASSERT(nmaps == 1);
540 ASSERT(map.br_blockcount == 1); 615 ASSERT(map.br_blockcount == 1);
541 616
542 /* 617 /*
543 * Offset of dquot in the (fixed sized) dquot chunk. 618 * Offset of dquot in the (fixed sized) dquot chunk.
544 */ 619 */
545 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * 620 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
546 sizeof(xfs_dqblk_t); 621 sizeof(xfs_dqblk_t);
547 622
548 ASSERT(map.br_startblock != DELAYSTARTBLOCK); 623 ASSERT(map.br_startblock != DELAYSTARTBLOCK);
549 if (map.br_startblock == HOLESTARTBLOCK) { 624 if (map.br_startblock == HOLESTARTBLOCK) {
550 /* 625 /*
551 * We don't allocate unless we're asked to 626 * We don't allocate unless we're asked to
552 */ 627 */
553 if (!(flags & XFS_QMOPT_DQALLOC)) 628 if (!(flags & XFS_QMOPT_DQALLOC))
554 return ENOENT; 629 return ENOENT;
555 630
556 ASSERT(tp); 631 ASSERT(tp);
557 error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, 632 error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
558 dqp->q_fileoffset, &bp); 633 dqp->q_fileoffset, &bp);
559 if (error) 634 if (error)
560 return error; 635 return error;
561 tp = *tpp; 636 tp = *tpp;
562 } else { 637 } else {
563 trace_xfs_dqtobp_read(dqp); 638 trace_xfs_dqtobp_read(dqp);
564 639
565 /* 640 /*
566 * store the blkno etc so that we don't have to do the 641 * store the blkno etc so that we don't have to do the
567 * mapping all the time 642 * mapping all the time
568 */ 643 */
569 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); 644 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
570 645
571 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 646 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
572 dqp->q_blkno, 647 dqp->q_blkno,
573 mp->m_quotainfo->qi_dqchunklen, 648 mp->m_quotainfo->qi_dqchunklen,
574 0, &bp, &xfs_dquot_buf_ops); 649 0, &bp, &xfs_dquot_buf_ops);
575 650
576 if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { 651 if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
577 xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * 652 xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
578 mp->m_quotainfo->qi_dqperchunk; 653 mp->m_quotainfo->qi_dqperchunk;
579 ASSERT(bp == NULL); 654 ASSERT(bp == NULL);
580 error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); 655 error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
581 } 656 }
582 657
583 if (error) { 658 if (error) {
584 ASSERT(bp == NULL); 659 ASSERT(bp == NULL);
585 return XFS_ERROR(error); 660 return XFS_ERROR(error);
586 } 661 }
587 } 662 }
588 663
589 ASSERT(xfs_buf_islocked(bp)); 664 ASSERT(xfs_buf_islocked(bp));
590 *O_bpp = bp; 665 *O_bpp = bp;
591 *O_ddpp = bp->b_addr + dqp->q_bufoffset; 666 *O_ddpp = bp->b_addr + dqp->q_bufoffset;
592 667
593 return (0); 668 return (0);
594 } 669 }
595 670
596 671
597 /* 672 /*
598 * Read in the ondisk dquot using dqtobp() then copy it to an incore version, 673 * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
599 * and release the buffer immediately. 674 * and release the buffer immediately.
600 * 675 *
601 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if needed. 676 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if needed.
602 */ 677 */
603 int 678 int
604 xfs_qm_dqread( 679 xfs_qm_dqread(
605 struct xfs_mount *mp, 680 struct xfs_mount *mp,
606 xfs_dqid_t id, 681 xfs_dqid_t id,
607 uint type, 682 uint type,
608 uint flags, 683 uint flags,
609 struct xfs_dquot **O_dqpp) 684 struct xfs_dquot **O_dqpp)
610 { 685 {
611 struct xfs_dquot *dqp; 686 struct xfs_dquot *dqp;
612 struct xfs_disk_dquot *ddqp; 687 struct xfs_disk_dquot *ddqp;
613 struct xfs_buf *bp; 688 struct xfs_buf *bp;
614 struct xfs_trans *tp = NULL; 689 struct xfs_trans *tp = NULL;
615 int error; 690 int error;
616 int cancelflags = 0; 691 int cancelflags = 0;
617 692
618 693
619 dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); 694 dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
620 695
621 dqp->dq_flags = type; 696 dqp->dq_flags = type;
622 dqp->q_core.d_id = cpu_to_be32(id); 697 dqp->q_core.d_id = cpu_to_be32(id);
623 dqp->q_mount = mp; 698 dqp->q_mount = mp;
624 INIT_LIST_HEAD(&dqp->q_lru); 699 INIT_LIST_HEAD(&dqp->q_lru);
625 mutex_init(&dqp->q_qlock); 700 mutex_init(&dqp->q_qlock);
626 init_waitqueue_head(&dqp->q_pinwait); 701 init_waitqueue_head(&dqp->q_pinwait);
627 702
628 /* 703 /*
629 * Because we want to use a counting completion, complete 704 * Because we want to use a counting completion, complete
630 * the flush completion once to allow a single access to 705 * the flush completion once to allow a single access to
631 * the flush completion without blocking. 706 * the flush completion without blocking.
632 */ 707 */
633 init_completion(&dqp->q_flush); 708 init_completion(&dqp->q_flush);
634 complete(&dqp->q_flush); 709 complete(&dqp->q_flush);
635 710
636 /* 711 /*
637 * Make sure group quotas have a different lock class than user 712 * Make sure group quotas have a different lock class than user
638 * quotas. 713 * quotas.
639 */ 714 */
640 if (!(type & XFS_DQ_USER)) 715 if (!(type & XFS_DQ_USER))
641 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); 716 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
642 717
643 XFS_STATS_INC(xs_qm_dquot); 718 XFS_STATS_INC(xs_qm_dquot);
644 719
645 trace_xfs_dqread(dqp); 720 trace_xfs_dqread(dqp);
646 721
647 if (flags & XFS_QMOPT_DQALLOC) { 722 if (flags & XFS_QMOPT_DQALLOC) {
648 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 723 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
649 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), 724 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
650 XFS_QM_DQALLOC_LOG_RES(mp), 0, 725 XFS_QM_DQALLOC_LOG_RES(mp), 0,
651 XFS_TRANS_PERM_LOG_RES, 726 XFS_TRANS_PERM_LOG_RES,
652 XFS_WRITE_LOG_COUNT); 727 XFS_WRITE_LOG_COUNT);
653 if (error) 728 if (error)
654 goto error1; 729 goto error1;
655 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 730 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
656 } 731 }
657 732
658 /* 733 /*
659 * get a pointer to the on-disk dquot and the buffer containing it 734 * get a pointer to the on-disk dquot and the buffer containing it
660 * dqp already knows its own type (GROUP/USER). 735 * dqp already knows its own type (GROUP/USER).
661 */ 736 */
662 error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags); 737 error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
663 if (error) { 738 if (error) {
664 /* 739 /*
665 * This can happen if quotas got turned off (ESRCH), 740 * This can happen if quotas got turned off (ESRCH),
666 * or if the dquot didn't exist on disk and we ask to 741 * or if the dquot didn't exist on disk and we ask to
667 * allocate (ENOENT). 742 * allocate (ENOENT).
668 */ 743 */
669 trace_xfs_dqread_fail(dqp); 744 trace_xfs_dqread_fail(dqp);
670 cancelflags |= XFS_TRANS_ABORT; 745 cancelflags |= XFS_TRANS_ABORT;
671 goto error1; 746 goto error1;
672 } 747 }
673 748
674 /* copy everything from disk dquot to the incore dquot */ 749 /* copy everything from disk dquot to the incore dquot */
675 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); 750 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
676 xfs_qm_dquot_logitem_init(dqp); 751 xfs_qm_dquot_logitem_init(dqp);
677 752
678 /* 753 /*
679 * Reservation counters are defined as reservation plus current usage 754 * Reservation counters are defined as reservation plus current usage
680 * to avoid having to add every time. 755 * to avoid having to add every time.
681 */ 756 */
682 dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); 757 dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
683 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); 758 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
684 dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); 759 dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
685 760
686 /* initialize the dquot speculative prealloc thresholds */ 761 /* initialize the dquot speculative prealloc thresholds */
687 xfs_dquot_set_prealloc_limits(dqp); 762 xfs_dquot_set_prealloc_limits(dqp);
688 763
689 /* Mark the buf so that this will stay incore a little longer */ 764 /* Mark the buf so that this will stay incore a little longer */
690 xfs_buf_set_ref(bp, XFS_DQUOT_REF); 765 xfs_buf_set_ref(bp, XFS_DQUOT_REF);
691 766
692 /* 767 /*
693 * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) 768 * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
694 * So we need to release with xfs_trans_brelse(). 769 * So we need to release with xfs_trans_brelse().
695 * The strategy here is identical to that of inodes; we lock 770 * The strategy here is identical to that of inodes; we lock
696 * the dquot in xfs_qm_dqget() before making it accessible to 771 * the dquot in xfs_qm_dqget() before making it accessible to
697 * others. This is because dquots, like inodes, need a good level of 772 * others. This is because dquots, like inodes, need a good level of
698 * concurrency, and we don't want to take locks on the entire buffers 773 * concurrency, and we don't want to take locks on the entire buffers
699 * for dquot accesses. 774 * for dquot accesses.
700 * Note also that the dquot buffer may even be dirty at this point, if 775 * Note also that the dquot buffer may even be dirty at this point, if
701 * this particular dquot was repaired. We still aren't afraid to 776 * this particular dquot was repaired. We still aren't afraid to
702 * brelse it because we have the changes incore. 777 * brelse it because we have the changes incore.
703 */ 778 */
704 ASSERT(xfs_buf_islocked(bp)); 779 ASSERT(xfs_buf_islocked(bp));
705 xfs_trans_brelse(tp, bp); 780 xfs_trans_brelse(tp, bp);
706 781
707 if (tp) { 782 if (tp) {
708 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 783 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
709 if (error) 784 if (error)
710 goto error0; 785 goto error0;
711 } 786 }
712 787
713 *O_dqpp = dqp; 788 *O_dqpp = dqp;
714 return error; 789 return error;
715 790
716 error1: 791 error1:
717 if (tp) 792 if (tp)
718 xfs_trans_cancel(tp, cancelflags); 793 xfs_trans_cancel(tp, cancelflags);
719 error0: 794 error0:
720 xfs_qm_dqdestroy(dqp); 795 xfs_qm_dqdestroy(dqp);
721 *O_dqpp = NULL; 796 *O_dqpp = NULL;
722 return error; 797 return error;
723 } 798 }
724 799
725 /* 800 /*
726 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a 801 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
727 * locked dquot, doing an allocation (if requested) as needed. 802 * locked dquot, doing an allocation (if requested) as needed.
728 * When both an inode and an id are given, the inode's id takes precedence. 803 * When both an inode and an id are given, the inode's id takes precedence.
729 * That is, if the id changes while we don't hold the ilock inside this 804 * That is, if the id changes while we don't hold the ilock inside this
730 * function, the new dquot is returned, not necessarily the one requested 805 * function, the new dquot is returned, not necessarily the one requested
731 * in the id argument. 806 * in the id argument.
732 */ 807 */
733 int 808 int
734 xfs_qm_dqget( 809 xfs_qm_dqget(
735 xfs_mount_t *mp, 810 xfs_mount_t *mp,
736 xfs_inode_t *ip, /* locked inode (optional) */ 811 xfs_inode_t *ip, /* locked inode (optional) */
737 xfs_dqid_t id, /* uid/projid/gid depending on type */ 812 xfs_dqid_t id, /* uid/projid/gid depending on type */
738 uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ 813 uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
739 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ 814 uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
740 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ 815 xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
741 { 816 {
742 struct xfs_quotainfo *qi = mp->m_quotainfo; 817 struct xfs_quotainfo *qi = mp->m_quotainfo;
743 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); 818 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
744 struct xfs_dquot *dqp; 819 struct xfs_dquot *dqp;
745 int error; 820 int error;
746 821
747 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 822 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
748 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || 823 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
749 (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || 824 (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
750 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { 825 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
751 return (ESRCH); 826 return (ESRCH);
752 } 827 }
753 828
754 #ifdef DEBUG 829 #ifdef DEBUG
755 if (xfs_do_dqerror) { 830 if (xfs_do_dqerror) {
756 if ((xfs_dqerror_target == mp->m_ddev_targp) && 831 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
757 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { 832 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
758 xfs_debug(mp, "Returning error in dqget"); 833 xfs_debug(mp, "Returning error in dqget");
759 return (EIO); 834 return (EIO);
760 } 835 }
761 } 836 }
762 837
763 ASSERT(type == XFS_DQ_USER || 838 ASSERT(type == XFS_DQ_USER ||
764 type == XFS_DQ_PROJ || 839 type == XFS_DQ_PROJ ||
765 type == XFS_DQ_GROUP); 840 type == XFS_DQ_GROUP);
766 if (ip) { 841 if (ip) {
767 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 842 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
768 ASSERT(xfs_inode_dquot(ip, type) == NULL); 843 ASSERT(xfs_inode_dquot(ip, type) == NULL);
769 } 844 }
770 #endif 845 #endif
771 846
772 restart: 847 restart:
773 mutex_lock(&qi->qi_tree_lock); 848 mutex_lock(&qi->qi_tree_lock);
774 dqp = radix_tree_lookup(tree, id); 849 dqp = radix_tree_lookup(tree, id);
775 if (dqp) { 850 if (dqp) {
776 xfs_dqlock(dqp); 851 xfs_dqlock(dqp);
777 if (dqp->dq_flags & XFS_DQ_FREEING) { 852 if (dqp->dq_flags & XFS_DQ_FREEING) {
778 xfs_dqunlock(dqp); 853 xfs_dqunlock(dqp);
779 mutex_unlock(&qi->qi_tree_lock); 854 mutex_unlock(&qi->qi_tree_lock);
780 trace_xfs_dqget_freeing(dqp); 855 trace_xfs_dqget_freeing(dqp);
781 delay(1); 856 delay(1);
782 goto restart; 857 goto restart;
783 } 858 }
784 859
785 dqp->q_nrefs++; 860 dqp->q_nrefs++;
786 mutex_unlock(&qi->qi_tree_lock); 861 mutex_unlock(&qi->qi_tree_lock);
787 862
788 trace_xfs_dqget_hit(dqp); 863 trace_xfs_dqget_hit(dqp);
789 XFS_STATS_INC(xs_qm_dqcachehits); 864 XFS_STATS_INC(xs_qm_dqcachehits);
790 *O_dqpp = dqp; 865 *O_dqpp = dqp;
791 return 0; 866 return 0;
792 } 867 }
793 mutex_unlock(&qi->qi_tree_lock); 868 mutex_unlock(&qi->qi_tree_lock);
794 XFS_STATS_INC(xs_qm_dqcachemisses); 869 XFS_STATS_INC(xs_qm_dqcachemisses);
795 870
796 /* 871 /*
797 * Dquot cache miss. We don't want to keep the inode lock across 872 * Dquot cache miss. We don't want to keep the inode lock across
798 * a (potential) disk read. Also we don't want to deal with the lock 873 * a (potential) disk read. Also we don't want to deal with the lock
799 * ordering between quotainode and this inode. OTOH, dropping the inode 874 * ordering between quotainode and this inode. OTOH, dropping the inode
800 * lock here means dealing with a chown that can happen before 875 * lock here means dealing with a chown that can happen before
801 * we re-acquire the lock. 876 * we re-acquire the lock.
802 */ 877 */
803 if (ip) 878 if (ip)
804 xfs_iunlock(ip, XFS_ILOCK_EXCL); 879 xfs_iunlock(ip, XFS_ILOCK_EXCL);
805 880
806 error = xfs_qm_dqread(mp, id, type, flags, &dqp); 881 error = xfs_qm_dqread(mp, id, type, flags, &dqp);
807 882
808 if (ip) 883 if (ip)
809 xfs_ilock(ip, XFS_ILOCK_EXCL); 884 xfs_ilock(ip, XFS_ILOCK_EXCL);
810 885
811 if (error) 886 if (error)
812 return error; 887 return error;
813 888
814 if (ip) { 889 if (ip) {
815 /* 890 /*
816 * A dquot could be attached to this inode by now, since 891 * A dquot could be attached to this inode by now, since
817 * we had dropped the ilock. 892 * we had dropped the ilock.
818 */ 893 */
819 if (xfs_this_quota_on(mp, type)) { 894 if (xfs_this_quota_on(mp, type)) {
820 struct xfs_dquot *dqp1; 895 struct xfs_dquot *dqp1;
821 896
822 dqp1 = xfs_inode_dquot(ip, type); 897 dqp1 = xfs_inode_dquot(ip, type);
823 if (dqp1) { 898 if (dqp1) {
824 xfs_qm_dqdestroy(dqp); 899 xfs_qm_dqdestroy(dqp);
825 dqp = dqp1; 900 dqp = dqp1;
826 xfs_dqlock(dqp); 901 xfs_dqlock(dqp);
827 goto dqret; 902 goto dqret;
828 } 903 }
829 } else { 904 } else {
830 /* inode stays locked on return */ 905 /* inode stays locked on return */
831 xfs_qm_dqdestroy(dqp); 906 xfs_qm_dqdestroy(dqp);
832 return XFS_ERROR(ESRCH); 907 return XFS_ERROR(ESRCH);
833 } 908 }
834 } 909 }
835 910
836 mutex_lock(&qi->qi_tree_lock); 911 mutex_lock(&qi->qi_tree_lock);
837 error = -radix_tree_insert(tree, id, dqp); 912 error = -radix_tree_insert(tree, id, dqp);
838 if (unlikely(error)) { 913 if (unlikely(error)) {
839 WARN_ON(error != EEXIST); 914 WARN_ON(error != EEXIST);
840 915
841 /* 916 /*
842 * Duplicate found. Just throw away the new dquot and start 917 * Duplicate found. Just throw away the new dquot and start
843 * over. 918 * over.
844 */ 919 */
845 mutex_unlock(&qi->qi_tree_lock); 920 mutex_unlock(&qi->qi_tree_lock);
846 trace_xfs_dqget_dup(dqp); 921 trace_xfs_dqget_dup(dqp);
847 xfs_qm_dqdestroy(dqp); 922 xfs_qm_dqdestroy(dqp);
848 XFS_STATS_INC(xs_qm_dquot_dups); 923 XFS_STATS_INC(xs_qm_dquot_dups);
849 goto restart; 924 goto restart;
850 } 925 }
851 926
852 /* 927 /*
853 * We return a locked dquot to the caller, with a reference taken 928 * We return a locked dquot to the caller, with a reference taken
854 */ 929 */
855 xfs_dqlock(dqp); 930 xfs_dqlock(dqp);
856 dqp->q_nrefs = 1; 931 dqp->q_nrefs = 1;
857 932
858 qi->qi_dquots++; 933 qi->qi_dquots++;
859 mutex_unlock(&qi->qi_tree_lock); 934 mutex_unlock(&qi->qi_tree_lock);
860 935
861 dqret: 936 dqret:
862 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 937 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
863 trace_xfs_dqget_miss(dqp); 938 trace_xfs_dqget_miss(dqp);
864 *O_dqpp = dqp; 939 *O_dqpp = dqp;
865 return (0); 940 return (0);
866 } 941 }
867 942
868 943
869 STATIC void 944 STATIC void
870 xfs_qm_dqput_final( 945 xfs_qm_dqput_final(
871 struct xfs_dquot *dqp) 946 struct xfs_dquot *dqp)
872 { 947 {
873 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; 948 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
874 struct xfs_dquot *gdqp; 949 struct xfs_dquot *gdqp;
875 950
876 trace_xfs_dqput_free(dqp); 951 trace_xfs_dqput_free(dqp);
877 952
878 mutex_lock(&qi->qi_lru_lock); 953 mutex_lock(&qi->qi_lru_lock);
879 if (list_empty(&dqp->q_lru)) { 954 if (list_empty(&dqp->q_lru)) {
880 list_add_tail(&dqp->q_lru, &qi->qi_lru_list); 955 list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
881 qi->qi_lru_count++; 956 qi->qi_lru_count++;
882 XFS_STATS_INC(xs_qm_dquot_unused); 957 XFS_STATS_INC(xs_qm_dquot_unused);
883 } 958 }
884 mutex_unlock(&qi->qi_lru_lock); 959 mutex_unlock(&qi->qi_lru_lock);
885 960
886 /* 961 /*
887 * If we just added a udquot to the freelist, then we want to release 962 * If we just added a udquot to the freelist, then we want to release
888 * the gdquot reference that it (probably) has. Otherwise it'll keep 963 * the gdquot reference that it (probably) has. Otherwise it'll keep
889 * the gdquot from getting reclaimed. 964 * the gdquot from getting reclaimed.
890 */ 965 */
891 gdqp = dqp->q_gdquot; 966 gdqp = dqp->q_gdquot;
892 if (gdqp) { 967 if (gdqp) {
893 xfs_dqlock(gdqp); 968 xfs_dqlock(gdqp);
894 dqp->q_gdquot = NULL; 969 dqp->q_gdquot = NULL;
895 } 970 }
896 xfs_dqunlock(dqp); 971 xfs_dqunlock(dqp);
897 972
898 /* 973 /*
899 * If we had a group quota hint, release it now. 974 * If we had a group quota hint, release it now.
900 */ 975 */
901 if (gdqp) 976 if (gdqp)
902 xfs_qm_dqput(gdqp); 977 xfs_qm_dqput(gdqp);
903 } 978 }
904 979
905 /* 980 /*
906 * Release a reference to the dquot (decrement ref-count) and unlock it. 981 * Release a reference to the dquot (decrement ref-count) and unlock it.
907 * 982 *
908 * If there is a group quota attached to this dquot, carefully release that 983 * If there is a group quota attached to this dquot, carefully release that
909 * too without tripping over deadlocks'n'stuff. 984 * too without tripping over deadlocks'n'stuff.
910 */ 985 */
911 void 986 void
912 xfs_qm_dqput( 987 xfs_qm_dqput(
913 struct xfs_dquot *dqp) 988 struct xfs_dquot *dqp)
914 { 989 {
915 ASSERT(dqp->q_nrefs > 0); 990 ASSERT(dqp->q_nrefs > 0);
916 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 991 ASSERT(XFS_DQ_IS_LOCKED(dqp));
917 992
918 trace_xfs_dqput(dqp); 993 trace_xfs_dqput(dqp);
919 994
920 if (--dqp->q_nrefs > 0) 995 if (--dqp->q_nrefs > 0)
921 xfs_dqunlock(dqp); 996 xfs_dqunlock(dqp);
922 else 997 else
923 xfs_qm_dqput_final(dqp); 998 xfs_qm_dqput_final(dqp);
924 } 999 }
925 1000
926 /* 1001 /*
927 * Release a dquot. Flush it if dirty, then dqput() it. 1002 * Release a dquot. Flush it if dirty, then dqput() it.
928 * dquot must not be locked. 1003 * dquot must not be locked.
929 */ 1004 */
930 void 1005 void
931 xfs_qm_dqrele( 1006 xfs_qm_dqrele(
932 xfs_dquot_t *dqp) 1007 xfs_dquot_t *dqp)
933 { 1008 {
934 if (!dqp) 1009 if (!dqp)
935 return; 1010 return;
936 1011
937 trace_xfs_dqrele(dqp); 1012 trace_xfs_dqrele(dqp);
938 1013
939 xfs_dqlock(dqp); 1014 xfs_dqlock(dqp);
940 /* 1015 /*
941 * We don't care to flush it if the dquot is dirty here. 1016 * We don't care to flush it if the dquot is dirty here.
942 * That will create stutters that we want to avoid. 1017 * That will create stutters that we want to avoid.
943 * Instead we do a delayed write when we try to reclaim 1018 * Instead we do a delayed write when we try to reclaim
944 * a dirty dquot. Also xfs_sync will take part of the burden... 1019 * a dirty dquot. Also xfs_sync will take part of the burden...
945 */ 1020 */
946 xfs_qm_dqput(dqp); 1021 xfs_qm_dqput(dqp);
947 } 1022 }
948 1023
949 /* 1024 /*
950 * This is the dquot flushing I/O completion routine. It is called 1025 * This is the dquot flushing I/O completion routine. It is called
951 * from interrupt level when the buffer containing the dquot is 1026 * from interrupt level when the buffer containing the dquot is
952 * flushed to disk. It is responsible for removing the dquot logitem 1027 * flushed to disk. It is responsible for removing the dquot logitem
953 * from the AIL if it has not been re-logged, and unlocking the dquot's 1028 * from the AIL if it has not been re-logged, and unlocking the dquot's
954 * flush lock. This behavior is very similar to that of inodes.. 1029 * flush lock. This behavior is very similar to that of inodes..
955 */ 1030 */
956 STATIC void 1031 STATIC void
957 xfs_qm_dqflush_done( 1032 xfs_qm_dqflush_done(
958 struct xfs_buf *bp, 1033 struct xfs_buf *bp,
959 struct xfs_log_item *lip) 1034 struct xfs_log_item *lip)
960 { 1035 {
961 xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; 1036 xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
962 xfs_dquot_t *dqp = qip->qli_dquot; 1037 xfs_dquot_t *dqp = qip->qli_dquot;
963 struct xfs_ail *ailp = lip->li_ailp; 1038 struct xfs_ail *ailp = lip->li_ailp;
964 1039
965 /* 1040 /*
966 * We only want to pull the item from the AIL if its 1041 * We only want to pull the item from the AIL if its
967 * location in the log has not changed since we started the flush. 1042 * location in the log has not changed since we started the flush.
968 * Thus, we only bother if the dquot's lsn has 1043 * Thus, we only bother if the dquot's lsn has
969 * not changed. First we check the lsn outside the lock 1044 * not changed. First we check the lsn outside the lock
970 * since it's cheaper, and then we recheck while 1045 * since it's cheaper, and then we recheck while
971 * holding the lock before removing the dquot from the AIL. 1046 * holding the lock before removing the dquot from the AIL.
972 */ 1047 */
973 if ((lip->li_flags & XFS_LI_IN_AIL) && 1048 if ((lip->li_flags & XFS_LI_IN_AIL) &&
974 lip->li_lsn == qip->qli_flush_lsn) { 1049 lip->li_lsn == qip->qli_flush_lsn) {
975 1050
976 /* xfs_trans_ail_delete() drops the AIL lock. */ 1051 /* xfs_trans_ail_delete() drops the AIL lock. */
977 spin_lock(&ailp->xa_lock); 1052 spin_lock(&ailp->xa_lock);
978 if (lip->li_lsn == qip->qli_flush_lsn) 1053 if (lip->li_lsn == qip->qli_flush_lsn)
979 xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); 1054 xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
980 else 1055 else
981 spin_unlock(&ailp->xa_lock); 1056 spin_unlock(&ailp->xa_lock);
982 } 1057 }
983 1058
984 /* 1059 /*
985 * Release the dq's flush lock since we're done with it. 1060 * Release the dq's flush lock since we're done with it.
986 */ 1061 */
987 xfs_dqfunlock(dqp); 1062 xfs_dqfunlock(dqp);
988 } 1063 }
989 1064
990 /* 1065 /*
991 * Write a modified dquot to disk. 1066 * Write a modified dquot to disk.
992 * The dquot must be locked and the flush lock too taken by caller. 1067 * The dquot must be locked and the flush lock too taken by caller.
993 * The flush lock will not be unlocked until the dquot reaches the disk, 1068 * The flush lock will not be unlocked until the dquot reaches the disk,
994 * but the dquot is free to be unlocked and modified by the caller 1069 * but the dquot is free to be unlocked and modified by the caller
995 * in the interim. Dquot is still locked on return. This behavior is 1070 * in the interim. Dquot is still locked on return. This behavior is
996 * identical to that of inodes. 1071 * identical to that of inodes.
997 */ 1072 */
998 int 1073 int
999 xfs_qm_dqflush( 1074 xfs_qm_dqflush(
1000 struct xfs_dquot *dqp, 1075 struct xfs_dquot *dqp,
1001 struct xfs_buf **bpp) 1076 struct xfs_buf **bpp)
1002 { 1077 {
1003 struct xfs_mount *mp = dqp->q_mount; 1078 struct xfs_mount *mp = dqp->q_mount;
1004 struct xfs_buf *bp; 1079 struct xfs_buf *bp;
1005 struct xfs_disk_dquot *ddqp; 1080 struct xfs_disk_dquot *ddqp;
1006 int error; 1081 int error;
1007 1082
1008 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1083 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1009 ASSERT(!completion_done(&dqp->q_flush)); 1084 ASSERT(!completion_done(&dqp->q_flush));
1010 1085
1011 trace_xfs_dqflush(dqp); 1086 trace_xfs_dqflush(dqp);
1012 1087
1013 *bpp = NULL; 1088 *bpp = NULL;
1014 1089
1015 xfs_qm_dqunpin_wait(dqp); 1090 xfs_qm_dqunpin_wait(dqp);
1016 1091
1017 /* 1092 /*
1018 * This may have been unpinned because the filesystem is shutting 1093 * This may have been unpinned because the filesystem is shutting
1019 * down forcibly. If that's the case we must not write this dquot 1094 * down forcibly. If that's the case we must not write this dquot
1020 * to disk, because the log record didn't make it to disk. 1095 * to disk, because the log record didn't make it to disk.
1021 * 1096 *
1022 * We also have to remove the log item from the AIL in this case, 1097 * We also have to remove the log item from the AIL in this case,
1023 * as we wait for an empty AIL as part of the unmount process. 1098 * as we wait for an empty AIL as part of the unmount process.
1024 */ 1099 */
1025 if (XFS_FORCED_SHUTDOWN(mp)) { 1100 if (XFS_FORCED_SHUTDOWN(mp)) {
1026 struct xfs_log_item *lip = &dqp->q_logitem.qli_item; 1101 struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
1027 dqp->dq_flags &= ~XFS_DQ_DIRTY; 1102 dqp->dq_flags &= ~XFS_DQ_DIRTY;
1028 1103
1029 spin_lock(&mp->m_ail->xa_lock); 1104 spin_lock(&mp->m_ail->xa_lock);
1030 if (lip->li_flags & XFS_LI_IN_AIL) 1105 if (lip->li_flags & XFS_LI_IN_AIL)
1031 xfs_trans_ail_delete(mp->m_ail, lip, 1106 xfs_trans_ail_delete(mp->m_ail, lip,
1032 SHUTDOWN_CORRUPT_INCORE); 1107 SHUTDOWN_CORRUPT_INCORE);
1033 else 1108 else
1034 spin_unlock(&mp->m_ail->xa_lock); 1109 spin_unlock(&mp->m_ail->xa_lock);
1035 error = XFS_ERROR(EIO); 1110 error = XFS_ERROR(EIO);
1036 goto out_unlock; 1111 goto out_unlock;
1037 } 1112 }
1038 1113
1039 /* 1114 /*
1040 * Get the buffer containing the on-disk dquot 1115 * Get the buffer containing the on-disk dquot
1041 */ 1116 */
1042 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, 1117 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
1043 mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); 1118 mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
1044 if (error) 1119 if (error)
1045 goto out_unlock; 1120 goto out_unlock;
1046 1121
1047 /* 1122 /*
1048 * Calculate the location of the dquot inside the buffer. 1123 * Calculate the location of the dquot inside the buffer.
1049 */ 1124 */
1050 ddqp = bp->b_addr + dqp->q_bufoffset; 1125 ddqp = bp->b_addr + dqp->q_bufoffset;
1051 1126
1052 /* 1127 /*
1053 * A simple sanity check in case we got a corrupted dquot.. 1128 * A simple sanity check in case we got a corrupted dquot..
1054 */ 1129 */
1055 error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 1130 error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
1056 XFS_QMOPT_DOWARN, "dqflush (incore copy)"); 1131 XFS_QMOPT_DOWARN, "dqflush (incore copy)");
1057 if (error) { 1132 if (error) {
1058 xfs_buf_relse(bp); 1133 xfs_buf_relse(bp);
1059 xfs_dqfunlock(dqp); 1134 xfs_dqfunlock(dqp);
1060 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1135 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1061 return XFS_ERROR(EIO); 1136 return XFS_ERROR(EIO);
1062 } 1137 }
1063 1138
1064 /* This is the only portion of data that needs to persist */ 1139 /* This is the only portion of data that needs to persist */
1065 memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); 1140 memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
1066 1141
1067 /* 1142 /*
1068 * Clear the dirty field and remember the flush lsn for later use. 1143 * Clear the dirty field and remember the flush lsn for later use.
1069 */ 1144 */
1070 dqp->dq_flags &= ~XFS_DQ_DIRTY; 1145 dqp->dq_flags &= ~XFS_DQ_DIRTY;
1071 1146
1072 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, 1147 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1073 &dqp->q_logitem.qli_item.li_lsn); 1148 &dqp->q_logitem.qli_item.li_lsn);
1149
1150 /*
1151 * copy the lsn into the on-disk dquot now while we have the in memory
1152 * dquot here. This can't be done later in the write verifier as we
1153 * can't get access to the log item at that point in time.
1154 */
1155 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1156 struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
1157
1158 dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
1159 }
1074 1160
1075 /* 1161 /*
1076 * Attach an iodone routine so that we can remove this dquot from the 1162 * Attach an iodone routine so that we can remove this dquot from the
1077 * AIL and release the flush lock once the dquot is synced to disk. 1163 * AIL and release the flush lock once the dquot is synced to disk.
1078 */ 1164 */
1079 xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, 1165 xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
1080 &dqp->q_logitem.qli_item); 1166 &dqp->q_logitem.qli_item);
1081 1167
1082 /* 1168 /*
1083 * If the buffer is pinned then push on the log so we won't 1169 * If the buffer is pinned then push on the log so we won't
1084 * get stuck waiting in the write for too long. 1170 * get stuck waiting in the write for too long.
1085 */ 1171 */
1086 if (xfs_buf_ispinned(bp)) { 1172 if (xfs_buf_ispinned(bp)) {
1087 trace_xfs_dqflush_force(dqp); 1173 trace_xfs_dqflush_force(dqp);
1088 xfs_log_force(mp, 0); 1174 xfs_log_force(mp, 0);
1089 } 1175 }
1090 1176
1091 trace_xfs_dqflush_done(dqp); 1177 trace_xfs_dqflush_done(dqp);
1092 *bpp = bp; 1178 *bpp = bp;
1093 return 0; 1179 return 0;
1094 1180
1095 out_unlock: 1181 out_unlock:
1096 xfs_dqfunlock(dqp); 1182 xfs_dqfunlock(dqp);
1097 return XFS_ERROR(EIO); 1183 return XFS_ERROR(EIO);
1098 } 1184 }
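
With the LSN stamped into each on-disk dquot above, every struct xfs_dqblk on a CRC-enabled filesystem carries its own self-describing metadata (CRC, UUID, LSN). A minimal sketch of the matching per-dqblk check on the read side, assuming the generic xfs_verify_cksum()/uuid_equal() helpers and the new dd_crc/dd_uuid field names, with the per-buffer dquot count supplied by the caller (illustrative only, not the exact routine added by this patch):

static bool
example_dquot_blk_verify(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	int			ndquots)	/* dquots per chunk, assumed supplied */
{
	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
	int			i;

	/* Non-CRC filesystems have nothing extra to verify. */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return true;

	/* Every xfs_dqblk in the buffer must pass both checks. */
	for (i = 0; i < ndquots; i++, d++) {
		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
				      offsetof(struct xfs_dqblk, dd_crc)))
			return false;
		if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
			return false;
	}
	return true;
}

Checking (and, on the write side, recalculating) the checksum per dquot rather than per buffer keeps each dquot independently recoverable during log replay, which is why the LSN must be stamped here in the flush path, where the log item is still reachable.
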
1099 1185
1100 /* 1186 /*
1101 * Lock two xfs_dquot structures. 1187 * Lock two xfs_dquot structures.
1102 * 1188 *
1103 * To avoid deadlocks we always lock the quota structure with 1189 * To avoid deadlocks we always lock the quota structure with
1104 * the lower id first. 1190 * the lower id first.
1105 */ 1191 */
1106 void 1192 void
1107 xfs_dqlock2( 1193 xfs_dqlock2(
1108 xfs_dquot_t *d1, 1194 xfs_dquot_t *d1,
1109 xfs_dquot_t *d2) 1195 xfs_dquot_t *d2)
1110 { 1196 {
1111 if (d1 && d2) { 1197 if (d1 && d2) {
1112 ASSERT(d1 != d2); 1198 ASSERT(d1 != d2);
1113 if (be32_to_cpu(d1->q_core.d_id) > 1199 if (be32_to_cpu(d1->q_core.d_id) >
1114 be32_to_cpu(d2->q_core.d_id)) { 1200 be32_to_cpu(d2->q_core.d_id)) {
1115 mutex_lock(&d2->q_qlock); 1201 mutex_lock(&d2->q_qlock);
1116 mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); 1202 mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
1117 } else { 1203 } else {
1118 mutex_lock(&d1->q_qlock); 1204 mutex_lock(&d1->q_qlock);
1119 mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); 1205 mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
1120 } 1206 }
1121 } else if (d1) { 1207 } else if (d1) {
1122 mutex_lock(&d1->q_qlock); 1208 mutex_lock(&d1->q_qlock);
1123 } else if (d2) { 1209 } else if (d2) {
1124 mutex_lock(&d2->q_qlock); 1210 mutex_lock(&d2->q_qlock);
1125 } 1211 }
1126 } 1212 }
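
The id-ordered locking means callers never have to think about argument order. A hypothetical caller that locks an inode's user and group dquots together (i_udquot/i_gdquot are the usual struct xfs_inode fields; either pointer may be NULL) could look like:

	xfs_dqlock2(ip->i_udquot, ip->i_gdquot);	/* lower-id dquot locked first */

	/* ... adjust or transfer counters while both dquots are held ... */

	if (ip->i_gdquot)
		xfs_dqunlock(ip->i_gdquot);
	if (ip->i_udquot)
		xfs_dqunlock(ip->i_udquot);
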
1127 1213
1128 int __init 1214 int __init
1129 xfs_qm_init(void) 1215 xfs_qm_init(void)
1130 { 1216 {
1131 xfs_qm_dqzone = 1217 xfs_qm_dqzone =
1132 kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot"); 1218 kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
1133 if (!xfs_qm_dqzone) 1219 if (!xfs_qm_dqzone)
1134 goto out; 1220 goto out;
1135 1221
1136 xfs_qm_dqtrxzone = 1222 xfs_qm_dqtrxzone =
1137 kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx"); 1223 kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
1138 if (!xfs_qm_dqtrxzone) 1224 if (!xfs_qm_dqtrxzone)
1139 goto out_free_dqzone; 1225 goto out_free_dqzone;
1140 1226
1141 return 0; 1227 return 0;
1142 1228
1143 out_free_dqzone: 1229 out_free_dqzone:
1144 kmem_zone_destroy(xfs_qm_dqzone); 1230 kmem_zone_destroy(xfs_qm_dqzone);
1145 out: 1231 out:
1146 return -ENOMEM; 1232 return -ENOMEM;
1147 } 1233 }
1148 1234
1149 void 1235 void
1150 xfs_qm_exit(void) 1236 xfs_qm_exit(void)
fs/xfs/xfs_log_recover.c
1 /* 1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_types.h" 20 #include "xfs_types.h"
21 #include "xfs_bit.h" 21 #include "xfs_bit.h"
22 #include "xfs_log.h" 22 #include "xfs_log.h"
23 #include "xfs_inum.h" 23 #include "xfs_inum.h"
24 #include "xfs_trans.h" 24 #include "xfs_trans.h"
25 #include "xfs_sb.h" 25 #include "xfs_sb.h"
26 #include "xfs_ag.h" 26 #include "xfs_ag.h"
27 #include "xfs_mount.h" 27 #include "xfs_mount.h"
28 #include "xfs_error.h" 28 #include "xfs_error.h"
29 #include "xfs_bmap_btree.h" 29 #include "xfs_bmap_btree.h"
30 #include "xfs_alloc_btree.h" 30 #include "xfs_alloc_btree.h"
31 #include "xfs_ialloc_btree.h" 31 #include "xfs_ialloc_btree.h"
32 #include "xfs_btree.h" 32 #include "xfs_btree.h"
33 #include "xfs_dinode.h" 33 #include "xfs_dinode.h"
34 #include "xfs_inode.h" 34 #include "xfs_inode.h"
35 #include "xfs_inode_item.h" 35 #include "xfs_inode_item.h"
36 #include "xfs_alloc.h" 36 #include "xfs_alloc.h"
37 #include "xfs_ialloc.h" 37 #include "xfs_ialloc.h"
38 #include "xfs_log_priv.h" 38 #include "xfs_log_priv.h"
39 #include "xfs_buf_item.h" 39 #include "xfs_buf_item.h"
40 #include "xfs_log_recover.h" 40 #include "xfs_log_recover.h"
41 #include "xfs_extfree_item.h" 41 #include "xfs_extfree_item.h"
42 #include "xfs_trans_priv.h" 42 #include "xfs_trans_priv.h"
43 #include "xfs_quota.h" 43 #include "xfs_quota.h"
44 #include "xfs_utils.h" 44 #include "xfs_utils.h"
45 #include "xfs_cksum.h" 45 #include "xfs_cksum.h"
46 #include "xfs_trace.h" 46 #include "xfs_trace.h"
47 #include "xfs_icache.h" 47 #include "xfs_icache.h"
48 48
49 STATIC int 49 STATIC int
50 xlog_find_zeroed( 50 xlog_find_zeroed(
51 struct xlog *, 51 struct xlog *,
52 xfs_daddr_t *); 52 xfs_daddr_t *);
53 STATIC int 53 STATIC int
54 xlog_clear_stale_blocks( 54 xlog_clear_stale_blocks(
55 struct xlog *, 55 struct xlog *,
56 xfs_lsn_t); 56 xfs_lsn_t);
57 #if defined(DEBUG) 57 #if defined(DEBUG)
58 STATIC void 58 STATIC void
59 xlog_recover_check_summary( 59 xlog_recover_check_summary(
60 struct xlog *); 60 struct xlog *);
61 #else 61 #else
62 #define xlog_recover_check_summary(log) 62 #define xlog_recover_check_summary(log)
63 #endif 63 #endif
64 64
65 /* 65 /*
66 * This structure is used during recovery to record the buf log items which 66 * This structure is used during recovery to record the buf log items which
67 * have been canceled and should not be replayed. 67 * have been canceled and should not be replayed.
68 */ 68 */
69 struct xfs_buf_cancel { 69 struct xfs_buf_cancel {
70 xfs_daddr_t bc_blkno; 70 xfs_daddr_t bc_blkno;
71 uint bc_len; 71 uint bc_len;
72 int bc_refcount; 72 int bc_refcount;
73 struct list_head bc_list; 73 struct list_head bc_list;
74 }; 74 };
75 75
76 /* 76 /*
77 * Sector aligned buffer routines for buffer create/read/write/access 77 * Sector aligned buffer routines for buffer create/read/write/access
78 */ 78 */
79 79
80 /* 80 /*
81 * Verify the given count of basic blocks is valid number of blocks 81 * Verify the given count of basic blocks is valid number of blocks
82 * to specify for an operation involving the given XFS log buffer. 82 * to specify for an operation involving the given XFS log buffer.
83 * Returns nonzero if the count is valid, 0 otherwise. 83 * Returns nonzero if the count is valid, 0 otherwise.
84 */ 84 */
85 85
86 static inline int 86 static inline int
87 xlog_buf_bbcount_valid( 87 xlog_buf_bbcount_valid(
88 struct xlog *log, 88 struct xlog *log,
89 int bbcount) 89 int bbcount)
90 { 90 {
91 return bbcount > 0 && bbcount <= log->l_logBBsize; 91 return bbcount > 0 && bbcount <= log->l_logBBsize;
92 } 92 }
93 93
94 /* 94 /*
95 * Allocate a buffer to hold log data. The buffer needs to be able 95 * Allocate a buffer to hold log data. The buffer needs to be able
96 * to map to a range of nbblks basic blocks at any valid (basic 96 * to map to a range of nbblks basic blocks at any valid (basic
97 * block) offset within the log. 97 * block) offset within the log.
98 */ 98 */
99 STATIC xfs_buf_t * 99 STATIC xfs_buf_t *
100 xlog_get_bp( 100 xlog_get_bp(
101 struct xlog *log, 101 struct xlog *log,
102 int nbblks) 102 int nbblks)
103 { 103 {
104 struct xfs_buf *bp; 104 struct xfs_buf *bp;
105 105
106 if (!xlog_buf_bbcount_valid(log, nbblks)) { 106 if (!xlog_buf_bbcount_valid(log, nbblks)) {
107 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 107 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
108 nbblks); 108 nbblks);
109 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 109 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
110 return NULL; 110 return NULL;
111 } 111 }
112 112
113 /* 113 /*
114 * We do log I/O in units of log sectors (a power-of-2 114 * We do log I/O in units of log sectors (a power-of-2
115 * multiple of the basic block size), so we round up the 115 * multiple of the basic block size), so we round up the
116 * requested size to accommodate the basic blocks required 116 * requested size to accommodate the basic blocks required
117 * for complete log sectors. 117 * for complete log sectors.
118 * 118 *
119 * In addition, the buffer may be used for a non-sector- 119 * In addition, the buffer may be used for a non-sector-
120 * aligned block offset, in which case an I/O of the 120 * aligned block offset, in which case an I/O of the
121 * requested size could extend beyond the end of the 121 * requested size could extend beyond the end of the
122 * buffer. If the requested size is only 1 basic block it 122 * buffer. If the requested size is only 1 basic block it
123 * will never straddle a sector boundary, so this won't be 123 * will never straddle a sector boundary, so this won't be
124 * an issue. Nor will this be a problem if the log I/O is 124 * an issue. Nor will this be a problem if the log I/O is
125 * done in basic blocks (sector size 1). But otherwise we 125 * done in basic blocks (sector size 1). But otherwise we
126 * extend the buffer by one extra log sector to ensure 126 * extend the buffer by one extra log sector to ensure
127 * there's space to accommodate this possibility. 127 * there's space to accommodate this possibility.
128 */ 128 */
129 if (nbblks > 1 && log->l_sectBBsize > 1) 129 if (nbblks > 1 && log->l_sectBBsize > 1)
130 nbblks += log->l_sectBBsize; 130 nbblks += log->l_sectBBsize;
131 nbblks = round_up(nbblks, log->l_sectBBsize); 131 nbblks = round_up(nbblks, log->l_sectBBsize);
132 132
133 bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); 133 bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0);
134 if (bp) 134 if (bp)
135 xfs_buf_unlock(bp); 135 xfs_buf_unlock(bp);
136 return bp; 136 return bp;
137 } 137 }
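
To make the sizing concrete: on a log with 4k sectors (l_sectBBsize = 8), a request for nbblks = 10 basic blocks is first padded by one sector to 18 and then rounded up to 24 basic blocks, so a read that starts part-way into a sector still fits entirely within the buffer (example numbers only).
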
138 138
139 STATIC void 139 STATIC void
140 xlog_put_bp( 140 xlog_put_bp(
141 xfs_buf_t *bp) 141 xfs_buf_t *bp)
142 { 142 {
143 xfs_buf_free(bp); 143 xfs_buf_free(bp);
144 } 144 }
145 145
146 /* 146 /*
147 * Return the address of the start of the given block number's data 147 * Return the address of the start of the given block number's data
148 * in a log buffer. The buffer covers a log sector-aligned region. 148 * in a log buffer. The buffer covers a log sector-aligned region.
149 */ 149 */
150 STATIC xfs_caddr_t 150 STATIC xfs_caddr_t
151 xlog_align( 151 xlog_align(
152 struct xlog *log, 152 struct xlog *log,
153 xfs_daddr_t blk_no, 153 xfs_daddr_t blk_no,
154 int nbblks, 154 int nbblks,
155 struct xfs_buf *bp) 155 struct xfs_buf *bp)
156 { 156 {
157 xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); 157 xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
158 158
159 ASSERT(offset + nbblks <= bp->b_length); 159 ASSERT(offset + nbblks <= bp->b_length);
160 return bp->b_addr + BBTOB(offset); 160 return bp->b_addr + BBTOB(offset);
161 } 161 }
162 162
163 163
164 /* 164 /*
165 * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 165 * nbblks should be uint, but oh well. Just want to catch that 32-bit length.
166 */ 166 */
167 STATIC int 167 STATIC int
168 xlog_bread_noalign( 168 xlog_bread_noalign(
169 struct xlog *log, 169 struct xlog *log,
170 xfs_daddr_t blk_no, 170 xfs_daddr_t blk_no,
171 int nbblks, 171 int nbblks,
172 struct xfs_buf *bp) 172 struct xfs_buf *bp)
173 { 173 {
174 int error; 174 int error;
175 175
176 if (!xlog_buf_bbcount_valid(log, nbblks)) { 176 if (!xlog_buf_bbcount_valid(log, nbblks)) {
177 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 177 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
178 nbblks); 178 nbblks);
179 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 179 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
180 return EFSCORRUPTED; 180 return EFSCORRUPTED;
181 } 181 }
182 182
183 blk_no = round_down(blk_no, log->l_sectBBsize); 183 blk_no = round_down(blk_no, log->l_sectBBsize);
184 nbblks = round_up(nbblks, log->l_sectBBsize); 184 nbblks = round_up(nbblks, log->l_sectBBsize);
185 185
186 ASSERT(nbblks > 0); 186 ASSERT(nbblks > 0);
187 ASSERT(nbblks <= bp->b_length); 187 ASSERT(nbblks <= bp->b_length);
188 188
189 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 189 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
190 XFS_BUF_READ(bp); 190 XFS_BUF_READ(bp);
191 bp->b_io_length = nbblks; 191 bp->b_io_length = nbblks;
192 bp->b_error = 0; 192 bp->b_error = 0;
193 193
194 xfsbdstrat(log->l_mp, bp); 194 xfsbdstrat(log->l_mp, bp);
195 error = xfs_buf_iowait(bp); 195 error = xfs_buf_iowait(bp);
196 if (error) 196 if (error)
197 xfs_buf_ioerror_alert(bp, __func__); 197 xfs_buf_ioerror_alert(bp, __func__);
198 return error; 198 return error;
199 } 199 }
200 200
201 STATIC int 201 STATIC int
202 xlog_bread( 202 xlog_bread(
203 struct xlog *log, 203 struct xlog *log,
204 xfs_daddr_t blk_no, 204 xfs_daddr_t blk_no,
205 int nbblks, 205 int nbblks,
206 struct xfs_buf *bp, 206 struct xfs_buf *bp,
207 xfs_caddr_t *offset) 207 xfs_caddr_t *offset)
208 { 208 {
209 int error; 209 int error;
210 210
211 error = xlog_bread_noalign(log, blk_no, nbblks, bp); 211 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
212 if (error) 212 if (error)
213 return error; 213 return error;
214 214
215 *offset = xlog_align(log, blk_no, nbblks, bp); 215 *offset = xlog_align(log, blk_no, nbblks, bp);
216 return 0; 216 return 0;
217 } 217 }
218 218
219 /* 219 /*
220 * Read at an offset into the buffer. Returns with the buffer in its original 220 * Read at an offset into the buffer. Returns with the buffer in its original
221 * state regardless of the result of the read. 221 * state regardless of the result of the read.
222 */ 222 */
223 STATIC int 223 STATIC int
224 xlog_bread_offset( 224 xlog_bread_offset(
225 struct xlog *log, 225 struct xlog *log,
226 xfs_daddr_t blk_no, /* block to read from */ 226 xfs_daddr_t blk_no, /* block to read from */
227 int nbblks, /* blocks to read */ 227 int nbblks, /* blocks to read */
228 struct xfs_buf *bp, 228 struct xfs_buf *bp,
229 xfs_caddr_t offset) 229 xfs_caddr_t offset)
230 { 230 {
231 xfs_caddr_t orig_offset = bp->b_addr; 231 xfs_caddr_t orig_offset = bp->b_addr;
232 int orig_len = BBTOB(bp->b_length); 232 int orig_len = BBTOB(bp->b_length);
233 int error, error2; 233 int error, error2;
234 234
235 error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); 235 error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
236 if (error) 236 if (error)
237 return error; 237 return error;
238 238
239 error = xlog_bread_noalign(log, blk_no, nbblks, bp); 239 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
240 240
241 /* must reset buffer pointer even on error */ 241 /* must reset buffer pointer even on error */
242 error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); 242 error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
243 if (error) 243 if (error)
244 return error; 244 return error;
245 return error2; 245 return error2;
246 } 246 }
247 247
248 /* 248 /*
249 * Write out the buffer at the given block for the given number of blocks. 249 * Write out the buffer at the given block for the given number of blocks.
250 * The buffer is kept locked across the write and is returned locked. 250 * The buffer is kept locked across the write and is returned locked.
251 * This can only be used for synchronous log writes. 251 * This can only be used for synchronous log writes.
252 */ 252 */
253 STATIC int 253 STATIC int
254 xlog_bwrite( 254 xlog_bwrite(
255 struct xlog *log, 255 struct xlog *log,
256 xfs_daddr_t blk_no, 256 xfs_daddr_t blk_no,
257 int nbblks, 257 int nbblks,
258 struct xfs_buf *bp) 258 struct xfs_buf *bp)
259 { 259 {
260 int error; 260 int error;
261 261
262 if (!xlog_buf_bbcount_valid(log, nbblks)) { 262 if (!xlog_buf_bbcount_valid(log, nbblks)) {
263 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 263 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
264 nbblks); 264 nbblks);
265 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 265 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
266 return EFSCORRUPTED; 266 return EFSCORRUPTED;
267 } 267 }
268 268
269 blk_no = round_down(blk_no, log->l_sectBBsize); 269 blk_no = round_down(blk_no, log->l_sectBBsize);
270 nbblks = round_up(nbblks, log->l_sectBBsize); 270 nbblks = round_up(nbblks, log->l_sectBBsize);
271 271
272 ASSERT(nbblks > 0); 272 ASSERT(nbblks > 0);
273 ASSERT(nbblks <= bp->b_length); 273 ASSERT(nbblks <= bp->b_length);
274 274
275 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 275 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
276 XFS_BUF_ZEROFLAGS(bp); 276 XFS_BUF_ZEROFLAGS(bp);
277 xfs_buf_hold(bp); 277 xfs_buf_hold(bp);
278 xfs_buf_lock(bp); 278 xfs_buf_lock(bp);
279 bp->b_io_length = nbblks; 279 bp->b_io_length = nbblks;
280 bp->b_error = 0; 280 bp->b_error = 0;
281 281
282 error = xfs_bwrite(bp); 282 error = xfs_bwrite(bp);
283 if (error) 283 if (error)
284 xfs_buf_ioerror_alert(bp, __func__); 284 xfs_buf_ioerror_alert(bp, __func__);
285 xfs_buf_relse(bp); 285 xfs_buf_relse(bp);
286 return error; 286 return error;
287 } 287 }
288 288
289 #ifdef DEBUG 289 #ifdef DEBUG
290 /* 290 /*
291 * dump debug superblock and log record information 291 * dump debug superblock and log record information
292 */ 292 */
293 STATIC void 293 STATIC void
294 xlog_header_check_dump( 294 xlog_header_check_dump(
295 xfs_mount_t *mp, 295 xfs_mount_t *mp,
296 xlog_rec_header_t *head) 296 xlog_rec_header_t *head)
297 { 297 {
298 xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", 298 xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n",
299 __func__, &mp->m_sb.sb_uuid, XLOG_FMT); 299 __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
300 xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", 300 xfs_debug(mp, " log : uuid = %pU, fmt = %d\n",
301 &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); 301 &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
302 } 302 }
303 #else 303 #else
304 #define xlog_header_check_dump(mp, head) 304 #define xlog_header_check_dump(mp, head)
305 #endif 305 #endif
306 306
307 /* 307 /*
308 * check log record header for recovery 308 * check log record header for recovery
309 */ 309 */
310 STATIC int 310 STATIC int
311 xlog_header_check_recover( 311 xlog_header_check_recover(
312 xfs_mount_t *mp, 312 xfs_mount_t *mp,
313 xlog_rec_header_t *head) 313 xlog_rec_header_t *head)
314 { 314 {
315 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); 315 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
316 316
317 /* 317 /*
318 * IRIX doesn't write the h_fmt field and leaves it zeroed 318 * IRIX doesn't write the h_fmt field and leaves it zeroed
319 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover 319 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
320 * a dirty log created in IRIX. 320 * a dirty log created in IRIX.
321 */ 321 */
322 if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { 322 if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
323 xfs_warn(mp, 323 xfs_warn(mp,
324 "dirty log written in incompatible format - can't recover"); 324 "dirty log written in incompatible format - can't recover");
325 xlog_header_check_dump(mp, head); 325 xlog_header_check_dump(mp, head);
326 XFS_ERROR_REPORT("xlog_header_check_recover(1)", 326 XFS_ERROR_REPORT("xlog_header_check_recover(1)",
327 XFS_ERRLEVEL_HIGH, mp); 327 XFS_ERRLEVEL_HIGH, mp);
328 return XFS_ERROR(EFSCORRUPTED); 328 return XFS_ERROR(EFSCORRUPTED);
329 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 329 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
330 xfs_warn(mp, 330 xfs_warn(mp,
331 "dirty log entry has mismatched uuid - can't recover"); 331 "dirty log entry has mismatched uuid - can't recover");
332 xlog_header_check_dump(mp, head); 332 xlog_header_check_dump(mp, head);
333 XFS_ERROR_REPORT("xlog_header_check_recover(2)", 333 XFS_ERROR_REPORT("xlog_header_check_recover(2)",
334 XFS_ERRLEVEL_HIGH, mp); 334 XFS_ERRLEVEL_HIGH, mp);
335 return XFS_ERROR(EFSCORRUPTED); 335 return XFS_ERROR(EFSCORRUPTED);
336 } 336 }
337 return 0; 337 return 0;
338 } 338 }
339 339
340 /* 340 /*
341 * read the head block of the log and check the header 341 * read the head block of the log and check the header
342 */ 342 */
343 STATIC int 343 STATIC int
344 xlog_header_check_mount( 344 xlog_header_check_mount(
345 xfs_mount_t *mp, 345 xfs_mount_t *mp,
346 xlog_rec_header_t *head) 346 xlog_rec_header_t *head)
347 { 347 {
348 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); 348 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
349 349
350 if (uuid_is_nil(&head->h_fs_uuid)) { 350 if (uuid_is_nil(&head->h_fs_uuid)) {
351 /* 351 /*
352 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If 352 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
353 * h_fs_uuid is nil, we assume this log was last mounted 353 * h_fs_uuid is nil, we assume this log was last mounted
354 * by IRIX and continue. 354 * by IRIX and continue.
355 */ 355 */
356 xfs_warn(mp, "nil uuid in log - IRIX style log"); 356 xfs_warn(mp, "nil uuid in log - IRIX style log");
357 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 357 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
358 xfs_warn(mp, "log has mismatched uuid - can't recover"); 358 xfs_warn(mp, "log has mismatched uuid - can't recover");
359 xlog_header_check_dump(mp, head); 359 xlog_header_check_dump(mp, head);
360 XFS_ERROR_REPORT("xlog_header_check_mount", 360 XFS_ERROR_REPORT("xlog_header_check_mount",
361 XFS_ERRLEVEL_HIGH, mp); 361 XFS_ERRLEVEL_HIGH, mp);
362 return XFS_ERROR(EFSCORRUPTED); 362 return XFS_ERROR(EFSCORRUPTED);
363 } 363 }
364 return 0; 364 return 0;
365 } 365 }
366 366
367 STATIC void 367 STATIC void
368 xlog_recover_iodone( 368 xlog_recover_iodone(
369 struct xfs_buf *bp) 369 struct xfs_buf *bp)
370 { 370 {
371 if (bp->b_error) { 371 if (bp->b_error) {
372 /* 372 /*
373 * We're not going to bother about retrying 373 * We're not going to bother about retrying
374 * this during recovery. One strike! 374 * this during recovery. One strike!
375 */ 375 */
376 xfs_buf_ioerror_alert(bp, __func__); 376 xfs_buf_ioerror_alert(bp, __func__);
377 xfs_force_shutdown(bp->b_target->bt_mount, 377 xfs_force_shutdown(bp->b_target->bt_mount,
378 SHUTDOWN_META_IO_ERROR); 378 SHUTDOWN_META_IO_ERROR);
379 } 379 }
380 bp->b_iodone = NULL; 380 bp->b_iodone = NULL;
381 xfs_buf_ioend(bp, 0); 381 xfs_buf_ioend(bp, 0);
382 } 382 }
383 383
384 /* 384 /*
385 * This routine finds (to an approximation) the first block in the physical 385 * This routine finds (to an approximation) the first block in the physical
386 * log which contains the given cycle. It uses a binary search algorithm. 386 * log which contains the given cycle. It uses a binary search algorithm.
387 * Note that the algorithm can not be perfect because the disk will not 387 * Note that the algorithm can not be perfect because the disk will not
388 * necessarily be perfect. 388 * necessarily be perfect.
389 */ 389 */
390 STATIC int 390 STATIC int
391 xlog_find_cycle_start( 391 xlog_find_cycle_start(
392 struct xlog *log, 392 struct xlog *log,
393 struct xfs_buf *bp, 393 struct xfs_buf *bp,
394 xfs_daddr_t first_blk, 394 xfs_daddr_t first_blk,
395 xfs_daddr_t *last_blk, 395 xfs_daddr_t *last_blk,
396 uint cycle) 396 uint cycle)
397 { 397 {
398 xfs_caddr_t offset; 398 xfs_caddr_t offset;
399 xfs_daddr_t mid_blk; 399 xfs_daddr_t mid_blk;
400 xfs_daddr_t end_blk; 400 xfs_daddr_t end_blk;
401 uint mid_cycle; 401 uint mid_cycle;
402 int error; 402 int error;
403 403
404 end_blk = *last_blk; 404 end_blk = *last_blk;
405 mid_blk = BLK_AVG(first_blk, end_blk); 405 mid_blk = BLK_AVG(first_blk, end_blk);
406 while (mid_blk != first_blk && mid_blk != end_blk) { 406 while (mid_blk != first_blk && mid_blk != end_blk) {
407 error = xlog_bread(log, mid_blk, 1, bp, &offset); 407 error = xlog_bread(log, mid_blk, 1, bp, &offset);
408 if (error) 408 if (error)
409 return error; 409 return error;
410 mid_cycle = xlog_get_cycle(offset); 410 mid_cycle = xlog_get_cycle(offset);
411 if (mid_cycle == cycle) 411 if (mid_cycle == cycle)
412 end_blk = mid_blk; /* last_half_cycle == mid_cycle */ 412 end_blk = mid_blk; /* last_half_cycle == mid_cycle */
413 else 413 else
414 first_blk = mid_blk; /* first_half_cycle == mid_cycle */ 414 first_blk = mid_blk; /* first_half_cycle == mid_cycle */
415 mid_blk = BLK_AVG(first_blk, end_blk); 415 mid_blk = BLK_AVG(first_blk, end_blk);
416 } 416 }
417 ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) || 417 ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
418 (mid_blk == end_blk && mid_blk-1 == first_blk)); 418 (mid_blk == end_blk && mid_blk-1 == first_blk));
419 419
420 *last_blk = end_blk; 420 *last_blk = end_blk;
421 421
422 return 0; 422 return 0;
423 } 423 }
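
A worked example with made-up cycle numbers: if blocks 0-7 hold cycles 3 3 3 3 2 2 2 2 and the routine is called with first_blk = 0, *last_blk = 7 and cycle = 2, successive midpoints 3, 5 and 4 move first_blk past the cycle-3 region and end_blk down through the cycle-2 region, so *last_blk converges on block 4, the first block of the new cycle; callers then verify a window around that point because, as noted above, the on-disk state is only approximately ordered.
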
424 424
425 /* 425 /*
426 * Check that a range of blocks does not contain stop_on_cycle_no. 426 * Check that a range of blocks does not contain stop_on_cycle_no.
427 * Fill in *new_blk with the block offset where such a block is 427 * Fill in *new_blk with the block offset where such a block is
428 * found, or with -1 (an invalid block number) if there is no such 428 * found, or with -1 (an invalid block number) if there is no such
429 * block in the range. The scan needs to occur from front to back 429 * block in the range. The scan needs to occur from front to back
430 * and the pointer into the region must be updated since a later 430 * and the pointer into the region must be updated since a later
431 * routine will need to perform another test. 431 * routine will need to perform another test.
432 */ 432 */
433 STATIC int 433 STATIC int
434 xlog_find_verify_cycle( 434 xlog_find_verify_cycle(
435 struct xlog *log, 435 struct xlog *log,
436 xfs_daddr_t start_blk, 436 xfs_daddr_t start_blk,
437 int nbblks, 437 int nbblks,
438 uint stop_on_cycle_no, 438 uint stop_on_cycle_no,
439 xfs_daddr_t *new_blk) 439 xfs_daddr_t *new_blk)
440 { 440 {
441 xfs_daddr_t i, j; 441 xfs_daddr_t i, j;
442 uint cycle; 442 uint cycle;
443 xfs_buf_t *bp; 443 xfs_buf_t *bp;
444 xfs_daddr_t bufblks; 444 xfs_daddr_t bufblks;
445 xfs_caddr_t buf = NULL; 445 xfs_caddr_t buf = NULL;
446 int error = 0; 446 int error = 0;
447 447
448 /* 448 /*
449 * Greedily allocate a buffer big enough to handle the full 449 * Greedily allocate a buffer big enough to handle the full
450 * range of basic blocks we'll be examining. If that fails, 450 * range of basic blocks we'll be examining. If that fails,
451 * try a smaller size. We need to be able to read at least 451 * try a smaller size. We need to be able to read at least
452 * a log sector, or we're out of luck. 452 * a log sector, or we're out of luck.
453 */ 453 */
454 bufblks = 1 << ffs(nbblks); 454 bufblks = 1 << ffs(nbblks);
455 while (bufblks > log->l_logBBsize) 455 while (bufblks > log->l_logBBsize)
456 bufblks >>= 1; 456 bufblks >>= 1;
457 while (!(bp = xlog_get_bp(log, bufblks))) { 457 while (!(bp = xlog_get_bp(log, bufblks))) {
458 bufblks >>= 1; 458 bufblks >>= 1;
459 if (bufblks < log->l_sectBBsize) 459 if (bufblks < log->l_sectBBsize)
460 return ENOMEM; 460 return ENOMEM;
461 } 461 }
462 462
463 for (i = start_blk; i < start_blk + nbblks; i += bufblks) { 463 for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
464 int bcount; 464 int bcount;
465 465
466 bcount = min(bufblks, (start_blk + nbblks - i)); 466 bcount = min(bufblks, (start_blk + nbblks - i));
467 467
468 error = xlog_bread(log, i, bcount, bp, &buf); 468 error = xlog_bread(log, i, bcount, bp, &buf);
469 if (error) 469 if (error)
470 goto out; 470 goto out;
471 471
472 for (j = 0; j < bcount; j++) { 472 for (j = 0; j < bcount; j++) {
473 cycle = xlog_get_cycle(buf); 473 cycle = xlog_get_cycle(buf);
474 if (cycle == stop_on_cycle_no) { 474 if (cycle == stop_on_cycle_no) {
475 *new_blk = i+j; 475 *new_blk = i+j;
476 goto out; 476 goto out;
477 } 477 }
478 478
479 buf += BBSIZE; 479 buf += BBSIZE;
480 } 480 }
481 } 481 }
482 482
483 *new_blk = -1; 483 *new_blk = -1;
484 484
485 out: 485 out:
486 xlog_put_bp(bp); 486 xlog_put_bp(bp);
487 return error; 487 return error;
488 } 488 }
489 489
490 /* 490 /*
491 * Potentially backup over partial log record write. 491 * Potentially backup over partial log record write.
492 * 492 *
493 * In the typical case, last_blk is the number of the block directly after 493 * In the typical case, last_blk is the number of the block directly after
494 * a good log record. Therefore, we subtract one to get the block number 494 * a good log record. Therefore, we subtract one to get the block number
495 * of the last block in the given buffer. extra_bblks contains the number 495 * of the last block in the given buffer. extra_bblks contains the number
496 * of blocks we would have read on a previous read. This happens when the 496 * of blocks we would have read on a previous read. This happens when the
497 * last log record is split over the end of the physical log. 497 * last log record is split over the end of the physical log.
498 * 498 *
499 * extra_bblks is the number of blocks potentially verified on a previous 499 * extra_bblks is the number of blocks potentially verified on a previous
500 * call to this routine. 500 * call to this routine.
501 */ 501 */
502 STATIC int 502 STATIC int
503 xlog_find_verify_log_record( 503 xlog_find_verify_log_record(
504 struct xlog *log, 504 struct xlog *log,
505 xfs_daddr_t start_blk, 505 xfs_daddr_t start_blk,
506 xfs_daddr_t *last_blk, 506 xfs_daddr_t *last_blk,
507 int extra_bblks) 507 int extra_bblks)
508 { 508 {
509 xfs_daddr_t i; 509 xfs_daddr_t i;
510 xfs_buf_t *bp; 510 xfs_buf_t *bp;
511 xfs_caddr_t offset = NULL; 511 xfs_caddr_t offset = NULL;
512 xlog_rec_header_t *head = NULL; 512 xlog_rec_header_t *head = NULL;
513 int error = 0; 513 int error = 0;
514 int smallmem = 0; 514 int smallmem = 0;
515 int num_blks = *last_blk - start_blk; 515 int num_blks = *last_blk - start_blk;
516 int xhdrs; 516 int xhdrs;
517 517
518 ASSERT(start_blk != 0 || *last_blk != start_blk); 518 ASSERT(start_blk != 0 || *last_blk != start_blk);
519 519
520 if (!(bp = xlog_get_bp(log, num_blks))) { 520 if (!(bp = xlog_get_bp(log, num_blks))) {
521 if (!(bp = xlog_get_bp(log, 1))) 521 if (!(bp = xlog_get_bp(log, 1)))
522 return ENOMEM; 522 return ENOMEM;
523 smallmem = 1; 523 smallmem = 1;
524 } else { 524 } else {
525 error = xlog_bread(log, start_blk, num_blks, bp, &offset); 525 error = xlog_bread(log, start_blk, num_blks, bp, &offset);
526 if (error) 526 if (error)
527 goto out; 527 goto out;
528 offset += ((num_blks - 1) << BBSHIFT); 528 offset += ((num_blks - 1) << BBSHIFT);
529 } 529 }
530 530
531 for (i = (*last_blk) - 1; i >= 0; i--) { 531 for (i = (*last_blk) - 1; i >= 0; i--) {
532 if (i < start_blk) { 532 if (i < start_blk) {
533 /* valid log record not found */ 533 /* valid log record not found */
534 xfs_warn(log->l_mp, 534 xfs_warn(log->l_mp,
535 "Log inconsistent (didn't find previous header)"); 535 "Log inconsistent (didn't find previous header)");
536 ASSERT(0); 536 ASSERT(0);
537 error = XFS_ERROR(EIO); 537 error = XFS_ERROR(EIO);
538 goto out; 538 goto out;
539 } 539 }
540 540
541 if (smallmem) { 541 if (smallmem) {
542 error = xlog_bread(log, i, 1, bp, &offset); 542 error = xlog_bread(log, i, 1, bp, &offset);
543 if (error) 543 if (error)
544 goto out; 544 goto out;
545 } 545 }
546 546
547 head = (xlog_rec_header_t *)offset; 547 head = (xlog_rec_header_t *)offset;
548 548
549 if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) 549 if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
550 break; 550 break;
551 551
552 if (!smallmem) 552 if (!smallmem)
553 offset -= BBSIZE; 553 offset -= BBSIZE;
554 } 554 }
555 555
556 /* 556 /*
557 * We hit the beginning of the physical log & still no header. Return 557 * We hit the beginning of the physical log & still no header. Return
558 * to caller. If caller can handle a return of -1, then this routine 558 * to caller. If caller can handle a return of -1, then this routine
559 * will be called again for the end of the physical log. 559 * will be called again for the end of the physical log.
560 */ 560 */
561 if (i == -1) { 561 if (i == -1) {
562 error = -1; 562 error = -1;
563 goto out; 563 goto out;
564 } 564 }
565 565
566 /* 566 /*
567 * We have the final block of the good log (the first block 567 * We have the final block of the good log (the first block
568 * of the log record _before_ the head). So we check the uuid. 568 * of the log record _before_ the head). So we check the uuid.
569 */ 569 */
570 if ((error = xlog_header_check_mount(log->l_mp, head))) 570 if ((error = xlog_header_check_mount(log->l_mp, head)))
571 goto out; 571 goto out;
572 572
573 /* 573 /*
574 * We may have found a log record header before we expected one. 574 * We may have found a log record header before we expected one.
575 * last_blk will be the 1st block # with a given cycle #. We may end 575 * last_blk will be the 1st block # with a given cycle #. We may end
576 * up reading an entire log record. In this case, we don't want to 576 * up reading an entire log record. In this case, we don't want to
577 * reset last_blk. Only when last_blk points in the middle of a log 577 * reset last_blk. Only when last_blk points in the middle of a log
578 * record do we update last_blk. 578 * record do we update last_blk.
579 */ 579 */
580 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 580 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
581 uint h_size = be32_to_cpu(head->h_size); 581 uint h_size = be32_to_cpu(head->h_size);
582 582
583 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; 583 xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
584 if (h_size % XLOG_HEADER_CYCLE_SIZE) 584 if (h_size % XLOG_HEADER_CYCLE_SIZE)
585 xhdrs++; 585 xhdrs++;
586 } else { 586 } else {
587 xhdrs = 1; 587 xhdrs = 1;
588 } 588 }
589 589
590 if (*last_blk - i + extra_bblks != 590 if (*last_blk - i + extra_bblks !=
591 BTOBB(be32_to_cpu(head->h_len)) + xhdrs) 591 BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
592 *last_blk = i; 592 *last_blk = i;
593 593
594 out: 594 out:
595 xlog_put_bp(bp); 595 xlog_put_bp(bp);
596 return error; 596 return error;
597 } 597 }
598 598
599 /* 599 /*
600 * Head is defined to be the point of the log where the next log write 600 * Head is defined to be the point of the log where the next log write
601 * could go. This means that incomplete LR writes at the end are 601 * could go. This means that incomplete LR writes at the end are
602 * eliminated when calculating the head. We aren't guaranteed that previous 602 * eliminated when calculating the head. We aren't guaranteed that previous
603 * LR have complete transactions. We only know that a cycle number of 603 * LR have complete transactions. We only know that a cycle number of
604 * current cycle number -1 won't be present in the log if we start writing 604 * current cycle number -1 won't be present in the log if we start writing
605 * from our current block number. 605 * from our current block number.
606 * 606 *
607 * last_blk contains the block number of the first block with a given 607 * last_blk contains the block number of the first block with a given
608 * cycle number. 608 * cycle number.
609 * 609 *
610 * Return: zero if normal, non-zero if error. 610 * Return: zero if normal, non-zero if error.
611 */ 611 */
612 STATIC int 612 STATIC int
613 xlog_find_head( 613 xlog_find_head(
614 struct xlog *log, 614 struct xlog *log,
615 xfs_daddr_t *return_head_blk) 615 xfs_daddr_t *return_head_blk)
616 { 616 {
617 xfs_buf_t *bp; 617 xfs_buf_t *bp;
618 xfs_caddr_t offset; 618 xfs_caddr_t offset;
619 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 619 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
620 int num_scan_bblks; 620 int num_scan_bblks;
621 uint first_half_cycle, last_half_cycle; 621 uint first_half_cycle, last_half_cycle;
622 uint stop_on_cycle; 622 uint stop_on_cycle;
623 int error, log_bbnum = log->l_logBBsize; 623 int error, log_bbnum = log->l_logBBsize;
624 624
625 /* Is the end of the log device zeroed? */ 625 /* Is the end of the log device zeroed? */
626 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { 626 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
627 *return_head_blk = first_blk; 627 *return_head_blk = first_blk;
628 628
629 /* Is the whole lot zeroed? */ 629 /* Is the whole lot zeroed? */
630 if (!first_blk) { 630 if (!first_blk) {
631 /* Linux XFS shouldn't generate totally zeroed logs - 631 /* Linux XFS shouldn't generate totally zeroed logs -
632 * mkfs etc write a dummy unmount record to a fresh 632 * mkfs etc write a dummy unmount record to a fresh
633 * log so we can store the uuid in there 633 * log so we can store the uuid in there
634 */ 634 */
635 xfs_warn(log->l_mp, "totally zeroed log"); 635 xfs_warn(log->l_mp, "totally zeroed log");
636 } 636 }
637 637
638 return 0; 638 return 0;
639 } else if (error) { 639 } else if (error) {
640 xfs_warn(log->l_mp, "empty log check failed"); 640 xfs_warn(log->l_mp, "empty log check failed");
641 return error; 641 return error;
642 } 642 }
643 643
644 first_blk = 0; /* get cycle # of 1st block */ 644 first_blk = 0; /* get cycle # of 1st block */
645 bp = xlog_get_bp(log, 1); 645 bp = xlog_get_bp(log, 1);
646 if (!bp) 646 if (!bp)
647 return ENOMEM; 647 return ENOMEM;
648 648
649 error = xlog_bread(log, 0, 1, bp, &offset); 649 error = xlog_bread(log, 0, 1, bp, &offset);
650 if (error) 650 if (error)
651 goto bp_err; 651 goto bp_err;
652 652
653 first_half_cycle = xlog_get_cycle(offset); 653 first_half_cycle = xlog_get_cycle(offset);
654 654
655 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 655 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
656 error = xlog_bread(log, last_blk, 1, bp, &offset); 656 error = xlog_bread(log, last_blk, 1, bp, &offset);
657 if (error) 657 if (error)
658 goto bp_err; 658 goto bp_err;
659 659
660 last_half_cycle = xlog_get_cycle(offset); 660 last_half_cycle = xlog_get_cycle(offset);
661 ASSERT(last_half_cycle != 0); 661 ASSERT(last_half_cycle != 0);
662 662
663 /* 663 /*
664 * If the 1st half cycle number is equal to the last half cycle number, 664 * If the 1st half cycle number is equal to the last half cycle number,
665 * then the entire log is stamped with the same cycle number. In this 665 * then the entire log is stamped with the same cycle number. In this
666 * case, head_blk can't be set to zero (which makes sense). The below 666 * case, head_blk can't be set to zero (which makes sense). The below
667 * math doesn't work out properly with head_blk equal to zero. Instead, 667 * math doesn't work out properly with head_blk equal to zero. Instead,
668 * we set it to log_bbnum which is an invalid block number, but this 668 * we set it to log_bbnum which is an invalid block number, but this
669 * value makes the math correct. If head_blk doesn't change through 669 * value makes the math correct. If head_blk doesn't change through
670 * all the tests below, *head_blk is set to zero at the very end rather 670 * all the tests below, *head_blk is set to zero at the very end rather
671 * than log_bbnum. In a sense, log_bbnum and zero are the same block 671 * than log_bbnum. In a sense, log_bbnum and zero are the same block
672 * in a circular file. 672 * in a circular file.
673 */ 673 */
674 if (first_half_cycle == last_half_cycle) { 674 if (first_half_cycle == last_half_cycle) {
675 /* 675 /*
676 * In this case we believe that the entire log should have 676 * In this case we believe that the entire log should have
677 * cycle number last_half_cycle. We need to scan backwards 677 * cycle number last_half_cycle. We need to scan backwards
678 * from the end verifying that there are no holes still 678 * from the end verifying that there are no holes still
679 * containing last_half_cycle - 1. If we find such a hole, 679 * containing last_half_cycle - 1. If we find such a hole,
680 * then the start of that hole will be the new head. The 680 * then the start of that hole will be the new head. The
681 * simple case looks like 681 * simple case looks like
682 * x | x ... | x - 1 | x 682 * x | x ... | x - 1 | x
683 * Another case that fits this picture would be 683 * Another case that fits this picture would be
684 * x | x + 1 | x ... | x 684 * x | x + 1 | x ... | x
685 * In this case the head really is somewhere at the end of the 685 * In this case the head really is somewhere at the end of the
686 * log, as one of the latest writes at the beginning was 686 * log, as one of the latest writes at the beginning was
687 * incomplete. 687 * incomplete.
688 * One more case is 688 * One more case is
689 * x | x + 1 | x ... | x - 1 | x 689 * x | x + 1 | x ... | x - 1 | x
690 * This is really the combination of the above two cases, and 690 * This is really the combination of the above two cases, and
691 * the head has to end up at the start of the x-1 hole at the 691 * the head has to end up at the start of the x-1 hole at the
692 * end of the log. 692 * end of the log.
693 * 693 *
694 * In the 256k log case, we will read from the beginning to the 694 * In the 256k log case, we will read from the beginning to the
695 * end of the log and search for cycle numbers equal to x-1. 695 * end of the log and search for cycle numbers equal to x-1.
696 * We don't worry about the x+1 blocks that we encounter, 696 * We don't worry about the x+1 blocks that we encounter,
697 * because we know that they cannot be the head since the log 697 * because we know that they cannot be the head since the log
698 * started with x. 698 * started with x.
699 */ 699 */
700 head_blk = log_bbnum; 700 head_blk = log_bbnum;
701 stop_on_cycle = last_half_cycle - 1; 701 stop_on_cycle = last_half_cycle - 1;
702 } else { 702 } else {
703 /* 703 /*
704 * In this case we want to find the first block with cycle 704 * In this case we want to find the first block with cycle
705 * number matching last_half_cycle. We expect the log to be 705 * number matching last_half_cycle. We expect the log to be
706 * some variation on 706 * some variation on
707 * x + 1 ... | x ... | x 707 * x + 1 ... | x ... | x
708 * The first block with cycle number x (last_half_cycle) will 708 * The first block with cycle number x (last_half_cycle) will
709 * be where the new head belongs. First we do a binary search 709 * be where the new head belongs. First we do a binary search
710 * for the first occurrence of last_half_cycle. The binary 710 * for the first occurrence of last_half_cycle. The binary
711 * search may not be totally accurate, so then we scan back 711 * search may not be totally accurate, so then we scan back
712 * from there looking for occurrences of last_half_cycle before 712 * from there looking for occurrences of last_half_cycle before
713 * us. If that backwards scan wraps around the beginning of 713 * us. If that backwards scan wraps around the beginning of
714 * the log, then we look for occurrences of last_half_cycle - 1 714 * the log, then we look for occurrences of last_half_cycle - 1
715 * at the end of the log. The cases we're looking for look 715 * at the end of the log. The cases we're looking for look
716 * like 716 * like
717 * v binary search stopped here 717 * v binary search stopped here
718 * x + 1 ... | x | x + 1 | x ... | x 718 * x + 1 ... | x | x + 1 | x ... | x
719 * ^ but we want to locate this spot 719 * ^ but we want to locate this spot
720 * or 720 * or
721 * <---------> less than scan distance 721 * <---------> less than scan distance
722 * x + 1 ... | x ... | x - 1 | x 722 * x + 1 ... | x ... | x - 1 | x
723 * ^ we want to locate this spot 723 * ^ we want to locate this spot
724 */ 724 */
725 stop_on_cycle = last_half_cycle; 725 stop_on_cycle = last_half_cycle;
726 if ((error = xlog_find_cycle_start(log, bp, first_blk, 726 if ((error = xlog_find_cycle_start(log, bp, first_blk,
727 &head_blk, last_half_cycle))) 727 &head_blk, last_half_cycle)))
728 goto bp_err; 728 goto bp_err;
729 } 729 }
730 730
731 /* 731 /*
732 * Now validate the answer. Scan back some number of maximum possible 732 * Now validate the answer. Scan back some number of maximum possible
733 * blocks and make sure each one has the expected cycle number. The 733 * blocks and make sure each one has the expected cycle number. The
734 * maximum is determined by the total possible amount of buffering 734 * maximum is determined by the total possible amount of buffering
735 * in the in-core log. The following number can be made tighter if 735 * in the in-core log. The following number can be made tighter if
736 * we actually look at the block size of the filesystem. 736 * we actually look at the block size of the filesystem.
737 */ 737 */
738 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 738 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
739 if (head_blk >= num_scan_bblks) { 739 if (head_blk >= num_scan_bblks) {
740 /* 740 /*
741 * We are guaranteed that the entire check can be performed 741 * We are guaranteed that the entire check can be performed
742 * in one buffer. 742 * in one buffer.
743 */ 743 */
744 start_blk = head_blk - num_scan_bblks; 744 start_blk = head_blk - num_scan_bblks;
745 if ((error = xlog_find_verify_cycle(log, 745 if ((error = xlog_find_verify_cycle(log,
746 start_blk, num_scan_bblks, 746 start_blk, num_scan_bblks,
747 stop_on_cycle, &new_blk))) 747 stop_on_cycle, &new_blk)))
748 goto bp_err; 748 goto bp_err;
749 if (new_blk != -1) 749 if (new_blk != -1)
750 head_blk = new_blk; 750 head_blk = new_blk;
751 } else { /* need to read 2 parts of log */ 751 } else { /* need to read 2 parts of log */
752 /* 752 /*
753 * We are going to scan backwards in the log in two parts. 753 * We are going to scan backwards in the log in two parts.
754 * First we scan the physical end of the log. In this part 754 * First we scan the physical end of the log. In this part
755 * of the log, we are looking for blocks with cycle number 755 * of the log, we are looking for blocks with cycle number
756 * last_half_cycle - 1. 756 * last_half_cycle - 1.
757 * If we find one, then we know that the log starts there, as 757 * If we find one, then we know that the log starts there, as
758 * we've found a hole that didn't get written in going around 758 * we've found a hole that didn't get written in going around
759 * the end of the physical log. The simple case for this is 759 * the end of the physical log. The simple case for this is
760 * x + 1 ... | x ... | x - 1 | x 760 * x + 1 ... | x ... | x - 1 | x
761 * <---------> less than scan distance 761 * <---------> less than scan distance
762 * If all of the blocks at the end of the log have cycle number 762 * If all of the blocks at the end of the log have cycle number
763 * last_half_cycle, then we check the blocks at the start of 763 * last_half_cycle, then we check the blocks at the start of
764 * the log looking for occurrences of last_half_cycle. If we 764 * the log looking for occurrences of last_half_cycle. If we
765 * find one, then our current estimate for the location of the 765 * find one, then our current estimate for the location of the
766 * first occurrence of last_half_cycle is wrong and we move 766 * first occurrence of last_half_cycle is wrong and we move
767 * back to the hole we've found. This case looks like 767 * back to the hole we've found. This case looks like
768 * x + 1 ... | x | x + 1 | x ... 768 * x + 1 ... | x | x + 1 | x ...
769 * ^ binary search stopped here 769 * ^ binary search stopped here
770 * Another case we need to handle that only occurs in 256k 770 * Another case we need to handle that only occurs in 256k
771 * logs is 771 * logs is
772 * x + 1 ... | x ... | x+1 | x ... 772 * x + 1 ... | x ... | x+1 | x ...
773 * ^ binary search stops here 773 * ^ binary search stops here
774 * In a 256k log, the scan at the end of the log will see the 774 * In a 256k log, the scan at the end of the log will see the
775 * x + 1 blocks. We need to skip past those since that is 775 * x + 1 blocks. We need to skip past those since that is
776 * certainly not the head of the log. By searching for 776 * certainly not the head of the log. By searching for
777 * last_half_cycle-1 we accomplish that. 777 * last_half_cycle-1 we accomplish that.
778 */ 778 */
779 ASSERT(head_blk <= INT_MAX && 779 ASSERT(head_blk <= INT_MAX &&
780 (xfs_daddr_t) num_scan_bblks >= head_blk); 780 (xfs_daddr_t) num_scan_bblks >= head_blk);
781 start_blk = log_bbnum - (num_scan_bblks - head_blk); 781 start_blk = log_bbnum - (num_scan_bblks - head_blk);
782 if ((error = xlog_find_verify_cycle(log, start_blk, 782 if ((error = xlog_find_verify_cycle(log, start_blk,
783 num_scan_bblks - (int)head_blk, 783 num_scan_bblks - (int)head_blk,
784 (stop_on_cycle - 1), &new_blk))) 784 (stop_on_cycle - 1), &new_blk)))
785 goto bp_err; 785 goto bp_err;
786 if (new_blk != -1) { 786 if (new_blk != -1) {
787 head_blk = new_blk; 787 head_blk = new_blk;
788 goto validate_head; 788 goto validate_head;
789 } 789 }
790 790
791 /* 791 /*
792 * Scan beginning of log now. The last part of the physical 792 * Scan beginning of log now. The last part of the physical
793 * log is good. This scan needs to verify that it doesn't find 793 * log is good. This scan needs to verify that it doesn't find
794 * the last_half_cycle. 794 * the last_half_cycle.
795 */ 795 */
796 start_blk = 0; 796 start_blk = 0;
797 ASSERT(head_blk <= INT_MAX); 797 ASSERT(head_blk <= INT_MAX);
798 if ((error = xlog_find_verify_cycle(log, 798 if ((error = xlog_find_verify_cycle(log,
799 start_blk, (int)head_blk, 799 start_blk, (int)head_blk,
800 stop_on_cycle, &new_blk))) 800 stop_on_cycle, &new_blk)))
801 goto bp_err; 801 goto bp_err;
802 if (new_blk != -1) 802 if (new_blk != -1)
803 head_blk = new_blk; 803 head_blk = new_blk;
804 } 804 }
805 805
806 validate_head: 806 validate_head:
807 /* 807 /*
808 * Now we need to make sure head_blk is not pointing to a block in 808 * Now we need to make sure head_blk is not pointing to a block in
809 * the middle of a log record. 809 * the middle of a log record.
810 */ 810 */
811 num_scan_bblks = XLOG_REC_SHIFT(log); 811 num_scan_bblks = XLOG_REC_SHIFT(log);
812 if (head_blk >= num_scan_bblks) { 812 if (head_blk >= num_scan_bblks) {
813 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ 813 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
814 814
815 /* start ptr at last block ptr before head_blk */ 815 /* start ptr at last block ptr before head_blk */
816 if ((error = xlog_find_verify_log_record(log, start_blk, 816 if ((error = xlog_find_verify_log_record(log, start_blk,
817 &head_blk, 0)) == -1) { 817 &head_blk, 0)) == -1) {
818 error = XFS_ERROR(EIO); 818 error = XFS_ERROR(EIO);
819 goto bp_err; 819 goto bp_err;
820 } else if (error) 820 } else if (error)
821 goto bp_err; 821 goto bp_err;
822 } else { 822 } else {
823 start_blk = 0; 823 start_blk = 0;
824 ASSERT(head_blk <= INT_MAX); 824 ASSERT(head_blk <= INT_MAX);
825 if ((error = xlog_find_verify_log_record(log, start_blk, 825 if ((error = xlog_find_verify_log_record(log, start_blk,
826 &head_blk, 0)) == -1) { 826 &head_blk, 0)) == -1) {
827 /* We hit the beginning of the log during our search */ 827 /* We hit the beginning of the log during our search */
828 start_blk = log_bbnum - (num_scan_bblks - head_blk); 828 start_blk = log_bbnum - (num_scan_bblks - head_blk);
829 new_blk = log_bbnum; 829 new_blk = log_bbnum;
830 ASSERT(start_blk <= INT_MAX && 830 ASSERT(start_blk <= INT_MAX &&
831 (xfs_daddr_t) log_bbnum-start_blk >= 0); 831 (xfs_daddr_t) log_bbnum-start_blk >= 0);
832 ASSERT(head_blk <= INT_MAX); 832 ASSERT(head_blk <= INT_MAX);
833 if ((error = xlog_find_verify_log_record(log, 833 if ((error = xlog_find_verify_log_record(log,
834 start_blk, &new_blk, 834 start_blk, &new_blk,
835 (int)head_blk)) == -1) { 835 (int)head_blk)) == -1) {
836 error = XFS_ERROR(EIO); 836 error = XFS_ERROR(EIO);
837 goto bp_err; 837 goto bp_err;
838 } else if (error) 838 } else if (error)
839 goto bp_err; 839 goto bp_err;
840 if (new_blk != log_bbnum) 840 if (new_blk != log_bbnum)
841 head_blk = new_blk; 841 head_blk = new_blk;
842 } else if (error) 842 } else if (error)
843 goto bp_err; 843 goto bp_err;
844 } 844 }
845 845
846 xlog_put_bp(bp); 846 xlog_put_bp(bp);
847 if (head_blk == log_bbnum) 847 if (head_blk == log_bbnum)
848 *return_head_blk = 0; 848 *return_head_blk = 0;
849 else 849 else
850 *return_head_blk = head_blk; 850 *return_head_blk = head_blk;
851 /* 851 /*
852 * When returning here, we have a good block number. Bad block 852 * When returning here, we have a good block number. Bad block
853 * means that during a previous crash, we didn't have a clean break 853 * means that during a previous crash, we didn't have a clean break
854 * from cycle number N to cycle number N-1. In this case, we need 854 * from cycle number N to cycle number N-1. In this case, we need
855 * to find the first block with cycle number N-1. 855 * to find the first block with cycle number N-1.
856 */ 856 */
857 return 0; 857 return 0;
858 858
859 bp_err: 859 bp_err:
860 xlog_put_bp(bp); 860 xlog_put_bp(bp);
861 861
862 if (error) 862 if (error)
863 xfs_warn(log->l_mp, "failed to find log head"); 863 xfs_warn(log->l_mp, "failed to find log head");
864 return error; 864 return error;
865 } 865 }
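
For readers following the recovery path, the cycle-number search above is easier to picture with a concrete toy. Every basic block in the log carries the cycle number it was last written under, and the head is where the newest cycle stops and the older one begins. The standalone sketch below (toy_find_head and its inputs are invented for illustration) shows only the binary-search idea; it deliberately ignores the torn-write holes and 256k-log cases that xlog_find_verify_cycle() and xlog_find_verify_log_record() exist to catch.

#include <stdio.h>
#include <stdint.h>

/*
 * Given the cycle number of every basic block in a circular log, find the
 * first block whose cycle number has not been bumped yet -- the head.
 * Assumes the simple case: one clean boundary between the two cycles.
 */
static int toy_find_head(const uint32_t *cycle, int nblocks)
{
	uint32_t first = cycle[0];
	uint32_t last = cycle[nblocks - 1];
	int lo = 0, hi = nblocks - 1;

	if (first == last)
		return nblocks;	/* one cycle everywhere: head at physical end */

	/* binary search for the first block still carrying the older cycle */
	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;

		if (cycle[mid] == first)
			lo = mid + 1;
		else
			hi = mid;
	}
	return lo;
}

int main(void)
{
	/* cycle 5 has wrapped partway over the blocks still holding cycle 4 */
	uint32_t cycle[] = { 5, 5, 5, 4, 4, 4, 4, 4 };

	printf("head at block %d\n",
	       toy_find_head(cycle, (int)(sizeof(cycle) / sizeof(cycle[0]))));
	return 0;
}

The verification scans in the real code then walk a bounded window around this candidate, because concurrent log writes mean the cycle boundary is not guaranteed to be a single clean step.
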
866 866
867 /* 867 /*
868 * Find the sync block number or the tail of the log. 868 * Find the sync block number or the tail of the log.
869 * 869 *
870 * This will be the block number of the last record to have its 870 * This will be the block number of the last record to have its
871 * associated buffers synced to disk. Every log record header has 871 * associated buffers synced to disk. Every log record header has
872 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy 872 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
873 * to get a sync block number. The only concern is to figure out which 873 * to get a sync block number. The only concern is to figure out which
874 * log record header to believe. 874 * log record header to believe.
875 * 875 *
876 * The following algorithm uses the log record header with the largest 876 * The following algorithm uses the log record header with the largest
877 * lsn. The entire log record does not need to be valid. We only care 877 * lsn. The entire log record does not need to be valid. We only care
878 * that the header is valid. 878 * that the header is valid.
879 * 879 *
880 * We could speed up search by using current head_blk buffer, but it is not 880 * We could speed up search by using current head_blk buffer, but it is not
881 * available. 881 * available.
882 */ 882 */
883 STATIC int 883 STATIC int
884 xlog_find_tail( 884 xlog_find_tail(
885 struct xlog *log, 885 struct xlog *log,
886 xfs_daddr_t *head_blk, 886 xfs_daddr_t *head_blk,
887 xfs_daddr_t *tail_blk) 887 xfs_daddr_t *tail_blk)
888 { 888 {
889 xlog_rec_header_t *rhead; 889 xlog_rec_header_t *rhead;
890 xlog_op_header_t *op_head; 890 xlog_op_header_t *op_head;
891 xfs_caddr_t offset = NULL; 891 xfs_caddr_t offset = NULL;
892 xfs_buf_t *bp; 892 xfs_buf_t *bp;
893 int error, i, found; 893 int error, i, found;
894 xfs_daddr_t umount_data_blk; 894 xfs_daddr_t umount_data_blk;
895 xfs_daddr_t after_umount_blk; 895 xfs_daddr_t after_umount_blk;
896 xfs_lsn_t tail_lsn; 896 xfs_lsn_t tail_lsn;
897 int hblks; 897 int hblks;
898 898
899 found = 0; 899 found = 0;
900 900
901 /* 901 /*
902 * Find previous log record 902 * Find previous log record
903 */ 903 */
904 if ((error = xlog_find_head(log, head_blk))) 904 if ((error = xlog_find_head(log, head_blk)))
905 return error; 905 return error;
906 906
907 bp = xlog_get_bp(log, 1); 907 bp = xlog_get_bp(log, 1);
908 if (!bp) 908 if (!bp)
909 return ENOMEM; 909 return ENOMEM;
910 if (*head_blk == 0) { /* special case */ 910 if (*head_blk == 0) { /* special case */
911 error = xlog_bread(log, 0, 1, bp, &offset); 911 error = xlog_bread(log, 0, 1, bp, &offset);
912 if (error) 912 if (error)
913 goto done; 913 goto done;
914 914
915 if (xlog_get_cycle(offset) == 0) { 915 if (xlog_get_cycle(offset) == 0) {
916 *tail_blk = 0; 916 *tail_blk = 0;
917 /* leave all other log inited values alone */ 917 /* leave all other log inited values alone */
918 goto done; 918 goto done;
919 } 919 }
920 } 920 }
921 921
922 /* 922 /*
923 * Search backwards looking for log record header block 923 * Search backwards looking for log record header block
924 */ 924 */
925 ASSERT(*head_blk < INT_MAX); 925 ASSERT(*head_blk < INT_MAX);
926 for (i = (int)(*head_blk) - 1; i >= 0; i--) { 926 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
927 error = xlog_bread(log, i, 1, bp, &offset); 927 error = xlog_bread(log, i, 1, bp, &offset);
928 if (error) 928 if (error)
929 goto done; 929 goto done;
930 930
931 if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { 931 if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
932 found = 1; 932 found = 1;
933 break; 933 break;
934 } 934 }
935 } 935 }
936 /* 936 /*
937 * If we haven't found the log record header block, start looking 937 * If we haven't found the log record header block, start looking
938 * again from the end of the physical log. XXXmiken: There should be 938 * again from the end of the physical log. XXXmiken: There should be
939 * a check here to make sure we didn't search more than N blocks in 939 * a check here to make sure we didn't search more than N blocks in
940 * the previous code. 940 * the previous code.
941 */ 941 */
942 if (!found) { 942 if (!found) {
943 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { 943 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
944 error = xlog_bread(log, i, 1, bp, &offset); 944 error = xlog_bread(log, i, 1, bp, &offset);
945 if (error) 945 if (error)
946 goto done; 946 goto done;
947 947
948 if (*(__be32 *)offset == 948 if (*(__be32 *)offset ==
949 cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { 949 cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
950 found = 2; 950 found = 2;
951 break; 951 break;
952 } 952 }
953 } 953 }
954 } 954 }
955 if (!found) { 955 if (!found) {
956 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); 956 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
957 ASSERT(0); 957 ASSERT(0);
958 return XFS_ERROR(EIO); 958 return XFS_ERROR(EIO);
959 } 959 }
960 960
961 /* find blk_no of tail of log */ 961 /* find blk_no of tail of log */
962 rhead = (xlog_rec_header_t *)offset; 962 rhead = (xlog_rec_header_t *)offset;
963 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); 963 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
964 964
965 /* 965 /*
966 * Reset log values according to the state of the log when we 966 * Reset log values according to the state of the log when we
967 * crashed. In the case where head_blk == 0, we bump curr_cycle 967 * crashed. In the case where head_blk == 0, we bump curr_cycle
968 * one because the next write starts a new cycle rather than 968 * one because the next write starts a new cycle rather than
969 * continuing the cycle of the last good log record. At this 969 * continuing the cycle of the last good log record. At this
970 * point we have guaranteed that all partial log records have been 970 * point we have guaranteed that all partial log records have been
971 * accounted for. Therefore, we know that the last good log record 971 * accounted for. Therefore, we know that the last good log record
972 * written was complete and ended exactly on the end boundary 972 * written was complete and ended exactly on the end boundary
973 * of the physical log. 973 * of the physical log.
974 */ 974 */
975 log->l_prev_block = i; 975 log->l_prev_block = i;
976 log->l_curr_block = (int)*head_blk; 976 log->l_curr_block = (int)*head_blk;
977 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 977 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
978 if (found == 2) 978 if (found == 2)
979 log->l_curr_cycle++; 979 log->l_curr_cycle++;
980 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 980 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
981 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 981 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
982 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, 982 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
983 BBTOB(log->l_curr_block)); 983 BBTOB(log->l_curr_block));
984 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, 984 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
985 BBTOB(log->l_curr_block)); 985 BBTOB(log->l_curr_block));
986 986
987 /* 987 /*
988 * Look for unmount record. If we find it, then we know there 988 * Look for unmount record. If we find it, then we know there
989 * was a clean unmount. Since 'i' could be the last block in 989 * was a clean unmount. Since 'i' could be the last block in
990 * the physical log, we convert to a log block before comparing 990 * the physical log, we convert to a log block before comparing
991 * to the head_blk. 991 * to the head_blk.
992 * 992 *
993 * Save the current tail lsn to use to pass to 993 * Save the current tail lsn to use to pass to
994 * xlog_clear_stale_blocks() below. We won't want to clear the 994 * xlog_clear_stale_blocks() below. We won't want to clear the
995 * unmount record if there is one, so we pass the lsn of the 995 * unmount record if there is one, so we pass the lsn of the
996 * unmount record rather than the block after it. 996 * unmount record rather than the block after it.
997 */ 997 */
998 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 998 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
999 int h_size = be32_to_cpu(rhead->h_size); 999 int h_size = be32_to_cpu(rhead->h_size);
1000 int h_version = be32_to_cpu(rhead->h_version); 1000 int h_version = be32_to_cpu(rhead->h_version);
1001 1001
1002 if ((h_version & XLOG_VERSION_2) && 1002 if ((h_version & XLOG_VERSION_2) &&
1003 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 1003 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
1004 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 1004 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1005 if (h_size % XLOG_HEADER_CYCLE_SIZE) 1005 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1006 hblks++; 1006 hblks++;
1007 } else { 1007 } else {
1008 hblks = 1; 1008 hblks = 1;
1009 } 1009 }
1010 } else { 1010 } else {
1011 hblks = 1; 1011 hblks = 1;
1012 } 1012 }
1013 after_umount_blk = (i + hblks + (int) 1013 after_umount_blk = (i + hblks + (int)
1014 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; 1014 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
1015 tail_lsn = atomic64_read(&log->l_tail_lsn); 1015 tail_lsn = atomic64_read(&log->l_tail_lsn);
1016 if (*head_blk == after_umount_blk && 1016 if (*head_blk == after_umount_blk &&
1017 be32_to_cpu(rhead->h_num_logops) == 1) { 1017 be32_to_cpu(rhead->h_num_logops) == 1) {
1018 umount_data_blk = (i + hblks) % log->l_logBBsize; 1018 umount_data_blk = (i + hblks) % log->l_logBBsize;
1019 error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 1019 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
1020 if (error) 1020 if (error)
1021 goto done; 1021 goto done;
1022 1022
1023 op_head = (xlog_op_header_t *)offset; 1023 op_head = (xlog_op_header_t *)offset;
1024 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 1024 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
1025 /* 1025 /*
1026 * Set tail and last sync so that newly written 1026 * Set tail and last sync so that newly written
1027 * log records will point recovery to after the 1027 * log records will point recovery to after the
1028 * current unmount record. 1028 * current unmount record.
1029 */ 1029 */
1030 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1030 xlog_assign_atomic_lsn(&log->l_tail_lsn,
1031 log->l_curr_cycle, after_umount_blk); 1031 log->l_curr_cycle, after_umount_blk);
1032 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1032 xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
1033 log->l_curr_cycle, after_umount_blk); 1033 log->l_curr_cycle, after_umount_blk);
1034 *tail_blk = after_umount_blk; 1034 *tail_blk = after_umount_blk;
1035 1035
1036 /* 1036 /*
1037 * Note that the unmount was clean. If the unmount 1037 * Note that the unmount was clean. If the unmount
1038 * was not clean, we need to know this to rebuild the 1038 * was not clean, we need to know this to rebuild the
1039 * superblock counters from the perag headers if we 1039 * superblock counters from the perag headers if we
1040 * have a filesystem using non-persistent counters. 1040 * have a filesystem using non-persistent counters.
1041 */ 1041 */
1042 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; 1042 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
1043 } 1043 }
1044 } 1044 }
1045 1045
1046 /* 1046 /*
1047 * Make sure that there are no blocks in front of the head 1047 * Make sure that there are no blocks in front of the head
1048 * with the same cycle number as the head. This can happen 1048 * with the same cycle number as the head. This can happen
1049 * because we allow multiple outstanding log writes concurrently, 1049 * because we allow multiple outstanding log writes concurrently,
1050 * and the later writes might make it out before earlier ones. 1050 * and the later writes might make it out before earlier ones.
1051 * 1051 *
1052 * We use the lsn from before modifying it so that we'll never 1052 * We use the lsn from before modifying it so that we'll never
1053 * overwrite the unmount record after a clean unmount. 1053 * overwrite the unmount record after a clean unmount.
1054 * 1054 *
1055 * Do this only if we are going to recover the filesystem 1055 * Do this only if we are going to recover the filesystem
1056 * 1056 *
1057 * NOTE: This used to say "if (!readonly)" 1057 * NOTE: This used to say "if (!readonly)"
1058 * However on Linux, we can & do recover a read-only filesystem. 1058 * However on Linux, we can & do recover a read-only filesystem.
1059 * We only skip recovery if NORECOVERY is specified on mount, 1059 * We only skip recovery if NORECOVERY is specified on mount,
1060 * in which case we would not be here. 1060 * in which case we would not be here.
1061 * 1061 *
1062 * But... if the -device- itself is readonly, just skip this. 1062 * But... if the -device- itself is readonly, just skip this.
1063 * We can't recover this device anyway, so it won't matter. 1063 * We can't recover this device anyway, so it won't matter.
1064 */ 1064 */
1065 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) 1065 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
1066 error = xlog_clear_stale_blocks(log, tail_lsn); 1066 error = xlog_clear_stale_blocks(log, tail_lsn);
1067 1067
1068 done: 1068 done:
1069 xlog_put_bp(bp); 1069 xlog_put_bp(bp);
1070 1070
1071 if (error) 1071 if (error)
1072 xfs_warn(log->l_mp, "failed to locate log tail"); 1072 xfs_warn(log->l_mp, "failed to locate log tail");
1073 return error; 1073 return error;
1074 } 1074 }
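
The reason BLOCK_LSN(rhead->h_tail_lsn) above yields the tail block directly is that an LSN packs a cycle number into the high 32 bits and a basic-block number into the low 32 bits. A minimal sketch of that packing follows (toy_* names are invented; only the bit layout mirrors what xlog_assign_lsn(), CYCLE_LSN() and BLOCK_LSN() express). The hblks computation above is the related detail for v2 logs: the header can span several sectors, so its size is rounded up to whole XLOG_HEADER_CYCLE_SIZE units before stepping past it.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t toy_lsn_t;

/* cycle in the high half, basic-block number in the low half */
static toy_lsn_t toy_assign_lsn(uint32_t cycle, uint32_t block)
{
	return ((uint64_t)cycle << 32) | block;
}

static uint32_t toy_cycle_lsn(toy_lsn_t lsn) { return (uint32_t)(lsn >> 32); }
static uint32_t toy_block_lsn(toy_lsn_t lsn) { return (uint32_t)lsn; }

int main(void)
{
	toy_lsn_t tail = toy_assign_lsn(7, 1234);

	/* the block recovery starts from is just the low half of the LSN */
	printf("cycle %u, block %u\n", toy_cycle_lsn(tail), toy_block_lsn(tail));
	return 0;
}
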
1075 1075
1076 /* 1076 /*
1077 * Is the log zeroed at all? 1077 * Is the log zeroed at all?
1078 * 1078 *
1079 * The last binary search should be changed to perform an X block read 1079 * The last binary search should be changed to perform an X block read
1080 * once X becomes small enough. You can then search linearly through 1080 * once X becomes small enough. You can then search linearly through
1081 * the X blocks. This will cut down on the number of reads we need to do. 1081 * the X blocks. This will cut down on the number of reads we need to do.
1082 * 1082 *
1083 * If the log is partially zeroed, this routine will pass back the blkno 1083 * If the log is partially zeroed, this routine will pass back the blkno
1084 * of the first block with cycle number 0. It won't have a complete LR 1084 * of the first block with cycle number 0. It won't have a complete LR
1085 * preceding it. 1085 * preceding it.
1086 * 1086 *
1087 * Return: 1087 * Return:
1088 * 0 => the log is completely written to 1088 * 0 => the log is completely written to
1089 * -1 => use *blk_no as the first block of the log 1089 * -1 => use *blk_no as the first block of the log
1090 * >0 => error has occurred 1090 * >0 => error has occurred
1091 */ 1091 */
1092 STATIC int 1092 STATIC int
1093 xlog_find_zeroed( 1093 xlog_find_zeroed(
1094 struct xlog *log, 1094 struct xlog *log,
1095 xfs_daddr_t *blk_no) 1095 xfs_daddr_t *blk_no)
1096 { 1096 {
1097 xfs_buf_t *bp; 1097 xfs_buf_t *bp;
1098 xfs_caddr_t offset; 1098 xfs_caddr_t offset;
1099 uint first_cycle, last_cycle; 1099 uint first_cycle, last_cycle;
1100 xfs_daddr_t new_blk, last_blk, start_blk; 1100 xfs_daddr_t new_blk, last_blk, start_blk;
1101 xfs_daddr_t num_scan_bblks; 1101 xfs_daddr_t num_scan_bblks;
1102 int error, log_bbnum = log->l_logBBsize; 1102 int error, log_bbnum = log->l_logBBsize;
1103 1103
1104 *blk_no = 0; 1104 *blk_no = 0;
1105 1105
1106 /* check totally zeroed log */ 1106 /* check totally zeroed log */
1107 bp = xlog_get_bp(log, 1); 1107 bp = xlog_get_bp(log, 1);
1108 if (!bp) 1108 if (!bp)
1109 return ENOMEM; 1109 return ENOMEM;
1110 error = xlog_bread(log, 0, 1, bp, &offset); 1110 error = xlog_bread(log, 0, 1, bp, &offset);
1111 if (error) 1111 if (error)
1112 goto bp_err; 1112 goto bp_err;
1113 1113
1114 first_cycle = xlog_get_cycle(offset); 1114 first_cycle = xlog_get_cycle(offset);
1115 if (first_cycle == 0) { /* completely zeroed log */ 1115 if (first_cycle == 0) { /* completely zeroed log */
1116 *blk_no = 0; 1116 *blk_no = 0;
1117 xlog_put_bp(bp); 1117 xlog_put_bp(bp);
1118 return -1; 1118 return -1;
1119 } 1119 }
1120 1120
1121 /* check partially zeroed log */ 1121 /* check partially zeroed log */
1122 error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); 1122 error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
1123 if (error) 1123 if (error)
1124 goto bp_err; 1124 goto bp_err;
1125 1125
1126 last_cycle = xlog_get_cycle(offset); 1126 last_cycle = xlog_get_cycle(offset);
1127 if (last_cycle != 0) { /* log completely written to */ 1127 if (last_cycle != 0) { /* log completely written to */
1128 xlog_put_bp(bp); 1128 xlog_put_bp(bp);
1129 return 0; 1129 return 0;
1130 } else if (first_cycle != 1) { 1130 } else if (first_cycle != 1) {
1131 /* 1131 /*
1132 * If the cycle of the last block is zero, the cycle of 1132 * If the cycle of the last block is zero, the cycle of
1133 * the first block must be 1. If it's not, maybe we're 1133 * the first block must be 1. If it's not, maybe we're
1134 * not looking at a log... Bail out. 1134 * not looking at a log... Bail out.
1135 */ 1135 */
1136 xfs_warn(log->l_mp, 1136 xfs_warn(log->l_mp,
1137 "Log inconsistent or not a log (last==0, first!=1)"); 1137 "Log inconsistent or not a log (last==0, first!=1)");
1138 return XFS_ERROR(EINVAL); 1138 return XFS_ERROR(EINVAL);
1139 } 1139 }
1140 1140
1141 /* we have a partially zeroed log */ 1141 /* we have a partially zeroed log */
1142 last_blk = log_bbnum-1; 1142 last_blk = log_bbnum-1;
1143 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) 1143 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
1144 goto bp_err; 1144 goto bp_err;
1145 1145
1146 /* 1146 /*
1147 * Validate the answer. Because there is no way to guarantee that 1147 * Validate the answer. Because there is no way to guarantee that
1148 * the entire log is made up of log records which are the same size, 1148 * the entire log is made up of log records which are the same size,
1149 * we scan over the defined maximum blocks. At this point, the maximum 1149 * we scan over the defined maximum blocks. At this point, the maximum
1150 * is not chosen to mean anything special. XXXmiken 1150 * is not chosen to mean anything special. XXXmiken
1151 */ 1151 */
1152 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 1152 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
1153 ASSERT(num_scan_bblks <= INT_MAX); 1153 ASSERT(num_scan_bblks <= INT_MAX);
1154 1154
1155 if (last_blk < num_scan_bblks) 1155 if (last_blk < num_scan_bblks)
1156 num_scan_bblks = last_blk; 1156 num_scan_bblks = last_blk;
1157 start_blk = last_blk - num_scan_bblks; 1157 start_blk = last_blk - num_scan_bblks;
1158 1158
1159 /* 1159 /*
1160 * We search for any instances of cycle number 0 that occur before 1160 * We search for any instances of cycle number 0 that occur before
1161 * our current estimate of the head. What we're trying to detect is 1161 * our current estimate of the head. What we're trying to detect is
1162 * 1 ... | 0 | 1 | 0... 1162 * 1 ... | 0 | 1 | 0...
1163 * ^ binary search ends here 1163 * ^ binary search ends here
1164 */ 1164 */
1165 if ((error = xlog_find_verify_cycle(log, start_blk, 1165 if ((error = xlog_find_verify_cycle(log, start_blk,
1166 (int)num_scan_bblks, 0, &new_blk))) 1166 (int)num_scan_bblks, 0, &new_blk)))
1167 goto bp_err; 1167 goto bp_err;
1168 if (new_blk != -1) 1168 if (new_blk != -1)
1169 last_blk = new_blk; 1169 last_blk = new_blk;
1170 1170
1171 /* 1171 /*
1172 * Potentially backup over partial log record write. We don't need 1172 * Potentially backup over partial log record write. We don't need
1173 * to search the end of the log because we know it is zero. 1173 * to search the end of the log because we know it is zero.
1174 */ 1174 */
1175 if ((error = xlog_find_verify_log_record(log, start_blk, 1175 if ((error = xlog_find_verify_log_record(log, start_blk,
1176 &last_blk, 0)) == -1) { 1176 &last_blk, 0)) == -1) {
1177 error = XFS_ERROR(EIO); 1177 error = XFS_ERROR(EIO);
1178 goto bp_err; 1178 goto bp_err;
1179 } else if (error) 1179 } else if (error)
1180 goto bp_err; 1180 goto bp_err;
1181 1181
1182 *blk_no = last_blk; 1182 *blk_no = last_blk;
1183 bp_err: 1183 bp_err:
1184 xlog_put_bp(bp); 1184 xlog_put_bp(bp);
1185 if (error) 1185 if (error)
1186 return error; 1186 return error;
1187 return -1; 1187 return -1;
1188 } 1188 }
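
The return convention of xlog_find_zeroed() is unusual enough to be worth spelling out: 0 means the log is completely written, -1 means "use *blk_no as the head", and a positive value is an error. The caller-side sketch below interprets such a tri-state result; toy_find_zeroed() is an invented stand-in, not the kernel interface.

#include <stdio.h>
#include <stdint.h>

typedef int64_t toy_daddr_t;

/* pretend the scan found the log zeroed from block 4096 onwards */
static int toy_find_zeroed(toy_daddr_t *blk_no)
{
	*blk_no = 4096;
	return -1;
}

int main(void)
{
	toy_daddr_t first_zeroed;
	int ret = toy_find_zeroed(&first_zeroed);

	if (ret > 0)
		printf("error %d while scanning the log\n", ret);
	else if (ret == -1)
		printf("log partially zeroed, head at block %lld\n",
		       (long long)first_zeroed);
	else
		printf("log completely written, no zeroed region\n");
	return 0;
}
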
1189 1189
1190 /* 1190 /*
1191 * These are simple subroutines used by xlog_clear_stale_blocks() below 1191 * These are simple subroutines used by xlog_clear_stale_blocks() below
1192 * to initialize a buffer full of empty log record headers and write 1192 * to initialize a buffer full of empty log record headers and write
1193 * them into the log. 1193 * them into the log.
1194 */ 1194 */
1195 STATIC void 1195 STATIC void
1196 xlog_add_record( 1196 xlog_add_record(
1197 struct xlog *log, 1197 struct xlog *log,
1198 xfs_caddr_t buf, 1198 xfs_caddr_t buf,
1199 int cycle, 1199 int cycle,
1200 int block, 1200 int block,
1201 int tail_cycle, 1201 int tail_cycle,
1202 int tail_block) 1202 int tail_block)
1203 { 1203 {
1204 xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; 1204 xlog_rec_header_t *recp = (xlog_rec_header_t *)buf;
1205 1205
1206 memset(buf, 0, BBSIZE); 1206 memset(buf, 0, BBSIZE);
1207 recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); 1207 recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
1208 recp->h_cycle = cpu_to_be32(cycle); 1208 recp->h_cycle = cpu_to_be32(cycle);
1209 recp->h_version = cpu_to_be32( 1209 recp->h_version = cpu_to_be32(
1210 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); 1210 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
1211 recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); 1211 recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
1212 recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); 1212 recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
1213 recp->h_fmt = cpu_to_be32(XLOG_FMT); 1213 recp->h_fmt = cpu_to_be32(XLOG_FMT);
1214 memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); 1214 memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
1215 } 1215 }
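
What xlog_add_record() produces is a 512-byte sector that contains nothing but a record header: valid magic, the (old) cycle number, and LSNs that point recovery back at the tail. A userspace sketch of that stamping is below. The struct layout is a cut-down invention and only the magic value matches the real XLOG_HEADER_MAGIC_NUM; htobe32()/htobe64() assume a Linux/glibc <endian.h>.

#include <endian.h>		/* htobe32()/htobe64(): Linux/glibc specific */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_BBSIZE		512
#define TOY_HEADER_MAGIC	0xFEEDbabe	/* same value as XLOG_HEADER_MAGIC_NUM */

/* illustrative header layout -- not the real struct xlog_rec_header */
struct toy_rec_header {
	uint32_t h_magicno;
	uint32_t h_cycle;
	uint32_t h_version;
	uint32_t h_len;
	uint64_t h_lsn;
	uint64_t h_tail_lsn;
};

union toy_sector {
	struct toy_rec_header	hdr;
	char			pad[TOY_BBSIZE];
};

static void toy_add_record(union toy_sector *sec, int cycle, int block,
			   int tail_cycle, int tail_block)
{
	memset(sec, 0, sizeof(*sec));		/* empty record: header only */
	sec->hdr.h_magicno = htobe32(TOY_HEADER_MAGIC);
	sec->hdr.h_cycle = htobe32((uint32_t)cycle);
	sec->hdr.h_version = htobe32(2);
	sec->hdr.h_lsn = htobe64(((uint64_t)cycle << 32) | (uint32_t)block);
	sec->hdr.h_tail_lsn = htobe64(((uint64_t)tail_cycle << 32) |
				      (uint32_t)tail_block);
}

int main(void)
{
	union toy_sector sec;

	/* stamp block 700 of cycle 8, pointing back at a tail at block 128 */
	toy_add_record(&sec, 8, 700, 8, 128);
	printf("magic %#x\n", be32toh(sec.hdr.h_magicno));
	return 0;
}
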
1216 1216
1217 STATIC int 1217 STATIC int
1218 xlog_write_log_records( 1218 xlog_write_log_records(
1219 struct xlog *log, 1219 struct xlog *log,
1220 int cycle, 1220 int cycle,
1221 int start_block, 1221 int start_block,
1222 int blocks, 1222 int blocks,
1223 int tail_cycle, 1223 int tail_cycle,
1224 int tail_block) 1224 int tail_block)
1225 { 1225 {
1226 xfs_caddr_t offset; 1226 xfs_caddr_t offset;
1227 xfs_buf_t *bp; 1227 xfs_buf_t *bp;
1228 int balign, ealign; 1228 int balign, ealign;
1229 int sectbb = log->l_sectBBsize; 1229 int sectbb = log->l_sectBBsize;
1230 int end_block = start_block + blocks; 1230 int end_block = start_block + blocks;
1231 int bufblks; 1231 int bufblks;
1232 int error = 0; 1232 int error = 0;
1233 int i, j = 0; 1233 int i, j = 0;
1234 1234
1235 /* 1235 /*
1236 * Greedily allocate a buffer big enough to handle the full 1236 * Greedily allocate a buffer big enough to handle the full
1237 * range of basic blocks to be written. If that fails, try 1237 * range of basic blocks to be written. If that fails, try
1238 * a smaller size. We need to be able to write at least a 1238 * a smaller size. We need to be able to write at least a
1239 * log sector, or we're out of luck. 1239 * log sector, or we're out of luck.
1240 */ 1240 */
1241 bufblks = 1 << ffs(blocks); 1241 bufblks = 1 << ffs(blocks);
1242 while (bufblks > log->l_logBBsize) 1242 while (bufblks > log->l_logBBsize)
1243 bufblks >>= 1; 1243 bufblks >>= 1;
1244 while (!(bp = xlog_get_bp(log, bufblks))) { 1244 while (!(bp = xlog_get_bp(log, bufblks))) {
1245 bufblks >>= 1; 1245 bufblks >>= 1;
1246 if (bufblks < sectbb) 1246 if (bufblks < sectbb)
1247 return ENOMEM; 1247 return ENOMEM;
1248 } 1248 }
1249 1249
1250 /* We may need to do a read at the start to fill in part of 1250 /* We may need to do a read at the start to fill in part of
1251 * the buffer in the starting sector not covered by the first 1251 * the buffer in the starting sector not covered by the first
1252 * write below. 1252 * write below.
1253 */ 1253 */
1254 balign = round_down(start_block, sectbb); 1254 balign = round_down(start_block, sectbb);
1255 if (balign != start_block) { 1255 if (balign != start_block) {
1256 error = xlog_bread_noalign(log, start_block, 1, bp); 1256 error = xlog_bread_noalign(log, start_block, 1, bp);
1257 if (error) 1257 if (error)
1258 goto out_put_bp; 1258 goto out_put_bp;
1259 1259
1260 j = start_block - balign; 1260 j = start_block - balign;
1261 } 1261 }
1262 1262
1263 for (i = start_block; i < end_block; i += bufblks) { 1263 for (i = start_block; i < end_block; i += bufblks) {
1264 int bcount, endcount; 1264 int bcount, endcount;
1265 1265
1266 bcount = min(bufblks, end_block - start_block); 1266 bcount = min(bufblks, end_block - start_block);
1267 endcount = bcount - j; 1267 endcount = bcount - j;
1268 1268
1269 /* We may need to do a read at the end to fill in part of 1269 /* We may need to do a read at the end to fill in part of
1270 * the buffer in the final sector not covered by the write. 1270 * the buffer in the final sector not covered by the write.
1271 * If this is the same sector as the above read, skip it. 1271 * If this is the same sector as the above read, skip it.
1272 */ 1272 */
1273 ealign = round_down(end_block, sectbb); 1273 ealign = round_down(end_block, sectbb);
1274 if (j == 0 && (start_block + endcount > ealign)) { 1274 if (j == 0 && (start_block + endcount > ealign)) {
1275 offset = bp->b_addr + BBTOB(ealign - start_block); 1275 offset = bp->b_addr + BBTOB(ealign - start_block);
1276 error = xlog_bread_offset(log, ealign, sectbb, 1276 error = xlog_bread_offset(log, ealign, sectbb,
1277 bp, offset); 1277 bp, offset);
1278 if (error) 1278 if (error)
1279 break; 1279 break;
1280 1280
1281 } 1281 }
1282 1282
1283 offset = xlog_align(log, start_block, endcount, bp); 1283 offset = xlog_align(log, start_block, endcount, bp);
1284 for (; j < endcount; j++) { 1284 for (; j < endcount; j++) {
1285 xlog_add_record(log, offset, cycle, i+j, 1285 xlog_add_record(log, offset, cycle, i+j,
1286 tail_cycle, tail_block); 1286 tail_cycle, tail_block);
1287 offset += BBSIZE; 1287 offset += BBSIZE;
1288 } 1288 }
1289 error = xlog_bwrite(log, start_block, endcount, bp); 1289 error = xlog_bwrite(log, start_block, endcount, bp);
1290 if (error) 1290 if (error)
1291 break; 1291 break;
1292 start_block += endcount; 1292 start_block += endcount;
1293 j = 0; 1293 j = 0;
1294 } 1294 }
1295 1295
1296 out_put_bp: 1296 out_put_bp:
1297 xlog_put_bp(bp); 1297 xlog_put_bp(bp);
1298 return error; 1298 return error;
1299 } 1299 }
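
The balign/ealign bookkeeping above is a read-modify-write for the log sectors at either end of the range: if the stamped range does not start or end on a sector boundary, the partial first and last sectors are read first so the surrounding blocks are not clobbered. A worked example of the arithmetic (the names are local to this illustration):

#include <stdio.h>

#define toy_round_down(x, y)	((x) - ((x) % (y)))

int main(void)
{
	int sectbb = 8;			/* log sector size in basic blocks */
	int start_block = 13, blocks = 20;
	int end_block = start_block + blocks;		/* 33 */

	int balign = toy_round_down(start_block, sectbb);	/* 8  */
	int j = start_block - balign;				/* 5 leading blocks to preserve */
	int ealign = toy_round_down(end_block, sectbb);		/* 32 */

	printf("read sector at %d, skip %d blocks, re-read tail sector at %d\n",
	       balign, j, ealign);
	return 0;
}
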
1300 1300
1301 /* 1301 /*
1302 * This routine is called to blow away any incomplete log writes out 1302 * This routine is called to blow away any incomplete log writes out
1303 * in front of the log head. We do this so that we won't become confused 1303 * in front of the log head. We do this so that we won't become confused
1304 * if we come up, write only a little bit more, and then crash again. 1304 * if we come up, write only a little bit more, and then crash again.
1305 * If we leave the partial log records out there, this situation could 1305 * If we leave the partial log records out there, this situation could
1306 * cause us to think those partial writes are valid blocks since they 1306 * cause us to think those partial writes are valid blocks since they
1307 * have the current cycle number. We get rid of them by overwriting them 1307 * have the current cycle number. We get rid of them by overwriting them
1308 * with empty log records with the old cycle number rather than the 1308 * with empty log records with the old cycle number rather than the
1309 * current one. 1309 * current one.
1310 * 1310 *
1311 * The tail lsn is passed in rather than taken from 1311 * The tail lsn is passed in rather than taken from
1312 * the log so that we will not write over the unmount record after a 1312 * the log so that we will not write over the unmount record after a
1313 * clean unmount in a 512 block log. Doing so would leave the log without 1313 * clean unmount in a 512 block log. Doing so would leave the log without
1314 * any valid log records in it until a new one was written. If we crashed 1314 * any valid log records in it until a new one was written. If we crashed
1315 * during that time we would not be able to recover. 1315 * during that time we would not be able to recover.
1316 */ 1316 */
1317 STATIC int 1317 STATIC int
1318 xlog_clear_stale_blocks( 1318 xlog_clear_stale_blocks(
1319 struct xlog *log, 1319 struct xlog *log,
1320 xfs_lsn_t tail_lsn) 1320 xfs_lsn_t tail_lsn)
1321 { 1321 {
1322 int tail_cycle, head_cycle; 1322 int tail_cycle, head_cycle;
1323 int tail_block, head_block; 1323 int tail_block, head_block;
1324 int tail_distance, max_distance; 1324 int tail_distance, max_distance;
1325 int distance; 1325 int distance;
1326 int error; 1326 int error;
1327 1327
1328 tail_cycle = CYCLE_LSN(tail_lsn); 1328 tail_cycle = CYCLE_LSN(tail_lsn);
1329 tail_block = BLOCK_LSN(tail_lsn); 1329 tail_block = BLOCK_LSN(tail_lsn);
1330 head_cycle = log->l_curr_cycle; 1330 head_cycle = log->l_curr_cycle;
1331 head_block = log->l_curr_block; 1331 head_block = log->l_curr_block;
1332 1332
1333 /* 1333 /*
1334 * Figure out the distance between the new head of the log 1334 * Figure out the distance between the new head of the log
1335 * and the tail. We want to write over any blocks beyond the 1335 * and the tail. We want to write over any blocks beyond the
1336 * head that we may have written just before the crash, but 1336 * head that we may have written just before the crash, but
1337 * we don't want to overwrite the tail of the log. 1337 * we don't want to overwrite the tail of the log.
1338 */ 1338 */
1339 if (head_cycle == tail_cycle) { 1339 if (head_cycle == tail_cycle) {
1340 /* 1340 /*
1341 * The tail is behind the head in the physical log, 1341 * The tail is behind the head in the physical log,
1342 * so the distance from the head to the tail is the 1342 * so the distance from the head to the tail is the
1343 * distance from the head to the end of the log plus 1343 * distance from the head to the end of the log plus
1344 * the distance from the beginning of the log to the 1344 * the distance from the beginning of the log to the
1345 * tail. 1345 * tail.
1346 */ 1346 */
1347 if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { 1347 if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
1348 XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", 1348 XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
1349 XFS_ERRLEVEL_LOW, log->l_mp); 1349 XFS_ERRLEVEL_LOW, log->l_mp);
1350 return XFS_ERROR(EFSCORRUPTED); 1350 return XFS_ERROR(EFSCORRUPTED);
1351 } 1351 }
1352 tail_distance = tail_block + (log->l_logBBsize - head_block); 1352 tail_distance = tail_block + (log->l_logBBsize - head_block);
1353 } else { 1353 } else {
1354 /* 1354 /*
1355 * The head is behind the tail in the physical log, 1355 * The head is behind the tail in the physical log,
1356 * so the distance from the head to the tail is just 1356 * so the distance from the head to the tail is just
1357 * the tail block minus the head block. 1357 * the tail block minus the head block.
1358 */ 1358 */
1359 if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ 1359 if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
1360 XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", 1360 XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
1361 XFS_ERRLEVEL_LOW, log->l_mp); 1361 XFS_ERRLEVEL_LOW, log->l_mp);
1362 return XFS_ERROR(EFSCORRUPTED); 1362 return XFS_ERROR(EFSCORRUPTED);
1363 } 1363 }
1364 tail_distance = tail_block - head_block; 1364 tail_distance = tail_block - head_block;
1365 } 1365 }
1366 1366
1367 /* 1367 /*
1368 * If the head is right up against the tail, we can't clear 1368 * If the head is right up against the tail, we can't clear
1369 * anything. 1369 * anything.
1370 */ 1370 */
1371 if (tail_distance <= 0) { 1371 if (tail_distance <= 0) {
1372 ASSERT(tail_distance == 0); 1372 ASSERT(tail_distance == 0);
1373 return 0; 1373 return 0;
1374 } 1374 }
1375 1375
1376 max_distance = XLOG_TOTAL_REC_SHIFT(log); 1376 max_distance = XLOG_TOTAL_REC_SHIFT(log);
1377 /* 1377 /*
1378 * Take the smaller of the maximum amount of outstanding I/O 1378 * Take the smaller of the maximum amount of outstanding I/O
1379 * we could have and the distance to the tail to clear out. 1379 * we could have and the distance to the tail to clear out.
1380 * We take the smaller so that we don't overwrite the tail and 1380 * We take the smaller so that we don't overwrite the tail and
1381 * we don't waste all day writing from the head to the tail 1381 * we don't waste all day writing from the head to the tail
1382 * for no reason. 1382 * for no reason.
1383 */ 1383 */
1384 max_distance = MIN(max_distance, tail_distance); 1384 max_distance = MIN(max_distance, tail_distance);
1385 1385
1386 if ((head_block + max_distance) <= log->l_logBBsize) { 1386 if ((head_block + max_distance) <= log->l_logBBsize) {
1387 /* 1387 /*
1388 * We can stomp all the blocks we need to without 1388 * We can stomp all the blocks we need to without
1389 * wrapping around the end of the log. Just do it 1389 * wrapping around the end of the log. Just do it
1390 * in a single write. Use the cycle number of the 1390 * in a single write. Use the cycle number of the
1391 * current cycle minus one so that the log will look like: 1391 * current cycle minus one so that the log will look like:
1392 * n ... | n - 1 ... 1392 * n ... | n - 1 ...
1393 */ 1393 */
1394 error = xlog_write_log_records(log, (head_cycle - 1), 1394 error = xlog_write_log_records(log, (head_cycle - 1),
1395 head_block, max_distance, tail_cycle, 1395 head_block, max_distance, tail_cycle,
1396 tail_block); 1396 tail_block);
1397 if (error) 1397 if (error)
1398 return error; 1398 return error;
1399 } else { 1399 } else {
1400 /* 1400 /*
1401 * We need to wrap around the end of the physical log in 1401 * We need to wrap around the end of the physical log in
1402 * order to clear all the blocks. Do it in two separate 1402 * order to clear all the blocks. Do it in two separate
1403 * I/Os. The first write should be from the head to the 1403 * I/Os. The first write should be from the head to the
1404 * end of the physical log, and it should use the current 1404 * end of the physical log, and it should use the current
1405 * cycle number minus one just like above. 1405 * cycle number minus one just like above.
1406 */ 1406 */
1407 distance = log->l_logBBsize - head_block; 1407 distance = log->l_logBBsize - head_block;
1408 error = xlog_write_log_records(log, (head_cycle - 1), 1408 error = xlog_write_log_records(log, (head_cycle - 1),
1409 head_block, distance, tail_cycle, 1409 head_block, distance, tail_cycle,
1410 tail_block); 1410 tail_block);
1411 1411
1412 if (error) 1412 if (error)
1413 return error; 1413 return error;
1414 1414
1415 /* 1415 /*
1416 * Now write the blocks at the start of the physical log. 1416 * Now write the blocks at the start of the physical log.
1417 * This writes the remainder of the blocks we want to clear. 1417 * This writes the remainder of the blocks we want to clear.
1418 * It uses the current cycle number since we're now on the 1418 * It uses the current cycle number since we're now on the
1419 * same cycle as the head so that we get: 1419 * same cycle as the head so that we get:
1420 * n ... n ... | n - 1 ... 1420 * n ... n ... | n - 1 ...
1421 * ^^^^^ blocks we're writing 1421 * ^^^^^ blocks we're writing
1422 */ 1422 */
1423 distance = max_distance - (log->l_logBBsize - head_block); 1423 distance = max_distance - (log->l_logBBsize - head_block);
1424 error = xlog_write_log_records(log, head_cycle, 0, distance, 1424 error = xlog_write_log_records(log, head_cycle, 0, distance,
1425 tail_cycle, tail_block); 1425 tail_cycle, tail_block);
1426 if (error) 1426 if (error)
1427 return error; 1427 return error;
1428 } 1428 }
1429 1429
1430 return 0; 1430 return 0;
1431 } 1431 }
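
The tail_distance computation above has exactly two arithmetic cases, depending on whether the head has already wrapped onto the next cycle. A minimal sketch, assuming a circular log of logbb basic blocks (not the kernel function, just the arithmetic):

#include <stdio.h>

static int toy_tail_distance(int head_cycle, int head_block,
			     int tail_cycle, int tail_block, int logbb)
{
	if (head_cycle == tail_cycle)
		/* tail is physically behind the head: wrap through the end */
		return tail_block + (logbb - head_block);
	/* head already wrapped onto the next cycle: simple difference */
	return tail_block - head_block;
}

int main(void)
{
	printf("same cycle:   %d\n", toy_tail_distance(9, 700, 9, 100, 1024));
	printf("head wrapped: %d\n", toy_tail_distance(10, 100, 9, 700, 1024));
	return 0;
}

The smaller of this distance and the maximum possible amount of in-flight log I/O bounds how many blocks get overwritten, which is why the routine never risks stomping on the tail.
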
1432 1432
1433 /****************************************************************************** 1433 /******************************************************************************
1434 * 1434 *
1435 * Log recover routines 1435 * Log recover routines
1436 * 1436 *
1437 ****************************************************************************** 1437 ******************************************************************************
1438 */ 1438 */
1439 1439
1440 STATIC xlog_recover_t * 1440 STATIC xlog_recover_t *
1441 xlog_recover_find_tid( 1441 xlog_recover_find_tid(
1442 struct hlist_head *head, 1442 struct hlist_head *head,
1443 xlog_tid_t tid) 1443 xlog_tid_t tid)
1444 { 1444 {
1445 xlog_recover_t *trans; 1445 xlog_recover_t *trans;
1446 1446
1447 hlist_for_each_entry(trans, head, r_list) { 1447 hlist_for_each_entry(trans, head, r_list) {
1448 if (trans->r_log_tid == tid) 1448 if (trans->r_log_tid == tid)
1449 return trans; 1449 return trans;
1450 } 1450 }
1451 return NULL; 1451 return NULL;
1452 } 1452 }
1453 1453
1454 STATIC void 1454 STATIC void
1455 xlog_recover_new_tid( 1455 xlog_recover_new_tid(
1456 struct hlist_head *head, 1456 struct hlist_head *head,
1457 xlog_tid_t tid, 1457 xlog_tid_t tid,
1458 xfs_lsn_t lsn) 1458 xfs_lsn_t lsn)
1459 { 1459 {
1460 xlog_recover_t *trans; 1460 xlog_recover_t *trans;
1461 1461
1462 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); 1462 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1463 trans->r_log_tid = tid; 1463 trans->r_log_tid = tid;
1464 trans->r_lsn = lsn; 1464 trans->r_lsn = lsn;
1465 INIT_LIST_HEAD(&trans->r_itemq); 1465 INIT_LIST_HEAD(&trans->r_itemq);
1466 1466
1467 INIT_HLIST_NODE(&trans->r_list); 1467 INIT_HLIST_NODE(&trans->r_list);
1468 hlist_add_head(&trans->r_list, head); 1468 hlist_add_head(&trans->r_list, head);
1469 } 1469 }
1470 1470
1471 STATIC void 1471 STATIC void
1472 xlog_recover_add_item( 1472 xlog_recover_add_item(
1473 struct list_head *head) 1473 struct list_head *head)
1474 { 1474 {
1475 xlog_recover_item_t *item; 1475 xlog_recover_item_t *item;
1476 1476
1477 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); 1477 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
1478 INIT_LIST_HEAD(&item->ri_list); 1478 INIT_LIST_HEAD(&item->ri_list);
1479 list_add_tail(&item->ri_list, head); 1479 list_add_tail(&item->ri_list, head);
1480 } 1480 }
1481 1481
1482 STATIC int 1482 STATIC int
1483 xlog_recover_add_to_cont_trans( 1483 xlog_recover_add_to_cont_trans(
1484 struct xlog *log, 1484 struct xlog *log,
1485 struct xlog_recover *trans, 1485 struct xlog_recover *trans,
1486 xfs_caddr_t dp, 1486 xfs_caddr_t dp,
1487 int len) 1487 int len)
1488 { 1488 {
1489 xlog_recover_item_t *item; 1489 xlog_recover_item_t *item;
1490 xfs_caddr_t ptr, old_ptr; 1490 xfs_caddr_t ptr, old_ptr;
1491 int old_len; 1491 int old_len;
1492 1492
1493 if (list_empty(&trans->r_itemq)) { 1493 if (list_empty(&trans->r_itemq)) {
1494 /* finish copying rest of trans header */ 1494 /* finish copying rest of trans header */
1495 xlog_recover_add_item(&trans->r_itemq); 1495 xlog_recover_add_item(&trans->r_itemq);
1496 ptr = (xfs_caddr_t) &trans->r_theader + 1496 ptr = (xfs_caddr_t) &trans->r_theader +
1497 sizeof(xfs_trans_header_t) - len; 1497 sizeof(xfs_trans_header_t) - len;
1498 memcpy(ptr, dp, len); /* d, s, l */ 1498 memcpy(ptr, dp, len); /* d, s, l */
1499 return 0; 1499 return 0;
1500 } 1500 }
1501 /* take the tail entry */ 1501 /* take the tail entry */
1502 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); 1502 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1503 1503
1504 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1504 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1505 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1505 old_len = item->ri_buf[item->ri_cnt-1].i_len;
1506 1506
1507 ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); 1507 ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
1508 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1508 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1509 item->ri_buf[item->ri_cnt-1].i_len += len; 1509 item->ri_buf[item->ri_cnt-1].i_len += len;
1510 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1510 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1511 trace_xfs_log_recover_item_add_cont(log, trans, item, 0); 1511 trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
1512 return 0; 1512 return 0;
1513 } 1513 }
1514 1514
1515 /* 1515 /*
1516 * The next region to add is the start of a new region. It could be 1516 * The next region to add is the start of a new region. It could be
1517 * a whole region or it could be the first part of a new region. Because 1517 * a whole region or it could be the first part of a new region. Because
1518 * of this, the assumption here is that the type and size fields of all 1518 * of this, the assumption here is that the type and size fields of all
1519 * format structures fit into the first 32 bits of the structure. 1519 * format structures fit into the first 32 bits of the structure.
1520 * 1520 *
1521 * This works because all regions must be 32 bit aligned. Therefore, we 1521 * This works because all regions must be 32 bit aligned. Therefore, we
1522 * either have both fields or we have neither field. In the case we have 1522 * either have both fields or we have neither field. In the case we have
1523 * neither field, the data part of the region is zero length. We only have 1523 * neither field, the data part of the region is zero length. We only have
1524 * a log_op_header and can throw away the header since a new one will appear 1524 * a log_op_header and can throw away the header since a new one will appear
1525 * later. If we have at least 4 bytes, then we can determine how many regions 1525 * later. If we have at least 4 bytes, then we can determine how many regions
1526 * will appear in the current log item. 1526 * will appear in the current log item.
1527 */ 1527 */
1528 STATIC int 1528 STATIC int
1529 xlog_recover_add_to_trans( 1529 xlog_recover_add_to_trans(
1530 struct xlog *log, 1530 struct xlog *log,
1531 struct xlog_recover *trans, 1531 struct xlog_recover *trans,
1532 xfs_caddr_t dp, 1532 xfs_caddr_t dp,
1533 int len) 1533 int len)
1534 { 1534 {
1535 xfs_inode_log_format_t *in_f; /* any will do */ 1535 xfs_inode_log_format_t *in_f; /* any will do */
1536 xlog_recover_item_t *item; 1536 xlog_recover_item_t *item;
1537 xfs_caddr_t ptr; 1537 xfs_caddr_t ptr;
1538 1538
1539 if (!len) 1539 if (!len)
1540 return 0; 1540 return 0;
1541 if (list_empty(&trans->r_itemq)) { 1541 if (list_empty(&trans->r_itemq)) {
1542 /* we need to catch log corruptions here */ 1542 /* we need to catch log corruptions here */
1543 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 1543 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1544 xfs_warn(log->l_mp, "%s: bad header magic number", 1544 xfs_warn(log->l_mp, "%s: bad header magic number",
1545 __func__); 1545 __func__);
1546 ASSERT(0); 1546 ASSERT(0);
1547 return XFS_ERROR(EIO); 1547 return XFS_ERROR(EIO);
1548 } 1548 }
1549 if (len == sizeof(xfs_trans_header_t)) 1549 if (len == sizeof(xfs_trans_header_t))
1550 xlog_recover_add_item(&trans->r_itemq); 1550 xlog_recover_add_item(&trans->r_itemq);
1551 memcpy(&trans->r_theader, dp, len); /* d, s, l */ 1551 memcpy(&trans->r_theader, dp, len); /* d, s, l */
1552 return 0; 1552 return 0;
1553 } 1553 }
1554 1554
1555 ptr = kmem_alloc(len, KM_SLEEP); 1555 ptr = kmem_alloc(len, KM_SLEEP);
1556 memcpy(ptr, dp, len); 1556 memcpy(ptr, dp, len);
1557 in_f = (xfs_inode_log_format_t *)ptr; 1557 in_f = (xfs_inode_log_format_t *)ptr;
1558 1558
1559 /* take the tail entry */ 1559 /* take the tail entry */
1560 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); 1560 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1561 if (item->ri_total != 0 && 1561 if (item->ri_total != 0 &&
1562 item->ri_total == item->ri_cnt) { 1562 item->ri_total == item->ri_cnt) {
1563 /* tail item is in use, get a new one */ 1563 /* tail item is in use, get a new one */
1564 xlog_recover_add_item(&trans->r_itemq); 1564 xlog_recover_add_item(&trans->r_itemq);
1565 item = list_entry(trans->r_itemq.prev, 1565 item = list_entry(trans->r_itemq.prev,
1566 xlog_recover_item_t, ri_list); 1566 xlog_recover_item_t, ri_list);
1567 } 1567 }
1568 1568
1569 if (item->ri_total == 0) { /* first region to be added */ 1569 if (item->ri_total == 0) { /* first region to be added */
1570 if (in_f->ilf_size == 0 || 1570 if (in_f->ilf_size == 0 ||
1571 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { 1571 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
1572 xfs_warn(log->l_mp, 1572 xfs_warn(log->l_mp,
1573 "bad number of regions (%d) in inode log format", 1573 "bad number of regions (%d) in inode log format",
1574 in_f->ilf_size); 1574 in_f->ilf_size);
1575 ASSERT(0); 1575 ASSERT(0);
1576 return XFS_ERROR(EIO); 1576 return XFS_ERROR(EIO);
1577 } 1577 }
1578 1578
1579 item->ri_total = in_f->ilf_size; 1579 item->ri_total = in_f->ilf_size;
1580 item->ri_buf = 1580 item->ri_buf =
1581 kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), 1581 kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
1582 KM_SLEEP); 1582 KM_SLEEP);
1583 } 1583 }
1584 ASSERT(item->ri_total > item->ri_cnt); 1584 ASSERT(item->ri_total > item->ri_cnt);
1585 /* Description region is ri_buf[0] */ 1585 /* Description region is ri_buf[0] */
1586 item->ri_buf[item->ri_cnt].i_addr = ptr; 1586 item->ri_buf[item->ri_cnt].i_addr = ptr;
1587 item->ri_buf[item->ri_cnt].i_len = len; 1587 item->ri_buf[item->ri_cnt].i_len = len;
1588 item->ri_cnt++; 1588 item->ri_cnt++;
1589 trace_xfs_log_recover_item_add(log, trans, item, 0); 1589 trace_xfs_log_recover_item_add(log, trans, item, 0);
1590 return 0; 1590 return 0;
1591 } 1591 }
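
The accumulation pattern above boils down to: the first region of an item carries a format header whose size field announces how many regions the item will eventually have, and later regions are appended until that count is reached. The simplified model below illustrates that flow with invented types and limits; it is not the kernel's xlog_recover_item handling.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TOY_MAX_REGIONS	64

struct toy_region {
	void	*addr;
	int	len;
};

struct toy_item {
	int			total;	/* regions promised by the format header */
	int			cnt;	/* regions received so far */
	struct toy_region	buf[TOY_MAX_REGIONS];
};

/* returns 0 on success, -1 on a corrupt region count */
static int toy_add_region(struct toy_item *item, const void *dp, int len,
			  int announced_regions)
{
	void *copy;

	if (item->total == 0) {		/* first region: validate its header */
		if (announced_regions <= 0 ||
		    announced_regions > TOY_MAX_REGIONS)
			return -1;
		item->total = announced_regions;
	}
	if (item->cnt >= item->total)
		return -1;

	copy = malloc(len);
	if (!copy)
		return -1;
	memcpy(copy, dp, len);
	item->buf[item->cnt].addr = copy;
	item->buf[item->cnt].len = len;
	item->cnt++;
	return 0;
}

int main(void)
{
	struct toy_item item = { 0 };
	char payload[16] = "format header";

	if (toy_add_region(&item, payload, sizeof(payload), 2) == 0 &&
	    toy_add_region(&item, payload, sizeof(payload), 2) == 0)
		printf("item complete: %d/%d regions\n", item.cnt, item.total);
	return 0;
}
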
1592 1592
1593 /* 1593 /*
1594 * Sort the log items in the transaction. Cancelled buffers need 1594 * Sort the log items in the transaction. Cancelled buffers need
1595 * to be put first so they are processed before any items that might 1595 * to be put first so they are processed before any items that might
1596 * modify the buffers. If they are cancelled, then the modifications 1596 * modify the buffers. If they are cancelled, then the modifications
1597 * don't need to be replayed. 1597 * don't need to be replayed.
1598 */ 1598 */
1599 STATIC int 1599 STATIC int
1600 xlog_recover_reorder_trans( 1600 xlog_recover_reorder_trans(
1601 struct xlog *log, 1601 struct xlog *log,
1602 struct xlog_recover *trans, 1602 struct xlog_recover *trans,
1603 int pass) 1603 int pass)
1604 { 1604 {
1605 xlog_recover_item_t *item, *n; 1605 xlog_recover_item_t *item, *n;
1606 LIST_HEAD(sort_list); 1606 LIST_HEAD(sort_list);
1607 1607
1608 list_splice_init(&trans->r_itemq, &sort_list); 1608 list_splice_init(&trans->r_itemq, &sort_list);
1609 list_for_each_entry_safe(item, n, &sort_list, ri_list) { 1609 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
1610 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 1610 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1611 1611
1612 switch (ITEM_TYPE(item)) { 1612 switch (ITEM_TYPE(item)) {
1613 case XFS_LI_BUF: 1613 case XFS_LI_BUF:
1614 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { 1614 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
1615 trace_xfs_log_recover_item_reorder_head(log, 1615 trace_xfs_log_recover_item_reorder_head(log,
1616 trans, item, pass); 1616 trans, item, pass);
1617 list_move(&item->ri_list, &trans->r_itemq); 1617 list_move(&item->ri_list, &trans->r_itemq);
1618 break; 1618 break;
1619 } 1619 }
1620 case XFS_LI_INODE: 1620 case XFS_LI_INODE:
1621 case XFS_LI_DQUOT: 1621 case XFS_LI_DQUOT:
1622 case XFS_LI_QUOTAOFF: 1622 case XFS_LI_QUOTAOFF:
1623 case XFS_LI_EFD: 1623 case XFS_LI_EFD:
1624 case XFS_LI_EFI: 1624 case XFS_LI_EFI:
1625 trace_xfs_log_recover_item_reorder_tail(log, 1625 trace_xfs_log_recover_item_reorder_tail(log,
1626 trans, item, pass); 1626 trans, item, pass);
1627 list_move_tail(&item->ri_list, &trans->r_itemq); 1627 list_move_tail(&item->ri_list, &trans->r_itemq);
1628 break; 1628 break;
1629 default: 1629 default:
1630 xfs_warn(log->l_mp, 1630 xfs_warn(log->l_mp,
1631 "%s: unrecognized type of log operation", 1631 "%s: unrecognized type of log operation",
1632 __func__); 1632 __func__);
1633 ASSERT(0); 1633 ASSERT(0);
1634 return XFS_ERROR(EIO); 1634 return XFS_ERROR(EIO);
1635 } 1635 }
1636 } 1636 }
1637 ASSERT(list_empty(&sort_list)); 1637 ASSERT(list_empty(&sort_list));
1638 return 0; 1638 return 0;
1639 } 1639 }
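
The goal stated in the comment above is an ordering guarantee: a record that cancels a buffer must be seen before any record that would replay modifications to that buffer. The toy below shows that intent as a stable partition of items; it does not reproduce the exact list_move()/list_move_tail() mechanics of the kernel routine.

#include <stdio.h>

struct toy_item {
	const char	*name;
	int		is_cancel;
};

int main(void)
{
	struct toy_item in[] = {
		{ "inode update", 0 },
		{ "buf cancel",   1 },
		{ "buf data",     0 },
		{ "dquot",        0 },
	};
	const int n = sizeof(in) / sizeof(in[0]);
	struct toy_item out[sizeof(in) / sizeof(in[0])];
	int k = 0, i;

	for (i = 0; i < n; i++)		/* cancels first, original order kept */
		if (in[i].is_cancel)
			out[k++] = in[i];
	for (i = 0; i < n; i++)
		if (!in[i].is_cancel)
			out[k++] = in[i];

	for (i = 0; i < n; i++)
		printf("%d: %s\n", i, out[i].name);
	return 0;
}
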
1640 1640
1641 /* 1641 /*
1642 * Build up the table of buf cancel records so that we don't replay 1642 * Build up the table of buf cancel records so that we don't replay
1643 * cancelled data in the second pass. For buffer records that are 1643 * cancelled data in the second pass. For buffer records that are
1644 * not cancel records, there is nothing to do here so we just return. 1644 * not cancel records, there is nothing to do here so we just return.
1645 * 1645 *
1646 * If we get a cancel record which is already in the table, this indicates 1646 * If we get a cancel record which is already in the table, this indicates
1647 * that the buffer was cancelled multiple times. In order to ensure 1647 * that the buffer was cancelled multiple times. In order to ensure
1648 * that during pass 2 we keep the record in the table until we reach its 1648 * that during pass 2 we keep the record in the table until we reach its
1649 * last occurrence in the log, we keep a reference count in the cancel 1649 * last occurrence in the log, we keep a reference count in the cancel
1650 * record in the table to tell us how many times we expect to see this 1650 * record in the table to tell us how many times we expect to see this
1651 * record during the second pass. 1651 * record during the second pass.
1652 */ 1652 */
1653 STATIC int 1653 STATIC int
1654 xlog_recover_buffer_pass1( 1654 xlog_recover_buffer_pass1(
1655 struct xlog *log, 1655 struct xlog *log,
1656 struct xlog_recover_item *item) 1656 struct xlog_recover_item *item)
1657 { 1657 {
1658 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 1658 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1659 struct list_head *bucket; 1659 struct list_head *bucket;
1660 struct xfs_buf_cancel *bcp; 1660 struct xfs_buf_cancel *bcp;
1661 1661
1662 /* 1662 /*
1663 * If this isn't a cancel buffer item, then just return. 1663 * If this isn't a cancel buffer item, then just return.
1664 */ 1664 */
1665 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { 1665 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
1666 trace_xfs_log_recover_buf_not_cancel(log, buf_f); 1666 trace_xfs_log_recover_buf_not_cancel(log, buf_f);
1667 return 0; 1667 return 0;
1668 } 1668 }
1669 1669
1670 /* 1670 /*
1671 * Insert an xfs_buf_cancel record into the hash table of them. 1671 * Insert an xfs_buf_cancel record into the hash table of them.
1672 * If there is already an identical record, bump its reference count. 1672 * If there is already an identical record, bump its reference count.
1673 */ 1673 */
1674 bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); 1674 bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
1675 list_for_each_entry(bcp, bucket, bc_list) { 1675 list_for_each_entry(bcp, bucket, bc_list) {
1676 if (bcp->bc_blkno == buf_f->blf_blkno && 1676 if (bcp->bc_blkno == buf_f->blf_blkno &&
1677 bcp->bc_len == buf_f->blf_len) { 1677 bcp->bc_len == buf_f->blf_len) {
1678 bcp->bc_refcount++; 1678 bcp->bc_refcount++;
1679 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); 1679 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
1680 return 0; 1680 return 0;
1681 } 1681 }
1682 } 1682 }
1683 1683
1684 bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); 1684 bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
1685 bcp->bc_blkno = buf_f->blf_blkno; 1685 bcp->bc_blkno = buf_f->blf_blkno;
1686 bcp->bc_len = buf_f->blf_len; 1686 bcp->bc_len = buf_f->blf_len;
1687 bcp->bc_refcount = 1; 1687 bcp->bc_refcount = 1;
1688 list_add_tail(&bcp->bc_list, bucket); 1688 list_add_tail(&bcp->bc_list, bucket);
1689 1689
1690 trace_xfs_log_recover_buf_cancel_add(log, buf_f); 1690 trace_xfs_log_recover_buf_cancel_add(log, buf_f);
1691 return 0; 1691 return 0;
1692 } 1692 }
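As a rough illustration of the table the comment above describes, the cancel records are assumed to be hashed into a small array of list heads keyed by disk block number, which is what the XLOG_BUF_CANCEL_BUCKET lookups in this function suggest. The helper below is an editor's sketch under that assumption, not part of the patch; only l_buf_cancel_table and XLOG_BC_TABLE_SIZE are taken from the surrounding code.

static inline struct list_head *
example_buf_cancel_bucket(
	struct xlog	*log,
	xfs_daddr_t	blkno)
{
	/* hash the block number into one of the cancel-table buckets */
	return log->l_buf_cancel_table +
		((__uint64_t)blkno % XLOG_BC_TABLE_SIZE);
}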
1693 1693
1694 /* 1694 /*
1695 * Check to see whether the buffer being recovered has a corresponding 1695 * Check to see whether the buffer being recovered has a corresponding
1696 * entry in the buffer cancel record table. If it does then return 1 1696 * entry in the buffer cancel record table. If it does then return 1
1697 * so that it will be cancelled, otherwise return 0. If the buffer is 1697 * so that it will be cancelled, otherwise return 0. If the buffer is
1698 * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement 1698 * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement
1699 * the refcount on the entry in the table and remove it from the table 1699 * the refcount on the entry in the table and remove it from the table
1700 * if this is the last reference. 1700 * if this is the last reference.
1701 * 1701 *
1702 * We remove the cancel record from the table when we encounter its 1702 * We remove the cancel record from the table when we encounter its
1703 * last occurrence in the log so that if the same buffer is re-used 1703 * last occurrence in the log so that if the same buffer is re-used
1704 * again after its last cancellation we actually replay the changes 1704 * again after its last cancellation we actually replay the changes
1705 * made at that point. 1705 * made at that point.
1706 */ 1706 */
1707 STATIC int 1707 STATIC int
1708 xlog_check_buffer_cancelled( 1708 xlog_check_buffer_cancelled(
1709 struct xlog *log, 1709 struct xlog *log,
1710 xfs_daddr_t blkno, 1710 xfs_daddr_t blkno,
1711 uint len, 1711 uint len,
1712 ushort flags) 1712 ushort flags)
1713 { 1713 {
1714 struct list_head *bucket; 1714 struct list_head *bucket;
1715 struct xfs_buf_cancel *bcp; 1715 struct xfs_buf_cancel *bcp;
1716 1716
1717 if (log->l_buf_cancel_table == NULL) { 1717 if (log->l_buf_cancel_table == NULL) {
1718 /* 1718 /*
1719 * There is nothing in the table built in pass one, 1719 * There is nothing in the table built in pass one,
1720 * so this buffer must not be cancelled. 1720 * so this buffer must not be cancelled.
1721 */ 1721 */
1722 ASSERT(!(flags & XFS_BLF_CANCEL)); 1722 ASSERT(!(flags & XFS_BLF_CANCEL));
1723 return 0; 1723 return 0;
1724 } 1724 }
1725 1725
1726 /* 1726 /*
1727 * Search for an entry in the cancel table that matches our buffer. 1727 * Search for an entry in the cancel table that matches our buffer.
1728 */ 1728 */
1729 bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); 1729 bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
1730 list_for_each_entry(bcp, bucket, bc_list) { 1730 list_for_each_entry(bcp, bucket, bc_list) {
1731 if (bcp->bc_blkno == blkno && bcp->bc_len == len) 1731 if (bcp->bc_blkno == blkno && bcp->bc_len == len)
1732 goto found; 1732 goto found;
1733 } 1733 }
1734 1734
1735 /* 1735 /*
1736 * We didn't find a corresponding entry in the table, so return 0 so 1736 * We didn't find a corresponding entry in the table, so return 0 so
1737 * that the buffer is NOT cancelled. 1737 * that the buffer is NOT cancelled.
1738 */ 1738 */
1739 ASSERT(!(flags & XFS_BLF_CANCEL)); 1739 ASSERT(!(flags & XFS_BLF_CANCEL));
1740 return 0; 1740 return 0;
1741 1741
1742 found: 1742 found:
1743 /* 1743 /*
1744 * We've got a match, so return 1 so that the recovery of this buffer 1744 * We've got a match, so return 1 so that the recovery of this buffer
1745 * is cancelled. If this buffer is actually a buffer cancel log 1745 * is cancelled. If this buffer is actually a buffer cancel log
1746 * item, then decrement the refcount on the one in the table and 1746 * item, then decrement the refcount on the one in the table and
1747 * remove it if this is the last reference. 1747 * remove it if this is the last reference.
1748 */ 1748 */
1749 if (flags & XFS_BLF_CANCEL) { 1749 if (flags & XFS_BLF_CANCEL) {
1750 if (--bcp->bc_refcount == 0) { 1750 if (--bcp->bc_refcount == 0) {
1751 list_del(&bcp->bc_list); 1751 list_del(&bcp->bc_list);
1752 kmem_free(bcp); 1752 kmem_free(bcp);
1753 } 1753 }
1754 } 1754 }
1755 return 1; 1755 return 1;
1756 } 1756 }
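The expected caller pattern in pass 2 (it mirrors the real call sites further below) is simply to skip replay whenever this helper returns 1. A minimal sketch, with the wrapper name being illustrative only:

static int
example_skip_if_cancelled(
	struct xlog		*log,
	xfs_buf_log_format_t	*buf_f)
{
	/* returns 1 when the buffer must not be replayed in pass 2 */
	return xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
					   buf_f->blf_len, buf_f->blf_flags);
}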
1757 1757
1758 /* 1758 /*
1759 * Perform recovery for a buffer full of inodes. In these buffers, the only 1759 * Perform recovery for a buffer full of inodes. In these buffers, the only
1760 * data which should be recovered is that which corresponds to the 1760 * data which should be recovered is that which corresponds to the
1761 * di_next_unlinked pointers in the on disk inode structures. The rest of the 1761 * di_next_unlinked pointers in the on disk inode structures. The rest of the
1762 * data for the inodes is always logged through the inodes themselves rather 1762 * data for the inodes is always logged through the inodes themselves rather
1763 * than the inode buffer and is recovered in xlog_recover_inode_pass2(). 1763 * than the inode buffer and is recovered in xlog_recover_inode_pass2().
1764 * 1764 *
1765 * The only time when buffers full of inodes are fully recovered is when the 1765 * The only time when buffers full of inodes are fully recovered is when the
1766 * buffer is full of newly allocated inodes. In this case the buffer will 1766 * buffer is full of newly allocated inodes. In this case the buffer will
1767 * not be marked as an inode buffer and so will be sent to 1767 * not be marked as an inode buffer and so will be sent to
1768 * xlog_recover_do_reg_buffer() below during recovery. 1768 * xlog_recover_do_reg_buffer() below during recovery.
1769 */ 1769 */
1770 STATIC int 1770 STATIC int
1771 xlog_recover_do_inode_buffer( 1771 xlog_recover_do_inode_buffer(
1772 struct xfs_mount *mp, 1772 struct xfs_mount *mp,
1773 xlog_recover_item_t *item, 1773 xlog_recover_item_t *item,
1774 struct xfs_buf *bp, 1774 struct xfs_buf *bp,
1775 xfs_buf_log_format_t *buf_f) 1775 xfs_buf_log_format_t *buf_f)
1776 { 1776 {
1777 int i; 1777 int i;
1778 int item_index = 0; 1778 int item_index = 0;
1779 int bit = 0; 1779 int bit = 0;
1780 int nbits = 0; 1780 int nbits = 0;
1781 int reg_buf_offset = 0; 1781 int reg_buf_offset = 0;
1782 int reg_buf_bytes = 0; 1782 int reg_buf_bytes = 0;
1783 int next_unlinked_offset; 1783 int next_unlinked_offset;
1784 int inodes_per_buf; 1784 int inodes_per_buf;
1785 xfs_agino_t *logged_nextp; 1785 xfs_agino_t *logged_nextp;
1786 xfs_agino_t *buffer_nextp; 1786 xfs_agino_t *buffer_nextp;
1787 1787
1788 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 1788 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1789 1789
1790 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 1790 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
1791 for (i = 0; i < inodes_per_buf; i++) { 1791 for (i = 0; i < inodes_per_buf; i++) {
1792 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 1792 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
1793 offsetof(xfs_dinode_t, di_next_unlinked); 1793 offsetof(xfs_dinode_t, di_next_unlinked);
1794 1794
1795 while (next_unlinked_offset >= 1795 while (next_unlinked_offset >=
1796 (reg_buf_offset + reg_buf_bytes)) { 1796 (reg_buf_offset + reg_buf_bytes)) {
1797 /* 1797 /*
1798 * The next di_next_unlinked field is beyond 1798 * The next di_next_unlinked field is beyond
1799 * the current logged region. Find the next 1799 * the current logged region. Find the next
1800 * logged region that contains or is beyond 1800 * logged region that contains or is beyond
1801 * the current di_next_unlinked field. 1801 * the current di_next_unlinked field.
1802 */ 1802 */
1803 bit += nbits; 1803 bit += nbits;
1804 bit = xfs_next_bit(buf_f->blf_data_map, 1804 bit = xfs_next_bit(buf_f->blf_data_map,
1805 buf_f->blf_map_size, bit); 1805 buf_f->blf_map_size, bit);
1806 1806
1807 /* 1807 /*
1808 * If there are no more logged regions in the 1808 * If there are no more logged regions in the
1809 * buffer, then we're done. 1809 * buffer, then we're done.
1810 */ 1810 */
1811 if (bit == -1) 1811 if (bit == -1)
1812 return 0; 1812 return 0;
1813 1813
1814 nbits = xfs_contig_bits(buf_f->blf_data_map, 1814 nbits = xfs_contig_bits(buf_f->blf_data_map,
1815 buf_f->blf_map_size, bit); 1815 buf_f->blf_map_size, bit);
1816 ASSERT(nbits > 0); 1816 ASSERT(nbits > 0);
1817 reg_buf_offset = bit << XFS_BLF_SHIFT; 1817 reg_buf_offset = bit << XFS_BLF_SHIFT;
1818 reg_buf_bytes = nbits << XFS_BLF_SHIFT; 1818 reg_buf_bytes = nbits << XFS_BLF_SHIFT;
1819 item_index++; 1819 item_index++;
1820 } 1820 }
1821 1821
1822 /* 1822 /*
1823 * If the current logged region starts after the current 1823 * If the current logged region starts after the current
1824 * di_next_unlinked field, then move on to the next 1824 * di_next_unlinked field, then move on to the next
1825 * di_next_unlinked field. 1825 * di_next_unlinked field.
1826 */ 1826 */
1827 if (next_unlinked_offset < reg_buf_offset) 1827 if (next_unlinked_offset < reg_buf_offset)
1828 continue; 1828 continue;
1829 1829
1830 ASSERT(item->ri_buf[item_index].i_addr != NULL); 1830 ASSERT(item->ri_buf[item_index].i_addr != NULL);
1831 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 1831 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
1832 ASSERT((reg_buf_offset + reg_buf_bytes) <= 1832 ASSERT((reg_buf_offset + reg_buf_bytes) <=
1833 BBTOB(bp->b_io_length)); 1833 BBTOB(bp->b_io_length));
1834 1834
1835 /* 1835 /*
1836 * The current logged region contains a copy of the 1836 * The current logged region contains a copy of the
1837 * current di_next_unlinked field. Extract its value 1837 * current di_next_unlinked field. Extract its value
1838 * and copy it to the buffer copy. 1838 * and copy it to the buffer copy.
1839 */ 1839 */
1840 logged_nextp = item->ri_buf[item_index].i_addr + 1840 logged_nextp = item->ri_buf[item_index].i_addr +
1841 next_unlinked_offset - reg_buf_offset; 1841 next_unlinked_offset - reg_buf_offset;
1842 if (unlikely(*logged_nextp == 0)) { 1842 if (unlikely(*logged_nextp == 0)) {
1843 xfs_alert(mp, 1843 xfs_alert(mp,
1844 "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " 1844 "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). "
1845 "Trying to replay bad (0) inode di_next_unlinked field.", 1845 "Trying to replay bad (0) inode di_next_unlinked field.",
1846 item, bp); 1846 item, bp);
1847 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", 1847 XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
1848 XFS_ERRLEVEL_LOW, mp); 1848 XFS_ERRLEVEL_LOW, mp);
1849 return XFS_ERROR(EFSCORRUPTED); 1849 return XFS_ERROR(EFSCORRUPTED);
1850 } 1850 }
1851 1851
1852 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, 1852 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
1853 next_unlinked_offset); 1853 next_unlinked_offset);
1854 *buffer_nextp = *logged_nextp; 1854 *buffer_nextp = *logged_nextp;
1855 } 1855 }
1856 1856
1857 return 0; 1857 return 0;
1858 } 1858 }
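For a concrete sense of the offsets walked above, assume (hypothetically) 256-byte inodes in a 4096-byte buffer: sixteen inodes per buffer, with inode i's di_next_unlinked field at the byte offset computed by the sketch below. The helper is illustrative and not part of the patch; it just restates the open-coded arithmetic in the loop.

static inline int
example_next_unlinked_offset(
	struct xfs_mount	*mp,
	int			i)
{
	/* byte offset of inode i's di_next_unlinked within the buffer */
	return i * mp->m_sb.sb_inodesize +
	       offsetof(xfs_dinode_t, di_next_unlinked);
}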
1859 1859
1860 /* 1860 /*
1861 * Perform a 'normal' buffer recovery. Each logged region of the 1861 * Perform a 'normal' buffer recovery. Each logged region of the
1862 * buffer should be copied over the corresponding region in the 1862 * buffer should be copied over the corresponding region in the
1863 * given buffer. The bitmap in the buf log format structure indicates 1863 * given buffer. The bitmap in the buf log format structure indicates
1864 * where to place the logged data. 1864 * where to place the logged data.
1865 */ 1865 */
1866 STATIC void 1866 STATIC void
1867 xlog_recover_do_reg_buffer( 1867 xlog_recover_do_reg_buffer(
1868 struct xfs_mount *mp, 1868 struct xfs_mount *mp,
1869 xlog_recover_item_t *item, 1869 xlog_recover_item_t *item,
1870 struct xfs_buf *bp, 1870 struct xfs_buf *bp,
1871 xfs_buf_log_format_t *buf_f) 1871 xfs_buf_log_format_t *buf_f)
1872 { 1872 {
1873 int i; 1873 int i;
1874 int bit; 1874 int bit;
1875 int nbits; 1875 int nbits;
1876 int error; 1876 int error;
1877 1877
1878 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); 1878 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
1879 1879
1880 bit = 0; 1880 bit = 0;
1881 i = 1; /* 0 is the buf format structure */ 1881 i = 1; /* 0 is the buf format structure */
1882 while (1) { 1882 while (1) {
1883 bit = xfs_next_bit(buf_f->blf_data_map, 1883 bit = xfs_next_bit(buf_f->blf_data_map,
1884 buf_f->blf_map_size, bit); 1884 buf_f->blf_map_size, bit);
1885 if (bit == -1) 1885 if (bit == -1)
1886 break; 1886 break;
1887 nbits = xfs_contig_bits(buf_f->blf_data_map, 1887 nbits = xfs_contig_bits(buf_f->blf_data_map,
1888 buf_f->blf_map_size, bit); 1888 buf_f->blf_map_size, bit);
1889 ASSERT(nbits > 0); 1889 ASSERT(nbits > 0);
1890 ASSERT(item->ri_buf[i].i_addr != NULL); 1890 ASSERT(item->ri_buf[i].i_addr != NULL);
1891 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); 1891 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
1892 ASSERT(BBTOB(bp->b_io_length) >= 1892 ASSERT(BBTOB(bp->b_io_length) >=
1893 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 1893 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
1894 1894
1895 /* 1895 /*
1896 * Do a sanity check if this is a dquot buffer. Just checking 1896 * Do a sanity check if this is a dquot buffer. Just checking
1897 * the first dquot in the buffer should do. XXX This is 1897 * the first dquot in the buffer should do. XXX This is
1898 * probably a good thing to do for other buf types also. 1898 * probably a good thing to do for other buf types also.
1899 */ 1899 */
1900 error = 0; 1900 error = 0;
1901 if (buf_f->blf_flags & 1901 if (buf_f->blf_flags &
1902 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 1902 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
1903 if (item->ri_buf[i].i_addr == NULL) { 1903 if (item->ri_buf[i].i_addr == NULL) {
1904 xfs_alert(mp, 1904 xfs_alert(mp,
1905 "XFS: NULL dquot in %s.", __func__); 1905 "XFS: NULL dquot in %s.", __func__);
1906 goto next; 1906 goto next;
1907 } 1907 }
1908 if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { 1908 if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
1909 xfs_alert(mp, 1909 xfs_alert(mp,
1910 "XFS: dquot too small (%d) in %s.", 1910 "XFS: dquot too small (%d) in %s.",
1911 item->ri_buf[i].i_len, __func__); 1911 item->ri_buf[i].i_len, __func__);
1912 goto next; 1912 goto next;
1913 } 1913 }
1914 error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, 1914 error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr,
1915 -1, 0, XFS_QMOPT_DOWARN, 1915 -1, 0, XFS_QMOPT_DOWARN,
1916 "dquot_buf_recover"); 1916 "dquot_buf_recover");
1917 if (error) 1917 if (error)
1918 goto next; 1918 goto next;
1919 } 1919 }
1920 1920
1921 memcpy(xfs_buf_offset(bp, 1921 memcpy(xfs_buf_offset(bp,
1922 (uint)bit << XFS_BLF_SHIFT), /* dest */ 1922 (uint)bit << XFS_BLF_SHIFT), /* dest */
1923 item->ri_buf[i].i_addr, /* source */ 1923 item->ri_buf[i].i_addr, /* source */
1924 nbits<<XFS_BLF_SHIFT); /* length */ 1924 nbits<<XFS_BLF_SHIFT); /* length */
1925 next: 1925 next:
1926 i++; 1926 i++;
1927 bit += nbits; 1927 bit += nbits;
1928 } 1928 }
1929 1929
1930 /* Shouldn't be any more regions */ 1930 /* Shouldn't be any more regions */
1931 ASSERT(i == item->ri_total); 1931 ASSERT(i == item->ri_total);
1932 1932
1933 switch (buf_f->blf_flags & XFS_BLF_TYPE_MASK) { 1933 switch (buf_f->blf_flags & XFS_BLF_TYPE_MASK) {
1934 case XFS_BLF_BTREE_BUF: 1934 case XFS_BLF_BTREE_BUF:
1935 switch (be32_to_cpu(*(__be32 *)bp->b_addr)) { 1935 switch (be32_to_cpu(*(__be32 *)bp->b_addr)) {
1936 case XFS_ABTB_CRC_MAGIC: 1936 case XFS_ABTB_CRC_MAGIC:
1937 case XFS_ABTC_CRC_MAGIC: 1937 case XFS_ABTC_CRC_MAGIC:
1938 case XFS_ABTB_MAGIC: 1938 case XFS_ABTB_MAGIC:
1939 case XFS_ABTC_MAGIC: 1939 case XFS_ABTC_MAGIC:
1940 bp->b_ops = &xfs_allocbt_buf_ops; 1940 bp->b_ops = &xfs_allocbt_buf_ops;
1941 break; 1941 break;
1942 case XFS_IBT_CRC_MAGIC: 1942 case XFS_IBT_CRC_MAGIC:
1943 case XFS_IBT_MAGIC: 1943 case XFS_IBT_MAGIC:
1944 bp->b_ops = &xfs_inobt_buf_ops; 1944 bp->b_ops = &xfs_inobt_buf_ops;
1945 break; 1945 break;
1946 case XFS_BMAP_CRC_MAGIC: 1946 case XFS_BMAP_CRC_MAGIC:
1947 case XFS_BMAP_MAGIC: 1947 case XFS_BMAP_MAGIC:
1948 bp->b_ops = &xfs_bmbt_buf_ops; 1948 bp->b_ops = &xfs_bmbt_buf_ops;
1949 break; 1949 break;
1950 default: 1950 default:
1951 xfs_warn(mp, "Bad btree block magic!"); 1951 xfs_warn(mp, "Bad btree block magic!");
1952 ASSERT(0); 1952 ASSERT(0);
1953 break; 1953 break;
1954 } 1954 }
1955 break; 1955 break;
1956 case XFS_BLF_AGF_BUF: 1956 case XFS_BLF_AGF_BUF:
1957 if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGF_MAGIC)) { 1957 if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGF_MAGIC)) {
1958 xfs_warn(mp, "Bad AGF block magic!"); 1958 xfs_warn(mp, "Bad AGF block magic!");
1959 ASSERT(0); 1959 ASSERT(0);
1960 break; 1960 break;
1961 } 1961 }
1962 bp->b_ops = &xfs_agf_buf_ops; 1962 bp->b_ops = &xfs_agf_buf_ops;
1963 break; 1963 break;
1964 case XFS_BLF_AGFL_BUF: 1964 case XFS_BLF_AGFL_BUF:
1965 if (!xfs_sb_version_hascrc(&mp->m_sb)) 1965 if (!xfs_sb_version_hascrc(&mp->m_sb))
1966 break; 1966 break;
1967 if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGFL_MAGIC)) { 1967 if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGFL_MAGIC)) {
1968 xfs_warn(mp, "Bad AGFL block magic!"); 1968 xfs_warn(mp, "Bad AGFL block magic!");
1969 ASSERT(0); 1969 ASSERT(0);
1970 break; 1970 break;
1971 } 1971 }
1972 bp->b_ops = &xfs_agfl_buf_ops; 1972 bp->b_ops = &xfs_agfl_buf_ops;
1973 break; 1973 break;
1974 case XFS_BLF_AGI_BUF: 1974 case XFS_BLF_AGI_BUF:
1975 if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGI_MAGIC)) { 1975 if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGI_MAGIC)) {
1976 xfs_warn(mp, "Bad AGI block magic!"); 1976 xfs_warn(mp, "Bad AGI block magic!");
1977 ASSERT(0); 1977 ASSERT(0);
1978 break; 1978 break;
1979 } 1979 }
1980 bp->b_ops = &xfs_agi_buf_ops; 1980 bp->b_ops = &xfs_agi_buf_ops;
1981 break; 1981 break;
1982 case XFS_BLF_UDQUOT_BUF:
1983 case XFS_BLF_PDQUOT_BUF:
1984 case XFS_BLF_GDQUOT_BUF:
1985 if (*(__be16 *)bp->b_addr != cpu_to_be16(XFS_DQUOT_MAGIC)) {
1986 xfs_warn(mp, "Bad DQUOT block magic!");
1987 ASSERT(0);
1988 break;
1989 }
1990 bp->b_ops = &xfs_dquot_buf_ops;
1991 break;
1982 default: 1992 default:
1983 break; 1993 break;
1984 } 1994 }
1985 } 1995 }
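The bit/nbits pairs pulled out of blf_data_map above map onto byte ranges in the buffer through XFS_BLF_SHIFT; assuming each map bit covers one XFS_BLF_CHUNK-sized (128-byte) chunk, the translation amounts to the sketch below, which is illustrative and simply restates the shifts used in the copy loop.

static inline void
example_region_bytes(
	int	bit,		/* first set bit of the contiguous run */
	int	nbits,		/* number of bits in the run */
	uint	*offset,	/* byte offset of the logged region */
	uint	*length)	/* byte length of the logged region */
{
	*offset = (uint)bit << XFS_BLF_SHIFT;
	*length = (uint)nbits << XFS_BLF_SHIFT;
}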
1986 1996
1987 /* 1997 /*
1988 * Do some primitive error checking on ondisk dquot data structures. 1998 * Do some primitive error checking on ondisk dquot data structures.
1989 */ 1999 */
1990 int 2000 int
1991 xfs_qm_dqcheck( 2001 xfs_qm_dqcheck(
1992 struct xfs_mount *mp, 2002 struct xfs_mount *mp,
1993 xfs_disk_dquot_t *ddq, 2003 xfs_disk_dquot_t *ddq,
1994 xfs_dqid_t id, 2004 xfs_dqid_t id,
1995 uint type, /* used only when IO_dorepair is true */ 2005 uint type, /* used only when IO_dorepair is true */
1996 uint flags, 2006 uint flags,
1997 char *str) 2007 char *str)
1998 { 2008 {
1999 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; 2009 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
2000 int errs = 0; 2010 int errs = 0;
2001 2011
2002 /* 2012 /*
2003 * We can encounter an uninitialized dquot buffer for 2 reasons: 2013 * We can encounter an uninitialized dquot buffer for 2 reasons:
2004 * 1. If we crash while deleting the quotainode(s), and those blks got 2014 * 1. If we crash while deleting the quotainode(s), and those blks got
2005 * used for user data. This is because we take the path of regular 2015 * used for user data. This is because we take the path of regular
2006 * file deletion; however, the size field of quotainodes is never 2016 * file deletion; however, the size field of quotainodes is never
2007 * updated, so all the tricks that we play in itruncate_finish 2017 * updated, so all the tricks that we play in itruncate_finish
2008 * don't quite matter. 2018 * don't quite matter.
2009 * 2019 *
2010 * 2. We don't play the quota buffers when there's a quotaoff logitem. 2020 * 2. We don't play the quota buffers when there's a quotaoff logitem.
2011 * But the allocation will be replayed so we'll end up with an 2021 * But the allocation will be replayed so we'll end up with an
2012 * uninitialized quota block. 2022 * uninitialized quota block.
2013 * 2023 *
2014 * This is all fine; things are still consistent, and we haven't lost 2024 * This is all fine; things are still consistent, and we haven't lost
2015 * any quota information. Just don't complain about bad dquot blks. 2025 * any quota information. Just don't complain about bad dquot blks.
2016 */ 2026 */
2017 if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { 2027 if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
2018 if (flags & XFS_QMOPT_DOWARN) 2028 if (flags & XFS_QMOPT_DOWARN)
2019 xfs_alert(mp, 2029 xfs_alert(mp,
2020 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 2030 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
2021 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 2031 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
2022 errs++; 2032 errs++;
2023 } 2033 }
2024 if (ddq->d_version != XFS_DQUOT_VERSION) { 2034 if (ddq->d_version != XFS_DQUOT_VERSION) {
2025 if (flags & XFS_QMOPT_DOWARN) 2035 if (flags & XFS_QMOPT_DOWARN)
2026 xfs_alert(mp, 2036 xfs_alert(mp,
2027 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 2037 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
2028 str, id, ddq->d_version, XFS_DQUOT_VERSION); 2038 str, id, ddq->d_version, XFS_DQUOT_VERSION);
2029 errs++; 2039 errs++;
2030 } 2040 }
2031 2041
2032 if (ddq->d_flags != XFS_DQ_USER && 2042 if (ddq->d_flags != XFS_DQ_USER &&
2033 ddq->d_flags != XFS_DQ_PROJ && 2043 ddq->d_flags != XFS_DQ_PROJ &&
2034 ddq->d_flags != XFS_DQ_GROUP) { 2044 ddq->d_flags != XFS_DQ_GROUP) {
2035 if (flags & XFS_QMOPT_DOWARN) 2045 if (flags & XFS_QMOPT_DOWARN)
2036 xfs_alert(mp, 2046 xfs_alert(mp,
2037 "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 2047 "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
2038 str, id, ddq->d_flags); 2048 str, id, ddq->d_flags);
2039 errs++; 2049 errs++;
2040 } 2050 }
2041 2051
2042 if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 2052 if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
2043 if (flags & XFS_QMOPT_DOWARN) 2053 if (flags & XFS_QMOPT_DOWARN)
2044 xfs_alert(mp, 2054 xfs_alert(mp,
2045 "%s : ondisk-dquot 0x%p, ID mismatch: " 2055 "%s : ondisk-dquot 0x%p, ID mismatch: "
2046 "0x%x expected, found id 0x%x", 2056 "0x%x expected, found id 0x%x",
2047 str, ddq, id, be32_to_cpu(ddq->d_id)); 2057 str, ddq, id, be32_to_cpu(ddq->d_id));
2048 errs++; 2058 errs++;
2049 } 2059 }
2050 2060
2051 if (!errs && ddq->d_id) { 2061 if (!errs && ddq->d_id) {
2052 if (ddq->d_blk_softlimit && 2062 if (ddq->d_blk_softlimit &&
2053 be64_to_cpu(ddq->d_bcount) > 2063 be64_to_cpu(ddq->d_bcount) >
2054 be64_to_cpu(ddq->d_blk_softlimit)) { 2064 be64_to_cpu(ddq->d_blk_softlimit)) {
2055 if (!ddq->d_btimer) { 2065 if (!ddq->d_btimer) {
2056 if (flags & XFS_QMOPT_DOWARN) 2066 if (flags & XFS_QMOPT_DOWARN)
2057 xfs_alert(mp, 2067 xfs_alert(mp,
2058 "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", 2068 "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
2059 str, (int)be32_to_cpu(ddq->d_id), ddq); 2069 str, (int)be32_to_cpu(ddq->d_id), ddq);
2060 errs++; 2070 errs++;
2061 } 2071 }
2062 } 2072 }
2063 if (ddq->d_ino_softlimit && 2073 if (ddq->d_ino_softlimit &&
2064 be64_to_cpu(ddq->d_icount) > 2074 be64_to_cpu(ddq->d_icount) >
2065 be64_to_cpu(ddq->d_ino_softlimit)) { 2075 be64_to_cpu(ddq->d_ino_softlimit)) {
2066 if (!ddq->d_itimer) { 2076 if (!ddq->d_itimer) {
2067 if (flags & XFS_QMOPT_DOWARN) 2077 if (flags & XFS_QMOPT_DOWARN)
2068 xfs_alert(mp, 2078 xfs_alert(mp,
2069 "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", 2079 "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
2070 str, (int)be32_to_cpu(ddq->d_id), ddq); 2080 str, (int)be32_to_cpu(ddq->d_id), ddq);
2071 errs++; 2081 errs++;
2072 } 2082 }
2073 } 2083 }
2074 if (ddq->d_rtb_softlimit && 2084 if (ddq->d_rtb_softlimit &&
2075 be64_to_cpu(ddq->d_rtbcount) > 2085 be64_to_cpu(ddq->d_rtbcount) >
2076 be64_to_cpu(ddq->d_rtb_softlimit)) { 2086 be64_to_cpu(ddq->d_rtb_softlimit)) {
2077 if (!ddq->d_rtbtimer) { 2087 if (!ddq->d_rtbtimer) {
2078 if (flags & XFS_QMOPT_DOWARN) 2088 if (flags & XFS_QMOPT_DOWARN)
2079 xfs_alert(mp, 2089 xfs_alert(mp,
2080 "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", 2090 "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
2081 str, (int)be32_to_cpu(ddq->d_id), ddq); 2091 str, (int)be32_to_cpu(ddq->d_id), ddq);
2082 errs++; 2092 errs++;
2083 } 2093 }
2084 } 2094 }
2085 } 2095 }
2086 2096
2087 if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) 2097 if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
2088 return errs; 2098 return errs;
2089 2099
2090 if (flags & XFS_QMOPT_DOWARN) 2100 if (flags & XFS_QMOPT_DOWARN)
2091 xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); 2101 xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
2092 2102
2093 /* 2103 /*
2094 * Typically, a repair is only requested by quotacheck. 2104 * Typically, a repair is only requested by quotacheck.
2095 */ 2105 */
2096 ASSERT(id != -1); 2106 ASSERT(id != -1);
2097 ASSERT(flags & XFS_QMOPT_DQREPAIR); 2107 ASSERT(flags & XFS_QMOPT_DQREPAIR);
2098 memset(d, 0, sizeof(xfs_dqblk_t)); 2108 memset(d, 0, sizeof(xfs_dqblk_t));
2099 2109
2100 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); 2110 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
2101 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 2111 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
2102 d->dd_diskdq.d_flags = type; 2112 d->dd_diskdq.d_flags = type;
2103 d->dd_diskdq.d_id = cpu_to_be32(id); 2113 d->dd_diskdq.d_id = cpu_to_be32(id);
2104 2114
2105 return errs; 2115 return errs;
2106 } 2116 }
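xfs_qm_dqcheck() only validates the plain dquot fields; the point of this series is that, on CRC-enabled (v5) filesystems, each xfs_dqblk additionally carries a UUID and a CRC in its formerly reserved space. The sketch below is a hedged guess at what the per-dquot check inside the new dquot buffer verifier amounts to; the dd_crc and dd_uuid field names and the exact CRC coverage are assumptions based on the commit description, not copied from the hunks shown here.

static bool
example_dqblk_crc_ok(
	struct xfs_mount	*mp,
	struct xfs_dqblk	*d)
{
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return true;		/* no CRCs on pre-v5 superblocks */

	/* CRC is assumed to cover the whole dqblk, excluding itself */
	if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
			      offsetof(struct xfs_dqblk, dd_crc)))
		return false;

	/* the embedded UUID is assumed to match the filesystem UUID */
	return uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid);
}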
2107 2117
2108 /* 2118 /*
2109 * Perform a dquot buffer recovery. 2119 * Perform a dquot buffer recovery.
2110 * Simple algorithm: if we have found a QUOTAOFF logitem of the same type 2120 * Simple algorithm: if we have found a QUOTAOFF logitem of the same type
2111 * (ie. USR or GRP), then just toss this buffer away; don't recover it. 2121 * (ie. USR or GRP), then just toss this buffer away; don't recover it.
2112 * Else, treat it as a regular buffer and do recovery. 2122 * Else, treat it as a regular buffer and do recovery.
2113 */ 2123 */
2114 STATIC void 2124 STATIC void
2115 xlog_recover_do_dquot_buffer( 2125 xlog_recover_do_dquot_buffer(
2116 struct xfs_mount *mp, 2126 struct xfs_mount *mp,
2117 struct xlog *log, 2127 struct xlog *log,
2118 struct xlog_recover_item *item, 2128 struct xlog_recover_item *item,
2119 struct xfs_buf *bp, 2129 struct xfs_buf *bp,
2120 struct xfs_buf_log_format *buf_f) 2130 struct xfs_buf_log_format *buf_f)
2121 { 2131 {
2122 uint type; 2132 uint type;
2123 2133
2124 trace_xfs_log_recover_buf_dquot_buf(log, buf_f); 2134 trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
2125 2135
2126 /* 2136 /*
2127 * Filesystems are required to send in quota flags at mount time. 2137 * Filesystems are required to send in quota flags at mount time.
2128 */ 2138 */
2129 if (mp->m_qflags == 0) { 2139 if (mp->m_qflags == 0) {
2130 return; 2140 return;
2131 } 2141 }
2132 2142
2133 type = 0; 2143 type = 0;
2134 if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) 2144 if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
2135 type |= XFS_DQ_USER; 2145 type |= XFS_DQ_USER;
2136 if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) 2146 if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
2137 type |= XFS_DQ_PROJ; 2147 type |= XFS_DQ_PROJ;
2138 if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) 2148 if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
2139 type |= XFS_DQ_GROUP; 2149 type |= XFS_DQ_GROUP;
2140 /* 2150 /*
2141 * This type of quota was turned off, so ignore this buffer 2151 * This type of quota was turned off, so ignore this buffer
2142 */ 2152 */
2143 if (log->l_quotaoffs_flag & type) 2153 if (log->l_quotaoffs_flag & type)
2144 return; 2154 return;
2145 2155
2146 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2156 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2147 } 2157 }
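The flag-to-type translation above is mechanical; a small helper expressing the same mapping (purely illustrative, the function does not exist in the patch) would be:

static uint
example_dquot_buf_type(
	ushort	blf_flags)
{
	uint	type = 0;

	/* translate buf log format flags into dquot type bits */
	if (blf_flags & XFS_BLF_UDQUOT_BUF)
		type |= XFS_DQ_USER;
	if (blf_flags & XFS_BLF_PDQUOT_BUF)
		type |= XFS_DQ_PROJ;
	if (blf_flags & XFS_BLF_GDQUOT_BUF)
		type |= XFS_DQ_GROUP;
	return type;
}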
2148 2158
2149 /* 2159 /*
2150 * This routine replays a modification made to a buffer at runtime. 2160 * This routine replays a modification made to a buffer at runtime.
2151 * There are actually two types of buffer, regular and inode, which 2161 * There are actually two types of buffer, regular and inode, which
2152 * are handled differently. Inode buffers are handled differently 2162 * are handled differently. Inode buffers are handled differently
2153 * in that we only recover a specific set of data from them, namely 2163 * in that we only recover a specific set of data from them, namely
2154 * the inode di_next_unlinked fields. This is because all other inode 2164 * the inode di_next_unlinked fields. This is because all other inode
2155 * data is actually logged via inode records and any data we replay 2165 * data is actually logged via inode records and any data we replay
2156 * here which overlaps that may be stale. 2166 * here which overlaps that may be stale.
2157 * 2167 *
2158 * When meta-data buffers are freed at run time we log a buffer item 2168 * When meta-data buffers are freed at run time we log a buffer item
2159 * with the XFS_BLF_CANCEL bit set to indicate that previous copies 2169 * with the XFS_BLF_CANCEL bit set to indicate that previous copies
2160 * of the buffer in the log should not be replayed at recovery time. 2170 * of the buffer in the log should not be replayed at recovery time.
2161 * This is so that if the blocks covered by the buffer are reused for 2171 * This is so that if the blocks covered by the buffer are reused for
2162 * file data before we crash we don't end up replaying old, freed 2172 * file data before we crash we don't end up replaying old, freed
2163 * meta-data into a user's file. 2173 * meta-data into a user's file.
2164 * 2174 *
2165 * To handle the cancellation of buffer log items, we make two passes 2175 * To handle the cancellation of buffer log items, we make two passes
2166 * over the log during recovery. During the first we build a table of 2176 * over the log during recovery. During the first we build a table of
2167 * those buffers which have been cancelled, and during the second we 2177 * those buffers which have been cancelled, and during the second we
2168 * only replay those buffers which do not have corresponding cancel 2178 * only replay those buffers which do not have corresponding cancel
2169 * records in the table. See xlog_recover_buffer_pass[1,2] 2179 * records in the table. See xlog_recover_buffer_pass[1,2]
2170 * for more details on the implementation of the table of cancel records. 2180 * for more details on the implementation of the table of cancel records.
2171 */ 2181 */
2172 STATIC int 2182 STATIC int
2173 xlog_recover_buffer_pass2( 2183 xlog_recover_buffer_pass2(
2174 struct xlog *log, 2184 struct xlog *log,
2175 struct list_head *buffer_list, 2185 struct list_head *buffer_list,
2176 struct xlog_recover_item *item) 2186 struct xlog_recover_item *item)
2177 { 2187 {
2178 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 2188 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
2179 xfs_mount_t *mp = log->l_mp; 2189 xfs_mount_t *mp = log->l_mp;
2180 xfs_buf_t *bp; 2190 xfs_buf_t *bp;
2181 int error; 2191 int error;
2182 uint buf_flags; 2192 uint buf_flags;
2183 2193
2184 /* 2194 /*
2185 * In this pass we only want to recover all the buffers which have 2195 * In this pass we only want to recover all the buffers which have
2186 * not been cancelled and are not cancellation buffers themselves. 2196 * not been cancelled and are not cancellation buffers themselves.
2187 */ 2197 */
2188 if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, 2198 if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
2189 buf_f->blf_len, buf_f->blf_flags)) { 2199 buf_f->blf_len, buf_f->blf_flags)) {
2190 trace_xfs_log_recover_buf_cancel(log, buf_f); 2200 trace_xfs_log_recover_buf_cancel(log, buf_f);
2191 return 0; 2201 return 0;
2192 } 2202 }
2193 2203
2194 trace_xfs_log_recover_buf_recover(log, buf_f); 2204 trace_xfs_log_recover_buf_recover(log, buf_f);
2195 2205
2196 buf_flags = 0; 2206 buf_flags = 0;
2197 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) 2207 if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
2198 buf_flags |= XBF_UNMAPPED; 2208 buf_flags |= XBF_UNMAPPED;
2199 2209
2200 bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, 2210 bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
2201 buf_flags, NULL); 2211 buf_flags, NULL);
2202 if (!bp) 2212 if (!bp)
2203 return XFS_ERROR(ENOMEM); 2213 return XFS_ERROR(ENOMEM);
2204 error = bp->b_error; 2214 error = bp->b_error;
2205 if (error) { 2215 if (error) {
2206 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); 2216 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
2207 xfs_buf_relse(bp); 2217 xfs_buf_relse(bp);
2208 return error; 2218 return error;
2209 } 2219 }
2210 2220
2211 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { 2221 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
2212 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2222 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
2213 } else if (buf_f->blf_flags & 2223 } else if (buf_f->blf_flags &
2214 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2224 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
2215 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2225 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2216 } else { 2226 } else {
2217 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2227 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2218 } 2228 }
2219 if (error) 2229 if (error)
2220 return XFS_ERROR(error); 2230 return XFS_ERROR(error);
2221 2231
2222 /* 2232 /*
2223 * Perform delayed write on the buffer. Asynchronous writes will be 2233 * Perform delayed write on the buffer. Asynchronous writes will be
2224 * slower when taking into account all the buffers to be flushed. 2234 * slower when taking into account all the buffers to be flushed.
2225 * 2235 *
2226 * Also make sure that only inode buffers with good sizes stay in 2236 * Also make sure that only inode buffers with good sizes stay in
2227 * the buffer cache. The kernel moves inodes in buffers of 1 block 2237 * the buffer cache. The kernel moves inodes in buffers of 1 block
2228 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode 2238 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode
2229 * buffers in the log can be a different size if the log was generated 2239 * buffers in the log can be a different size if the log was generated
2230 * by an older kernel using unclustered inode buffers or a newer kernel 2240 * by an older kernel using unclustered inode buffers or a newer kernel
2231 * running with a different inode cluster size. Regardless, if the 2241 * running with a different inode cluster size. Regardless, if the
2232 * inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) 2242 * inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE)
2233 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep 2243 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep
2234 * the buffer out of the buffer cache so that the buffer won't 2244 * the buffer out of the buffer cache so that the buffer won't
2235 * overlap with future reads of those inodes. 2245 * overlap with future reads of those inodes.
2236 */ 2246 */
2237 if (XFS_DINODE_MAGIC == 2247 if (XFS_DINODE_MAGIC ==
2238 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && 2248 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
2239 (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, 2249 (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize,
2240 (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { 2250 (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
2241 xfs_buf_stale(bp); 2251 xfs_buf_stale(bp);
2242 error = xfs_bwrite(bp); 2252 error = xfs_bwrite(bp);
2243 } else { 2253 } else {
2244 ASSERT(bp->b_target->bt_mount == mp); 2254 ASSERT(bp->b_target->bt_mount == mp);
2245 bp->b_iodone = xlog_recover_iodone; 2255 bp->b_iodone = xlog_recover_iodone;
2246 xfs_buf_delwri_queue(bp, buffer_list); 2256 xfs_buf_delwri_queue(bp, buffer_list);
2247 } 2257 }
2248 2258
2249 xfs_buf_relse(bp); 2259 xfs_buf_relse(bp);
2250 return error; 2260 return error;
2251 } 2261 }
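The inode-buffer size test near the end of the function can be read as a predicate: with hypothetical 4096-byte filesystem blocks and an 8192-byte inode cluster, only 8192-byte inode buffers are allowed to stay cached, and anything else is written out stale. A sketch of that predicate (names are illustrative, the logic mirrors the condition above):

static bool
example_inode_buf_cacheable(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	/* keep only buffers sized MAX(blocksize, inode cluster size) */
	return BBTOB(bp->b_io_length) ==
	       MAX(mp->m_sb.sb_blocksize,
		   (__uint32_t)XFS_INODE_CLUSTER_SIZE(mp));
}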
2252 2262
2253 STATIC int 2263 STATIC int
2254 xlog_recover_inode_pass2( 2264 xlog_recover_inode_pass2(
2255 struct xlog *log, 2265 struct xlog *log,
2256 struct list_head *buffer_list, 2266 struct list_head *buffer_list,
2257 struct xlog_recover_item *item) 2267 struct xlog_recover_item *item)
2258 { 2268 {
2259 xfs_inode_log_format_t *in_f; 2269 xfs_inode_log_format_t *in_f;
2260 xfs_mount_t *mp = log->l_mp; 2270 xfs_mount_t *mp = log->l_mp;
2261 xfs_buf_t *bp; 2271 xfs_buf_t *bp;
2262 xfs_dinode_t *dip; 2272 xfs_dinode_t *dip;
2263 int len; 2273 int len;
2264 xfs_caddr_t src; 2274 xfs_caddr_t src;
2265 xfs_caddr_t dest; 2275 xfs_caddr_t dest;
2266 int error; 2276 int error;
2267 int attr_index; 2277 int attr_index;
2268 uint fields; 2278 uint fields;
2269 xfs_icdinode_t *dicp; 2279 xfs_icdinode_t *dicp;
2270 int need_free = 0; 2280 int need_free = 0;
2271 2281
2272 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2282 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
2273 in_f = item->ri_buf[0].i_addr; 2283 in_f = item->ri_buf[0].i_addr;
2274 } else { 2284 } else {
2275 in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP); 2285 in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
2276 need_free = 1; 2286 need_free = 1;
2277 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 2287 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
2278 if (error) 2288 if (error)
2279 goto error; 2289 goto error;
2280 } 2290 }
2281 2291
2282 /* 2292 /*
2283 * Inode buffers can be freed; look out for that 2293 * Inode buffers can be freed; look out for that
2284 * and do not replay the inode. 2294 * and do not replay the inode.
2285 */ 2295 */
2286 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, 2296 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
2287 in_f->ilf_len, 0)) { 2297 in_f->ilf_len, 0)) {
2288 error = 0; 2298 error = 0;
2289 trace_xfs_log_recover_inode_cancel(log, in_f); 2299 trace_xfs_log_recover_inode_cancel(log, in_f);
2290 goto error; 2300 goto error;
2291 } 2301 }
2292 trace_xfs_log_recover_inode_recover(log, in_f); 2302 trace_xfs_log_recover_inode_recover(log, in_f);
2293 2303
2294 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, 2304 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
2295 NULL); 2305 NULL);
2296 if (!bp) { 2306 if (!bp) {
2297 error = ENOMEM; 2307 error = ENOMEM;
2298 goto error; 2308 goto error;
2299 } 2309 }
2300 error = bp->b_error; 2310 error = bp->b_error;
2301 if (error) { 2311 if (error) {
2302 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); 2312 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
2303 xfs_buf_relse(bp); 2313 xfs_buf_relse(bp);
2304 goto error; 2314 goto error;
2305 } 2315 }
2306 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 2316 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
2307 dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); 2317 dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
2308 2318
2309 /* 2319 /*
2310 * Make sure the place we're flushing out to really looks 2320 * Make sure the place we're flushing out to really looks
2311 * like an inode! 2321 * like an inode!
2312 */ 2322 */
2313 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { 2323 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
2314 xfs_buf_relse(bp); 2324 xfs_buf_relse(bp);
2315 xfs_alert(mp, 2325 xfs_alert(mp,
2316 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", 2326 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
2317 __func__, dip, bp, in_f->ilf_ino); 2327 __func__, dip, bp, in_f->ilf_ino);
2318 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 2328 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
2319 XFS_ERRLEVEL_LOW, mp); 2329 XFS_ERRLEVEL_LOW, mp);
2320 error = EFSCORRUPTED; 2330 error = EFSCORRUPTED;
2321 goto error; 2331 goto error;
2322 } 2332 }
2323 dicp = item->ri_buf[1].i_addr; 2333 dicp = item->ri_buf[1].i_addr;
2324 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2334 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
2325 xfs_buf_relse(bp); 2335 xfs_buf_relse(bp);
2326 xfs_alert(mp, 2336 xfs_alert(mp,
2327 "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", 2337 "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
2328 __func__, item, in_f->ilf_ino); 2338 __func__, item, in_f->ilf_ino);
2329 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 2339 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
2330 XFS_ERRLEVEL_LOW, mp); 2340 XFS_ERRLEVEL_LOW, mp);
2331 error = EFSCORRUPTED; 2341 error = EFSCORRUPTED;
2332 goto error; 2342 goto error;
2333 } 2343 }
2334 2344
2335 /* Skip replay when the on disk inode is newer than the log one */ 2345 /* Skip replay when the on disk inode is newer than the log one */
2336 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 2346 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
2337 /* 2347 /*
2338 * Deal with the wrap case, DI_MAX_FLUSH is less 2348 * Deal with the wrap case, DI_MAX_FLUSH is less
2339 * than smaller numbers 2349 * than smaller numbers
2340 */ 2350 */
2341 if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && 2351 if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
2342 dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { 2352 dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
2343 /* do nothing */ 2353 /* do nothing */
2344 } else { 2354 } else {
2345 xfs_buf_relse(bp); 2355 xfs_buf_relse(bp);
2346 trace_xfs_log_recover_inode_skip(log, in_f); 2356 trace_xfs_log_recover_inode_skip(log, in_f);
2347 error = 0; 2357 error = 0;
2348 goto error; 2358 goto error;
2349 } 2359 }
2350 } 2360 }
2351 /* Take the opportunity to reset the flush iteration count */ 2361 /* Take the opportunity to reset the flush iteration count */
2352 dicp->di_flushiter = 0; 2362 dicp->di_flushiter = 0;
2353 2363
2354 if (unlikely(S_ISREG(dicp->di_mode))) { 2364 if (unlikely(S_ISREG(dicp->di_mode))) {
2355 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2365 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2356 (dicp->di_format != XFS_DINODE_FMT_BTREE)) { 2366 (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
2357 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 2367 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
2358 XFS_ERRLEVEL_LOW, mp, dicp); 2368 XFS_ERRLEVEL_LOW, mp, dicp);
2359 xfs_buf_relse(bp); 2369 xfs_buf_relse(bp);
2360 xfs_alert(mp, 2370 xfs_alert(mp,
2361 "%s: Bad regular inode log record, rec ptr 0x%p, " 2371 "%s: Bad regular inode log record, rec ptr 0x%p, "
2362 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2372 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2363 __func__, item, dip, bp, in_f->ilf_ino); 2373 __func__, item, dip, bp, in_f->ilf_ino);
2364 error = EFSCORRUPTED; 2374 error = EFSCORRUPTED;
2365 goto error; 2375 goto error;
2366 } 2376 }
2367 } else if (unlikely(S_ISDIR(dicp->di_mode))) { 2377 } else if (unlikely(S_ISDIR(dicp->di_mode))) {
2368 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2378 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2369 (dicp->di_format != XFS_DINODE_FMT_BTREE) && 2379 (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
2370 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { 2380 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
2371 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 2381 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
2372 XFS_ERRLEVEL_LOW, mp, dicp); 2382 XFS_ERRLEVEL_LOW, mp, dicp);
2373 xfs_buf_relse(bp); 2383 xfs_buf_relse(bp);
2374 xfs_alert(mp, 2384 xfs_alert(mp,
2375 "%s: Bad dir inode log record, rec ptr 0x%p, " 2385 "%s: Bad dir inode log record, rec ptr 0x%p, "
2376 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2386 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2377 __func__, item, dip, bp, in_f->ilf_ino); 2387 __func__, item, dip, bp, in_f->ilf_ino);
2378 error = EFSCORRUPTED; 2388 error = EFSCORRUPTED;
2379 goto error; 2389 goto error;
2380 } 2390 }
2381 } 2391 }
2382 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ 2392 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
2383 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 2393 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
2384 XFS_ERRLEVEL_LOW, mp, dicp); 2394 XFS_ERRLEVEL_LOW, mp, dicp);
2385 xfs_buf_relse(bp); 2395 xfs_buf_relse(bp);
2386 xfs_alert(mp, 2396 xfs_alert(mp,
2387 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 2397 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
2388 "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 2398 "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
2389 __func__, item, dip, bp, in_f->ilf_ino, 2399 __func__, item, dip, bp, in_f->ilf_ino,
2390 dicp->di_nextents + dicp->di_anextents, 2400 dicp->di_nextents + dicp->di_anextents,
2391 dicp->di_nblocks); 2401 dicp->di_nblocks);
2392 error = EFSCORRUPTED; 2402 error = EFSCORRUPTED;
2393 goto error; 2403 goto error;
2394 } 2404 }
2395 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 2405 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
2396 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 2406 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
2397 XFS_ERRLEVEL_LOW, mp, dicp); 2407 XFS_ERRLEVEL_LOW, mp, dicp);
2398 xfs_buf_relse(bp); 2408 xfs_buf_relse(bp);
2399 xfs_alert(mp, 2409 xfs_alert(mp,
2400 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 2410 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
2401 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, 2411 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
2402 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); 2412 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
2403 error = EFSCORRUPTED; 2413 error = EFSCORRUPTED;
2404 goto error; 2414 goto error;
2405 } 2415 }
2406 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { 2416 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
2407 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 2417 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
2408 XFS_ERRLEVEL_LOW, mp, dicp); 2418 XFS_ERRLEVEL_LOW, mp, dicp);
2409 xfs_buf_relse(bp); 2419 xfs_buf_relse(bp);
2410 xfs_alert(mp, 2420 xfs_alert(mp,
2411 "%s: Bad inode log record length %d, rec ptr 0x%p", 2421 "%s: Bad inode log record length %d, rec ptr 0x%p",
2412 __func__, item->ri_buf[1].i_len, item); 2422 __func__, item->ri_buf[1].i_len, item);
2413 error = EFSCORRUPTED; 2423 error = EFSCORRUPTED;
2414 goto error; 2424 goto error;
2415 } 2425 }
2416 2426
2417 /* The core is in in-core format */ 2427 /* The core is in in-core format */
2418 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); 2428 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
2419 2429
2420 /* the rest is in on-disk format */ 2430 /* the rest is in on-disk format */
2421 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { 2431 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
2422 memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), 2432 memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
2423 item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), 2433 item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
2424 item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); 2434 item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
2425 } 2435 }
2426 2436
2427 fields = in_f->ilf_fields; 2437 fields = in_f->ilf_fields;
2428 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { 2438 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
2429 case XFS_ILOG_DEV: 2439 case XFS_ILOG_DEV:
2430 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); 2440 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
2431 break; 2441 break;
2432 case XFS_ILOG_UUID: 2442 case XFS_ILOG_UUID:
2433 memcpy(XFS_DFORK_DPTR(dip), 2443 memcpy(XFS_DFORK_DPTR(dip),
2434 &in_f->ilf_u.ilfu_uuid, 2444 &in_f->ilf_u.ilfu_uuid,
2435 sizeof(uuid_t)); 2445 sizeof(uuid_t));
2436 break; 2446 break;
2437 } 2447 }
2438 2448
2439 if (in_f->ilf_size == 2) 2449 if (in_f->ilf_size == 2)
2440 goto write_inode_buffer; 2450 goto write_inode_buffer;
2441 len = item->ri_buf[2].i_len; 2451 len = item->ri_buf[2].i_len;
2442 src = item->ri_buf[2].i_addr; 2452 src = item->ri_buf[2].i_addr;
2443 ASSERT(in_f->ilf_size <= 4); 2453 ASSERT(in_f->ilf_size <= 4);
2444 ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); 2454 ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
2445 ASSERT(!(fields & XFS_ILOG_DFORK) || 2455 ASSERT(!(fields & XFS_ILOG_DFORK) ||
2446 (len == in_f->ilf_dsize)); 2456 (len == in_f->ilf_dsize));
2447 2457
2448 switch (fields & XFS_ILOG_DFORK) { 2458 switch (fields & XFS_ILOG_DFORK) {
2449 case XFS_ILOG_DDATA: 2459 case XFS_ILOG_DDATA:
2450 case XFS_ILOG_DEXT: 2460 case XFS_ILOG_DEXT:
2451 memcpy(XFS_DFORK_DPTR(dip), src, len); 2461 memcpy(XFS_DFORK_DPTR(dip), src, len);
2452 break; 2462 break;
2453 2463
2454 case XFS_ILOG_DBROOT: 2464 case XFS_ILOG_DBROOT:
2455 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, 2465 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
2456 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), 2466 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
2457 XFS_DFORK_DSIZE(dip, mp)); 2467 XFS_DFORK_DSIZE(dip, mp));
2458 break; 2468 break;
2459 2469
2460 default: 2470 default:
2461 /* 2471 /*
2462 * There are no data fork flags set. 2472 * There are no data fork flags set.
2463 */ 2473 */
2464 ASSERT((fields & XFS_ILOG_DFORK) == 0); 2474 ASSERT((fields & XFS_ILOG_DFORK) == 0);
2465 break; 2475 break;
2466 } 2476 }
2467 2477
2468 /* 2478 /*
2469 * If we logged any attribute data, recover it. There may or 2479 * If we logged any attribute data, recover it. There may or
2470 * may not have been any other non-core data logged in this 2480 * may not have been any other non-core data logged in this
2471 * transaction. 2481 * transaction.
2472 */ 2482 */
2473 if (in_f->ilf_fields & XFS_ILOG_AFORK) { 2483 if (in_f->ilf_fields & XFS_ILOG_AFORK) {
2474 if (in_f->ilf_fields & XFS_ILOG_DFORK) { 2484 if (in_f->ilf_fields & XFS_ILOG_DFORK) {
2475 attr_index = 3; 2485 attr_index = 3;
2476 } else { 2486 } else {
2477 attr_index = 2; 2487 attr_index = 2;
2478 } 2488 }
2479 len = item->ri_buf[attr_index].i_len; 2489 len = item->ri_buf[attr_index].i_len;
2480 src = item->ri_buf[attr_index].i_addr; 2490 src = item->ri_buf[attr_index].i_addr;
2481 ASSERT(len == in_f->ilf_asize); 2491 ASSERT(len == in_f->ilf_asize);
2482 2492
2483 switch (in_f->ilf_fields & XFS_ILOG_AFORK) { 2493 switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
2484 case XFS_ILOG_ADATA: 2494 case XFS_ILOG_ADATA:
2485 case XFS_ILOG_AEXT: 2495 case XFS_ILOG_AEXT:
2486 dest = XFS_DFORK_APTR(dip); 2496 dest = XFS_DFORK_APTR(dip);
2487 ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); 2497 ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
2488 memcpy(dest, src, len); 2498 memcpy(dest, src, len);
2489 break; 2499 break;
2490 2500
2491 case XFS_ILOG_ABROOT: 2501 case XFS_ILOG_ABROOT:
2492 dest = XFS_DFORK_APTR(dip); 2502 dest = XFS_DFORK_APTR(dip);
2493 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, 2503 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
2494 len, (xfs_bmdr_block_t*)dest, 2504 len, (xfs_bmdr_block_t*)dest,
2495 XFS_DFORK_ASIZE(dip, mp)); 2505 XFS_DFORK_ASIZE(dip, mp));
2496 break; 2506 break;
2497 2507
2498 default: 2508 default:
2499 xfs_warn(log->l_mp, "%s: Invalid flag", __func__); 2509 xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
2500 ASSERT(0); 2510 ASSERT(0);
2501 xfs_buf_relse(bp); 2511 xfs_buf_relse(bp);
2502 error = EIO; 2512 error = EIO;
2503 goto error; 2513 goto error;
2504 } 2514 }
2505 } 2515 }
2506 2516
2507 write_inode_buffer: 2517 write_inode_buffer:
2508 ASSERT(bp->b_target->bt_mount == mp); 2518 ASSERT(bp->b_target->bt_mount == mp);
2509 bp->b_iodone = xlog_recover_iodone; 2519 bp->b_iodone = xlog_recover_iodone;
2510 xfs_buf_delwri_queue(bp, buffer_list); 2520 xfs_buf_delwri_queue(bp, buffer_list);
2511 xfs_buf_relse(bp); 2521 xfs_buf_relse(bp);
2512 error: 2522 error:
2513 if (need_free) 2523 if (need_free)
2514 kmem_free(in_f); 2524 kmem_free(in_f);
2515 return XFS_ERROR(error); 2525 return XFS_ERROR(error);
2516 } 2526 }
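The flush-iteration comparison in the middle of this function is the subtle part: the counter wraps at DI_MAX_FLUSH (assumed here to be the largest 16-bit value), so an on-disk value at the maximum paired with a small logged value means the logged copy is actually the newer one. A hedged restatement of that decision, with an illustrative helper name:

static bool
example_skip_inode_replay(
	__uint16_t	disk_flushiter,		/* be16_to_cpu(dip->di_flushiter) */
	__uint16_t	log_flushiter)		/* dicp->di_flushiter */
{
	if (log_flushiter >= disk_flushiter)
		return false;			/* logged copy is newer: replay */
	if (disk_flushiter == DI_MAX_FLUSH &&
	    log_flushiter < (DI_MAX_FLUSH >> 1))
		return false;			/* counter wrapped: replay */
	return true;				/* on-disk copy is newer: skip */
}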
2517 2527
2518 /* 2528 /*
2519 * Recover QUOTAOFF records. We simply make a note of them in the xlog 2529 * Recover QUOTAOFF records. We simply make a note of them in the xlog
2520 * structure, so that we know not to do any dquot item or dquot buffer recovery 2530 * structure, so that we know not to do any dquot item or dquot buffer recovery
2521 * of that type. 2531 * of that type.
2522 */ 2532 */
2523 STATIC int 2533 STATIC int
2524 xlog_recover_quotaoff_pass1( 2534 xlog_recover_quotaoff_pass1(
2525 struct xlog *log, 2535 struct xlog *log,
2526 struct xlog_recover_item *item) 2536 struct xlog_recover_item *item)
2527 { 2537 {
2528 xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; 2538 xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
2529 ASSERT(qoff_f); 2539 ASSERT(qoff_f);
2530 2540
2531 /* 2541 /*
2532 * The logitem format's flag tells us if this was user quotaoff, 2542 * The logitem format's flag tells us if this was user quotaoff,
2533 * group/project quotaoff or both. 2543 * group/project quotaoff or both.
2534 */ 2544 */
2535 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 2545 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
2536 log->l_quotaoffs_flag |= XFS_DQ_USER; 2546 log->l_quotaoffs_flag |= XFS_DQ_USER;
2537 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) 2547 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
2538 log->l_quotaoffs_flag |= XFS_DQ_PROJ; 2548 log->l_quotaoffs_flag |= XFS_DQ_PROJ;
2539 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 2549 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
2540 log->l_quotaoffs_flag |= XFS_DQ_GROUP; 2550 log->l_quotaoffs_flag |= XFS_DQ_GROUP;
2541 2551
2542 return (0); 2552 return (0);
2543 } 2553 }
2544 2554
2545 /* 2555 /*
2546 * Recover a dquot record 2556 * Recover a dquot record
2547 */ 2557 */
2548 STATIC int 2558 STATIC int
2549 xlog_recover_dquot_pass2( 2559 xlog_recover_dquot_pass2(
2550 struct xlog *log, 2560 struct xlog *log,
2551 struct list_head *buffer_list, 2561 struct list_head *buffer_list,
2552 struct xlog_recover_item *item) 2562 struct xlog_recover_item *item)
2553 { 2563 {
2554 xfs_mount_t *mp = log->l_mp; 2564 xfs_mount_t *mp = log->l_mp;
2555 xfs_buf_t *bp; 2565 xfs_buf_t *bp;
2556 struct xfs_disk_dquot *ddq, *recddq; 2566 struct xfs_disk_dquot *ddq, *recddq;
2557 int error; 2567 int error;
2558 xfs_dq_logformat_t *dq_f; 2568 xfs_dq_logformat_t *dq_f;
2559 uint type; 2569 uint type;
2560 2570
2561 2571
2562 /* 2572 /*
2563 * Filesystems are required to send in quota flags at mount time. 2573 * Filesystems are required to send in quota flags at mount time.
2564 */ 2574 */
2565 if (mp->m_qflags == 0) 2575 if (mp->m_qflags == 0)
2566 return (0); 2576 return (0);
2567 2577
2568 recddq = item->ri_buf[1].i_addr; 2578 recddq = item->ri_buf[1].i_addr;
2569 if (recddq == NULL) { 2579 if (recddq == NULL) {
2570 xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); 2580 xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
2571 return XFS_ERROR(EIO); 2581 return XFS_ERROR(EIO);
2572 } 2582 }
2573 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { 2583 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
2574 xfs_alert(log->l_mp, "dquot too small (%d) in %s.", 2584 xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
2575 item->ri_buf[1].i_len, __func__); 2585 item->ri_buf[1].i_len, __func__);
2576 return XFS_ERROR(EIO); 2586 return XFS_ERROR(EIO);
2577 } 2587 }
2578 2588
2579 /* 2589 /*
2580 * This type of quota was turned off, so ignore this record. 2590 * This type of quota was turned off, so ignore this record.
2581 */ 2591 */
2582 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); 2592 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
2583 ASSERT(type); 2593 ASSERT(type);
2584 if (log->l_quotaoffs_flag & type) 2594 if (log->l_quotaoffs_flag & type)
2585 return (0); 2595 return (0);
2586 2596
2587 /* 2597 /*
2588 * At this point we know that quota was _not_ turned off. 2598 * At this point we know that quota was _not_ turned off.
2589 * Since the mount flags are not indicating to us otherwise, this 2599 * Since the mount flags are not indicating to us otherwise, this
2590 * must mean that quota is on, and the dquot needs to be replayed. 2600 * must mean that quota is on, and the dquot needs to be replayed.
2591 * Remember that we may not have fully recovered the superblock yet, 2601 * Remember that we may not have fully recovered the superblock yet,
2592 * so we can't do the usual trick of looking at the SB quota bits. 2602 * so we can't do the usual trick of looking at the SB quota bits.
2593 * 2603 *
2594 * The other possibility, of course, is that the quota subsystem was 2604 * The other possibility, of course, is that the quota subsystem was
2595 * removed since the last mount - ENOSYS. 2605 * removed since the last mount - ENOSYS.
2596 */ 2606 */
2597 dq_f = item->ri_buf[0].i_addr; 2607 dq_f = item->ri_buf[0].i_addr;
2598 ASSERT(dq_f); 2608 ASSERT(dq_f);
2599 error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2609 error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2600 "xlog_recover_dquot_pass2 (log copy)"); 2610 "xlog_recover_dquot_pass2 (log copy)");
2601 if (error) 2611 if (error)
2602 return XFS_ERROR(EIO); 2612 return XFS_ERROR(EIO);
2603 ASSERT(dq_f->qlf_len == 1); 2613 ASSERT(dq_f->qlf_len == 1);
2604 2614
2605 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, 2615 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
2606 XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, 2616 XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
2607 NULL); 2617 NULL);
2608 if (error) 2618 if (error)
2609 return error; 2619 return error;
2610 2620
2611 ASSERT(bp); 2621 ASSERT(bp);
2612 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); 2622 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
2613 2623
2614 /* 2624 /*
2615 * At least the magic num portion should be on disk because this 2625 * At least the magic num portion should be on disk because this
2616 * was among a chunk of dquots created earlier, and we did some 2626 * was among a chunk of dquots created earlier, and we did some
2617 * minimal initialization then. 2627 * minimal initialization then.
2618 */ 2628 */
2619 error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2629 error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2620 "xlog_recover_dquot_pass2"); 2630 "xlog_recover_dquot_pass2");
2621 if (error) { 2631 if (error) {
2622 xfs_buf_relse(bp); 2632 xfs_buf_relse(bp);
2623 return XFS_ERROR(EIO); 2633 return XFS_ERROR(EIO);
2624 } 2634 }
2625 2635
2626 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2636 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2627 2637
2628 ASSERT(dq_f->qlf_size == 2); 2638 ASSERT(dq_f->qlf_size == 2);
2629 ASSERT(bp->b_target->bt_mount == mp); 2639 ASSERT(bp->b_target->bt_mount == mp);
2630 bp->b_iodone = xlog_recover_iodone; 2640 bp->b_iodone = xlog_recover_iodone;
2631 xfs_buf_delwri_queue(bp, buffer_list); 2641 xfs_buf_delwri_queue(bp, buffer_list);
2632 xfs_buf_relse(bp); 2642 xfs_buf_relse(bp);
2633 2643
2634 return (0); 2644 return (0);
2635 } 2645 }
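
With the CRC fields this patch adds to struct xfs_dqblk, a dquot that is rewritten during recovery also has to have its checksum brought back up to date before the buffer is written out, or a later read-side verifier would reject the block. A minimal sketch of such a recalculation, assuming the xfs_update_cksum() helper from xfs_cksum.h and the dd_crc field added to struct xfs_dqblk (the field name and placement are taken on trust here, not from this hunk):

    /* only v5 (CRC-enabled) filesystems carry dquot CRCs */
    if (xfs_sb_version_hascrc(&mp->m_sb)) {
            struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;

            /* recompute the CRC over the whole dquot block entry,
             * skipping the CRC field itself */
            xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
                             offsetof(struct xfs_dqblk, dd_crc));
    }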
2636 2646
2637 /* 2647 /*
2638 * This routine is called to create an in-core extent free intent 2648 * This routine is called to create an in-core extent free intent
2639 * item from the efi format structure which was logged on disk. 2649 * item from the efi format structure which was logged on disk.
2640 * It allocates an in-core efi, copies the extents from the format 2650 * It allocates an in-core efi, copies the extents from the format
2641 * structure into it, and adds the efi to the AIL with the given 2651 * structure into it, and adds the efi to the AIL with the given
2642 * LSN. 2652 * LSN.
2643 */ 2653 */
2644 STATIC int 2654 STATIC int
2645 xlog_recover_efi_pass2( 2655 xlog_recover_efi_pass2(
2646 struct xlog *log, 2656 struct xlog *log,
2647 struct xlog_recover_item *item, 2657 struct xlog_recover_item *item,
2648 xfs_lsn_t lsn) 2658 xfs_lsn_t lsn)
2649 { 2659 {
2650 int error; 2660 int error;
2651 xfs_mount_t *mp = log->l_mp; 2661 xfs_mount_t *mp = log->l_mp;
2652 xfs_efi_log_item_t *efip; 2662 xfs_efi_log_item_t *efip;
2653 xfs_efi_log_format_t *efi_formatp; 2663 xfs_efi_log_format_t *efi_formatp;
2654 2664
2655 efi_formatp = item->ri_buf[0].i_addr; 2665 efi_formatp = item->ri_buf[0].i_addr;
2656 2666
2657 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2667 efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
2658 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), 2668 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
2659 &(efip->efi_format)))) { 2669 &(efip->efi_format)))) {
2660 xfs_efi_item_free(efip); 2670 xfs_efi_item_free(efip);
2661 return error; 2671 return error;
2662 } 2672 }
2663 atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); 2673 atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
2664 2674
2665 spin_lock(&log->l_ailp->xa_lock); 2675 spin_lock(&log->l_ailp->xa_lock);
2666 /* 2676 /*
2667 * xfs_trans_ail_update() drops the AIL lock. 2677 * xfs_trans_ail_update() drops the AIL lock.
2668 */ 2678 */
2669 xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); 2679 xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
2670 return 0; 2680 return 0;
2671 } 2681 }
2672 2682
2673 2683
2674 /* 2684 /*
2675 * This routine is called when an efd format structure is found in 2685 * This routine is called when an efd format structure is found in
2676 * a committed transaction in the log. Its purpose is to cancel 2686 * a committed transaction in the log. Its purpose is to cancel
2677 * the corresponding efi if it was still in the log. To do this 2687 * the corresponding efi if it was still in the log. To do this
2678 * it searches the AIL for the efi with an id equal to that in the 2688 * it searches the AIL for the efi with an id equal to that in the
2679 * efd format structure. If we find it, we remove the efi from the 2689 * efd format structure. If we find it, we remove the efi from the
2680 * AIL and free it. 2690 * AIL and free it.
2681 */ 2691 */
2682 STATIC int 2692 STATIC int
2683 xlog_recover_efd_pass2( 2693 xlog_recover_efd_pass2(
2684 struct xlog *log, 2694 struct xlog *log,
2685 struct xlog_recover_item *item) 2695 struct xlog_recover_item *item)
2686 { 2696 {
2687 xfs_efd_log_format_t *efd_formatp; 2697 xfs_efd_log_format_t *efd_formatp;
2688 xfs_efi_log_item_t *efip = NULL; 2698 xfs_efi_log_item_t *efip = NULL;
2689 xfs_log_item_t *lip; 2699 xfs_log_item_t *lip;
2690 __uint64_t efi_id; 2700 __uint64_t efi_id;
2691 struct xfs_ail_cursor cur; 2701 struct xfs_ail_cursor cur;
2692 struct xfs_ail *ailp = log->l_ailp; 2702 struct xfs_ail *ailp = log->l_ailp;
2693 2703
2694 efd_formatp = item->ri_buf[0].i_addr; 2704 efd_formatp = item->ri_buf[0].i_addr;
2695 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2705 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
2696 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2706 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
2697 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + 2707 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
2698 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); 2708 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
2699 efi_id = efd_formatp->efd_efi_id; 2709 efi_id = efd_formatp->efd_efi_id;
2700 2710
2701 /* 2711 /*
2702 * Search for the efi with the id in the efd format structure 2712 * Search for the efi with the id in the efd format structure
2703 * in the AIL. 2713 * in the AIL.
2704 */ 2714 */
2705 spin_lock(&ailp->xa_lock); 2715 spin_lock(&ailp->xa_lock);
2706 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 2716 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
2707 while (lip != NULL) { 2717 while (lip != NULL) {
2708 if (lip->li_type == XFS_LI_EFI) { 2718 if (lip->li_type == XFS_LI_EFI) {
2709 efip = (xfs_efi_log_item_t *)lip; 2719 efip = (xfs_efi_log_item_t *)lip;
2710 if (efip->efi_format.efi_id == efi_id) { 2720 if (efip->efi_format.efi_id == efi_id) {
2711 /* 2721 /*
2712 * xfs_trans_ail_delete() drops the 2722 * xfs_trans_ail_delete() drops the
2713 * AIL lock. 2723 * AIL lock.
2714 */ 2724 */
2715 xfs_trans_ail_delete(ailp, lip, 2725 xfs_trans_ail_delete(ailp, lip,
2716 SHUTDOWN_CORRUPT_INCORE); 2726 SHUTDOWN_CORRUPT_INCORE);
2717 xfs_efi_item_free(efip); 2727 xfs_efi_item_free(efip);
2718 spin_lock(&ailp->xa_lock); 2728 spin_lock(&ailp->xa_lock);
2719 break; 2729 break;
2720 } 2730 }
2721 } 2731 }
2722 lip = xfs_trans_ail_cursor_next(ailp, &cur); 2732 lip = xfs_trans_ail_cursor_next(ailp, &cur);
2723 } 2733 }
2724 xfs_trans_ail_cursor_done(ailp, &cur); 2734 xfs_trans_ail_cursor_done(ailp, &cur);
2725 spin_unlock(&ailp->xa_lock); 2735 spin_unlock(&ailp->xa_lock);
2726 2736
2727 return 0; 2737 return 0;
2728 } 2738 }
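
For context, the EFI/EFD pair that recovery matches up here is produced at runtime by the extent-free path, roughly as follows (a simplified sketch; in practice the intent and the done item usually land in separate, chained transactions):

    /* intent: record which extents are about to be freed */
    efip = xfs_trans_get_efi(tp, nextents);
    xfs_trans_log_efi_extent(tp, efip, start_block, ext_len);

    /* ... the extents are actually freed ... */

    /* done: record that the intent was carried out */
    efdp = xfs_trans_get_efd(tp, efip, nextents);
    xfs_trans_log_efd_extent(tp, efdp, start_block, ext_len);

If the system crashes after the EFI is committed but before the EFD, no EFD record reaches the log, the EFI stays in the AIL after the recovery passes, and xlog_recover_process_efis() later replays the frees.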
2729 2739
2730 /* 2740 /*
2731 * Free up any resources allocated by the transaction 2741 * Free up any resources allocated by the transaction
2732 * 2742 *
2733 * Remember that EFIs, EFDs, and IUNLINKs are handled later. 2743 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
2734 */ 2744 */
2735 STATIC void 2745 STATIC void
2736 xlog_recover_free_trans( 2746 xlog_recover_free_trans(
2737 struct xlog_recover *trans) 2747 struct xlog_recover *trans)
2738 { 2748 {
2739 xlog_recover_item_t *item, *n; 2749 xlog_recover_item_t *item, *n;
2740 int i; 2750 int i;
2741 2751
2742 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { 2752 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
2743 /* Free the regions in the item. */ 2753 /* Free the regions in the item. */
2744 list_del(&item->ri_list); 2754 list_del(&item->ri_list);
2745 for (i = 0; i < item->ri_cnt; i++) 2755 for (i = 0; i < item->ri_cnt; i++)
2746 kmem_free(item->ri_buf[i].i_addr); 2756 kmem_free(item->ri_buf[i].i_addr);
2747 /* Free the item itself */ 2757 /* Free the item itself */
2748 kmem_free(item->ri_buf); 2758 kmem_free(item->ri_buf);
2749 kmem_free(item); 2759 kmem_free(item);
2750 } 2760 }
2751 /* Free the transaction recover structure */ 2761 /* Free the transaction recover structure */
2752 kmem_free(trans); 2762 kmem_free(trans);
2753 } 2763 }
2754 2764
2755 STATIC int 2765 STATIC int
2756 xlog_recover_commit_pass1( 2766 xlog_recover_commit_pass1(
2757 struct xlog *log, 2767 struct xlog *log,
2758 struct xlog_recover *trans, 2768 struct xlog_recover *trans,
2759 struct xlog_recover_item *item) 2769 struct xlog_recover_item *item)
2760 { 2770 {
2761 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); 2771 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
2762 2772
2763 switch (ITEM_TYPE(item)) { 2773 switch (ITEM_TYPE(item)) {
2764 case XFS_LI_BUF: 2774 case XFS_LI_BUF:
2765 return xlog_recover_buffer_pass1(log, item); 2775 return xlog_recover_buffer_pass1(log, item);
2766 case XFS_LI_QUOTAOFF: 2776 case XFS_LI_QUOTAOFF:
2767 return xlog_recover_quotaoff_pass1(log, item); 2777 return xlog_recover_quotaoff_pass1(log, item);
2768 case XFS_LI_INODE: 2778 case XFS_LI_INODE:
2769 case XFS_LI_EFI: 2779 case XFS_LI_EFI:
2770 case XFS_LI_EFD: 2780 case XFS_LI_EFD:
2771 case XFS_LI_DQUOT: 2781 case XFS_LI_DQUOT:
2772 /* nothing to do in pass 1 */ 2782 /* nothing to do in pass 1 */
2773 return 0; 2783 return 0;
2774 default: 2784 default:
2775 xfs_warn(log->l_mp, "%s: invalid item type (%d)", 2785 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
2776 __func__, ITEM_TYPE(item)); 2786 __func__, ITEM_TYPE(item));
2777 ASSERT(0); 2787 ASSERT(0);
2778 return XFS_ERROR(EIO); 2788 return XFS_ERROR(EIO);
2779 } 2789 }
2780 } 2790 }
2781 2791
2782 STATIC int 2792 STATIC int
2783 xlog_recover_commit_pass2( 2793 xlog_recover_commit_pass2(
2784 struct xlog *log, 2794 struct xlog *log,
2785 struct xlog_recover *trans, 2795 struct xlog_recover *trans,
2786 struct list_head *buffer_list, 2796 struct list_head *buffer_list,
2787 struct xlog_recover_item *item) 2797 struct xlog_recover_item *item)
2788 { 2798 {
2789 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); 2799 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
2790 2800
2791 switch (ITEM_TYPE(item)) { 2801 switch (ITEM_TYPE(item)) {
2792 case XFS_LI_BUF: 2802 case XFS_LI_BUF:
2793 return xlog_recover_buffer_pass2(log, buffer_list, item); 2803 return xlog_recover_buffer_pass2(log, buffer_list, item);
2794 case XFS_LI_INODE: 2804 case XFS_LI_INODE:
2795 return xlog_recover_inode_pass2(log, buffer_list, item); 2805 return xlog_recover_inode_pass2(log, buffer_list, item);
2796 case XFS_LI_EFI: 2806 case XFS_LI_EFI:
2797 return xlog_recover_efi_pass2(log, item, trans->r_lsn); 2807 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
2798 case XFS_LI_EFD: 2808 case XFS_LI_EFD:
2799 return xlog_recover_efd_pass2(log, item); 2809 return xlog_recover_efd_pass2(log, item);
2800 case XFS_LI_DQUOT: 2810 case XFS_LI_DQUOT:
2801 return xlog_recover_dquot_pass2(log, buffer_list, item); 2811 return xlog_recover_dquot_pass2(log, buffer_list, item);
2802 case XFS_LI_QUOTAOFF: 2812 case XFS_LI_QUOTAOFF:
2803 /* nothing to do in pass2 */ 2813 /* nothing to do in pass2 */
2804 return 0; 2814 return 0;
2805 default: 2815 default:
2806 xfs_warn(log->l_mp, "%s: invalid item type (%d)", 2816 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
2807 __func__, ITEM_TYPE(item)); 2817 __func__, ITEM_TYPE(item));
2808 ASSERT(0); 2818 ASSERT(0);
2809 return XFS_ERROR(EIO); 2819 return XFS_ERROR(EIO);
2810 } 2820 }
2811 } 2821 }
2812 2822
2813 /* 2823 /*
2814 * Perform the transaction. 2824 * Perform the transaction.
2815 * 2825 *
2816 * If the transaction modifies a buffer or inode, do it now. Otherwise, 2826 * If the transaction modifies a buffer or inode, do it now. Otherwise,
2817 * EFIs and EFDs get queued up by adding entries into the AIL for them. 2827 * EFIs and EFDs get queued up by adding entries into the AIL for them.
2818 */ 2828 */
2819 STATIC int 2829 STATIC int
2820 xlog_recover_commit_trans( 2830 xlog_recover_commit_trans(
2821 struct xlog *log, 2831 struct xlog *log,
2822 struct xlog_recover *trans, 2832 struct xlog_recover *trans,
2823 int pass) 2833 int pass)
2824 { 2834 {
2825 int error = 0, error2; 2835 int error = 0, error2;
2826 xlog_recover_item_t *item; 2836 xlog_recover_item_t *item;
2827 LIST_HEAD (buffer_list); 2837 LIST_HEAD (buffer_list);
2828 2838
2829 hlist_del(&trans->r_list); 2839 hlist_del(&trans->r_list);
2830 2840
2831 error = xlog_recover_reorder_trans(log, trans, pass); 2841 error = xlog_recover_reorder_trans(log, trans, pass);
2832 if (error) 2842 if (error)
2833 return error; 2843 return error;
2834 2844
2835 list_for_each_entry(item, &trans->r_itemq, ri_list) { 2845 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2836 switch (pass) { 2846 switch (pass) {
2837 case XLOG_RECOVER_PASS1: 2847 case XLOG_RECOVER_PASS1:
2838 error = xlog_recover_commit_pass1(log, trans, item); 2848 error = xlog_recover_commit_pass1(log, trans, item);
2839 break; 2849 break;
2840 case XLOG_RECOVER_PASS2: 2850 case XLOG_RECOVER_PASS2:
2841 error = xlog_recover_commit_pass2(log, trans, 2851 error = xlog_recover_commit_pass2(log, trans,
2842 &buffer_list, item); 2852 &buffer_list, item);
2843 break; 2853 break;
2844 default: 2854 default:
2845 ASSERT(0); 2855 ASSERT(0);
2846 } 2856 }
2847 2857
2848 if (error) 2858 if (error)
2849 goto out; 2859 goto out;
2850 } 2860 }
2851 2861
2852 xlog_recover_free_trans(trans); 2862 xlog_recover_free_trans(trans);
2853 2863
2854 out: 2864 out:
2855 error2 = xfs_buf_delwri_submit(&buffer_list); 2865 error2 = xfs_buf_delwri_submit(&buffer_list);
2856 return error ? error : error2; 2866 return error ? error : error2;
2857 } 2867 }
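
The buffer_list used here is the standard delayed-write pattern: each pass-2 handler stages its change into the buffer and queues it, and the whole list is submitted once per recovered transaction. Condensed, using only calls that appear above:

    LIST_HEAD(buffer_list);

    /* in a pass-2 handler, after modifying the buffer contents */
    bp->b_iodone = xlog_recover_iodone;
    xfs_buf_delwri_queue(bp, &buffer_list);
    xfs_buf_relse(bp);

    /* back in xlog_recover_commit_trans(): flush everything queued */
    error = xfs_buf_delwri_submit(&buffer_list);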
2858 2868
2859 STATIC int 2869 STATIC int
2860 xlog_recover_unmount_trans( 2870 xlog_recover_unmount_trans(
2861 struct xlog *log, 2871 struct xlog *log,
2862 struct xlog_recover *trans) 2872 struct xlog_recover *trans)
2863 { 2873 {
2864 /* Do nothing now */ 2874 /* Do nothing now */
2865 xfs_warn(log->l_mp, "%s: Unmount LR", __func__); 2875 xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
2866 return 0; 2876 return 0;
2867 } 2877 }
2868 2878
2869 /* 2879 /*
2870 * There are two valid states of the r_state field. 0 indicates that the 2880 * There are two valid states of the r_state field. 0 indicates that the
2871 * transaction structure is in a normal state. We have either seen the 2881 * transaction structure is in a normal state. We have either seen the
2872 * start of the transaction or the last operation we added was not a partial 2882 * start of the transaction or the last operation we added was not a partial
2873 * operation. If the last operation we added to the transaction was a 2883 * operation. If the last operation we added to the transaction was a
2874 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. 2884 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
2875 * 2885 *
2876 * NOTE: skip LRs with 0 data length. 2886 * NOTE: skip LRs with 0 data length.
2877 */ 2887 */
2878 STATIC int 2888 STATIC int
2879 xlog_recover_process_data( 2889 xlog_recover_process_data(
2880 struct xlog *log, 2890 struct xlog *log,
2881 struct hlist_head rhash[], 2891 struct hlist_head rhash[],
2882 struct xlog_rec_header *rhead, 2892 struct xlog_rec_header *rhead,
2883 xfs_caddr_t dp, 2893 xfs_caddr_t dp,
2884 int pass) 2894 int pass)
2885 { 2895 {
2886 xfs_caddr_t lp; 2896 xfs_caddr_t lp;
2887 int num_logops; 2897 int num_logops;
2888 xlog_op_header_t *ohead; 2898 xlog_op_header_t *ohead;
2889 xlog_recover_t *trans; 2899 xlog_recover_t *trans;
2890 xlog_tid_t tid; 2900 xlog_tid_t tid;
2891 int error; 2901 int error;
2892 unsigned long hash; 2902 unsigned long hash;
2893 uint flags; 2903 uint flags;
2894 2904
2895 lp = dp + be32_to_cpu(rhead->h_len); 2905 lp = dp + be32_to_cpu(rhead->h_len);
2896 num_logops = be32_to_cpu(rhead->h_num_logops); 2906 num_logops = be32_to_cpu(rhead->h_num_logops);
2897 2907
2898 /* check the log format matches our own - else we can't recover */ 2908 /* check the log format matches our own - else we can't recover */
2899 if (xlog_header_check_recover(log->l_mp, rhead)) 2909 if (xlog_header_check_recover(log->l_mp, rhead))
2900 return (XFS_ERROR(EIO)); 2910 return (XFS_ERROR(EIO));
2901 2911
2902 while ((dp < lp) && num_logops) { 2912 while ((dp < lp) && num_logops) {
2903 ASSERT(dp + sizeof(xlog_op_header_t) <= lp); 2913 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
2904 ohead = (xlog_op_header_t *)dp; 2914 ohead = (xlog_op_header_t *)dp;
2905 dp += sizeof(xlog_op_header_t); 2915 dp += sizeof(xlog_op_header_t);
2906 if (ohead->oh_clientid != XFS_TRANSACTION && 2916 if (ohead->oh_clientid != XFS_TRANSACTION &&
2907 ohead->oh_clientid != XFS_LOG) { 2917 ohead->oh_clientid != XFS_LOG) {
2908 xfs_warn(log->l_mp, "%s: bad clientid 0x%x", 2918 xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
2909 __func__, ohead->oh_clientid); 2919 __func__, ohead->oh_clientid);
2910 ASSERT(0); 2920 ASSERT(0);
2911 return (XFS_ERROR(EIO)); 2921 return (XFS_ERROR(EIO));
2912 } 2922 }
2913 tid = be32_to_cpu(ohead->oh_tid); 2923 tid = be32_to_cpu(ohead->oh_tid);
2914 hash = XLOG_RHASH(tid); 2924 hash = XLOG_RHASH(tid);
2915 trans = xlog_recover_find_tid(&rhash[hash], tid); 2925 trans = xlog_recover_find_tid(&rhash[hash], tid);
2916 if (trans == NULL) { /* not found; add new tid */ 2926 if (trans == NULL) { /* not found; add new tid */
2917 if (ohead->oh_flags & XLOG_START_TRANS) 2927 if (ohead->oh_flags & XLOG_START_TRANS)
2918 xlog_recover_new_tid(&rhash[hash], tid, 2928 xlog_recover_new_tid(&rhash[hash], tid,
2919 be64_to_cpu(rhead->h_lsn)); 2929 be64_to_cpu(rhead->h_lsn));
2920 } else { 2930 } else {
2921 if (dp + be32_to_cpu(ohead->oh_len) > lp) { 2931 if (dp + be32_to_cpu(ohead->oh_len) > lp) {
2922 xfs_warn(log->l_mp, "%s: bad length 0x%x", 2932 xfs_warn(log->l_mp, "%s: bad length 0x%x",
2923 __func__, be32_to_cpu(ohead->oh_len)); 2933 __func__, be32_to_cpu(ohead->oh_len));
2924 WARN_ON(1); 2934 WARN_ON(1);
2925 return (XFS_ERROR(EIO)); 2935 return (XFS_ERROR(EIO));
2926 } 2936 }
2927 flags = ohead->oh_flags & ~XLOG_END_TRANS; 2937 flags = ohead->oh_flags & ~XLOG_END_TRANS;
2928 if (flags & XLOG_WAS_CONT_TRANS) 2938 if (flags & XLOG_WAS_CONT_TRANS)
2929 flags &= ~XLOG_CONTINUE_TRANS; 2939 flags &= ~XLOG_CONTINUE_TRANS;
2930 switch (flags) { 2940 switch (flags) {
2931 case XLOG_COMMIT_TRANS: 2941 case XLOG_COMMIT_TRANS:
2932 error = xlog_recover_commit_trans(log, 2942 error = xlog_recover_commit_trans(log,
2933 trans, pass); 2943 trans, pass);
2934 break; 2944 break;
2935 case XLOG_UNMOUNT_TRANS: 2945 case XLOG_UNMOUNT_TRANS:
2936 error = xlog_recover_unmount_trans(log, trans); 2946 error = xlog_recover_unmount_trans(log, trans);
2937 break; 2947 break;
2938 case XLOG_WAS_CONT_TRANS: 2948 case XLOG_WAS_CONT_TRANS:
2939 error = xlog_recover_add_to_cont_trans(log, 2949 error = xlog_recover_add_to_cont_trans(log,
2940 trans, dp, 2950 trans, dp,
2941 be32_to_cpu(ohead->oh_len)); 2951 be32_to_cpu(ohead->oh_len));
2942 break; 2952 break;
2943 case XLOG_START_TRANS: 2953 case XLOG_START_TRANS:
2944 xfs_warn(log->l_mp, "%s: bad transaction", 2954 xfs_warn(log->l_mp, "%s: bad transaction",
2945 __func__); 2955 __func__);
2946 ASSERT(0); 2956 ASSERT(0);
2947 error = XFS_ERROR(EIO); 2957 error = XFS_ERROR(EIO);
2948 break; 2958 break;
2949 case 0: 2959 case 0:
2950 case XLOG_CONTINUE_TRANS: 2960 case XLOG_CONTINUE_TRANS:
2951 error = xlog_recover_add_to_trans(log, trans, 2961 error = xlog_recover_add_to_trans(log, trans,
2952 dp, be32_to_cpu(ohead->oh_len)); 2962 dp, be32_to_cpu(ohead->oh_len));
2953 break; 2963 break;
2954 default: 2964 default:
2955 xfs_warn(log->l_mp, "%s: bad flag 0x%x", 2965 xfs_warn(log->l_mp, "%s: bad flag 0x%x",
2956 __func__, flags); 2966 __func__, flags);
2957 ASSERT(0); 2967 ASSERT(0);
2958 error = XFS_ERROR(EIO); 2968 error = XFS_ERROR(EIO);
2959 break; 2969 break;
2960 } 2970 }
2961 if (error) 2971 if (error)
2962 return error; 2972 return error;
2963 } 2973 }
2964 dp += be32_to_cpu(ohead->oh_len); 2974 dp += be32_to_cpu(ohead->oh_len);
2965 num_logops--; 2975 num_logops--;
2966 } 2976 }
2967 return 0; 2977 return 0;
2968 } 2978 }
2969 2979
2970 /* 2980 /*
2971 * Process an extent free intent item that was recovered from 2981 * Process an extent free intent item that was recovered from
2972 * the log. We need to free the extents that it describes. 2982 * the log. We need to free the extents that it describes.
2973 */ 2983 */
2974 STATIC int 2984 STATIC int
2975 xlog_recover_process_efi( 2985 xlog_recover_process_efi(
2976 xfs_mount_t *mp, 2986 xfs_mount_t *mp,
2977 xfs_efi_log_item_t *efip) 2987 xfs_efi_log_item_t *efip)
2978 { 2988 {
2979 xfs_efd_log_item_t *efdp; 2989 xfs_efd_log_item_t *efdp;
2980 xfs_trans_t *tp; 2990 xfs_trans_t *tp;
2981 int i; 2991 int i;
2982 int error = 0; 2992 int error = 0;
2983 xfs_extent_t *extp; 2993 xfs_extent_t *extp;
2984 xfs_fsblock_t startblock_fsb; 2994 xfs_fsblock_t startblock_fsb;
2985 2995
2986 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); 2996 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
2987 2997
2988 /* 2998 /*
2989 * First check the validity of the extents described by the 2999 * First check the validity of the extents described by the
2990 * EFI. If any are bad, then assume that all are bad and 3000 * EFI. If any are bad, then assume that all are bad and
2991 * just toss the EFI. 3001 * just toss the EFI.
2992 */ 3002 */
2993 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 3003 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
2994 extp = &(efip->efi_format.efi_extents[i]); 3004 extp = &(efip->efi_format.efi_extents[i]);
2995 startblock_fsb = XFS_BB_TO_FSB(mp, 3005 startblock_fsb = XFS_BB_TO_FSB(mp,
2996 XFS_FSB_TO_DADDR(mp, extp->ext_start)); 3006 XFS_FSB_TO_DADDR(mp, extp->ext_start));
2997 if ((startblock_fsb == 0) || 3007 if ((startblock_fsb == 0) ||
2998 (extp->ext_len == 0) || 3008 (extp->ext_len == 0) ||
2999 (startblock_fsb >= mp->m_sb.sb_dblocks) || 3009 (startblock_fsb >= mp->m_sb.sb_dblocks) ||
3000 (extp->ext_len >= mp->m_sb.sb_agblocks)) { 3010 (extp->ext_len >= mp->m_sb.sb_agblocks)) {
3001 /* 3011 /*
3002 * This will pull the EFI from the AIL and 3012 * This will pull the EFI from the AIL and
3003 * free the memory associated with it. 3013 * free the memory associated with it.
3004 */ 3014 */
3005 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 3015 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
3006 xfs_efi_release(efip, efip->efi_format.efi_nextents); 3016 xfs_efi_release(efip, efip->efi_format.efi_nextents);
3007 return XFS_ERROR(EIO); 3017 return XFS_ERROR(EIO);
3008 } 3018 }
3009 } 3019 }
3010 3020
3011 tp = xfs_trans_alloc(mp, 0); 3021 tp = xfs_trans_alloc(mp, 0);
3012 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); 3022 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
3013 if (error) 3023 if (error)
3014 goto abort_error; 3024 goto abort_error;
3015 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 3025 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
3016 3026
3017 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 3027 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
3018 extp = &(efip->efi_format.efi_extents[i]); 3028 extp = &(efip->efi_format.efi_extents[i]);
3019 error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); 3029 error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
3020 if (error) 3030 if (error)
3021 goto abort_error; 3031 goto abort_error;
3022 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, 3032 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
3023 extp->ext_len); 3033 extp->ext_len);
3024 } 3034 }
3025 3035
3026 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 3036 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
3027 error = xfs_trans_commit(tp, 0); 3037 error = xfs_trans_commit(tp, 0);
3028 return error; 3038 return error;
3029 3039
3030 abort_error: 3040 abort_error:
3031 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3041 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
3032 return error; 3042 return error;
3033 } 3043 }
3034 3044
3035 /* 3045 /*
3036 * When this is called, all of the EFIs which did not have 3046 * When this is called, all of the EFIs which did not have
3037 * corresponding EFDs should be in the AIL. What we do now 3047 * corresponding EFDs should be in the AIL. What we do now
3038 * is free the extents associated with each one. 3048 * is free the extents associated with each one.
3039 * 3049 *
3040 * Since we process the EFIs in normal transactions, they 3050 * Since we process the EFIs in normal transactions, they
3041 * will be removed at some point after the commit. This prevents 3051 * will be removed at some point after the commit. This prevents
3042 * us from just walking down the list processing each one. 3052 * us from just walking down the list processing each one.
3043 * We'll use a flag in the EFI to skip those that we've already 3053 * We'll use a flag in the EFI to skip those that we've already
3044 * processed and use the AIL iteration mechanism's generation 3054 * processed and use the AIL iteration mechanism's generation
3045 * count to try to speed this up at least a bit. 3055 * count to try to speed this up at least a bit.
3046 * 3056 *
3047 * When we start, we know that the EFIs are the only things in 3057 * When we start, we know that the EFIs are the only things in
3048 * the AIL. As we process them, however, other items are added 3058 * the AIL. As we process them, however, other items are added
3049 * to the AIL. Since everything added to the AIL must come after 3059 * to the AIL. Since everything added to the AIL must come after
3050 * everything already in the AIL, we stop processing as soon as 3060 * everything already in the AIL, we stop processing as soon as
3051 * we see something other than an EFI in the AIL. 3061 * we see something other than an EFI in the AIL.
3052 */ 3062 */
3053 STATIC int 3063 STATIC int
3054 xlog_recover_process_efis( 3064 xlog_recover_process_efis(
3055 struct xlog *log) 3065 struct xlog *log)
3056 { 3066 {
3057 xfs_log_item_t *lip; 3067 xfs_log_item_t *lip;
3058 xfs_efi_log_item_t *efip; 3068 xfs_efi_log_item_t *efip;
3059 int error = 0; 3069 int error = 0;
3060 struct xfs_ail_cursor cur; 3070 struct xfs_ail_cursor cur;
3061 struct xfs_ail *ailp; 3071 struct xfs_ail *ailp;
3062 3072
3063 ailp = log->l_ailp; 3073 ailp = log->l_ailp;
3064 spin_lock(&ailp->xa_lock); 3074 spin_lock(&ailp->xa_lock);
3065 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 3075 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3066 while (lip != NULL) { 3076 while (lip != NULL) {
3067 /* 3077 /*
3068 * We're done when we see something other than an EFI. 3078 * We're done when we see something other than an EFI.
3069 * There should be no EFIs left in the AIL now. 3079 * There should be no EFIs left in the AIL now.
3070 */ 3080 */
3071 if (lip->li_type != XFS_LI_EFI) { 3081 if (lip->li_type != XFS_LI_EFI) {
3072 #ifdef DEBUG 3082 #ifdef DEBUG
3073 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 3083 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
3074 ASSERT(lip->li_type != XFS_LI_EFI); 3084 ASSERT(lip->li_type != XFS_LI_EFI);
3075 #endif 3085 #endif
3076 break; 3086 break;
3077 } 3087 }
3078 3088
3079 /* 3089 /*
3080 * Skip EFIs that we've already processed. 3090 * Skip EFIs that we've already processed.
3081 */ 3091 */
3082 efip = (xfs_efi_log_item_t *)lip; 3092 efip = (xfs_efi_log_item_t *)lip;
3083 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { 3093 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
3084 lip = xfs_trans_ail_cursor_next(ailp, &cur); 3094 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3085 continue; 3095 continue;
3086 } 3096 }
3087 3097
3088 spin_unlock(&ailp->xa_lock); 3098 spin_unlock(&ailp->xa_lock);
3089 error = xlog_recover_process_efi(log->l_mp, efip); 3099 error = xlog_recover_process_efi(log->l_mp, efip);
3090 spin_lock(&ailp->xa_lock); 3100 spin_lock(&ailp->xa_lock);
3091 if (error) 3101 if (error)
3092 goto out; 3102 goto out;
3093 lip = xfs_trans_ail_cursor_next(ailp, &cur); 3103 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3094 } 3104 }
3095 out: 3105 out:
3096 xfs_trans_ail_cursor_done(ailp, &cur); 3106 xfs_trans_ail_cursor_done(ailp, &cur);
3097 spin_unlock(&ailp->xa_lock); 3107 spin_unlock(&ailp->xa_lock);
3098 return error; 3108 return error;
3099 } 3109 }
3100 3110
3101 /* 3111 /*
3102 * This routine performs a transaction to null out a bad inode pointer 3112 * This routine performs a transaction to null out a bad inode pointer
3103 * in an agi unlinked inode hash bucket. 3113 * in an agi unlinked inode hash bucket.
3104 */ 3114 */
3105 STATIC void 3115 STATIC void
3106 xlog_recover_clear_agi_bucket( 3116 xlog_recover_clear_agi_bucket(
3107 xfs_mount_t *mp, 3117 xfs_mount_t *mp,
3108 xfs_agnumber_t agno, 3118 xfs_agnumber_t agno,
3109 int bucket) 3119 int bucket)
3110 { 3120 {
3111 xfs_trans_t *tp; 3121 xfs_trans_t *tp;
3112 xfs_agi_t *agi; 3122 xfs_agi_t *agi;
3113 xfs_buf_t *agibp; 3123 xfs_buf_t *agibp;
3114 int offset; 3124 int offset;
3115 int error; 3125 int error;
3116 3126
3117 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); 3127 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
3118 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 3128 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
3119 0, 0, 0); 3129 0, 0, 0);
3120 if (error) 3130 if (error)
3121 goto out_abort; 3131 goto out_abort;
3122 3132
3123 error = xfs_read_agi(mp, tp, agno, &agibp); 3133 error = xfs_read_agi(mp, tp, agno, &agibp);
3124 if (error) 3134 if (error)
3125 goto out_abort; 3135 goto out_abort;
3126 3136
3127 agi = XFS_BUF_TO_AGI(agibp); 3137 agi = XFS_BUF_TO_AGI(agibp);
3128 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 3138 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
3129 offset = offsetof(xfs_agi_t, agi_unlinked) + 3139 offset = offsetof(xfs_agi_t, agi_unlinked) +
3130 (sizeof(xfs_agino_t) * bucket); 3140 (sizeof(xfs_agino_t) * bucket);
3131 xfs_trans_log_buf(tp, agibp, offset, 3141 xfs_trans_log_buf(tp, agibp, offset,
3132 (offset + sizeof(xfs_agino_t) - 1)); 3142 (offset + sizeof(xfs_agino_t) - 1));
3133 3143
3134 error = xfs_trans_commit(tp, 0); 3144 error = xfs_trans_commit(tp, 0);
3135 if (error) 3145 if (error)
3136 goto out_error; 3146 goto out_error;
3137 return; 3147 return;
3138 3148
3139 out_abort: 3149 out_abort:
3140 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3150 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
3141 out_error: 3151 out_error:
3142 xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); 3152 xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
3143 return; 3153 return;
3144 } 3154 }
3145 3155
3146 STATIC xfs_agino_t 3156 STATIC xfs_agino_t
3147 xlog_recover_process_one_iunlink( 3157 xlog_recover_process_one_iunlink(
3148 struct xfs_mount *mp, 3158 struct xfs_mount *mp,
3149 xfs_agnumber_t agno, 3159 xfs_agnumber_t agno,
3150 xfs_agino_t agino, 3160 xfs_agino_t agino,
3151 int bucket) 3161 int bucket)
3152 { 3162 {
3153 struct xfs_buf *ibp; 3163 struct xfs_buf *ibp;
3154 struct xfs_dinode *dip; 3164 struct xfs_dinode *dip;
3155 struct xfs_inode *ip; 3165 struct xfs_inode *ip;
3156 xfs_ino_t ino; 3166 xfs_ino_t ino;
3157 int error; 3167 int error;
3158 3168
3159 ino = XFS_AGINO_TO_INO(mp, agno, agino); 3169 ino = XFS_AGINO_TO_INO(mp, agno, agino);
3160 error = xfs_iget(mp, NULL, ino, 0, 0, &ip); 3170 error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
3161 if (error) 3171 if (error)
3162 goto fail; 3172 goto fail;
3163 3173
3164 /* 3174 /*
3165 * Get the on disk inode to find the next inode in the bucket. 3175 * Get the on disk inode to find the next inode in the bucket.
3166 */ 3176 */
3167 error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0); 3177 error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
3168 if (error) 3178 if (error)
3169 goto fail_iput; 3179 goto fail_iput;
3170 3180
3171 ASSERT(ip->i_d.di_nlink == 0); 3181 ASSERT(ip->i_d.di_nlink == 0);
3172 ASSERT(ip->i_d.di_mode != 0); 3182 ASSERT(ip->i_d.di_mode != 0);
3173 3183
3174 /* setup for the next pass */ 3184 /* setup for the next pass */
3175 agino = be32_to_cpu(dip->di_next_unlinked); 3185 agino = be32_to_cpu(dip->di_next_unlinked);
3176 xfs_buf_relse(ibp); 3186 xfs_buf_relse(ibp);
3177 3187
3178 /* 3188 /*
3179 * Prevent any DMAPI event from being sent when the reference on 3189 * Prevent any DMAPI event from being sent when the reference on
3180 * the inode is dropped. 3190 * the inode is dropped.
3181 */ 3191 */
3182 ip->i_d.di_dmevmask = 0; 3192 ip->i_d.di_dmevmask = 0;
3183 3193
3184 IRELE(ip); 3194 IRELE(ip);
3185 return agino; 3195 return agino;
3186 3196
3187 fail_iput: 3197 fail_iput:
3188 IRELE(ip); 3198 IRELE(ip);
3189 fail: 3199 fail:
3190 /* 3200 /*
3191 * We can't read in the inode this bucket points to, or this inode 3201 * We can't read in the inode this bucket points to, or this inode
3192 * is messed up. Just ditch this bucket of inodes. We will lose 3202 * is messed up. Just ditch this bucket of inodes. We will lose
3193 * some inodes and space, but at least we won't hang. 3203 * some inodes and space, but at least we won't hang.
3194 * 3204 *
3195 * Call xlog_recover_clear_agi_bucket() to perform a transaction to 3205 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
3196 * clear the inode pointer in the bucket. 3206 * clear the inode pointer in the bucket.
3197 */ 3207 */
3198 xlog_recover_clear_agi_bucket(mp, agno, bucket); 3208 xlog_recover_clear_agi_bucket(mp, agno, bucket);
3199 return NULLAGINO; 3209 return NULLAGINO;
3200 } 3210 }
3201 3211
3202 /* 3212 /*
3203 * xlog_iunlink_recover 3213 * xlog_iunlink_recover
3204 * 3214 *
3205 * This is called during recovery to process any inodes which 3215 * This is called during recovery to process any inodes which
3206 * we unlinked but not freed when the system crashed. These 3216 * we unlinked but not freed when the system crashed. These
3207 * inodes will be on the lists in the AGI blocks. What we do 3217 * inodes will be on the lists in the AGI blocks. What we do
3208 * here is scan all the AGIs and fully truncate and free any 3218 * here is scan all the AGIs and fully truncate and free any
3209 * inodes found on the lists. Each inode is removed from the 3219 * inodes found on the lists. Each inode is removed from the
3210 * lists when it has been fully truncated and is freed. The 3220 * lists when it has been fully truncated and is freed. The
3211 * freeing of the inode and its removal from the list must be 3221 * freeing of the inode and its removal from the list must be
3212 * atomic. 3222 * atomic.
3213 */ 3223 */
3214 STATIC void 3224 STATIC void
3215 xlog_recover_process_iunlinks( 3225 xlog_recover_process_iunlinks(
3216 struct xlog *log) 3226 struct xlog *log)
3217 { 3227 {
3218 xfs_mount_t *mp; 3228 xfs_mount_t *mp;
3219 xfs_agnumber_t agno; 3229 xfs_agnumber_t agno;
3220 xfs_agi_t *agi; 3230 xfs_agi_t *agi;
3221 xfs_buf_t *agibp; 3231 xfs_buf_t *agibp;
3222 xfs_agino_t agino; 3232 xfs_agino_t agino;
3223 int bucket; 3233 int bucket;
3224 int error; 3234 int error;
3225 uint mp_dmevmask; 3235 uint mp_dmevmask;
3226 3236
3227 mp = log->l_mp; 3237 mp = log->l_mp;
3228 3238
3229 /* 3239 /*
3230 * Prevent any DMAPI event from being sent while in this function. 3240 * Prevent any DMAPI event from being sent while in this function.
3231 */ 3241 */
3232 mp_dmevmask = mp->m_dmevmask; 3242 mp_dmevmask = mp->m_dmevmask;
3233 mp->m_dmevmask = 0; 3243 mp->m_dmevmask = 0;
3234 3244
3235 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3245 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
3236 /* 3246 /*
3237 * Find the agi for this ag. 3247 * Find the agi for this ag.
3238 */ 3248 */
3239 error = xfs_read_agi(mp, NULL, agno, &agibp); 3249 error = xfs_read_agi(mp, NULL, agno, &agibp);
3240 if (error) { 3250 if (error) {
3241 /* 3251 /*
3242 * AGI is b0rked. Don't process it. 3252 * AGI is b0rked. Don't process it.
3243 * 3253 *
3244 * We should probably mark the filesystem as corrupt 3254 * We should probably mark the filesystem as corrupt
3245 * after we've recovered all the ag's we can.... 3255 * after we've recovered all the ag's we can....
3246 */ 3256 */
3247 continue; 3257 continue;
3248 } 3258 }
3249 /* 3259 /*
3250 * Unlock the buffer so that it can be acquired in the normal 3260 * Unlock the buffer so that it can be acquired in the normal
3251 * course of the transaction to truncate and free each inode. 3261 * course of the transaction to truncate and free each inode.
3252 * Because we are not racing with anyone else here for the AGI 3262 * Because we are not racing with anyone else here for the AGI
3253 * buffer, we don't even need to hold it locked to read the 3263 * buffer, we don't even need to hold it locked to read the
3254 * initial unlinked bucket entries out of the buffer. We keep 3264 * initial unlinked bucket entries out of the buffer. We keep
3255 * the buffer reference, though, so that it stays pinned in memory 3265 * the buffer reference, though, so that it stays pinned in memory
3256 * while we need the buffer. 3266 * while we need the buffer.
3257 */ 3267 */
3258 agi = XFS_BUF_TO_AGI(agibp); 3268 agi = XFS_BUF_TO_AGI(agibp);
3259 xfs_buf_unlock(agibp); 3269 xfs_buf_unlock(agibp);
3260 3270
3261 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { 3271 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
3262 agino = be32_to_cpu(agi->agi_unlinked[bucket]); 3272 agino = be32_to_cpu(agi->agi_unlinked[bucket]);
3263 while (agino != NULLAGINO) { 3273 while (agino != NULLAGINO) {
3264 agino = xlog_recover_process_one_iunlink(mp, 3274 agino = xlog_recover_process_one_iunlink(mp,
3265 agno, agino, bucket); 3275 agno, agino, bucket);
3266 } 3276 }
3267 } 3277 }
3268 xfs_buf_rele(agibp); 3278 xfs_buf_rele(agibp);
3269 } 3279 }
3270 3280
3271 mp->m_dmevmask = mp_dmevmask; 3281 mp->m_dmevmask = mp_dmevmask;
3272 } 3282 }
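
The lists drained above are built at unlink time: the inode is hashed into one of XFS_AGI_UNLINKED_BUCKETS buckets and pushed onto the head of that bucket's on-disk, singly linked chain. The idea, stripped of the buffer handling and logging (a sketch, not the verbatim xfs_iunlink()):

    bucket = agino % XFS_AGI_UNLINKED_BUCKETS;

    /* the new entry points at the old list head ... */
    dip->di_next_unlinked = agi->agi_unlinked[bucket];
    /* ... and the bucket head now points at the new entry */
    agi->agi_unlinked[bucket] = cpu_to_be32(agino);

Recovery simply walks di_next_unlinked from each bucket head, as xlog_recover_process_one_iunlink() does above.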
3273 3283
3274 /* 3284 /*
3275 * Unpack the log buffer data and CRC check it. If the check fails, issue a 3285 * Unpack the log buffer data and CRC check it. If the check fails, issue a
3276 * warning if and only if the CRC in the header is non-zero. This makes the 3286 * warning if and only if the CRC in the header is non-zero. This makes the
3277 * check an advisory warning, and the zero CRC check will prevent failure 3287 * check an advisory warning, and the zero CRC check will prevent failure
3278 * warnings from being emitted when upgrading the kernel from one that does not 3288 * warnings from being emitted when upgrading the kernel from one that does not
3279 * add CRCs by default. 3289 * add CRCs by default.
3280 * 3290 *
3281 * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log 3291 * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
3282 * corruption failure. 3292 * corruption failure.
3283 */ 3293 */
3284 STATIC int 3294 STATIC int
3285 xlog_unpack_data_crc( 3295 xlog_unpack_data_crc(
3286 struct xlog_rec_header *rhead, 3296 struct xlog_rec_header *rhead,
3287 xfs_caddr_t dp, 3297 xfs_caddr_t dp,
3288 struct xlog *log) 3298 struct xlog *log)
3289 { 3299 {
3290 __le32 crc; 3300 __le32 crc;
3291 3301
3292 crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); 3302 crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
3293 if (crc != rhead->h_crc) { 3303 if (crc != rhead->h_crc) {
3294 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { 3304 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
3295 xfs_alert(log->l_mp, 3305 xfs_alert(log->l_mp,
3296 "log record CRC mismatch: found 0x%x, expected 0x%x.\n", 3306 "log record CRC mismatch: found 0x%x, expected 0x%x.\n",
3297 le32_to_cpu(rhead->h_crc), 3307 le32_to_cpu(rhead->h_crc),
3298 le32_to_cpu(crc)); 3308 le32_to_cpu(crc));
3299 xfs_hex_dump(dp, 32); 3309 xfs_hex_dump(dp, 32);
3300 } 3310 }
3301 3311
3302 /* 3312 /*
3303 * If we've detected a log record corruption, then we can't 3313 * If we've detected a log record corruption, then we can't
3304 * recover past this point. Abort recovery if we are enforcing 3314 * recover past this point. Abort recovery if we are enforcing
3305 * CRC protection by punting an error back up the stack. 3315 * CRC protection by punting an error back up the stack.
3306 */ 3316 */
3307 if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) 3317 if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
3308 return EFSCORRUPTED; 3318 return EFSCORRUPTED;
3309 } 3319 }
3310 3320
3311 return 0; 3321 return 0;
3312 } 3322 }
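
The resulting policy is easier to read as a decision table (restating the checks above):

    /*
     * computed crc == h_crc                     -> record accepted
     * mismatch, h_crc == 0, sb not CRC-enabled  -> accepted silently
     *                                              (log written by a pre-CRC kernel)
     * mismatch, h_crc != 0, sb not CRC-enabled  -> warning only, recovery continues
     * mismatch, sb CRC-enabled                  -> warning plus EFSCORRUPTED, recovery stops
     */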
3313 3323
3314 STATIC int 3324 STATIC int
3315 xlog_unpack_data( 3325 xlog_unpack_data(
3316 struct xlog_rec_header *rhead, 3326 struct xlog_rec_header *rhead,
3317 xfs_caddr_t dp, 3327 xfs_caddr_t dp,
3318 struct xlog *log) 3328 struct xlog *log)
3319 { 3329 {
3320 int i, j, k; 3330 int i, j, k;
3321 int error; 3331 int error;
3322 3332
3323 error = xlog_unpack_data_crc(rhead, dp, log); 3333 error = xlog_unpack_data_crc(rhead, dp, log);
3324 if (error) 3334 if (error)
3325 return error; 3335 return error;
3326 3336
3327 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 3337 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
3328 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3338 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
3329 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; 3339 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
3330 dp += BBSIZE; 3340 dp += BBSIZE;
3331 } 3341 }
3332 3342
3333 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3343 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3334 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; 3344 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
3335 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { 3345 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
3336 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3346 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3337 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3347 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3338 *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; 3348 *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
3339 dp += BBSIZE; 3349 dp += BBSIZE;
3340 } 3350 }
3341 } 3351 }
3342 3352
3343 return 0; 3353 return 0;
3344 } 3354 }
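
For orientation: when a record is written, the first four bytes of every 512-byte basic block in it are overwritten with the record's cycle number, and the displaced words are stashed in h_cycle_data (and, for v2 logs, in the extended headers). The unpack loop above restores them; the write side is presumably just the inverse, along these lines (a sketch with assumed variable names, not the verbatim xlog_sync() path):

    for (i = 0; i < BTOBB(size); i++) {
            rhead->h_cycle_data[i] = *(__be32 *)dp; /* save the real data word */
            *(__be32 *)dp = cycle_word;             /* stamp the (big-endian) cycle number */
            dp += BBSIZE;
    }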
3345 3355
3346 STATIC int 3356 STATIC int
3347 xlog_valid_rec_header( 3357 xlog_valid_rec_header(
3348 struct xlog *log, 3358 struct xlog *log,
3349 struct xlog_rec_header *rhead, 3359 struct xlog_rec_header *rhead,
3350 xfs_daddr_t blkno) 3360 xfs_daddr_t blkno)
3351 { 3361 {
3352 int hlen; 3362 int hlen;
3353 3363
3354 if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { 3364 if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
3355 XFS_ERROR_REPORT("xlog_valid_rec_header(1)", 3365 XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
3356 XFS_ERRLEVEL_LOW, log->l_mp); 3366 XFS_ERRLEVEL_LOW, log->l_mp);
3357 return XFS_ERROR(EFSCORRUPTED); 3367 return XFS_ERROR(EFSCORRUPTED);
3358 } 3368 }
3359 if (unlikely( 3369 if (unlikely(
3360 (!rhead->h_version || 3370 (!rhead->h_version ||
3361 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 3371 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
3362 xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", 3372 xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
3363 __func__, be32_to_cpu(rhead->h_version)); 3373 __func__, be32_to_cpu(rhead->h_version));
3364 return XFS_ERROR(EIO); 3374 return XFS_ERROR(EIO);
3365 } 3375 }
3366 3376
3367 /* LR body must have data or it wouldn't have been written */ 3377 /* LR body must have data or it wouldn't have been written */
3368 hlen = be32_to_cpu(rhead->h_len); 3378 hlen = be32_to_cpu(rhead->h_len);
3369 if (unlikely( hlen <= 0 || hlen > INT_MAX )) { 3379 if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
3370 XFS_ERROR_REPORT("xlog_valid_rec_header(2)", 3380 XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
3371 XFS_ERRLEVEL_LOW, log->l_mp); 3381 XFS_ERRLEVEL_LOW, log->l_mp);
3372 return XFS_ERROR(EFSCORRUPTED); 3382 return XFS_ERROR(EFSCORRUPTED);
3373 } 3383 }
3374 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { 3384 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
3375 XFS_ERROR_REPORT("xlog_valid_rec_header(3)", 3385 XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
3376 XFS_ERRLEVEL_LOW, log->l_mp); 3386 XFS_ERRLEVEL_LOW, log->l_mp);
3377 return XFS_ERROR(EFSCORRUPTED); 3387 return XFS_ERROR(EFSCORRUPTED);
3378 } 3388 }
3379 return 0; 3389 return 0;
3380 } 3390 }
3381 3391
3382 /* 3392 /*
3383 * Read the log from tail to head and process the log records found. 3393 * Read the log from tail to head and process the log records found.
3384 * Handle the two cases where the tail and head are in the same cycle 3394 * Handle the two cases where the tail and head are in the same cycle
3385 * and where the active portion of the log wraps around the end of 3395 * and where the active portion of the log wraps around the end of
3386 * the physical log separately. The pass parameter is passed through 3396 * the physical log separately. The pass parameter is passed through
3387 * to the routines called to process the data and is not looked at 3397 * to the routines called to process the data and is not looked at
3388 * here. 3398 * here.
3389 */ 3399 */
3390 STATIC int 3400 STATIC int
3391 xlog_do_recovery_pass( 3401 xlog_do_recovery_pass(
3392 struct xlog *log, 3402 struct xlog *log,
3393 xfs_daddr_t head_blk, 3403 xfs_daddr_t head_blk,
3394 xfs_daddr_t tail_blk, 3404 xfs_daddr_t tail_blk,
3395 int pass) 3405 int pass)
3396 { 3406 {
3397 xlog_rec_header_t *rhead; 3407 xlog_rec_header_t *rhead;
3398 xfs_daddr_t blk_no; 3408 xfs_daddr_t blk_no;
3399 xfs_caddr_t offset; 3409 xfs_caddr_t offset;
3400 xfs_buf_t *hbp, *dbp; 3410 xfs_buf_t *hbp, *dbp;
3401 int error = 0, h_size; 3411 int error = 0, h_size;
3402 int bblks, split_bblks; 3412 int bblks, split_bblks;
3403 int hblks, split_hblks, wrapped_hblks; 3413 int hblks, split_hblks, wrapped_hblks;
3404 struct hlist_head rhash[XLOG_RHASH_SIZE]; 3414 struct hlist_head rhash[XLOG_RHASH_SIZE];
3405 3415
3406 ASSERT(head_blk != tail_blk); 3416 ASSERT(head_blk != tail_blk);
3407 3417
3408 /* 3418 /*
3409 * Read the header of the tail block and get the iclog buffer size from 3419 * Read the header of the tail block and get the iclog buffer size from
3410 * h_size. Use this to tell how many sectors make up the log header. 3420 * h_size. Use this to tell how many sectors make up the log header.
3411 */ 3421 */
3412 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3422 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3413 /* 3423 /*
3414 * When using variable length iclogs, read first sector of 3424 * When using variable length iclogs, read first sector of
3415 * iclog header and extract the header size from it. Get a 3425 * iclog header and extract the header size from it. Get a
3416 * new hbp that is the correct size. 3426 * new hbp that is the correct size.
3417 */ 3427 */
3418 hbp = xlog_get_bp(log, 1); 3428 hbp = xlog_get_bp(log, 1);
3419 if (!hbp) 3429 if (!hbp)
3420 return ENOMEM; 3430 return ENOMEM;
3421 3431
3422 error = xlog_bread(log, tail_blk, 1, hbp, &offset); 3432 error = xlog_bread(log, tail_blk, 1, hbp, &offset);
3423 if (error) 3433 if (error)
3424 goto bread_err1; 3434 goto bread_err1;
3425 3435
3426 rhead = (xlog_rec_header_t *)offset; 3436 rhead = (xlog_rec_header_t *)offset;
3427 error = xlog_valid_rec_header(log, rhead, tail_blk); 3437 error = xlog_valid_rec_header(log, rhead, tail_blk);
3428 if (error) 3438 if (error)
3429 goto bread_err1; 3439 goto bread_err1;
3430 h_size = be32_to_cpu(rhead->h_size); 3440 h_size = be32_to_cpu(rhead->h_size);
3431 if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && 3441 if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
3432 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 3442 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
3433 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 3443 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
3434 if (h_size % XLOG_HEADER_CYCLE_SIZE) 3444 if (h_size % XLOG_HEADER_CYCLE_SIZE)
3435 hblks++; 3445 hblks++;
3436 xlog_put_bp(hbp); 3446 xlog_put_bp(hbp);
3437 hbp = xlog_get_bp(log, hblks); 3447 hbp = xlog_get_bp(log, hblks);
3438 } else { 3448 } else {
3439 hblks = 1; 3449 hblks = 1;
3440 } 3450 }
3441 } else { 3451 } else {
3442 ASSERT(log->l_sectBBsize == 1); 3452 ASSERT(log->l_sectBBsize == 1);
3443 hblks = 1; 3453 hblks = 1;
3444 hbp = xlog_get_bp(log, 1); 3454 hbp = xlog_get_bp(log, 1);
3445 h_size = XLOG_BIG_RECORD_BSIZE; 3455 h_size = XLOG_BIG_RECORD_BSIZE;
3446 } 3456 }
3447 3457
3448 if (!hbp) 3458 if (!hbp)
3449 return ENOMEM; 3459 return ENOMEM;
3450 dbp = xlog_get_bp(log, BTOBB(h_size)); 3460 dbp = xlog_get_bp(log, BTOBB(h_size));
3451 if (!dbp) { 3461 if (!dbp) {
3452 xlog_put_bp(hbp); 3462 xlog_put_bp(hbp);
3453 return ENOMEM; 3463 return ENOMEM;
3454 } 3464 }
3455 3465
3456 memset(rhash, 0, sizeof(rhash)); 3466 memset(rhash, 0, sizeof(rhash));
3457 if (tail_blk <= head_blk) { 3467 if (tail_blk <= head_blk) {
3458 for (blk_no = tail_blk; blk_no < head_blk; ) { 3468 for (blk_no = tail_blk; blk_no < head_blk; ) {
3459 error = xlog_bread(log, blk_no, hblks, hbp, &offset); 3469 error = xlog_bread(log, blk_no, hblks, hbp, &offset);
3460 if (error) 3470 if (error)
3461 goto bread_err2; 3471 goto bread_err2;
3462 3472
3463 rhead = (xlog_rec_header_t *)offset; 3473 rhead = (xlog_rec_header_t *)offset;
3464 error = xlog_valid_rec_header(log, rhead, blk_no); 3474 error = xlog_valid_rec_header(log, rhead, blk_no);
3465 if (error) 3475 if (error)
3466 goto bread_err2; 3476 goto bread_err2;
3467 3477
3468 /* blocks in data section */ 3478 /* blocks in data section */
3469 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3479 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3470 error = xlog_bread(log, blk_no + hblks, bblks, dbp, 3480 error = xlog_bread(log, blk_no + hblks, bblks, dbp,
3471 &offset); 3481 &offset);
3472 if (error) 3482 if (error)
3473 goto bread_err2; 3483 goto bread_err2;
3474 3484
3475 error = xlog_unpack_data(rhead, offset, log); 3485 error = xlog_unpack_data(rhead, offset, log);
3476 if (error) 3486 if (error)
3477 goto bread_err2; 3487 goto bread_err2;
3478 3488
3479 error = xlog_recover_process_data(log, 3489 error = xlog_recover_process_data(log,
3480 rhash, rhead, offset, pass); 3490 rhash, rhead, offset, pass);
3481 if (error) 3491 if (error)
3482 goto bread_err2; 3492 goto bread_err2;
3483 blk_no += bblks + hblks; 3493 blk_no += bblks + hblks;
3484 } 3494 }
3485 } else { 3495 } else {
3486 /* 3496 /*
3487 * Perform recovery around the end of the physical log. 3497 * Perform recovery around the end of the physical log.
3488 * When the head is not on the same cycle number as the tail, 3498 * When the head is not on the same cycle number as the tail,
3489 * we can't do a sequential recovery as above. 3499 * we can't do a sequential recovery as above.
3490 */ 3500 */
3491 blk_no = tail_blk; 3501 blk_no = tail_blk;
3492 while (blk_no < log->l_logBBsize) { 3502 while (blk_no < log->l_logBBsize) {
3493 /* 3503 /*
3494 * Check for header wrapping around physical end-of-log 3504 * Check for header wrapping around physical end-of-log
3495 */ 3505 */
3496 offset = hbp->b_addr; 3506 offset = hbp->b_addr;
3497 split_hblks = 0; 3507 split_hblks = 0;
3498 wrapped_hblks = 0; 3508 wrapped_hblks = 0;
3499 if (blk_no + hblks <= log->l_logBBsize) { 3509 if (blk_no + hblks <= log->l_logBBsize) {
3500 /* Read header in one read */ 3510 /* Read header in one read */
3501 error = xlog_bread(log, blk_no, hblks, hbp, 3511 error = xlog_bread(log, blk_no, hblks, hbp,
3502 &offset); 3512 &offset);
3503 if (error) 3513 if (error)
3504 goto bread_err2; 3514 goto bread_err2;
3505 } else { 3515 } else {
3506 /* This LR is split across physical log end */ 3516 /* This LR is split across physical log end */
3507 if (blk_no != log->l_logBBsize) { 3517 if (blk_no != log->l_logBBsize) {
3508 /* some data before physical log end */ 3518 /* some data before physical log end */
3509 ASSERT(blk_no <= INT_MAX); 3519 ASSERT(blk_no <= INT_MAX);
3510 split_hblks = log->l_logBBsize - (int)blk_no; 3520 split_hblks = log->l_logBBsize - (int)blk_no;
3511 ASSERT(split_hblks > 0); 3521 ASSERT(split_hblks > 0);
3512 error = xlog_bread(log, blk_no, 3522 error = xlog_bread(log, blk_no,
3513 split_hblks, hbp, 3523 split_hblks, hbp,
3514 &offset); 3524 &offset);
3515 if (error) 3525 if (error)
3516 goto bread_err2; 3526 goto bread_err2;
3517 } 3527 }
3518 3528
3519 /* 3529 /*
3520 * Note: this black magic still works with 3530 * Note: this black magic still works with
3521 * large sector sizes (non-512) only because: 3531 * large sector sizes (non-512) only because:
3522 * - we increased the buffer size originally 3532 * - we increased the buffer size originally
3523 * by 1 sector giving us enough extra space 3533 * by 1 sector giving us enough extra space
3524 * for the second read; 3534 * for the second read;
3525 * - the log start is guaranteed to be sector 3535 * - the log start is guaranteed to be sector
3526 * aligned; 3536 * aligned;
3527 * - we read the log end (LR header start) 3537 * - we read the log end (LR header start)
3528 * _first_, then the log start (LR header end) 3538 * _first_, then the log start (LR header end)
3529 * - order is important. 3539 * - order is important.
3530 */ 3540 */
3531 wrapped_hblks = hblks - split_hblks; 3541 wrapped_hblks = hblks - split_hblks;
3532 error = xlog_bread_offset(log, 0, 3542 error = xlog_bread_offset(log, 0,
3533 wrapped_hblks, hbp, 3543 wrapped_hblks, hbp,
3534 offset + BBTOB(split_hblks)); 3544 offset + BBTOB(split_hblks));
3535 if (error) 3545 if (error)
3536 goto bread_err2; 3546 goto bread_err2;
3537 } 3547 }
3538 rhead = (xlog_rec_header_t *)offset; 3548 rhead = (xlog_rec_header_t *)offset;
3539 error = xlog_valid_rec_header(log, rhead, 3549 error = xlog_valid_rec_header(log, rhead,
3540 split_hblks ? blk_no : 0); 3550 split_hblks ? blk_no : 0);
3541 if (error) 3551 if (error)
3542 goto bread_err2; 3552 goto bread_err2;
3543 3553
3544 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3554 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3545 blk_no += hblks; 3555 blk_no += hblks;
3546 3556
3547 /* Read in data for log record */ 3557 /* Read in data for log record */
3548 if (blk_no + bblks <= log->l_logBBsize) { 3558 if (blk_no + bblks <= log->l_logBBsize) {
3549 error = xlog_bread(log, blk_no, bblks, dbp, 3559 error = xlog_bread(log, blk_no, bblks, dbp,
3550 &offset); 3560 &offset);
3551 if (error) 3561 if (error)
3552 goto bread_err2; 3562 goto bread_err2;
3553 } else { 3563 } else {
3554 /* This log record is split across the 3564 /* This log record is split across the
3555 * physical end of log */ 3565 * physical end of log */
3556 offset = dbp->b_addr; 3566 offset = dbp->b_addr;
3557 split_bblks = 0; 3567 split_bblks = 0;
3558 if (blk_no != log->l_logBBsize) { 3568 if (blk_no != log->l_logBBsize) {
3559 /* some data is before the physical 3569 /* some data is before the physical
3560 * end of log */ 3570 * end of log */
3561 ASSERT(!wrapped_hblks); 3571 ASSERT(!wrapped_hblks);
3562 ASSERT(blk_no <= INT_MAX); 3572 ASSERT(blk_no <= INT_MAX);
3563 split_bblks = 3573 split_bblks =
3564 log->l_logBBsize - (int)blk_no; 3574 log->l_logBBsize - (int)blk_no;
3565 ASSERT(split_bblks > 0); 3575 ASSERT(split_bblks > 0);
3566 error = xlog_bread(log, blk_no, 3576 error = xlog_bread(log, blk_no,
3567 split_bblks, dbp, 3577 split_bblks, dbp,
3568 &offset); 3578 &offset);
3569 if (error) 3579 if (error)
3570 goto bread_err2; 3580 goto bread_err2;
3571 } 3581 }
3572 3582
3573 /* 3583 /*
3574 * Note: this black magic still works with 3584 * Note: this black magic still works with
3575 * large sector sizes (non-512) only because: 3585 * large sector sizes (non-512) only because:
3576 * - we increased the buffer size originally 3586 * - we increased the buffer size originally
3577 * by 1 sector giving us enough extra space 3587 * by 1 sector giving us enough extra space
3578 * for the second read; 3588 * for the second read;
3579 * - the log start is guaranteed to be sector 3589 * - the log start is guaranteed to be sector
3580 * aligned; 3590 * aligned;
3581 * - we read the log end (LR header start) 3591 * - we read the log end (LR header start)
3582 * _first_, then the log start (LR header end) 3592 * _first_, then the log start (LR header end)
3583 * - order is important. 3593 * - order is important.
3584 */ 3594 */
3585 error = xlog_bread_offset(log, 0, 3595 error = xlog_bread_offset(log, 0,
3586 bblks - split_bblks, dbp, 3596 bblks - split_bblks, dbp,
3587 offset + BBTOB(split_bblks)); 3597 offset + BBTOB(split_bblks));
3588 if (error) 3598 if (error)
3589 goto bread_err2; 3599 goto bread_err2;
3590 } 3600 }
3591 3601
3592 error = xlog_unpack_data(rhead, offset, log); 3602 error = xlog_unpack_data(rhead, offset, log);
3593 if (error) 3603 if (error)
3594 goto bread_err2; 3604 goto bread_err2;
3595 3605
3596 error = xlog_recover_process_data(log, rhash, 3606 error = xlog_recover_process_data(log, rhash,
3597 rhead, offset, pass); 3607 rhead, offset, pass);
3598 if (error) 3608 if (error)
3599 goto bread_err2; 3609 goto bread_err2;
3600 blk_no += bblks; 3610 blk_no += bblks;
3601 } 3611 }
3602 3612
3603 ASSERT(blk_no >= log->l_logBBsize); 3613 ASSERT(blk_no >= log->l_logBBsize);
3604 blk_no -= log->l_logBBsize; 3614 blk_no -= log->l_logBBsize;
3605 3615
3606 /* read first part of physical log */ 3616 /* read first part of physical log */
3607 while (blk_no < head_blk) { 3617 while (blk_no < head_blk) {
3608 error = xlog_bread(log, blk_no, hblks, hbp, &offset); 3618 error = xlog_bread(log, blk_no, hblks, hbp, &offset);
3609 if (error) 3619 if (error)
3610 goto bread_err2; 3620 goto bread_err2;
3611 3621
3612 rhead = (xlog_rec_header_t *)offset; 3622 rhead = (xlog_rec_header_t *)offset;
3613 error = xlog_valid_rec_header(log, rhead, blk_no); 3623 error = xlog_valid_rec_header(log, rhead, blk_no);
3614 if (error) 3624 if (error)
3615 goto bread_err2; 3625 goto bread_err2;
3616 3626
3617 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3627 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
3618 error = xlog_bread(log, blk_no+hblks, bblks, dbp, 3628 error = xlog_bread(log, blk_no+hblks, bblks, dbp,
3619 &offset); 3629 &offset);
3620 if (error) 3630 if (error)
3621 goto bread_err2; 3631 goto bread_err2;
3622 3632
3623 error = xlog_unpack_data(rhead, offset, log); 3633 error = xlog_unpack_data(rhead, offset, log);
3624 if (error) 3634 if (error)
3625 goto bread_err2; 3635 goto bread_err2;
3626 3636
3627 error = xlog_recover_process_data(log, rhash, 3637 error = xlog_recover_process_data(log, rhash,
3628 rhead, offset, pass); 3638 rhead, offset, pass);
3629 if (error) 3639 if (error)
3630 goto bread_err2; 3640 goto bread_err2;
3631 blk_no += bblks + hblks; 3641 blk_no += bblks + hblks;
3632 } 3642 }
3633 } 3643 }
3634 3644
3635 bread_err2: 3645 bread_err2:
3636 xlog_put_bp(dbp); 3646 xlog_put_bp(dbp);
3637 bread_err1: 3647 bread_err1:
3638 xlog_put_bp(hbp); 3648 xlog_put_bp(hbp);
3639 return error; 3649 return error;
3640 } 3650 }
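The split-read handling above (the "black magic" comments) boils down to: if a record header or record body crosses the physical end of the log, read the tail-end piece first, then wrap to block 0 and append the remainder into the same oversized buffer. A minimal user-space sketch of just that bookkeeping follows; it assumes 512-byte basic blocks and uses invented names, so it is an illustration of the arithmetic, not the kernel code itself.

#include <stdio.h>

#define BBSIZE  512                     /* assumed basic block size */
#define BBTOB(bbs) ((bbs) * BBSIZE)     /* basic blocks to bytes */

struct split_read {
	int first_blk;  /* start of the segment before the log end */
	int first_len;  /* blocks read before the physical log end */
	int wrap_len;   /* blocks read from block 0 after wrapping */
};

/* Split a read of 'len' blocks at 'blk_no' in a log of 'log_bbsize' blocks. */
static struct split_read split_log_read(int log_bbsize, int blk_no, int len)
{
	struct split_read r = { .first_blk = blk_no, .first_len = 0, .wrap_len = 0 };

	if (blk_no + len <= log_bbsize) {
		r.first_len = len;                  /* fits: one contiguous read */
	} else {
		r.first_len = log_bbsize - blk_no;  /* may be 0 at the exact end */
		r.wrap_len = len - r.first_len;     /* rest comes from block 0 */
	}
	return r;
}

int main(void)
{
	/* a 2-block header starting 1 block before the end of a 1000-block log */
	struct split_read r = split_log_read(1000, 999, 2);

	printf("read %d blks at %d, then %d blks at 0 into offset %d bytes\n",
	       r.first_len, r.first_blk, r.wrap_len, BBTOB(r.first_len));
	return 0;
}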
3641 3651
3642 /* 3652 /*
3643 * Do the recovery of the log. We actually do this in two phases. 3653 * Do the recovery of the log. We actually do this in two phases.
3644 * The two passes are necessary in order to implement the function 3654 * The two passes are necessary in order to implement the function
3645 * of cancelling a record written into the log. The first pass 3655 * of cancelling a record written into the log. The first pass
3646 * determines those things which have been cancelled, and the 3656 * determines those things which have been cancelled, and the
3647 * second pass replays log items normally except for those which 3657 * second pass replays log items normally except for those which
3648 * have been cancelled. The handling of the replay and cancellations 3658 * have been cancelled. The handling of the replay and cancellations
3649 * takes place in the log item type specific routines. 3659 * takes place in the log item type specific routines.
3650 * 3660 *
3651 * The table of items which have cancel records in the log is allocated 3661 * The table of items which have cancel records in the log is allocated
3652 * and freed at this level, since only here do we know when all of 3662 * and freed at this level, since only here do we know when all of
3653 * the log recovery has been completed. 3663 * the log recovery has been completed.
3654 */ 3664 */
3655 STATIC int 3665 STATIC int
3656 xlog_do_log_recovery( 3666 xlog_do_log_recovery(
3657 struct xlog *log, 3667 struct xlog *log,
3658 xfs_daddr_t head_blk, 3668 xfs_daddr_t head_blk,
3659 xfs_daddr_t tail_blk) 3669 xfs_daddr_t tail_blk)
3660 { 3670 {
3661 int error, i; 3671 int error, i;
3662 3672
3663 ASSERT(head_blk != tail_blk); 3673 ASSERT(head_blk != tail_blk);
3664 3674
3665 /* 3675 /*
3666 * First do a pass to find all of the cancelled buf log items. 3676 * First do a pass to find all of the cancelled buf log items.
3667 * Store them in the buf_cancel_table for use in the second pass. 3677 * Store them in the buf_cancel_table for use in the second pass.
3668 */ 3678 */
3669 log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * 3679 log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
3670 sizeof(struct list_head), 3680 sizeof(struct list_head),
3671 KM_SLEEP); 3681 KM_SLEEP);
3672 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3682 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3673 INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); 3683 INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
3674 3684
3675 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3685 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3676 XLOG_RECOVER_PASS1); 3686 XLOG_RECOVER_PASS1);
3677 if (error != 0) { 3687 if (error != 0) {
3678 kmem_free(log->l_buf_cancel_table); 3688 kmem_free(log->l_buf_cancel_table);
3679 log->l_buf_cancel_table = NULL; 3689 log->l_buf_cancel_table = NULL;
3680 return error; 3690 return error;
3681 } 3691 }
3682 /* 3692 /*
3683 * Then do a second pass to actually recover the items in the log. 3693 * Then do a second pass to actually recover the items in the log.
3684 * When it is complete free the table of buf cancel items. 3694 * When it is complete free the table of buf cancel items.
3685 */ 3695 */
3686 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3696 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3687 XLOG_RECOVER_PASS2); 3697 XLOG_RECOVER_PASS2);
3688 #ifdef DEBUG 3698 #ifdef DEBUG
3689 if (!error) { 3699 if (!error) {
3690 int i; 3700 int i;
3691 3701
3692 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3702 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
3693 ASSERT(list_empty(&log->l_buf_cancel_table[i])); 3703 ASSERT(list_empty(&log->l_buf_cancel_table[i]));
3694 } 3704 }
3695 #endif /* DEBUG */ 3705 #endif /* DEBUG */
3696 3706
3697 kmem_free(log->l_buf_cancel_table); 3707 kmem_free(log->l_buf_cancel_table);
3698 log->l_buf_cancel_table = NULL; 3708 log->l_buf_cancel_table = NULL;
3699 3709
3700 return error; 3710 return error;
3701 } 3711 }
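The two-pass scheme described above can be pictured with a small stand-alone sketch: pass 1 records the block numbers of cancelled buffers in a hash table, and pass 2 consults that table before replaying a buffer. This is purely illustrative user-space code; the table size, hash function and helper names are assumptions, not the kernel's buf_cancel_table implementation.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define BC_TABLE_SIZE 64

struct bc_entry {
	long long blkno;
	struct bc_entry *next;
};

static struct bc_entry *bc_table[BC_TABLE_SIZE];

static unsigned bc_hash(long long blkno)
{
	return (unsigned)(blkno % BC_TABLE_SIZE);
}

/* Pass 1: remember that this buffer was cancelled. */
static void bc_record(long long blkno)
{
	unsigned h = bc_hash(blkno);
	struct bc_entry *e = malloc(sizeof(*e));

	if (!e)
		abort();
	e->blkno = blkno;
	e->next = bc_table[h];
	bc_table[h] = e;
}

/* Pass 2: should replay of this buffer be skipped? */
static bool bc_cancelled(long long blkno)
{
	struct bc_entry *e;

	for (e = bc_table[bc_hash(blkno)]; e; e = e->next)
		if (e->blkno == blkno)
			return true;
	return false;
}

int main(void)
{
	bc_record(1234);                                     /* pass 1 */
	printf("1234 cancelled? %d\n", bc_cancelled(1234));  /* pass 2: 1 */
	printf("5678 cancelled? %d\n", bc_cancelled(5678));  /* pass 2: 0 */
	return 0;
}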
3702 3712
3703 /* 3713 /*
3704 * Do the actual recovery 3714 * Do the actual recovery
3705 */ 3715 */
3706 STATIC int 3716 STATIC int
3707 xlog_do_recover( 3717 xlog_do_recover(
3708 struct xlog *log, 3718 struct xlog *log,
3709 xfs_daddr_t head_blk, 3719 xfs_daddr_t head_blk,
3710 xfs_daddr_t tail_blk) 3720 xfs_daddr_t tail_blk)
3711 { 3721 {
3712 int error; 3722 int error;
3713 xfs_buf_t *bp; 3723 xfs_buf_t *bp;
3714 xfs_sb_t *sbp; 3724 xfs_sb_t *sbp;
3715 3725
3716 /* 3726 /*
3717 * First replay the images in the log. 3727 * First replay the images in the log.
3718 */ 3728 */
3719 error = xlog_do_log_recovery(log, head_blk, tail_blk); 3729 error = xlog_do_log_recovery(log, head_blk, tail_blk);
3720 if (error) 3730 if (error)
3721 return error; 3731 return error;
3722 3732
3723 /* 3733 /*
3724 * If IO errors happened during recovery, bail out. 3734 * If IO errors happened during recovery, bail out.
3725 */ 3735 */
3726 if (XFS_FORCED_SHUTDOWN(log->l_mp)) { 3736 if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
3727 return (EIO); 3737 return (EIO);
3728 } 3738 }
3729 3739
3730 /* 3740 /*
3731 * We now update the tail_lsn since much of the recovery has completed 3741 * We now update the tail_lsn since much of the recovery has completed
3732 * and there may be space available to use. If there were no extent 3742 * and there may be space available to use. If there were no extent
3733 * or iunlinks, we can free up the entire log and set the tail_lsn to 3743 * or iunlinks, we can free up the entire log and set the tail_lsn to
3734 * be the last_sync_lsn. This was set in xlog_find_tail to be the 3744 * be the last_sync_lsn. This was set in xlog_find_tail to be the
3735 * lsn of the last known good LR on disk. If there are extent frees 3745 * lsn of the last known good LR on disk. If there are extent frees
3736 * or iunlinks they will have some entries in the AIL; so we look at 3746 * or iunlinks they will have some entries in the AIL; so we look at
3737 * the AIL to determine how to set the tail_lsn. 3747 * the AIL to determine how to set the tail_lsn.
3738 */ 3748 */
3739 xlog_assign_tail_lsn(log->l_mp); 3749 xlog_assign_tail_lsn(log->l_mp);
3740 3750
3741 /* 3751 /*
3742 * Now that we've finished replaying all buffer and inode 3752 * Now that we've finished replaying all buffer and inode
3743 * updates, re-read in the superblock and reverify it. 3753 * updates, re-read in the superblock and reverify it.
3744 */ 3754 */
3745 bp = xfs_getsb(log->l_mp, 0); 3755 bp = xfs_getsb(log->l_mp, 0);
3746 XFS_BUF_UNDONE(bp); 3756 XFS_BUF_UNDONE(bp);
3747 ASSERT(!(XFS_BUF_ISWRITE(bp))); 3757 ASSERT(!(XFS_BUF_ISWRITE(bp)));
3748 XFS_BUF_READ(bp); 3758 XFS_BUF_READ(bp);
3749 XFS_BUF_UNASYNC(bp); 3759 XFS_BUF_UNASYNC(bp);
3750 bp->b_ops = &xfs_sb_buf_ops; 3760 bp->b_ops = &xfs_sb_buf_ops;
3751 xfsbdstrat(log->l_mp, bp); 3761 xfsbdstrat(log->l_mp, bp);
3752 error = xfs_buf_iowait(bp); 3762 error = xfs_buf_iowait(bp);
3753 if (error) { 3763 if (error) {
3754 xfs_buf_ioerror_alert(bp, __func__); 3764 xfs_buf_ioerror_alert(bp, __func__);
3755 ASSERT(0); 3765 ASSERT(0);
3756 xfs_buf_relse(bp); 3766 xfs_buf_relse(bp);
3757 return error; 3767 return error;
3758 } 3768 }
3759 3769
3760 /* Convert superblock from on-disk format */ 3770 /* Convert superblock from on-disk format */
3761 sbp = &log->l_mp->m_sb; 3771 sbp = &log->l_mp->m_sb;
3762 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 3772 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
3763 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3773 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
3764 ASSERT(xfs_sb_good_version(sbp)); 3774 ASSERT(xfs_sb_good_version(sbp));
3765 xfs_buf_relse(bp); 3775 xfs_buf_relse(bp);
3766 3776
3767 /* We've re-read the superblock so re-initialize per-cpu counters */ 3777 /* We've re-read the superblock so re-initialize per-cpu counters */
3768 xfs_icsb_reinit_counters(log->l_mp); 3778 xfs_icsb_reinit_counters(log->l_mp);
3769 3779
3770 xlog_recover_check_summary(log); 3780 xlog_recover_check_summary(log);
3771 3781
3772 /* Normal transactions can now occur */ 3782 /* Normal transactions can now occur */
3773 log->l_flags &= ~XLOG_ACTIVE_RECOVERY; 3783 log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
3774 return 0; 3784 return 0;
3775 } 3785 }
3776 3786
3777 /* 3787 /*
3778 * Perform recovery and re-initialize some log variables in xlog_find_tail. 3788 * Perform recovery and re-initialize some log variables in xlog_find_tail.
3779 * 3789 *
3780 * Return error or zero. 3790 * Return error or zero.
3781 */ 3791 */
3782 int 3792 int
3783 xlog_recover( 3793 xlog_recover(
3784 struct xlog *log) 3794 struct xlog *log)
3785 { 3795 {
3786 xfs_daddr_t head_blk, tail_blk; 3796 xfs_daddr_t head_blk, tail_blk;
3787 int error; 3797 int error;
3788 3798
3789 /* find the tail of the log */ 3799 /* find the tail of the log */
3790 if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) 3800 if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
3791 return error; 3801 return error;
3792 3802
3793 if (tail_blk != head_blk) { 3803 if (tail_blk != head_blk) {
3794 /* There used to be a comment here: 3804 /* There used to be a comment here:
3795 * 3805 *
3796 * disallow recovery on read-only mounts. note -- mount 3806 * disallow recovery on read-only mounts. note -- mount
3797 * checks for ENOSPC and turns it into an intelligent 3807 * checks for ENOSPC and turns it into an intelligent
3798 * error message. 3808 * error message.
3799 * ...but this is no longer true. Now, unless you specify 3809 * ...but this is no longer true. Now, unless you specify
3800 * NORECOVERY (in which case this function would never be 3810 * NORECOVERY (in which case this function would never be
3801 * called), we just go ahead and recover. We do this all 3811 * called), we just go ahead and recover. We do this all
3802 * under the vfs layer, so we can get away with it unless 3812 * under the vfs layer, so we can get away with it unless
3803 * the device itself is read-only, in which case we fail. 3813 * the device itself is read-only, in which case we fail.
3804 */ 3814 */
3805 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { 3815 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) {
3806 return error; 3816 return error;
3807 } 3817 }
3808 3818
3809 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", 3819 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
3810 log->l_mp->m_logname ? log->l_mp->m_logname 3820 log->l_mp->m_logname ? log->l_mp->m_logname
3811 : "internal"); 3821 : "internal");
3812 3822
3813 error = xlog_do_recover(log, head_blk, tail_blk); 3823 error = xlog_do_recover(log, head_blk, tail_blk);
3814 log->l_flags |= XLOG_RECOVERY_NEEDED; 3824 log->l_flags |= XLOG_RECOVERY_NEEDED;
3815 } 3825 }
3816 return error; 3826 return error;
3817 } 3827 }
3818 3828
3819 /* 3829 /*
3820 * In the first part of recovery we replay inodes and buffers and build 3830 * In the first part of recovery we replay inodes and buffers and build
3821 * up the list of extent free items which need to be processed. Here 3831 * up the list of extent free items which need to be processed. Here
3822 * we process the extent free items and clean up the on disk unlinked 3832 * we process the extent free items and clean up the on disk unlinked
3823 * inode lists. This is separated from the first part of recovery so 3833 * inode lists. This is separated from the first part of recovery so
3824 * that the root and real-time bitmap inodes can be read in from disk in 3834 * that the root and real-time bitmap inodes can be read in from disk in
3825 * between the two stages. This is necessary so that we can free space 3835 * between the two stages. This is necessary so that we can free space
3826 * in the real-time portion of the file system. 3836 * in the real-time portion of the file system.
3827 */ 3837 */
3828 int 3838 int
3829 xlog_recover_finish( 3839 xlog_recover_finish(
3830 struct xlog *log) 3840 struct xlog *log)
3831 { 3841 {
3832 /* 3842 /*
3833 * Now we're ready to do the transactions needed for the 3843 * Now we're ready to do the transactions needed for the
3834 * rest of recovery. Start with completing all the extent 3844 * rest of recovery. Start with completing all the extent
3835 * free intent records and then process the unlinked inode 3845 * free intent records and then process the unlinked inode
3836 * lists. At this point, we essentially run in normal mode 3846 * lists. At this point, we essentially run in normal mode
3837 * except that we're still performing recovery actions 3847 * except that we're still performing recovery actions
3838 * rather than accepting new requests. 3848 * rather than accepting new requests.
3839 */ 3849 */
3840 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 3850 if (log->l_flags & XLOG_RECOVERY_NEEDED) {
3841 int error; 3851 int error;
3842 error = xlog_recover_process_efis(log); 3852 error = xlog_recover_process_efis(log);
3843 if (error) { 3853 if (error) {
3844 xfs_alert(log->l_mp, "Failed to recover EFIs"); 3854 xfs_alert(log->l_mp, "Failed to recover EFIs");
3845 return error; 3855 return error;
3846 } 3856 }
3847 /* 3857 /*
3848 * Sync the log to get all the EFIs out of the AIL. 3858 * Sync the log to get all the EFIs out of the AIL.
3849 * This isn't absolutely necessary, but it helps in 3859 * This isn't absolutely necessary, but it helps in
3850 * case the unlink transactions would have problems 3860 * case the unlink transactions would have problems
3851 * pushing the EFIs out of the way. 3861 * pushing the EFIs out of the way.
3852 */ 3862 */
3853 xfs_log_force(log->l_mp, XFS_LOG_SYNC); 3863 xfs_log_force(log->l_mp, XFS_LOG_SYNC);
3854 3864
3855 xlog_recover_process_iunlinks(log); 3865 xlog_recover_process_iunlinks(log);
3856 3866
3857 xlog_recover_check_summary(log); 3867 xlog_recover_check_summary(log);
3858 3868
3859 xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", 3869 xfs_notice(log->l_mp, "Ending recovery (logdev: %s)",
3860 log->l_mp->m_logname ? log->l_mp->m_logname 3870 log->l_mp->m_logname ? log->l_mp->m_logname
3861 : "internal"); 3871 : "internal");
3862 log->l_flags &= ~XLOG_RECOVERY_NEEDED; 3872 log->l_flags &= ~XLOG_RECOVERY_NEEDED;
3863 } else { 3873 } else {
3864 xfs_info(log->l_mp, "Ending clean mount"); 3874 xfs_info(log->l_mp, "Ending clean mount");
3865 } 3875 }
3866 return 0; 3876 return 0;
3867 } 3877 }
3868 3878
3869 3879
3870 #if defined(DEBUG) 3880 #if defined(DEBUG)
3871 /* 3881 /*
3872 * Read all of the agf and agi counters and check that they 3882 * Read all of the agf and agi counters and check that they
3873 * are consistent with the superblock counters. 3883 * are consistent with the superblock counters.
3874 */ 3884 */
3875 void 3885 void
3876 xlog_recover_check_summary( 3886 xlog_recover_check_summary(
3877 struct xlog *log) 3887 struct xlog *log)
3878 { 3888 {
3879 xfs_mount_t *mp; 3889 xfs_mount_t *mp;
3880 xfs_agf_t *agfp; 3890 xfs_agf_t *agfp;
3881 xfs_buf_t *agfbp; 3891 xfs_buf_t *agfbp;
3882 xfs_buf_t *agibp; 3892 xfs_buf_t *agibp;
3883 xfs_agnumber_t agno; 3893 xfs_agnumber_t agno;
3884 __uint64_t freeblks; 3894 __uint64_t freeblks;
3885 __uint64_t itotal; 3895 __uint64_t itotal;
3886 __uint64_t ifree; 3896 __uint64_t ifree;
3887 int error; 3897 int error;
3888 3898
3889 mp = log->l_mp; 3899 mp = log->l_mp;
3890 3900
3891 freeblks = 0LL; 3901 freeblks = 0LL;
3892 itotal = 0LL; 3902 itotal = 0LL;
3893 ifree = 0LL; 3903 ifree = 0LL;
3894 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3904 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
3895 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); 3905 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
3896 if (error) { 3906 if (error) {
3897 xfs_alert(mp, "%s agf read failed agno %d error %d", 3907 xfs_alert(mp, "%s agf read failed agno %d error %d",
3898 __func__, agno, error); 3908 __func__, agno, error);
3899 } else { 3909 } else {
3900 agfp = XFS_BUF_TO_AGF(agfbp); 3910 agfp = XFS_BUF_TO_AGF(agfbp);
3901 freeblks += be32_to_cpu(agfp->agf_freeblks) + 3911 freeblks += be32_to_cpu(agfp->agf_freeblks) +
3902 be32_to_cpu(agfp->agf_flcount); 3912 be32_to_cpu(agfp->agf_flcount);
3903 xfs_buf_relse(agfbp); 3913 xfs_buf_relse(agfbp);
3904 } 3914 }
3905 3915
3906 error = xfs_read_agi(mp, NULL, agno, &agibp); 3916 error = xfs_read_agi(mp, NULL, agno, &agibp);
3907 if (error) { 3917 if (error) {
3908 xfs_alert(mp, "%s agi read failed agno %d error %d", 3918 xfs_alert(mp, "%s agi read failed agno %d error %d",
3909 __func__, agno, error); 3919 __func__, agno, error);
3910 } else { 3920 } else {
3911 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); 3921 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp);
3912 3922
3913 itotal += be32_to_cpu(agi->agi_count); 3923 itotal += be32_to_cpu(agi->agi_count);
3914 ifree += be32_to_cpu(agi->agi_freecount); 3924 ifree += be32_to_cpu(agi->agi_freecount);
3915 xfs_buf_relse(agibp); 3925 xfs_buf_relse(agibp);
3916 } 3926 }
3917 } 3927 }
3918 } 3928 }
3919 #endif /* DEBUG */ 3929 #endif /* DEBUG */
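As written, the debug helper above only accumulates freeblks, itotal and ifree from the per-AG headers. The consistency comparison the comment alludes to would look roughly like the following user-space sketch, where the structure and field names are assumptions made for illustration only.

#include <stdint.h>
#include <stdio.h>

struct sb_counters {
	uint64_t fdblocks;      /* free data blocks per superblock */
	uint64_t icount;        /* allocated inodes per superblock */
	uint64_t ifree;         /* free inodes per superblock */
};

/* Compare per-AG sums against the superblock counters; return 0 if consistent. */
static int check_summary(const struct sb_counters *sb,
			 uint64_t freeblks, uint64_t itotal, uint64_t ifree)
{
	int bad = 0;

	if (freeblks != sb->fdblocks) {
		printf("freeblks mismatch: AGs %llu vs sb %llu\n",
		       (unsigned long long)freeblks,
		       (unsigned long long)sb->fdblocks);
		bad = 1;
	}
	if (itotal != sb->icount || ifree != sb->ifree)
		bad = 1;
	return bad;
}

int main(void)
{
	struct sb_counters sb = { 1000, 64, 32 };

	return check_summary(&sb, 1000, 64, 32);   /* 0: consistent */
}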
3920 3930
1 /* 1 /*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #include "xfs.h" 18 #include "xfs.h"
19 #include "xfs_fs.h" 19 #include "xfs_fs.h"
20 #include "xfs_bit.h" 20 #include "xfs_bit.h"
21 #include "xfs_log.h" 21 #include "xfs_log.h"
22 #include "xfs_trans.h" 22 #include "xfs_trans.h"
23 #include "xfs_sb.h" 23 #include "xfs_sb.h"
24 #include "xfs_ag.h" 24 #include "xfs_ag.h"
25 #include "xfs_alloc.h" 25 #include "xfs_alloc.h"
26 #include "xfs_quota.h" 26 #include "xfs_quota.h"
27 #include "xfs_mount.h" 27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h" 28 #include "xfs_bmap_btree.h"
29 #include "xfs_ialloc_btree.h" 29 #include "xfs_ialloc_btree.h"
30 #include "xfs_dinode.h" 30 #include "xfs_dinode.h"
31 #include "xfs_inode.h" 31 #include "xfs_inode.h"
32 #include "xfs_ialloc.h" 32 #include "xfs_ialloc.h"
33 #include "xfs_itable.h" 33 #include "xfs_itable.h"
34 #include "xfs_rtalloc.h" 34 #include "xfs_rtalloc.h"
35 #include "xfs_error.h" 35 #include "xfs_error.h"
36 #include "xfs_bmap.h" 36 #include "xfs_bmap.h"
37 #include "xfs_attr.h" 37 #include "xfs_attr.h"
38 #include "xfs_buf_item.h" 38 #include "xfs_buf_item.h"
39 #include "xfs_trans_space.h" 39 #include "xfs_trans_space.h"
40 #include "xfs_utils.h" 40 #include "xfs_utils.h"
41 #include "xfs_qm.h" 41 #include "xfs_qm.h"
42 #include "xfs_trace.h" 42 #include "xfs_trace.h"
43 #include "xfs_icache.h" 43 #include "xfs_icache.h"
44 44
45 /* 45 /*
46 * The global quota manager. There is only one of these for the entire 46 * The global quota manager. There is only one of these for the entire
47 * system, _not_ one per file system. XQM keeps track of the overall 47 * system, _not_ one per file system. XQM keeps track of the overall
48 * quota functionality, including maintaining the freelist and hash 48 * quota functionality, including maintaining the freelist and hash
49 * tables of dquots. 49 * tables of dquots.
50 */ 50 */
51 STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 51 STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
52 STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 52 STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
53 STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); 53 STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
54 54
55 /* 55 /*
56 * We use the batch lookup interface to iterate over the dquots as it 56 * We use the batch lookup interface to iterate over the dquots as it
57 * currently is the only interface into the radix tree code that allows 57 * currently is the only interface into the radix tree code that allows
58 * fuzzy lookups instead of exact matches. Holding the lock over multiple 58 * fuzzy lookups instead of exact matches. Holding the lock over multiple
59 * operations is fine as all callers are used either during mount/umount 59 * operations is fine as all callers are used either during mount/umount
60 * or quotaoff. 60 * or quotaoff.
61 */ 61 */
62 #define XFS_DQ_LOOKUP_BATCH 32 62 #define XFS_DQ_LOOKUP_BATCH 32
63 63
64 STATIC int 64 STATIC int
65 xfs_qm_dquot_walk( 65 xfs_qm_dquot_walk(
66 struct xfs_mount *mp, 66 struct xfs_mount *mp,
67 int type, 67 int type,
68 int (*execute)(struct xfs_dquot *dqp, void *data), 68 int (*execute)(struct xfs_dquot *dqp, void *data),
69 void *data) 69 void *data)
70 { 70 {
71 struct xfs_quotainfo *qi = mp->m_quotainfo; 71 struct xfs_quotainfo *qi = mp->m_quotainfo;
72 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); 72 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
73 uint32_t next_index; 73 uint32_t next_index;
74 int last_error = 0; 74 int last_error = 0;
75 int skipped; 75 int skipped;
76 int nr_found; 76 int nr_found;
77 77
78 restart: 78 restart:
79 skipped = 0; 79 skipped = 0;
80 next_index = 0; 80 next_index = 0;
81 nr_found = 0; 81 nr_found = 0;
82 82
83 while (1) { 83 while (1) {
84 struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; 84 struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
85 int error = 0; 85 int error = 0;
86 int i; 86 int i;
87 87
88 mutex_lock(&qi->qi_tree_lock); 88 mutex_lock(&qi->qi_tree_lock);
89 nr_found = radix_tree_gang_lookup(tree, (void **)batch, 89 nr_found = radix_tree_gang_lookup(tree, (void **)batch,
90 next_index, XFS_DQ_LOOKUP_BATCH); 90 next_index, XFS_DQ_LOOKUP_BATCH);
91 if (!nr_found) { 91 if (!nr_found) {
92 mutex_unlock(&qi->qi_tree_lock); 92 mutex_unlock(&qi->qi_tree_lock);
93 break; 93 break;
94 } 94 }
95 95
96 for (i = 0; i < nr_found; i++) { 96 for (i = 0; i < nr_found; i++) {
97 struct xfs_dquot *dqp = batch[i]; 97 struct xfs_dquot *dqp = batch[i];
98 98
99 next_index = be32_to_cpu(dqp->q_core.d_id) + 1; 99 next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
100 100
101 error = execute(batch[i], data); 101 error = execute(batch[i], data);
102 if (error == EAGAIN) { 102 if (error == EAGAIN) {
103 skipped++; 103 skipped++;
104 continue; 104 continue;
105 } 105 }
106 if (error && last_error != EFSCORRUPTED) 106 if (error && last_error != EFSCORRUPTED)
107 last_error = error; 107 last_error = error;
108 } 108 }
109 109
110 mutex_unlock(&qi->qi_tree_lock); 110 mutex_unlock(&qi->qi_tree_lock);
111 111
112 /* bail out if the filesystem is corrupted. */ 112 /* bail out if the filesystem is corrupted. */
113 if (last_error == EFSCORRUPTED) { 113 if (last_error == EFSCORRUPTED) {
114 skipped = 0; 114 skipped = 0;
115 break; 115 break;
116 } 116 }
117 } 117 }
118 118
119 if (skipped) { 119 if (skipped) {
120 delay(1); 120 delay(1);
121 goto restart; 121 goto restart;
122 } 122 }
123 123
124 return last_error; 124 return last_error;
125 } 125 }
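For reference, a callback passed as the execute argument to xfs_qm_dquot_walk() has the shape shown below. This counting callback is hypothetical (nothing like it exists in this patch); it only illustrates the calling convention: return 0 to keep walking, or EAGAIN to have the dquot retried after the walk restarts.

/*
 * Hypothetical example only -- not part of this patch.  Counts the dquots
 * of the walked type into a caller-supplied integer.
 */
static int
xfs_qm_dqcount_one(
	struct xfs_dquot	*dqp,
	void			*data)
{
	int			*count = data;

	(*count)++;		/* dqp stays valid for the duration of the call */
	return 0;		/* 0 = keep walking, EAGAIN = retry later */
}

/*
 * Hypothetical usage:
 *	int ndquots = 0;
 *	xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqcount_one, &ndquots);
 */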
126 126
127 127
128 /* 128 /*
129 * Purge a dquot from all tracking data structures and free it. 129 * Purge a dquot from all tracking data structures and free it.
130 */ 130 */
131 STATIC int 131 STATIC int
132 xfs_qm_dqpurge( 132 xfs_qm_dqpurge(
133 struct xfs_dquot *dqp, 133 struct xfs_dquot *dqp,
134 void *data) 134 void *data)
135 { 135 {
136 struct xfs_mount *mp = dqp->q_mount; 136 struct xfs_mount *mp = dqp->q_mount;
137 struct xfs_quotainfo *qi = mp->m_quotainfo; 137 struct xfs_quotainfo *qi = mp->m_quotainfo;
138 struct xfs_dquot *gdqp = NULL; 138 struct xfs_dquot *gdqp = NULL;
139 139
140 xfs_dqlock(dqp); 140 xfs_dqlock(dqp);
141 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { 141 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
142 xfs_dqunlock(dqp); 142 xfs_dqunlock(dqp);
143 return EAGAIN; 143 return EAGAIN;
144 } 144 }
145 145
146 /* 146 /*
147 * If this quota has a group hint attached, prepare for releasing it 147 * If this quota has a group hint attached, prepare for releasing it
148 * now. 148 * now.
149 */ 149 */
150 gdqp = dqp->q_gdquot; 150 gdqp = dqp->q_gdquot;
151 if (gdqp) { 151 if (gdqp) {
152 xfs_dqlock(gdqp); 152 xfs_dqlock(gdqp);
153 dqp->q_gdquot = NULL; 153 dqp->q_gdquot = NULL;
154 } 154 }
155 155
156 dqp->dq_flags |= XFS_DQ_FREEING; 156 dqp->dq_flags |= XFS_DQ_FREEING;
157 157
158 xfs_dqflock(dqp); 158 xfs_dqflock(dqp);
159 159
160 /* 160 /*
161 * If we are turning this type of quotas off, we don't care 161 * If we are turning this type of quotas off, we don't care
162 * about the dirty metadata sitting in this dquot. OTOH, if 162 * about the dirty metadata sitting in this dquot. OTOH, if
163 * we're unmounting, we do care, so we flush it and wait. 163 * we're unmounting, we do care, so we flush it and wait.
164 */ 164 */
165 if (XFS_DQ_IS_DIRTY(dqp)) { 165 if (XFS_DQ_IS_DIRTY(dqp)) {
166 struct xfs_buf *bp = NULL; 166 struct xfs_buf *bp = NULL;
167 int error; 167 int error;
168 168
169 /* 169 /*
170 * We don't care about getting disk errors here. We need 170 * We don't care about getting disk errors here. We need
171 * to purge this dquot anyway, so we go ahead regardless. 171 * to purge this dquot anyway, so we go ahead regardless.
172 */ 172 */
173 error = xfs_qm_dqflush(dqp, &bp); 173 error = xfs_qm_dqflush(dqp, &bp);
174 if (error) { 174 if (error) {
175 xfs_warn(mp, "%s: dquot %p flush failed", 175 xfs_warn(mp, "%s: dquot %p flush failed",
176 __func__, dqp); 176 __func__, dqp);
177 } else { 177 } else {
178 error = xfs_bwrite(bp); 178 error = xfs_bwrite(bp);
179 xfs_buf_relse(bp); 179 xfs_buf_relse(bp);
180 } 180 }
181 xfs_dqflock(dqp); 181 xfs_dqflock(dqp);
182 } 182 }
183 183
184 ASSERT(atomic_read(&dqp->q_pincount) == 0); 184 ASSERT(atomic_read(&dqp->q_pincount) == 0);
185 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 185 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
186 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 186 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
187 187
188 xfs_dqfunlock(dqp); 188 xfs_dqfunlock(dqp);
189 xfs_dqunlock(dqp); 189 xfs_dqunlock(dqp);
190 190
191 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), 191 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
192 be32_to_cpu(dqp->q_core.d_id)); 192 be32_to_cpu(dqp->q_core.d_id));
193 qi->qi_dquots--; 193 qi->qi_dquots--;
194 194
195 /* 195 /*
196 * We move dquots to the freelist as soon as their reference count 196 * We move dquots to the freelist as soon as their reference count
197 * hits zero, so it really should be on the freelist here. 197 * hits zero, so it really should be on the freelist here.
198 */ 198 */
199 mutex_lock(&qi->qi_lru_lock); 199 mutex_lock(&qi->qi_lru_lock);
200 ASSERT(!list_empty(&dqp->q_lru)); 200 ASSERT(!list_empty(&dqp->q_lru));
201 list_del_init(&dqp->q_lru); 201 list_del_init(&dqp->q_lru);
202 qi->qi_lru_count--; 202 qi->qi_lru_count--;
203 XFS_STATS_DEC(xs_qm_dquot_unused); 203 XFS_STATS_DEC(xs_qm_dquot_unused);
204 mutex_unlock(&qi->qi_lru_lock); 204 mutex_unlock(&qi->qi_lru_lock);
205 205
206 xfs_qm_dqdestroy(dqp); 206 xfs_qm_dqdestroy(dqp);
207 207
208 if (gdqp) 208 if (gdqp)
209 xfs_qm_dqput(gdqp); 209 xfs_qm_dqput(gdqp);
210 return 0; 210 return 0;
211 } 211 }
212 212
213 /* 213 /*
214 * Purge the dquot cache. 214 * Purge the dquot cache.
215 */ 215 */
216 void 216 void
217 xfs_qm_dqpurge_all( 217 xfs_qm_dqpurge_all(
218 struct xfs_mount *mp, 218 struct xfs_mount *mp,
219 uint flags) 219 uint flags)
220 { 220 {
221 if (flags & XFS_QMOPT_UQUOTA) 221 if (flags & XFS_QMOPT_UQUOTA)
222 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); 222 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
223 if (flags & XFS_QMOPT_GQUOTA) 223 if (flags & XFS_QMOPT_GQUOTA)
224 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); 224 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
225 if (flags & XFS_QMOPT_PQUOTA) 225 if (flags & XFS_QMOPT_PQUOTA)
226 xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL); 226 xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL);
227 } 227 }
228 228
229 /* 229 /*
230 * Just destroy the quotainfo structure. 230 * Just destroy the quotainfo structure.
231 */ 231 */
232 void 232 void
233 xfs_qm_unmount( 233 xfs_qm_unmount(
234 struct xfs_mount *mp) 234 struct xfs_mount *mp)
235 { 235 {
236 if (mp->m_quotainfo) { 236 if (mp->m_quotainfo) {
237 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); 237 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
238 xfs_qm_destroy_quotainfo(mp); 238 xfs_qm_destroy_quotainfo(mp);
239 } 239 }
240 } 240 }
241 241
242 242
243 /* 243 /*
244 * This is called from xfs_mountfs to start quotas and initialize all 244 * This is called from xfs_mountfs to start quotas and initialize all
245 * necessary data structures like quotainfo. This is also responsible for 245 * necessary data structures like quotainfo. This is also responsible for
246 * running a quotacheck as necessary. We are guaranteed that the superblock 246 * running a quotacheck as necessary. We are guaranteed that the superblock
247 * is consistently read in at this point. 247 * is consistently read in at this point.
248 * 248 *
249 * If we fail here, the mount will continue with quota turned off. We don't 249 * If we fail here, the mount will continue with quota turned off. We don't
250 * need to indicate success or failure at all. 250 * need to indicate success or failure at all.
251 */ 251 */
252 void 252 void
253 xfs_qm_mount_quotas( 253 xfs_qm_mount_quotas(
254 xfs_mount_t *mp) 254 xfs_mount_t *mp)
255 { 255 {
256 int error = 0; 256 int error = 0;
257 uint sbf; 257 uint sbf;
258 258
259 /* 259 /*
260 * If quotas on realtime volumes is not supported, we disable 260 * If quotas on realtime volumes is not supported, we disable
261 * quotas immediately. 261 * quotas immediately.
262 */ 262 */
263 if (mp->m_sb.sb_rextents) { 263 if (mp->m_sb.sb_rextents) {
264 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); 264 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
265 mp->m_qflags = 0; 265 mp->m_qflags = 0;
266 goto write_changes; 266 goto write_changes;
267 } 267 }
268 268
269 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 269 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
270 270
271 /* 271 /*
272 * Allocate the quotainfo structure inside the mount struct, and 272 * Allocate the quotainfo structure inside the mount struct, and
273 * create quotainode(s), and change/rev superblock if necessary. 273 * create quotainode(s), and change/rev superblock if necessary.
274 */ 274 */
275 error = xfs_qm_init_quotainfo(mp); 275 error = xfs_qm_init_quotainfo(mp);
276 if (error) { 276 if (error) {
277 /* 277 /*
278 * We must turn off quotas. 278 * We must turn off quotas.
279 */ 279 */
280 ASSERT(mp->m_quotainfo == NULL); 280 ASSERT(mp->m_quotainfo == NULL);
281 mp->m_qflags = 0; 281 mp->m_qflags = 0;
282 goto write_changes; 282 goto write_changes;
283 } 283 }
284 /* 284 /*
285 * If any of the quotas are not consistent, do a quotacheck. 285 * If any of the quotas are not consistent, do a quotacheck.
286 */ 286 */
287 if (XFS_QM_NEED_QUOTACHECK(mp)) { 287 if (XFS_QM_NEED_QUOTACHECK(mp)) {
288 error = xfs_qm_quotacheck(mp); 288 error = xfs_qm_quotacheck(mp);
289 if (error) { 289 if (error) {
290 /* Quotacheck failed and disabled quotas. */ 290 /* Quotacheck failed and disabled quotas. */
291 return; 291 return;
292 } 292 }
293 } 293 }
294 /* 294 /*
295 * If one type of quotas is off, then it will lose its 295 * If one type of quotas is off, then it will lose its
296 * quotachecked status, since we won't be doing accounting for 296 * quotachecked status, since we won't be doing accounting for
297 * that type anymore. 297 * that type anymore.
298 */ 298 */
299 if (!XFS_IS_UQUOTA_ON(mp)) 299 if (!XFS_IS_UQUOTA_ON(mp))
300 mp->m_qflags &= ~XFS_UQUOTA_CHKD; 300 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
301 if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) 301 if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
302 mp->m_qflags &= ~XFS_OQUOTA_CHKD; 302 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
303 303
304 write_changes: 304 write_changes:
305 /* 305 /*
306 * We actually don't have to acquire the m_sb_lock at all. 306 * We actually don't have to acquire the m_sb_lock at all.
307 * This can only be called from mount, and that's single threaded. XXX 307 * This can only be called from mount, and that's single threaded. XXX
308 */ 308 */
309 spin_lock(&mp->m_sb_lock); 309 spin_lock(&mp->m_sb_lock);
310 sbf = mp->m_sb.sb_qflags; 310 sbf = mp->m_sb.sb_qflags;
311 mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; 311 mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
312 spin_unlock(&mp->m_sb_lock); 312 spin_unlock(&mp->m_sb_lock);
313 313
314 if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { 314 if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
315 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { 315 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
316 /* 316 /*
317 * We could only have been turning quotas off. 317 * We could only have been turning quotas off.
318 * We aren't in very good shape actually because 318 * We aren't in very good shape actually because
319 * the incore structures are convinced that quotas are 319 * the incore structures are convinced that quotas are
320 * off, but the on disk superblock doesn't know that ! 320 * off, but the on disk superblock doesn't know that !
321 */ 321 */
322 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); 322 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
323 xfs_alert(mp, "%s: Superblock update failed!", 323 xfs_alert(mp, "%s: Superblock update failed!",
324 __func__); 324 __func__);
325 } 325 }
326 } 326 }
327 327
328 if (error) { 328 if (error) {
329 xfs_warn(mp, "Failed to initialize disk quotas."); 329 xfs_warn(mp, "Failed to initialize disk quotas.");
330 return; 330 return;
331 } 331 }
332 } 332 }
333 333
334 /* 334 /*
335 * Called from the vfsops layer. 335 * Called from the vfsops layer.
336 */ 336 */
337 void 337 void
338 xfs_qm_unmount_quotas( 338 xfs_qm_unmount_quotas(
339 xfs_mount_t *mp) 339 xfs_mount_t *mp)
340 { 340 {
341 /* 341 /*
342 * Release the dquots that root inode, et al might be holding, 342 * Release the dquots that root inode, et al might be holding,
343 * before we flush quotas and blow away the quotainfo structure. 343 * before we flush quotas and blow away the quotainfo structure.
344 */ 344 */
345 ASSERT(mp->m_rootip); 345 ASSERT(mp->m_rootip);
346 xfs_qm_dqdetach(mp->m_rootip); 346 xfs_qm_dqdetach(mp->m_rootip);
347 if (mp->m_rbmip) 347 if (mp->m_rbmip)
348 xfs_qm_dqdetach(mp->m_rbmip); 348 xfs_qm_dqdetach(mp->m_rbmip);
349 if (mp->m_rsumip) 349 if (mp->m_rsumip)
350 xfs_qm_dqdetach(mp->m_rsumip); 350 xfs_qm_dqdetach(mp->m_rsumip);
351 351
352 /* 352 /*
353 * Release the quota inodes. 353 * Release the quota inodes.
354 */ 354 */
355 if (mp->m_quotainfo) { 355 if (mp->m_quotainfo) {
356 if (mp->m_quotainfo->qi_uquotaip) { 356 if (mp->m_quotainfo->qi_uquotaip) {
357 IRELE(mp->m_quotainfo->qi_uquotaip); 357 IRELE(mp->m_quotainfo->qi_uquotaip);
358 mp->m_quotainfo->qi_uquotaip = NULL; 358 mp->m_quotainfo->qi_uquotaip = NULL;
359 } 359 }
360 if (mp->m_quotainfo->qi_gquotaip) { 360 if (mp->m_quotainfo->qi_gquotaip) {
361 IRELE(mp->m_quotainfo->qi_gquotaip); 361 IRELE(mp->m_quotainfo->qi_gquotaip);
362 mp->m_quotainfo->qi_gquotaip = NULL; 362 mp->m_quotainfo->qi_gquotaip = NULL;
363 } 363 }
364 } 364 }
365 } 365 }
366 366
367 STATIC int 367 STATIC int
368 xfs_qm_dqattach_one( 368 xfs_qm_dqattach_one(
369 xfs_inode_t *ip, 369 xfs_inode_t *ip,
370 xfs_dqid_t id, 370 xfs_dqid_t id,
371 uint type, 371 uint type,
372 uint doalloc, 372 uint doalloc,
373 xfs_dquot_t *udqhint, /* hint */ 373 xfs_dquot_t *udqhint, /* hint */
374 xfs_dquot_t **IO_idqpp) 374 xfs_dquot_t **IO_idqpp)
375 { 375 {
376 xfs_dquot_t *dqp; 376 xfs_dquot_t *dqp;
377 int error; 377 int error;
378 378
379 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 379 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
380 error = 0; 380 error = 0;
381 381
382 /* 382 /*
383 * See if we already have it in the inode itself. IO_idqpp is 383 * See if we already have it in the inode itself. IO_idqpp is
384 * &i_udquot or &i_gdquot. This made the code look weird, but 384 * &i_udquot or &i_gdquot. This made the code look weird, but
385 * made the logic a lot simpler. 385 * made the logic a lot simpler.
386 */ 386 */
387 dqp = *IO_idqpp; 387 dqp = *IO_idqpp;
388 if (dqp) { 388 if (dqp) {
389 trace_xfs_dqattach_found(dqp); 389 trace_xfs_dqattach_found(dqp);
390 return 0; 390 return 0;
391 } 391 }
392 392
393 /* 393 /*
394 * udqhint is the i_udquot field in inode, and is non-NULL only 394 * udqhint is the i_udquot field in inode, and is non-NULL only
395 * when the type arg is group/project. Its purpose is to save a 395 * when the type arg is group/project. Its purpose is to save a
396 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside 396 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
397 * the user dquot. 397 * the user dquot.
398 */ 398 */
399 if (udqhint) { 399 if (udqhint) {
400 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); 400 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
401 xfs_dqlock(udqhint); 401 xfs_dqlock(udqhint);
402 402
403 /* 403 /*
404 * No need to take dqlock to look at the id. 404 * No need to take dqlock to look at the id.
405 * 405 *
406 * The ID can't change until it gets reclaimed, and it won't 406 * The ID can't change until it gets reclaimed, and it won't
407 * be reclaimed as long as we have a ref from inode and we 407 * be reclaimed as long as we have a ref from inode and we
408 * hold the ilock. 408 * hold the ilock.
409 */ 409 */
410 dqp = udqhint->q_gdquot; 410 dqp = udqhint->q_gdquot;
411 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { 411 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
412 ASSERT(*IO_idqpp == NULL); 412 ASSERT(*IO_idqpp == NULL);
413 413
414 *IO_idqpp = xfs_qm_dqhold(dqp); 414 *IO_idqpp = xfs_qm_dqhold(dqp);
415 xfs_dqunlock(udqhint); 415 xfs_dqunlock(udqhint);
416 return 0; 416 return 0;
417 } 417 }
418 418
419 /* 419 /*
420 * We can't hold a dquot lock when we call the dqget code. 420 * We can't hold a dquot lock when we call the dqget code.
421 * We'll deadlock in no time, because of (not conforming to) 421 * We'll deadlock in no time, because of (not conforming to)
422 * lock ordering - the inodelock comes before any dquot lock, 422 * lock ordering - the inodelock comes before any dquot lock,
423 * and we may drop and reacquire the ilock in xfs_qm_dqget(). 423 * and we may drop and reacquire the ilock in xfs_qm_dqget().
424 */ 424 */
425 xfs_dqunlock(udqhint); 425 xfs_dqunlock(udqhint);
426 } 426 }
427 427
428 /* 428 /*
429 * Find the dquot from somewhere. This bumps the 429 * Find the dquot from somewhere. This bumps the
430 * reference count of dquot and returns it locked. 430 * reference count of dquot and returns it locked.
431 * This can return ENOENT if dquot didn't exist on 431 * This can return ENOENT if dquot didn't exist on
432 * disk and we didn't ask it to allocate; 432 * disk and we didn't ask it to allocate;
433 * ESRCH if quotas got turned off suddenly. 433 * ESRCH if quotas got turned off suddenly.
434 */ 434 */
435 error = xfs_qm_dqget(ip->i_mount, ip, id, type, 435 error = xfs_qm_dqget(ip->i_mount, ip, id, type,
436 doalloc | XFS_QMOPT_DOWARN, &dqp); 436 doalloc | XFS_QMOPT_DOWARN, &dqp);
437 if (error) 437 if (error)
438 return error; 438 return error;
439 439
440 trace_xfs_dqattach_get(dqp); 440 trace_xfs_dqattach_get(dqp);
441 441
442 /* 442 /*
443 * dqget may have dropped and re-acquired the ilock, but it guarantees 443 * dqget may have dropped and re-acquired the ilock, but it guarantees
444 * that the dquot returned is the one that should go in the inode. 444 * that the dquot returned is the one that should go in the inode.
445 */ 445 */
446 *IO_idqpp = dqp; 446 *IO_idqpp = dqp;
447 xfs_dqunlock(dqp); 447 xfs_dqunlock(dqp);
448 return 0; 448 return 0;
449 } 449 }
450 450
451 451
452 /* 452 /*
453 * Given a udquot and gdquot, attach a ptr to the group dquot in the 453 * Given a udquot and gdquot, attach a ptr to the group dquot in the
454 * udquot as a hint for future lookups. 454 * udquot as a hint for future lookups.
455 */ 455 */
456 STATIC void 456 STATIC void
457 xfs_qm_dqattach_grouphint( 457 xfs_qm_dqattach_grouphint(
458 xfs_dquot_t *udq, 458 xfs_dquot_t *udq,
459 xfs_dquot_t *gdq) 459 xfs_dquot_t *gdq)
460 { 460 {
461 xfs_dquot_t *tmp; 461 xfs_dquot_t *tmp;
462 462
463 xfs_dqlock(udq); 463 xfs_dqlock(udq);
464 464
465 tmp = udq->q_gdquot; 465 tmp = udq->q_gdquot;
466 if (tmp) { 466 if (tmp) {
467 if (tmp == gdq) 467 if (tmp == gdq)
468 goto done; 468 goto done;
469 469
470 udq->q_gdquot = NULL; 470 udq->q_gdquot = NULL;
471 xfs_qm_dqrele(tmp); 471 xfs_qm_dqrele(tmp);
472 } 472 }
473 473
474 udq->q_gdquot = xfs_qm_dqhold(gdq); 474 udq->q_gdquot = xfs_qm_dqhold(gdq);
475 done: 475 done:
476 xfs_dqunlock(udq); 476 xfs_dqunlock(udq);
477 } 477 }
478 478
479 static bool 479 static bool
480 xfs_qm_need_dqattach( 480 xfs_qm_need_dqattach(
481 struct xfs_inode *ip) 481 struct xfs_inode *ip)
482 { 482 {
483 struct xfs_mount *mp = ip->i_mount; 483 struct xfs_mount *mp = ip->i_mount;
484 484
485 if (!XFS_IS_QUOTA_RUNNING(mp)) 485 if (!XFS_IS_QUOTA_RUNNING(mp))
486 return false; 486 return false;
487 if (!XFS_IS_QUOTA_ON(mp)) 487 if (!XFS_IS_QUOTA_ON(mp))
488 return false; 488 return false;
489 if (!XFS_NOT_DQATTACHED(mp, ip)) 489 if (!XFS_NOT_DQATTACHED(mp, ip))
490 return false; 490 return false;
491 if (ip->i_ino == mp->m_sb.sb_uquotino || 491 if (ip->i_ino == mp->m_sb.sb_uquotino ||
492 ip->i_ino == mp->m_sb.sb_gquotino) 492 ip->i_ino == mp->m_sb.sb_gquotino)
493 return false; 493 return false;
494 return true; 494 return true;
495 } 495 }
496 496
497 /* 497 /*
498 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON 498 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
499 * into account. 499 * into account.
500 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. 500 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
501 * Inode may get unlocked and relocked in here, and the caller must deal with 501 * Inode may get unlocked and relocked in here, and the caller must deal with
502 * the consequences. 502 * the consequences.
503 */ 503 */
504 int 504 int
505 xfs_qm_dqattach_locked( 505 xfs_qm_dqattach_locked(
506 xfs_inode_t *ip, 506 xfs_inode_t *ip,
507 uint flags) 507 uint flags)
508 { 508 {
509 xfs_mount_t *mp = ip->i_mount; 509 xfs_mount_t *mp = ip->i_mount;
510 uint nquotas = 0; 510 uint nquotas = 0;
511 int error = 0; 511 int error = 0;
512 512
513 if (!xfs_qm_need_dqattach(ip)) 513 if (!xfs_qm_need_dqattach(ip))
514 return 0; 514 return 0;
515 515
516 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 516 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
517 517
518 if (XFS_IS_UQUOTA_ON(mp)) { 518 if (XFS_IS_UQUOTA_ON(mp)) {
519 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, 519 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
520 flags & XFS_QMOPT_DQALLOC, 520 flags & XFS_QMOPT_DQALLOC,
521 NULL, &ip->i_udquot); 521 NULL, &ip->i_udquot);
522 if (error) 522 if (error)
523 goto done; 523 goto done;
524 nquotas++; 524 nquotas++;
525 } 525 }
526 526
527 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 527 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
528 if (XFS_IS_OQUOTA_ON(mp)) { 528 if (XFS_IS_OQUOTA_ON(mp)) {
529 error = XFS_IS_GQUOTA_ON(mp) ? 529 error = XFS_IS_GQUOTA_ON(mp) ?
530 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 530 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
531 flags & XFS_QMOPT_DQALLOC, 531 flags & XFS_QMOPT_DQALLOC,
532 ip->i_udquot, &ip->i_gdquot) : 532 ip->i_udquot, &ip->i_gdquot) :
533 xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, 533 xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
534 flags & XFS_QMOPT_DQALLOC, 534 flags & XFS_QMOPT_DQALLOC,
535 ip->i_udquot, &ip->i_gdquot); 535 ip->i_udquot, &ip->i_gdquot);
536 /* 536 /*
537 * Don't worry about the udquot that we may have 537 * Don't worry about the udquot that we may have
538 * attached above. It'll get detached, if not already. 538 * attached above. It'll get detached, if not already.
539 */ 539 */
540 if (error) 540 if (error)
541 goto done; 541 goto done;
542 nquotas++; 542 nquotas++;
543 } 543 }
544 544
545 /* 545 /*
546 * Attach this group quota to the user quota as a hint. 546 * Attach this group quota to the user quota as a hint.
547 * This WON'T, in general, result in a thrash. 547 * This WON'T, in general, result in a thrash.
548 */ 548 */
549 if (nquotas == 2) { 549 if (nquotas == 2) {
550 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 550 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
551 ASSERT(ip->i_udquot); 551 ASSERT(ip->i_udquot);
552 ASSERT(ip->i_gdquot); 552 ASSERT(ip->i_gdquot);
553 553
554 /* 554 /*
555 * We do not have i_udquot locked at this point, but this check 555 * We do not have i_udquot locked at this point, but this check
556 * is OK since we don't depend on the i_gdquot to be accurate 556 * is OK since we don't depend on the i_gdquot to be accurate
557 * 100% all the time. It is just a hint, and this will 557 * 100% all the time. It is just a hint, and this will
558 * succeed in general. 558 * succeed in general.
559 */ 559 */
560 if (ip->i_udquot->q_gdquot != ip->i_gdquot) 560 if (ip->i_udquot->q_gdquot != ip->i_gdquot)
561 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); 561 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
562 } 562 }
563 563
564 done: 564 done:
565 #ifdef DEBUG 565 #ifdef DEBUG
566 if (!error) { 566 if (!error) {
567 if (XFS_IS_UQUOTA_ON(mp)) 567 if (XFS_IS_UQUOTA_ON(mp))
568 ASSERT(ip->i_udquot); 568 ASSERT(ip->i_udquot);
569 if (XFS_IS_OQUOTA_ON(mp)) 569 if (XFS_IS_OQUOTA_ON(mp))
570 ASSERT(ip->i_gdquot); 570 ASSERT(ip->i_gdquot);
571 } 571 }
572 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 572 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
573 #endif 573 #endif
574 return error; 574 return error;
575 } 575 }
576 576
577 int 577 int
578 xfs_qm_dqattach( 578 xfs_qm_dqattach(
579 struct xfs_inode *ip, 579 struct xfs_inode *ip,
580 uint flags) 580 uint flags)
581 { 581 {
582 int error; 582 int error;
583 583
584 if (!xfs_qm_need_dqattach(ip)) 584 if (!xfs_qm_need_dqattach(ip))
585 return 0; 585 return 0;
586 586
587 xfs_ilock(ip, XFS_ILOCK_EXCL); 587 xfs_ilock(ip, XFS_ILOCK_EXCL);
588 error = xfs_qm_dqattach_locked(ip, flags); 588 error = xfs_qm_dqattach_locked(ip, flags);
589 xfs_iunlock(ip, XFS_ILOCK_EXCL); 589 xfs_iunlock(ip, XFS_ILOCK_EXCL);
590 590
591 return error; 591 return error;
592 } 592 }
593 593
594 /* 594 /*
595 * Release dquots (and their references) if any. 595 * Release dquots (and their references) if any.
596 * The inode should be locked EXCL except when this is called by 596 * The inode should be locked EXCL except when this is called by
597 * xfs_ireclaim. 597 * xfs_ireclaim.
598 */ 598 */
599 void 599 void
600 xfs_qm_dqdetach( 600 xfs_qm_dqdetach(
601 xfs_inode_t *ip) 601 xfs_inode_t *ip)
602 { 602 {
603 if (!(ip->i_udquot || ip->i_gdquot)) 603 if (!(ip->i_udquot || ip->i_gdquot))
604 return; 604 return;
605 605
606 trace_xfs_dquot_dqdetach(ip); 606 trace_xfs_dquot_dqdetach(ip);
607 607
608 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); 608 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
609 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); 609 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
610 if (ip->i_udquot) { 610 if (ip->i_udquot) {
611 xfs_qm_dqrele(ip->i_udquot); 611 xfs_qm_dqrele(ip->i_udquot);
612 ip->i_udquot = NULL; 612 ip->i_udquot = NULL;
613 } 613 }
614 if (ip->i_gdquot) { 614 if (ip->i_gdquot) {
615 xfs_qm_dqrele(ip->i_gdquot); 615 xfs_qm_dqrele(ip->i_gdquot);
616 ip->i_gdquot = NULL; 616 ip->i_gdquot = NULL;
617 } 617 }
618 } 618 }
619 619
620 int
621 xfs_qm_calc_dquots_per_chunk(
622 struct xfs_mount *mp,
623 unsigned int nbblks) /* basic block units */
624 {
625 unsigned int ndquots;
626
627 ASSERT(nbblks > 0);
628 ndquots = BBTOB(nbblks);
629 do_div(ndquots, sizeof(xfs_dqblk_t));
630
631 return ndquots;
632 }
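The new helper simply converts the chunk size from basic blocks to bytes and divides by the on-disk dquot record size. A worked example as a user-space sketch, assuming a 4096-byte dquot cluster (8 basic blocks) and a 136-byte xfs_dqblk_t; both sizes are assumptions for illustration.

#include <stdio.h>

#define BBSIZE		512			/* basic block size */
#define BBTOB(bbs)	((bbs) * BBSIZE)	/* basic blocks to bytes */

int main(void)
{
	unsigned int nbblks = 8;	/* assumed 4096-byte dquot cluster */
	unsigned int dqblk_size = 136;	/* assumed sizeof(xfs_dqblk_t) */
	unsigned int ndquots = BBTOB(nbblks) / dqblk_size;

	/* 4096 / 136 = 30 dquots per chunk; the remainder is unused space */
	printf("%u dquots per chunk\n", ndquots);
	return 0;
}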
633
620 /* 634 /*
621 * This initializes all the quota information that's kept in the 635 * This initializes all the quota information that's kept in the
622 * mount structure 636 * mount structure
623 */ 637 */
624 STATIC int 638 STATIC int
625 xfs_qm_init_quotainfo( 639 xfs_qm_init_quotainfo(
626 xfs_mount_t *mp) 640 xfs_mount_t *mp)
627 { 641 {
628 xfs_quotainfo_t *qinf; 642 xfs_quotainfo_t *qinf;
629 int error; 643 int error;
630 xfs_dquot_t *dqp; 644 xfs_dquot_t *dqp;
631 645
632 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 646 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
633 647
634 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 648 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
635 649
636 /* 650 /*
637 * See if quotainodes are setup, and if not, allocate them, 651 * See if quotainodes are setup, and if not, allocate them,
638 * and change the superblock accordingly. 652 * and change the superblock accordingly.
639 */ 653 */
640 if ((error = xfs_qm_init_quotainos(mp))) { 654 if ((error = xfs_qm_init_quotainos(mp))) {
641 kmem_free(qinf); 655 kmem_free(qinf);
642 mp->m_quotainfo = NULL; 656 mp->m_quotainfo = NULL;
643 return error; 657 return error;
644 } 658 }
645 659
646 INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); 660 INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
647 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); 661 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
648 mutex_init(&qinf->qi_tree_lock); 662 mutex_init(&qinf->qi_tree_lock);
649 663
650 INIT_LIST_HEAD(&qinf->qi_lru_list); 664 INIT_LIST_HEAD(&qinf->qi_lru_list);
651 qinf->qi_lru_count = 0; 665 qinf->qi_lru_count = 0;
652 mutex_init(&qinf->qi_lru_lock); 666 mutex_init(&qinf->qi_lru_lock);
653 667
654 /* mutex used to serialize quotaoffs */ 668 /* mutex used to serialize quotaoffs */
655 mutex_init(&qinf->qi_quotaofflock); 669 mutex_init(&qinf->qi_quotaofflock);
656 670
657 /* Precalc some constants */ 671 /* Precalc some constants */
658 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 672 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
659 ASSERT(qinf->qi_dqchunklen); 673 qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp,
660 qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); 674 qinf->qi_dqchunklen);
661 do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
662 675
663 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); 676 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
664 677
665 /* 678 /*
666 * We try to get the limits from the superuser's limits fields. 679 * We try to get the limits from the superuser's limits fields.
667 * This is quite hacky, but it is standard quota practice. 680 * This is quite hacky, but it is standard quota practice.
668 * 681 *
669 * We look at the USR dquot with id == 0 first, but if user quotas 682 * We look at the USR dquot with id == 0 first, but if user quotas
670 * are not enabled we go to the GRP dquot with id == 0. 683 * are not enabled we go to the GRP dquot with id == 0.
671 * We don't really care to keep separate default limits for user 684 * We don't really care to keep separate default limits for user
672 * and group quotas, at least not at this point. 685 * and group quotas, at least not at this point.
673 * 686 *
674 * Since we may not have done a quotacheck by this point, just read 687 * Since we may not have done a quotacheck by this point, just read
675 * the dquot without attaching it to any hashtables or lists. 688 * the dquot without attaching it to any hashtables or lists.
676 */ 689 */
677 error = xfs_qm_dqread(mp, 0, 690 error = xfs_qm_dqread(mp, 0,
678 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 691 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
679 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 692 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
680 XFS_DQ_PROJ), 693 XFS_DQ_PROJ),
681 XFS_QMOPT_DOWARN, &dqp); 694 XFS_QMOPT_DOWARN, &dqp);
682 if (!error) { 695 if (!error) {
683 xfs_disk_dquot_t *ddqp = &dqp->q_core; 696 xfs_disk_dquot_t *ddqp = &dqp->q_core;
684 697
685 /* 698 /*
686 * The warnings and timers set the grace period given to 699 * The warnings and timers set the grace period given to
687 * a user or group before he or she can not perform any 700 * a user or group before he or she can not perform any
688 * more writing. If it is zero, a default is used. 701 * more writing. If it is zero, a default is used.
689 */ 702 */
690 qinf->qi_btimelimit = ddqp->d_btimer ? 703 qinf->qi_btimelimit = ddqp->d_btimer ?
691 be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; 704 be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
692 qinf->qi_itimelimit = ddqp->d_itimer ? 705 qinf->qi_itimelimit = ddqp->d_itimer ?
693 be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; 706 be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
694 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? 707 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
695 be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; 708 be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
696 qinf->qi_bwarnlimit = ddqp->d_bwarns ? 709 qinf->qi_bwarnlimit = ddqp->d_bwarns ?
697 be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; 710 be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
698 qinf->qi_iwarnlimit = ddqp->d_iwarns ? 711 qinf->qi_iwarnlimit = ddqp->d_iwarns ?
699 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; 712 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
700 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? 713 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
701 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; 714 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
702 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); 715 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
703 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); 716 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
704 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); 717 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
705 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); 718 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
706 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); 719 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
707 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); 720 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
708 721
709 xfs_qm_dqdestroy(dqp); 722 xfs_qm_dqdestroy(dqp);
710 } else { 723 } else {
711 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; 724 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
712 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; 725 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
713 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; 726 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
714 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; 727 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
715 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; 728 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
716 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 729 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
717 } 730 }
718 731
719 qinf->qi_shrinker.shrink = xfs_qm_shake; 732 qinf->qi_shrinker.shrink = xfs_qm_shake;
720 qinf->qi_shrinker.seeks = DEFAULT_SEEKS; 733 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
721 register_shrinker(&qinf->qi_shrinker); 734 register_shrinker(&qinf->qi_shrinker);
722 return 0; 735 return 0;
723 } 736 }
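Context for the hunk above: the open-coded BBTOB()/do_div() computation of qi_dqperchunk is replaced by a call to xfs_qm_calc_dquots_per_chunk(), which is introduced elsewhere in this patch so the same arithmetic can be shared with the dquot buffer verifiers. A minimal sketch of the equivalent calculation, assuming the helper simply converts the chunk length from basic blocks to bytes and divides by the on-disk dquot record size (the name below is illustrative, not the patch's code):

/*
 * Sketch only: number of dquot records in one dquot chunk, given the
 * chunk length in 512-byte basic blocks.  Mirrors the removed
 * open-coded version above.
 */
STATIC unsigned int
xfs_qm_calc_dquots_per_chunk_sketch(
	struct xfs_mount	*mp,
	unsigned int		nbblks)	/* basic blocks per dquot chunk */
{
	ASSERT(nbblks > 0);
	/* bytes per chunk divided by the size of one on-disk dquot record */
	return BBTOB(nbblks) / sizeof(xfs_dqblk_t);
}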
724 737
725 738
726 /* 739 /*
727 * Gets called when unmounting a filesystem or when all quotas get 740 * Gets called when unmounting a filesystem or when all quotas get
728 * turned off. 741 * turned off.
729 * This purges the quota inodes, destroys locks and frees itself. 742 * This purges the quota inodes, destroys locks and frees itself.
730 */ 743 */
731 void 744 void
732 xfs_qm_destroy_quotainfo( 745 xfs_qm_destroy_quotainfo(
733 xfs_mount_t *mp) 746 xfs_mount_t *mp)
734 { 747 {
735 xfs_quotainfo_t *qi; 748 xfs_quotainfo_t *qi;
736 749
737 qi = mp->m_quotainfo; 750 qi = mp->m_quotainfo;
738 ASSERT(qi != NULL); 751 ASSERT(qi != NULL);
739 752
740 unregister_shrinker(&qi->qi_shrinker); 753 unregister_shrinker(&qi->qi_shrinker);
741 754
742 if (qi->qi_uquotaip) { 755 if (qi->qi_uquotaip) {
743 IRELE(qi->qi_uquotaip); 756 IRELE(qi->qi_uquotaip);
744 qi->qi_uquotaip = NULL; /* paranoia */ 757 qi->qi_uquotaip = NULL; /* paranoia */
745 } 758 }
746 if (qi->qi_gquotaip) { 759 if (qi->qi_gquotaip) {
747 IRELE(qi->qi_gquotaip); 760 IRELE(qi->qi_gquotaip);
748 qi->qi_gquotaip = NULL; 761 qi->qi_gquotaip = NULL;
749 } 762 }
750 mutex_destroy(&qi->qi_quotaofflock); 763 mutex_destroy(&qi->qi_quotaofflock);
751 kmem_free(qi); 764 kmem_free(qi);
752 mp->m_quotainfo = NULL; 765 mp->m_quotainfo = NULL;
753 } 766 }
754 767
755 /* 768 /*
756 * Create an inode and return with a reference already taken, but unlocked 769 * Create an inode and return with a reference already taken, but unlocked
757 * This is how we create quota inodes 770 * This is how we create quota inodes
758 */ 771 */
759 STATIC int 772 STATIC int
760 xfs_qm_qino_alloc( 773 xfs_qm_qino_alloc(
761 xfs_mount_t *mp, 774 xfs_mount_t *mp,
762 xfs_inode_t **ip, 775 xfs_inode_t **ip,
763 __int64_t sbfields, 776 __int64_t sbfields,
764 uint flags) 777 uint flags)
765 { 778 {
766 xfs_trans_t *tp; 779 xfs_trans_t *tp;
767 int error; 780 int error;
768 int committed; 781 int committed;
769 782
770 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); 783 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
771 if ((error = xfs_trans_reserve(tp, 784 if ((error = xfs_trans_reserve(tp,
772 XFS_QM_QINOCREATE_SPACE_RES(mp), 785 XFS_QM_QINOCREATE_SPACE_RES(mp),
773 XFS_CREATE_LOG_RES(mp), 0, 786 XFS_CREATE_LOG_RES(mp), 0,
774 XFS_TRANS_PERM_LOG_RES, 787 XFS_TRANS_PERM_LOG_RES,
775 XFS_CREATE_LOG_COUNT))) { 788 XFS_CREATE_LOG_COUNT))) {
776 xfs_trans_cancel(tp, 0); 789 xfs_trans_cancel(tp, 0);
777 return error; 790 return error;
778 } 791 }
779 792
780 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); 793 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
781 if (error) { 794 if (error) {
782 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 795 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
783 XFS_TRANS_ABORT); 796 XFS_TRANS_ABORT);
784 return error; 797 return error;
785 } 798 }
786 799
787 /* 800 /*
788 * Make the changes in the superblock, and log those too. 801 * Make the changes in the superblock, and log those too.
789 * sbfields arg may contain fields other than *QUOTINO; 802 * sbfields arg may contain fields other than *QUOTINO;
790 * VERSIONNUM for example. 803 * VERSIONNUM for example.
791 */ 804 */
792 spin_lock(&mp->m_sb_lock); 805 spin_lock(&mp->m_sb_lock);
793 if (flags & XFS_QMOPT_SBVERSION) { 806 if (flags & XFS_QMOPT_SBVERSION) {
794 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); 807 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
795 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | 808 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
796 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == 809 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
797 (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | 810 (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
798 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); 811 XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
799 812
800 xfs_sb_version_addquota(&mp->m_sb); 813 xfs_sb_version_addquota(&mp->m_sb);
801 mp->m_sb.sb_uquotino = NULLFSINO; 814 mp->m_sb.sb_uquotino = NULLFSINO;
802 mp->m_sb.sb_gquotino = NULLFSINO; 815 mp->m_sb.sb_gquotino = NULLFSINO;
803 816
804 /* qflags will get updated _after_ quotacheck */ 817 /* qflags will get updated _after_ quotacheck */
805 mp->m_sb.sb_qflags = 0; 818 mp->m_sb.sb_qflags = 0;
806 } 819 }
807 if (flags & XFS_QMOPT_UQUOTA) 820 if (flags & XFS_QMOPT_UQUOTA)
808 mp->m_sb.sb_uquotino = (*ip)->i_ino; 821 mp->m_sb.sb_uquotino = (*ip)->i_ino;
809 else 822 else
810 mp->m_sb.sb_gquotino = (*ip)->i_ino; 823 mp->m_sb.sb_gquotino = (*ip)->i_ino;
811 spin_unlock(&mp->m_sb_lock); 824 spin_unlock(&mp->m_sb_lock);
812 xfs_mod_sb(tp, sbfields); 825 xfs_mod_sb(tp, sbfields);
813 826
814 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { 827 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
815 xfs_alert(mp, "%s failed (error %d)!", __func__, error); 828 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
816 return error; 829 return error;
817 } 830 }
818 return 0; 831 return 0;
819 } 832 }
820 833
821 834
822 STATIC void 835 STATIC void
823 xfs_qm_reset_dqcounts( 836 xfs_qm_reset_dqcounts(
824 xfs_mount_t *mp, 837 xfs_mount_t *mp,
825 xfs_buf_t *bp, 838 xfs_buf_t *bp,
826 xfs_dqid_t id, 839 xfs_dqid_t id,
827 uint type) 840 uint type)
828 { 841 {
829 xfs_disk_dquot_t *ddq; 842 xfs_disk_dquot_t *ddq;
830 int j; 843 int j;
831 844
832 trace_xfs_reset_dqcounts(bp, _RET_IP_); 845 trace_xfs_reset_dqcounts(bp, _RET_IP_);
833 846
834 /* 847 /*
835 * Reset all counters and timers. They'll be 848 * Reset all counters and timers. They'll be
836 * started afresh by xfs_qm_quotacheck. 849 * started afresh by xfs_qm_quotacheck.
837 */ 850 */
838 #ifdef DEBUG 851 #ifdef DEBUG
839 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 852 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
840 do_div(j, sizeof(xfs_dqblk_t)); 853 do_div(j, sizeof(xfs_dqblk_t));
841 ASSERT(mp->m_quotainfo->qi_dqperchunk == j); 854 ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
842 #endif 855 #endif
843 ddq = bp->b_addr; 856 ddq = bp->b_addr;
844 for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { 857 for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
845 /* 858 /*
846 * Do a sanity check, and if needed, repair the dqblk. Don't 859 * Do a sanity check, and if needed, repair the dqblk. Don't
847 * output any warnings because it's perfectly possible to 860 * output any warnings because it's perfectly possible to
848 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. 861 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
849 */ 862 */
850 (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, 863 (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
851 "xfs_quotacheck"); 864 "xfs_quotacheck");
852 ddq->d_bcount = 0; 865 ddq->d_bcount = 0;
853 ddq->d_icount = 0; 866 ddq->d_icount = 0;
854 ddq->d_rtbcount = 0; 867 ddq->d_rtbcount = 0;
855 ddq->d_btimer = 0; 868 ddq->d_btimer = 0;
856 ddq->d_itimer = 0; 869 ddq->d_itimer = 0;
857 ddq->d_rtbtimer = 0; 870 ddq->d_rtbtimer = 0;
858 ddq->d_bwarns = 0; 871 ddq->d_bwarns = 0;
859 ddq->d_iwarns = 0; 872 ddq->d_iwarns = 0;
860 ddq->d_rtbwarns = 0; 873 ddq->d_rtbwarns = 0;
861 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); 874 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
862 } 875 }
863 } 876 }
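For reference, the record that the DEBUG check above and the per-chunk arithmetic divide by is struct xfs_dqblk, the on-disk dquot block entry. This commit uses its previously reserved tail space to hold a CRC, an LSN and the filesystem UUID (that change lives in one of the other files of this patch); a rough sketch of the extended layout, with field names assumed from the rest of the series:

/* Rough sketch of the on-disk dquot record as extended by this series. */
typedef struct xfs_dqblk {
	xfs_disk_dquot_t  dd_diskdq;	/* portion that also lives incore */
	char		  dd_fill[4];	/* padding */

	/* Only present/valid on filesystems with the CRC feature bit set: */
	__be32		  dd_crc;	/* CRC over the whole xfs_dqblk */
	__be64		  dd_lsn;	/* LSN of last modification in the log */
	uuid_t		  dd_uuid;	/* filesystem UUID for identification */
} xfs_dqblk_t;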
864 877
865 STATIC int 878 STATIC int
866 xfs_qm_dqiter_bufs( 879 xfs_qm_dqiter_bufs(
867 struct xfs_mount *mp, 880 struct xfs_mount *mp,
868 xfs_dqid_t firstid, 881 xfs_dqid_t firstid,
869 xfs_fsblock_t bno, 882 xfs_fsblock_t bno,
870 xfs_filblks_t blkcnt, 883 xfs_filblks_t blkcnt,
871 uint flags, 884 uint flags,
872 struct list_head *buffer_list) 885 struct list_head *buffer_list)
873 { 886 {
874 struct xfs_buf *bp; 887 struct xfs_buf *bp;
875 int error; 888 int error;
876 int type; 889 int type;
877 890
878 ASSERT(blkcnt > 0); 891 ASSERT(blkcnt > 0);
879 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : 892 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
880 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); 893 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
881 error = 0; 894 error = 0;
882 895
883 /* 896 /*
884 * Blkcnt arg can be a very big number, and might even be 897 * Blkcnt arg can be a very big number, and might even be
885 * larger than the log itself. So, we have to break it up into 898 * larger than the log itself. So, we have to break it up into
886 * manageable-sized transactions. 899 * manageable-sized transactions.
887 * Note that we don't start a permanent transaction here; we might 900 * Note that we don't start a permanent transaction here; we might
888 * not be able to get a log reservation for the whole thing up front, 901 * not be able to get a log reservation for the whole thing up front,
889 * and we don't really care to either, because we just discard 902 * and we don't really care to either, because we just discard
890 * everything if we were to crash in the middle of this loop. 903 * everything if we were to crash in the middle of this loop.
891 */ 904 */
892 while (blkcnt--) { 905 while (blkcnt--) {
893 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 906 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
894 XFS_FSB_TO_DADDR(mp, bno), 907 XFS_FSB_TO_DADDR(mp, bno),
895 mp->m_quotainfo->qi_dqchunklen, 0, &bp, 908 mp->m_quotainfo->qi_dqchunklen, 0, &bp,
896 &xfs_dquot_buf_ops); 909 &xfs_dquot_buf_ops);
897 if (error) 910 if (error)
898 break; 911 break;
899 912
913 /*
914 * XXX(hch): need to figure out if it makes sense to validate
915 * the CRC here.
916 */
900 xfs_qm_reset_dqcounts(mp, bp, firstid, type); 917 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
901 xfs_buf_delwri_queue(bp, buffer_list); 918 xfs_buf_delwri_queue(bp, buffer_list);
902 xfs_buf_relse(bp); 919 xfs_buf_relse(bp);
903 /* 920 /*
904 * goto the next block. 921 * goto the next block.
905 */ 922 */
906 bno++; 923 bno++;
907 firstid += mp->m_quotainfo->qi_dqperchunk; 924 firstid += mp->m_quotainfo->qi_dqperchunk;
908 } 925 }
909 926
910 return error; 927 return error;
911 } 928 }
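The new XXX(hch) comment above leaves per-chunk CRC validation during quotacheck as an open question; the read already goes through &xfs_dquot_buf_ops, so the verifier can check CRCs as the buffer comes off disk. If the check were done here instead, a hedged sketch (assuming the dd_crc field shown earlier and the generic xfs_verify_cksum() helper from xfs_cksum.h; not part of this commit) might look like:

/*
 * Sketch only: walk every dquot record in a freshly read chunk buffer
 * and verify its CRC.  CRCs only exist on CRC-enabled (v5) superblocks,
 * so skip the check otherwise.
 */
STATIC bool
xfs_qm_dqblk_crc_ok(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	struct xfs_dqblk	*d = bp->b_addr;
	int			ndquots = mp->m_quotainfo->qi_dqperchunk;
	int			i;

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return true;

	for (i = 0; i < ndquots; i++, d++) {
		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
				      offsetof(struct xfs_dqblk, dd_crc)))
			return false;	/* corrupt dquot record */
	}
	return true;
}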
912 929
913 /* 930 /*
914 * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a 931 * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
915 * caller supplied function for every chunk of dquots that we find. 932 * caller supplied function for every chunk of dquots that we find.
916 */ 933 */
917 STATIC int 934 STATIC int
918 xfs_qm_dqiterate( 935 xfs_qm_dqiterate(
919 struct xfs_mount *mp, 936 struct xfs_mount *mp,
920 struct xfs_inode *qip, 937 struct xfs_inode *qip,
921 uint flags, 938 uint flags,
922 struct list_head *buffer_list) 939 struct list_head *buffer_list)
923 { 940 {
924 struct xfs_bmbt_irec *map; 941 struct xfs_bmbt_irec *map;
925 int i, nmaps; /* number of map entries */ 942 int i, nmaps; /* number of map entries */
926 int error; /* return value */ 943 int error; /* return value */
927 xfs_fileoff_t lblkno; 944 xfs_fileoff_t lblkno;
928 xfs_filblks_t maxlblkcnt; 945 xfs_filblks_t maxlblkcnt;
929 xfs_dqid_t firstid; 946 xfs_dqid_t firstid;
930 xfs_fsblock_t rablkno; 947 xfs_fsblock_t rablkno;
931 xfs_filblks_t rablkcnt; 948 xfs_filblks_t rablkcnt;
932 949
933 error = 0; 950 error = 0;
934 /* 951 /*
935 * This looks racy, but we can't keep an inode lock across a 952 * This looks racy, but we can't keep an inode lock across a
936 * trans_reserve. But, this gets called during quotacheck, and that 953 * trans_reserve. But, this gets called during quotacheck, and that
937 * happens only at mount time which is single threaded. 954 * happens only at mount time which is single threaded.
938 */ 955 */
939 if (qip->i_d.di_nblocks == 0) 956 if (qip->i_d.di_nblocks == 0)
940 return 0; 957 return 0;
941 958
942 map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); 959 map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
943 960
944 lblkno = 0; 961 lblkno = 0;
945 maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); 962 maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
946 do { 963 do {
947 nmaps = XFS_DQITER_MAP_SIZE; 964 nmaps = XFS_DQITER_MAP_SIZE;
948 /* 965 /*
949 * We aren't changing the inode itself. Just changing 966 * We aren't changing the inode itself. Just changing
950 * some of its data. No new blocks are added here, and 967 * some of its data. No new blocks are added here, and
951 * the inode is never added to the transaction. 968 * the inode is never added to the transaction.
952 */ 969 */
953 xfs_ilock(qip, XFS_ILOCK_SHARED); 970 xfs_ilock(qip, XFS_ILOCK_SHARED);
954 error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, 971 error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
955 map, &nmaps, 0); 972 map, &nmaps, 0);
956 xfs_iunlock(qip, XFS_ILOCK_SHARED); 973 xfs_iunlock(qip, XFS_ILOCK_SHARED);
957 if (error) 974 if (error)
958 break; 975 break;
959 976
960 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); 977 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
961 for (i = 0; i < nmaps; i++) { 978 for (i = 0; i < nmaps; i++) {
962 ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); 979 ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
963 ASSERT(map[i].br_blockcount); 980 ASSERT(map[i].br_blockcount);
964 981
965 982
966 lblkno += map[i].br_blockcount; 983 lblkno += map[i].br_blockcount;
967 984
968 if (map[i].br_startblock == HOLESTARTBLOCK) 985 if (map[i].br_startblock == HOLESTARTBLOCK)
969 continue; 986 continue;
970 987
971 firstid = (xfs_dqid_t) map[i].br_startoff * 988 firstid = (xfs_dqid_t) map[i].br_startoff *
972 mp->m_quotainfo->qi_dqperchunk; 989 mp->m_quotainfo->qi_dqperchunk;
973 /* 990 /*
974 * Do a read-ahead on the next extent. 991 * Do a read-ahead on the next extent.
975 */ 992 */
976 if ((i+1 < nmaps) && 993 if ((i+1 < nmaps) &&
977 (map[i+1].br_startblock != HOLESTARTBLOCK)) { 994 (map[i+1].br_startblock != HOLESTARTBLOCK)) {
978 rablkcnt = map[i+1].br_blockcount; 995 rablkcnt = map[i+1].br_blockcount;
979 rablkno = map[i+1].br_startblock; 996 rablkno = map[i+1].br_startblock;
980 while (rablkcnt--) { 997 while (rablkcnt--) {
981 xfs_buf_readahead(mp->m_ddev_targp, 998 xfs_buf_readahead(mp->m_ddev_targp,
982 XFS_FSB_TO_DADDR(mp, rablkno), 999 XFS_FSB_TO_DADDR(mp, rablkno),
983 mp->m_quotainfo->qi_dqchunklen, 1000 mp->m_quotainfo->qi_dqchunklen,
984 NULL); 1001 NULL);
985 rablkno++; 1002 rablkno++;
986 } 1003 }
987 } 1004 }
988 /* 1005 /*
989 * Iterate thru all the blks in the extent and 1006 * Iterate thru all the blks in the extent and
990 * reset the counters of all the dquots inside them. 1007 * reset the counters of all the dquots inside them.
991 */ 1008 */
992 error = xfs_qm_dqiter_bufs(mp, firstid, 1009 error = xfs_qm_dqiter_bufs(mp, firstid,
993 map[i].br_startblock, 1010 map[i].br_startblock,
994 map[i].br_blockcount, 1011 map[i].br_blockcount,
995 flags, buffer_list); 1012 flags, buffer_list);
996 if (error) 1013 if (error)
997 goto out; 1014 goto out;
998 } 1015 }
999 } while (nmaps > 0); 1016 } while (nmaps > 0);
1000 1017
1001 out: 1018 out:
1002 kmem_free(map); 1019 kmem_free(map);
1003 return error; 1020 return error;
1004 } 1021 }
1005 1022
1006 /* 1023 /*
1007 * Called by dqusage_adjust in doing a quotacheck. 1024 * Called by dqusage_adjust in doing a quotacheck.
1008 * 1025 *
1009 * Given the inode, and a dquot id this updates both the incore dqout as well 1026 * Given the inode, and a dquot id this updates both the incore dqout as well
1010 * as the buffer copy. This is so that once the quotacheck is done, we can 1027 * as the buffer copy. This is so that once the quotacheck is done, we can
1011 * just log all the buffers, as opposed to logging numerous updates to 1028 * just log all the buffers, as opposed to logging numerous updates to
1012 * individual dquots. 1029 * individual dquots.
1013 */ 1030 */
1014 STATIC int 1031 STATIC int
1015 xfs_qm_quotacheck_dqadjust( 1032 xfs_qm_quotacheck_dqadjust(
1016 struct xfs_inode *ip, 1033 struct xfs_inode *ip,
1017 xfs_dqid_t id, 1034 xfs_dqid_t id,
1018 uint type, 1035 uint type,
1019 xfs_qcnt_t nblks, 1036 xfs_qcnt_t nblks,
1020 xfs_qcnt_t rtblks) 1037 xfs_qcnt_t rtblks)
1021 { 1038 {
1022 struct xfs_mount *mp = ip->i_mount; 1039 struct xfs_mount *mp = ip->i_mount;
1023 struct xfs_dquot *dqp; 1040 struct xfs_dquot *dqp;
1024 int error; 1041 int error;
1025 1042
1026 error = xfs_qm_dqget(mp, ip, id, type, 1043 error = xfs_qm_dqget(mp, ip, id, type,
1027 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); 1044 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1028 if (error) { 1045 if (error) {
1029 /* 1046 /*
1030 * Shouldn't be able to turn off quotas here. 1047 * Shouldn't be able to turn off quotas here.
1031 */ 1048 */
1032 ASSERT(error != ESRCH); 1049 ASSERT(error != ESRCH);
1033 ASSERT(error != ENOENT); 1050 ASSERT(error != ENOENT);
1034 return error; 1051 return error;
1035 } 1052 }
1036 1053
1037 trace_xfs_dqadjust(dqp); 1054 trace_xfs_dqadjust(dqp);
1038 1055
1039 /* 1056 /*
1040 * Adjust the inode count and the block count to reflect this inode's 1057 * Adjust the inode count and the block count to reflect this inode's
1041 * resource usage. 1058 * resource usage.
1042 */ 1059 */
1043 be64_add_cpu(&dqp->q_core.d_icount, 1); 1060 be64_add_cpu(&dqp->q_core.d_icount, 1);
1044 dqp->q_res_icount++; 1061 dqp->q_res_icount++;
1045 if (nblks) { 1062 if (nblks) {
1046 be64_add_cpu(&dqp->q_core.d_bcount, nblks); 1063 be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1047 dqp->q_res_bcount += nblks; 1064 dqp->q_res_bcount += nblks;
1048 } 1065 }
1049 if (rtblks) { 1066 if (rtblks) {
1050 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); 1067 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1051 dqp->q_res_rtbcount += rtblks; 1068 dqp->q_res_rtbcount += rtblks;
1052 } 1069 }
1053 1070
1054 /* 1071 /*
1055 * Set default limits, adjust timers (since we changed usages) 1072 * Set default limits, adjust timers (since we changed usages)
1056 * 1073 *
1057 * There are no timers for the default values set in the root dquot. 1074 * There are no timers for the default values set in the root dquot.
1058 */ 1075 */
1059 if (dqp->q_core.d_id) { 1076 if (dqp->q_core.d_id) {
1060 xfs_qm_adjust_dqlimits(mp, dqp); 1077 xfs_qm_adjust_dqlimits(mp, dqp);
1061 xfs_qm_adjust_dqtimers(mp, &dqp->q_core); 1078 xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1062 } 1079 }
1063 1080
1064 dqp->dq_flags |= XFS_DQ_DIRTY; 1081 dqp->dq_flags |= XFS_DQ_DIRTY;
1065 xfs_qm_dqput(dqp); 1082 xfs_qm_dqput(dqp);
1066 return 0; 1083 return 0;
1067 } 1084 }
1068 1085
1069 STATIC int 1086 STATIC int
1070 xfs_qm_get_rtblks( 1087 xfs_qm_get_rtblks(
1071 xfs_inode_t *ip, 1088 xfs_inode_t *ip,
1072 xfs_qcnt_t *O_rtblks) 1089 xfs_qcnt_t *O_rtblks)
1073 { 1090 {
1074 xfs_filblks_t rtblks; /* total rt blks */ 1091 xfs_filblks_t rtblks; /* total rt blks */
1075 xfs_extnum_t idx; /* extent record index */ 1092 xfs_extnum_t idx; /* extent record index */
1076 xfs_ifork_t *ifp; /* inode fork pointer */ 1093 xfs_ifork_t *ifp; /* inode fork pointer */
1077 xfs_extnum_t nextents; /* number of extent entries */ 1094 xfs_extnum_t nextents; /* number of extent entries */
1078 int error; 1095 int error;
1079 1096
1080 ASSERT(XFS_IS_REALTIME_INODE(ip)); 1097 ASSERT(XFS_IS_REALTIME_INODE(ip));
1081 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1098 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1082 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1099 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1083 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) 1100 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1084 return error; 1101 return error;
1085 } 1102 }
1086 rtblks = 0; 1103 rtblks = 0;
1087 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 1104 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1088 for (idx = 0; idx < nextents; idx++) 1105 for (idx = 0; idx < nextents; idx++)
1089 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); 1106 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1090 *O_rtblks = (xfs_qcnt_t)rtblks; 1107 *O_rtblks = (xfs_qcnt_t)rtblks;
1091 return 0; 1108 return 0;
1092 } 1109 }
1093 1110
1094 /* 1111 /*
1095 * callback routine supplied to bulkstat(). Given an inumber, find its 1112 * callback routine supplied to bulkstat(). Given an inumber, find its
1096 * dquots and update them to account for resources taken by that inode. 1113 * dquots and update them to account for resources taken by that inode.
1097 */ 1114 */
1098 /* ARGSUSED */ 1115 /* ARGSUSED */
1099 STATIC int 1116 STATIC int
1100 xfs_qm_dqusage_adjust( 1117 xfs_qm_dqusage_adjust(
1101 xfs_mount_t *mp, /* mount point for filesystem */ 1118 xfs_mount_t *mp, /* mount point for filesystem */
1102 xfs_ino_t ino, /* inode number to get data for */ 1119 xfs_ino_t ino, /* inode number to get data for */
1103 void __user *buffer, /* not used */ 1120 void __user *buffer, /* not used */
1104 int ubsize, /* not used */ 1121 int ubsize, /* not used */
1105 int *ubused, /* not used */ 1122 int *ubused, /* not used */
1106 int *res) /* result code value */ 1123 int *res) /* result code value */
1107 { 1124 {
1108 xfs_inode_t *ip; 1125 xfs_inode_t *ip;
1109 xfs_qcnt_t nblks, rtblks = 0; 1126 xfs_qcnt_t nblks, rtblks = 0;
1110 int error; 1127 int error;
1111 1128
1112 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1129 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1113 1130
1114 /* 1131 /*
1115 * rootino must have its resources accounted for, not so with the quota 1132 * rootino must have its resources accounted for, not so with the quota
1116 * inodes. 1133 * inodes.
1117 */ 1134 */
1118 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { 1135 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1119 *res = BULKSTAT_RV_NOTHING; 1136 *res = BULKSTAT_RV_NOTHING;
1120 return XFS_ERROR(EINVAL); 1137 return XFS_ERROR(EINVAL);
1121 } 1138 }
1122 1139
1123 /* 1140 /*
1124 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget 1141 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1125 * interface expects the inode to be exclusively locked because that's 1142 * interface expects the inode to be exclusively locked because that's
1126 * the case in all other instances. It's OK that we do this because 1143 * the case in all other instances. It's OK that we do this because
1127 * quotacheck is done only at mount time. 1144 * quotacheck is done only at mount time.
1128 */ 1145 */
1129 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); 1146 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
1130 if (error) { 1147 if (error) {
1131 *res = BULKSTAT_RV_NOTHING; 1148 *res = BULKSTAT_RV_NOTHING;
1132 return error; 1149 return error;
1133 } 1150 }
1134 1151
1135 ASSERT(ip->i_delayed_blks == 0); 1152 ASSERT(ip->i_delayed_blks == 0);
1136 1153
1137 if (XFS_IS_REALTIME_INODE(ip)) { 1154 if (XFS_IS_REALTIME_INODE(ip)) {
1138 /* 1155 /*
1139 * Walk thru the extent list and count the realtime blocks. 1156 * Walk thru the extent list and count the realtime blocks.
1140 */ 1157 */
1141 error = xfs_qm_get_rtblks(ip, &rtblks); 1158 error = xfs_qm_get_rtblks(ip, &rtblks);
1142 if (error) 1159 if (error)
1143 goto error0; 1160 goto error0;
1144 } 1161 }
1145 1162
1146 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; 1163 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1147 1164
1148 /* 1165 /*
1149 * Add the (disk blocks and inode) resources occupied by this 1166 * Add the (disk blocks and inode) resources occupied by this
1150 * inode to its dquots. We do this adjustment in the incore dquot, 1167 * inode to its dquots. We do this adjustment in the incore dquot,
1151 * and also copy the changes to its buffer. 1168 * and also copy the changes to its buffer.
1152 * We don't care about putting these changes in a transaction 1169 * We don't care about putting these changes in a transaction
1153 * envelope because if we crash in the middle of a 'quotacheck' 1170 * envelope because if we crash in the middle of a 'quotacheck'
1154 * we have to start from the beginning anyway. 1171 * we have to start from the beginning anyway.
1155 * Once we're done, we'll log all the dquot bufs. 1172 * Once we're done, we'll log all the dquot bufs.
1156 * 1173 *
1157 * The *QUOTA_ON checks below may look pretty racy, but quotachecks 1174 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1158 * and quotaoffs don't race. (Quotachecks happen at mount time only). 1175 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1159 */ 1176 */
1160 if (XFS_IS_UQUOTA_ON(mp)) { 1177 if (XFS_IS_UQUOTA_ON(mp)) {
1161 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, 1178 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
1162 XFS_DQ_USER, nblks, rtblks); 1179 XFS_DQ_USER, nblks, rtblks);
1163 if (error) 1180 if (error)
1164 goto error0; 1181 goto error0;
1165 } 1182 }
1166 1183
1167 if (XFS_IS_GQUOTA_ON(mp)) { 1184 if (XFS_IS_GQUOTA_ON(mp)) {
1168 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, 1185 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
1169 XFS_DQ_GROUP, nblks, rtblks); 1186 XFS_DQ_GROUP, nblks, rtblks);
1170 if (error) 1187 if (error)
1171 goto error0; 1188 goto error0;
1172 } 1189 }
1173 1190
1174 if (XFS_IS_PQUOTA_ON(mp)) { 1191 if (XFS_IS_PQUOTA_ON(mp)) {
1175 error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), 1192 error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
1176 XFS_DQ_PROJ, nblks, rtblks); 1193 XFS_DQ_PROJ, nblks, rtblks);
1177 if (error) 1194 if (error)
1178 goto error0; 1195 goto error0;
1179 } 1196 }
1180 1197
1181 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1198 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1182 IRELE(ip); 1199 IRELE(ip);
1183 *res = BULKSTAT_RV_DIDONE; 1200 *res = BULKSTAT_RV_DIDONE;
1184 return 0; 1201 return 0;
1185 1202
1186 error0: 1203 error0:
1187 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1204 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1188 IRELE(ip); 1205 IRELE(ip);
1189 *res = BULKSTAT_RV_GIVEUP; 1206 *res = BULKSTAT_RV_GIVEUP;
1190 return error; 1207 return error;
1191 } 1208 }
1192 1209
1193 STATIC int 1210 STATIC int
1194 xfs_qm_flush_one( 1211 xfs_qm_flush_one(
1195 struct xfs_dquot *dqp, 1212 struct xfs_dquot *dqp,
1196 void *data) 1213 void *data)
1197 { 1214 {
1198 struct list_head *buffer_list = data; 1215 struct list_head *buffer_list = data;
1199 struct xfs_buf *bp = NULL; 1216 struct xfs_buf *bp = NULL;
1200 int error = 0; 1217 int error = 0;
1201 1218
1202 xfs_dqlock(dqp); 1219 xfs_dqlock(dqp);
1203 if (dqp->dq_flags & XFS_DQ_FREEING) 1220 if (dqp->dq_flags & XFS_DQ_FREEING)
1204 goto out_unlock; 1221 goto out_unlock;
1205 if (!XFS_DQ_IS_DIRTY(dqp)) 1222 if (!XFS_DQ_IS_DIRTY(dqp))
1206 goto out_unlock; 1223 goto out_unlock;
1207 1224
1208 xfs_dqflock(dqp); 1225 xfs_dqflock(dqp);
1209 error = xfs_qm_dqflush(dqp, &bp); 1226 error = xfs_qm_dqflush(dqp, &bp);
1210 if (error) 1227 if (error)
1211 goto out_unlock; 1228 goto out_unlock;
1212 1229
1213 xfs_buf_delwri_queue(bp, buffer_list); 1230 xfs_buf_delwri_queue(bp, buffer_list);
1214 xfs_buf_relse(bp); 1231 xfs_buf_relse(bp);
1215 out_unlock: 1232 out_unlock:
1216 xfs_dqunlock(dqp); 1233 xfs_dqunlock(dqp);
1217 return error; 1234 return error;
1218 } 1235 }
1219 1236
1220 /* 1237 /*
1221 * Walk thru all the filesystem inodes and construct a consistent view 1238 * Walk thru all the filesystem inodes and construct a consistent view
1222 * of the disk quota world. If the quotacheck fails, disable quotas. 1239 * of the disk quota world. If the quotacheck fails, disable quotas.
1223 */ 1240 */
1224 int 1241 int
1225 xfs_qm_quotacheck( 1242 xfs_qm_quotacheck(
1226 xfs_mount_t *mp) 1243 xfs_mount_t *mp)
1227 { 1244 {
1228 int done, count, error, error2; 1245 int done, count, error, error2;
1229 xfs_ino_t lastino; 1246 xfs_ino_t lastino;
1230 size_t structsz; 1247 size_t structsz;
1231 xfs_inode_t *uip, *gip; 1248 xfs_inode_t *uip, *gip;
1232 uint flags; 1249 uint flags;
1233 LIST_HEAD (buffer_list); 1250 LIST_HEAD (buffer_list);
1234 1251
1235 count = INT_MAX; 1252 count = INT_MAX;
1236 structsz = 1; 1253 structsz = 1;
1237 lastino = 0; 1254 lastino = 0;
1238 flags = 0; 1255 flags = 0;
1239 1256
1240 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); 1257 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1241 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1258 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1242 1259
1243 xfs_notice(mp, "Quotacheck needed: Please wait."); 1260 xfs_notice(mp, "Quotacheck needed: Please wait.");
1244 1261
1245 /* 1262 /*
1246 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset 1263 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1247 * their counters to zero. We need a clean slate. 1264 * their counters to zero. We need a clean slate.
1248 * We don't log our changes till later. 1265 * We don't log our changes till later.
1249 */ 1266 */
1250 uip = mp->m_quotainfo->qi_uquotaip; 1267 uip = mp->m_quotainfo->qi_uquotaip;
1251 if (uip) { 1268 if (uip) {
1252 error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, 1269 error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA,
1253 &buffer_list); 1270 &buffer_list);
1254 if (error) 1271 if (error)
1255 goto error_return; 1272 goto error_return;
1256 flags |= XFS_UQUOTA_CHKD; 1273 flags |= XFS_UQUOTA_CHKD;
1257 } 1274 }
1258 1275
1259 gip = mp->m_quotainfo->qi_gquotaip; 1276 gip = mp->m_quotainfo->qi_gquotaip;
1260 if (gip) { 1277 if (gip) {
1261 error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? 1278 error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1262 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, 1279 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA,
1263 &buffer_list); 1280 &buffer_list);
1264 if (error) 1281 if (error)
1265 goto error_return; 1282 goto error_return;
1266 flags |= XFS_OQUOTA_CHKD; 1283 flags |= XFS_OQUOTA_CHKD;
1267 } 1284 }
1268 1285
1269 do { 1286 do {
1270 /* 1287 /*
1271 * Iterate thru all the inodes in the file system, 1288 * Iterate thru all the inodes in the file system,
1272 * adjusting the corresponding dquot counters in core. 1289 * adjusting the corresponding dquot counters in core.
1273 */ 1290 */
1274 error = xfs_bulkstat(mp, &lastino, &count, 1291 error = xfs_bulkstat(mp, &lastino, &count,
1275 xfs_qm_dqusage_adjust, 1292 xfs_qm_dqusage_adjust,
1276 structsz, NULL, &done); 1293 structsz, NULL, &done);
1277 if (error) 1294 if (error)
1278 break; 1295 break;
1279 1296
1280 } while (!done); 1297 } while (!done);
1281 1298
1282 /* 1299 /*
1283 * We've made all the changes that we need to make incore. Flush them 1300 * We've made all the changes that we need to make incore. Flush them
1284 * down to disk buffers if everything was updated successfully. 1301 * down to disk buffers if everything was updated successfully.
1285 */ 1302 */
1286 if (XFS_IS_UQUOTA_ON(mp)) { 1303 if (XFS_IS_UQUOTA_ON(mp)) {
1287 error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one, 1304 error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one,
1288 &buffer_list); 1305 &buffer_list);
1289 } 1306 }
1290 if (XFS_IS_GQUOTA_ON(mp)) { 1307 if (XFS_IS_GQUOTA_ON(mp)) {
1291 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one, 1308 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one,
1292 &buffer_list); 1309 &buffer_list);
1293 if (!error) 1310 if (!error)
1294 error = error2; 1311 error = error2;
1295 } 1312 }
1296 if (XFS_IS_PQUOTA_ON(mp)) { 1313 if (XFS_IS_PQUOTA_ON(mp)) {
1297 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one, 1314 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one,
1298 &buffer_list); 1315 &buffer_list);
1299 if (!error) 1316 if (!error)
1300 error = error2; 1317 error = error2;
1301 } 1318 }
1302 1319
1303 error2 = xfs_buf_delwri_submit(&buffer_list); 1320 error2 = xfs_buf_delwri_submit(&buffer_list);
1304 if (!error) 1321 if (!error)
1305 error = error2; 1322 error = error2;
1306 1323
1307 /* 1324 /*
1308 * We can get this error if we couldn't do a dquot allocation inside 1325 * We can get this error if we couldn't do a dquot allocation inside
1309 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the 1326 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1310 * dirty dquots that might be cached, we just want to get rid of them 1327 * dirty dquots that might be cached, we just want to get rid of them
1311 * and turn quotaoff. The dquots won't be attached to any of the inodes 1328 * and turn quotaoff. The dquots won't be attached to any of the inodes
1312 * at this point (because we intentionally didn't in dqget_noattach). 1329 * at this point (because we intentionally didn't in dqget_noattach).
1313 */ 1330 */
1314 if (error) { 1331 if (error) {
1315 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); 1332 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1316 goto error_return; 1333 goto error_return;
1317 } 1334 }
1318 1335
1319 /* 1336 /*
1320 * If one type of quotas is off, then it will lose its 1337 * If one type of quotas is off, then it will lose its
1321 * quotachecked status, since we won't be doing accounting for 1338 * quotachecked status, since we won't be doing accounting for
1322 * that type anymore. 1339 * that type anymore.
1323 */ 1340 */
1324 mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD; 1341 mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1325 mp->m_qflags |= flags; 1342 mp->m_qflags |= flags;
1326 1343
1327 error_return: 1344 error_return:
1328 while (!list_empty(&buffer_list)) { 1345 while (!list_empty(&buffer_list)) {
1329 struct xfs_buf *bp = 1346 struct xfs_buf *bp =
1330 list_first_entry(&buffer_list, struct xfs_buf, b_list); 1347 list_first_entry(&buffer_list, struct xfs_buf, b_list);
1331 list_del_init(&bp->b_list); 1348 list_del_init(&bp->b_list);
1332 xfs_buf_relse(bp); 1349 xfs_buf_relse(bp);
1333 } 1350 }
1334 1351
1335 if (error) { 1352 if (error) {
1336 xfs_warn(mp, 1353 xfs_warn(mp,
1337 "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", 1354 "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1338 error); 1355 error);
1339 /* 1356 /*
1340 * We must turn off quotas. 1357 * We must turn off quotas.
1341 */ 1358 */
1342 ASSERT(mp->m_quotainfo != NULL); 1359 ASSERT(mp->m_quotainfo != NULL);
1343 xfs_qm_destroy_quotainfo(mp); 1360 xfs_qm_destroy_quotainfo(mp);
1344 if (xfs_mount_reset_sbqflags(mp)) { 1361 if (xfs_mount_reset_sbqflags(mp)) {
1345 xfs_warn(mp, 1362 xfs_warn(mp,
1346 "Quotacheck: Failed to reset quota flags."); 1363 "Quotacheck: Failed to reset quota flags.");
1347 } 1364 }
1348 } else 1365 } else
1349 xfs_notice(mp, "Quotacheck: Done."); 1366 xfs_notice(mp, "Quotacheck: Done.");
1350 return (error); 1367 return (error);
1351 } 1368 }
1352 1369
1353 /* 1370 /*
1354 * This is called after the superblock has been read in and we're ready to 1371 * This is called after the superblock has been read in and we're ready to
1355 * iget the quota inodes. 1372 * iget the quota inodes.
1356 */ 1373 */
1357 STATIC int 1374 STATIC int
1358 xfs_qm_init_quotainos( 1375 xfs_qm_init_quotainos(
1359 xfs_mount_t *mp) 1376 xfs_mount_t *mp)
1360 { 1377 {
1361 xfs_inode_t *uip, *gip; 1378 xfs_inode_t *uip, *gip;
1362 int error; 1379 int error;
1363 __int64_t sbflags; 1380 __int64_t sbflags;
1364 uint flags; 1381 uint flags;
1365 1382
1366 ASSERT(mp->m_quotainfo); 1383 ASSERT(mp->m_quotainfo);
1367 uip = gip = NULL; 1384 uip = gip = NULL;
1368 sbflags = 0; 1385 sbflags = 0;
1369 flags = 0; 1386 flags = 0;
1370 1387
1371 /* 1388 /*
1372 * Get the uquota and gquota inodes 1389 * Get the uquota and gquota inodes
1373 */ 1390 */
1374 if (xfs_sb_version_hasquota(&mp->m_sb)) { 1391 if (xfs_sb_version_hasquota(&mp->m_sb)) {
1375 if (XFS_IS_UQUOTA_ON(mp) && 1392 if (XFS_IS_UQUOTA_ON(mp) &&
1376 mp->m_sb.sb_uquotino != NULLFSINO) { 1393 mp->m_sb.sb_uquotino != NULLFSINO) {
1377 ASSERT(mp->m_sb.sb_uquotino > 0); 1394 ASSERT(mp->m_sb.sb_uquotino > 0);
1378 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 1395 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1379 0, 0, &uip))) 1396 0, 0, &uip)))
1380 return XFS_ERROR(error); 1397 return XFS_ERROR(error);
1381 } 1398 }
1382 if (XFS_IS_OQUOTA_ON(mp) && 1399 if (XFS_IS_OQUOTA_ON(mp) &&
1383 mp->m_sb.sb_gquotino != NULLFSINO) { 1400 mp->m_sb.sb_gquotino != NULLFSINO) {
1384 ASSERT(mp->m_sb.sb_gquotino > 0); 1401 ASSERT(mp->m_sb.sb_gquotino > 0);
1385 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 1402 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1386 0, 0, &gip))) { 1403 0, 0, &gip))) {
1387 if (uip) 1404 if (uip)
1388 IRELE(uip); 1405 IRELE(uip);
1389 return XFS_ERROR(error); 1406 return XFS_ERROR(error);
1390 } 1407 }
1391 } 1408 }
1392 } else { 1409 } else {
1393 flags |= XFS_QMOPT_SBVERSION; 1410 flags |= XFS_QMOPT_SBVERSION;
1394 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | 1411 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1395 XFS_SB_GQUOTINO | XFS_SB_QFLAGS); 1412 XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1396 } 1413 }
1397 1414
1398 /* 1415 /*
1399 * Create the two inodes, if they don't exist already. The changes 1416 * Create the two inodes, if they don't exist already. The changes
1400 * made above will get added to a transaction and logged in one of 1417 * made above will get added to a transaction and logged in one of
1401 * the qino_alloc calls below. If the device is readonly, 1418 * the qino_alloc calls below. If the device is readonly,
1402 * temporarily switch to read-write to do this. 1419 * temporarily switch to read-write to do this.
1403 */ 1420 */
1404 if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { 1421 if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1405 if ((error = xfs_qm_qino_alloc(mp, &uip, 1422 if ((error = xfs_qm_qino_alloc(mp, &uip,
1406 sbflags | XFS_SB_UQUOTINO, 1423 sbflags | XFS_SB_UQUOTINO,
1407 flags | XFS_QMOPT_UQUOTA))) 1424 flags | XFS_QMOPT_UQUOTA)))
1408 return XFS_ERROR(error); 1425 return XFS_ERROR(error);
1409 1426
1410 flags &= ~XFS_QMOPT_SBVERSION; 1427 flags &= ~XFS_QMOPT_SBVERSION;
1411 } 1428 }
1412 if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { 1429 if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1413 flags |= (XFS_IS_GQUOTA_ON(mp) ? 1430 flags |= (XFS_IS_GQUOTA_ON(mp) ?
1414 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); 1431 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1415 error = xfs_qm_qino_alloc(mp, &gip, 1432 error = xfs_qm_qino_alloc(mp, &gip,
1416 sbflags | XFS_SB_GQUOTINO, flags); 1433 sbflags | XFS_SB_GQUOTINO, flags);
1417 if (error) { 1434 if (error) {
1418 if (uip) 1435 if (uip)
1419 IRELE(uip); 1436 IRELE(uip);
1420 1437
1421 return XFS_ERROR(error); 1438 return XFS_ERROR(error);
1422 } 1439 }
1423 } 1440 }
1424 1441
1425 mp->m_quotainfo->qi_uquotaip = uip; 1442 mp->m_quotainfo->qi_uquotaip = uip;
1426 mp->m_quotainfo->qi_gquotaip = gip; 1443 mp->m_quotainfo->qi_gquotaip = gip;
1427 1444
1428 return 0; 1445 return 0;
1429 } 1446 }
1430 1447
1431 STATIC void 1448 STATIC void
1432 xfs_qm_dqfree_one( 1449 xfs_qm_dqfree_one(
1433 struct xfs_dquot *dqp) 1450 struct xfs_dquot *dqp)
1434 { 1451 {
1435 struct xfs_mount *mp = dqp->q_mount; 1452 struct xfs_mount *mp = dqp->q_mount;
1436 struct xfs_quotainfo *qi = mp->m_quotainfo; 1453 struct xfs_quotainfo *qi = mp->m_quotainfo;
1437 1454
1438 mutex_lock(&qi->qi_tree_lock); 1455 mutex_lock(&qi->qi_tree_lock);
1439 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), 1456 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
1440 be32_to_cpu(dqp->q_core.d_id)); 1457 be32_to_cpu(dqp->q_core.d_id));
1441 1458
1442 qi->qi_dquots--; 1459 qi->qi_dquots--;
1443 mutex_unlock(&qi->qi_tree_lock); 1460 mutex_unlock(&qi->qi_tree_lock);
1444 1461
1445 xfs_qm_dqdestroy(dqp); 1462 xfs_qm_dqdestroy(dqp);
1446 } 1463 }
1447 1464
1448 STATIC void 1465 STATIC void
1449 xfs_qm_dqreclaim_one( 1466 xfs_qm_dqreclaim_one(
1450 struct xfs_dquot *dqp, 1467 struct xfs_dquot *dqp,
1451 struct list_head *buffer_list, 1468 struct list_head *buffer_list,
1452 struct list_head *dispose_list) 1469 struct list_head *dispose_list)
1453 { 1470 {
1454 struct xfs_mount *mp = dqp->q_mount; 1471 struct xfs_mount *mp = dqp->q_mount;
1455 struct xfs_quotainfo *qi = mp->m_quotainfo; 1472 struct xfs_quotainfo *qi = mp->m_quotainfo;
1456 int error; 1473 int error;
1457 1474
1458 if (!xfs_dqlock_nowait(dqp)) 1475 if (!xfs_dqlock_nowait(dqp))
1459 goto out_move_tail; 1476 goto out_move_tail;
1460 1477
1461 /* 1478 /*
1462 * This dquot has acquired a reference in the meantime remove it from 1479 * This dquot has acquired a reference in the meantime remove it from
1463 * the freelist and try again. 1480 * the freelist and try again.
1464 */ 1481 */
1465 if (dqp->q_nrefs) { 1482 if (dqp->q_nrefs) {
1466 xfs_dqunlock(dqp); 1483 xfs_dqunlock(dqp);
1467 1484
1468 trace_xfs_dqreclaim_want(dqp); 1485 trace_xfs_dqreclaim_want(dqp);
1469 XFS_STATS_INC(xs_qm_dqwants); 1486 XFS_STATS_INC(xs_qm_dqwants);
1470 1487
1471 list_del_init(&dqp->q_lru); 1488 list_del_init(&dqp->q_lru);
1472 qi->qi_lru_count--; 1489 qi->qi_lru_count--;
1473 XFS_STATS_DEC(xs_qm_dquot_unused); 1490 XFS_STATS_DEC(xs_qm_dquot_unused);
1474 return; 1491 return;
1475 } 1492 }
1476 1493
1477 /* 1494 /*
1478 * Try to grab the flush lock. If this dquot is in the process of 1495 * Try to grab the flush lock. If this dquot is in the process of
1479 * getting flushed to disk, we don't want to reclaim it. 1496 * getting flushed to disk, we don't want to reclaim it.
1480 */ 1497 */
1481 if (!xfs_dqflock_nowait(dqp)) 1498 if (!xfs_dqflock_nowait(dqp))
1482 goto out_unlock_move_tail; 1499 goto out_unlock_move_tail;
1483 1500
1484 if (XFS_DQ_IS_DIRTY(dqp)) { 1501 if (XFS_DQ_IS_DIRTY(dqp)) {
1485 struct xfs_buf *bp = NULL; 1502 struct xfs_buf *bp = NULL;
1486 1503
1487 trace_xfs_dqreclaim_dirty(dqp); 1504 trace_xfs_dqreclaim_dirty(dqp);
1488 1505
1489 error = xfs_qm_dqflush(dqp, &bp); 1506 error = xfs_qm_dqflush(dqp, &bp);
1490 if (error) { 1507 if (error) {
1491 xfs_warn(mp, "%s: dquot %p flush failed", 1508 xfs_warn(mp, "%s: dquot %p flush failed",
1492 __func__, dqp); 1509 __func__, dqp);
1493 goto out_unlock_move_tail; 1510 goto out_unlock_move_tail;
1494 } 1511 }
1495 1512
1496 xfs_buf_delwri_queue(bp, buffer_list); 1513 xfs_buf_delwri_queue(bp, buffer_list);
1497 xfs_buf_relse(bp); 1514 xfs_buf_relse(bp);
1498 /* 1515 /*
1499 * Give the dquot another try on the freelist, as the 1516 * Give the dquot another try on the freelist, as the
1500 * flushing will take some time. 1517 * flushing will take some time.
1501 */ 1518 */
1502 goto out_unlock_move_tail; 1519 goto out_unlock_move_tail;
1503 } 1520 }
1504 xfs_dqfunlock(dqp); 1521 xfs_dqfunlock(dqp);
1505 1522
1506 /* 1523 /*
1507 * Prevent lookups now that we are past the point of no return. 1524 * Prevent lookups now that we are past the point of no return.
1508 */ 1525 */
1509 dqp->dq_flags |= XFS_DQ_FREEING; 1526 dqp->dq_flags |= XFS_DQ_FREEING;
1510 xfs_dqunlock(dqp); 1527 xfs_dqunlock(dqp);
1511 1528
1512 ASSERT(dqp->q_nrefs == 0); 1529 ASSERT(dqp->q_nrefs == 0);
1513 list_move_tail(&dqp->q_lru, dispose_list); 1530 list_move_tail(&dqp->q_lru, dispose_list);
1514 qi->qi_lru_count--; 1531 qi->qi_lru_count--;
1515 XFS_STATS_DEC(xs_qm_dquot_unused); 1532 XFS_STATS_DEC(xs_qm_dquot_unused);
1516 1533
1517 trace_xfs_dqreclaim_done(dqp); 1534 trace_xfs_dqreclaim_done(dqp);
1518 XFS_STATS_INC(xs_qm_dqreclaims); 1535 XFS_STATS_INC(xs_qm_dqreclaims);
1519 return; 1536 return;
1520 1537
1521 /* 1538 /*
1522 * Move the dquot to the tail of the list so that we don't spin on it. 1539 * Move the dquot to the tail of the list so that we don't spin on it.
1523 */ 1540 */
1524 out_unlock_move_tail: 1541 out_unlock_move_tail:
1525 xfs_dqunlock(dqp); 1542 xfs_dqunlock(dqp);
1526 out_move_tail: 1543 out_move_tail:
1527 list_move_tail(&dqp->q_lru, &qi->qi_lru_list); 1544 list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
1528 trace_xfs_dqreclaim_busy(dqp); 1545 trace_xfs_dqreclaim_busy(dqp);
1529 XFS_STATS_INC(xs_qm_dqreclaim_misses); 1546 XFS_STATS_INC(xs_qm_dqreclaim_misses);
1530 } 1547 }
1531 1548
1532 STATIC int 1549 STATIC int
1533 xfs_qm_shake( 1550 xfs_qm_shake(
1534 struct shrinker *shrink, 1551 struct shrinker *shrink,
1535 struct shrink_control *sc) 1552 struct shrink_control *sc)
1536 { 1553 {
1537 struct xfs_quotainfo *qi = 1554 struct xfs_quotainfo *qi =
1538 container_of(shrink, struct xfs_quotainfo, qi_shrinker); 1555 container_of(shrink, struct xfs_quotainfo, qi_shrinker);
1539 int nr_to_scan = sc->nr_to_scan; 1556 int nr_to_scan = sc->nr_to_scan;
1540 LIST_HEAD (buffer_list); 1557 LIST_HEAD (buffer_list);
1541 LIST_HEAD (dispose_list); 1558 LIST_HEAD (dispose_list);
1542 struct xfs_dquot *dqp; 1559 struct xfs_dquot *dqp;
1543 int error; 1560 int error;
1544 1561
1545 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) 1562 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
1546 return 0; 1563 return 0;
1547 if (!nr_to_scan) 1564 if (!nr_to_scan)
1548 goto out; 1565 goto out;
1549 1566
1550 mutex_lock(&qi->qi_lru_lock); 1567 mutex_lock(&qi->qi_lru_lock);
1551 while (!list_empty(&qi->qi_lru_list)) { 1568 while (!list_empty(&qi->qi_lru_list)) {
1552 if (nr_to_scan-- <= 0) 1569 if (nr_to_scan-- <= 0)
1553 break; 1570 break;
1554 dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, 1571 dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
1555 q_lru); 1572 q_lru);
1556 xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); 1573 xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list);
1557 } 1574 }
1558 mutex_unlock(&qi->qi_lru_lock); 1575 mutex_unlock(&qi->qi_lru_lock);
1559 1576
1560 error = xfs_buf_delwri_submit(&buffer_list); 1577 error = xfs_buf_delwri_submit(&buffer_list);
1561 if (error) 1578 if (error)
1562 xfs_warn(NULL, "%s: dquot reclaim failed", __func__); 1579 xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
1563 1580
1564 while (!list_empty(&dispose_list)) { 1581 while (!list_empty(&dispose_list)) {
1565 dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); 1582 dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
1566 list_del_init(&dqp->q_lru); 1583 list_del_init(&dqp->q_lru);
1567 xfs_qm_dqfree_one(dqp); 1584 xfs_qm_dqfree_one(dqp);
1568 } 1585 }
1569 1586
1570 out: 1587 out:
1571 return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; 1588 return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
1572 } 1589 }
1573 1590
1574 /* 1591 /*
1575 * Start a transaction and write the incore superblock changes to 1592 * Start a transaction and write the incore superblock changes to
1576 * disk. flags parameter indicates which fields have changed. 1593 * disk. flags parameter indicates which fields have changed.
1577 */ 1594 */
1578 int 1595 int
1579 xfs_qm_write_sb_changes( 1596 xfs_qm_write_sb_changes(
1580 xfs_mount_t *mp, 1597 xfs_mount_t *mp,
1581 __int64_t flags) 1598 __int64_t flags)
1582 { 1599 {
1583 xfs_trans_t *tp; 1600 xfs_trans_t *tp;
1584 int error; 1601 int error;
1585 1602
1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1603 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1587 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp), 1604 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1588 0, 0, XFS_DEFAULT_LOG_COUNT); 1605 0, 0, XFS_DEFAULT_LOG_COUNT);
1589 if (error) { 1606 if (error) {
1590 xfs_trans_cancel(tp, 0); 1607 xfs_trans_cancel(tp, 0);
1591 return error; 1608 return error;
1592 } 1609 }
1593 1610
1594 xfs_mod_sb(tp, flags); 1611 xfs_mod_sb(tp, flags);
1595 error = xfs_trans_commit(tp, 0); 1612 error = xfs_trans_commit(tp, 0);
1596 1613
1597 return error; 1614 return error;
1598 } 1615 }
1599 1616
1600 1617
1601 /* --------------- utility functions for vnodeops ---------------- */ 1618 /* --------------- utility functions for vnodeops ---------------- */
1602 1619
1603 1620
1604 /* 1621 /*
1605 * Given an inode, a uid, gid and prid make sure that we have 1622 * Given an inode, a uid, gid and prid make sure that we have
1606 * allocated relevant dquot(s) on disk, and that we won't exceed inode 1623 * allocated relevant dquot(s) on disk, and that we won't exceed inode
1607 * quotas by creating this file. 1624 * quotas by creating this file.
1608 * This also attaches dquot(s) to the given inode after locking it, 1625 * This also attaches dquot(s) to the given inode after locking it,
1609 * and returns the dquots corresponding to the uid and/or gid. 1626 * and returns the dquots corresponding to the uid and/or gid.
1610 * 1627 *
1611 * in : inode (unlocked) 1628 * in : inode (unlocked)
1612 * out : udquot, gdquot with references taken and unlocked 1629 * out : udquot, gdquot with references taken and unlocked
1613 */ 1630 */
1614 int 1631 int
1615 xfs_qm_vop_dqalloc( 1632 xfs_qm_vop_dqalloc(
1616 struct xfs_inode *ip, 1633 struct xfs_inode *ip,
1617 uid_t uid, 1634 uid_t uid,
1618 gid_t gid, 1635 gid_t gid,
1619 prid_t prid, 1636 prid_t prid,
1620 uint flags, 1637 uint flags,
1621 struct xfs_dquot **O_udqpp, 1638 struct xfs_dquot **O_udqpp,
1622 struct xfs_dquot **O_gdqpp) 1639 struct xfs_dquot **O_gdqpp)
1623 { 1640 {
1624 struct xfs_mount *mp = ip->i_mount; 1641 struct xfs_mount *mp = ip->i_mount;
1625 struct xfs_dquot *uq, *gq; 1642 struct xfs_dquot *uq, *gq;
1626 int error; 1643 int error;
1627 uint lockflags; 1644 uint lockflags;
1628 1645
1629 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 1646 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
1630 return 0; 1647 return 0;
1631 1648
1632 lockflags = XFS_ILOCK_EXCL; 1649 lockflags = XFS_ILOCK_EXCL;
1633 xfs_ilock(ip, lockflags); 1650 xfs_ilock(ip, lockflags);
1634 1651
1635 if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) 1652 if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
1636 gid = ip->i_d.di_gid; 1653 gid = ip->i_d.di_gid;
1637 1654
1638 /* 1655 /*
1639 * Attach the dquot(s) to this inode, doing a dquot allocation 1656 * Attach the dquot(s) to this inode, doing a dquot allocation
1640 * if necessary. The dquot(s) will not be locked. 1657 * if necessary. The dquot(s) will not be locked.
1641 */ 1658 */
1642 if (XFS_NOT_DQATTACHED(mp, ip)) { 1659 if (XFS_NOT_DQATTACHED(mp, ip)) {
1643 error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); 1660 error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
1644 if (error) { 1661 if (error) {
1645 xfs_iunlock(ip, lockflags); 1662 xfs_iunlock(ip, lockflags);
1646 return error; 1663 return error;
1647 } 1664 }
1648 } 1665 }
1649 1666
1650 uq = gq = NULL; 1667 uq = gq = NULL;
1651 if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { 1668 if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
1652 if (ip->i_d.di_uid != uid) { 1669 if (ip->i_d.di_uid != uid) {
1653 /* 1670 /*
1654 * What we need is the dquot that has this uid, and 1671 * What we need is the dquot that has this uid, and
1655 * if we send the inode to dqget, the uid of the inode 1672 * if we send the inode to dqget, the uid of the inode
1656 * takes priority over what's sent in the uid argument. 1673 * takes priority over what's sent in the uid argument.
1657 * We must unlock inode here before calling dqget if 1674 * We must unlock inode here before calling dqget if
1658 * we're not sending the inode, because otherwise 1675 * we're not sending the inode, because otherwise
1659 * we'll deadlock by doing trans_reserve while 1676 * we'll deadlock by doing trans_reserve while
1660 * holding ilock. 1677 * holding ilock.
1661 */ 1678 */
1662 xfs_iunlock(ip, lockflags); 1679 xfs_iunlock(ip, lockflags);
1663 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, 1680 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
1664 XFS_DQ_USER, 1681 XFS_DQ_USER,
1665 XFS_QMOPT_DQALLOC | 1682 XFS_QMOPT_DQALLOC |
1666 XFS_QMOPT_DOWARN, 1683 XFS_QMOPT_DOWARN,
1667 &uq))) { 1684 &uq))) {
1668 ASSERT(error != ENOENT); 1685 ASSERT(error != ENOENT);
1669 return error; 1686 return error;
1670 } 1687 }
1671 /* 1688 /*
1672 * Get the ilock in the right order. 1689 * Get the ilock in the right order.
1673 */ 1690 */
1674 xfs_dqunlock(uq); 1691 xfs_dqunlock(uq);
1675 lockflags = XFS_ILOCK_SHARED; 1692 lockflags = XFS_ILOCK_SHARED;
1676 xfs_ilock(ip, lockflags); 1693 xfs_ilock(ip, lockflags);
1677 } else { 1694 } else {
1678 /* 1695 /*
1679 * Take an extra reference, because we'll return 1696 * Take an extra reference, because we'll return
1680 * this to caller 1697 * this to caller
1681 */ 1698 */
1682 ASSERT(ip->i_udquot); 1699 ASSERT(ip->i_udquot);
1683 uq = xfs_qm_dqhold(ip->i_udquot); 1700 uq = xfs_qm_dqhold(ip->i_udquot);
1684 } 1701 }
1685 } 1702 }
1686 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { 1703 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
1687 if (ip->i_d.di_gid != gid) { 1704 if (ip->i_d.di_gid != gid) {
1688 xfs_iunlock(ip, lockflags); 1705 xfs_iunlock(ip, lockflags);
1689 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, 1706 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
1690 XFS_DQ_GROUP, 1707 XFS_DQ_GROUP,
1691 XFS_QMOPT_DQALLOC | 1708 XFS_QMOPT_DQALLOC |
1692 XFS_QMOPT_DOWARN, 1709 XFS_QMOPT_DOWARN,
1693 &gq))) { 1710 &gq))) {
1694 if (uq) 1711 if (uq)
1695 xfs_qm_dqrele(uq); 1712 xfs_qm_dqrele(uq);
1696 ASSERT(error != ENOENT); 1713 ASSERT(error != ENOENT);
1697 return error; 1714 return error;
1698 } 1715 }
1699 xfs_dqunlock(gq); 1716 xfs_dqunlock(gq);
1700 lockflags = XFS_ILOCK_SHARED; 1717 lockflags = XFS_ILOCK_SHARED;
1701 xfs_ilock(ip, lockflags); 1718 xfs_ilock(ip, lockflags);
1702 } else { 1719 } else {
1703 ASSERT(ip->i_gdquot); 1720 ASSERT(ip->i_gdquot);
1704 gq = xfs_qm_dqhold(ip->i_gdquot); 1721 gq = xfs_qm_dqhold(ip->i_gdquot);
1705 } 1722 }
1706 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { 1723 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
1707 if (xfs_get_projid(ip) != prid) { 1724 if (xfs_get_projid(ip) != prid) {
1708 xfs_iunlock(ip, lockflags); 1725 xfs_iunlock(ip, lockflags);
1709 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, 1726 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
1710 XFS_DQ_PROJ, 1727 XFS_DQ_PROJ,
1711 XFS_QMOPT_DQALLOC | 1728 XFS_QMOPT_DQALLOC |
1712 XFS_QMOPT_DOWARN, 1729 XFS_QMOPT_DOWARN,
1713 &gq))) { 1730 &gq))) {
1714 if (uq) 1731 if (uq)
1715 xfs_qm_dqrele(uq); 1732 xfs_qm_dqrele(uq);
1716 ASSERT(error != ENOENT); 1733 ASSERT(error != ENOENT);
1717 return (error); 1734 return (error);
1718 } 1735 }
1719 xfs_dqunlock(gq); 1736 xfs_dqunlock(gq);
1720 lockflags = XFS_ILOCK_SHARED; 1737 lockflags = XFS_ILOCK_SHARED;
1721 xfs_ilock(ip, lockflags); 1738 xfs_ilock(ip, lockflags);
1722 } else { 1739 } else {
1723 ASSERT(ip->i_gdquot); 1740 ASSERT(ip->i_gdquot);
1724 gq = xfs_qm_dqhold(ip->i_gdquot); 1741 gq = xfs_qm_dqhold(ip->i_gdquot);
1725 } 1742 }
1726 } 1743 }
1727 if (uq) 1744 if (uq)
1728 trace_xfs_dquot_dqalloc(ip); 1745 trace_xfs_dquot_dqalloc(ip);
1729 1746
1730 xfs_iunlock(ip, lockflags); 1747 xfs_iunlock(ip, lockflags);
1731 if (O_udqpp) 1748 if (O_udqpp)
1732 *O_udqpp = uq; 1749 *O_udqpp = uq;
1733 else if (uq) 1750 else if (uq)
1734 xfs_qm_dqrele(uq); 1751 xfs_qm_dqrele(uq);
1735 if (O_gdqpp) 1752 if (O_gdqpp)
1736 *O_gdqpp = gq; 1753 *O_gdqpp = gq;
1737 else if (gq) 1754 else if (gq)
1738 xfs_qm_dqrele(gq); 1755 xfs_qm_dqrele(gq);
1739 return 0; 1756 return 0;
1740 } 1757 }
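
xfs_qm_vop_dqalloc() hands back referenced dquots for the ids the caller is about to assign; anything the caller does not end up attaching must be dropped with xfs_qm_dqrele(). A minimal caller sketch in the spirit of the inode-create path, not part of this patch (the current_fsuid()/current_fsgid() usage and the error handling are illustrative assumptions):

	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
	int			error;

	/* reserve dquots for the prospective owner before starting the transaction */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
				   XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
				   &udqp, &gdqp);
	if (error)
		return error;

	/* ... allocate the inode, then xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp) ... */

	xfs_qm_dqrele(udqp);	/* drop the extra references taken above */
	xfs_qm_dqrele(gdqp);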
1741 1758
1742 /* 1759 /*
1743 * Actually transfer ownership, and do dquot modifications. 1760 * Actually transfer ownership, and do dquot modifications.
1744 * These were already reserved. 1761 * These were already reserved.
1745 */ 1762 */
1746 xfs_dquot_t * 1763 xfs_dquot_t *
1747 xfs_qm_vop_chown( 1764 xfs_qm_vop_chown(
1748 xfs_trans_t *tp, 1765 xfs_trans_t *tp,
1749 xfs_inode_t *ip, 1766 xfs_inode_t *ip,
1750 xfs_dquot_t **IO_olddq, 1767 xfs_dquot_t **IO_olddq,
1751 xfs_dquot_t *newdq) 1768 xfs_dquot_t *newdq)
1752 { 1769 {
1753 xfs_dquot_t *prevdq; 1770 xfs_dquot_t *prevdq;
1754 uint bfield = XFS_IS_REALTIME_INODE(ip) ? 1771 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
1755 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 1772 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
1756 1773
1757 1774
1758 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1775 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1759 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 1776 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
1760 1777
1761 /* old dquot */ 1778 /* old dquot */
1762 prevdq = *IO_olddq; 1779 prevdq = *IO_olddq;
1763 ASSERT(prevdq); 1780 ASSERT(prevdq);
1764 ASSERT(prevdq != newdq); 1781 ASSERT(prevdq != newdq);
1765 1782
1766 xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); 1783 xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
1767 xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); 1784 xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
1768 1785
1769 /* the sparkling new dquot */ 1786 /* the sparkling new dquot */
1770 xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); 1787 xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
1771 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); 1788 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
1772 1789
1773 /* 1790 /*
1774 * Take an extra reference, because the inode is going to keep 1791 * Take an extra reference, because the inode is going to keep
1775 * this dquot pointer even after the trans_commit. 1792 * this dquot pointer even after the trans_commit.
1776 */ 1793 */
1777 *IO_olddq = xfs_qm_dqhold(newdq); 1794 *IO_olddq = xfs_qm_dqhold(newdq);
1778 1795
1779 return prevdq; 1796 return prevdq;
1780 } 1797 }
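
The dquot returned here is the previous owner's; the caller holds on to it until the transaction commits and only then drops the reference. A hedged caller sketch (variable names are illustrative, not taken from this patch):

	struct xfs_dquot	*olddq;

	/* swap the user dquot pointer inside the chown transaction */
	olddq = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);

	/* ... xfs_trans_commit() ... */

	xfs_qm_dqrele(olddq);	/* release the old owner's dquot after commit */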
1781 1798
1782 /* 1799 /*
1783 * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). 1800 * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
1784 */ 1801 */
1785 int 1802 int
1786 xfs_qm_vop_chown_reserve( 1803 xfs_qm_vop_chown_reserve(
1787 xfs_trans_t *tp, 1804 xfs_trans_t *tp,
1788 xfs_inode_t *ip, 1805 xfs_inode_t *ip,
1789 xfs_dquot_t *udqp, 1806 xfs_dquot_t *udqp,
1790 xfs_dquot_t *gdqp, 1807 xfs_dquot_t *gdqp,
1791 uint flags) 1808 uint flags)
1792 { 1809 {
1793 xfs_mount_t *mp = ip->i_mount; 1810 xfs_mount_t *mp = ip->i_mount;
1794 uint delblks, blkflags, prjflags = 0; 1811 uint delblks, blkflags, prjflags = 0;
1795 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; 1812 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
1796 int error; 1813 int error;
1797 1814
1798 1815
1799 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 1816 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
1800 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1817 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1801 1818
1802 delblks = ip->i_delayed_blks; 1819 delblks = ip->i_delayed_blks;
1803 delblksudq = delblksgdq = unresudq = unresgdq = NULL; 1820 delblksudq = delblksgdq = unresudq = unresgdq = NULL;
1804 blkflags = XFS_IS_REALTIME_INODE(ip) ? 1821 blkflags = XFS_IS_REALTIME_INODE(ip) ?
1805 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; 1822 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
1806 1823
1807 if (XFS_IS_UQUOTA_ON(mp) && udqp && 1824 if (XFS_IS_UQUOTA_ON(mp) && udqp &&
1808 ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { 1825 ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
1809 delblksudq = udqp; 1826 delblksudq = udqp;
1810 /* 1827 /*
1811 * If there are delayed allocation blocks, then we have to 1828 * If there are delayed allocation blocks, then we have to
1812 * unreserve those from the old dquot, and add them to the 1829 * unreserve those from the old dquot, and add them to the
1813 * new dquot. 1830 * new dquot.
1814 */ 1831 */
1815 if (delblks) { 1832 if (delblks) {
1816 ASSERT(ip->i_udquot); 1833 ASSERT(ip->i_udquot);
1817 unresudq = ip->i_udquot; 1834 unresudq = ip->i_udquot;
1818 } 1835 }
1819 } 1836 }
1820 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { 1837 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
1821 if (XFS_IS_PQUOTA_ON(ip->i_mount) && 1838 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
1822 xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) 1839 xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
1823 prjflags = XFS_QMOPT_ENOSPC; 1840 prjflags = XFS_QMOPT_ENOSPC;
1824 1841
1825 if (prjflags || 1842 if (prjflags ||
1826 (XFS_IS_GQUOTA_ON(ip->i_mount) && 1843 (XFS_IS_GQUOTA_ON(ip->i_mount) &&
1827 ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { 1844 ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
1828 delblksgdq = gdqp; 1845 delblksgdq = gdqp;
1829 if (delblks) { 1846 if (delblks) {
1830 ASSERT(ip->i_gdquot); 1847 ASSERT(ip->i_gdquot);
1831 unresgdq = ip->i_gdquot; 1848 unresgdq = ip->i_gdquot;
1832 } 1849 }
1833 } 1850 }
1834 } 1851 }
1835 1852
1836 if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, 1853 if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
1837 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, 1854 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
1838 flags | blkflags | prjflags))) 1855 flags | blkflags | prjflags)))
1839 return (error); 1856 return (error);
1840 1857
1841 /* 1858 /*
1842 * Do the delayed blks reservations/unreservations now. Since these 1859 * Do the delayed blks reservations/unreservations now. Since these
1843 * are done without the help of a transaction, if a reservation fails, 1860 * are done without the help of a transaction, if a reservation fails,
1844 * its previous reservations won't be automatically undone by trans 1861 * its previous reservations won't be automatically undone by trans
1845 * code. So, we have to do it manually here. 1862 * code. So, we have to do it manually here.
1846 */ 1863 */
1847 if (delblks) { 1864 if (delblks) {
1848 /* 1865 /*
1849 * Do the reservations first. Unreservation can't fail. 1866 * Do the reservations first. Unreservation can't fail.
1850 */ 1867 */
1851 ASSERT(delblksudq || delblksgdq); 1868 ASSERT(delblksudq || delblksgdq);
1852 ASSERT(unresudq || unresgdq); 1869 ASSERT(unresudq || unresgdq);
1853 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, 1870 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
1854 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, 1871 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
1855 flags | blkflags | prjflags))) 1872 flags | blkflags | prjflags)))
1856 return (error); 1873 return (error);
1857 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, 1874 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
1858 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, 1875 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
1859 blkflags); 1876 blkflags);
1860 } 1877 }
1861 1878
1862 return (0); 1879 return (0);
1863 } 1880 }
1864 1881
1865 int 1882 int
1866 xfs_qm_vop_rename_dqattach( 1883 xfs_qm_vop_rename_dqattach(
1867 struct xfs_inode **i_tab) 1884 struct xfs_inode **i_tab)
1868 { 1885 {
1869 struct xfs_mount *mp = i_tab[0]->i_mount; 1886 struct xfs_mount *mp = i_tab[0]->i_mount;
1870 int i; 1887 int i;
1871 1888
1872 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 1889 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
1873 return 0; 1890 return 0;
1874 1891
1875 for (i = 0; (i < 4 && i_tab[i]); i++) { 1892 for (i = 0; (i < 4 && i_tab[i]); i++) {
1876 struct xfs_inode *ip = i_tab[i]; 1893 struct xfs_inode *ip = i_tab[i];
1877 int error; 1894 int error;
1878 1895
1879 /* 1896 /*
1880 * Watch out for duplicate entries in the table. 1897 * Watch out for duplicate entries in the table.
1881 */ 1898 */
1882 if (i == 0 || ip != i_tab[i-1]) { 1899 if (i == 0 || ip != i_tab[i-1]) {
1883 if (XFS_NOT_DQATTACHED(mp, ip)) { 1900 if (XFS_NOT_DQATTACHED(mp, ip)) {
1884 error = xfs_qm_dqattach(ip, 0); 1901 error = xfs_qm_dqattach(ip, 0);
1885 if (error) 1902 if (error)
1886 return error; 1903 return error;
1887 } 1904 }
1888 } 1905 }
1889 } 1906 }
1890 return 0; 1907 return 0;
1891 } 1908 }
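
Callers collect the inodes touched by a rename into a small table (at most four entries, NULL-terminated when fewer are involved) and let this routine attach dquots to each distinct one. A hedged sketch, ignoring the sorting and locking the real rename path performs:

	struct xfs_inode	*inodes[4] = { src_dp, target_dp, src_ip, target_ip };
	int			error;

	error = xfs_qm_vop_rename_dqattach(inodes);
	if (error)
		return error;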
1892 1909
1893 void 1910 void
1894 xfs_qm_vop_create_dqattach( 1911 xfs_qm_vop_create_dqattach(
1895 struct xfs_trans *tp, 1912 struct xfs_trans *tp,
1896 struct xfs_inode *ip, 1913 struct xfs_inode *ip,
1897 struct xfs_dquot *udqp, 1914 struct xfs_dquot *udqp,
1898 struct xfs_dquot *gdqp) 1915 struct xfs_dquot *gdqp)
1899 { 1916 {
1900 struct xfs_mount *mp = tp->t_mountp; 1917 struct xfs_mount *mp = tp->t_mountp;
1901 1918
1902 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 1919 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
1903 return; 1920 return;
1904 1921
1905 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1922 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1906 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1923 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1907 1924
1908 if (udqp) { 1925 if (udqp) {
1909 ASSERT(ip->i_udquot == NULL); 1926 ASSERT(ip->i_udquot == NULL);
1910 ASSERT(XFS_IS_UQUOTA_ON(mp)); 1927 ASSERT(XFS_IS_UQUOTA_ON(mp));
1911 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 1928 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
1912 1929
1913 ip->i_udquot = xfs_qm_dqhold(udqp); 1930 ip->i_udquot = xfs_qm_dqhold(udqp);
1914 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 1931 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
1915 } 1932 }
1916 if (gdqp) { 1933 if (gdqp) {
1917 ASSERT(ip->i_gdquot == NULL); 1934 ASSERT(ip->i_gdquot == NULL);
1918 ASSERT(XFS_IS_OQUOTA_ON(mp)); 1935 ASSERT(XFS_IS_OQUOTA_ON(mp));
1919 ASSERT((XFS_IS_GQUOTA_ON(mp) ? 1936 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
1920 ip->i_d.di_gid : xfs_get_projid(ip)) == 1937 ip->i_d.di_gid : xfs_get_projid(ip)) ==
1921 be32_to_cpu(gdqp->q_core.d_id)); 1938 be32_to_cpu(gdqp->q_core.d_id));
1922 1939
1923 ip->i_gdquot = xfs_qm_dqhold(gdqp); 1940 ip->i_gdquot = xfs_qm_dqhold(gdqp);
1924 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 1941 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
1925 } 1942 }
1926 } 1943 }
1927 1944
1 /* 1 /*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #ifndef __XFS_QM_H__ 18 #ifndef __XFS_QM_H__
19 #define __XFS_QM_H__ 19 #define __XFS_QM_H__
20 20
21 #include "xfs_dquot_item.h" 21 #include "xfs_dquot_item.h"
22 #include "xfs_dquot.h" 22 #include "xfs_dquot.h"
23 #include "xfs_quota_priv.h" 23 #include "xfs_quota_priv.h"
24 24
25 struct xfs_inode; 25 struct xfs_inode;
26 26
27 extern struct kmem_zone *xfs_qm_dqtrxzone; 27 extern struct kmem_zone *xfs_qm_dqtrxzone;
28 28
29 /* 29 /*
30 * This defines the unit of allocation of dquots. 30 * This defines the unit of allocation of dquots.
31 * Currently, it is just one file system block, and a 4K blk contains 30 31 * Currently, it is just one file system block, and a 4K blk contains 30
32 * (136 * 30 = 4080) dquots. It's probably not worth trying to make 32 * (136 * 30 = 4080) dquots. It's probably not worth trying to make
33 * this more dynamic. 33 * this more dynamic.
34 * XXXsup However, if this number is changed, we have to make sure that we don't 34 * XXXsup However, if this number is changed, we have to make sure that we don't
35 * implicitly assume that we do allocations in chunks of a single filesystem 35 * implicitly assume that we do allocations in chunks of a single filesystem
36 * block in the dquot/xqm code. 36 * block in the dquot/xqm code.
37 */ 37 */
38 #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 38 #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
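
For reference, the numbers in the comment above follow directly from the on-disk sizes: sizeof(xfs_disk_dquot_t) is 104 bytes, the 32 bytes of fill bring sizeof(xfs_dqblk_t) to 136, and a 4096-byte block therefore holds 4096 / 136 = 30 dquots (30 * 136 = 4080, with 16 bytes to spare). The CRC fields added by this patch are carved out of that same fill area (4 + 4 + 8 + 16 = 32 bytes), so the on-disk dquot stays 136 bytes and this arithmetic is unchanged.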
39 39
40 /* 40 /*
41 * Various quota information for individual filesystems. 41 * Various quota information for individual filesystems.
42 * The mount structure keeps a pointer to this. 42 * The mount structure keeps a pointer to this.
43 */ 43 */
44 typedef struct xfs_quotainfo { 44 typedef struct xfs_quotainfo {
45 struct radix_tree_root qi_uquota_tree; 45 struct radix_tree_root qi_uquota_tree;
46 struct radix_tree_root qi_gquota_tree; 46 struct radix_tree_root qi_gquota_tree;
47 struct mutex qi_tree_lock; 47 struct mutex qi_tree_lock;
48 xfs_inode_t *qi_uquotaip; /* user quota inode */ 48 xfs_inode_t *qi_uquotaip; /* user quota inode */
49 xfs_inode_t *qi_gquotaip; /* group quota inode */ 49 xfs_inode_t *qi_gquotaip; /* group quota inode */
50 struct list_head qi_lru_list; 50 struct list_head qi_lru_list;
51 struct mutex qi_lru_lock; 51 struct mutex qi_lru_lock;
52 int qi_lru_count; 52 int qi_lru_count;
53 int qi_dquots; 53 int qi_dquots;
54 time_t qi_btimelimit; /* limit for blks timer */ 54 time_t qi_btimelimit; /* limit for blks timer */
55 time_t qi_itimelimit; /* limit for inodes timer */ 55 time_t qi_itimelimit; /* limit for inodes timer */
56 time_t qi_rtbtimelimit;/* limit for rt blks timer */ 56 time_t qi_rtbtimelimit;/* limit for rt blks timer */
57 xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ 57 xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */
58 xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ 58 xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */
59 xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ 59 xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */
60 struct mutex qi_quotaofflock;/* to serialize quotaoff */ 60 struct mutex qi_quotaofflock;/* to serialize quotaoff */
61 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ 61 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
62 uint qi_dqperchunk; /* # ondisk dqs in above chunk */ 62 uint qi_dqperchunk; /* # ondisk dqs in above chunk */
63 xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ 63 xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */
64 xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ 64 xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */
65 xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ 65 xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */
66 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ 66 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
67 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ 67 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
68 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ 68 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
69 struct shrinker qi_shrinker; 69 struct shrinker qi_shrinker;
70 } xfs_quotainfo_t; 70 } xfs_quotainfo_t;
71 71
72 #define XFS_DQUOT_TREE(qi, type) \ 72 #define XFS_DQUOT_TREE(qi, type) \
73 ((type & XFS_DQ_USER) ? \ 73 ((type & XFS_DQ_USER) ? \
74 &((qi)->qi_uquota_tree) : \ 74 &((qi)->qi_uquota_tree) : \
75 &((qi)->qi_gquota_tree)) 75 &((qi)->qi_gquota_tree))
76 76
77 77
78 extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp,
79 unsigned int nbblks);
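
xfs_qm_calc_dquots_per_chunk() is new with this patch; its likely shape is a straightforward conversion from basic blocks to on-disk dquots. A hedged sketch of what such a helper would compute (not the actual body, which lives in the dquot code):

	/* nbblks is in 512-byte basic blocks; BBTOB() converts to bytes */
	unsigned int	ndquots = BBTOB(nbblks) / sizeof(struct xfs_dqblk);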
78 extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); 80 extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
79 extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, 81 extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
80 xfs_dquot_t *, xfs_dquot_t *, long, long, uint); 82 xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
81 extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); 83 extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
82 extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); 84 extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
83 85
84 /* 86 /*
85 * We keep the usr and grp dquots separately so that locking will be easier 87 * We keep the usr and grp dquots separately so that locking will be easier
86 * to do at commit time. All transactions that we know of at this point 88 * to do at commit time. All transactions that we know of at this point
87 * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. 89 * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
88 */ 90 */
89 #define XFS_QM_TRANS_MAXDQS 2 91 #define XFS_QM_TRANS_MAXDQS 2
90 typedef struct xfs_dquot_acct { 92 typedef struct xfs_dquot_acct {
91 xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; 93 xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
92 xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; 94 xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
93 } xfs_dquot_acct_t; 95 } xfs_dquot_acct_t;
94 96
95 /* 97 /*
96 * Users are allowed to have a usage exceeding their softlimit for 98 * Users are allowed to have a usage exceeding their softlimit for
97 * a period this long. 99 * a period this long.
98 */ 100 */
99 #define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ 101 #define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */
100 #define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ 102 #define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */
101 #define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ 103 #define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */
102 104
103 #define XFS_QM_BWARNLIMIT 5 105 #define XFS_QM_BWARNLIMIT 5
104 #define XFS_QM_IWARNLIMIT 5 106 #define XFS_QM_IWARNLIMIT 5
105 #define XFS_QM_RTBWARNLIMIT 5 107 #define XFS_QM_RTBWARNLIMIT 5
106 108
107 extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 109 extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
108 extern int xfs_qm_quotacheck(xfs_mount_t *); 110 extern int xfs_qm_quotacheck(xfs_mount_t *);
109 extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 111 extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
110 112
111 /* dquot stuff */ 113 /* dquot stuff */
112 extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); 114 extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint);
113 extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); 115 extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
114 116
115 /* quota ops */ 117 /* quota ops */
116 extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); 118 extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
117 extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, 119 extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
118 fs_disk_quota_t *); 120 fs_disk_quota_t *);
119 extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, 121 extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
120 fs_disk_quota_t *); 122 fs_disk_quota_t *);
121 extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); 123 extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
122 extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 124 extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
123 extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); 125 extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
124 126
125 #endif /* __XFS_QM_H__ */ 127 #endif /* __XFS_QM_H__ */
126 128
1 /* 1 /*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 #ifndef __XFS_QUOTA_H__ 18 #ifndef __XFS_QUOTA_H__
19 #define __XFS_QUOTA_H__ 19 #define __XFS_QUOTA_H__
20 20
21 struct xfs_trans; 21 struct xfs_trans;
22 22
23 /* 23 /*
24 * The ondisk form of a dquot structure. 24 * The ondisk form of a dquot structure.
25 */ 25 */
26 #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ 26 #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */
27 #define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */ 27 #define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */
28 28
29 /* 29 /*
30 * uid_t and gid_t are hard-coded to 32 bits in the inode. 30 * uid_t and gid_t are hard-coded to 32 bits in the inode.
31 * Hence, an 'id' in a dquot is 32 bits. 31 * Hence, an 'id' in a dquot is 32 bits.
32 */ 32 */
33 typedef __uint32_t xfs_dqid_t; 33 typedef __uint32_t xfs_dqid_t;
34 34
35 /* 35 /*
36 * Even though users may not have quota limits occupying all 64-bits, 36 * Even though users may not have quota limits occupying all 64-bits,
37 * they may need 64-bit accounting. Hence, 64-bit quota-counters, 37 * they may need 64-bit accounting. Hence, 64-bit quota-counters,
38 * and quota-limits. This is a waste in the common case, but hey ... 38 * and quota-limits. This is a waste in the common case, but hey ...
39 */ 39 */
40 typedef __uint64_t xfs_qcnt_t; 40 typedef __uint64_t xfs_qcnt_t;
41 typedef __uint16_t xfs_qwarncnt_t; 41 typedef __uint16_t xfs_qwarncnt_t;
42 42
43 /* 43 /*
44 * This is the main portion of the on-disk representation of quota 44 * This is the main portion of the on-disk representation of quota
45 * information for a user. This is the q_core of the xfs_dquot_t that 45 * information for a user. This is the q_core of the xfs_dquot_t that
46 * is kept in kernel memory. We pad this with some more expansion room 46 * is kept in kernel memory. We pad this with some more expansion room
47 * to construct the on disk structure. 47 * to construct the on disk structure.
48 */ 48 */
49 typedef struct xfs_disk_dquot { 49 typedef struct xfs_disk_dquot {
50 __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ 50 __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */
51 __u8 d_version; /* dquot version */ 51 __u8 d_version; /* dquot version */
52 __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */ 52 __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */
53 __be32 d_id; /* user,project,group id */ 53 __be32 d_id; /* user,project,group id */
54 __be64 d_blk_hardlimit;/* absolute limit on disk blks */ 54 __be64 d_blk_hardlimit;/* absolute limit on disk blks */
55 __be64 d_blk_softlimit;/* preferred limit on disk blks */ 55 __be64 d_blk_softlimit;/* preferred limit on disk blks */
56 __be64 d_ino_hardlimit;/* maximum # allocated inodes */ 56 __be64 d_ino_hardlimit;/* maximum # allocated inodes */
57 __be64 d_ino_softlimit;/* preferred inode limit */ 57 __be64 d_ino_softlimit;/* preferred inode limit */
58 __be64 d_bcount; /* disk blocks owned by the user */ 58 __be64 d_bcount; /* disk blocks owned by the user */
59 __be64 d_icount; /* inodes owned by the user */ 59 __be64 d_icount; /* inodes owned by the user */
60 __be32 d_itimer; /* zero if within inode limits if not, 60 __be32 d_itimer; /* zero if within inode limits if not,
61 this is when we refuse service */ 61 this is when we refuse service */
62 __be32 d_btimer; /* similar to above; for disk blocks */ 62 __be32 d_btimer; /* similar to above; for disk blocks */
63 __be16 d_iwarns; /* warnings issued wrt num inodes */ 63 __be16 d_iwarns; /* warnings issued wrt num inodes */
64 __be16 d_bwarns; /* warnings issued wrt disk blocks */ 64 __be16 d_bwarns; /* warnings issued wrt disk blocks */
65 __be32 d_pad0; /* 64 bit align */ 65 __be32 d_pad0; /* 64 bit align */
66 __be64 d_rtb_hardlimit;/* absolute limit on realtime blks */ 66 __be64 d_rtb_hardlimit;/* absolute limit on realtime blks */
67 __be64 d_rtb_softlimit;/* preferred limit on RT disk blks */ 67 __be64 d_rtb_softlimit;/* preferred limit on RT disk blks */
68 __be64 d_rtbcount; /* realtime blocks owned */ 68 __be64 d_rtbcount; /* realtime blocks owned */
69 __be32 d_rtbtimer; /* similar to above; for RT disk blocks */ 69 __be32 d_rtbtimer; /* similar to above; for RT disk blocks */
70 __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */ 70 __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */
71 __be16 d_pad; 71 __be16 d_pad;
72 } xfs_disk_dquot_t; 72 } xfs_disk_dquot_t;
73 73
74 /* 74 /*
75 * This is what goes on disk. This is separated from the xfs_disk_dquot because 75 * This is what goes on disk. This is separated from the xfs_disk_dquot because
76 * carrying the unnecessary padding would be a waste of memory. 76 * carrying the unnecessary padding would be a waste of memory.
77 */ 77 */
78 typedef struct xfs_dqblk { 78 typedef struct xfs_dqblk {
79 xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ 79 xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */
80 char dd_fill[32]; /* filling for posterity */ 80 char dd_fill[4]; /* filling for posterity */
81
82 /*
83 * These two are only present on filesystems with the CRC bits set.
84 */
85 __be32 dd_crc; /* checksum */
86 __be64 dd_lsn; /* last modification in log */
87 uuid_t dd_uuid; /* location information */
81 } xfs_dqblk_t; 88 } xfs_dqblk_t;
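
On a CRC-enabled filesystem each 136-byte dqblk carries its own checksum, UUID and LSN, so a buffer full of dquots is verified and stamped one dquot at a time. A hedged sketch of the write-side stamping, assuming the generic xfs_update_cksum() helper and the usual buffer/mount fields (b_addr, sb_uuid, qi_dqperchunk); the real verifier added by this patch may differ in detail:

	struct xfs_dqblk	*d = bp->b_addr;
	int			i;

	for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
		uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
		xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
				 offsetof(struct xfs_dqblk, dd_crc));
	}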
82 89
83 /* 90 /*
84 * flags for q_flags field in the dquot. 91 * flags for q_flags field in the dquot.
85 */ 92 */
86 #define XFS_DQ_USER 0x0001 /* a user quota */ 93 #define XFS_DQ_USER 0x0001 /* a user quota */
87 #define XFS_DQ_PROJ 0x0002 /* project quota */ 94 #define XFS_DQ_PROJ 0x0002 /* project quota */
88 #define XFS_DQ_GROUP 0x0004 /* a group quota */ 95 #define XFS_DQ_GROUP 0x0004 /* a group quota */
89 #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ 96 #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
90 #define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ 97 #define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */
91 98
92 #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 99 #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
93 100
94 #define XFS_DQ_FLAGS \ 101 #define XFS_DQ_FLAGS \
95 { XFS_DQ_USER, "USER" }, \ 102 { XFS_DQ_USER, "USER" }, \
96 { XFS_DQ_PROJ, "PROJ" }, \ 103 { XFS_DQ_PROJ, "PROJ" }, \
97 { XFS_DQ_GROUP, "GROUP" }, \ 104 { XFS_DQ_GROUP, "GROUP" }, \
98 { XFS_DQ_DIRTY, "DIRTY" }, \ 105 { XFS_DQ_DIRTY, "DIRTY" }, \
99 { XFS_DQ_FREEING, "FREEING" } 106 { XFS_DQ_FREEING, "FREEING" }
100 107
101 /* 108 /*
102 * In the worst case, when both user and group quotas are on, 109 * In the worst case, when both user and group quotas are on,
103 * we can have a max of three dquots changing in a single transaction. 110 * we can have a max of three dquots changing in a single transaction.
104 */ 111 */
105 #define XFS_DQUOT_LOGRES(mp) (sizeof(xfs_disk_dquot_t) * 3) 112 #define XFS_DQUOT_LOGRES(mp) (sizeof(xfs_disk_dquot_t) * 3)
106 113
107 114
108 /* 115 /*
109 * These are the structures used to lay out dquots and quotaoff 116 * These are the structures used to lay out dquots and quotaoff
110 * records on the log. Quite similar to those of inodes. 117 * records on the log. Quite similar to those of inodes.
111 */ 118 */
112 119
113 /* 120 /*
114 * log format struct for dquots. 121 * log format struct for dquots.
115 * The first two fields must be the type and size fitting into 122 * The first two fields must be the type and size fitting into
116 * 32 bits : log_recovery code assumes that. 123 * 32 bits : log_recovery code assumes that.
117 */ 124 */
118 typedef struct xfs_dq_logformat { 125 typedef struct xfs_dq_logformat {
119 __uint16_t qlf_type; /* dquot log item type */ 126 __uint16_t qlf_type; /* dquot log item type */
120 __uint16_t qlf_size; /* size of this item */ 127 __uint16_t qlf_size; /* size of this item */
121 xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ 128 xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */
122 __int64_t qlf_blkno; /* blkno of dquot buffer */ 129 __int64_t qlf_blkno; /* blkno of dquot buffer */
123 __int32_t qlf_len; /* len of dquot buffer */ 130 __int32_t qlf_len; /* len of dquot buffer */
124 __uint32_t qlf_boffset; /* off of dquot in buffer */ 131 __uint32_t qlf_boffset; /* off of dquot in buffer */
125 } xfs_dq_logformat_t; 132 } xfs_dq_logformat_t;
126 133
127 /* 134 /*
128 * log format struct for QUOTAOFF records. 135 * log format struct for QUOTAOFF records.
129 * The first two fields must be the type and size fitting into 136 * The first two fields must be the type and size fitting into
130 * 32 bits : log_recovery code assumes that. 137 * 32 bits : log_recovery code assumes that.
131 * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer 138 * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
132 * to the first and ensures that the first logitem is taken out of the AIL 139 * to the first and ensures that the first logitem is taken out of the AIL
133 * only when the last one is securely committed. 140 * only when the last one is securely committed.
134 */ 141 */
135 typedef struct xfs_qoff_logformat { 142 typedef struct xfs_qoff_logformat {
136 unsigned short qf_type; /* quotaoff log item type */ 143 unsigned short qf_type; /* quotaoff log item type */
137 unsigned short qf_size; /* size of this item */ 144 unsigned short qf_size; /* size of this item */
138 unsigned int qf_flags; /* USR and/or GRP */ 145 unsigned int qf_flags; /* USR and/or GRP */
139 char qf_pad[12]; /* padding for future */ 146 char qf_pad[12]; /* padding for future */
140 } xfs_qoff_logformat_t; 147 } xfs_qoff_logformat_t;
141 148
142 149
143 /* 150 /*
144 * Disk quotas status in m_qflags, and also sb_qflags. 16 bits. 151 * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
145 */ 152 */
146 #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ 153 #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */
147 #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ 154 #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */
148 #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ 155 #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */
149 #define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ 156 #define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */
150 #define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */ 157 #define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */
151 #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ 158 #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */
152 #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ 159 #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */
153 160
154 /* 161 /*
155 * Quota Accounting/Enforcement flags 162 * Quota Accounting/Enforcement flags
156 */ 163 */
157 #define XFS_ALL_QUOTA_ACCT \ 164 #define XFS_ALL_QUOTA_ACCT \
158 (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) 165 (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
159 #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) 166 #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
160 #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) 167 #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
161 168
162 #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) 169 #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
163 #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) 170 #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
164 #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) 171 #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT)
165 #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) 172 #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
166 #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) 173 #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD)
167 #define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD) 174 #define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD)
168 175
169 /* 176 /*
170 * Incore only flags for quotaoff - these bits get cleared when quota(s) 177 * Incore only flags for quotaoff - these bits get cleared when quota(s)
171 * are in the process of getting turned off. These flags are in m_qflags but 178 * are in the process of getting turned off. These flags are in m_qflags but
172 * never in sb_qflags. 179 * never in sb_qflags.
173 */ 180 */
174 #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ 181 #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */
175 #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ 182 #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */
176 #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ 183 #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */
177 #define XFS_ALL_QUOTA_ACTIVE \ 184 #define XFS_ALL_QUOTA_ACTIVE \
178 (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) 185 (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
179 186
180 /* 187 /*
181 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees 188 * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
182 * quota will not be switched off as long as that inode lock is held. 189 * quota will not be switched off as long as that inode lock is held.
183 */ 190 */
184 #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ 191 #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
185 XFS_GQUOTA_ACTIVE | \ 192 XFS_GQUOTA_ACTIVE | \
186 XFS_PQUOTA_ACTIVE)) 193 XFS_PQUOTA_ACTIVE))
187 #define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ 194 #define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
188 XFS_PQUOTA_ACTIVE)) 195 XFS_PQUOTA_ACTIVE))
189 #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) 196 #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
190 #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) 197 #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
191 #define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) 198 #define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
192 199
193 /* 200 /*
194 * Flags to tell various functions what to do. Not all of these are meaningful 201 * Flags to tell various functions what to do. Not all of these are meaningful
195 * to a single function. None of these XFS_QMOPT_* flags are meant to have 202 * to a single function. None of these XFS_QMOPT_* flags are meant to have
196 * persistent values (ie. their values can and will change between versions) 203 * persistent values (ie. their values can and will change between versions)
197 */ 204 */
198 #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ 205 #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */
199 #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ 206 #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
200 #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 207 #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
201 #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 208 #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
202 #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 209 #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
203 #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 210 #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
204 #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 211 #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
205 #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 212 #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
206 #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ 213 #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
207 214
208 /* 215 /*
209 * flags to xfs_trans_mod_dquot to indicate which field needs to be 216 * flags to xfs_trans_mod_dquot to indicate which field needs to be
210 * modified. 217 * modified.
211 */ 218 */
212 #define XFS_QMOPT_RES_REGBLKS 0x0010000 219 #define XFS_QMOPT_RES_REGBLKS 0x0010000
213 #define XFS_QMOPT_RES_RTBLKS 0x0020000 220 #define XFS_QMOPT_RES_RTBLKS 0x0020000
214 #define XFS_QMOPT_BCOUNT 0x0040000 221 #define XFS_QMOPT_BCOUNT 0x0040000
215 #define XFS_QMOPT_ICOUNT 0x0080000 222 #define XFS_QMOPT_ICOUNT 0x0080000
216 #define XFS_QMOPT_RTBCOUNT 0x0100000 223 #define XFS_QMOPT_RTBCOUNT 0x0100000
217 #define XFS_QMOPT_DELBCOUNT 0x0200000 224 #define XFS_QMOPT_DELBCOUNT 0x0200000
218 #define XFS_QMOPT_DELRTBCOUNT 0x0400000 225 #define XFS_QMOPT_DELRTBCOUNT 0x0400000
219 #define XFS_QMOPT_RES_INOS 0x0800000 226 #define XFS_QMOPT_RES_INOS 0x0800000
220 227
221 /* 228 /*
222 * flags for dqalloc. 229 * flags for dqalloc.
223 */ 230 */
224 #define XFS_QMOPT_INHERIT 0x1000000 231 #define XFS_QMOPT_INHERIT 0x1000000
225 232
226 /* 233 /*
227 * flags to xfs_trans_mod_dquot. 234 * flags to xfs_trans_mod_dquot.
228 */ 235 */
229 #define XFS_TRANS_DQ_RES_BLKS XFS_QMOPT_RES_REGBLKS 236 #define XFS_TRANS_DQ_RES_BLKS XFS_QMOPT_RES_REGBLKS
230 #define XFS_TRANS_DQ_RES_RTBLKS XFS_QMOPT_RES_RTBLKS 237 #define XFS_TRANS_DQ_RES_RTBLKS XFS_QMOPT_RES_RTBLKS
231 #define XFS_TRANS_DQ_RES_INOS XFS_QMOPT_RES_INOS 238 #define XFS_TRANS_DQ_RES_INOS XFS_QMOPT_RES_INOS
232 #define XFS_TRANS_DQ_BCOUNT XFS_QMOPT_BCOUNT 239 #define XFS_TRANS_DQ_BCOUNT XFS_QMOPT_BCOUNT
233 #define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT 240 #define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT
234 #define XFS_TRANS_DQ_ICOUNT XFS_QMOPT_ICOUNT 241 #define XFS_TRANS_DQ_ICOUNT XFS_QMOPT_ICOUNT
235 #define XFS_TRANS_DQ_RTBCOUNT XFS_QMOPT_RTBCOUNT 242 #define XFS_TRANS_DQ_RTBCOUNT XFS_QMOPT_RTBCOUNT
236 #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT 243 #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
237 244
238 245
239 #define XFS_QMOPT_QUOTALL \ 246 #define XFS_QMOPT_QUOTALL \
240 (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) 247 (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
241 #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) 248 #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
242 249
243 #ifdef __KERNEL__ 250 #ifdef __KERNEL__
244 /* 251 /*
245 * This check is done typically without holding the inode lock; 252 * This check is done typically without holding the inode lock;
246 * that may seem racy, but it is harmless in the context that it is used. 253 * that may seem racy, but it is harmless in the context that it is used.
247 * The inode cannot go inactive as long as a reference is kept, and 254 * The inode cannot go inactive as long as a reference is kept, and
248 * therefore if dquot(s) were attached, they'll stay consistent. 255 * therefore if dquot(s) were attached, they'll stay consistent.
249 * If, for example, the ownership of the inode changes while 256 * If, for example, the ownership of the inode changes while
250 * we didn't have the inode locked, the appropriate dquot(s) will be 257 * we didn't have the inode locked, the appropriate dquot(s) will be
251 * attached atomically. 258 * attached atomically.
252 */ 259 */
253 #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ 260 #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\
254 (ip)->i_udquot == NULL) || \ 261 (ip)->i_udquot == NULL) || \
255 (XFS_IS_OQUOTA_ON(mp) && \ 262 (XFS_IS_OQUOTA_ON(mp) && \
256 (ip)->i_gdquot == NULL)) 263 (ip)->i_gdquot == NULL))
257 264
258 #define XFS_QM_NEED_QUOTACHECK(mp) \ 265 #define XFS_QM_NEED_QUOTACHECK(mp) \
259 ((XFS_IS_UQUOTA_ON(mp) && \ 266 ((XFS_IS_UQUOTA_ON(mp) && \
260 (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ 267 (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \
261 (XFS_IS_GQUOTA_ON(mp) && \ 268 (XFS_IS_GQUOTA_ON(mp) && \
262 ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ 269 ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \
263 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ 270 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \
264 (XFS_IS_PQUOTA_ON(mp) && \ 271 (XFS_IS_PQUOTA_ON(mp) && \
265 ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ 272 ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \
266 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) 273 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT))))
267 274
268 #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ 275 #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
269 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ 276 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
270 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) 277 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD)
271 278
272 #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ 279 #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
273 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ 280 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
274 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) 281 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD)
275 282
276 #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ 283 #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
277 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ 284 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
278 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ 285 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\
279 XFS_GQUOTA_ACCT) 286 XFS_GQUOTA_ACCT)
280 287
281 288
282 /* 289 /*
283 * The structure kept inside the xfs_trans_t keeps track of dquot changes 290 * The structure kept inside the xfs_trans_t keeps track of dquot changes
284 * within a transaction and apply them later. 291 * within a transaction and apply them later.
285 */ 292 */
286 typedef struct xfs_dqtrx { 293 typedef struct xfs_dqtrx {
287 struct xfs_dquot *qt_dquot; /* the dquot this refers to */ 294 struct xfs_dquot *qt_dquot; /* the dquot this refers to */
288 ulong qt_blk_res; /* blks reserved on a dquot */ 295 ulong qt_blk_res; /* blks reserved on a dquot */
289 ulong qt_blk_res_used; /* blks used from the reservation */ 296 ulong qt_blk_res_used; /* blks used from the reservation */
290 ulong qt_ino_res; /* inode reserved on a dquot */ 297 ulong qt_ino_res; /* inode reserved on a dquot */
291 ulong qt_ino_res_used; /* inodes used from the reservation */ 298 ulong qt_ino_res_used; /* inodes used from the reservation */
292 long qt_bcount_delta; /* dquot blk count changes */ 299 long qt_bcount_delta; /* dquot blk count changes */
293 long qt_delbcnt_delta; /* delayed dquot blk count changes */ 300 long qt_delbcnt_delta; /* delayed dquot blk count changes */
294 long qt_icount_delta; /* dquot inode count changes */ 301 long qt_icount_delta; /* dquot inode count changes */
295 ulong qt_rtblk_res; /* # blks reserved on a dquot */ 302 ulong qt_rtblk_res; /* # blks reserved on a dquot */
296 ulong qt_rtblk_res_used;/* # blks used from reservation */ 303 ulong qt_rtblk_res_used;/* # blks used from reservation */
297 long qt_rtbcount_delta;/* dquot realtime blk changes */ 304 long qt_rtbcount_delta;/* dquot realtime blk changes */
298 long qt_delrtb_delta; /* delayed RT blk count changes */ 305 long qt_delrtb_delta; /* delayed RT blk count changes */
299 } xfs_dqtrx_t; 306 } xfs_dqtrx_t;
300 307
301 #ifdef CONFIG_XFS_QUOTA 308 #ifdef CONFIG_XFS_QUOTA
302 extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); 309 extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
303 extern void xfs_trans_free_dqinfo(struct xfs_trans *); 310 extern void xfs_trans_free_dqinfo(struct xfs_trans *);
304 extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, 311 extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
305 uint, long); 312 uint, long);
306 extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); 313 extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
307 extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); 314 extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
308 extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, 315 extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,
309 struct xfs_inode *, long, long, uint); 316 struct xfs_inode *, long, long, uint);
310 extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, 317 extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
311 struct xfs_mount *, struct xfs_dquot *, 318 struct xfs_mount *, struct xfs_dquot *,
312 struct xfs_dquot *, long, long, uint); 319 struct xfs_dquot *, long, long, uint);
313 320
314 extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, 321 extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
315 struct xfs_dquot **, struct xfs_dquot **); 322 struct xfs_dquot **, struct xfs_dquot **);
316 extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, 323 extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
317 struct xfs_dquot *, struct xfs_dquot *); 324 struct xfs_dquot *, struct xfs_dquot *);
318 extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); 325 extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
319 extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, 326 extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,
320 struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); 327 struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);
321 extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, 328 extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
322 struct xfs_dquot *, struct xfs_dquot *, uint); 329 struct xfs_dquot *, struct xfs_dquot *, uint);
323 extern int xfs_qm_dqattach(struct xfs_inode *, uint); 330 extern int xfs_qm_dqattach(struct xfs_inode *, uint);
324 extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); 331 extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
325 extern void xfs_qm_dqdetach(struct xfs_inode *); 332 extern void xfs_qm_dqdetach(struct xfs_inode *);
326 extern void xfs_qm_dqrele(struct xfs_dquot *); 333 extern void xfs_qm_dqrele(struct xfs_dquot *);
327 extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); 334 extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
328 extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); 335 extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
329 extern void xfs_qm_mount_quotas(struct xfs_mount *); 336 extern void xfs_qm_mount_quotas(struct xfs_mount *);
330 extern void xfs_qm_unmount(struct xfs_mount *); 337 extern void xfs_qm_unmount(struct xfs_mount *);
331 extern void xfs_qm_unmount_quotas(struct xfs_mount *); 338 extern void xfs_qm_unmount_quotas(struct xfs_mount *);
332 339
333 #else 340 #else
334 static inline int 341 static inline int
335 xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, 342 xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
336 uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) 343 uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
337 { 344 {
338 *udqp = NULL; 345 *udqp = NULL;
339 *gdqp = NULL; 346 *gdqp = NULL;
340 return 0; 347 return 0;
341 } 348 }
342 #define xfs_trans_dup_dqinfo(tp, tp2) 349 #define xfs_trans_dup_dqinfo(tp, tp2)
343 #define xfs_trans_free_dqinfo(tp) 350 #define xfs_trans_free_dqinfo(tp)
344 #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) 351 #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
345 #define xfs_trans_apply_dquot_deltas(tp) 352 #define xfs_trans_apply_dquot_deltas(tp)
346 #define xfs_trans_unreserve_and_mod_dquots(tp) 353 #define xfs_trans_unreserve_and_mod_dquots(tp)
347 static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, 354 static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
348 struct xfs_inode *ip, long nblks, long ninos, uint flags) 355 struct xfs_inode *ip, long nblks, long ninos, uint flags)
349 { 356 {
350 return 0; 357 return 0;
351 } 358 }
352 static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, 359 static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
353 struct xfs_mount *mp, struct xfs_dquot *udqp, 360 struct xfs_mount *mp, struct xfs_dquot *udqp,
354 struct xfs_dquot *gdqp, long nblks, long ninos, uint flags) 361 struct xfs_dquot *gdqp, long nblks, long ninos, uint flags)
355 { 362 {
356 return 0; 363 return 0;
357 } 364 }
358 #define xfs_qm_vop_create_dqattach(tp, ip, u, g) 365 #define xfs_qm_vop_create_dqattach(tp, ip, u, g)
359 #define xfs_qm_vop_rename_dqattach(it) (0) 366 #define xfs_qm_vop_rename_dqattach(it) (0)
360 #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) 367 #define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
361 #define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) 368 #define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0)
362 #define xfs_qm_dqattach(ip, fl) (0) 369 #define xfs_qm_dqattach(ip, fl) (0)
363 #define xfs_qm_dqattach_locked(ip, fl) (0) 370 #define xfs_qm_dqattach_locked(ip, fl) (0)
364 #define xfs_qm_dqdetach(ip) 371 #define xfs_qm_dqdetach(ip)
365 #define xfs_qm_dqrele(d) 372 #define xfs_qm_dqrele(d)
366 #define xfs_qm_statvfs(ip, s) 373 #define xfs_qm_statvfs(ip, s)
367 #define xfs_qm_newmount(mp, a, b) (0) 374 #define xfs_qm_newmount(mp, a, b) (0)
368 #define xfs_qm_mount_quotas(mp) 375 #define xfs_qm_mount_quotas(mp)
369 #define xfs_qm_unmount(mp) 376 #define xfs_qm_unmount(mp)
370 #define xfs_qm_unmount_quotas(mp) 377 #define xfs_qm_unmount_quotas(mp)
371 #endif /* CONFIG_XFS_QUOTA */ 378 #endif /* CONFIG_XFS_QUOTA */
372 379
373 #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ 380 #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
374 xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) 381 xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags)
375 #define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ 382 #define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \
376 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ 383 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
377 f | XFS_QMOPT_RES_REGBLKS) 384 f | XFS_QMOPT_RES_REGBLKS)
378 385
379 extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, 386 extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
380 xfs_dqid_t, uint, uint, char *); 387 xfs_dqid_t, uint, uint, char *);
381 extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 388 extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
389
390 extern const struct xfs_buf_ops xfs_dquot_buf_ops;
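
xfs_dquot_buf_ops is the verifier pair (read and write hooks) that runs these CRC/UUID checks at I/O time; callers opt in by passing it when reading dquot buffers. A hedged usage sketch, assuming the xfs_trans_read_buf() signature of this era:

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
				   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
				   &xfs_dquot_buf_ops);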
382 391
383 #endif /* __KERNEL__ */ 392 #endif /* __KERNEL__ */
384 #endif /* __XFS_QUOTA_H__ */ 393 #endif /* __XFS_QUOTA_H__ */
385 394