Commit 3fe58f30b4fc3f8a9084b035a02bc0c67bee8d00
Committed by Ben Myers
1 parent: 983d09ffe3
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
xfs: add CRC checks for quota blocks
Use the reserved space in struct xfs_dqblk to store a UUID and a CRC for
the quota blocks.

[dchinner@redhat.com] Add an LSN field and update for the current verifier
infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
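For orientation, here is a minimal userspace sketch of the layout change, based on the commit description (UUID, CRC, and LSN packed into the reserved space of struct xfs_dqblk); the exact field order is an assumption, not copied from this diff. The old 32 bytes of dd_fill padding become 4 fill bytes plus the three new fields, so each entry stays 136 bytes. crc32c() here is a plain bitwise software CRC32C, not the kernel's xfs_update_cksum()/xfs_verify_cksum() helpers, whose seeding and finalization differ slightly, and disk_dquot stands in for the 104-byte xfs_disk_dquot_t.

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>

struct disk_dquot { uint8_t bytes[104]; }; /* stand-in for xfs_disk_dquot_t */

struct dqblk {
	struct disk_dquot dd_diskdq;   /* portion that also lives incore */
	uint8_t           dd_fill[4];  /* remaining fill */
	uint32_t          dd_crc;      /* checksum, CRC-enabled filesystems only */
	uint64_t          dd_lsn;      /* LSN of last modification in the log */
	uint8_t           dd_uuid[16]; /* filesystem UUID, catches misdirected blocks */
};

/* Bitwise CRC32C (Castagnoli), reflected polynomial 0x82f63b78. */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	crc = ~crc;
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82f63b78 & -(crc & 1));
	}
	return ~crc;
}

/*
 * Checksum one entry the way xfs_update_cksum() is used in the diff below:
 * the CRC covers the whole 136-byte entry with the dd_crc field itself
 * zeroed while the checksum is computed.
 */
static void dqblk_calc_crc(struct dqblk *d)
{
	d->dd_crc = 0;
	d->dd_crc = crc32c(0, d, sizeof(*d));
}

int main(void)
{
	struct dqblk blk;

	memset(&blk, 0, sizeof(blk));
	dqblk_calc_crc(&blk);
	printf("sizeof(dqblk) = %zu, crc = 0x%08x\n",
	       sizeof(blk), (unsigned)blk.dd_crc);
	return 0;
}

Note that on read the new xfs_dquot_buf_verify_crc() also compares each dquot's dd_uuid against sb_uuid, so a stale or misdirected block from another filesystem fails verification even if its CRC is internally consistent.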
Showing 5 changed files with 141 additions and 17 deletions (inline diff)
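The verifier rework below follows a fixed ordering worth calling out: on read, the per-dquot CRCs (and UUIDs) are checked before the dquot contents, since a bad checksum means nothing else in the buffer can be trusted; on write, contents are validated first and the CRCs are recomputed last, so the checksum covers the bytes as finally written. A compact model of that shape, with every name an illustrative stand-in for the kernel functions in the diff:

#include <stdbool.h>
#include <stdio.h>

#define EFSCORRUPTED 117 /* XFS maps this onto EUCLEAN on Linux */

struct buf { int error; }; /* stand-in for struct xfs_buf */

static bool verify_crcs(struct buf *bp)     { return true; } /* CRC + UUID per dquot */
static bool verify_contents(struct buf *bp) { return true; } /* xfs_qm_dqcheck() sweep */
static void calc_crcs(struct buf *bp)       { }              /* restamp per-dquot CRCs */

/* Read side: checksums first; a corrupt CRC makes the contents untrustworthy. */
static void read_verify(struct buf *bp)
{
	if (!verify_crcs(bp) || !verify_contents(bp))
		bp->error = EFSCORRUPTED;
}

/* Write side: validate contents, then stamp fresh CRCs as the last step. */
static void write_verify(struct buf *bp)
{
	if (!verify_contents(bp)) {
		bp->error = EFSCORRUPTED;
		return;
	}
	calc_crcs(bp);
}

int main(void)
{
	struct buf bp = { 0 };

	read_verify(&bp);
	write_verify(&bp);
	printf("error = %d\n", bp.error);
	return 0;
}

Note also the log-recovery wrinkle both verify paths handle below: mp->m_quotainfo may not exist yet during recovery, so the number of dquots in the buffer is recomputed via xfs_qm_calc_dquots_per_chunk() instead of read from qi_dqperchunk.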
fs/xfs/xfs_dquot.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | 20 | #include "xfs_bit.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
23 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
24 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
25 | #include "xfs_alloc.h" | 25 | #include "xfs_alloc.h" |
26 | #include "xfs_quota.h" | 26 | #include "xfs_quota.h" |
27 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
28 | #include "xfs_bmap_btree.h" | 28 | #include "xfs_bmap_btree.h" |
29 | #include "xfs_inode.h" | 29 | #include "xfs_inode.h" |
30 | #include "xfs_bmap.h" | 30 | #include "xfs_bmap.h" |
31 | #include "xfs_rtalloc.h" | 31 | #include "xfs_rtalloc.h" |
32 | #include "xfs_error.h" | 32 | #include "xfs_error.h" |
33 | #include "xfs_itable.h" | 33 | #include "xfs_itable.h" |
34 | #include "xfs_attr.h" | 34 | #include "xfs_attr.h" |
35 | #include "xfs_buf_item.h" | 35 | #include "xfs_buf_item.h" |
36 | #include "xfs_trans_space.h" | 36 | #include "xfs_trans_space.h" |
37 | #include "xfs_trans_priv.h" | 37 | #include "xfs_trans_priv.h" |
38 | #include "xfs_qm.h" | 38 | #include "xfs_qm.h" |
39 | #include "xfs_cksum.h" | ||
39 | #include "xfs_trace.h" | 40 | #include "xfs_trace.h" |
40 | 41 | ||
41 | /* | 42 | /* |
42 | * Lock order: | 43 | * Lock order: |
43 | * | 44 | * |
44 | * ip->i_lock | 45 | * ip->i_lock |
45 | * qi->qi_tree_lock | 46 | * qi->qi_tree_lock |
46 | * dquot->q_qlock (xfs_dqlock() and friends) | 47 | * dquot->q_qlock (xfs_dqlock() and friends) |
47 | * dquot->q_flush (xfs_dqflock() and friends) | 48 | * dquot->q_flush (xfs_dqflock() and friends) |
48 | * qi->qi_lru_lock | 49 | * qi->qi_lru_lock |
49 | * | 50 | * |
50 | * If two dquots need to be locked the order is user before group/project, | 51 | * If two dquots need to be locked the order is user before group/project, |
51 | * otherwise by the lowest id first, see xfs_dqlock2. | 52 | * otherwise by the lowest id first, see xfs_dqlock2. |
52 | */ | 53 | */ |
53 | 54 | ||
54 | #ifdef DEBUG | 55 | #ifdef DEBUG |
55 | xfs_buftarg_t *xfs_dqerror_target; | 56 | xfs_buftarg_t *xfs_dqerror_target; |
56 | int xfs_do_dqerror; | 57 | int xfs_do_dqerror; |
57 | int xfs_dqreq_num; | 58 | int xfs_dqreq_num; |
58 | int xfs_dqerror_mod = 33; | 59 | int xfs_dqerror_mod = 33; |
59 | #endif | 60 | #endif |
60 | 61 | ||
61 | struct kmem_zone *xfs_qm_dqtrxzone; | 62 | struct kmem_zone *xfs_qm_dqtrxzone; |
62 | static struct kmem_zone *xfs_qm_dqzone; | 63 | static struct kmem_zone *xfs_qm_dqzone; |
63 | 64 | ||
64 | static struct lock_class_key xfs_dquot_other_class; | 65 | static struct lock_class_key xfs_dquot_other_class; |
65 | 66 | ||
66 | /* | 67 | /* |
67 | * This is called to free all the memory associated with a dquot | 68 | * This is called to free all the memory associated with a dquot |
68 | */ | 69 | */ |
69 | void | 70 | void |
70 | xfs_qm_dqdestroy( | 71 | xfs_qm_dqdestroy( |
71 | xfs_dquot_t *dqp) | 72 | xfs_dquot_t *dqp) |
72 | { | 73 | { |
73 | ASSERT(list_empty(&dqp->q_lru)); | 74 | ASSERT(list_empty(&dqp->q_lru)); |
74 | 75 | ||
75 | mutex_destroy(&dqp->q_qlock); | 76 | mutex_destroy(&dqp->q_qlock); |
76 | kmem_zone_free(xfs_qm_dqzone, dqp); | 77 | kmem_zone_free(xfs_qm_dqzone, dqp); |
77 | 78 | ||
78 | XFS_STATS_DEC(xs_qm_dquot); | 79 | XFS_STATS_DEC(xs_qm_dquot); |
79 | } | 80 | } |
80 | 81 | ||
81 | /* | 82 | /* |
82 | * If default limits are in force, push them into the dquot now. | 83 | * If default limits are in force, push them into the dquot now. |
83 | * We overwrite the dquot limits only if they are zero and this | 84 | * We overwrite the dquot limits only if they are zero and this |
84 | * is not the root dquot. | 85 | * is not the root dquot. |
85 | */ | 86 | */ |
86 | void | 87 | void |
87 | xfs_qm_adjust_dqlimits( | 88 | xfs_qm_adjust_dqlimits( |
88 | struct xfs_mount *mp, | 89 | struct xfs_mount *mp, |
89 | struct xfs_dquot *dq) | 90 | struct xfs_dquot *dq) |
90 | { | 91 | { |
91 | struct xfs_quotainfo *q = mp->m_quotainfo; | 92 | struct xfs_quotainfo *q = mp->m_quotainfo; |
92 | struct xfs_disk_dquot *d = &dq->q_core; | 93 | struct xfs_disk_dquot *d = &dq->q_core; |
93 | int prealloc = 0; | 94 | int prealloc = 0; |
94 | 95 | ||
95 | ASSERT(d->d_id); | 96 | ASSERT(d->d_id); |
96 | 97 | ||
97 | if (q->qi_bsoftlimit && !d->d_blk_softlimit) { | 98 | if (q->qi_bsoftlimit && !d->d_blk_softlimit) { |
98 | d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); | 99 | d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); |
99 | prealloc = 1; | 100 | prealloc = 1; |
100 | } | 101 | } |
101 | if (q->qi_bhardlimit && !d->d_blk_hardlimit) { | 102 | if (q->qi_bhardlimit && !d->d_blk_hardlimit) { |
102 | d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); | 103 | d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); |
103 | prealloc = 1; | 104 | prealloc = 1; |
104 | } | 105 | } |
105 | if (q->qi_isoftlimit && !d->d_ino_softlimit) | 106 | if (q->qi_isoftlimit && !d->d_ino_softlimit) |
106 | d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); | 107 | d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); |
107 | if (q->qi_ihardlimit && !d->d_ino_hardlimit) | 108 | if (q->qi_ihardlimit && !d->d_ino_hardlimit) |
108 | d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); | 109 | d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); |
109 | if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) | 110 | if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) |
110 | d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); | 111 | d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); |
111 | if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) | 112 | if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) |
112 | d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); | 113 | d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); |
113 | 114 | ||
114 | if (prealloc) | 115 | if (prealloc) |
115 | xfs_dquot_set_prealloc_limits(dq); | 116 | xfs_dquot_set_prealloc_limits(dq); |
116 | } | 117 | } |
117 | 118 | ||
118 | /* | 119 | /* |
119 | * Check the limits and timers of a dquot and start or reset timers | 120 | * Check the limits and timers of a dquot and start or reset timers |
120 | * if necessary. | 121 | * if necessary. |
121 | * This gets called even when quota enforcement is OFF, which makes our | 122 | * This gets called even when quota enforcement is OFF, which makes our |
122 | * life a little less complicated. (We just don't reject any quota | 123 | * life a little less complicated. (We just don't reject any quota |
123 | * reservations in that case, when enforcement is off). | 124 | * reservations in that case, when enforcement is off). |
124 | * We also return 0 as the values of the timers in Q_GETQUOTA calls, when | 125 | * We also return 0 as the values of the timers in Q_GETQUOTA calls, when |
125 | * enforcement's off. | 126 | * enforcement's off. |
126 | * In contrast, warnings are a little different in that they don't | 127 | * In contrast, warnings are a little different in that they don't |
127 | * 'automatically' get started when limits get exceeded. They do | 128 | * 'automatically' get started when limits get exceeded. They do |
128 | * get reset to zero, however, when we find the count to be under | 129 | * get reset to zero, however, when we find the count to be under |
129 | * the soft limit (they are only ever set non-zero via userspace). | 130 | * the soft limit (they are only ever set non-zero via userspace). |
130 | */ | 131 | */ |
131 | void | 132 | void |
132 | xfs_qm_adjust_dqtimers( | 133 | xfs_qm_adjust_dqtimers( |
133 | xfs_mount_t *mp, | 134 | xfs_mount_t *mp, |
134 | xfs_disk_dquot_t *d) | 135 | xfs_disk_dquot_t *d) |
135 | { | 136 | { |
136 | ASSERT(d->d_id); | 137 | ASSERT(d->d_id); |
137 | 138 | ||
138 | #ifdef DEBUG | 139 | #ifdef DEBUG |
139 | if (d->d_blk_hardlimit) | 140 | if (d->d_blk_hardlimit) |
140 | ASSERT(be64_to_cpu(d->d_blk_softlimit) <= | 141 | ASSERT(be64_to_cpu(d->d_blk_softlimit) <= |
141 | be64_to_cpu(d->d_blk_hardlimit)); | 142 | be64_to_cpu(d->d_blk_hardlimit)); |
142 | if (d->d_ino_hardlimit) | 143 | if (d->d_ino_hardlimit) |
143 | ASSERT(be64_to_cpu(d->d_ino_softlimit) <= | 144 | ASSERT(be64_to_cpu(d->d_ino_softlimit) <= |
144 | be64_to_cpu(d->d_ino_hardlimit)); | 145 | be64_to_cpu(d->d_ino_hardlimit)); |
145 | if (d->d_rtb_hardlimit) | 146 | if (d->d_rtb_hardlimit) |
146 | ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= | 147 | ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= |
147 | be64_to_cpu(d->d_rtb_hardlimit)); | 148 | be64_to_cpu(d->d_rtb_hardlimit)); |
148 | #endif | 149 | #endif |
149 | 150 | ||
150 | if (!d->d_btimer) { | 151 | if (!d->d_btimer) { |
151 | if ((d->d_blk_softlimit && | 152 | if ((d->d_blk_softlimit && |
152 | (be64_to_cpu(d->d_bcount) > | 153 | (be64_to_cpu(d->d_bcount) > |
153 | be64_to_cpu(d->d_blk_softlimit))) || | 154 | be64_to_cpu(d->d_blk_softlimit))) || |
154 | (d->d_blk_hardlimit && | 155 | (d->d_blk_hardlimit && |
155 | (be64_to_cpu(d->d_bcount) > | 156 | (be64_to_cpu(d->d_bcount) > |
156 | be64_to_cpu(d->d_blk_hardlimit)))) { | 157 | be64_to_cpu(d->d_blk_hardlimit)))) { |
157 | d->d_btimer = cpu_to_be32(get_seconds() + | 158 | d->d_btimer = cpu_to_be32(get_seconds() + |
158 | mp->m_quotainfo->qi_btimelimit); | 159 | mp->m_quotainfo->qi_btimelimit); |
159 | } else { | 160 | } else { |
160 | d->d_bwarns = 0; | 161 | d->d_bwarns = 0; |
161 | } | 162 | } |
162 | } else { | 163 | } else { |
163 | if ((!d->d_blk_softlimit || | 164 | if ((!d->d_blk_softlimit || |
164 | (be64_to_cpu(d->d_bcount) <= | 165 | (be64_to_cpu(d->d_bcount) <= |
165 | be64_to_cpu(d->d_blk_softlimit))) && | 166 | be64_to_cpu(d->d_blk_softlimit))) && |
166 | (!d->d_blk_hardlimit || | 167 | (!d->d_blk_hardlimit || |
167 | (be64_to_cpu(d->d_bcount) <= | 168 | (be64_to_cpu(d->d_bcount) <= |
168 | be64_to_cpu(d->d_blk_hardlimit)))) { | 169 | be64_to_cpu(d->d_blk_hardlimit)))) { |
169 | d->d_btimer = 0; | 170 | d->d_btimer = 0; |
170 | } | 171 | } |
171 | } | 172 | } |
172 | 173 | ||
173 | if (!d->d_itimer) { | 174 | if (!d->d_itimer) { |
174 | if ((d->d_ino_softlimit && | 175 | if ((d->d_ino_softlimit && |
175 | (be64_to_cpu(d->d_icount) > | 176 | (be64_to_cpu(d->d_icount) > |
176 | be64_to_cpu(d->d_ino_softlimit))) || | 177 | be64_to_cpu(d->d_ino_softlimit))) || |
177 | (d->d_ino_hardlimit && | 178 | (d->d_ino_hardlimit && |
178 | (be64_to_cpu(d->d_icount) > | 179 | (be64_to_cpu(d->d_icount) > |
179 | be64_to_cpu(d->d_ino_hardlimit)))) { | 180 | be64_to_cpu(d->d_ino_hardlimit)))) { |
180 | d->d_itimer = cpu_to_be32(get_seconds() + | 181 | d->d_itimer = cpu_to_be32(get_seconds() + |
181 | mp->m_quotainfo->qi_itimelimit); | 182 | mp->m_quotainfo->qi_itimelimit); |
182 | } else { | 183 | } else { |
183 | d->d_iwarns = 0; | 184 | d->d_iwarns = 0; |
184 | } | 185 | } |
185 | } else { | 186 | } else { |
186 | if ((!d->d_ino_softlimit || | 187 | if ((!d->d_ino_softlimit || |
187 | (be64_to_cpu(d->d_icount) <= | 188 | (be64_to_cpu(d->d_icount) <= |
188 | be64_to_cpu(d->d_ino_softlimit))) && | 189 | be64_to_cpu(d->d_ino_softlimit))) && |
189 | (!d->d_ino_hardlimit || | 190 | (!d->d_ino_hardlimit || |
190 | (be64_to_cpu(d->d_icount) <= | 191 | (be64_to_cpu(d->d_icount) <= |
191 | be64_to_cpu(d->d_ino_hardlimit)))) { | 192 | be64_to_cpu(d->d_ino_hardlimit)))) { |
192 | d->d_itimer = 0; | 193 | d->d_itimer = 0; |
193 | } | 194 | } |
194 | } | 195 | } |
195 | 196 | ||
196 | if (!d->d_rtbtimer) { | 197 | if (!d->d_rtbtimer) { |
197 | if ((d->d_rtb_softlimit && | 198 | if ((d->d_rtb_softlimit && |
198 | (be64_to_cpu(d->d_rtbcount) > | 199 | (be64_to_cpu(d->d_rtbcount) > |
199 | be64_to_cpu(d->d_rtb_softlimit))) || | 200 | be64_to_cpu(d->d_rtb_softlimit))) || |
200 | (d->d_rtb_hardlimit && | 201 | (d->d_rtb_hardlimit && |
201 | (be64_to_cpu(d->d_rtbcount) > | 202 | (be64_to_cpu(d->d_rtbcount) > |
202 | be64_to_cpu(d->d_rtb_hardlimit)))) { | 203 | be64_to_cpu(d->d_rtb_hardlimit)))) { |
203 | d->d_rtbtimer = cpu_to_be32(get_seconds() + | 204 | d->d_rtbtimer = cpu_to_be32(get_seconds() + |
204 | mp->m_quotainfo->qi_rtbtimelimit); | 205 | mp->m_quotainfo->qi_rtbtimelimit); |
205 | } else { | 206 | } else { |
206 | d->d_rtbwarns = 0; | 207 | d->d_rtbwarns = 0; |
207 | } | 208 | } |
208 | } else { | 209 | } else { |
209 | if ((!d->d_rtb_softlimit || | 210 | if ((!d->d_rtb_softlimit || |
210 | (be64_to_cpu(d->d_rtbcount) <= | 211 | (be64_to_cpu(d->d_rtbcount) <= |
211 | be64_to_cpu(d->d_rtb_softlimit))) && | 212 | be64_to_cpu(d->d_rtb_softlimit))) && |
212 | (!d->d_rtb_hardlimit || | 213 | (!d->d_rtb_hardlimit || |
213 | (be64_to_cpu(d->d_rtbcount) <= | 214 | (be64_to_cpu(d->d_rtbcount) <= |
214 | be64_to_cpu(d->d_rtb_hardlimit)))) { | 215 | be64_to_cpu(d->d_rtb_hardlimit)))) { |
215 | d->d_rtbtimer = 0; | 216 | d->d_rtbtimer = 0; |
216 | } | 217 | } |
217 | } | 218 | } |
218 | } | 219 | } |
219 | 220 | ||
220 | /* | 221 | /* |
221 | * initialize a buffer full of dquots and log the whole thing | 222 | * initialize a buffer full of dquots and log the whole thing |
222 | */ | 223 | */ |
223 | STATIC void | 224 | STATIC void |
224 | xfs_qm_init_dquot_blk( | 225 | xfs_qm_init_dquot_blk( |
225 | xfs_trans_t *tp, | 226 | xfs_trans_t *tp, |
226 | xfs_mount_t *mp, | 227 | xfs_mount_t *mp, |
227 | xfs_dqid_t id, | 228 | xfs_dqid_t id, |
228 | uint type, | 229 | uint type, |
229 | xfs_buf_t *bp) | 230 | xfs_buf_t *bp) |
230 | { | 231 | { |
231 | struct xfs_quotainfo *q = mp->m_quotainfo; | 232 | struct xfs_quotainfo *q = mp->m_quotainfo; |
232 | xfs_dqblk_t *d; | 233 | xfs_dqblk_t *d; |
233 | int curid, i; | 234 | int curid, i; |
234 | 235 | ||
235 | ASSERT(tp); | 236 | ASSERT(tp); |
236 | ASSERT(xfs_buf_islocked(bp)); | 237 | ASSERT(xfs_buf_islocked(bp)); |
237 | 238 | ||
238 | d = bp->b_addr; | 239 | d = bp->b_addr; |
239 | 240 | ||
240 | /* | 241 | /* |
241 | * ID of the first dquot in the block - id's are zero based. | 242 | * ID of the first dquot in the block - id's are zero based. |
242 | */ | 243 | */ |
243 | curid = id - (id % q->qi_dqperchunk); | 244 | curid = id - (id % q->qi_dqperchunk); |
244 | ASSERT(curid >= 0); | 245 | ASSERT(curid >= 0); |
245 | memset(d, 0, BBTOB(q->qi_dqchunklen)); | 246 | memset(d, 0, BBTOB(q->qi_dqchunklen)); |
246 | for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { | 247 | for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { |
247 | d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); | 248 | d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); |
248 | d->dd_diskdq.d_version = XFS_DQUOT_VERSION; | 249 | d->dd_diskdq.d_version = XFS_DQUOT_VERSION; |
249 | d->dd_diskdq.d_id = cpu_to_be32(curid); | 250 | d->dd_diskdq.d_id = cpu_to_be32(curid); |
250 | d->dd_diskdq.d_flags = type; | 251 | d->dd_diskdq.d_flags = type; |
252 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
253 | uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); | ||
251 | } | 254 | } |
252 | 255 | ||
253 | xfs_trans_dquot_buf(tp, bp, | 256 | xfs_trans_dquot_buf(tp, bp, |
254 | (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : | 257 | (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : |
255 | ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : | 258 | ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : |
256 | XFS_BLF_GDQUOT_BUF))); | 259 | XFS_BLF_GDQUOT_BUF))); |
257 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); | 260 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); |
258 | } | 261 | } |
259 | 262 | ||
260 | /* | 263 | /* |
261 | * Initialize the dynamic speculative preallocation thresholds. The lo/hi | 264 | * Initialize the dynamic speculative preallocation thresholds. The lo/hi |
262 | * watermarks correspond to the soft and hard limits by default. If a soft limit | 265 | * watermarks correspond to the soft and hard limits by default. If a soft limit |
263 | * is not specified, we use 95% of the hard limit. | 266 | * is not specified, we use 95% of the hard limit. |
264 | */ | 267 | */ |
265 | void | 268 | void |
266 | xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) | 269 | xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) |
267 | { | 270 | { |
268 | __uint64_t space; | 271 | __uint64_t space; |
269 | 272 | ||
270 | dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit); | 273 | dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit); |
271 | dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit); | 274 | dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit); |
272 | if (!dqp->q_prealloc_lo_wmark) { | 275 | if (!dqp->q_prealloc_lo_wmark) { |
273 | dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark; | 276 | dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark; |
274 | do_div(dqp->q_prealloc_lo_wmark, 100); | 277 | do_div(dqp->q_prealloc_lo_wmark, 100); |
275 | dqp->q_prealloc_lo_wmark *= 95; | 278 | dqp->q_prealloc_lo_wmark *= 95; |
276 | } | 279 | } |
277 | 280 | ||
278 | space = dqp->q_prealloc_hi_wmark; | 281 | space = dqp->q_prealloc_hi_wmark; |
279 | 282 | ||
280 | do_div(space, 100); | 283 | do_div(space, 100); |
281 | dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space; | 284 | dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space; |
282 | dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3; | 285 | dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3; |
283 | dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; | 286 | dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; |
284 | } | 287 | } |
285 | 288 | ||
286 | static void | 289 | STATIC void |
290 | xfs_dquot_buf_calc_crc( | ||
291 | struct xfs_mount *mp, | ||
292 | struct xfs_buf *bp) | ||
293 | { | ||
294 | struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; | ||
295 | int i; | ||
296 | |||
297 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
298 | return; | ||
299 | |||
300 | for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) { | ||
301 | xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), | ||
302 | offsetof(struct xfs_dqblk, dd_crc)); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | STATIC bool | ||
307 | xfs_dquot_buf_verify_crc( | ||
308 | struct xfs_mount *mp, | ||
309 | struct xfs_buf *bp) | ||
310 | { | ||
311 | struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; | ||
312 | int ndquots; | ||
313 | int i; | ||
314 | |||
315 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
316 | return true; | ||
317 | |||
318 | /* | ||
319 | * if we are in log recovery, the quota subsystem has not been | ||
320 | * initialised so we have no quotainfo structure. In that case, we need | ||
321 | * to manually calculate the number of dquots in the buffer. | ||
322 | */ | ||
323 | if (mp->m_quotainfo) | ||
324 | ndquots = mp->m_quotainfo->qi_dqperchunk; | ||
325 | else | ||
326 | ndquots = xfs_qm_calc_dquots_per_chunk(mp, | ||
327 | XFS_BB_TO_FSB(mp, bp->b_length)); | ||
328 | |||
329 | for (i = 0; i < ndquots; i++, d++) { | ||
330 | if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), | ||
331 | offsetof(struct xfs_dqblk, dd_crc))) | ||
332 | return false; | ||
333 | if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) | ||
334 | return false; | ||
335 | } | ||
336 | |||
337 | return true; | ||
338 | } | ||
339 | |||
340 | STATIC bool | ||
287 | xfs_dquot_buf_verify( | 341 | xfs_dquot_buf_verify( |
342 | struct xfs_mount *mp, | ||
288 | struct xfs_buf *bp) | 343 | struct xfs_buf *bp) |
289 | { | 344 | { |
290 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
291 | struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; | 345 | struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; |
292 | struct xfs_disk_dquot *ddq; | ||
293 | xfs_dqid_t id = 0; | 346 | xfs_dqid_t id = 0; |
347 | int ndquots; | ||
294 | int i; | 348 | int i; |
295 | 349 | ||
296 | /* | 350 | /* |
351 | * if we are in log recovery, the quota subsystem has not been | ||
352 | * initialised so we have no quotainfo structure. In that case, we need | ||
353 | * to manually calculate the number of dquots in the buffer. | ||
354 | */ | ||
355 | if (mp->m_quotainfo) | ||
356 | ndquots = mp->m_quotainfo->qi_dqperchunk; | ||
357 | else | ||
358 | ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length); | ||
359 | |||
360 | /* | ||
297 | * On the first read of the buffer, verify that each dquot is valid. | 361 | * On the first read of the buffer, verify that each dquot is valid. |
298 | * We don't know what the id of the dquot is supposed to be, just that | 362 | * We don't know what the id of the dquot is supposed to be, just that |
299 | * they should be increasing monotonically within the buffer. If the | 363 | * they should be increasing monotonically within the buffer. If the |
300 | * first id is corrupt, then it will fail on the second dquot in the | 364 | * first id is corrupt, then it will fail on the second dquot in the |
301 | * buffer so corruptions could point to the wrong dquot in this case. | 365 | * buffer so corruptions could point to the wrong dquot in this case. |
302 | */ | 366 | */ |
303 | for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { | 367 | for (i = 0; i < ndquots; i++) { |
304 | int error; | 368 | struct xfs_disk_dquot *ddq; |
369 | int error; | ||
305 | 370 | ||
306 | ddq = &d[i].dd_diskdq; | 371 | ddq = &d[i].dd_diskdq; |
307 | 372 | ||
308 | if (i == 0) | 373 | if (i == 0) |
309 | id = be32_to_cpu(ddq->d_id); | 374 | id = be32_to_cpu(ddq->d_id); |
310 | 375 | ||
311 | error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, | 376 | error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, |
312 | "xfs_dquot_read_verify"); | 377 | "xfs_dquot_buf_verify"); |
313 | if (error) { | 378 | if (error) |
314 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d); | 379 | return false; |
315 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
316 | break; | ||
317 | } | ||
318 | } | 380 | } |
381 | return true; | ||
319 | } | 382 | } |
320 | 383 | ||
321 | static void | 384 | static void |
322 | xfs_dquot_buf_read_verify( | 385 | xfs_dquot_buf_read_verify( |
323 | struct xfs_buf *bp) | 386 | struct xfs_buf *bp) |
324 | { | 387 | { |
325 | xfs_dquot_buf_verify(bp); | 388 | struct xfs_mount *mp = bp->b_target->bt_mount; |
389 | |||
390 | if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { | ||
391 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
392 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
393 | } | ||
326 | } | 394 | } |
327 | 395 | ||
328 | void | 396 | void |
329 | xfs_dquot_buf_write_verify( | 397 | xfs_dquot_buf_write_verify( |
330 | struct xfs_buf *bp) | 398 | struct xfs_buf *bp) |
331 | { | 399 | { |
332 | xfs_dquot_buf_verify(bp); | 400 | struct xfs_mount *mp = bp->b_target->bt_mount; |
401 | |||
402 | if (!xfs_dquot_buf_verify(mp, bp)) { | ||
403 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
404 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
405 | return; | ||
406 | } | ||
407 | xfs_dquot_buf_calc_crc(mp, bp); | ||
333 | } | 408 | } |
334 | 409 | ||
335 | const struct xfs_buf_ops xfs_dquot_buf_ops = { | 410 | const struct xfs_buf_ops xfs_dquot_buf_ops = { |
336 | .verify_read = xfs_dquot_buf_read_verify, | 411 | .verify_read = xfs_dquot_buf_read_verify, |
337 | .verify_write = xfs_dquot_buf_write_verify, | 412 | .verify_write = xfs_dquot_buf_write_verify, |
338 | }; | 413 | }; |
339 | 414 | ||
340 | /* | 415 | /* |
341 | * Allocate a block and fill it with dquots. | 416 | * Allocate a block and fill it with dquots. |
342 | * This is called when the bmapi finds a hole. | 417 | * This is called when the bmapi finds a hole. |
343 | */ | 418 | */ |
344 | STATIC int | 419 | STATIC int |
345 | xfs_qm_dqalloc( | 420 | xfs_qm_dqalloc( |
346 | xfs_trans_t **tpp, | 421 | xfs_trans_t **tpp, |
347 | xfs_mount_t *mp, | 422 | xfs_mount_t *mp, |
348 | xfs_dquot_t *dqp, | 423 | xfs_dquot_t *dqp, |
349 | xfs_inode_t *quotip, | 424 | xfs_inode_t *quotip, |
350 | xfs_fileoff_t offset_fsb, | 425 | xfs_fileoff_t offset_fsb, |
351 | xfs_buf_t **O_bpp) | 426 | xfs_buf_t **O_bpp) |
352 | { | 427 | { |
353 | xfs_fsblock_t firstblock; | 428 | xfs_fsblock_t firstblock; |
354 | xfs_bmap_free_t flist; | 429 | xfs_bmap_free_t flist; |
355 | xfs_bmbt_irec_t map; | 430 | xfs_bmbt_irec_t map; |
356 | int nmaps, error, committed; | 431 | int nmaps, error, committed; |
357 | xfs_buf_t *bp; | 432 | xfs_buf_t *bp; |
358 | xfs_trans_t *tp = *tpp; | 433 | xfs_trans_t *tp = *tpp; |
359 | 434 | ||
360 | ASSERT(tp != NULL); | 435 | ASSERT(tp != NULL); |
361 | 436 | ||
362 | trace_xfs_dqalloc(dqp); | 437 | trace_xfs_dqalloc(dqp); |
363 | 438 | ||
364 | /* | 439 | /* |
365 | * Initialize the bmap freelist prior to calling bmapi code. | 440 | * Initialize the bmap freelist prior to calling bmapi code. |
366 | */ | 441 | */ |
367 | xfs_bmap_init(&flist, &firstblock); | 442 | xfs_bmap_init(&flist, &firstblock); |
368 | xfs_ilock(quotip, XFS_ILOCK_EXCL); | 443 | xfs_ilock(quotip, XFS_ILOCK_EXCL); |
369 | /* | 444 | /* |
370 | * Return if this type of quotas is turned off while we didn't | 445 | * Return if this type of quotas is turned off while we didn't |
371 | * have an inode lock | 446 | * have an inode lock |
372 | */ | 447 | */ |
373 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { | 448 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { |
374 | xfs_iunlock(quotip, XFS_ILOCK_EXCL); | 449 | xfs_iunlock(quotip, XFS_ILOCK_EXCL); |
375 | return (ESRCH); | 450 | return (ESRCH); |
376 | } | 451 | } |
377 | 452 | ||
378 | xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); | 453 | xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); |
379 | nmaps = 1; | 454 | nmaps = 1; |
380 | error = xfs_bmapi_write(tp, quotip, offset_fsb, | 455 | error = xfs_bmapi_write(tp, quotip, offset_fsb, |
381 | XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, | 456 | XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, |
382 | &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), | 457 | &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), |
383 | &map, &nmaps, &flist); | 458 | &map, &nmaps, &flist); |
384 | if (error) | 459 | if (error) |
385 | goto error0; | 460 | goto error0; |
386 | ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); | 461 | ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); |
387 | ASSERT(nmaps == 1); | 462 | ASSERT(nmaps == 1); |
388 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | 463 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && |
389 | (map.br_startblock != HOLESTARTBLOCK)); | 464 | (map.br_startblock != HOLESTARTBLOCK)); |
390 | 465 | ||
391 | /* | 466 | /* |
392 | * Keep track of the blkno to save a lookup later | 467 | * Keep track of the blkno to save a lookup later |
393 | */ | 468 | */ |
394 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); | 469 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); |
395 | 470 | ||
396 | /* now we can just get the buffer (there's nothing to read yet) */ | 471 | /* now we can just get the buffer (there's nothing to read yet) */ |
397 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, | 472 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, |
398 | dqp->q_blkno, | 473 | dqp->q_blkno, |
399 | mp->m_quotainfo->qi_dqchunklen, | 474 | mp->m_quotainfo->qi_dqchunklen, |
400 | 0); | 475 | 0); |
401 | 476 | ||
402 | error = xfs_buf_geterror(bp); | 477 | error = xfs_buf_geterror(bp); |
403 | if (error) | 478 | if (error) |
404 | goto error1; | 479 | goto error1; |
405 | bp->b_ops = &xfs_dquot_buf_ops; | 480 | bp->b_ops = &xfs_dquot_buf_ops; |
406 | 481 | ||
407 | /* | 482 | /* |
408 | * Make a chunk of dquots out of this buffer and log | 483 | * Make a chunk of dquots out of this buffer and log |
409 | * the entire thing. | 484 | * the entire thing. |
410 | */ | 485 | */ |
411 | xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), | 486 | xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), |
412 | dqp->dq_flags & XFS_DQ_ALLTYPES, bp); | 487 | dqp->dq_flags & XFS_DQ_ALLTYPES, bp); |
413 | 488 | ||
414 | /* | 489 | /* |
415 | * xfs_bmap_finish() may commit the current transaction and | 490 | * xfs_bmap_finish() may commit the current transaction and |
416 | * start a second transaction if the freelist is not empty. | 491 | * start a second transaction if the freelist is not empty. |
417 | * | 492 | * |
418 | * Since we still want to modify this buffer, we need to | 493 | * Since we still want to modify this buffer, we need to |
419 | * ensure that the buffer is not released on commit of | 494 | * ensure that the buffer is not released on commit of |
420 | * the first transaction and ensure the buffer is added to the | 495 | * the first transaction and ensure the buffer is added to the |
421 | * second transaction. | 496 | * second transaction. |
422 | * | 497 | * |
423 | * If there is only one transaction then don't stop the buffer | 498 | * If there is only one transaction then don't stop the buffer |
424 | * from being released when it commits later on. | 499 | * from being released when it commits later on. |
425 | */ | 500 | */ |
426 | 501 | ||
427 | xfs_trans_bhold(tp, bp); | 502 | xfs_trans_bhold(tp, bp); |
428 | 503 | ||
429 | if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { | 504 | if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { |
430 | goto error1; | 505 | goto error1; |
431 | } | 506 | } |
432 | 507 | ||
433 | if (committed) { | 508 | if (committed) { |
434 | tp = *tpp; | 509 | tp = *tpp; |
435 | xfs_trans_bjoin(tp, bp); | 510 | xfs_trans_bjoin(tp, bp); |
436 | } else { | 511 | } else { |
437 | xfs_trans_bhold_release(tp, bp); | 512 | xfs_trans_bhold_release(tp, bp); |
438 | } | 513 | } |
439 | 514 | ||
440 | *O_bpp = bp; | 515 | *O_bpp = bp; |
441 | return 0; | 516 | return 0; |
442 | 517 | ||
443 | error1: | 518 | error1: |
444 | xfs_bmap_cancel(&flist); | 519 | xfs_bmap_cancel(&flist); |
445 | error0: | 520 | error0: |
446 | xfs_iunlock(quotip, XFS_ILOCK_EXCL); | 521 | xfs_iunlock(quotip, XFS_ILOCK_EXCL); |
447 | 522 | ||
448 | return (error); | 523 | return (error); |
449 | } | 524 | } |
450 | STATIC int | 525 | STATIC int |
451 | xfs_qm_dqrepair( | 526 | xfs_qm_dqrepair( |
452 | struct xfs_mount *mp, | 527 | struct xfs_mount *mp, |
453 | struct xfs_trans *tp, | 528 | struct xfs_trans *tp, |
454 | struct xfs_dquot *dqp, | 529 | struct xfs_dquot *dqp, |
455 | xfs_dqid_t firstid, | 530 | xfs_dqid_t firstid, |
456 | struct xfs_buf **bpp) | 531 | struct xfs_buf **bpp) |
457 | { | 532 | { |
458 | int error; | 533 | int error; |
459 | struct xfs_disk_dquot *ddq; | 534 | struct xfs_disk_dquot *ddq; |
460 | struct xfs_dqblk *d; | 535 | struct xfs_dqblk *d; |
461 | int i; | 536 | int i; |
462 | 537 | ||
463 | /* | 538 | /* |
464 | * Read the buffer without verification so we get the corrupted | 539 | * Read the buffer without verification so we get the corrupted |
465 | * buffer returned to us. make sure we verify it on write, though. | 540 | * buffer returned to us. make sure we verify it on write, though. |
466 | */ | 541 | */ |
467 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, | 542 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, |
468 | mp->m_quotainfo->qi_dqchunklen, | 543 | mp->m_quotainfo->qi_dqchunklen, |
469 | 0, bpp, NULL); | 544 | 0, bpp, NULL); |
470 | 545 | ||
471 | if (error) { | 546 | if (error) { |
472 | ASSERT(*bpp == NULL); | 547 | ASSERT(*bpp == NULL); |
473 | return XFS_ERROR(error); | 548 | return XFS_ERROR(error); |
474 | } | 549 | } |
475 | (*bpp)->b_ops = &xfs_dquot_buf_ops; | 550 | (*bpp)->b_ops = &xfs_dquot_buf_ops; |
476 | 551 | ||
477 | ASSERT(xfs_buf_islocked(*bpp)); | 552 | ASSERT(xfs_buf_islocked(*bpp)); |
478 | d = (struct xfs_dqblk *)(*bpp)->b_addr; | 553 | d = (struct xfs_dqblk *)(*bpp)->b_addr; |
479 | 554 | ||
480 | /* Do the actual repair of dquots in this buffer */ | 555 | /* Do the actual repair of dquots in this buffer */ |
481 | for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { | 556 | for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { |
482 | ddq = &d[i].dd_diskdq; | 557 | ddq = &d[i].dd_diskdq; |
483 | error = xfs_qm_dqcheck(mp, ddq, firstid + i, | 558 | error = xfs_qm_dqcheck(mp, ddq, firstid + i, |
484 | dqp->dq_flags & XFS_DQ_ALLTYPES, | 559 | dqp->dq_flags & XFS_DQ_ALLTYPES, |
485 | XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); | 560 | XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); |
486 | if (error) { | 561 | if (error) { |
487 | /* repair failed, we're screwed */ | 562 | /* repair failed, we're screwed */ |
488 | xfs_trans_brelse(tp, *bpp); | 563 | xfs_trans_brelse(tp, *bpp); |
489 | return XFS_ERROR(EIO); | 564 | return XFS_ERROR(EIO); |
490 | } | 565 | } |
491 | } | 566 | } |
492 | 567 | ||
493 | return 0; | 568 | return 0; |
494 | } | 569 | } |
495 | 570 | ||
496 | /* | 571 | /* |
497 | * Maps a dquot to the buffer containing its on-disk version. | 572 | * Maps a dquot to the buffer containing its on-disk version. |
498 | * This returns a ptr to the buffer containing the on-disk dquot | 573 | * This returns a ptr to the buffer containing the on-disk dquot |
499 | * in the bpp param, and a ptr to the on-disk dquot within that buffer | 574 | * in the bpp param, and a ptr to the on-disk dquot within that buffer |
500 | */ | 575 | */ |
501 | STATIC int | 576 | STATIC int |
502 | xfs_qm_dqtobp( | 577 | xfs_qm_dqtobp( |
503 | xfs_trans_t **tpp, | 578 | xfs_trans_t **tpp, |
504 | xfs_dquot_t *dqp, | 579 | xfs_dquot_t *dqp, |
505 | xfs_disk_dquot_t **O_ddpp, | 580 | xfs_disk_dquot_t **O_ddpp, |
506 | xfs_buf_t **O_bpp, | 581 | xfs_buf_t **O_bpp, |
507 | uint flags) | 582 | uint flags) |
508 | { | 583 | { |
509 | xfs_bmbt_irec_t map; | 584 | xfs_bmbt_irec_t map; |
510 | int nmaps = 1, error; | 585 | int nmaps = 1, error; |
511 | xfs_buf_t *bp; | 586 | xfs_buf_t *bp; |
512 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); | 587 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); |
513 | xfs_mount_t *mp = dqp->q_mount; | 588 | xfs_mount_t *mp = dqp->q_mount; |
514 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); | 589 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); |
515 | xfs_trans_t *tp = (tpp ? *tpp : NULL); | 590 | xfs_trans_t *tp = (tpp ? *tpp : NULL); |
516 | 591 | ||
517 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; | 592 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; |
518 | 593 | ||
519 | xfs_ilock(quotip, XFS_ILOCK_SHARED); | 594 | xfs_ilock(quotip, XFS_ILOCK_SHARED); |
520 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { | 595 | if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { |
521 | /* | 596 | /* |
522 | * Return if this type of quotas is turned off while we | 597 | * Return if this type of quotas is turned off while we |
523 | * didn't have the quota inode lock. | 598 | * didn't have the quota inode lock. |
524 | */ | 599 | */ |
525 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 600 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); |
526 | return ESRCH; | 601 | return ESRCH; |
527 | } | 602 | } |
528 | 603 | ||
529 | /* | 604 | /* |
530 | * Find the block map; no allocations yet | 605 | * Find the block map; no allocations yet |
531 | */ | 606 | */ |
532 | error = xfs_bmapi_read(quotip, dqp->q_fileoffset, | 607 | error = xfs_bmapi_read(quotip, dqp->q_fileoffset, |
533 | XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); | 608 | XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); |
534 | 609 | ||
535 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 610 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); |
536 | if (error) | 611 | if (error) |
537 | return error; | 612 | return error; |
538 | 613 | ||
539 | ASSERT(nmaps == 1); | 614 | ASSERT(nmaps == 1); |
540 | ASSERT(map.br_blockcount == 1); | 615 | ASSERT(map.br_blockcount == 1); |
541 | 616 | ||
542 | /* | 617 | /* |
543 | * Offset of dquot in the (fixed sized) dquot chunk. | 618 | * Offset of dquot in the (fixed sized) dquot chunk. |
544 | */ | 619 | */ |
545 | dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * | 620 | dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * |
546 | sizeof(xfs_dqblk_t); | 621 | sizeof(xfs_dqblk_t); |
547 | 622 | ||
548 | ASSERT(map.br_startblock != DELAYSTARTBLOCK); | 623 | ASSERT(map.br_startblock != DELAYSTARTBLOCK); |
549 | if (map.br_startblock == HOLESTARTBLOCK) { | 624 | if (map.br_startblock == HOLESTARTBLOCK) { |
550 | /* | 625 | /* |
551 | * We don't allocate unless we're asked to | 626 | * We don't allocate unless we're asked to |
552 | */ | 627 | */ |
553 | if (!(flags & XFS_QMOPT_DQALLOC)) | 628 | if (!(flags & XFS_QMOPT_DQALLOC)) |
554 | return ENOENT; | 629 | return ENOENT; |
555 | 630 | ||
556 | ASSERT(tp); | 631 | ASSERT(tp); |
557 | error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, | 632 | error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, |
558 | dqp->q_fileoffset, &bp); | 633 | dqp->q_fileoffset, &bp); |
559 | if (error) | 634 | if (error) |
560 | return error; | 635 | return error; |
561 | tp = *tpp; | 636 | tp = *tpp; |
562 | } else { | 637 | } else { |
563 | trace_xfs_dqtobp_read(dqp); | 638 | trace_xfs_dqtobp_read(dqp); |
564 | 639 | ||
565 | /* | 640 | /* |
566 | * store the blkno etc so that we don't have to do the | 641 | * store the blkno etc so that we don't have to do the |
567 | * mapping all the time | 642 | * mapping all the time |
568 | */ | 643 | */ |
569 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); | 644 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); |
570 | 645 | ||
571 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 646 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
572 | dqp->q_blkno, | 647 | dqp->q_blkno, |
573 | mp->m_quotainfo->qi_dqchunklen, | 648 | mp->m_quotainfo->qi_dqchunklen, |
574 | 0, &bp, &xfs_dquot_buf_ops); | 649 | 0, &bp, &xfs_dquot_buf_ops); |
575 | 650 | ||
576 | if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { | 651 | if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { |
577 | xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * | 652 | xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * |
578 | mp->m_quotainfo->qi_dqperchunk; | 653 | mp->m_quotainfo->qi_dqperchunk; |
579 | ASSERT(bp == NULL); | 654 | ASSERT(bp == NULL); |
580 | error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); | 655 | error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); |
581 | } | 656 | } |
582 | 657 | ||
583 | if (error) { | 658 | if (error) { |
584 | ASSERT(bp == NULL); | 659 | ASSERT(bp == NULL); |
585 | return XFS_ERROR(error); | 660 | return XFS_ERROR(error); |
586 | } | 661 | } |
587 | } | 662 | } |
588 | 663 | ||
589 | ASSERT(xfs_buf_islocked(bp)); | 664 | ASSERT(xfs_buf_islocked(bp)); |
590 | *O_bpp = bp; | 665 | *O_bpp = bp; |
591 | *O_ddpp = bp->b_addr + dqp->q_bufoffset; | 666 | *O_ddpp = bp->b_addr + dqp->q_bufoffset; |
592 | 667 | ||
593 | return (0); | 668 | return (0); |
594 | } | 669 | } |
595 | 670 | ||
596 | 671 | ||
597 | /* | 672 | /* |
598 | * Read in the ondisk dquot using dqtobp() then copy it to an incore version, | 673 | * Read in the ondisk dquot using dqtobp() then copy it to an incore version, |
599 | * and release the buffer immediately. | 674 | * and release the buffer immediately. |
600 | * | 675 | * |
601 | * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed. | 676 | * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed. |
602 | */ | 677 | */ |
603 | int | 678 | int |
604 | xfs_qm_dqread( | 679 | xfs_qm_dqread( |
605 | struct xfs_mount *mp, | 680 | struct xfs_mount *mp, |
606 | xfs_dqid_t id, | 681 | xfs_dqid_t id, |
607 | uint type, | 682 | uint type, |
608 | uint flags, | 683 | uint flags, |
609 | struct xfs_dquot **O_dqpp) | 684 | struct xfs_dquot **O_dqpp) |
610 | { | 685 | { |
611 | struct xfs_dquot *dqp; | 686 | struct xfs_dquot *dqp; |
612 | struct xfs_disk_dquot *ddqp; | 687 | struct xfs_disk_dquot *ddqp; |
613 | struct xfs_buf *bp; | 688 | struct xfs_buf *bp; |
614 | struct xfs_trans *tp = NULL; | 689 | struct xfs_trans *tp = NULL; |
615 | int error; | 690 | int error; |
616 | int cancelflags = 0; | 691 | int cancelflags = 0; |
617 | 692 | ||
618 | 693 | ||
619 | dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); | 694 | dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); |
620 | 695 | ||
621 | dqp->dq_flags = type; | 696 | dqp->dq_flags = type; |
622 | dqp->q_core.d_id = cpu_to_be32(id); | 697 | dqp->q_core.d_id = cpu_to_be32(id); |
623 | dqp->q_mount = mp; | 698 | dqp->q_mount = mp; |
624 | INIT_LIST_HEAD(&dqp->q_lru); | 699 | INIT_LIST_HEAD(&dqp->q_lru); |
625 | mutex_init(&dqp->q_qlock); | 700 | mutex_init(&dqp->q_qlock); |
626 | init_waitqueue_head(&dqp->q_pinwait); | 701 | init_waitqueue_head(&dqp->q_pinwait); |
627 | 702 | ||
628 | /* | 703 | /* |
629 | * Because we want to use a counting completion, complete | 704 | * Because we want to use a counting completion, complete |
630 | * the flush completion once to allow a single access to | 705 | * the flush completion once to allow a single access to |
631 | * the flush completion without blocking. | 706 | * the flush completion without blocking. |
632 | */ | 707 | */ |
633 | init_completion(&dqp->q_flush); | 708 | init_completion(&dqp->q_flush); |
634 | complete(&dqp->q_flush); | 709 | complete(&dqp->q_flush); |
635 | 710 | ||
636 | /* | 711 | /* |
637 | * Make sure group quotas have a different lock class than user | 712 | * Make sure group quotas have a different lock class than user |
638 | * quotas. | 713 | * quotas. |
639 | */ | 714 | */ |
640 | if (!(type & XFS_DQ_USER)) | 715 | if (!(type & XFS_DQ_USER)) |
641 | lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); | 716 | lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); |
642 | 717 | ||
643 | XFS_STATS_INC(xs_qm_dquot); | 718 | XFS_STATS_INC(xs_qm_dquot); |
644 | 719 | ||
645 | trace_xfs_dqread(dqp); | 720 | trace_xfs_dqread(dqp); |
646 | 721 | ||
647 | if (flags & XFS_QMOPT_DQALLOC) { | 722 | if (flags & XFS_QMOPT_DQALLOC) { |
648 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); | 723 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); |
649 | error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), | 724 | error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), |
650 | XFS_QM_DQALLOC_LOG_RES(mp), 0, | 725 | XFS_QM_DQALLOC_LOG_RES(mp), 0, |
651 | XFS_TRANS_PERM_LOG_RES, | 726 | XFS_TRANS_PERM_LOG_RES, |
652 | XFS_WRITE_LOG_COUNT); | 727 | XFS_WRITE_LOG_COUNT); |
653 | if (error) | 728 | if (error) |
654 | goto error1; | 729 | goto error1; |
655 | cancelflags = XFS_TRANS_RELEASE_LOG_RES; | 730 | cancelflags = XFS_TRANS_RELEASE_LOG_RES; |
656 | } | 731 | } |
657 | 732 | ||
658 | /* | 733 | /* |
659 | * get a pointer to the on-disk dquot and the buffer containing it | 734 | * get a pointer to the on-disk dquot and the buffer containing it |
660 | * dqp already knows its own type (GROUP/USER). | 735 | * dqp already knows its own type (GROUP/USER). |
661 | */ | 736 | */ |
662 | error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags); | 737 | error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags); |
663 | if (error) { | 738 | if (error) { |
664 | /* | 739 | /* |
665 | * This can happen if quotas got turned off (ESRCH), | 740 | * This can happen if quotas got turned off (ESRCH), |
666 | * or if the dquot didn't exist on disk and we ask to | 741 | * or if the dquot didn't exist on disk and we ask to |
667 | * allocate (ENOENT). | 742 | * allocate (ENOENT). |
668 | */ | 743 | */ |
669 | trace_xfs_dqread_fail(dqp); | 744 | trace_xfs_dqread_fail(dqp); |
670 | cancelflags |= XFS_TRANS_ABORT; | 745 | cancelflags |= XFS_TRANS_ABORT; |
671 | goto error1; | 746 | goto error1; |
672 | } | 747 | } |
673 | 748 | ||
674 | /* copy everything from disk dquot to the incore dquot */ | 749 | /* copy everything from disk dquot to the incore dquot */ |
675 | memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); | 750 | memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); |
676 | xfs_qm_dquot_logitem_init(dqp); | 751 | xfs_qm_dquot_logitem_init(dqp); |
677 | 752 | ||
678 | /* | 753 | /* |
679 | * Reservation counters are defined as reservation plus current usage | 754 | * Reservation counters are defined as reservation plus current usage |
680 | * to avoid having to add every time. | 755 | * to avoid having to add every time. |
681 | */ | 756 | */ |
682 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); | 757 | dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); |
683 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); | 758 | dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); |
684 | dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); | 759 | dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); |
685 | 760 | ||
686 | /* initialize the dquot speculative prealloc thresholds */ | 761 | /* initialize the dquot speculative prealloc thresholds */ |
687 | xfs_dquot_set_prealloc_limits(dqp); | 762 | xfs_dquot_set_prealloc_limits(dqp); |
688 | 763 | ||
689 | /* Mark the buf so that this will stay incore a little longer */ | 764 | /* Mark the buf so that this will stay incore a little longer */ |
690 | xfs_buf_set_ref(bp, XFS_DQUOT_REF); | 765 | xfs_buf_set_ref(bp, XFS_DQUOT_REF); |
691 | 766 | ||
692 | /* | 767 | /* |
693 | * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) | 768 | * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) |
694 | * So we need to release with xfs_trans_brelse(). | 769 | * So we need to release with xfs_trans_brelse(). |
695 | * The strategy here is identical to that of inodes; we lock | 770 | * The strategy here is identical to that of inodes; we lock |
696 | * the dquot in xfs_qm_dqget() before making it accessible to | 771 | * the dquot in xfs_qm_dqget() before making it accessible to |
697 | * others. This is because dquots, like inodes, need a good level of | 772 | * others. This is because dquots, like inodes, need a good level of |
698 | * concurrency, and we don't want to take locks on the entire buffers | 773 | * concurrency, and we don't want to take locks on the entire buffers |
699 | * for dquot accesses. | 774 | * for dquot accesses. |
700 | * Note also that the dquot buffer may even be dirty at this point, if | 775 | * Note also that the dquot buffer may even be dirty at this point, if |
701 | * this particular dquot was repaired. We still aren't afraid to | 776 | * this particular dquot was repaired. We still aren't afraid to |
702 | * brelse it because we have the changes incore. | 777 | * brelse it because we have the changes incore. |
703 | */ | 778 | */ |
704 | ASSERT(xfs_buf_islocked(bp)); | 779 | ASSERT(xfs_buf_islocked(bp)); |
705 | xfs_trans_brelse(tp, bp); | 780 | xfs_trans_brelse(tp, bp); |
706 | 781 | ||
707 | if (tp) { | 782 | if (tp) { |
708 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 783 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
709 | if (error) | 784 | if (error) |
710 | goto error0; | 785 | goto error0; |
711 | } | 786 | } |
712 | 787 | ||
713 | *O_dqpp = dqp; | 788 | *O_dqpp = dqp; |
714 | return error; | 789 | return error; |
715 | 790 | ||
716 | error1: | 791 | error1: |
717 | if (tp) | 792 | if (tp) |
718 | xfs_trans_cancel(tp, cancelflags); | 793 | xfs_trans_cancel(tp, cancelflags); |
719 | error0: | 794 | error0: |
720 | xfs_qm_dqdestroy(dqp); | 795 | xfs_qm_dqdestroy(dqp); |
721 | *O_dqpp = NULL; | 796 | *O_dqpp = NULL; |
722 | return error; | 797 | return error; |
723 | } | 798 | } |
724 | 799 | ||
725 | /* | 800 | /* |
726 | * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a | 801 | * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a |
727 | * a locked dquot, doing an allocation (if requested) as needed. | 802 | * a locked dquot, doing an allocation (if requested) as needed. |
728 | * When both an inode and an id are given, the inode's id takes precedence. | 803 | * When both an inode and an id are given, the inode's id takes precedence. |
729 | * That is, if the id changes while we don't hold the ilock inside this | 804 | * That is, if the id changes while we don't hold the ilock inside this |
730 | * function, the new dquot is returned, not necessarily the one requested | 805 | * function, the new dquot is returned, not necessarily the one requested |
731 | * in the id argument. | 806 | * in the id argument. |
732 | */ | 807 | */ |
733 | int | 808 | int |
734 | xfs_qm_dqget( | 809 | xfs_qm_dqget( |
735 | xfs_mount_t *mp, | 810 | xfs_mount_t *mp, |
736 | xfs_inode_t *ip, /* locked inode (optional) */ | 811 | xfs_inode_t *ip, /* locked inode (optional) */ |
737 | xfs_dqid_t id, /* uid/projid/gid depending on type */ | 812 | xfs_dqid_t id, /* uid/projid/gid depending on type */ |
738 | uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ | 813 | uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ |
739 | uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ | 814 | uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ |
740 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ | 815 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ |
741 | { | 816 | { |
742 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 817 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
743 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 818 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); |
744 | struct xfs_dquot *dqp; | 819 | struct xfs_dquot *dqp; |
745 | int error; | 820 | int error; |
746 | 821 | ||
747 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 822 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
748 | if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || | 823 | if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || |
749 | (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || | 824 | (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || |
750 | (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { | 825 | (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { |
751 | return (ESRCH); | 826 | return (ESRCH); |
752 | } | 827 | } |
753 | 828 | ||
754 | #ifdef DEBUG | 829 | #ifdef DEBUG |
755 | if (xfs_do_dqerror) { | 830 | if (xfs_do_dqerror) { |
756 | if ((xfs_dqerror_target == mp->m_ddev_targp) && | 831 | if ((xfs_dqerror_target == mp->m_ddev_targp) && |
757 | (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { | 832 | (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { |
758 | xfs_debug(mp, "Returning error in dqget"); | 833 | xfs_debug(mp, "Returning error in dqget"); |
759 | return (EIO); | 834 | return (EIO); |
760 | } | 835 | } |
761 | } | 836 | } |
762 | 837 | ||
763 | ASSERT(type == XFS_DQ_USER || | 838 | ASSERT(type == XFS_DQ_USER || |
764 | type == XFS_DQ_PROJ || | 839 | type == XFS_DQ_PROJ || |
765 | type == XFS_DQ_GROUP); | 840 | type == XFS_DQ_GROUP); |
766 | if (ip) { | 841 | if (ip) { |
767 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 842 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
768 | ASSERT(xfs_inode_dquot(ip, type) == NULL); | 843 | ASSERT(xfs_inode_dquot(ip, type) == NULL); |
769 | } | 844 | } |
770 | #endif | 845 | #endif |
771 | 846 | ||
772 | restart: | 847 | restart: |
773 | mutex_lock(&qi->qi_tree_lock); | 848 | mutex_lock(&qi->qi_tree_lock); |
774 | dqp = radix_tree_lookup(tree, id); | 849 | dqp = radix_tree_lookup(tree, id); |
775 | if (dqp) { | 850 | if (dqp) { |
776 | xfs_dqlock(dqp); | 851 | xfs_dqlock(dqp); |
777 | if (dqp->dq_flags & XFS_DQ_FREEING) { | 852 | if (dqp->dq_flags & XFS_DQ_FREEING) { |
778 | xfs_dqunlock(dqp); | 853 | xfs_dqunlock(dqp); |
779 | mutex_unlock(&qi->qi_tree_lock); | 854 | mutex_unlock(&qi->qi_tree_lock); |
780 | trace_xfs_dqget_freeing(dqp); | 855 | trace_xfs_dqget_freeing(dqp); |
781 | delay(1); | 856 | delay(1); |
782 | goto restart; | 857 | goto restart; |
783 | } | 858 | } |
784 | 859 | ||
785 | dqp->q_nrefs++; | 860 | dqp->q_nrefs++; |
786 | mutex_unlock(&qi->qi_tree_lock); | 861 | mutex_unlock(&qi->qi_tree_lock); |
787 | 862 | ||
788 | trace_xfs_dqget_hit(dqp); | 863 | trace_xfs_dqget_hit(dqp); |
789 | XFS_STATS_INC(xs_qm_dqcachehits); | 864 | XFS_STATS_INC(xs_qm_dqcachehits); |
790 | *O_dqpp = dqp; | 865 | *O_dqpp = dqp; |
791 | return 0; | 866 | return 0; |
792 | } | 867 | } |
793 | mutex_unlock(&qi->qi_tree_lock); | 868 | mutex_unlock(&qi->qi_tree_lock); |
794 | XFS_STATS_INC(xs_qm_dqcachemisses); | 869 | XFS_STATS_INC(xs_qm_dqcachemisses); |
795 | 870 | ||
796 | /* | 871 | /* |
797 | * Dquot cache miss. We don't want to keep the inode lock across | 872 | * Dquot cache miss. We don't want to keep the inode lock across |
798 | * a (potential) disk read. Also we don't want to deal with the lock | 873 | * a (potential) disk read. Also we don't want to deal with the lock |
799 | * ordering between quotainode and this inode. OTOH, dropping the inode | 874 | * ordering between quotainode and this inode. OTOH, dropping the inode |
800 | * lock here means dealing with a chown that can happen before | 875 | * lock here means dealing with a chown that can happen before |
801 | * we re-acquire the lock. | 876 | * we re-acquire the lock. |
802 | */ | 877 | */ |
803 | if (ip) | 878 | if (ip) |
804 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 879 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
805 | 880 | ||
806 | error = xfs_qm_dqread(mp, id, type, flags, &dqp); | 881 | error = xfs_qm_dqread(mp, id, type, flags, &dqp); |
807 | 882 | ||
808 | if (ip) | 883 | if (ip) |
809 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 884 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
810 | 885 | ||
811 | if (error) | 886 | if (error) |
812 | return error; | 887 | return error; |
813 | 888 | ||
814 | if (ip) { | 889 | if (ip) { |
815 | /* | 890 | /* |
816 | * A dquot could be attached to this inode by now, since | 891 | * A dquot could be attached to this inode by now, since |
817 | * we had dropped the ilock. | 892 | * we had dropped the ilock. |
818 | */ | 893 | */ |
819 | if (xfs_this_quota_on(mp, type)) { | 894 | if (xfs_this_quota_on(mp, type)) { |
820 | struct xfs_dquot *dqp1; | 895 | struct xfs_dquot *dqp1; |
821 | 896 | ||
822 | dqp1 = xfs_inode_dquot(ip, type); | 897 | dqp1 = xfs_inode_dquot(ip, type); |
823 | if (dqp1) { | 898 | if (dqp1) { |
824 | xfs_qm_dqdestroy(dqp); | 899 | xfs_qm_dqdestroy(dqp); |
825 | dqp = dqp1; | 900 | dqp = dqp1; |
826 | xfs_dqlock(dqp); | 901 | xfs_dqlock(dqp); |
827 | goto dqret; | 902 | goto dqret; |
828 | } | 903 | } |
829 | } else { | 904 | } else { |
830 | /* inode stays locked on return */ | 905 | /* inode stays locked on return */ |
831 | xfs_qm_dqdestroy(dqp); | 906 | xfs_qm_dqdestroy(dqp); |
832 | return XFS_ERROR(ESRCH); | 907 | return XFS_ERROR(ESRCH); |
833 | } | 908 | } |
834 | } | 909 | } |
835 | 910 | ||
836 | mutex_lock(&qi->qi_tree_lock); | 911 | mutex_lock(&qi->qi_tree_lock); |
837 | error = -radix_tree_insert(tree, id, dqp); | 912 | error = -radix_tree_insert(tree, id, dqp); |
838 | if (unlikely(error)) { | 913 | if (unlikely(error)) { |
839 | WARN_ON(error != EEXIST); | 914 | WARN_ON(error != EEXIST); |
840 | 915 | ||
841 | /* | 916 | /* |
842 | * Duplicate found. Just throw away the new dquot and start | 917 | * Duplicate found. Just throw away the new dquot and start |
843 | * over. | 918 | * over. |
844 | */ | 919 | */ |
845 | mutex_unlock(&qi->qi_tree_lock); | 920 | mutex_unlock(&qi->qi_tree_lock); |
846 | trace_xfs_dqget_dup(dqp); | 921 | trace_xfs_dqget_dup(dqp); |
847 | xfs_qm_dqdestroy(dqp); | 922 | xfs_qm_dqdestroy(dqp); |
848 | XFS_STATS_INC(xs_qm_dquot_dups); | 923 | XFS_STATS_INC(xs_qm_dquot_dups); |
849 | goto restart; | 924 | goto restart; |
850 | } | 925 | } |
851 | 926 | ||
852 | /* | 927 | /* |
853 | * We return a locked dquot to the caller, with a reference taken | 928 | * We return a locked dquot to the caller, with a reference taken |
854 | */ | 929 | */ |
855 | xfs_dqlock(dqp); | 930 | xfs_dqlock(dqp); |
856 | dqp->q_nrefs = 1; | 931 | dqp->q_nrefs = 1; |
857 | 932 | ||
858 | qi->qi_dquots++; | 933 | qi->qi_dquots++; |
859 | mutex_unlock(&qi->qi_tree_lock); | 934 | mutex_unlock(&qi->qi_tree_lock); |
860 | 935 | ||
861 | dqret: | 936 | dqret: |
862 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 937 | ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
863 | trace_xfs_dqget_miss(dqp); | 938 | trace_xfs_dqget_miss(dqp); |
864 | *O_dqpp = dqp; | 939 | *O_dqpp = dqp; |
865 | return 0; | 940 | return 0; |
866 | } | 941 | } |
867 | 942 | ||
868 | 943 | ||
869 | STATIC void | 944 | STATIC void |
870 | xfs_qm_dqput_final( | 945 | xfs_qm_dqput_final( |
871 | struct xfs_dquot *dqp) | 946 | struct xfs_dquot *dqp) |
872 | { | 947 | { |
873 | struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; | 948 | struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; |
874 | struct xfs_dquot *gdqp; | 949 | struct xfs_dquot *gdqp; |
875 | 950 | ||
876 | trace_xfs_dqput_free(dqp); | 951 | trace_xfs_dqput_free(dqp); |
877 | 952 | ||
878 | mutex_lock(&qi->qi_lru_lock); | 953 | mutex_lock(&qi->qi_lru_lock); |
879 | if (list_empty(&dqp->q_lru)) { | 954 | if (list_empty(&dqp->q_lru)) { |
880 | list_add_tail(&dqp->q_lru, &qi->qi_lru_list); | 955 | list_add_tail(&dqp->q_lru, &qi->qi_lru_list); |
881 | qi->qi_lru_count++; | 956 | qi->qi_lru_count++; |
882 | XFS_STATS_INC(xs_qm_dquot_unused); | 957 | XFS_STATS_INC(xs_qm_dquot_unused); |
883 | } | 958 | } |
884 | mutex_unlock(&qi->qi_lru_lock); | 959 | mutex_unlock(&qi->qi_lru_lock); |
885 | 960 | ||
886 | /* | 961 | /* |
887 | * If we just added a udquot to the freelist, then we want to release | 962 | * If we just added a udquot to the freelist, then we want to release |
888 | * the gdquot reference that it (probably) has. Otherwise it'll keep | 963 | * the gdquot reference that it (probably) has. Otherwise it'll keep |
889 | * the gdquot from getting reclaimed. | 964 | * the gdquot from getting reclaimed. |
890 | */ | 965 | */ |
891 | gdqp = dqp->q_gdquot; | 966 | gdqp = dqp->q_gdquot; |
892 | if (gdqp) { | 967 | if (gdqp) { |
893 | xfs_dqlock(gdqp); | 968 | xfs_dqlock(gdqp); |
894 | dqp->q_gdquot = NULL; | 969 | dqp->q_gdquot = NULL; |
895 | } | 970 | } |
896 | xfs_dqunlock(dqp); | 971 | xfs_dqunlock(dqp); |
897 | 972 | ||
898 | /* | 973 | /* |
899 | * If we had a group quota hint, release it now. | 974 | * If we had a group quota hint, release it now. |
900 | */ | 975 | */ |
901 | if (gdqp) | 976 | if (gdqp) |
902 | xfs_qm_dqput(gdqp); | 977 | xfs_qm_dqput(gdqp); |
903 | } | 978 | } |
904 | 979 | ||
905 | /* | 980 | /* |
906 | * Release a reference to the dquot (decrement ref-count) and unlock it. | 981 | * Release a reference to the dquot (decrement ref-count) and unlock it. |
907 | * | 982 | * |
908 | * If there is a group quota attached to this dquot, carefully release that | 983 | * If there is a group quota attached to this dquot, carefully release that |
909 | * too without tripping over deadlocks'n'stuff. | 984 | * too without tripping over deadlocks'n'stuff. |
910 | */ | 985 | */ |
911 | void | 986 | void |
912 | xfs_qm_dqput( | 987 | xfs_qm_dqput( |
913 | struct xfs_dquot *dqp) | 988 | struct xfs_dquot *dqp) |
914 | { | 989 | { |
915 | ASSERT(dqp->q_nrefs > 0); | 990 | ASSERT(dqp->q_nrefs > 0); |
916 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 991 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
917 | 992 | ||
918 | trace_xfs_dqput(dqp); | 993 | trace_xfs_dqput(dqp); |
919 | 994 | ||
920 | if (--dqp->q_nrefs > 0) | 995 | if (--dqp->q_nrefs > 0) |
921 | xfs_dqunlock(dqp); | 996 | xfs_dqunlock(dqp); |
922 | else | 997 | else |
923 | xfs_qm_dqput_final(dqp); | 998 | xfs_qm_dqput_final(dqp); |
924 | } | 999 | } |
925 | 1000 | ||
926 | /* | 1001 | /* |
927 | * Release a dquot. Flush it if dirty, then dqput() it. | 1002 | * Release a dquot. Flush it if dirty, then dqput() it. |
928 | * dquot must not be locked. | 1003 | * dquot must not be locked. |
929 | */ | 1004 | */ |
930 | void | 1005 | void |
931 | xfs_qm_dqrele( | 1006 | xfs_qm_dqrele( |
932 | xfs_dquot_t *dqp) | 1007 | xfs_dquot_t *dqp) |
933 | { | 1008 | { |
934 | if (!dqp) | 1009 | if (!dqp) |
935 | return; | 1010 | return; |
936 | 1011 | ||
937 | trace_xfs_dqrele(dqp); | 1012 | trace_xfs_dqrele(dqp); |
938 | 1013 | ||
939 | xfs_dqlock(dqp); | 1014 | xfs_dqlock(dqp); |
940 | /* | 1015 | /* |
941 | * We don't flush the dquot here even if it is dirty. | 1016 | * We don't flush the dquot here even if it is dirty. |
942 | * That would create stutters that we want to avoid. | 1017 | * That would create stutters that we want to avoid. |
943 | * Instead we do a delayed write when we try to reclaim | 1018 | * Instead we do a delayed write when we try to reclaim |
944 | * a dirty dquot. Also xfs_sync will take part of the burden... | 1019 | * a dirty dquot. Also xfs_sync will take part of the burden... |
945 | */ | 1020 | */ |
946 | xfs_qm_dqput(dqp); | 1021 | xfs_qm_dqput(dqp); |
947 | } | 1022 | } |
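
The three routines above form the dquot reference lifecycle: xfs_qm_dqget returns a locked dquot with a reference held, xfs_qm_dqput drops a reference from a locked dquot, and xfs_qm_dqrele is the convenience wrapper for unlocked callers. A minimal caller sketch, assuming only the functions visible in this file; the lookup id and everything around the xfs_* calls are illustrative, not taken from this diff:

	/*
	 * Hypothetical caller: look up the user dquot for `id`, inspect
	 * it, then drop the reference.
	 */
	static int
	example_peek_dquot(struct xfs_mount *mp, xfs_dqid_t id)
	{
		struct xfs_dquot	*dqp;
		int			error;

		/* On success this returns a locked dquot holding one reference. */
		error = xfs_qm_dqget(mp, NULL, id, XFS_DQ_USER, 0, &dqp);
		if (error)
			return error;

		/* ... inspect dqp->q_core while the dquot lock is held ... */

		xfs_dqunlock(dqp);	/* drop the lock, keep the reference */
		xfs_qm_dqrele(dqp);	/* dqrele expects an unlocked dquot */
		return 0;
	}
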
948 | 1023 | ||
949 | /* | 1024 | /* |
950 | * This is the dquot flushing I/O completion routine. It is called | 1025 | * This is the dquot flushing I/O completion routine. It is called |
951 | * from interrupt level when the buffer containing the dquot is | 1026 | * from interrupt level when the buffer containing the dquot is |
952 | * flushed to disk. It is responsible for removing the dquot logitem | 1027 | * flushed to disk. It is responsible for removing the dquot logitem |
953 | * from the AIL if it has not been re-logged, and unlocking the dquot's | 1028 | * from the AIL if it has not been re-logged, and unlocking the dquot's |
954 | * flush lock. This behavior is very similar to that of inodes. | 1029 | * flush lock. This behavior is very similar to that of inodes. |
955 | */ | 1030 | */ |
956 | STATIC void | 1031 | STATIC void |
957 | xfs_qm_dqflush_done( | 1032 | xfs_qm_dqflush_done( |
958 | struct xfs_buf *bp, | 1033 | struct xfs_buf *bp, |
959 | struct xfs_log_item *lip) | 1034 | struct xfs_log_item *lip) |
960 | { | 1035 | { |
961 | xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; | 1036 | xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; |
962 | xfs_dquot_t *dqp = qip->qli_dquot; | 1037 | xfs_dquot_t *dqp = qip->qli_dquot; |
963 | struct xfs_ail *ailp = lip->li_ailp; | 1038 | struct xfs_ail *ailp = lip->li_ailp; |
964 | 1039 | ||
965 | /* | 1040 | /* |
966 | * We only want to pull the item from the AIL if its | 1041 | * We only want to pull the item from the AIL if its |
967 | * location in the log has not changed since we started the flush. | 1042 | * location in the log has not changed since we started the flush. |
968 | * Thus, we only bother if the dquot's lsn has | 1043 | * Thus, we only bother if the dquot's lsn has |
969 | * not changed. First we check the lsn outside the lock | 1044 | * not changed. First we check the lsn outside the lock |
970 | * since it's cheaper, and then we recheck while | 1045 | * since it's cheaper, and then we recheck while |
971 | * holding the lock before removing the dquot from the AIL. | 1046 | * holding the lock before removing the dquot from the AIL. |
972 | */ | 1047 | */ |
973 | if ((lip->li_flags & XFS_LI_IN_AIL) && | 1048 | if ((lip->li_flags & XFS_LI_IN_AIL) && |
974 | lip->li_lsn == qip->qli_flush_lsn) { | 1049 | lip->li_lsn == qip->qli_flush_lsn) { |
975 | 1050 | ||
976 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 1051 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
977 | spin_lock(&ailp->xa_lock); | 1052 | spin_lock(&ailp->xa_lock); |
978 | if (lip->li_lsn == qip->qli_flush_lsn) | 1053 | if (lip->li_lsn == qip->qli_flush_lsn) |
979 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); | 1054 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); |
980 | else | 1055 | else |
981 | spin_unlock(&ailp->xa_lock); | 1056 | spin_unlock(&ailp->xa_lock); |
982 | } | 1057 | } |
983 | 1058 | ||
984 | /* | 1059 | /* |
985 | * Release the dq's flush lock since we're done with it. | 1060 | * Release the dq's flush lock since we're done with it. |
986 | */ | 1061 | */ |
987 | xfs_dqfunlock(dqp); | 1062 | xfs_dqfunlock(dqp); |
988 | } | 1063 | } |
989 | 1064 | ||
990 | /* | 1065 | /* |
991 | * Write a modified dquot to disk. | 1066 | * Write a modified dquot to disk. |
992 | * The dquot must be locked and the flush lock held by the caller. | 1067 | * The dquot must be locked and the flush lock held by the caller. |
993 | * The flush lock will not be unlocked until the dquot reaches the disk, | 1068 | * The flush lock will not be unlocked until the dquot reaches the disk, |
994 | * but the dquot is free to be unlocked and modified by the caller | 1069 | * but the dquot is free to be unlocked and modified by the caller |
995 | * in the interim. Dquot is still locked on return. This behavior is | 1070 | * in the interim. Dquot is still locked on return. This behavior is |
996 | * identical to that of inodes. | 1071 | * identical to that of inodes. |
997 | */ | 1072 | */ |
998 | int | 1073 | int |
999 | xfs_qm_dqflush( | 1074 | xfs_qm_dqflush( |
1000 | struct xfs_dquot *dqp, | 1075 | struct xfs_dquot *dqp, |
1001 | struct xfs_buf **bpp) | 1076 | struct xfs_buf **bpp) |
1002 | { | 1077 | { |
1003 | struct xfs_mount *mp = dqp->q_mount; | 1078 | struct xfs_mount *mp = dqp->q_mount; |
1004 | struct xfs_buf *bp; | 1079 | struct xfs_buf *bp; |
1005 | struct xfs_disk_dquot *ddqp; | 1080 | struct xfs_disk_dquot *ddqp; |
1006 | int error; | 1081 | int error; |
1007 | 1082 | ||
1008 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 1083 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
1009 | ASSERT(!completion_done(&dqp->q_flush)); | 1084 | ASSERT(!completion_done(&dqp->q_flush)); |
1010 | 1085 | ||
1011 | trace_xfs_dqflush(dqp); | 1086 | trace_xfs_dqflush(dqp); |
1012 | 1087 | ||
1013 | *bpp = NULL; | 1088 | *bpp = NULL; |
1014 | 1089 | ||
1015 | xfs_qm_dqunpin_wait(dqp); | 1090 | xfs_qm_dqunpin_wait(dqp); |
1016 | 1091 | ||
1017 | /* | 1092 | /* |
1018 | * This may have been unpinned because the filesystem is shutting | 1093 | * This may have been unpinned because the filesystem is shutting |
1019 | * down forcibly. If that's the case we must not write this dquot | 1094 | * down forcibly. If that's the case we must not write this dquot |
1020 | * to disk, because the log record didn't make it to disk. | 1095 | * to disk, because the log record didn't make it to disk. |
1021 | * | 1096 | * |
1022 | * We also have to remove the log item from the AIL in this case, | 1097 | * We also have to remove the log item from the AIL in this case, |
1023 | * as we wait for an empty AIL as part of the unmount process. | 1098 | * as we wait for an empty AIL as part of the unmount process. |
1024 | */ | 1099 | */ |
1025 | if (XFS_FORCED_SHUTDOWN(mp)) { | 1100 | if (XFS_FORCED_SHUTDOWN(mp)) { |
1026 | struct xfs_log_item *lip = &dqp->q_logitem.qli_item; | 1101 | struct xfs_log_item *lip = &dqp->q_logitem.qli_item; |
1027 | dqp->dq_flags &= ~XFS_DQ_DIRTY; | 1102 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
1028 | 1103 | ||
1029 | spin_lock(&mp->m_ail->xa_lock); | 1104 | spin_lock(&mp->m_ail->xa_lock); |
1030 | if (lip->li_flags & XFS_LI_IN_AIL) | 1105 | if (lip->li_flags & XFS_LI_IN_AIL) |
1031 | xfs_trans_ail_delete(mp->m_ail, lip, | 1106 | xfs_trans_ail_delete(mp->m_ail, lip, |
1032 | SHUTDOWN_CORRUPT_INCORE); | 1107 | SHUTDOWN_CORRUPT_INCORE); |
1033 | else | 1108 | else |
1034 | spin_unlock(&mp->m_ail->xa_lock); | 1109 | spin_unlock(&mp->m_ail->xa_lock); |
1035 | error = XFS_ERROR(EIO); | 1110 | error = XFS_ERROR(EIO); |
1036 | goto out_unlock; | 1111 | goto out_unlock; |
1037 | } | 1112 | } |
1038 | 1113 | ||
1039 | /* | 1114 | /* |
1040 | * Get the buffer containing the on-disk dquot | 1115 | * Get the buffer containing the on-disk dquot |
1041 | */ | 1116 | */ |
1042 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, | 1117 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, |
1043 | mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); | 1118 | mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); |
1044 | if (error) | 1119 | if (error) |
1045 | goto out_unlock; | 1120 | goto out_unlock; |
1046 | 1121 | ||
1047 | /* | 1122 | /* |
1048 | * Calculate the location of the dquot inside the buffer. | 1123 | * Calculate the location of the dquot inside the buffer. |
1049 | */ | 1124 | */ |
1050 | ddqp = bp->b_addr + dqp->q_bufoffset; | 1125 | ddqp = bp->b_addr + dqp->q_bufoffset; |
1051 | 1126 | ||
1052 | /* | 1127 | /* |
1053 | * A simple sanity check in case we got a corrupted dquot. | 1128 | * A simple sanity check in case we got a corrupted dquot. |
1054 | */ | 1129 | */ |
1055 | error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, | 1130 | error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, |
1056 | XFS_QMOPT_DOWARN, "dqflush (incore copy)"); | 1131 | XFS_QMOPT_DOWARN, "dqflush (incore copy)"); |
1057 | if (error) { | 1132 | if (error) { |
1058 | xfs_buf_relse(bp); | 1133 | xfs_buf_relse(bp); |
1059 | xfs_dqfunlock(dqp); | 1134 | xfs_dqfunlock(dqp); |
1060 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 1135 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
1061 | return XFS_ERROR(EIO); | 1136 | return XFS_ERROR(EIO); |
1062 | } | 1137 | } |
1063 | 1138 | ||
1064 | /* This is the only portion of data that needs to persist */ | 1139 | /* This is the only portion of data that needs to persist */ |
1065 | memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); | 1140 | memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); |
1066 | 1141 | ||
1067 | /* | 1142 | /* |
1068 | * Clear the dirty field and remember the flush lsn for later use. | 1143 | * Clear the dirty field and remember the flush lsn for later use. |
1069 | */ | 1144 | */ |
1070 | dqp->dq_flags &= ~XFS_DQ_DIRTY; | 1145 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
1071 | 1146 | ||
1072 | xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, | 1147 | xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, |
1073 | &dqp->q_logitem.qli_item.li_lsn); | 1148 | &dqp->q_logitem.qli_item.li_lsn); |
1149 | |||
1150 | /* | ||
1151 | * copy the lsn into the on-disk dquot now while we have the in memory | ||
1152 | * dquot here. This can't be done later in the write verifier as we | ||
1153 | * can't get access to the log item at that point in time. | ||
1154 | */ | ||
1155 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
1156 | struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp; | ||
1157 | |||
1158 | dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn); | ||
1159 | } | ||
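
The LSN stamped into dd_lsn above pairs with the CRC this patch adds to struct xfs_dqblk: once the dquot core and LSN are in place, the buffer verifier can checksum each on-disk dqblk. A minimal sketch of that per-dquot CRC update, assuming the xfs_update_cksum helper from xfs_cksum.h and the dd_crc field introduced by this change; the surrounding verifier loop and function name are illustrative:

	/*
	 * Sketch only: recompute the CRC over one on-disk dquot block.
	 * Assumes xfs_update_cksum() from xfs_cksum.h and the dd_crc
	 * field added to struct xfs_dqblk by this patch.
	 */
	static void
	example_dqblk_calc_crc(struct xfs_mount *mp, struct xfs_dqblk *dqb)
	{
		if (!xfs_sb_version_hascrc(&mp->m_sb))
			return;
		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
				 offsetof(struct xfs_dqblk, dd_crc));
	}
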
1074 | 1160 | ||
1075 | /* | 1161 | /* |
1076 | * Attach an iodone routine so that we can remove this dquot from the | 1162 | * Attach an iodone routine so that we can remove this dquot from the |
1077 | * AIL and release the flush lock once the dquot is synced to disk. | 1163 | * AIL and release the flush lock once the dquot is synced to disk. |
1078 | */ | 1164 | */ |
1079 | xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, | 1165 | xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, |
1080 | &dqp->q_logitem.qli_item); | 1166 | &dqp->q_logitem.qli_item); |
1081 | 1167 | ||
1082 | /* | 1168 | /* |
1083 | * If the buffer is pinned then push on the log so we won't | 1169 | * If the buffer is pinned then push on the log so we won't |
1084 | * get stuck waiting in the write for too long. | 1170 | * get stuck waiting in the write for too long. |
1085 | */ | 1171 | */ |
1086 | if (xfs_buf_ispinned(bp)) { | 1172 | if (xfs_buf_ispinned(bp)) { |
1087 | trace_xfs_dqflush_force(dqp); | 1173 | trace_xfs_dqflush_force(dqp); |
1088 | xfs_log_force(mp, 0); | 1174 | xfs_log_force(mp, 0); |
1089 | } | 1175 | } |
1090 | 1176 | ||
1091 | trace_xfs_dqflush_done(dqp); | 1177 | trace_xfs_dqflush_done(dqp); |
1092 | *bpp = bp; | 1178 | *bpp = bp; |
1093 | return 0; | 1179 | return 0; |
1094 | 1180 | ||
1095 | out_unlock: | 1181 | out_unlock: |
1096 | xfs_dqfunlock(dqp); | 1182 | xfs_dqfunlock(dqp); |
1097 | return XFS_ERROR(EIO); | 1183 | return XFS_ERROR(EIO); |
1098 | } | 1184 | } |
1099 | 1185 | ||
1100 | /* | 1186 | /* |
1101 | * Lock two xfs_dquot structures. | 1187 | * Lock two xfs_dquot structures. |
1102 | * | 1188 | * |
1103 | * To avoid deadlocks we always lock the quota structure with | 1189 | * To avoid deadlocks we always lock the quota structure with |
1104 | * the lower id first. | 1190 | * the lower id first. |
1105 | */ | 1191 | */ |
1106 | void | 1192 | void |
1107 | xfs_dqlock2( | 1193 | xfs_dqlock2( |
1108 | xfs_dquot_t *d1, | 1194 | xfs_dquot_t *d1, |
1109 | xfs_dquot_t *d2) | 1195 | xfs_dquot_t *d2) |
1110 | { | 1196 | { |
1111 | if (d1 && d2) { | 1197 | if (d1 && d2) { |
1112 | ASSERT(d1 != d2); | 1198 | ASSERT(d1 != d2); |
1113 | if (be32_to_cpu(d1->q_core.d_id) > | 1199 | if (be32_to_cpu(d1->q_core.d_id) > |
1114 | be32_to_cpu(d2->q_core.d_id)) { | 1200 | be32_to_cpu(d2->q_core.d_id)) { |
1115 | mutex_lock(&d2->q_qlock); | 1201 | mutex_lock(&d2->q_qlock); |
1116 | mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); | 1202 | mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); |
1117 | } else { | 1203 | } else { |
1118 | mutex_lock(&d1->q_qlock); | 1204 | mutex_lock(&d1->q_qlock); |
1119 | mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); | 1205 | mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); |
1120 | } | 1206 | } |
1121 | } else if (d1) { | 1207 | } else if (d1) { |
1122 | mutex_lock(&d1->q_qlock); | 1208 | mutex_lock(&d1->q_qlock); |
1123 | } else if (d2) { | 1209 | } else if (d2) { |
1124 | mutex_lock(&d2->q_qlock); | 1210 | mutex_lock(&d2->q_qlock); |
1125 | } | 1211 | } |
1126 | } | 1212 | } |
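
The id ordering in xfs_dqlock2 establishes a single global lock hierarchy, so two tasks locking the same pair in opposite argument order cannot deadlock. A hypothetical caller that must hold a user/group dquot pair at once (udqp/gdqp stand for any two dquots; the function is illustrative):

	static void
	example_adjust_pair(struct xfs_dquot *udqp, struct xfs_dquot *gdqp)
	{
		xfs_dqlock2(udqp, gdqp);	/* lower d_id locked first */
		/* ... adjust both dquots' in-core counters here ... */
		xfs_dqunlock(gdqp);
		xfs_dqunlock(udqp);		/* unlock order is unconstrained */
	}
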
1127 | 1213 | ||
1128 | int __init | 1214 | int __init |
1129 | xfs_qm_init(void) | 1215 | xfs_qm_init(void) |
1130 | { | 1216 | { |
1131 | xfs_qm_dqzone = | 1217 | xfs_qm_dqzone = |
1132 | kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot"); | 1218 | kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot"); |
1133 | if (!xfs_qm_dqzone) | 1219 | if (!xfs_qm_dqzone) |
1134 | goto out; | 1220 | goto out; |
1135 | 1221 | ||
1136 | xfs_qm_dqtrxzone = | 1222 | xfs_qm_dqtrxzone = |
1137 | kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx"); | 1223 | kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx"); |
1138 | if (!xfs_qm_dqtrxzone) | 1224 | if (!xfs_qm_dqtrxzone) |
1139 | goto out_free_dqzone; | 1225 | goto out_free_dqzone; |
1140 | 1226 | ||
1141 | return 0; | 1227 | return 0; |
1142 | 1228 | ||
1143 | out_free_dqzone: | 1229 | out_free_dqzone: |
1144 | kmem_zone_destroy(xfs_qm_dqzone); | 1230 | kmem_zone_destroy(xfs_qm_dqzone); |
1145 | out: | 1231 | out: |
1146 | return -ENOMEM; | 1232 | return -ENOMEM; |
1147 | } | 1233 | } |
1148 | 1234 | ||
1149 | void | 1235 | void |
1150 | xfs_qm_exit(void) | 1236 | xfs_qm_exit(void) |
fs/xfs/xfs_log_recover.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
27 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
28 | #include "xfs_error.h" | 28 | #include "xfs_error.h" |
29 | #include "xfs_bmap_btree.h" | 29 | #include "xfs_bmap_btree.h" |
30 | #include "xfs_alloc_btree.h" | 30 | #include "xfs_alloc_btree.h" |
31 | #include "xfs_ialloc_btree.h" | 31 | #include "xfs_ialloc_btree.h" |
32 | #include "xfs_btree.h" | 32 | #include "xfs_btree.h" |
33 | #include "xfs_dinode.h" | 33 | #include "xfs_dinode.h" |
34 | #include "xfs_inode.h" | 34 | #include "xfs_inode.h" |
35 | #include "xfs_inode_item.h" | 35 | #include "xfs_inode_item.h" |
36 | #include "xfs_alloc.h" | 36 | #include "xfs_alloc.h" |
37 | #include "xfs_ialloc.h" | 37 | #include "xfs_ialloc.h" |
38 | #include "xfs_log_priv.h" | 38 | #include "xfs_log_priv.h" |
39 | #include "xfs_buf_item.h" | 39 | #include "xfs_buf_item.h" |
40 | #include "xfs_log_recover.h" | 40 | #include "xfs_log_recover.h" |
41 | #include "xfs_extfree_item.h" | 41 | #include "xfs_extfree_item.h" |
42 | #include "xfs_trans_priv.h" | 42 | #include "xfs_trans_priv.h" |
43 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
44 | #include "xfs_utils.h" | 44 | #include "xfs_utils.h" |
45 | #include "xfs_cksum.h" | 45 | #include "xfs_cksum.h" |
46 | #include "xfs_trace.h" | 46 | #include "xfs_trace.h" |
47 | #include "xfs_icache.h" | 47 | #include "xfs_icache.h" |
48 | 48 | ||
49 | STATIC int | 49 | STATIC int |
50 | xlog_find_zeroed( | 50 | xlog_find_zeroed( |
51 | struct xlog *, | 51 | struct xlog *, |
52 | xfs_daddr_t *); | 52 | xfs_daddr_t *); |
53 | STATIC int | 53 | STATIC int |
54 | xlog_clear_stale_blocks( | 54 | xlog_clear_stale_blocks( |
55 | struct xlog *, | 55 | struct xlog *, |
56 | xfs_lsn_t); | 56 | xfs_lsn_t); |
57 | #if defined(DEBUG) | 57 | #if defined(DEBUG) |
58 | STATIC void | 58 | STATIC void |
59 | xlog_recover_check_summary( | 59 | xlog_recover_check_summary( |
60 | struct xlog *); | 60 | struct xlog *); |
61 | #else | 61 | #else |
62 | #define xlog_recover_check_summary(log) | 62 | #define xlog_recover_check_summary(log) |
63 | #endif | 63 | #endif |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * This structure is used during recovery to record the buf log items which | 66 | * This structure is used during recovery to record the buf log items which |
67 | * have been canceled and should not be replayed. | 67 | * have been canceled and should not be replayed. |
68 | */ | 68 | */ |
69 | struct xfs_buf_cancel { | 69 | struct xfs_buf_cancel { |
70 | xfs_daddr_t bc_blkno; | 70 | xfs_daddr_t bc_blkno; |
71 | uint bc_len; | 71 | uint bc_len; |
72 | int bc_refcount; | 72 | int bc_refcount; |
73 | struct list_head bc_list; | 73 | struct list_head bc_list; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * Sector aligned buffer routines for buffer create/read/write/access | 77 | * Sector aligned buffer routines for buffer create/read/write/access |
78 | */ | 78 | */ |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Verify the given count of basic blocks is a valid number of blocks | 81 | * Verify the given count of basic blocks is a valid number of blocks |
82 | * to specify for an operation involving the given XFS log buffer. | 82 | * to specify for an operation involving the given XFS log buffer. |
83 | * Returns nonzero if the count is valid, 0 otherwise. | 83 | * Returns nonzero if the count is valid, 0 otherwise. |
84 | */ | 84 | */ |
85 | 85 | ||
86 | static inline int | 86 | static inline int |
87 | xlog_buf_bbcount_valid( | 87 | xlog_buf_bbcount_valid( |
88 | struct xlog *log, | 88 | struct xlog *log, |
89 | int bbcount) | 89 | int bbcount) |
90 | { | 90 | { |
91 | return bbcount > 0 && bbcount <= log->l_logBBsize; | 91 | return bbcount > 0 && bbcount <= log->l_logBBsize; |
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* |
95 | * Allocate a buffer to hold log data. The buffer needs to be able | 95 | * Allocate a buffer to hold log data. The buffer needs to be able |
96 | * to map to a range of nbblks basic blocks at any valid (basic | 96 | * to map to a range of nbblks basic blocks at any valid (basic |
97 | * block) offset within the log. | 97 | * block) offset within the log. |
98 | */ | 98 | */ |
99 | STATIC xfs_buf_t * | 99 | STATIC xfs_buf_t * |
100 | xlog_get_bp( | 100 | xlog_get_bp( |
101 | struct xlog *log, | 101 | struct xlog *log, |
102 | int nbblks) | 102 | int nbblks) |
103 | { | 103 | { |
104 | struct xfs_buf *bp; | 104 | struct xfs_buf *bp; |
105 | 105 | ||
106 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 106 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
107 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", | 107 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
108 | nbblks); | 108 | nbblks); |
109 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 109 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
110 | return NULL; | 110 | return NULL; |
111 | } | 111 | } |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * We do log I/O in units of log sectors (a power-of-2 | 114 | * We do log I/O in units of log sectors (a power-of-2 |
115 | * multiple of the basic block size), so we round up the | 115 | * multiple of the basic block size), so we round up the |
116 | * requested size to accommodate the basic blocks required | 116 | * requested size to accommodate the basic blocks required |
117 | * for complete log sectors. | 117 | * for complete log sectors. |
118 | * | 118 | * |
119 | * In addition, the buffer may be used for a non-sector- | 119 | * In addition, the buffer may be used for a non-sector- |
120 | * aligned block offset, in which case an I/O of the | 120 | * aligned block offset, in which case an I/O of the |
121 | * requested size could extend beyond the end of the | 121 | * requested size could extend beyond the end of the |
122 | * buffer. If the requested size is only 1 basic block it | 122 | * buffer. If the requested size is only 1 basic block it |
123 | * will never straddle a sector boundary, so this won't be | 123 | * will never straddle a sector boundary, so this won't be |
124 | * an issue. Nor will this be a problem if the log I/O is | 124 | * an issue. Nor will this be a problem if the log I/O is |
125 | * done in basic blocks (sector size 1). But otherwise we | 125 | * done in basic blocks (sector size 1). But otherwise we |
126 | * extend the buffer by one extra log sector to ensure | 126 | * extend the buffer by one extra log sector to ensure |
127 | * there's space to accommodate this possibility. | 127 | * there's space to accommodate this possibility. |
128 | */ | 128 | */ |
129 | if (nbblks > 1 && log->l_sectBBsize > 1) | 129 | if (nbblks > 1 && log->l_sectBBsize > 1) |
130 | nbblks += log->l_sectBBsize; | 130 | nbblks += log->l_sectBBsize; |
131 | nbblks = round_up(nbblks, log->l_sectBBsize); | 131 | nbblks = round_up(nbblks, log->l_sectBBsize); |
132 | 132 | ||
133 | bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); | 133 | bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); |
134 | if (bp) | 134 | if (bp) |
135 | xfs_buf_unlock(bp); | 135 | xfs_buf_unlock(bp); |
136 | return bp; | 136 | return bp; |
137 | } | 137 | } |
138 | 138 | ||
139 | STATIC void | 139 | STATIC void |
140 | xlog_put_bp( | 140 | xlog_put_bp( |
141 | xfs_buf_t *bp) | 141 | xfs_buf_t *bp) |
142 | { | 142 | { |
143 | xfs_buf_free(bp); | 143 | xfs_buf_free(bp); |
144 | } | 144 | } |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * Return the address of the start of the given block number's data | 147 | * Return the address of the start of the given block number's data |
148 | * in a log buffer. The buffer covers a log sector-aligned region. | 148 | * in a log buffer. The buffer covers a log sector-aligned region. |
149 | */ | 149 | */ |
150 | STATIC xfs_caddr_t | 150 | STATIC xfs_caddr_t |
151 | xlog_align( | 151 | xlog_align( |
152 | struct xlog *log, | 152 | struct xlog *log, |
153 | xfs_daddr_t blk_no, | 153 | xfs_daddr_t blk_no, |
154 | int nbblks, | 154 | int nbblks, |
155 | struct xfs_buf *bp) | 155 | struct xfs_buf *bp) |
156 | { | 156 | { |
157 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); | 157 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); |
158 | 158 | ||
159 | ASSERT(offset + nbblks <= bp->b_length); | 159 | ASSERT(offset + nbblks <= bp->b_length); |
160 | return bp->b_addr + BBTOB(offset); | 160 | return bp->b_addr + BBTOB(offset); |
161 | } | 161 | } |
162 | 162 | ||
163 | 163 | ||
164 | /* | 164 | /* |
165 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. | 165 | * nbblks should be uint, but oh well. Just want to catch that 32-bit length. |
166 | */ | 166 | */ |
167 | STATIC int | 167 | STATIC int |
168 | xlog_bread_noalign( | 168 | xlog_bread_noalign( |
169 | struct xlog *log, | 169 | struct xlog *log, |
170 | xfs_daddr_t blk_no, | 170 | xfs_daddr_t blk_no, |
171 | int nbblks, | 171 | int nbblks, |
172 | struct xfs_buf *bp) | 172 | struct xfs_buf *bp) |
173 | { | 173 | { |
174 | int error; | 174 | int error; |
175 | 175 | ||
176 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 176 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
177 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", | 177 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
178 | nbblks); | 178 | nbblks); |
179 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 179 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
180 | return EFSCORRUPTED; | 180 | return EFSCORRUPTED; |
181 | } | 181 | } |
182 | 182 | ||
183 | blk_no = round_down(blk_no, log->l_sectBBsize); | 183 | blk_no = round_down(blk_no, log->l_sectBBsize); |
184 | nbblks = round_up(nbblks, log->l_sectBBsize); | 184 | nbblks = round_up(nbblks, log->l_sectBBsize); |
185 | 185 | ||
186 | ASSERT(nbblks > 0); | 186 | ASSERT(nbblks > 0); |
187 | ASSERT(nbblks <= bp->b_length); | 187 | ASSERT(nbblks <= bp->b_length); |
188 | 188 | ||
189 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 189 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
190 | XFS_BUF_READ(bp); | 190 | XFS_BUF_READ(bp); |
191 | bp->b_io_length = nbblks; | 191 | bp->b_io_length = nbblks; |
192 | bp->b_error = 0; | 192 | bp->b_error = 0; |
193 | 193 | ||
194 | xfsbdstrat(log->l_mp, bp); | 194 | xfsbdstrat(log->l_mp, bp); |
195 | error = xfs_buf_iowait(bp); | 195 | error = xfs_buf_iowait(bp); |
196 | if (error) | 196 | if (error) |
197 | xfs_buf_ioerror_alert(bp, __func__); | 197 | xfs_buf_ioerror_alert(bp, __func__); |
198 | return error; | 198 | return error; |
199 | } | 199 | } |
200 | 200 | ||
201 | STATIC int | 201 | STATIC int |
202 | xlog_bread( | 202 | xlog_bread( |
203 | struct xlog *log, | 203 | struct xlog *log, |
204 | xfs_daddr_t blk_no, | 204 | xfs_daddr_t blk_no, |
205 | int nbblks, | 205 | int nbblks, |
206 | struct xfs_buf *bp, | 206 | struct xfs_buf *bp, |
207 | xfs_caddr_t *offset) | 207 | xfs_caddr_t *offset) |
208 | { | 208 | { |
209 | int error; | 209 | int error; |
210 | 210 | ||
211 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); | 211 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); |
212 | if (error) | 212 | if (error) |
213 | return error; | 213 | return error; |
214 | 214 | ||
215 | *offset = xlog_align(log, blk_no, nbblks, bp); | 215 | *offset = xlog_align(log, blk_no, nbblks, bp); |
216 | return 0; | 216 | return 0; |
217 | } | 217 | } |
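
To see the alignment math at work: on a log device with 4k sectors, l_sectBBsize is 8 basic blocks, so a request for blocks 5..6 is widened to a whole-sector read by xlog_bread_noalign, and xlog_align then recovers the caller's byte offset inside the buffer. A standalone arithmetic sketch; the macros below are local stand-ins for the kernel's round_down/round_up/BBTOB, not the real definitions:

	#include <stdio.h>

	/* Local stand-ins for round_down/round_up/BBTOB (BBSIZE = 512). */
	#define RD(x, a)	((x) & ~((a) - 1))
	#define RU(x, a)	(((x) + (a) - 1) & ~((a) - 1))
	#define BBTOB(bb)	((bb) << 9)

	int main(void)
	{
		long sect = 8;			/* l_sectBBsize for 4k sectors */
		long blk_no = 5, nbblks = 2;	/* caller wants blocks 5..6 */

		long io_blk = RD(blk_no, sect);		/* 0: sector-aligned start */
		long io_len = RU(nbblks, sect);		/* 8: whole sectors read */
		long off = BBTOB(blk_no & (sect - 1));	/* 2560: data offset */

		printf("read %ld blocks at %ld, data at byte offset %ld\n",
		       io_len, io_blk, off);
		return 0;
	}
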
218 | 218 | ||
219 | /* | 219 | /* |
220 | * Read at an offset into the buffer. Returns with the buffer in its original | 220 | * Read at an offset into the buffer. Returns with the buffer in its original |
221 | * state regardless of the result of the read. | 221 | * state regardless of the result of the read. |
222 | */ | 222 | */ |
223 | STATIC int | 223 | STATIC int |
224 | xlog_bread_offset( | 224 | xlog_bread_offset( |
225 | struct xlog *log, | 225 | struct xlog *log, |
226 | xfs_daddr_t blk_no, /* block to read from */ | 226 | xfs_daddr_t blk_no, /* block to read from */ |
227 | int nbblks, /* blocks to read */ | 227 | int nbblks, /* blocks to read */ |
228 | struct xfs_buf *bp, | 228 | struct xfs_buf *bp, |
229 | xfs_caddr_t offset) | 229 | xfs_caddr_t offset) |
230 | { | 230 | { |
231 | xfs_caddr_t orig_offset = bp->b_addr; | 231 | xfs_caddr_t orig_offset = bp->b_addr; |
232 | int orig_len = BBTOB(bp->b_length); | 232 | int orig_len = BBTOB(bp->b_length); |
233 | int error, error2; | 233 | int error, error2; |
234 | 234 | ||
235 | error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); | 235 | error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); |
236 | if (error) | 236 | if (error) |
237 | return error; | 237 | return error; |
238 | 238 | ||
239 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); | 239 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); |
240 | 240 | ||
241 | /* must reset buffer pointer even on error */ | 241 | /* must reset buffer pointer even on error */ |
242 | error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); | 242 | error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); |
243 | if (error) | 243 | if (error) |
244 | return error; | 244 | return error; |
245 | return error2; | 245 | return error2; |
246 | } | 246 | } |
247 | 247 | ||
248 | /* | 248 | /* |
249 | * Write out the buffer at the given block for the given number of blocks. | 249 | * Write out the buffer at the given block for the given number of blocks. |
250 | * The buffer is kept locked across the write and is returned locked. | 250 | * The buffer is kept locked across the write and is returned locked. |
251 | * This can only be used for synchronous log writes. | 251 | * This can only be used for synchronous log writes. |
252 | */ | 252 | */ |
253 | STATIC int | 253 | STATIC int |
254 | xlog_bwrite( | 254 | xlog_bwrite( |
255 | struct xlog *log, | 255 | struct xlog *log, |
256 | xfs_daddr_t blk_no, | 256 | xfs_daddr_t blk_no, |
257 | int nbblks, | 257 | int nbblks, |
258 | struct xfs_buf *bp) | 258 | struct xfs_buf *bp) |
259 | { | 259 | { |
260 | int error; | 260 | int error; |
261 | 261 | ||
262 | if (!xlog_buf_bbcount_valid(log, nbblks)) { | 262 | if (!xlog_buf_bbcount_valid(log, nbblks)) { |
263 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", | 263 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
264 | nbblks); | 264 | nbblks); |
265 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); | 265 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
266 | return EFSCORRUPTED; | 266 | return EFSCORRUPTED; |
267 | } | 267 | } |
268 | 268 | ||
269 | blk_no = round_down(blk_no, log->l_sectBBsize); | 269 | blk_no = round_down(blk_no, log->l_sectBBsize); |
270 | nbblks = round_up(nbblks, log->l_sectBBsize); | 270 | nbblks = round_up(nbblks, log->l_sectBBsize); |
271 | 271 | ||
272 | ASSERT(nbblks > 0); | 272 | ASSERT(nbblks > 0); |
273 | ASSERT(nbblks <= bp->b_length); | 273 | ASSERT(nbblks <= bp->b_length); |
274 | 274 | ||
275 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 275 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
276 | XFS_BUF_ZEROFLAGS(bp); | 276 | XFS_BUF_ZEROFLAGS(bp); |
277 | xfs_buf_hold(bp); | 277 | xfs_buf_hold(bp); |
278 | xfs_buf_lock(bp); | 278 | xfs_buf_lock(bp); |
279 | bp->b_io_length = nbblks; | 279 | bp->b_io_length = nbblks; |
280 | bp->b_error = 0; | 280 | bp->b_error = 0; |
281 | 281 | ||
282 | error = xfs_bwrite(bp); | 282 | error = xfs_bwrite(bp); |
283 | if (error) | 283 | if (error) |
284 | xfs_buf_ioerror_alert(bp, __func__); | 284 | xfs_buf_ioerror_alert(bp, __func__); |
285 | xfs_buf_relse(bp); | 285 | xfs_buf_relse(bp); |
286 | return error; | 286 | return error; |
287 | } | 287 | } |
288 | 288 | ||
289 | #ifdef DEBUG | 289 | #ifdef DEBUG |
290 | /* | 290 | /* |
291 | * dump debug superblock and log record information | 291 | * dump debug superblock and log record information |
292 | */ | 292 | */ |
293 | STATIC void | 293 | STATIC void |
294 | xlog_header_check_dump( | 294 | xlog_header_check_dump( |
295 | xfs_mount_t *mp, | 295 | xfs_mount_t *mp, |
296 | xlog_rec_header_t *head) | 296 | xlog_rec_header_t *head) |
297 | { | 297 | { |
298 | xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", | 298 | xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", |
299 | __func__, &mp->m_sb.sb_uuid, XLOG_FMT); | 299 | __func__, &mp->m_sb.sb_uuid, XLOG_FMT); |
300 | xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", | 300 | xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", |
301 | &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); | 301 | &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); |
302 | } | 302 | } |
303 | #else | 303 | #else |
304 | #define xlog_header_check_dump(mp, head) | 304 | #define xlog_header_check_dump(mp, head) |
305 | #endif | 305 | #endif |
306 | 306 | ||
307 | /* | 307 | /* |
308 | * check log record header for recovery | 308 | * check log record header for recovery |
309 | */ | 309 | */ |
310 | STATIC int | 310 | STATIC int |
311 | xlog_header_check_recover( | 311 | xlog_header_check_recover( |
312 | xfs_mount_t *mp, | 312 | xfs_mount_t *mp, |
313 | xlog_rec_header_t *head) | 313 | xlog_rec_header_t *head) |
314 | { | 314 | { |
315 | ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); | 315 | ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); |
316 | 316 | ||
317 | /* | 317 | /* |
318 | * IRIX doesn't write the h_fmt field and leaves it zeroed | 318 | * IRIX doesn't write the h_fmt field and leaves it zeroed |
319 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover | 319 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover |
320 | * a dirty log created in IRIX. | 320 | * a dirty log created in IRIX. |
321 | */ | 321 | */ |
322 | if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { | 322 | if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { |
323 | xfs_warn(mp, | 323 | xfs_warn(mp, |
324 | "dirty log written in incompatible format - can't recover"); | 324 | "dirty log written in incompatible format - can't recover"); |
325 | xlog_header_check_dump(mp, head); | 325 | xlog_header_check_dump(mp, head); |
326 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", | 326 | XFS_ERROR_REPORT("xlog_header_check_recover(1)", |
327 | XFS_ERRLEVEL_HIGH, mp); | 327 | XFS_ERRLEVEL_HIGH, mp); |
328 | return XFS_ERROR(EFSCORRUPTED); | 328 | return XFS_ERROR(EFSCORRUPTED); |
329 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 329 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
330 | xfs_warn(mp, | 330 | xfs_warn(mp, |
331 | "dirty log entry has mismatched uuid - can't recover"); | 331 | "dirty log entry has mismatched uuid - can't recover"); |
332 | xlog_header_check_dump(mp, head); | 332 | xlog_header_check_dump(mp, head); |
333 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", | 333 | XFS_ERROR_REPORT("xlog_header_check_recover(2)", |
334 | XFS_ERRLEVEL_HIGH, mp); | 334 | XFS_ERRLEVEL_HIGH, mp); |
335 | return XFS_ERROR(EFSCORRUPTED); | 335 | return XFS_ERROR(EFSCORRUPTED); |
336 | } | 336 | } |
337 | return 0; | 337 | return 0; |
338 | } | 338 | } |
339 | 339 | ||
340 | /* | 340 | /* |
341 | * read the head block of the log and check the header | 341 | * read the head block of the log and check the header |
342 | */ | 342 | */ |
343 | STATIC int | 343 | STATIC int |
344 | xlog_header_check_mount( | 344 | xlog_header_check_mount( |
345 | xfs_mount_t *mp, | 345 | xfs_mount_t *mp, |
346 | xlog_rec_header_t *head) | 346 | xlog_rec_header_t *head) |
347 | { | 347 | { |
348 | ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); | 348 | ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); |
349 | 349 | ||
350 | if (uuid_is_nil(&head->h_fs_uuid)) { | 350 | if (uuid_is_nil(&head->h_fs_uuid)) { |
351 | /* | 351 | /* |
352 | * IRIX doesn't write the h_fs_uuid or h_fmt fields. If | 352 | * IRIX doesn't write the h_fs_uuid or h_fmt fields. If |
353 | * h_fs_uuid is nil, we assume this log was last mounted | 353 | * h_fs_uuid is nil, we assume this log was last mounted |
354 | * by IRIX and continue. | 354 | * by IRIX and continue. |
355 | */ | 355 | */ |
356 | xfs_warn(mp, "nil uuid in log - IRIX style log"); | 356 | xfs_warn(mp, "nil uuid in log - IRIX style log"); |
357 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { | 357 | } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
358 | xfs_warn(mp, "log has mismatched uuid - can't recover"); | 358 | xfs_warn(mp, "log has mismatched uuid - can't recover"); |
359 | xlog_header_check_dump(mp, head); | 359 | xlog_header_check_dump(mp, head); |
360 | XFS_ERROR_REPORT("xlog_header_check_mount", | 360 | XFS_ERROR_REPORT("xlog_header_check_mount", |
361 | XFS_ERRLEVEL_HIGH, mp); | 361 | XFS_ERRLEVEL_HIGH, mp); |
362 | return XFS_ERROR(EFSCORRUPTED); | 362 | return XFS_ERROR(EFSCORRUPTED); |
363 | } | 363 | } |
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
367 | STATIC void | 367 | STATIC void |
368 | xlog_recover_iodone( | 368 | xlog_recover_iodone( |
369 | struct xfs_buf *bp) | 369 | struct xfs_buf *bp) |
370 | { | 370 | { |
371 | if (bp->b_error) { | 371 | if (bp->b_error) { |
372 | /* | 372 | /* |
373 | * We're not going to bother about retrying | 373 | * We're not going to bother about retrying |
374 | * this during recovery. One strike! | 374 | * this during recovery. One strike! |
375 | */ | 375 | */ |
376 | xfs_buf_ioerror_alert(bp, __func__); | 376 | xfs_buf_ioerror_alert(bp, __func__); |
377 | xfs_force_shutdown(bp->b_target->bt_mount, | 377 | xfs_force_shutdown(bp->b_target->bt_mount, |
378 | SHUTDOWN_META_IO_ERROR); | 378 | SHUTDOWN_META_IO_ERROR); |
379 | } | 379 | } |
380 | bp->b_iodone = NULL; | 380 | bp->b_iodone = NULL; |
381 | xfs_buf_ioend(bp, 0); | 381 | xfs_buf_ioend(bp, 0); |
382 | } | 382 | } |
383 | 383 | ||
384 | /* | 384 | /* |
385 | * This routine finds (to an approximation) the first block in the physical | 385 | * This routine finds (to an approximation) the first block in the physical |
386 | * log which contains the given cycle. It uses a binary search algorithm. | 386 | * log which contains the given cycle. It uses a binary search algorithm. |
387 | * Note that the algorithm cannot be perfect because the disk will not | 387 | * Note that the algorithm cannot be perfect because the disk will not |
388 | * necessarily be perfect. | 388 | * necessarily be perfect. |
389 | */ | 389 | */ |
390 | STATIC int | 390 | STATIC int |
391 | xlog_find_cycle_start( | 391 | xlog_find_cycle_start( |
392 | struct xlog *log, | 392 | struct xlog *log, |
393 | struct xfs_buf *bp, | 393 | struct xfs_buf *bp, |
394 | xfs_daddr_t first_blk, | 394 | xfs_daddr_t first_blk, |
395 | xfs_daddr_t *last_blk, | 395 | xfs_daddr_t *last_blk, |
396 | uint cycle) | 396 | uint cycle) |
397 | { | 397 | { |
398 | xfs_caddr_t offset; | 398 | xfs_caddr_t offset; |
399 | xfs_daddr_t mid_blk; | 399 | xfs_daddr_t mid_blk; |
400 | xfs_daddr_t end_blk; | 400 | xfs_daddr_t end_blk; |
401 | uint mid_cycle; | 401 | uint mid_cycle; |
402 | int error; | 402 | int error; |
403 | 403 | ||
404 | end_blk = *last_blk; | 404 | end_blk = *last_blk; |
405 | mid_blk = BLK_AVG(first_blk, end_blk); | 405 | mid_blk = BLK_AVG(first_blk, end_blk); |
406 | while (mid_blk != first_blk && mid_blk != end_blk) { | 406 | while (mid_blk != first_blk && mid_blk != end_blk) { |
407 | error = xlog_bread(log, mid_blk, 1, bp, &offset); | 407 | error = xlog_bread(log, mid_blk, 1, bp, &offset); |
408 | if (error) | 408 | if (error) |
409 | return error; | 409 | return error; |
410 | mid_cycle = xlog_get_cycle(offset); | 410 | mid_cycle = xlog_get_cycle(offset); |
411 | if (mid_cycle == cycle) | 411 | if (mid_cycle == cycle) |
412 | end_blk = mid_blk; /* last_half_cycle == mid_cycle */ | 412 | end_blk = mid_blk; /* last_half_cycle == mid_cycle */ |
413 | else | 413 | else |
414 | first_blk = mid_blk; /* first_half_cycle == mid_cycle */ | 414 | first_blk = mid_blk; /* first_half_cycle == mid_cycle */ |
415 | mid_blk = BLK_AVG(first_blk, end_blk); | 415 | mid_blk = BLK_AVG(first_blk, end_blk); |
416 | } | 416 | } |
417 | ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) || | 417 | ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) || |
418 | (mid_blk == end_blk && mid_blk-1 == first_blk)); | 418 | (mid_blk == end_blk && mid_blk-1 == first_blk)); |
419 | 419 | ||
420 | *last_blk = end_blk; | 420 | *last_blk = end_blk; |
421 | 421 | ||
422 | return 0; | 422 | return 0; |
423 | } | 423 | } |
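
The loop above is a plain bisection: across the search range, blocks of the current cycle form a run followed by a run of the previous cycle, and comparing the midpoint's cycle against the target halves the range on every read. A self-contained model of the same invariant over an in-memory array; array indexing here is a hypothetical stand-in for the xlog_bread of each midpoint block:

	/*
	 * Model of xlog_find_cycle_start(): entries equal to `cycle`
	 * occupy a suffix of [first, last]; return the first index that
	 * holds `cycle`. On entry cycles[last] == cycle and
	 * cycles[first] != cycle, mirroring how xlog_find_head() sets
	 * up the search.
	 */
	static long
	find_cycle_start(const unsigned *cycles, long first, long last,
			 unsigned cycle)
	{
		long mid = (first + last) / 2;	/* BLK_AVG() equivalent */

		while (mid != first && mid != last) {
			if (cycles[mid] == cycle)
				last = mid;	/* boundary at or before mid */
			else
				first = mid;	/* boundary strictly after mid */
			mid = (first + last) / 2;
		}
		return last;
	}

Seeded with one block known to hold the old cycle and one known to hold the target cycle, this converges in O(log n) reads, which is why recovery can locate the head without scanning the whole log.
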
424 | 424 | ||
425 | /* | 425 | /* |
426 | * Check that a range of blocks does not contain stop_on_cycle_no. | 426 | * Check that a range of blocks does not contain stop_on_cycle_no. |
427 | * Fill in *new_blk with the block offset where such a block is | 427 | * Fill in *new_blk with the block offset where such a block is |
428 | * found, or with -1 (an invalid block number) if there is no such | 428 | * found, or with -1 (an invalid block number) if there is no such |
429 | * block in the range. The scan needs to occur from front to back | 429 | * block in the range. The scan needs to occur from front to back |
430 | * and the pointer into the region must be updated since a later | 430 | * and the pointer into the region must be updated since a later |
431 | * routine will need to perform another test. | 431 | * routine will need to perform another test. |
432 | */ | 432 | */ |
433 | STATIC int | 433 | STATIC int |
434 | xlog_find_verify_cycle( | 434 | xlog_find_verify_cycle( |
435 | struct xlog *log, | 435 | struct xlog *log, |
436 | xfs_daddr_t start_blk, | 436 | xfs_daddr_t start_blk, |
437 | int nbblks, | 437 | int nbblks, |
438 | uint stop_on_cycle_no, | 438 | uint stop_on_cycle_no, |
439 | xfs_daddr_t *new_blk) | 439 | xfs_daddr_t *new_blk) |
440 | { | 440 | { |
441 | xfs_daddr_t i, j; | 441 | xfs_daddr_t i, j; |
442 | uint cycle; | 442 | uint cycle; |
443 | xfs_buf_t *bp; | 443 | xfs_buf_t *bp; |
444 | xfs_daddr_t bufblks; | 444 | xfs_daddr_t bufblks; |
445 | xfs_caddr_t buf = NULL; | 445 | xfs_caddr_t buf = NULL; |
446 | int error = 0; | 446 | int error = 0; |
447 | 447 | ||
448 | /* | 448 | /* |
449 | * Greedily allocate a buffer big enough to handle the full | 449 | * Greedily allocate a buffer big enough to handle the full |
450 | * range of basic blocks we'll be examining. If that fails, | 450 | * range of basic blocks we'll be examining. If that fails, |
451 | * try a smaller size. We need to be able to read at least | 451 | * try a smaller size. We need to be able to read at least |
452 | * a log sector, or we're out of luck. | 452 | * a log sector, or we're out of luck. |
453 | */ | 453 | */ |
454 | bufblks = 1 << ffs(nbblks); | 454 | bufblks = 1 << ffs(nbblks); |
455 | while (bufblks > log->l_logBBsize) | 455 | while (bufblks > log->l_logBBsize) |
456 | bufblks >>= 1; | 456 | bufblks >>= 1; |
457 | while (!(bp = xlog_get_bp(log, bufblks))) { | 457 | while (!(bp = xlog_get_bp(log, bufblks))) { |
458 | bufblks >>= 1; | 458 | bufblks >>= 1; |
459 | if (bufblks < log->l_sectBBsize) | 459 | if (bufblks < log->l_sectBBsize) |
460 | return ENOMEM; | 460 | return ENOMEM; |
461 | } | 461 | } |
462 | 462 | ||
463 | for (i = start_blk; i < start_blk + nbblks; i += bufblks) { | 463 | for (i = start_blk; i < start_blk + nbblks; i += bufblks) { |
464 | int bcount; | 464 | int bcount; |
465 | 465 | ||
466 | bcount = min(bufblks, (start_blk + nbblks - i)); | 466 | bcount = min(bufblks, (start_blk + nbblks - i)); |
467 | 467 | ||
468 | error = xlog_bread(log, i, bcount, bp, &buf); | 468 | error = xlog_bread(log, i, bcount, bp, &buf); |
469 | if (error) | 469 | if (error) |
470 | goto out; | 470 | goto out; |
471 | 471 | ||
472 | for (j = 0; j < bcount; j++) { | 472 | for (j = 0; j < bcount; j++) { |
473 | cycle = xlog_get_cycle(buf); | 473 | cycle = xlog_get_cycle(buf); |
474 | if (cycle == stop_on_cycle_no) { | 474 | if (cycle == stop_on_cycle_no) { |
475 | *new_blk = i+j; | 475 | *new_blk = i+j; |
476 | goto out; | 476 | goto out; |
477 | } | 477 | } |
478 | 478 | ||
479 | buf += BBSIZE; | 479 | buf += BBSIZE; |
480 | } | 480 | } |
481 | } | 481 | } |
482 | 482 | ||
483 | *new_blk = -1; | 483 | *new_blk = -1; |
484 | 484 | ||
485 | out: | 485 | out: |
486 | xlog_put_bp(bp); | 486 | xlog_put_bp(bp); |
487 | return error; | 487 | return error; |
488 | } | 488 | } |
489 | 489 | ||
490 | /* | 490 | /* |
491 | * Potentially backup over partial log record write. | 491 | * Potentially backup over partial log record write. |
492 | * | 492 | * |
493 | * In the typical case, last_blk is the number of the block directly after | 493 | * In the typical case, last_blk is the number of the block directly after |
494 | * a good log record. Therefore, we subtract one to get the block number | 494 | * a good log record. Therefore, we subtract one to get the block number |
495 | * of the last block in the given buffer. extra_bblks contains the number | 495 | * of the last block in the given buffer. extra_bblks contains the number |
496 | * of blocks we would have read on a previous read. This happens when the | 496 | * of blocks we would have read on a previous read. This happens when the |
497 | * last log record is split over the end of the physical log. | 497 | * last log record is split over the end of the physical log. |
498 | * | 498 | * |
499 | * extra_bblks is the number of blocks potentially verified on a previous | 499 | * extra_bblks is the number of blocks potentially verified on a previous |
500 | * call to this routine. | 500 | * call to this routine. |
501 | */ | 501 | */ |
502 | STATIC int | 502 | STATIC int |
503 | xlog_find_verify_log_record( | 503 | xlog_find_verify_log_record( |
504 | struct xlog *log, | 504 | struct xlog *log, |
505 | xfs_daddr_t start_blk, | 505 | xfs_daddr_t start_blk, |
506 | xfs_daddr_t *last_blk, | 506 | xfs_daddr_t *last_blk, |
507 | int extra_bblks) | 507 | int extra_bblks) |
508 | { | 508 | { |
509 | xfs_daddr_t i; | 509 | xfs_daddr_t i; |
510 | xfs_buf_t *bp; | 510 | xfs_buf_t *bp; |
511 | xfs_caddr_t offset = NULL; | 511 | xfs_caddr_t offset = NULL; |
512 | xlog_rec_header_t *head = NULL; | 512 | xlog_rec_header_t *head = NULL; |
513 | int error = 0; | 513 | int error = 0; |
514 | int smallmem = 0; | 514 | int smallmem = 0; |
515 | int num_blks = *last_blk - start_blk; | 515 | int num_blks = *last_blk - start_blk; |
516 | int xhdrs; | 516 | int xhdrs; |
517 | 517 | ||
518 | ASSERT(start_blk != 0 || *last_blk != start_blk); | 518 | ASSERT(start_blk != 0 || *last_blk != start_blk); |
519 | 519 | ||
520 | if (!(bp = xlog_get_bp(log, num_blks))) { | 520 | if (!(bp = xlog_get_bp(log, num_blks))) { |
521 | if (!(bp = xlog_get_bp(log, 1))) | 521 | if (!(bp = xlog_get_bp(log, 1))) |
522 | return ENOMEM; | 522 | return ENOMEM; |
523 | smallmem = 1; | 523 | smallmem = 1; |
524 | } else { | 524 | } else { |
525 | error = xlog_bread(log, start_blk, num_blks, bp, &offset); | 525 | error = xlog_bread(log, start_blk, num_blks, bp, &offset); |
526 | if (error) | 526 | if (error) |
527 | goto out; | 527 | goto out; |
528 | offset += ((num_blks - 1) << BBSHIFT); | 528 | offset += ((num_blks - 1) << BBSHIFT); |
529 | } | 529 | } |
530 | 530 | ||
531 | for (i = (*last_blk) - 1; i >= 0; i--) { | 531 | for (i = (*last_blk) - 1; i >= 0; i--) { |
532 | if (i < start_blk) { | 532 | if (i < start_blk) { |
533 | /* valid log record not found */ | 533 | /* valid log record not found */ |
534 | xfs_warn(log->l_mp, | 534 | xfs_warn(log->l_mp, |
535 | "Log inconsistent (didn't find previous header)"); | 535 | "Log inconsistent (didn't find previous header)"); |
536 | ASSERT(0); | 536 | ASSERT(0); |
537 | error = XFS_ERROR(EIO); | 537 | error = XFS_ERROR(EIO); |
538 | goto out; | 538 | goto out; |
539 | } | 539 | } |
540 | 540 | ||
541 | if (smallmem) { | 541 | if (smallmem) { |
542 | error = xlog_bread(log, i, 1, bp, &offset); | 542 | error = xlog_bread(log, i, 1, bp, &offset); |
543 | if (error) | 543 | if (error) |
544 | goto out; | 544 | goto out; |
545 | } | 545 | } |
546 | 546 | ||
547 | head = (xlog_rec_header_t *)offset; | 547 | head = (xlog_rec_header_t *)offset; |
548 | 548 | ||
549 | if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) | 549 | if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) |
550 | break; | 550 | break; |
551 | 551 | ||
552 | if (!smallmem) | 552 | if (!smallmem) |
553 | offset -= BBSIZE; | 553 | offset -= BBSIZE; |
554 | } | 554 | } |
555 | 555 | ||
556 | /* | 556 | /* |
557 | * We hit the beginning of the physical log & still no header. Return | 557 | * We hit the beginning of the physical log & still no header. Return |
558 | * to caller. If caller can handle a return of -1, then this routine | 558 | * to caller. If caller can handle a return of -1, then this routine |
559 | * will be called again for the end of the physical log. | 559 | * will be called again for the end of the physical log. |
560 | */ | 560 | */ |
561 | if (i == -1) { | 561 | if (i == -1) { |
562 | error = -1; | 562 | error = -1; |
563 | goto out; | 563 | goto out; |
564 | } | 564 | } |
565 | 565 | ||
566 | /* | 566 | /* |
567 | * We have the final block of the good log (the first block | 567 | * We have the final block of the good log (the first block |
568 | * of the log record _before_ the head). So we check the uuid. | 568 | * of the log record _before_ the head). So we check the uuid. |
569 | */ | 569 | */ |
570 | if ((error = xlog_header_check_mount(log->l_mp, head))) | 570 | if ((error = xlog_header_check_mount(log->l_mp, head))) |
571 | goto out; | 571 | goto out; |
572 | 572 | ||
573 | /* | 573 | /* |
574 | * We may have found a log record header before we expected one. | 574 | * We may have found a log record header before we expected one. |
575 | * last_blk will be the 1st block # with a given cycle #. We may end | 575 | * last_blk will be the 1st block # with a given cycle #. We may end |
576 | * up reading an entire log record. In this case, we don't want to | 576 | * up reading an entire log record. In this case, we don't want to |
577 | * reset last_blk. Only when last_blk points in the middle of a log | 577 | * reset last_blk. Only when last_blk points in the middle of a log |
578 | * record do we update last_blk. | 578 | * record do we update last_blk. |
579 | */ | 579 | */ |
580 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 580 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
581 | uint h_size = be32_to_cpu(head->h_size); | 581 | uint h_size = be32_to_cpu(head->h_size); |
582 | 582 | ||
583 | xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; | 583 | xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; |
584 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 584 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
585 | xhdrs++; | 585 | xhdrs++; |
586 | } else { | 586 | } else { |
587 | xhdrs = 1; | 587 | xhdrs = 1; |
588 | } | 588 | } |
589 | 589 | ||
590 | if (*last_blk - i + extra_bblks != | 590 | if (*last_blk - i + extra_bblks != |
591 | BTOBB(be32_to_cpu(head->h_len)) + xhdrs) | 591 | BTOBB(be32_to_cpu(head->h_len)) + xhdrs) |
592 | *last_blk = i; | 592 | *last_blk = i; |
593 | 593 | ||
594 | out: | 594 | out: |
595 | xlog_put_bp(bp); | 595 | xlog_put_bp(bp); |
596 | return error; | 596 | return error; |
597 | } | 597 | } |
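The length check just above is easier to see with numbers. Below is a minimal sketch of the same test, illustrative only and not part of the commit (plain C types; TOY_BTOBB stands in for the kernel's bytes-to-basic-blocks macro): a complete record occupies its header blocks plus BTOBB(h_len) data blocks, so a candidate head that spans any other count from the header found at block i must point mid-record, and last_blk is pulled back to i.

    /* Illustrative sketch, not part of the commit. */
    #include <stdbool.h>

    #define TOY_BBSIZE		512
    #define TOY_BTOBB(bytes)	(((bytes) + TOY_BBSIZE - 1) / TOY_BBSIZE)

    static bool toy_blk_is_mid_record(long long last_blk, long long hdr_blk,
                                      int extra_bblks, unsigned int h_len,
                                      int xhdrs)
    {
            /* basic blocks spanned from the record header to the candidate head */
            long long span = last_blk - hdr_blk + extra_bblks;

            /* a complete record is its header blocks plus its data blocks */
            return span != TOY_BTOBB(h_len) + xhdrs;
    }

For example, a 64k record (h_len = 65536) behind a single-block header spans 1 + 128 basic blocks; a last_blk only 100 blocks past that header fails the test and is reset to the header block.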
598 | 598 | ||
599 | /* | 599 | /* |
600 | * Head is defined to be the point of the log where the next log write | 600 | * Head is defined to be the point of the log where the next log write |
601 | * could go. This means that incomplete LR writes at the end are | 601 | * could go. This means that incomplete LR writes at the end are |
602 | * eliminated when calculating the head. We aren't guaranteed that previous | 602 | * eliminated when calculating the head. We aren't guaranteed that previous |
603 | * LRs have complete transactions. We only know that blocks with a | 603 | * LRs have complete transactions. We only know that blocks with a |
604 | * cycle number of current cycle - 1 won't be present in the log if we | 604 | * cycle number of current cycle - 1 won't be present in the log if we |
605 | * start writing from our current block number. | 605 | * start writing from our current block number. |
606 | * | 606 | * |
607 | * last_blk contains the block number of the first block with a given | 607 | * last_blk contains the block number of the first block with a given |
608 | * cycle number. | 608 | * cycle number. |
609 | * | 609 | * |
610 | * Return: zero if normal, non-zero if error. | 610 | * Return: zero if normal, non-zero if error. |
611 | */ | 611 | */ |
612 | STATIC int | 612 | STATIC int |
613 | xlog_find_head( | 613 | xlog_find_head( |
614 | struct xlog *log, | 614 | struct xlog *log, |
615 | xfs_daddr_t *return_head_blk) | 615 | xfs_daddr_t *return_head_blk) |
616 | { | 616 | { |
617 | xfs_buf_t *bp; | 617 | xfs_buf_t *bp; |
618 | xfs_caddr_t offset; | 618 | xfs_caddr_t offset; |
619 | xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; | 619 | xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; |
620 | int num_scan_bblks; | 620 | int num_scan_bblks; |
621 | uint first_half_cycle, last_half_cycle; | 621 | uint first_half_cycle, last_half_cycle; |
622 | uint stop_on_cycle; | 622 | uint stop_on_cycle; |
623 | int error, log_bbnum = log->l_logBBsize; | 623 | int error, log_bbnum = log->l_logBBsize; |
624 | 624 | ||
625 | /* Is the end of the log device zeroed? */ | 625 | /* Is the end of the log device zeroed? */ |
626 | if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { | 626 | if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { |
627 | *return_head_blk = first_blk; | 627 | *return_head_blk = first_blk; |
628 | 628 | ||
629 | /* Is the whole lot zeroed? */ | 629 | /* Is the whole lot zeroed? */ |
630 | if (!first_blk) { | 630 | if (!first_blk) { |
631 | /* Linux XFS shouldn't generate totally zeroed logs - | 631 | /* Linux XFS shouldn't generate totally zeroed logs - |
632 | * mkfs etc. write a dummy unmount record to a fresh | 632 | * mkfs etc. write a dummy unmount record to a fresh |
633 | * log so we can store the uuid in there. | 633 | * log so we can store the uuid in there. |
634 | */ | 634 | */ |
635 | xfs_warn(log->l_mp, "totally zeroed log"); | 635 | xfs_warn(log->l_mp, "totally zeroed log"); |
636 | } | 636 | } |
637 | 637 | ||
638 | return 0; | 638 | return 0; |
639 | } else if (error) { | 639 | } else if (error) { |
640 | xfs_warn(log->l_mp, "empty log check failed"); | 640 | xfs_warn(log->l_mp, "empty log check failed"); |
641 | return error; | 641 | return error; |
642 | } | 642 | } |
643 | 643 | ||
644 | first_blk = 0; /* get cycle # of 1st block */ | 644 | first_blk = 0; /* get cycle # of 1st block */ |
645 | bp = xlog_get_bp(log, 1); | 645 | bp = xlog_get_bp(log, 1); |
646 | if (!bp) | 646 | if (!bp) |
647 | return ENOMEM; | 647 | return ENOMEM; |
648 | 648 | ||
649 | error = xlog_bread(log, 0, 1, bp, &offset); | 649 | error = xlog_bread(log, 0, 1, bp, &offset); |
650 | if (error) | 650 | if (error) |
651 | goto bp_err; | 651 | goto bp_err; |
652 | 652 | ||
653 | first_half_cycle = xlog_get_cycle(offset); | 653 | first_half_cycle = xlog_get_cycle(offset); |
654 | 654 | ||
655 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ | 655 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ |
656 | error = xlog_bread(log, last_blk, 1, bp, &offset); | 656 | error = xlog_bread(log, last_blk, 1, bp, &offset); |
657 | if (error) | 657 | if (error) |
658 | goto bp_err; | 658 | goto bp_err; |
659 | 659 | ||
660 | last_half_cycle = xlog_get_cycle(offset); | 660 | last_half_cycle = xlog_get_cycle(offset); |
661 | ASSERT(last_half_cycle != 0); | 661 | ASSERT(last_half_cycle != 0); |
662 | 662 | ||
663 | /* | 663 | /* |
664 | * If the 1st half cycle number is equal to the last half cycle number, | 664 | * If the 1st half cycle number is equal to the last half cycle number, |
665 | * then the entire log is stamped with the same cycle number. In this | 665 | * then the entire log is stamped with the same cycle number. In this |
666 | * case, head_blk can't be set to zero (which makes sense). The below | 666 | * case, head_blk can't be set to zero (which makes sense). The below |
667 | * math doesn't work out properly with head_blk equal to zero. Instead, | 667 | * math doesn't work out properly with head_blk equal to zero. Instead, |
668 | * we set it to log_bbnum which is an invalid block number, but this | 668 | * we set it to log_bbnum which is an invalid block number, but this |
669 | * value makes the math correct. If head_blk doesn't change through | 669 | * value makes the math correct. If head_blk doesn't change through |
670 | * all the tests below, *head_blk is set to zero at the very end rather | 670 | * all the tests below, *head_blk is set to zero at the very end rather |
671 | * than log_bbnum. In a sense, log_bbnum and zero are the same block | 671 | * than log_bbnum. In a sense, log_bbnum and zero are the same block |
672 | * in a circular file. | 672 | * in a circular file. |
673 | */ | 673 | */ |
674 | if (first_half_cycle == last_half_cycle) { | 674 | if (first_half_cycle == last_half_cycle) { |
675 | /* | 675 | /* |
676 | * In this case we believe that the entire log should have | 676 | * In this case we believe that the entire log should have |
677 | * cycle number last_half_cycle. We need to scan backwards | 677 | * cycle number last_half_cycle. We need to scan backwards |
678 | * from the end verifying that there are no holes still | 678 | * from the end verifying that there are no holes still |
679 | * containing last_half_cycle - 1. If we find such a hole, | 679 | * containing last_half_cycle - 1. If we find such a hole, |
680 | * then the start of that hole will be the new head. The | 680 | * then the start of that hole will be the new head. The |
681 | * simple case looks like | 681 | * simple case looks like |
682 | * x | x ... | x - 1 | x | 682 | * x | x ... | x - 1 | x |
683 | * Another case that fits this picture would be | 683 | * Another case that fits this picture would be |
684 | * x | x + 1 | x ... | x | 684 | * x | x + 1 | x ... | x |
685 | * In this case the head really is somewhere at the end of the | 685 | * In this case the head really is somewhere at the end of the |
686 | * log, as one of the latest writes at the beginning was | 686 | * log, as one of the latest writes at the beginning was |
687 | * incomplete. | 687 | * incomplete. |
688 | * One more case is | 688 | * One more case is |
689 | * x | x + 1 | x ... | x - 1 | x | 689 | * x | x + 1 | x ... | x - 1 | x |
690 | * This is really the combination of the above two cases, and | 690 | * This is really the combination of the above two cases, and |
691 | * the head has to end up at the start of the x-1 hole at the | 691 | * the head has to end up at the start of the x-1 hole at the |
692 | * end of the log. | 692 | * end of the log. |
693 | * | 693 | * |
694 | * In the 256k log case, we will read from the beginning to the | 694 | * In the 256k log case, we will read from the beginning to the |
695 | * end of the log and search for cycle numbers equal to x-1. | 695 | * end of the log and search for cycle numbers equal to x-1. |
696 | * We don't worry about the x+1 blocks that we encounter, | 696 | * We don't worry about the x+1 blocks that we encounter, |
697 | * because we know that they cannot be the head since the log | 697 | * because we know that they cannot be the head since the log |
698 | * started with x. | 698 | * started with x. |
699 | */ | 699 | */ |
700 | head_blk = log_bbnum; | 700 | head_blk = log_bbnum; |
701 | stop_on_cycle = last_half_cycle - 1; | 701 | stop_on_cycle = last_half_cycle - 1; |
702 | } else { | 702 | } else { |
703 | /* | 703 | /* |
704 | * In this case we want to find the first block with cycle | 704 | * In this case we want to find the first block with cycle |
705 | * number matching last_half_cycle. We expect the log to be | 705 | * number matching last_half_cycle. We expect the log to be |
706 | * some variation on | 706 | * some variation on |
707 | * x + 1 ... | x ... | x | 707 | * x + 1 ... | x ... | x |
708 | * The first block with cycle number x (last_half_cycle) will | 708 | * The first block with cycle number x (last_half_cycle) will |
709 | * be where the new head belongs. First we do a binary search | 709 | * be where the new head belongs. First we do a binary search |
710 | * for the first occurrence of last_half_cycle. The binary | 710 | * for the first occurrence of last_half_cycle. The binary |
711 | * search may not be totally accurate, so then we scan back | 711 | * search may not be totally accurate, so then we scan back |
712 | * from there looking for occurrences of last_half_cycle before | 712 | * from there looking for occurrences of last_half_cycle before |
713 | * us. If that backwards scan wraps around the beginning of | 713 | * us. If that backwards scan wraps around the beginning of |
714 | * the log, then we look for occurrences of last_half_cycle - 1 | 714 | * the log, then we look for occurrences of last_half_cycle - 1 |
715 | * at the end of the log. The cases we're looking for look | 715 | * at the end of the log. The cases we're looking for look |
716 | * like | 716 | * like |
717 | * v binary search stopped here | 717 | * v binary search stopped here |
718 | * x + 1 ... | x | x + 1 | x ... | x | 718 | * x + 1 ... | x | x + 1 | x ... | x |
719 | * ^ but we want to locate this spot | 719 | * ^ but we want to locate this spot |
720 | * or | 720 | * or |
721 | * <---------> less than scan distance | 721 | * <---------> less than scan distance |
722 | * x + 1 ... | x ... | x - 1 | x | 722 | * x + 1 ... | x ... | x - 1 | x |
723 | * ^ we want to locate this spot | 723 | * ^ we want to locate this spot |
724 | */ | 724 | */ |
725 | stop_on_cycle = last_half_cycle; | 725 | stop_on_cycle = last_half_cycle; |
726 | if ((error = xlog_find_cycle_start(log, bp, first_blk, | 726 | if ((error = xlog_find_cycle_start(log, bp, first_blk, |
727 | &head_blk, last_half_cycle))) | 727 | &head_blk, last_half_cycle))) |
728 | goto bp_err; | 728 | goto bp_err; |
729 | } | 729 | } |
730 | 730 | ||
731 | /* | 731 | /* |
732 | * Now validate the answer. Scan back some number of maximum possible | 732 | * Now validate the answer. Scan back some number of maximum possible |
733 | * blocks and make sure each one has the expected cycle number. The | 733 | * blocks and make sure each one has the expected cycle number. The |
734 | * maximum is determined by the total possible amount of buffering | 734 | * maximum is determined by the total possible amount of buffering |
735 | * in the in-core log. The following number can be made tighter if | 735 | * in the in-core log. The following number can be made tighter if |
736 | * we actually look at the block size of the filesystem. | 736 | * we actually look at the block size of the filesystem. |
737 | */ | 737 | */ |
738 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); | 738 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
739 | if (head_blk >= num_scan_bblks) { | 739 | if (head_blk >= num_scan_bblks) { |
740 | /* | 740 | /* |
741 | * We are guaranteed that the entire check can be performed | 741 | * We are guaranteed that the entire check can be performed |
742 | * in one buffer. | 742 | * in one buffer. |
743 | */ | 743 | */ |
744 | start_blk = head_blk - num_scan_bblks; | 744 | start_blk = head_blk - num_scan_bblks; |
745 | if ((error = xlog_find_verify_cycle(log, | 745 | if ((error = xlog_find_verify_cycle(log, |
746 | start_blk, num_scan_bblks, | 746 | start_blk, num_scan_bblks, |
747 | stop_on_cycle, &new_blk))) | 747 | stop_on_cycle, &new_blk))) |
748 | goto bp_err; | 748 | goto bp_err; |
749 | if (new_blk != -1) | 749 | if (new_blk != -1) |
750 | head_blk = new_blk; | 750 | head_blk = new_blk; |
751 | } else { /* need to read 2 parts of log */ | 751 | } else { /* need to read 2 parts of log */ |
752 | /* | 752 | /* |
753 | * We are going to scan backwards in the log in two parts. | 753 | * We are going to scan backwards in the log in two parts. |
754 | * First we scan the physical end of the log. In this part | 754 | * First we scan the physical end of the log. In this part |
755 | * of the log, we are looking for blocks with cycle number | 755 | * of the log, we are looking for blocks with cycle number |
756 | * last_half_cycle - 1. | 756 | * last_half_cycle - 1. |
757 | * If we find one, then we know that the log starts there, as | 757 | * If we find one, then we know that the log starts there, as |
758 | * we've found a hole that didn't get written in going around | 758 | * we've found a hole that didn't get written in going around |
759 | * the end of the physical log. The simple case for this is | 759 | * the end of the physical log. The simple case for this is |
760 | * x + 1 ... | x ... | x - 1 | x | 760 | * x + 1 ... | x ... | x - 1 | x |
761 | * <---------> less than scan distance | 761 | * <---------> less than scan distance |
762 | * If all of the blocks at the end of the log have cycle number | 762 | * If all of the blocks at the end of the log have cycle number |
763 | * last_half_cycle, then we check the blocks at the start of | 763 | * last_half_cycle, then we check the blocks at the start of |
764 | * the log looking for occurrences of last_half_cycle. If we | 764 | * the log looking for occurrences of last_half_cycle. If we |
765 | * find one, then our current estimate for the location of the | 765 | * find one, then our current estimate for the location of the |
766 | * first occurrence of last_half_cycle is wrong and we move | 766 | * first occurrence of last_half_cycle is wrong and we move |
767 | * back to the hole we've found. This case looks like | 767 | * back to the hole we've found. This case looks like |
768 | * x + 1 ... | x | x + 1 | x ... | 768 | * x + 1 ... | x | x + 1 | x ... |
769 | * ^ binary search stopped here | 769 | * ^ binary search stopped here |
770 | * Another case we need to handle that only occurs in 256k | 770 | * Another case we need to handle that only occurs in 256k |
771 | * logs is | 771 | * logs is |
772 | * x + 1 ... | x ... | x+1 | x ... | 772 | * x + 1 ... | x ... | x+1 | x ... |
773 | * ^ binary search stops here | 773 | * ^ binary search stops here |
774 | * In a 256k log, the scan at the end of the log will see the | 774 | * In a 256k log, the scan at the end of the log will see the |
775 | * x + 1 blocks. We need to skip past those since that is | 775 | * x + 1 blocks. We need to skip past those since that is |
776 | * certainly not the head of the log. By searching for | 776 | * certainly not the head of the log. By searching for |
777 | * last_half_cycle-1 we accomplish that. | 777 | * last_half_cycle-1 we accomplish that. |
778 | */ | 778 | */ |
779 | ASSERT(head_blk <= INT_MAX && | 779 | ASSERT(head_blk <= INT_MAX && |
780 | (xfs_daddr_t) num_scan_bblks >= head_blk); | 780 | (xfs_daddr_t) num_scan_bblks >= head_blk); |
781 | start_blk = log_bbnum - (num_scan_bblks - head_blk); | 781 | start_blk = log_bbnum - (num_scan_bblks - head_blk); |
782 | if ((error = xlog_find_verify_cycle(log, start_blk, | 782 | if ((error = xlog_find_verify_cycle(log, start_blk, |
783 | num_scan_bblks - (int)head_blk, | 783 | num_scan_bblks - (int)head_blk, |
784 | (stop_on_cycle - 1), &new_blk))) | 784 | (stop_on_cycle - 1), &new_blk))) |
785 | goto bp_err; | 785 | goto bp_err; |
786 | if (new_blk != -1) { | 786 | if (new_blk != -1) { |
787 | head_blk = new_blk; | 787 | head_blk = new_blk; |
788 | goto validate_head; | 788 | goto validate_head; |
789 | } | 789 | } |
790 | 790 | ||
791 | /* | 791 | /* |
792 | * Scan beginning of log now. The last part of the physical | 792 | * Scan beginning of log now. The last part of the physical |
793 | * log is good. This scan needs to verify that it doesn't find | 793 | * log is good. This scan needs to verify that it doesn't find |
794 | * the last_half_cycle. | 794 | * the last_half_cycle. |
795 | */ | 795 | */ |
796 | start_blk = 0; | 796 | start_blk = 0; |
797 | ASSERT(head_blk <= INT_MAX); | 797 | ASSERT(head_blk <= INT_MAX); |
798 | if ((error = xlog_find_verify_cycle(log, | 798 | if ((error = xlog_find_verify_cycle(log, |
799 | start_blk, (int)head_blk, | 799 | start_blk, (int)head_blk, |
800 | stop_on_cycle, &new_blk))) | 800 | stop_on_cycle, &new_blk))) |
801 | goto bp_err; | 801 | goto bp_err; |
802 | if (new_blk != -1) | 802 | if (new_blk != -1) |
803 | head_blk = new_blk; | 803 | head_blk = new_blk; |
804 | } | 804 | } |
805 | 805 | ||
806 | validate_head: | 806 | validate_head: |
807 | /* | 807 | /* |
808 | * Now we need to make sure head_blk is not pointing to a block in | 808 | * Now we need to make sure head_blk is not pointing to a block in |
809 | * the middle of a log record. | 809 | * the middle of a log record. |
810 | */ | 810 | */ |
811 | num_scan_bblks = XLOG_REC_SHIFT(log); | 811 | num_scan_bblks = XLOG_REC_SHIFT(log); |
812 | if (head_blk >= num_scan_bblks) { | 812 | if (head_blk >= num_scan_bblks) { |
813 | start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ | 813 | start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ |
814 | 814 | ||
815 | /* start ptr at last block ptr before head_blk */ | 815 | /* start ptr at last block ptr before head_blk */ |
816 | if ((error = xlog_find_verify_log_record(log, start_blk, | 816 | if ((error = xlog_find_verify_log_record(log, start_blk, |
817 | &head_blk, 0)) == -1) { | 817 | &head_blk, 0)) == -1) { |
818 | error = XFS_ERROR(EIO); | 818 | error = XFS_ERROR(EIO); |
819 | goto bp_err; | 819 | goto bp_err; |
820 | } else if (error) | 820 | } else if (error) |
821 | goto bp_err; | 821 | goto bp_err; |
822 | } else { | 822 | } else { |
823 | start_blk = 0; | 823 | start_blk = 0; |
824 | ASSERT(head_blk <= INT_MAX); | 824 | ASSERT(head_blk <= INT_MAX); |
825 | if ((error = xlog_find_verify_log_record(log, start_blk, | 825 | if ((error = xlog_find_verify_log_record(log, start_blk, |
826 | &head_blk, 0)) == -1) { | 826 | &head_blk, 0)) == -1) { |
827 | /* We hit the beginning of the log during our search */ | 827 | /* We hit the beginning of the log during our search */ |
828 | start_blk = log_bbnum - (num_scan_bblks - head_blk); | 828 | start_blk = log_bbnum - (num_scan_bblks - head_blk); |
829 | new_blk = log_bbnum; | 829 | new_blk = log_bbnum; |
830 | ASSERT(start_blk <= INT_MAX && | 830 | ASSERT(start_blk <= INT_MAX && |
831 | (xfs_daddr_t) log_bbnum-start_blk >= 0); | 831 | (xfs_daddr_t) log_bbnum-start_blk >= 0); |
832 | ASSERT(head_blk <= INT_MAX); | 832 | ASSERT(head_blk <= INT_MAX); |
833 | if ((error = xlog_find_verify_log_record(log, | 833 | if ((error = xlog_find_verify_log_record(log, |
834 | start_blk, &new_blk, | 834 | start_blk, &new_blk, |
835 | (int)head_blk)) == -1) { | 835 | (int)head_blk)) == -1) { |
836 | error = XFS_ERROR(EIO); | 836 | error = XFS_ERROR(EIO); |
837 | goto bp_err; | 837 | goto bp_err; |
838 | } else if (error) | 838 | } else if (error) |
839 | goto bp_err; | 839 | goto bp_err; |
840 | if (new_blk != log_bbnum) | 840 | if (new_blk != log_bbnum) |
841 | head_blk = new_blk; | 841 | head_blk = new_blk; |
842 | } else if (error) | 842 | } else if (error) |
843 | goto bp_err; | 843 | goto bp_err; |
844 | } | 844 | } |
845 | 845 | ||
846 | xlog_put_bp(bp); | 846 | xlog_put_bp(bp); |
847 | if (head_blk == log_bbnum) | 847 | if (head_blk == log_bbnum) |
848 | *return_head_blk = 0; | 848 | *return_head_blk = 0; |
849 | else | 849 | else |
850 | *return_head_blk = head_blk; | 850 | *return_head_blk = head_blk; |
851 | /* | 851 | /* |
852 | * When returning here, we have a good block number. A bad block | 852 | * When returning here, we have a good block number. A bad block |
853 | * means that during a previous crash, we didn't have a clean break | 853 | * means that during a previous crash, we didn't have a clean break |
854 | * from cycle number N to cycle number N-1. In this case, we need | 854 | * from cycle number N to cycle number N-1. In this case, we need |
855 | * to find the first block with cycle number N-1. | 855 | * to find the first block with cycle number N-1. |
856 | */ | 856 | */ |
857 | return 0; | 857 | return 0; |
858 | 858 | ||
859 | bp_err: | 859 | bp_err: |
860 | xlog_put_bp(bp); | 860 | xlog_put_bp(bp); |
861 | 861 | ||
862 | if (error) | 862 | if (error) |
863 | xfs_warn(log->l_mp, "failed to find log head"); | 863 | xfs_warn(log->l_mp, "failed to find log head"); |
864 | return error; | 864 | return error; |
865 | } | 865 | } |
866 | 866 | ||
867 | /* | 867 | /* |
868 | * Find the sync block number or the tail of the log. | 868 | * Find the sync block number or the tail of the log. |
869 | * | 869 | * |
870 | * This will be the block number of the last record to have its | 870 | * This will be the block number of the last record to have its |
871 | * associated buffers synced to disk. Every log record header has | 871 | * associated buffers synced to disk. Every log record header has |
872 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy | 872 | * a sync lsn embedded in it. LSNs hold block numbers, so it is easy |
873 | * to get a sync block number. The only concern is to figure out which | 873 | * to get a sync block number. The only concern is to figure out which |
874 | * log record header to believe. | 874 | * log record header to believe. |
875 | * | 875 | * |
876 | * The following algorithm uses the log record header with the largest | 876 | * The following algorithm uses the log record header with the largest |
877 | * lsn. The entire log record does not need to be valid. We only care | 877 | * lsn. The entire log record does not need to be valid. We only care |
878 | * that the header is valid. | 878 | * that the header is valid. |
879 | * | 879 | * |
880 | * We could speed up the search by using the current head_blk buffer, | 880 | * We could speed up the search by using the current head_blk buffer, |
881 | * but it is not available. | 881 | * but it is not available. |
882 | */ | 882 | */ |
883 | STATIC int | 883 | STATIC int |
884 | xlog_find_tail( | 884 | xlog_find_tail( |
885 | struct xlog *log, | 885 | struct xlog *log, |
886 | xfs_daddr_t *head_blk, | 886 | xfs_daddr_t *head_blk, |
887 | xfs_daddr_t *tail_blk) | 887 | xfs_daddr_t *tail_blk) |
888 | { | 888 | { |
889 | xlog_rec_header_t *rhead; | 889 | xlog_rec_header_t *rhead; |
890 | xlog_op_header_t *op_head; | 890 | xlog_op_header_t *op_head; |
891 | xfs_caddr_t offset = NULL; | 891 | xfs_caddr_t offset = NULL; |
892 | xfs_buf_t *bp; | 892 | xfs_buf_t *bp; |
893 | int error, i, found; | 893 | int error, i, found; |
894 | xfs_daddr_t umount_data_blk; | 894 | xfs_daddr_t umount_data_blk; |
895 | xfs_daddr_t after_umount_blk; | 895 | xfs_daddr_t after_umount_blk; |
896 | xfs_lsn_t tail_lsn; | 896 | xfs_lsn_t tail_lsn; |
897 | int hblks; | 897 | int hblks; |
898 | 898 | ||
899 | found = 0; | 899 | found = 0; |
900 | 900 | ||
901 | /* | 901 | /* |
902 | * Find previous log record | 902 | * Find previous log record |
903 | */ | 903 | */ |
904 | if ((error = xlog_find_head(log, head_blk))) | 904 | if ((error = xlog_find_head(log, head_blk))) |
905 | return error; | 905 | return error; |
906 | 906 | ||
907 | bp = xlog_get_bp(log, 1); | 907 | bp = xlog_get_bp(log, 1); |
908 | if (!bp) | 908 | if (!bp) |
909 | return ENOMEM; | 909 | return ENOMEM; |
910 | if (*head_blk == 0) { /* special case */ | 910 | if (*head_blk == 0) { /* special case */ |
911 | error = xlog_bread(log, 0, 1, bp, &offset); | 911 | error = xlog_bread(log, 0, 1, bp, &offset); |
912 | if (error) | 912 | if (error) |
913 | goto done; | 913 | goto done; |
914 | 914 | ||
915 | if (xlog_get_cycle(offset) == 0) { | 915 | if (xlog_get_cycle(offset) == 0) { |
916 | *tail_blk = 0; | 916 | *tail_blk = 0; |
917 | /* leave all other log inited values alone */ | 917 | /* leave all other log inited values alone */ |
918 | goto done; | 918 | goto done; |
919 | } | 919 | } |
920 | } | 920 | } |
921 | 921 | ||
922 | /* | 922 | /* |
923 | * Search backwards looking for log record header block | 923 | * Search backwards looking for log record header block |
924 | */ | 924 | */ |
925 | ASSERT(*head_blk < INT_MAX); | 925 | ASSERT(*head_blk < INT_MAX); |
926 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { | 926 | for (i = (int)(*head_blk) - 1; i >= 0; i--) { |
927 | error = xlog_bread(log, i, 1, bp, &offset); | 927 | error = xlog_bread(log, i, 1, bp, &offset); |
928 | if (error) | 928 | if (error) |
929 | goto done; | 929 | goto done; |
930 | 930 | ||
931 | if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { | 931 | if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { |
932 | found = 1; | 932 | found = 1; |
933 | break; | 933 | break; |
934 | } | 934 | } |
935 | } | 935 | } |
936 | /* | 936 | /* |
937 | * If we haven't found the log record header block, start looking | 937 | * If we haven't found the log record header block, start looking |
938 | * again from the end of the physical log. XXXmiken: There should be | 938 | * again from the end of the physical log. XXXmiken: There should be |
939 | * a check here to make sure we didn't search more than N blocks in | 939 | * a check here to make sure we didn't search more than N blocks in |
940 | * the previous code. | 940 | * the previous code. |
941 | */ | 941 | */ |
942 | if (!found) { | 942 | if (!found) { |
943 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { | 943 | for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { |
944 | error = xlog_bread(log, i, 1, bp, &offset); | 944 | error = xlog_bread(log, i, 1, bp, &offset); |
945 | if (error) | 945 | if (error) |
946 | goto done; | 946 | goto done; |
947 | 947 | ||
948 | if (*(__be32 *)offset == | 948 | if (*(__be32 *)offset == |
949 | cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { | 949 | cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { |
950 | found = 2; | 950 | found = 2; |
951 | break; | 951 | break; |
952 | } | 952 | } |
953 | } | 953 | } |
954 | } | 954 | } |
955 | if (!found) { | 955 | if (!found) { |
956 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); | 956 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); |
957 | ASSERT(0); | 957 | ASSERT(0); |
958 | return XFS_ERROR(EIO); | 958 | return XFS_ERROR(EIO); |
959 | } | 959 | } |
960 | 960 | ||
961 | /* find blk_no of tail of log */ | 961 | /* find blk_no of tail of log */ |
962 | rhead = (xlog_rec_header_t *)offset; | 962 | rhead = (xlog_rec_header_t *)offset; |
963 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); | 963 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); |
964 | 964 | ||
965 | /* | 965 | /* |
966 | * Reset log values according to the state of the log when we | 966 | * Reset log values according to the state of the log when we |
967 | * crashed. In the case where head_blk == 0, we bump curr_cycle | 967 | * crashed. In the case where head_blk == 0, we bump curr_cycle |
968 | * by one because the next write starts a new cycle rather than | 968 | * by one because the next write starts a new cycle rather than |
969 | * continuing the cycle of the last good log record. At this | 969 | * continuing the cycle of the last good log record. At this |
970 | * point we have guaranteed that all partial log records have been | 970 | * point we have guaranteed that all partial log records have been |
971 | * accounted for. Therefore, we know that the last good log record | 971 | * accounted for. Therefore, we know that the last good log record |
972 | * written was complete and ended exactly on the end boundary | 972 | * written was complete and ended exactly on the end boundary |
973 | * of the physical log. | 973 | * of the physical log. |
974 | */ | 974 | */ |
975 | log->l_prev_block = i; | 975 | log->l_prev_block = i; |
976 | log->l_curr_block = (int)*head_blk; | 976 | log->l_curr_block = (int)*head_blk; |
977 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); | 977 | log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); |
978 | if (found == 2) | 978 | if (found == 2) |
979 | log->l_curr_cycle++; | 979 | log->l_curr_cycle++; |
980 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); | 980 | atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); |
981 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); | 981 | atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); |
982 | xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, | 982 | xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, |
983 | BBTOB(log->l_curr_block)); | 983 | BBTOB(log->l_curr_block)); |
984 | xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, | 984 | xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, |
985 | BBTOB(log->l_curr_block)); | 985 | BBTOB(log->l_curr_block)); |
986 | 986 | ||
987 | /* | 987 | /* |
988 | * Look for unmount record. If we find it, then we know there | 988 | * Look for unmount record. If we find it, then we know there |
989 | * was a clean unmount. Since 'i' could be the last block in | 989 | * was a clean unmount. Since 'i' could be the last block in |
990 | * the physical log, we convert to a log block before comparing | 990 | * the physical log, we convert to a log block before comparing |
991 | * to the head_blk. | 991 | * to the head_blk. |
992 | * | 992 | * |
993 | * Save the current tail lsn to use to pass to | 993 | * Save the current tail lsn to use to pass to |
994 | * xlog_clear_stale_blocks() below. We won't want to clear the | 994 | * xlog_clear_stale_blocks() below. We won't want to clear the |
995 | * unmount record if there is one, so we pass the lsn of the | 995 | * unmount record if there is one, so we pass the lsn of the |
996 | * unmount record rather than the block after it. | 996 | * unmount record rather than the block after it. |
997 | */ | 997 | */ |
998 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 998 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
999 | int h_size = be32_to_cpu(rhead->h_size); | 999 | int h_size = be32_to_cpu(rhead->h_size); |
1000 | int h_version = be32_to_cpu(rhead->h_version); | 1000 | int h_version = be32_to_cpu(rhead->h_version); |
1001 | 1001 | ||
1002 | if ((h_version & XLOG_VERSION_2) && | 1002 | if ((h_version & XLOG_VERSION_2) && |
1003 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | 1003 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
1004 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | 1004 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
1005 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 1005 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
1006 | hblks++; | 1006 | hblks++; |
1007 | } else { | 1007 | } else { |
1008 | hblks = 1; | 1008 | hblks = 1; |
1009 | } | 1009 | } |
1010 | } else { | 1010 | } else { |
1011 | hblks = 1; | 1011 | hblks = 1; |
1012 | } | 1012 | } |
1013 | after_umount_blk = (i + hblks + (int) | 1013 | after_umount_blk = (i + hblks + (int) |
1014 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; | 1014 | BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; |
1015 | tail_lsn = atomic64_read(&log->l_tail_lsn); | 1015 | tail_lsn = atomic64_read(&log->l_tail_lsn); |
1016 | if (*head_blk == after_umount_blk && | 1016 | if (*head_blk == after_umount_blk && |
1017 | be32_to_cpu(rhead->h_num_logops) == 1) { | 1017 | be32_to_cpu(rhead->h_num_logops) == 1) { |
1018 | umount_data_blk = (i + hblks) % log->l_logBBsize; | 1018 | umount_data_blk = (i + hblks) % log->l_logBBsize; |
1019 | error = xlog_bread(log, umount_data_blk, 1, bp, &offset); | 1019 | error = xlog_bread(log, umount_data_blk, 1, bp, &offset); |
1020 | if (error) | 1020 | if (error) |
1021 | goto done; | 1021 | goto done; |
1022 | 1022 | ||
1023 | op_head = (xlog_op_header_t *)offset; | 1023 | op_head = (xlog_op_header_t *)offset; |
1024 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { | 1024 | if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { |
1025 | /* | 1025 | /* |
1026 | * Set tail and last sync so that newly written | 1026 | * Set tail and last sync so that newly written |
1027 | * log records will point recovery to after the | 1027 | * log records will point recovery to after the |
1028 | * current unmount record. | 1028 | * current unmount record. |
1029 | */ | 1029 | */ |
1030 | xlog_assign_atomic_lsn(&log->l_tail_lsn, | 1030 | xlog_assign_atomic_lsn(&log->l_tail_lsn, |
1031 | log->l_curr_cycle, after_umount_blk); | 1031 | log->l_curr_cycle, after_umount_blk); |
1032 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, | 1032 | xlog_assign_atomic_lsn(&log->l_last_sync_lsn, |
1033 | log->l_curr_cycle, after_umount_blk); | 1033 | log->l_curr_cycle, after_umount_blk); |
1034 | *tail_blk = after_umount_blk; | 1034 | *tail_blk = after_umount_blk; |
1035 | 1035 | ||
1036 | /* | 1036 | /* |
1037 | * Note that the unmount was clean. If the unmount | 1037 | * Note that the unmount was clean. If the unmount |
1038 | * was not clean, we need to know this to rebuild the | 1038 | * was not clean, we need to know this to rebuild the |
1039 | * superblock counters from the perag headers if we | 1039 | * superblock counters from the perag headers if we |
1040 | * have a filesystem using non-persistent counters. | 1040 | * have a filesystem using non-persistent counters. |
1041 | */ | 1041 | */ |
1042 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; | 1042 | log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; |
1043 | } | 1043 | } |
1044 | } | 1044 | } |
1045 | 1045 | ||
1046 | /* | 1046 | /* |
1047 | * Make sure that there are no blocks in front of the head | 1047 | * Make sure that there are no blocks in front of the head |
1048 | * with the same cycle number as the head. This can happen | 1048 | * with the same cycle number as the head. This can happen |
1049 | * because we allow multiple outstanding log writes concurrently, | 1049 | * because we allow multiple outstanding log writes concurrently, |
1050 | * and the later writes might make it out before earlier ones. | 1050 | * and the later writes might make it out before earlier ones. |
1051 | * | 1051 | * |
1052 | * We use the lsn from before modifying it so that we'll never | 1052 | * We use the lsn from before modifying it so that we'll never |
1053 | * overwrite the unmount record after a clean unmount. | 1053 | * overwrite the unmount record after a clean unmount. |
1054 | * | 1054 | * |
1055 | * Do this only if we are going to recover the filesystem | 1055 | * Do this only if we are going to recover the filesystem |
1056 | * | 1056 | * |
1057 | * NOTE: This used to say "if (!readonly)" | 1057 | * NOTE: This used to say "if (!readonly)" |
1058 | * However on Linux, we can & do recover a read-only filesystem. | 1058 | * However on Linux, we can & do recover a read-only filesystem. |
1059 | * We only skip recovery if NORECOVERY is specified on mount, | 1059 | * We only skip recovery if NORECOVERY is specified on mount, |
1060 | * in which case we would not be here. | 1060 | * in which case we would not be here. |
1061 | * | 1061 | * |
1062 | * But... if the -device- itself is readonly, just skip this. | 1062 | * But... if the -device- itself is readonly, just skip this. |
1063 | * We can't recover this device anyway, so it won't matter. | 1063 | * We can't recover this device anyway, so it won't matter. |
1064 | */ | 1064 | */ |
1065 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) | 1065 | if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) |
1066 | error = xlog_clear_stale_blocks(log, tail_lsn); | 1066 | error = xlog_clear_stale_blocks(log, tail_lsn); |
1067 | 1067 | ||
1068 | done: | 1068 | done: |
1069 | xlog_put_bp(bp); | 1069 | xlog_put_bp(bp); |
1070 | 1070 | ||
1071 | if (error) | 1071 | if (error) |
1072 | xfs_warn(log->l_mp, "failed to locate log tail"); | 1072 | xfs_warn(log->l_mp, "failed to locate log tail"); |
1073 | return error; | 1073 | return error; |
1074 | } | 1074 | } |
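The hblks computation above (how many basic blocks a record header occupies) recurs throughout recovery, so here is a hypothetical standalone restatement with the kernel constants written out as assumptions: v2 logs whose in-core log buffers exceed XLOG_HEADER_CYCLE_SIZE (32k) spread the record header over one basic block per 32k of iclog size; everything else uses a single header block.

    /* Hypothetical helper mirroring the hblks logic above. */
    static int toy_rhead_blocks(int log_is_v2, unsigned int h_version,
                                unsigned int h_size)
    {
            const unsigned int cycle_size = 32 * 1024; /* XLOG_HEADER_CYCLE_SIZE */
            const unsigned int version_2 = 2;          /* XLOG_VERSION_2 (assumed) */

            if (log_is_v2 && (h_version & version_2) && h_size > cycle_size)
                    return (h_size + cycle_size - 1) / cycle_size;
            return 1;
    }

A 256k iclog therefore carries an 8-block header, which matches the xhdrs arithmetic used when validating the head earlier in this file.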
1075 | 1075 | ||
1076 | /* | 1076 | /* |
1077 | * Is the log zeroed at all? | 1077 | * Is the log zeroed at all? |
1078 | * | 1078 | * |
1079 | * The last binary search should be changed to perform an X block read | 1079 | * The last binary search should be changed to perform an X block read |
1080 | * once X becomes small enough. You can then search linearly through | 1080 | * once X becomes small enough. You can then search linearly through |
1081 | * the X blocks. This will cut down on the number of reads we need to do. | 1081 | * the X blocks. This will cut down on the number of reads we need to do. |
1082 | * | 1082 | * |
1083 | * If the log is partially zeroed, this routine will pass back the blkno | 1083 | * If the log is partially zeroed, this routine will pass back the blkno |
1084 | * of the first block with cycle number 0. It won't have a complete LR | 1084 | * of the first block with cycle number 0. It won't have a complete LR |
1085 | * preceding it. | 1085 | * preceding it. |
1086 | * | 1086 | * |
1087 | * Return: | 1087 | * Return: |
1088 | * 0 => the log is completely written to | 1088 | * 0 => the log is completely written to |
1089 | * -1 => use *blk_no as the first block of the log | 1089 | * -1 => use *blk_no as the first block of the log |
1090 | * >0 => error has occurred | 1090 | * >0 => error has occurred |
1091 | */ | 1091 | */ |
1092 | STATIC int | 1092 | STATIC int |
1093 | xlog_find_zeroed( | 1093 | xlog_find_zeroed( |
1094 | struct xlog *log, | 1094 | struct xlog *log, |
1095 | xfs_daddr_t *blk_no) | 1095 | xfs_daddr_t *blk_no) |
1096 | { | 1096 | { |
1097 | xfs_buf_t *bp; | 1097 | xfs_buf_t *bp; |
1098 | xfs_caddr_t offset; | 1098 | xfs_caddr_t offset; |
1099 | uint first_cycle, last_cycle; | 1099 | uint first_cycle, last_cycle; |
1100 | xfs_daddr_t new_blk, last_blk, start_blk; | 1100 | xfs_daddr_t new_blk, last_blk, start_blk; |
1101 | xfs_daddr_t num_scan_bblks; | 1101 | xfs_daddr_t num_scan_bblks; |
1102 | int error, log_bbnum = log->l_logBBsize; | 1102 | int error, log_bbnum = log->l_logBBsize; |
1103 | 1103 | ||
1104 | *blk_no = 0; | 1104 | *blk_no = 0; |
1105 | 1105 | ||
1106 | /* check totally zeroed log */ | 1106 | /* check totally zeroed log */ |
1107 | bp = xlog_get_bp(log, 1); | 1107 | bp = xlog_get_bp(log, 1); |
1108 | if (!bp) | 1108 | if (!bp) |
1109 | return ENOMEM; | 1109 | return ENOMEM; |
1110 | error = xlog_bread(log, 0, 1, bp, &offset); | 1110 | error = xlog_bread(log, 0, 1, bp, &offset); |
1111 | if (error) | 1111 | if (error) |
1112 | goto bp_err; | 1112 | goto bp_err; |
1113 | 1113 | ||
1114 | first_cycle = xlog_get_cycle(offset); | 1114 | first_cycle = xlog_get_cycle(offset); |
1115 | if (first_cycle == 0) { /* completely zeroed log */ | 1115 | if (first_cycle == 0) { /* completely zeroed log */ |
1116 | *blk_no = 0; | 1116 | *blk_no = 0; |
1117 | xlog_put_bp(bp); | 1117 | xlog_put_bp(bp); |
1118 | return -1; | 1118 | return -1; |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | /* check partially zeroed log */ | 1121 | /* check partially zeroed log */ |
1122 | error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); | 1122 | error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); |
1123 | if (error) | 1123 | if (error) |
1124 | goto bp_err; | 1124 | goto bp_err; |
1125 | 1125 | ||
1126 | last_cycle = xlog_get_cycle(offset); | 1126 | last_cycle = xlog_get_cycle(offset); |
1127 | if (last_cycle != 0) { /* log completely written to */ | 1127 | if (last_cycle != 0) { /* log completely written to */ |
1128 | xlog_put_bp(bp); | 1128 | xlog_put_bp(bp); |
1129 | return 0; | 1129 | return 0; |
1130 | } else if (first_cycle != 1) { | 1130 | } else if (first_cycle != 1) { |
1131 | /* | 1131 | /* |
1132 | * If the cycle of the last block is zero, the cycle of | 1132 | * If the cycle of the last block is zero, the cycle of |
1133 | * the first block must be 1. If it's not, maybe we're | 1133 | * the first block must be 1. If it's not, maybe we're |
1134 | * not looking at a log... Bail out. | 1134 | * not looking at a log... Bail out. |
1135 | */ | 1135 | */ |
1136 | xfs_warn(log->l_mp, | 1136 | xfs_warn(log->l_mp, |
1137 | "Log inconsistent or not a log (last==0, first!=1)"); | 1137 | "Log inconsistent or not a log (last==0, first!=1)"); |
1138 | error = XFS_ERROR(EINVAL); goto bp_err; /* don't leak bp */ | 1138 | error = XFS_ERROR(EINVAL); goto bp_err; /* don't leak bp */ |
1139 | } | 1139 | } |
1140 | 1140 | ||
1141 | /* we have a partially zeroed log */ | 1141 | /* we have a partially zeroed log */ |
1142 | last_blk = log_bbnum-1; | 1142 | last_blk = log_bbnum-1; |
1143 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) | 1143 | if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) |
1144 | goto bp_err; | 1144 | goto bp_err; |
1145 | 1145 | ||
1146 | /* | 1146 | /* |
1147 | * Validate the answer. Because there is no way to guarantee that | 1147 | * Validate the answer. Because there is no way to guarantee that |
1148 | * the entire log is made up of log records which are the same size, | 1148 | * the entire log is made up of log records which are the same size, |
1149 | * we scan over the defined maximum blocks. At this point, the maximum | 1149 | * we scan over the defined maximum blocks. At this point, the maximum |
1150 | * is not chosen to mean anything special. XXXmiken | 1150 | * is not chosen to mean anything special. XXXmiken |
1151 | */ | 1151 | */ |
1152 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); | 1152 | num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); |
1153 | ASSERT(num_scan_bblks <= INT_MAX); | 1153 | ASSERT(num_scan_bblks <= INT_MAX); |
1154 | 1154 | ||
1155 | if (last_blk < num_scan_bblks) | 1155 | if (last_blk < num_scan_bblks) |
1156 | num_scan_bblks = last_blk; | 1156 | num_scan_bblks = last_blk; |
1157 | start_blk = last_blk - num_scan_bblks; | 1157 | start_blk = last_blk - num_scan_bblks; |
1158 | 1158 | ||
1159 | /* | 1159 | /* |
1160 | * We search for any instances of cycle number 0 that occur before | 1160 | * We search for any instances of cycle number 0 that occur before |
1161 | * our current estimate of the head. What we're trying to detect is | 1161 | * our current estimate of the head. What we're trying to detect is |
1162 | * 1 ... | 0 | 1 | 0... | 1162 | * 1 ... | 0 | 1 | 0... |
1163 | * ^ binary search ends here | 1163 | * ^ binary search ends here |
1164 | */ | 1164 | */ |
1165 | if ((error = xlog_find_verify_cycle(log, start_blk, | 1165 | if ((error = xlog_find_verify_cycle(log, start_blk, |
1166 | (int)num_scan_bblks, 0, &new_blk))) | 1166 | (int)num_scan_bblks, 0, &new_blk))) |
1167 | goto bp_err; | 1167 | goto bp_err; |
1168 | if (new_blk != -1) | 1168 | if (new_blk != -1) |
1169 | last_blk = new_blk; | 1169 | last_blk = new_blk; |
1170 | 1170 | ||
1171 | /* | 1171 | /* |
1172 | * Potentially back up over a partial log record write. We don't need | 1172 | * Potentially back up over a partial log record write. We don't need |
1173 | * to search the end of the log because we know it is zero. | 1173 | * to search the end of the log because we know it is zero. |
1174 | */ | 1174 | */ |
1175 | if ((error = xlog_find_verify_log_record(log, start_blk, | 1175 | if ((error = xlog_find_verify_log_record(log, start_blk, |
1176 | &last_blk, 0)) == -1) { | 1176 | &last_blk, 0)) == -1) { |
1177 | error = XFS_ERROR(EIO); | 1177 | error = XFS_ERROR(EIO); |
1178 | goto bp_err; | 1178 | goto bp_err; |
1179 | } else if (error) | 1179 | } else if (error) |
1180 | goto bp_err; | 1180 | goto bp_err; |
1181 | 1181 | ||
1182 | *blk_no = last_blk; | 1182 | *blk_no = last_blk; |
1183 | bp_err: | 1183 | bp_err: |
1184 | xlog_put_bp(bp); | 1184 | xlog_put_bp(bp); |
1185 | if (error) | 1185 | if (error) |
1186 | return error; | 1186 | return error; |
1187 | return -1; | 1187 | return -1; |
1188 | } | 1188 | } |
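The three outcomes documented at the top of xlog_find_zeroed() depend only on the first and last cycle numbers; a compact illustrative restatement (plain ints stand in for the XFS error macros):

    /* Illustrative restatement of the outcomes; not kernel code. */
    #define TOY_EINVAL	22

    static int toy_classify_log(unsigned int first_cycle,
                                unsigned int last_cycle)
    {
            if (first_cycle == 0)
                    return -1;          /* completely zeroed */
            if (last_cycle != 0)
                    return 0;           /* completely written to */
            if (first_cycle != 1)
                    return TOY_EINVAL;  /* inconsistent, not a log */
            return -1;  /* partially zeroed; caller locates the first 0 block */
    }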
1189 | 1189 | ||
1190 | /* | 1190 | /* |
1191 | * These are simple subroutines used by xlog_clear_stale_blocks() below | 1191 | * These are simple subroutines used by xlog_clear_stale_blocks() below |
1192 | * to initialize a buffer full of empty log record headers and write | 1192 | * to initialize a buffer full of empty log record headers and write |
1193 | * them into the log. | 1193 | * them into the log. |
1194 | */ | 1194 | */ |
1195 | STATIC void | 1195 | STATIC void |
1196 | xlog_add_record( | 1196 | xlog_add_record( |
1197 | struct xlog *log, | 1197 | struct xlog *log, |
1198 | xfs_caddr_t buf, | 1198 | xfs_caddr_t buf, |
1199 | int cycle, | 1199 | int cycle, |
1200 | int block, | 1200 | int block, |
1201 | int tail_cycle, | 1201 | int tail_cycle, |
1202 | int tail_block) | 1202 | int tail_block) |
1203 | { | 1203 | { |
1204 | xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; | 1204 | xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; |
1205 | 1205 | ||
1206 | memset(buf, 0, BBSIZE); | 1206 | memset(buf, 0, BBSIZE); |
1207 | recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); | 1207 | recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); |
1208 | recp->h_cycle = cpu_to_be32(cycle); | 1208 | recp->h_cycle = cpu_to_be32(cycle); |
1209 | recp->h_version = cpu_to_be32( | 1209 | recp->h_version = cpu_to_be32( |
1210 | xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); | 1210 | xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); |
1211 | recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); | 1211 | recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); |
1212 | recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); | 1212 | recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); |
1213 | recp->h_fmt = cpu_to_be32(XLOG_FMT); | 1213 | recp->h_fmt = cpu_to_be32(XLOG_FMT); |
1214 | memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); | 1214 | memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); |
1215 | } | 1215 | } |
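xlog_add_record() leans on xlog_assign_lsn() to stamp h_lsn and h_tail_lsn; the packing it relies on (cycle number in the high 32 bits, basic-block number in the low 32 bits) is also what lets BLOCK_LSN() and CYCLE_LSN() elsewhere in this file recover a block or cycle directly from a tail lsn. A minimal sketch of that layout, assuming only 64-bit LSNs (these toy helpers are not the kernel's definitions):

    /* Minimal sketch of LSN packing; not the kernel's definitions. */
    typedef unsigned long long toy_lsn_t;

    static toy_lsn_t toy_assign_lsn(unsigned int cycle, unsigned int block)
    {
            return ((toy_lsn_t)cycle << 32) | block;
    }

    static unsigned int toy_cycle_lsn(toy_lsn_t lsn)
    {
            return (unsigned int)(lsn >> 32);
    }

    static unsigned int toy_block_lsn(toy_lsn_t lsn)
    {
            return (unsigned int)lsn;
    }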
1216 | 1216 | ||
1217 | STATIC int | 1217 | STATIC int |
1218 | xlog_write_log_records( | 1218 | xlog_write_log_records( |
1219 | struct xlog *log, | 1219 | struct xlog *log, |
1220 | int cycle, | 1220 | int cycle, |
1221 | int start_block, | 1221 | int start_block, |
1222 | int blocks, | 1222 | int blocks, |
1223 | int tail_cycle, | 1223 | int tail_cycle, |
1224 | int tail_block) | 1224 | int tail_block) |
1225 | { | 1225 | { |
1226 | xfs_caddr_t offset; | 1226 | xfs_caddr_t offset; |
1227 | xfs_buf_t *bp; | 1227 | xfs_buf_t *bp; |
1228 | int balign, ealign; | 1228 | int balign, ealign; |
1229 | int sectbb = log->l_sectBBsize; | 1229 | int sectbb = log->l_sectBBsize; |
1230 | int end_block = start_block + blocks; | 1230 | int end_block = start_block + blocks; |
1231 | int bufblks; | 1231 | int bufblks; |
1232 | int error = 0; | 1232 | int error = 0; |
1233 | int i, j = 0; | 1233 | int i, j = 0; |
1234 | 1234 | ||
1235 | /* | 1235 | /* |
1236 | * Greedily allocate a buffer big enough to handle the full | 1236 | * Greedily allocate a buffer big enough to handle the full |
1237 | * range of basic blocks to be written. If that fails, try | 1237 | * range of basic blocks to be written. If that fails, try |
1238 | * a smaller size. We need to be able to write at least a | 1238 | * a smaller size. We need to be able to write at least a |
1239 | * log sector, or we're out of luck. | 1239 | * log sector, or we're out of luck. |
1240 | */ | 1240 | */ |
1241 | bufblks = 1 << ffs(blocks); | 1241 | bufblks = 1 << ffs(blocks); |
1242 | while (bufblks > log->l_logBBsize) | 1242 | while (bufblks > log->l_logBBsize) |
1243 | bufblks >>= 1; | 1243 | bufblks >>= 1; |
1244 | while (!(bp = xlog_get_bp(log, bufblks))) { | 1244 | while (!(bp = xlog_get_bp(log, bufblks))) { |
1245 | bufblks >>= 1; | 1245 | bufblks >>= 1; |
1246 | if (bufblks < sectbb) | 1246 | if (bufblks < sectbb) |
1247 | return ENOMEM; | 1247 | return ENOMEM; |
1248 | } | 1248 | } |
1249 | 1249 | ||
1250 | /* We may need to do a read at the start to fill in part of | 1250 | /* We may need to do a read at the start to fill in part of |
1251 | * the buffer in the starting sector not covered by the first | 1251 | * the buffer in the starting sector not covered by the first |
1252 | * write below. | 1252 | * write below. |
1253 | */ | 1253 | */ |
1254 | balign = round_down(start_block, sectbb); | 1254 | balign = round_down(start_block, sectbb); |
1255 | if (balign != start_block) { | 1255 | if (balign != start_block) { |
1256 | error = xlog_bread_noalign(log, start_block, 1, bp); | 1256 | error = xlog_bread_noalign(log, start_block, 1, bp); |
1257 | if (error) | 1257 | if (error) |
1258 | goto out_put_bp; | 1258 | goto out_put_bp; |
1259 | 1259 | ||
1260 | j = start_block - balign; | 1260 | j = start_block - balign; |
1261 | } | 1261 | } |
1262 | 1262 | ||
1263 | for (i = start_block; i < end_block; i += bufblks) { | 1263 | for (i = start_block; i < end_block; i += bufblks) { |
1264 | int bcount, endcount; | 1264 | int bcount, endcount; |
1265 | 1265 | ||
1266 | bcount = min(bufblks, end_block - start_block); | 1266 | bcount = min(bufblks, end_block - start_block); |
1267 | endcount = bcount - j; | 1267 | endcount = bcount - j; |
1268 | 1268 | ||
1269 | /* We may need to do a read at the end to fill in part of | 1269 | /* We may need to do a read at the end to fill in part of |
1270 | * the buffer in the final sector not covered by the write. | 1270 | * the buffer in the final sector not covered by the write. |
1271 | * If this is the same sector as the above read, skip it. | 1271 | * If this is the same sector as the above read, skip it. |
1272 | */ | 1272 | */ |
1273 | ealign = round_down(end_block, sectbb); | 1273 | ealign = round_down(end_block, sectbb); |
1274 | if (j == 0 && (start_block + endcount > ealign)) { | 1274 | if (j == 0 && (start_block + endcount > ealign)) { |
1275 | offset = bp->b_addr + BBTOB(ealign - start_block); | 1275 | offset = bp->b_addr + BBTOB(ealign - start_block); |
1276 | error = xlog_bread_offset(log, ealign, sectbb, | 1276 | error = xlog_bread_offset(log, ealign, sectbb, |
1277 | bp, offset); | 1277 | bp, offset); |
1278 | if (error) | 1278 | if (error) |
1279 | break; | 1279 | break; |
1280 | 1280 | ||
1281 | } | 1281 | } |
1282 | 1282 | ||
1283 | offset = xlog_align(log, start_block, endcount, bp); | 1283 | offset = xlog_align(log, start_block, endcount, bp); |
1284 | for (; j < endcount; j++) { | 1284 | for (; j < endcount; j++) { |
1285 | xlog_add_record(log, offset, cycle, i+j, | 1285 | xlog_add_record(log, offset, cycle, i+j, |
1286 | tail_cycle, tail_block); | 1286 | tail_cycle, tail_block); |
1287 | offset += BBSIZE; | 1287 | offset += BBSIZE; |
1288 | } | 1288 | } |
1289 | error = xlog_bwrite(log, start_block, endcount, bp); | 1289 | error = xlog_bwrite(log, start_block, endcount, bp); |
1290 | if (error) | 1290 | if (error) |
1291 | break; | 1291 | break; |
1292 | start_block += endcount; | 1292 | start_block += endcount; |
1293 | j = 0; | 1293 | j = 0; |
1294 | } | 1294 | } |
1295 | 1295 | ||
1296 | out_put_bp: | 1296 | out_put_bp: |
1297 | xlog_put_bp(bp); | 1297 | xlog_put_bp(bp); |
1298 | return error; | 1298 | return error; |
1299 | } | 1299 | } |
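The xlog_bread_noalign()/xlog_bread_offset() calls in this routine exist only to make sector-unaligned clears safe: the sectors at either edge of the range are read back first so that stamping records doesn't destroy neighbouring on-disk blocks. A worked example with assumed numbers (not from the commit): on a 4k-sector device sectbb = 8, so clearing from basic block 13 rounds balign down to 8, the first sector is pre-read, and j = 5 makes the stamping loop skip the five preserved blocks.

    /* Illustrative: leading blocks the first write must preserve. */
    static int toy_front_slack(int start_block, int sectbb)
    {
            /* round_down(start_block, sectbb), spelled out */
            int balign = (start_block / sectbb) * sectbb;

            /* start_block = 13, sectbb = 8  ->  balign = 8, slack = 5 */
            return start_block - balign;
    }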
1300 | 1300 | ||
1301 | /* | 1301 | /* |
1302 | * This routine is called to blow away any incomplete log writes out | 1302 | * This routine is called to blow away any incomplete log writes out |
1303 | * in front of the log head. We do this so that we won't become confused | 1303 | * in front of the log head. We do this so that we won't become confused |
1304 | * if we come up, write only a little bit more, and then crash again. | 1304 | * if we come up, write only a little bit more, and then crash again. |
1305 | * If we leave the partial log records out there, this situation could | 1305 | * If we leave the partial log records out there, this situation could |
1306 | * cause us to think those partial writes are valid blocks since they | 1306 | * cause us to think those partial writes are valid blocks since they |
1307 | * have the current cycle number. We get rid of them by overwriting them | 1307 | * have the current cycle number. We get rid of them by overwriting them |
1308 | * with empty log records with the old cycle number rather than the | 1308 | * with empty log records with the old cycle number rather than the |
1309 | * current one. | 1309 | * current one. |
1310 | * | 1310 | * |
1311 | * The tail lsn is passed in rather than taken from | 1311 | * The tail lsn is passed in rather than taken from |
1312 | * the log so that we will not write over the unmount record after a | 1312 | * the log so that we will not write over the unmount record after a |
1313 | * clean unmount in a 512 block log. Doing so would leave the log without | 1313 | * clean unmount in a 512 block log. Doing so would leave the log without |
1314 | * any valid log records in it until a new one was written. If we crashed | 1314 | * any valid log records in it until a new one was written. If we crashed |
1315 | * during that time we would not be able to recover. | 1315 | * during that time we would not be able to recover. |
1316 | */ | 1316 | */ |
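The routine below first converts the head and tail positions into the distance it may clear; the two cases its inline comments describe reduce to this illustrative sketch: when head and tail share a cycle the distance wraps through the physical end of the log, otherwise the head physically trails the tail within one cycle.

    /* Compact sketch of the two tail-distance cases in the routine below. */
    static int toy_tail_distance(int head_cycle, int head_block,
                                 int tail_cycle, int tail_block,
                                 int log_bbsize)
    {
            if (head_cycle == tail_cycle)
                    /* tail physically behind the head: wrap past the end */
                    return tail_block + (log_bbsize - head_block);

            /* head physically behind the tail (head_cycle == tail_cycle + 1) */
            return tail_block - head_block;
    }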
1317 | STATIC int | 1317 | STATIC int |
1318 | xlog_clear_stale_blocks( | 1318 | xlog_clear_stale_blocks( |
1319 | struct xlog *log, | 1319 | struct xlog *log, |
1320 | xfs_lsn_t tail_lsn) | 1320 | xfs_lsn_t tail_lsn) |
1321 | { | 1321 | { |
1322 | int tail_cycle, head_cycle; | 1322 | int tail_cycle, head_cycle; |
1323 | int tail_block, head_block; | 1323 | int tail_block, head_block; |
1324 | int tail_distance, max_distance; | 1324 | int tail_distance, max_distance; |
1325 | int distance; | 1325 | int distance; |
1326 | int error; | 1326 | int error; |
1327 | 1327 | ||
1328 | tail_cycle = CYCLE_LSN(tail_lsn); | 1328 | tail_cycle = CYCLE_LSN(tail_lsn); |
1329 | tail_block = BLOCK_LSN(tail_lsn); | 1329 | tail_block = BLOCK_LSN(tail_lsn); |
1330 | head_cycle = log->l_curr_cycle; | 1330 | head_cycle = log->l_curr_cycle; |
1331 | head_block = log->l_curr_block; | 1331 | head_block = log->l_curr_block; |
1332 | 1332 | ||
1333 | /* | 1333 | /* |
1334 | * Figure out the distance between the new head of the log | 1334 | * Figure out the distance between the new head of the log |
1335 | * and the tail. We want to write over any blocks beyond the | 1335 | * and the tail. We want to write over any blocks beyond the |
1336 | * head that we may have written just before the crash, but | 1336 | * head that we may have written just before the crash, but |
1337 | * we don't want to overwrite the tail of the log. | 1337 | * we don't want to overwrite the tail of the log. |
1338 | */ | 1338 | */ |
1339 | if (head_cycle == tail_cycle) { | 1339 | if (head_cycle == tail_cycle) { |
1340 | /* | 1340 | /* |
1341 | * The tail is behind the head in the physical log, | 1341 | * The tail is behind the head in the physical log, |
1342 | * so the distance from the head to the tail is the | 1342 | * so the distance from the head to the tail is the |
1343 | * distance from the head to the end of the log plus | 1343 | * distance from the head to the end of the log plus |
1344 | * the distance from the beginning of the log to the | 1344 | * the distance from the beginning of the log to the |
1345 | * tail. | 1345 | * tail. |
1346 | */ | 1346 | */ |
1347 | if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { | 1347 | if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { |
1348 | XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", | 1348 | XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", |
1349 | XFS_ERRLEVEL_LOW, log->l_mp); | 1349 | XFS_ERRLEVEL_LOW, log->l_mp); |
1350 | return XFS_ERROR(EFSCORRUPTED); | 1350 | return XFS_ERROR(EFSCORRUPTED); |
1351 | } | 1351 | } |
1352 | tail_distance = tail_block + (log->l_logBBsize - head_block); | 1352 | tail_distance = tail_block + (log->l_logBBsize - head_block); |
1353 | } else { | 1353 | } else { |
1354 | /* | 1354 | /* |
1355 | * The head is behind the tail in the physical log, | 1355 | * The head is behind the tail in the physical log, |
1356 | * so the distance from the head to the tail is just | 1356 | * so the distance from the head to the tail is just |
1357 | * the tail block minus the head block. | 1357 | * the tail block minus the head block. |
1358 | */ | 1358 | */ |
1359 | if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ | 1359 | if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ |
1360 | XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", | 1360 | XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", |
1361 | XFS_ERRLEVEL_LOW, log->l_mp); | 1361 | XFS_ERRLEVEL_LOW, log->l_mp); |
1362 | return XFS_ERROR(EFSCORRUPTED); | 1362 | return XFS_ERROR(EFSCORRUPTED); |
1363 | } | 1363 | } |
1364 | tail_distance = tail_block - head_block; | 1364 | tail_distance = tail_block - head_block; |
1365 | } | 1365 | } |
1366 | 1366 | ||
1367 | /* | 1367 | /* |
1368 | * If the head is right up against the tail, we can't clear | 1368 | * If the head is right up against the tail, we can't clear |
1369 | * anything. | 1369 | * anything. |
1370 | */ | 1370 | */ |
1371 | if (tail_distance <= 0) { | 1371 | if (tail_distance <= 0) { |
1372 | ASSERT(tail_distance == 0); | 1372 | ASSERT(tail_distance == 0); |
1373 | return 0; | 1373 | return 0; |
1374 | } | 1374 | } |
1375 | 1375 | ||
1376 | max_distance = XLOG_TOTAL_REC_SHIFT(log); | 1376 | max_distance = XLOG_TOTAL_REC_SHIFT(log); |
1377 | /* | 1377 | /* |
1378 | * Take the smaller of the maximum amount of outstanding I/O | 1378 | * Take the smaller of the maximum amount of outstanding I/O |
1379 | * we could have and the distance to the tail to clear out. | 1379 | * we could have and the distance to the tail to clear out. |
1380 | * We take the smaller so that we don't overwrite the tail and | 1380 | * We take the smaller so that we don't overwrite the tail and |
1381 | * we don't waste all day writing from the head to the tail | 1381 | * we don't waste all day writing from the head to the tail |
1382 | * for no reason. | 1382 | * for no reason. |
1383 | */ | 1383 | */ |
1384 | max_distance = MIN(max_distance, tail_distance); | 1384 | max_distance = MIN(max_distance, tail_distance); |
1385 | 1385 | ||
1386 | if ((head_block + max_distance) <= log->l_logBBsize) { | 1386 | if ((head_block + max_distance) <= log->l_logBBsize) { |
1387 | /* | 1387 | /* |
1388 | * We can stomp all the blocks we need to without | 1388 | * We can stomp all the blocks we need to without |
1389 | * wrapping around the end of the log. Just do it | 1389 | * wrapping around the end of the log. Just do it |
1390 | * in a single write. Use the cycle number of the | 1390 | * in a single write. Use the cycle number of the |
1391 | * current cycle minus one so that the log will look like: | 1391 | * current cycle minus one so that the log will look like: |
1392 | * n ... | n - 1 ... | 1392 | * n ... | n - 1 ... |
1393 | */ | 1393 | */ |
1394 | error = xlog_write_log_records(log, (head_cycle - 1), | 1394 | error = xlog_write_log_records(log, (head_cycle - 1), |
1395 | head_block, max_distance, tail_cycle, | 1395 | head_block, max_distance, tail_cycle, |
1396 | tail_block); | 1396 | tail_block); |
1397 | if (error) | 1397 | if (error) |
1398 | return error; | 1398 | return error; |
1399 | } else { | 1399 | } else { |
1400 | /* | 1400 | /* |
1401 | * We need to wrap around the end of the physical log in | 1401 | * We need to wrap around the end of the physical log in |
1402 | * order to clear all the blocks. Do it in two separate | 1402 | * order to clear all the blocks. Do it in two separate |
1403 | * I/Os. The first write should be from the head to the | 1403 | * I/Os. The first write should be from the head to the |
1404 | * end of the physical log, and it should use the current | 1404 | * end of the physical log, and it should use the current |
1405 | * cycle number minus one just like above. | 1405 | * cycle number minus one just like above. |
1406 | */ | 1406 | */ |
1407 | distance = log->l_logBBsize - head_block; | 1407 | distance = log->l_logBBsize - head_block; |
1408 | error = xlog_write_log_records(log, (head_cycle - 1), | 1408 | error = xlog_write_log_records(log, (head_cycle - 1), |
1409 | head_block, distance, tail_cycle, | 1409 | head_block, distance, tail_cycle, |
1410 | tail_block); | 1410 | tail_block); |
1411 | 1411 | ||
1412 | if (error) | 1412 | if (error) |
1413 | return error; | 1413 | return error; |
1414 | 1414 | ||
1415 | /* | 1415 | /* |
1416 | * Now write the blocks at the start of the physical log. | 1416 | * Now write the blocks at the start of the physical log. |
1417 | * This writes the remainder of the blocks we want to clear. | 1417 | * This writes the remainder of the blocks we want to clear. |
1418 | * It uses the current cycle number since we're now on the | 1418 | * It uses the current cycle number since we're now on the |
1419 | * same cycle as the head so that we get: | 1419 | * same cycle as the head so that we get: |
1420 | * n ... n ... | n - 1 ... | 1420 | * n ... n ... | n - 1 ... |
1421 | * ^^^^^ blocks we're writing | 1421 | * ^^^^^ blocks we're writing |
1422 | */ | 1422 | */ |
1423 | distance = max_distance - (log->l_logBBsize - head_block); | 1423 | distance = max_distance - (log->l_logBBsize - head_block); |
1424 | error = xlog_write_log_records(log, head_cycle, 0, distance, | 1424 | error = xlog_write_log_records(log, head_cycle, 0, distance, |
1425 | tail_cycle, tail_block); | 1425 | tail_cycle, tail_block); |
1426 | if (error) | 1426 | if (error) |
1427 | return error; | 1427 | return error; |
1428 | } | 1428 | } |
1429 | 1429 | ||
1430 | return 0; | 1430 | return 0; |
1431 | } | 1431 | } |
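/*
 * [Editor's sketch -- not part of this commit.]  The head/tail distance
 * arithmetic above in miniature, assuming a log of log_size basic blocks
 * and the two cases the function accepts: head and tail in the same
 * cycle (tail physically behind the head), or the head exactly one cycle
 * ahead of the tail.  All names here are hypothetical.
 */
static int
stale_clear_distance(
	int		head_cycle,
	int		head_block,
	int		tail_cycle,
	int		tail_block,
	int		log_size)
{
	if (head_cycle == tail_cycle)
		/* wraps: head to end of log, then start of log to tail */
		return tail_block + (log_size - head_block);
	/* head_cycle == tail_cycle + 1: plain gap from head up to tail */
	return tail_block - head_block;
}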
1432 | 1432 | ||
1433 | /****************************************************************************** | 1433 | /****************************************************************************** |
1434 | * | 1434 | * |
1435 | * Log recovery routines | 1435 | * Log recovery routines |
1436 | * | 1436 | * |
1437 | ****************************************************************************** | 1437 | ****************************************************************************** |
1438 | */ | 1438 | */ |
1439 | 1439 | ||
1440 | STATIC xlog_recover_t * | 1440 | STATIC xlog_recover_t * |
1441 | xlog_recover_find_tid( | 1441 | xlog_recover_find_tid( |
1442 | struct hlist_head *head, | 1442 | struct hlist_head *head, |
1443 | xlog_tid_t tid) | 1443 | xlog_tid_t tid) |
1444 | { | 1444 | { |
1445 | xlog_recover_t *trans; | 1445 | xlog_recover_t *trans; |
1446 | 1446 | ||
1447 | hlist_for_each_entry(trans, head, r_list) { | 1447 | hlist_for_each_entry(trans, head, r_list) { |
1448 | if (trans->r_log_tid == tid) | 1448 | if (trans->r_log_tid == tid) |
1449 | return trans; | 1449 | return trans; |
1450 | } | 1450 | } |
1451 | return NULL; | 1451 | return NULL; |
1452 | } | 1452 | } |
1453 | 1453 | ||
1454 | STATIC void | 1454 | STATIC void |
1455 | xlog_recover_new_tid( | 1455 | xlog_recover_new_tid( |
1456 | struct hlist_head *head, | 1456 | struct hlist_head *head, |
1457 | xlog_tid_t tid, | 1457 | xlog_tid_t tid, |
1458 | xfs_lsn_t lsn) | 1458 | xfs_lsn_t lsn) |
1459 | { | 1459 | { |
1460 | xlog_recover_t *trans; | 1460 | xlog_recover_t *trans; |
1461 | 1461 | ||
1462 | trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); | 1462 | trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); |
1463 | trans->r_log_tid = tid; | 1463 | trans->r_log_tid = tid; |
1464 | trans->r_lsn = lsn; | 1464 | trans->r_lsn = lsn; |
1465 | INIT_LIST_HEAD(&trans->r_itemq); | 1465 | INIT_LIST_HEAD(&trans->r_itemq); |
1466 | 1466 | ||
1467 | INIT_HLIST_NODE(&trans->r_list); | 1467 | INIT_HLIST_NODE(&trans->r_list); |
1468 | hlist_add_head(&trans->r_list, head); | 1468 | hlist_add_head(&trans->r_list, head); |
1469 | } | 1469 | } |
1470 | 1470 | ||
1471 | STATIC void | 1471 | STATIC void |
1472 | xlog_recover_add_item( | 1472 | xlog_recover_add_item( |
1473 | struct list_head *head) | 1473 | struct list_head *head) |
1474 | { | 1474 | { |
1475 | xlog_recover_item_t *item; | 1475 | xlog_recover_item_t *item; |
1476 | 1476 | ||
1477 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); | 1477 | item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); |
1478 | INIT_LIST_HEAD(&item->ri_list); | 1478 | INIT_LIST_HEAD(&item->ri_list); |
1479 | list_add_tail(&item->ri_list, head); | 1479 | list_add_tail(&item->ri_list, head); |
1480 | } | 1480 | } |
1481 | 1481 | ||
1482 | STATIC int | 1482 | STATIC int |
1483 | xlog_recover_add_to_cont_trans( | 1483 | xlog_recover_add_to_cont_trans( |
1484 | struct xlog *log, | 1484 | struct xlog *log, |
1485 | struct xlog_recover *trans, | 1485 | struct xlog_recover *trans, |
1486 | xfs_caddr_t dp, | 1486 | xfs_caddr_t dp, |
1487 | int len) | 1487 | int len) |
1488 | { | 1488 | { |
1489 | xlog_recover_item_t *item; | 1489 | xlog_recover_item_t *item; |
1490 | xfs_caddr_t ptr, old_ptr; | 1490 | xfs_caddr_t ptr, old_ptr; |
1491 | int old_len; | 1491 | int old_len; |
1492 | 1492 | ||
1493 | if (list_empty(&trans->r_itemq)) { | 1493 | if (list_empty(&trans->r_itemq)) { |
1494 | /* finish copying rest of trans header */ | 1494 | /* finish copying rest of trans header */ |
1495 | xlog_recover_add_item(&trans->r_itemq); | 1495 | xlog_recover_add_item(&trans->r_itemq); |
1496 | ptr = (xfs_caddr_t) &trans->r_theader + | 1496 | ptr = (xfs_caddr_t) &trans->r_theader + |
1497 | sizeof(xfs_trans_header_t) - len; | 1497 | sizeof(xfs_trans_header_t) - len; |
1498 | memcpy(ptr, dp, len); /* dest, src, len */ | 1498 | memcpy(ptr, dp, len); /* dest, src, len */ |
1499 | return 0; | 1499 | return 0; |
1500 | } | 1500 | } |
1501 | /* take the tail entry */ | 1501 | /* take the tail entry */ |
1502 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); | 1502 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); |
1503 | 1503 | ||
1504 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; | 1504 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; |
1505 | old_len = item->ri_buf[item->ri_cnt-1].i_len; | 1505 | old_len = item->ri_buf[item->ri_cnt-1].i_len; |
1506 | 1506 | ||
1507 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); | 1507 | ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); |
1508 | memcpy(&ptr[old_len], dp, len); /* dest, src, len */ | 1508 | memcpy(&ptr[old_len], dp, len); /* dest, src, len */ |
1509 | item->ri_buf[item->ri_cnt-1].i_len += len; | 1509 | item->ri_buf[item->ri_cnt-1].i_len += len; |
1510 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; | 1510 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; |
1511 | trace_xfs_log_recover_item_add_cont(log, trans, item, 0); | 1511 | trace_xfs_log_recover_item_add_cont(log, trans, item, 0); |
1512 | return 0; | 1512 | return 0; |
1513 | } | 1513 | } |
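/*
 * [Editor's sketch -- not part of this commit.]  The continuation path
 * above is a plain grow-and-append: the last region buffer of the last
 * item is reallocated and the new bytes land on its tail.  Userspace
 * equivalent with hypothetical names (kmem_realloc(..., KM_SLEEP)
 * sleeps until it succeeds, so the kernel code has no NULL check):
 */
#include <stdlib.h>
#include <string.h>

static char *
append_region(char *old_ptr, int old_len, const char *dp, int len)
{
	char		*ptr;

	ptr = realloc(old_ptr, old_len + len);
	if (!ptr)
		return NULL;
	memcpy(ptr + old_len, dp, len);	/* dest, src, len */
	return ptr;
}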
1514 | 1514 | ||
1515 | /* | 1515 | /* |
1516 | * The next region to add is the start of a new region. It could be | 1516 | * The next region to add is the start of a new region. It could be |
1517 | * a whole region or it could be the first part of a new region. Because | 1517 | * a whole region or it could be the first part of a new region. Because |
1518 | * of this, the assumption here is that the type and size fields of all | 1518 | * of this, the assumption here is that the type and size fields of all |
1519 | * format structures fit into the first 32 bits of the structure. | 1519 | * format structures fit into the first 32 bits of the structure. |
1520 | * | 1520 | * |
1521 | * This works because all regions must be 32-bit aligned. Therefore, we | 1521 | * This works because all regions must be 32-bit aligned. Therefore, we |
1522 | * either have both fields or we have neither field. In the case we have | 1522 | * either have both fields or we have neither field. In the case we have |
1523 | * neither field, the data part of the region is zero length. We only have | 1523 | * neither field, the data part of the region is zero length. We only have |
1524 | * a log_op_header and can throw away the header since a new one will appear | 1524 | * a log_op_header and can throw away the header since a new one will appear |
1525 | * later. If we have at least 4 bytes, then we can determine how many regions | 1525 | * later. If we have at least 4 bytes, then we can determine how many regions |
1526 | * will appear in the current log item. | 1526 | * will appear in the current log item. |
1527 | */ | 1527 | */ |
1528 | STATIC int | 1528 | STATIC int |
1529 | xlog_recover_add_to_trans( | 1529 | xlog_recover_add_to_trans( |
1530 | struct xlog *log, | 1530 | struct xlog *log, |
1531 | struct xlog_recover *trans, | 1531 | struct xlog_recover *trans, |
1532 | xfs_caddr_t dp, | 1532 | xfs_caddr_t dp, |
1533 | int len) | 1533 | int len) |
1534 | { | 1534 | { |
1535 | xfs_inode_log_format_t *in_f; /* any will do */ | 1535 | xfs_inode_log_format_t *in_f; /* any will do */ |
1536 | xlog_recover_item_t *item; | 1536 | xlog_recover_item_t *item; |
1537 | xfs_caddr_t ptr; | 1537 | xfs_caddr_t ptr; |
1538 | 1538 | ||
1539 | if (!len) | 1539 | if (!len) |
1540 | return 0; | 1540 | return 0; |
1541 | if (list_empty(&trans->r_itemq)) { | 1541 | if (list_empty(&trans->r_itemq)) { |
1542 | /* we need to catch log corruptions here */ | 1542 | /* we need to catch log corruptions here */ |
1543 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { | 1543 | if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { |
1544 | xfs_warn(log->l_mp, "%s: bad header magic number", | 1544 | xfs_warn(log->l_mp, "%s: bad header magic number", |
1545 | __func__); | 1545 | __func__); |
1546 | ASSERT(0); | 1546 | ASSERT(0); |
1547 | return XFS_ERROR(EIO); | 1547 | return XFS_ERROR(EIO); |
1548 | } | 1548 | } |
1549 | if (len == sizeof(xfs_trans_header_t)) | 1549 | if (len == sizeof(xfs_trans_header_t)) |
1550 | xlog_recover_add_item(&trans->r_itemq); | 1550 | xlog_recover_add_item(&trans->r_itemq); |
1551 | memcpy(&trans->r_theader, dp, len); /* dest, src, len */ | 1551 | memcpy(&trans->r_theader, dp, len); /* dest, src, len */ |
1552 | return 0; | 1552 | return 0; |
1553 | } | 1553 | } |
1554 | 1554 | ||
1555 | ptr = kmem_alloc(len, KM_SLEEP); | 1555 | ptr = kmem_alloc(len, KM_SLEEP); |
1556 | memcpy(ptr, dp, len); | 1556 | memcpy(ptr, dp, len); |
1557 | in_f = (xfs_inode_log_format_t *)ptr; | 1557 | in_f = (xfs_inode_log_format_t *)ptr; |
1558 | 1558 | ||
1559 | /* take the tail entry */ | 1559 | /* take the tail entry */ |
1560 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); | 1560 | item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); |
1561 | if (item->ri_total != 0 && | 1561 | if (item->ri_total != 0 && |
1562 | item->ri_total == item->ri_cnt) { | 1562 | item->ri_total == item->ri_cnt) { |
1563 | /* tail item is in use, get a new one */ | 1563 | /* tail item is in use, get a new one */ |
1564 | xlog_recover_add_item(&trans->r_itemq); | 1564 | xlog_recover_add_item(&trans->r_itemq); |
1565 | item = list_entry(trans->r_itemq.prev, | 1565 | item = list_entry(trans->r_itemq.prev, |
1566 | xlog_recover_item_t, ri_list); | 1566 | xlog_recover_item_t, ri_list); |
1567 | } | 1567 | } |
1568 | 1568 | ||
1569 | if (item->ri_total == 0) { /* first region to be added */ | 1569 | if (item->ri_total == 0) { /* first region to be added */ |
1570 | if (in_f->ilf_size == 0 || | 1570 | if (in_f->ilf_size == 0 || |
1571 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { | 1571 | in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { |
1572 | xfs_warn(log->l_mp, | 1572 | xfs_warn(log->l_mp, |
1573 | "bad number of regions (%d) in inode log format", | 1573 | "bad number of regions (%d) in inode log format", |
1574 | in_f->ilf_size); | 1574 | in_f->ilf_size); |
1575 | ASSERT(0); | 1575 | ASSERT(0); |
1576 | return XFS_ERROR(EIO); | 1576 | return XFS_ERROR(EIO); |
1577 | } | 1577 | } |
1578 | 1578 | ||
1579 | item->ri_total = in_f->ilf_size; | 1579 | item->ri_total = in_f->ilf_size; |
1580 | item->ri_buf = | 1580 | item->ri_buf = |
1581 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), | 1581 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), |
1582 | KM_SLEEP); | 1582 | KM_SLEEP); |
1583 | } | 1583 | } |
1584 | ASSERT(item->ri_total > item->ri_cnt); | 1584 | ASSERT(item->ri_total > item->ri_cnt); |
1585 | /* Description region is ri_buf[0] */ | 1585 | /* Description region is ri_buf[0] */ |
1586 | item->ri_buf[item->ri_cnt].i_addr = ptr; | 1586 | item->ri_buf[item->ri_cnt].i_addr = ptr; |
1587 | item->ri_buf[item->ri_cnt].i_len = len; | 1587 | item->ri_buf[item->ri_cnt].i_len = len; |
1588 | item->ri_cnt++; | 1588 | item->ri_cnt++; |
1589 | trace_xfs_log_recover_item_add(log, trans, item, 0); | 1589 | trace_xfs_log_recover_item_add(log, trans, item, 0); |
1590 | return 0; | 1590 | return 0; |
1591 | } | 1591 | } |
1592 | 1592 | ||
1593 | /* | 1593 | /* |
1594 | * Sort the log items in the transaction. Cancelled buffers need | 1594 | * Sort the log items in the transaction. Cancelled buffers need |
1595 | * to be put first so they are processed before any items that might | 1595 | * to be put first so they are processed before any items that might |
1596 | * modify the buffers. If they are cancelled, then the modifications | 1596 | * modify the buffers. If they are cancelled, then the modifications |
1597 | * don't need to be replayed. | 1597 | * don't need to be replayed. |
1598 | */ | 1598 | */ |
1599 | STATIC int | 1599 | STATIC int |
1600 | xlog_recover_reorder_trans( | 1600 | xlog_recover_reorder_trans( |
1601 | struct xlog *log, | 1601 | struct xlog *log, |
1602 | struct xlog_recover *trans, | 1602 | struct xlog_recover *trans, |
1603 | int pass) | 1603 | int pass) |
1604 | { | 1604 | { |
1605 | xlog_recover_item_t *item, *n; | 1605 | xlog_recover_item_t *item, *n; |
1606 | LIST_HEAD(sort_list); | 1606 | LIST_HEAD(sort_list); |
1607 | 1607 | ||
1608 | list_splice_init(&trans->r_itemq, &sort_list); | 1608 | list_splice_init(&trans->r_itemq, &sort_list); |
1609 | list_for_each_entry_safe(item, n, &sort_list, ri_list) { | 1609 | list_for_each_entry_safe(item, n, &sort_list, ri_list) { |
1610 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 1610 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
1611 | 1611 | ||
1612 | switch (ITEM_TYPE(item)) { | 1612 | switch (ITEM_TYPE(item)) { |
1613 | case XFS_LI_BUF: | 1613 | case XFS_LI_BUF: |
1614 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { | 1614 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1615 | trace_xfs_log_recover_item_reorder_head(log, | 1615 | trace_xfs_log_recover_item_reorder_head(log, |
1616 | trans, item, pass); | 1616 | trans, item, pass); |
1617 | list_move(&item->ri_list, &trans->r_itemq); | 1617 | list_move(&item->ri_list, &trans->r_itemq); |
1618 | break; | 1618 | break; |
1619 | } | 1619 | } |
1620 | case XFS_LI_INODE: | 1620 | case XFS_LI_INODE: |
1621 | case XFS_LI_DQUOT: | 1621 | case XFS_LI_DQUOT: |
1622 | case XFS_LI_QUOTAOFF: | 1622 | case XFS_LI_QUOTAOFF: |
1623 | case XFS_LI_EFD: | 1623 | case XFS_LI_EFD: |
1624 | case XFS_LI_EFI: | 1624 | case XFS_LI_EFI: |
1625 | trace_xfs_log_recover_item_reorder_tail(log, | 1625 | trace_xfs_log_recover_item_reorder_tail(log, |
1626 | trans, item, pass); | 1626 | trans, item, pass); |
1627 | list_move_tail(&item->ri_list, &trans->r_itemq); | 1627 | list_move_tail(&item->ri_list, &trans->r_itemq); |
1628 | break; | 1628 | break; |
1629 | default: | 1629 | default: |
1630 | xfs_warn(log->l_mp, | 1630 | xfs_warn(log->l_mp, |
1631 | "%s: unrecognized type of log operation", | 1631 | "%s: unrecognized type of log operation", |
1632 | __func__); | 1632 | __func__); |
1633 | ASSERT(0); | 1633 | ASSERT(0); |
1634 | return XFS_ERROR(EIO); | 1634 | return XFS_ERROR(EIO); |
1635 | } | 1635 | } |
1636 | } | 1636 | } |
1637 | ASSERT(list_empty(&sort_list)); | 1637 | ASSERT(list_empty(&sort_list)); |
1638 | return 0; | 1638 | return 0; |
1639 | } | 1639 | } |
1640 | 1640 | ||
1641 | /* | 1641 | /* |
1642 | * Build up the table of buf cancel records so that we don't replay | 1642 | * Build up the table of buf cancel records so that we don't replay |
1643 | * cancelled data in the second pass. For buffer records that are | 1643 | * cancelled data in the second pass. For buffer records that are |
1644 | * not cancel records, there is nothing to do here so we just return. | 1644 | * not cancel records, there is nothing to do here so we just return. |
1645 | * | 1645 | * |
1646 | * If we get a cancel record which is already in the table, this indicates | 1646 | * If we get a cancel record which is already in the table, this indicates |
1647 | * that the buffer was cancelled multiple times. In order to ensure | 1647 | * that the buffer was cancelled multiple times. In order to ensure |
1648 | * that during pass 2 we keep the record in the table until we reach its | 1648 | * that during pass 2 we keep the record in the table until we reach its |
1649 | * last occurrence in the log, we keep a reference count in the cancel | 1649 | * last occurrence in the log, we keep a reference count in the cancel |
1650 | * record in the table to tell us how many times we expect to see this | 1650 | * record in the table to tell us how many times we expect to see this |
1651 | * record during the second pass. | 1651 | * record during the second pass. |
1652 | */ | 1652 | */ |
1653 | STATIC int | 1653 | STATIC int |
1654 | xlog_recover_buffer_pass1( | 1654 | xlog_recover_buffer_pass1( |
1655 | struct xlog *log, | 1655 | struct xlog *log, |
1656 | struct xlog_recover_item *item) | 1656 | struct xlog_recover_item *item) |
1657 | { | 1657 | { |
1658 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 1658 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
1659 | struct list_head *bucket; | 1659 | struct list_head *bucket; |
1660 | struct xfs_buf_cancel *bcp; | 1660 | struct xfs_buf_cancel *bcp; |
1661 | 1661 | ||
1662 | /* | 1662 | /* |
1663 | * If this isn't a cancel buffer item, then just return. | 1663 | * If this isn't a cancel buffer item, then just return. |
1664 | */ | 1664 | */ |
1665 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { | 1665 | if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
1666 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); | 1666 | trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
1667 | return 0; | 1667 | return 0; |
1668 | } | 1668 | } |
1669 | 1669 | ||
1670 | /* | 1670 | /* |
1671 | * Insert an xfs_buf_cancel record into the hash table of them. | 1671 | * Insert an xfs_buf_cancel record into the hash table of them. |
1672 | * If there is already an identical record, bump its reference count. | 1672 | * If there is already an identical record, bump its reference count. |
1673 | */ | 1673 | */ |
1674 | bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); | 1674 | bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); |
1675 | list_for_each_entry(bcp, bucket, bc_list) { | 1675 | list_for_each_entry(bcp, bucket, bc_list) { |
1676 | if (bcp->bc_blkno == buf_f->blf_blkno && | 1676 | if (bcp->bc_blkno == buf_f->blf_blkno && |
1677 | bcp->bc_len == buf_f->blf_len) { | 1677 | bcp->bc_len == buf_f->blf_len) { |
1678 | bcp->bc_refcount++; | 1678 | bcp->bc_refcount++; |
1679 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); | 1679 | trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); |
1680 | return 0; | 1680 | return 0; |
1681 | } | 1681 | } |
1682 | } | 1682 | } |
1683 | 1683 | ||
1684 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); | 1684 | bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); |
1685 | bcp->bc_blkno = buf_f->blf_blkno; | 1685 | bcp->bc_blkno = buf_f->blf_blkno; |
1686 | bcp->bc_len = buf_f->blf_len; | 1686 | bcp->bc_len = buf_f->blf_len; |
1687 | bcp->bc_refcount = 1; | 1687 | bcp->bc_refcount = 1; |
1688 | list_add_tail(&bcp->bc_list, bucket); | 1688 | list_add_tail(&bcp->bc_list, bucket); |
1689 | 1689 | ||
1690 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); | 1690 | trace_xfs_log_recover_buf_cancel_add(log, buf_f); |
1691 | return 0; | 1691 | return 0; |
1692 | } | 1692 | } |
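/*
 * [Editor's sketch -- not part of this commit.]  A userspace analogue
 * of the cancel table entry and the pass-1 insert above: a chained hash
 * keyed by (blkno, len), with a refcount recording how many times the
 * cancel record appears in the log.  All names are hypothetical.
 */
#include <stdlib.h>

struct buf_cancel {
	struct buf_cancel	*next;
	unsigned long long	blkno;
	unsigned int		len;
	int			refcount;
};

static void
cancel_add(struct buf_cancel **bucket, unsigned long long blkno,
	   unsigned int len)
{
	struct buf_cancel	*bcp;

	for (bcp = *bucket; bcp; bcp = bcp->next) {
		if (bcp->blkno == blkno && bcp->len == len) {
			bcp->refcount++;	/* cancelled again: bump */
			return;
		}
	}
	bcp = malloc(sizeof(*bcp));
	if (!bcp)
		return;		/* kmem_alloc(KM_SLEEP) cannot fail here */
	bcp->blkno = blkno;
	bcp->len = len;
	bcp->refcount = 1;
	bcp->next = *bucket;
	*bucket = bcp;
}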
1693 | 1693 | ||
1694 | /* | 1694 | /* |
1695 | * Check to see whether the buffer being recovered has a corresponding | 1695 | * Check to see whether the buffer being recovered has a corresponding |
1696 | * entry in the buffer cancel record table. If it does then return 1 | 1696 | * entry in the buffer cancel record table. If it does then return 1 |
1697 | * so that it will be cancelled, otherwise return 0. If the buffer is | 1697 | * so that it will be cancelled, otherwise return 0. If the buffer is |
1698 | * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement | 1698 | * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement |
1699 | * the refcount on the entry in the table and remove it from the table | 1699 | * the refcount on the entry in the table and remove it from the table |
1700 | * if this is the last reference. | 1700 | * if this is the last reference. |
1701 | * | 1701 | * |
1702 | * We remove the cancel record from the table when we encounter its | 1702 | * We remove the cancel record from the table when we encounter its |
1703 | * last occurrence in the log so that if the same buffer is re-used | 1703 | * last occurrence in the log so that if the same buffer is re-used |
1704 | * again after its last cancellation we actually replay the changes | 1704 | * again after its last cancellation we actually replay the changes |
1705 | * made at that point. | 1705 | * made at that point. |
1706 | */ | 1706 | */ |
1707 | STATIC int | 1707 | STATIC int |
1708 | xlog_check_buffer_cancelled( | 1708 | xlog_check_buffer_cancelled( |
1709 | struct xlog *log, | 1709 | struct xlog *log, |
1710 | xfs_daddr_t blkno, | 1710 | xfs_daddr_t blkno, |
1711 | uint len, | 1711 | uint len, |
1712 | ushort flags) | 1712 | ushort flags) |
1713 | { | 1713 | { |
1714 | struct list_head *bucket; | 1714 | struct list_head *bucket; |
1715 | struct xfs_buf_cancel *bcp; | 1715 | struct xfs_buf_cancel *bcp; |
1716 | 1716 | ||
1717 | if (log->l_buf_cancel_table == NULL) { | 1717 | if (log->l_buf_cancel_table == NULL) { |
1718 | /* | 1718 | /* |
1719 | * There is nothing in the table built in pass one, | 1719 | * There is nothing in the table built in pass one, |
1720 | * so this buffer must not be cancelled. | 1720 | * so this buffer must not be cancelled. |
1721 | */ | 1721 | */ |
1722 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1722 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1723 | return 0; | 1723 | return 0; |
1724 | } | 1724 | } |
1725 | 1725 | ||
1726 | /* | 1726 | /* |
1727 | * Search for an entry in the cancel table that matches our buffer. | 1727 | * Search for an entry in the cancel table that matches our buffer. |
1728 | */ | 1728 | */ |
1729 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); | 1729 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
1730 | list_for_each_entry(bcp, bucket, bc_list) { | 1730 | list_for_each_entry(bcp, bucket, bc_list) { |
1731 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) | 1731 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
1732 | goto found; | 1732 | goto found; |
1733 | } | 1733 | } |
1734 | 1734 | ||
1735 | /* | 1735 | /* |
1736 | * We didn't find a corresponding entry in the table, so return 0 so | 1736 | * We didn't find a corresponding entry in the table, so return 0 so |
1737 | * that the buffer is NOT cancelled. | 1737 | * that the buffer is NOT cancelled. |
1738 | */ | 1738 | */ |
1739 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1739 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1740 | return 0; | 1740 | return 0; |
1741 | 1741 | ||
1742 | found: | 1742 | found: |
1743 | /* | 1743 | /* |
1744 | * We've got a match, so return 1 so that the recovery of this buffer | 1744 | * We've got a match, so return 1 so that the recovery of this buffer |
1745 | * is cancelled. If this buffer is actually a buffer cancel log | 1745 | * is cancelled. If this buffer is actually a buffer cancel log |
1746 | * item, then decrement the refcount on the one in the table and | 1746 | * item, then decrement the refcount on the one in the table and |
1747 | * remove it if this is the last reference. | 1747 | * remove it if this is the last reference. |
1748 | */ | 1748 | */ |
1749 | if (flags & XFS_BLF_CANCEL) { | 1749 | if (flags & XFS_BLF_CANCEL) { |
1750 | if (--bcp->bc_refcount == 0) { | 1750 | if (--bcp->bc_refcount == 0) { |
1751 | list_del(&bcp->bc_list); | 1751 | list_del(&bcp->bc_list); |
1752 | kmem_free(bcp); | 1752 | kmem_free(bcp); |
1753 | } | 1753 | } |
1754 | } | 1754 | } |
1755 | return 1; | 1755 | return 1; |
1756 | } | 1756 | } |
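/*
 * [Editor's sketch -- not part of this commit.]  The pass-2 side of the
 * same table, reusing struct buf_cancel and <stdlib.h> from the pass-1
 * sketch above: a hit means "skip replay", and a hit on a cancel item
 * itself drops the refcount, unhashing the entry at its last occurrence.
 */
static int
cancel_check(struct buf_cancel **bucket, unsigned long long blkno,
	     unsigned int len, int is_cancel_item)
{
	struct buf_cancel	*bcp, **prev = bucket;

	for (bcp = *bucket; bcp; prev = &bcp->next, bcp = bcp->next) {
		if (bcp->blkno != blkno || bcp->len != len)
			continue;
		if (is_cancel_item && --bcp->refcount == 0) {
			*prev = bcp->next;	/* last occurrence: unhash */
			free(bcp);
		}
		return 1;		/* cancelled: do not replay */
	}
	return 0;			/* not cancelled: replay */
}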
1757 | 1757 | ||
1758 | /* | 1758 | /* |
1759 | * Perform recovery for a buffer full of inodes. In these buffers, the only | 1759 | * Perform recovery for a buffer full of inodes. In these buffers, the only |
1760 | * data which should be recovered is that which corresponds to the | 1760 | * data which should be recovered is that which corresponds to the |
1761 | * di_next_unlinked pointers in the on disk inode structures. The rest of the | 1761 | * di_next_unlinked pointers in the on disk inode structures. The rest of the |
1762 | * data for the inodes is always logged through the inodes themselves rather | 1762 | * data for the inodes is always logged through the inodes themselves rather |
1763 | * than the inode buffer and is recovered in xlog_recover_inode_pass2(). | 1763 | * than the inode buffer and is recovered in xlog_recover_inode_pass2(). |
1764 | * | 1764 | * |
1765 | * The only time when buffers full of inodes are fully recovered is when the | 1765 | * The only time when buffers full of inodes are fully recovered is when the |
1766 | * buffer is full of newly allocated inodes. In this case the buffer will | 1766 | * buffer is full of newly allocated inodes. In this case the buffer will |
1767 | * not be marked as an inode buffer and so will be sent to | 1767 | * not be marked as an inode buffer and so will be sent to |
1768 | * xlog_recover_do_reg_buffer() below during recovery. | 1768 | * xlog_recover_do_reg_buffer() below during recovery. |
1769 | */ | 1769 | */ |
1770 | STATIC int | 1770 | STATIC int |
1771 | xlog_recover_do_inode_buffer( | 1771 | xlog_recover_do_inode_buffer( |
1772 | struct xfs_mount *mp, | 1772 | struct xfs_mount *mp, |
1773 | xlog_recover_item_t *item, | 1773 | xlog_recover_item_t *item, |
1774 | struct xfs_buf *bp, | 1774 | struct xfs_buf *bp, |
1775 | xfs_buf_log_format_t *buf_f) | 1775 | xfs_buf_log_format_t *buf_f) |
1776 | { | 1776 | { |
1777 | int i; | 1777 | int i; |
1778 | int item_index = 0; | 1778 | int item_index = 0; |
1779 | int bit = 0; | 1779 | int bit = 0; |
1780 | int nbits = 0; | 1780 | int nbits = 0; |
1781 | int reg_buf_offset = 0; | 1781 | int reg_buf_offset = 0; |
1782 | int reg_buf_bytes = 0; | 1782 | int reg_buf_bytes = 0; |
1783 | int next_unlinked_offset; | 1783 | int next_unlinked_offset; |
1784 | int inodes_per_buf; | 1784 | int inodes_per_buf; |
1785 | xfs_agino_t *logged_nextp; | 1785 | xfs_agino_t *logged_nextp; |
1786 | xfs_agino_t *buffer_nextp; | 1786 | xfs_agino_t *buffer_nextp; |
1787 | 1787 | ||
1788 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1788 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
1789 | 1789 | ||
1790 | inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; | 1790 | inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; |
1791 | for (i = 0; i < inodes_per_buf; i++) { | 1791 | for (i = 0; i < inodes_per_buf; i++) { |
1792 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1792 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
1793 | offsetof(xfs_dinode_t, di_next_unlinked); | 1793 | offsetof(xfs_dinode_t, di_next_unlinked); |
1794 | 1794 | ||
1795 | while (next_unlinked_offset >= | 1795 | while (next_unlinked_offset >= |
1796 | (reg_buf_offset + reg_buf_bytes)) { | 1796 | (reg_buf_offset + reg_buf_bytes)) { |
1797 | /* | 1797 | /* |
1798 | * The next di_next_unlinked field is beyond | 1798 | * The next di_next_unlinked field is beyond |
1799 | * the current logged region. Find the next | 1799 | * the current logged region. Find the next |
1800 | * logged region that contains or is beyond | 1800 | * logged region that contains or is beyond |
1801 | * the current di_next_unlinked field. | 1801 | * the current di_next_unlinked field. |
1802 | */ | 1802 | */ |
1803 | bit += nbits; | 1803 | bit += nbits; |
1804 | bit = xfs_next_bit(buf_f->blf_data_map, | 1804 | bit = xfs_next_bit(buf_f->blf_data_map, |
1805 | buf_f->blf_map_size, bit); | 1805 | buf_f->blf_map_size, bit); |
1806 | 1806 | ||
1807 | /* | 1807 | /* |
1808 | * If there are no more logged regions in the | 1808 | * If there are no more logged regions in the |
1809 | * buffer, then we're done. | 1809 | * buffer, then we're done. |
1810 | */ | 1810 | */ |
1811 | if (bit == -1) | 1811 | if (bit == -1) |
1812 | return 0; | 1812 | return 0; |
1813 | 1813 | ||
1814 | nbits = xfs_contig_bits(buf_f->blf_data_map, | 1814 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1815 | buf_f->blf_map_size, bit); | 1815 | buf_f->blf_map_size, bit); |
1816 | ASSERT(nbits > 0); | 1816 | ASSERT(nbits > 0); |
1817 | reg_buf_offset = bit << XFS_BLF_SHIFT; | 1817 | reg_buf_offset = bit << XFS_BLF_SHIFT; |
1818 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; | 1818 | reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
1819 | item_index++; | 1819 | item_index++; |
1820 | } | 1820 | } |
1821 | 1821 | ||
1822 | /* | 1822 | /* |
1823 | * If the current logged region starts after the current | 1823 | * If the current logged region starts after the current |
1824 | * di_next_unlinked field, then move on to the next | 1824 | * di_next_unlinked field, then move on to the next |
1825 | * di_next_unlinked field. | 1825 | * di_next_unlinked field. |
1826 | */ | 1826 | */ |
1827 | if (next_unlinked_offset < reg_buf_offset) | 1827 | if (next_unlinked_offset < reg_buf_offset) |
1828 | continue; | 1828 | continue; |
1829 | 1829 | ||
1830 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1830 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1831 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); | 1831 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
1832 | ASSERT((reg_buf_offset + reg_buf_bytes) <= | 1832 | ASSERT((reg_buf_offset + reg_buf_bytes) <= |
1833 | BBTOB(bp->b_io_length)); | 1833 | BBTOB(bp->b_io_length)); |
1834 | 1834 | ||
1835 | /* | 1835 | /* |
1836 | * The current logged region contains a copy of the | 1836 | * The current logged region contains a copy of the |
1837 | * current di_next_unlinked field. Extract its value | 1837 | * current di_next_unlinked field. Extract its value |
1838 | * and copy it to the buffer copy. | 1838 | * and copy it to the buffer copy. |
1839 | */ | 1839 | */ |
1840 | logged_nextp = item->ri_buf[item_index].i_addr + | 1840 | logged_nextp = item->ri_buf[item_index].i_addr + |
1841 | next_unlinked_offset - reg_buf_offset; | 1841 | next_unlinked_offset - reg_buf_offset; |
1842 | if (unlikely(*logged_nextp == 0)) { | 1842 | if (unlikely(*logged_nextp == 0)) { |
1843 | xfs_alert(mp, | 1843 | xfs_alert(mp, |
1844 | "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " | 1844 | "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " |
1845 | "Trying to replay bad (0) inode di_next_unlinked field.", | 1845 | "Trying to replay bad (0) inode di_next_unlinked field.", |
1846 | item, bp); | 1846 | item, bp); |
1847 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", | 1847 | XFS_ERROR_REPORT("xlog_recover_do_inode_buf", |
1848 | XFS_ERRLEVEL_LOW, mp); | 1848 | XFS_ERRLEVEL_LOW, mp); |
1849 | return XFS_ERROR(EFSCORRUPTED); | 1849 | return XFS_ERROR(EFSCORRUPTED); |
1850 | } | 1850 | } |
1851 | 1851 | ||
1852 | buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, | 1852 | buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, |
1853 | next_unlinked_offset); | 1853 | next_unlinked_offset); |
1854 | *buffer_nextp = *logged_nextp; | 1854 | *buffer_nextp = *logged_nextp; |
1855 | } | 1855 | } |
1856 | 1856 | ||
1857 | return 0; | 1857 | return 0; |
1858 | } | 1858 | } |
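/*
 * [Editor's sketch -- not part of this commit.]  The dirty-region walk
 * used above and in xlog_recover_do_reg_buffer(): xfs_next_bit() finds
 * the next set bit in the dirty map, xfs_contig_bits() the length of
 * that run, and each bit stands for one 128-byte chunk (hence the
 * << XFS_BLF_SHIFT conversions to byte offsets).  Generic equivalents
 * over a little-endian word array, names hypothetical:
 */
static int
next_set_bit(const unsigned int *map, int nwords, int start)
{
	int		bit;

	for (bit = start; bit < nwords * 32; bit++)
		if (map[bit / 32] & (1u << (bit % 32)))
			return bit;
	return -1;			/* no more dirty regions */
}

static int
contig_set_bits(const unsigned int *map, int nwords, int start)
{
	int		bit = start;

	while (bit < nwords * 32 && (map[bit / 32] & (1u << (bit % 32))))
		bit++;
	return bit - start;		/* length of this dirty run */
}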
1859 | 1859 | ||
1860 | /* | 1860 | /* |
1861 | * Perform a 'normal' buffer recovery. Each logged region of the | 1861 | * Perform a 'normal' buffer recovery. Each logged region of the |
1862 | * buffer should be copied over the corresponding region in the | 1862 | * buffer should be copied over the corresponding region in the |
1863 | * given buffer. The bitmap in the buf log format structure indicates | 1863 | * given buffer. The bitmap in the buf log format structure indicates |
1864 | * where to place the logged data. | 1864 | * where to place the logged data. |
1865 | */ | 1865 | */ |
1866 | STATIC void | 1866 | STATIC void |
1867 | xlog_recover_do_reg_buffer( | 1867 | xlog_recover_do_reg_buffer( |
1868 | struct xfs_mount *mp, | 1868 | struct xfs_mount *mp, |
1869 | xlog_recover_item_t *item, | 1869 | xlog_recover_item_t *item, |
1870 | struct xfs_buf *bp, | 1870 | struct xfs_buf *bp, |
1871 | xfs_buf_log_format_t *buf_f) | 1871 | xfs_buf_log_format_t *buf_f) |
1872 | { | 1872 | { |
1873 | int i; | 1873 | int i; |
1874 | int bit; | 1874 | int bit; |
1875 | int nbits; | 1875 | int nbits; |
1876 | int error; | 1876 | int error; |
1877 | 1877 | ||
1878 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); | 1878 | trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); |
1879 | 1879 | ||
1880 | bit = 0; | 1880 | bit = 0; |
1881 | i = 1; /* 0 is the buf format structure */ | 1881 | i = 1; /* 0 is the buf format structure */ |
1882 | while (1) { | 1882 | while (1) { |
1883 | bit = xfs_next_bit(buf_f->blf_data_map, | 1883 | bit = xfs_next_bit(buf_f->blf_data_map, |
1884 | buf_f->blf_map_size, bit); | 1884 | buf_f->blf_map_size, bit); |
1885 | if (bit == -1) | 1885 | if (bit == -1) |
1886 | break; | 1886 | break; |
1887 | nbits = xfs_contig_bits(buf_f->blf_data_map, | 1887 | nbits = xfs_contig_bits(buf_f->blf_data_map, |
1888 | buf_f->blf_map_size, bit); | 1888 | buf_f->blf_map_size, bit); |
1889 | ASSERT(nbits > 0); | 1889 | ASSERT(nbits > 0); |
1890 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1890 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1891 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); | 1891 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
1892 | ASSERT(BBTOB(bp->b_io_length) >= | 1892 | ASSERT(BBTOB(bp->b_io_length) >= |
1893 | ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); | 1893 | ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); |
1894 | 1894 | ||
1895 | /* | 1895 | /* |
1896 | * Do a sanity check if this is a dquot buffer. Just checking | 1896 | * Do a sanity check if this is a dquot buffer. Just checking |
1897 | * the first dquot in the buffer should do. XXX This is | 1897 | * the first dquot in the buffer should do. XXX This is |
1898 | * probably a good thing to do for other buf types also. | 1898 | * probably a good thing to do for other buf types also. |
1899 | */ | 1899 | */ |
1900 | error = 0; | 1900 | error = 0; |
1901 | if (buf_f->blf_flags & | 1901 | if (buf_f->blf_flags & |
1902 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 1902 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
1903 | if (item->ri_buf[i].i_addr == NULL) { | 1903 | if (item->ri_buf[i].i_addr == NULL) { |
1904 | xfs_alert(mp, | 1904 | xfs_alert(mp, |
1905 | "XFS: NULL dquot in %s.", __func__); | 1905 | "XFS: NULL dquot in %s.", __func__); |
1906 | goto next; | 1906 | goto next; |
1907 | } | 1907 | } |
1908 | if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { | 1908 | if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { |
1909 | xfs_alert(mp, | 1909 | xfs_alert(mp, |
1910 | "XFS: dquot too small (%d) in %s.", | 1910 | "XFS: dquot too small (%d) in %s.", |
1911 | item->ri_buf[i].i_len, __func__); | 1911 | item->ri_buf[i].i_len, __func__); |
1912 | goto next; | 1912 | goto next; |
1913 | } | 1913 | } |
1914 | error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, | 1914 | error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, |
1915 | -1, 0, XFS_QMOPT_DOWARN, | 1915 | -1, 0, XFS_QMOPT_DOWARN, |
1916 | "dquot_buf_recover"); | 1916 | "dquot_buf_recover"); |
1917 | if (error) | 1917 | if (error) |
1918 | goto next; | 1918 | goto next; |
1919 | } | 1919 | } |
1920 | 1920 | ||
1921 | memcpy(xfs_buf_offset(bp, | 1921 | memcpy(xfs_buf_offset(bp, |
1922 | (uint)bit << XFS_BLF_SHIFT), /* dest */ | 1922 | (uint)bit << XFS_BLF_SHIFT), /* dest */ |
1923 | item->ri_buf[i].i_addr, /* source */ | 1923 | item->ri_buf[i].i_addr, /* source */ |
1924 | nbits<<XFS_BLF_SHIFT); /* length */ | 1924 | nbits<<XFS_BLF_SHIFT); /* length */ |
1925 | next: | 1925 | next: |
1926 | i++; | 1926 | i++; |
1927 | bit += nbits; | 1927 | bit += nbits; |
1928 | } | 1928 | } |
1929 | 1929 | ||
1930 | /* Shouldn't be any more regions */ | 1930 | /* Shouldn't be any more regions */ |
1931 | ASSERT(i == item->ri_total); | 1931 | ASSERT(i == item->ri_total); |
1932 | 1932 | ||
1933 | switch (buf_f->blf_flags & XFS_BLF_TYPE_MASK) { | 1933 | switch (buf_f->blf_flags & XFS_BLF_TYPE_MASK) { |
1934 | case XFS_BLF_BTREE_BUF: | 1934 | case XFS_BLF_BTREE_BUF: |
1935 | switch (be32_to_cpu(*(__be32 *)bp->b_addr)) { | 1935 | switch (be32_to_cpu(*(__be32 *)bp->b_addr)) { |
1936 | case XFS_ABTB_CRC_MAGIC: | 1936 | case XFS_ABTB_CRC_MAGIC: |
1937 | case XFS_ABTC_CRC_MAGIC: | 1937 | case XFS_ABTC_CRC_MAGIC: |
1938 | case XFS_ABTB_MAGIC: | 1938 | case XFS_ABTB_MAGIC: |
1939 | case XFS_ABTC_MAGIC: | 1939 | case XFS_ABTC_MAGIC: |
1940 | bp->b_ops = &xfs_allocbt_buf_ops; | 1940 | bp->b_ops = &xfs_allocbt_buf_ops; |
1941 | break; | 1941 | break; |
1942 | case XFS_IBT_CRC_MAGIC: | 1942 | case XFS_IBT_CRC_MAGIC: |
1943 | case XFS_IBT_MAGIC: | 1943 | case XFS_IBT_MAGIC: |
1944 | bp->b_ops = &xfs_inobt_buf_ops; | 1944 | bp->b_ops = &xfs_inobt_buf_ops; |
1945 | break; | 1945 | break; |
1946 | case XFS_BMAP_CRC_MAGIC: | 1946 | case XFS_BMAP_CRC_MAGIC: |
1947 | case XFS_BMAP_MAGIC: | 1947 | case XFS_BMAP_MAGIC: |
1948 | bp->b_ops = &xfs_bmbt_buf_ops; | 1948 | bp->b_ops = &xfs_bmbt_buf_ops; |
1949 | break; | 1949 | break; |
1950 | default: | 1950 | default: |
1951 | xfs_warn(mp, "Bad btree block magic!"); | 1951 | xfs_warn(mp, "Bad btree block magic!"); |
1952 | ASSERT(0); | 1952 | ASSERT(0); |
1953 | break; | 1953 | break; |
1954 | } | 1954 | } |
1955 | break; | 1955 | break; |
1956 | case XFS_BLF_AGF_BUF: | 1956 | case XFS_BLF_AGF_BUF: |
1957 | if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGF_MAGIC)) { | 1957 | if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGF_MAGIC)) { |
1958 | xfs_warn(mp, "Bad AGF block magic!"); | 1958 | xfs_warn(mp, "Bad AGF block magic!"); |
1959 | ASSERT(0); | 1959 | ASSERT(0); |
1960 | break; | 1960 | break; |
1961 | } | 1961 | } |
1962 | bp->b_ops = &xfs_agf_buf_ops; | 1962 | bp->b_ops = &xfs_agf_buf_ops; |
1963 | break; | 1963 | break; |
1964 | case XFS_BLF_AGFL_BUF: | 1964 | case XFS_BLF_AGFL_BUF: |
1965 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 1965 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
1966 | break; | 1966 | break; |
1967 | if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGFL_MAGIC)) { | 1967 | if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGFL_MAGIC)) { |
1968 | xfs_warn(mp, "Bad AGFL block magic!"); | 1968 | xfs_warn(mp, "Bad AGFL block magic!"); |
1969 | ASSERT(0); | 1969 | ASSERT(0); |
1970 | break; | 1970 | break; |
1971 | } | 1971 | } |
1972 | bp->b_ops = &xfs_agfl_buf_ops; | 1972 | bp->b_ops = &xfs_agfl_buf_ops; |
1973 | break; | 1973 | break; |
1974 | case XFS_BLF_AGI_BUF: | 1974 | case XFS_BLF_AGI_BUF: |
1975 | if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGI_MAGIC)) { | 1975 | if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGI_MAGIC)) { |
1976 | xfs_warn(mp, "Bad AGI block magic!"); | 1976 | xfs_warn(mp, "Bad AGI block magic!"); |
1977 | ASSERT(0); | 1977 | ASSERT(0); |
1978 | break; | 1978 | break; |
1979 | } | 1979 | } |
1980 | bp->b_ops = &xfs_agi_buf_ops; | 1980 | bp->b_ops = &xfs_agi_buf_ops; |
1981 | break; | 1981 | break; |
1982 | case XFS_BLF_UDQUOT_BUF: | ||
1983 | case XFS_BLF_PDQUOT_BUF: | ||
1984 | case XFS_BLF_GDQUOT_BUF: | ||
1985 | if (*(__be16 *)bp->b_addr != cpu_to_be16(XFS_DQUOT_MAGIC)) { | ||
1986 | xfs_warn(mp, "Bad DQUOT block magic!"); | ||
1987 | ASSERT(0); | ||
1988 | break; | ||
1989 | } | ||
1990 | bp->b_ops = &xfs_dquot_buf_ops; | ||
1991 | break; | ||
1982 | default: | 1992 | default: |
1983 | break; | 1993 | break; |
1984 | } | 1994 | } |
1985 | } | 1995 | } |
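/*
 * [Editor's sketch -- not part of this commit.]  The xfs_dquot_buf_ops
 * attached in the new hunk above verify, on CRC-enabled filesystems, a
 * checksum embedded in each on-disk quota block.  The general "checksum
 * around the CRC field" shape used by the helpers in xfs_cksum.h looks
 * roughly like this (assumes a crc32c() primitive; the seed and layout
 * details here are illustrative, not authoritative):
 */
#include <stddef.h>
#include <stdint.h>

extern uint32_t crc32c(uint32_t seed, const void *buf, size_t len);

static uint32_t
cksum_with_hole(const char *buf, size_t len, size_t crc_off)
{
	uint32_t	zero = 0;
	uint32_t	crc;

	crc = crc32c(~0U, buf, crc_off);	/* bytes before the field */
	crc = crc32c(crc, &zero, sizeof(zero));	/* the field, as zeroes */
	return crc32c(crc, buf + crc_off + sizeof(zero),
		      len - crc_off - sizeof(zero));	/* the rest */
}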
1986 | 1996 | ||
1987 | /* | 1997 | /* |
1988 | * Do some primitive error checking on ondisk dquot data structures. | 1998 | * Do some primitive error checking on ondisk dquot data structures. |
1989 | */ | 1999 | */ |
1990 | int | 2000 | int |
1991 | xfs_qm_dqcheck( | 2001 | xfs_qm_dqcheck( |
1992 | struct xfs_mount *mp, | 2002 | struct xfs_mount *mp, |
1993 | xfs_disk_dquot_t *ddq, | 2003 | xfs_disk_dquot_t *ddq, |
1994 | xfs_dqid_t id, | 2004 | xfs_dqid_t id, |
1995 | uint type, /* used only when IO_dorepair is true */ | 2005 | uint type, /* used only when IO_dorepair is true */ |
1996 | uint flags, | 2006 | uint flags, |
1997 | char *str) | 2007 | char *str) |
1998 | { | 2008 | { |
1999 | xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; | 2009 | xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; |
2000 | int errs = 0; | 2010 | int errs = 0; |
2001 | 2011 | ||
2002 | /* | 2012 | /* |
2003 | * We can encounter an uninitialized dquot buffer for 2 reasons: | 2013 | * We can encounter an uninitialized dquot buffer for 2 reasons: |
2004 | * 1. If we crash while deleting the quotainode(s), and those blks got | 2014 | * 1. If we crash while deleting the quotainode(s), and those blks got |
2005 | * used for user data. This is because we take the path of regular | 2015 | * used for user data. This is because we take the path of regular |
2006 | * file deletion; however, the size field of quotainodes is never | 2016 | * file deletion; however, the size field of quotainodes is never |
2007 | * updated, so all the tricks that we play in itruncate_finish | 2017 | * updated, so all the tricks that we play in itruncate_finish |
2008 | * don't quite matter. | 2018 | * don't quite matter. |
2009 | * | 2019 | * |
2010 | * 2. We don't replay the quota buffers when there's a quotaoff logitem. | 2020 | * 2. We don't replay the quota buffers when there's a quotaoff logitem. |
2011 | * But the allocation will be replayed so we'll end up with an | 2021 | * But the allocation will be replayed so we'll end up with an |
2012 | * uninitialized quota block. | 2022 | * uninitialized quota block. |
2013 | * | 2023 | * |
2014 | * This is all fine; things are still consistent, and we haven't lost | 2024 | * This is all fine; things are still consistent, and we haven't lost |
2015 | * any quota information. Just don't complain about bad dquot blks. | 2025 | * any quota information. Just don't complain about bad dquot blks. |
2016 | */ | 2026 | */ |
2017 | if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { | 2027 | if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { |
2018 | if (flags & XFS_QMOPT_DOWARN) | 2028 | if (flags & XFS_QMOPT_DOWARN) |
2019 | xfs_alert(mp, | 2029 | xfs_alert(mp, |
2020 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", | 2030 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", |
2021 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); | 2031 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); |
2022 | errs++; | 2032 | errs++; |
2023 | } | 2033 | } |
2024 | if (ddq->d_version != XFS_DQUOT_VERSION) { | 2034 | if (ddq->d_version != XFS_DQUOT_VERSION) { |
2025 | if (flags & XFS_QMOPT_DOWARN) | 2035 | if (flags & XFS_QMOPT_DOWARN) |
2026 | xfs_alert(mp, | 2036 | xfs_alert(mp, |
2027 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", | 2037 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", |
2028 | str, id, ddq->d_version, XFS_DQUOT_VERSION); | 2038 | str, id, ddq->d_version, XFS_DQUOT_VERSION); |
2029 | errs++; | 2039 | errs++; |
2030 | } | 2040 | } |
2031 | 2041 | ||
2032 | if (ddq->d_flags != XFS_DQ_USER && | 2042 | if (ddq->d_flags != XFS_DQ_USER && |
2033 | ddq->d_flags != XFS_DQ_PROJ && | 2043 | ddq->d_flags != XFS_DQ_PROJ && |
2034 | ddq->d_flags != XFS_DQ_GROUP) { | 2044 | ddq->d_flags != XFS_DQ_GROUP) { |
2035 | if (flags & XFS_QMOPT_DOWARN) | 2045 | if (flags & XFS_QMOPT_DOWARN) |
2036 | xfs_alert(mp, | 2046 | xfs_alert(mp, |
2037 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", | 2047 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", |
2038 | str, id, ddq->d_flags); | 2048 | str, id, ddq->d_flags); |
2039 | errs++; | 2049 | errs++; |
2040 | } | 2050 | } |
2041 | 2051 | ||
2042 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { | 2052 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { |
2043 | if (flags & XFS_QMOPT_DOWARN) | 2053 | if (flags & XFS_QMOPT_DOWARN) |
2044 | xfs_alert(mp, | 2054 | xfs_alert(mp, |
2045 | "%s : ondisk-dquot 0x%p, ID mismatch: " | 2055 | "%s : ondisk-dquot 0x%p, ID mismatch: " |
2046 | "0x%x expected, found id 0x%x", | 2056 | "0x%x expected, found id 0x%x", |
2047 | str, ddq, id, be32_to_cpu(ddq->d_id)); | 2057 | str, ddq, id, be32_to_cpu(ddq->d_id)); |
2048 | errs++; | 2058 | errs++; |
2049 | } | 2059 | } |
2050 | 2060 | ||
2051 | if (!errs && ddq->d_id) { | 2061 | if (!errs && ddq->d_id) { |
2052 | if (ddq->d_blk_softlimit && | 2062 | if (ddq->d_blk_softlimit && |
2053 | be64_to_cpu(ddq->d_bcount) > | 2063 | be64_to_cpu(ddq->d_bcount) > |
2054 | be64_to_cpu(ddq->d_blk_softlimit)) { | 2064 | be64_to_cpu(ddq->d_blk_softlimit)) { |
2055 | if (!ddq->d_btimer) { | 2065 | if (!ddq->d_btimer) { |
2056 | if (flags & XFS_QMOPT_DOWARN) | 2066 | if (flags & XFS_QMOPT_DOWARN) |
2057 | xfs_alert(mp, | 2067 | xfs_alert(mp, |
2058 | "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", | 2068 | "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", |
2059 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2069 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2060 | errs++; | 2070 | errs++; |
2061 | } | 2071 | } |
2062 | } | 2072 | } |
2063 | if (ddq->d_ino_softlimit && | 2073 | if (ddq->d_ino_softlimit && |
2064 | be64_to_cpu(ddq->d_icount) > | 2074 | be64_to_cpu(ddq->d_icount) > |
2065 | be64_to_cpu(ddq->d_ino_softlimit)) { | 2075 | be64_to_cpu(ddq->d_ino_softlimit)) { |
2066 | if (!ddq->d_itimer) { | 2076 | if (!ddq->d_itimer) { |
2067 | if (flags & XFS_QMOPT_DOWARN) | 2077 | if (flags & XFS_QMOPT_DOWARN) |
2068 | xfs_alert(mp, | 2078 | xfs_alert(mp, |
2069 | "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", | 2079 | "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", |
2070 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2080 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2071 | errs++; | 2081 | errs++; |
2072 | } | 2082 | } |
2073 | } | 2083 | } |
2074 | if (ddq->d_rtb_softlimit && | 2084 | if (ddq->d_rtb_softlimit && |
2075 | be64_to_cpu(ddq->d_rtbcount) > | 2085 | be64_to_cpu(ddq->d_rtbcount) > |
2076 | be64_to_cpu(ddq->d_rtb_softlimit)) { | 2086 | be64_to_cpu(ddq->d_rtb_softlimit)) { |
2077 | if (!ddq->d_rtbtimer) { | 2087 | if (!ddq->d_rtbtimer) { |
2078 | if (flags & XFS_QMOPT_DOWARN) | 2088 | if (flags & XFS_QMOPT_DOWARN) |
2079 | xfs_alert(mp, | 2089 | xfs_alert(mp, |
2080 | "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", | 2090 | "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", |
2081 | str, (int)be32_to_cpu(ddq->d_id), ddq); | 2091 | str, (int)be32_to_cpu(ddq->d_id), ddq); |
2082 | errs++; | 2092 | errs++; |
2083 | } | 2093 | } |
2084 | } | 2094 | } |
2085 | } | 2095 | } |
2086 | 2096 | ||
2087 | if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) | 2097 | if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) |
2088 | return errs; | 2098 | return errs; |
2089 | 2099 | ||
2090 | if (flags & XFS_QMOPT_DOWARN) | 2100 | if (flags & XFS_QMOPT_DOWARN) |
2091 | xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); | 2101 | xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); |
2092 | 2102 | ||
2093 | /* | 2103 | /* |
2094 | * Typically, a repair is only requested by quotacheck. | 2104 | * Typically, a repair is only requested by quotacheck. |
2095 | */ | 2105 | */ |
2096 | ASSERT(id != -1); | 2106 | ASSERT(id != -1); |
2097 | ASSERT(flags & XFS_QMOPT_DQREPAIR); | 2107 | ASSERT(flags & XFS_QMOPT_DQREPAIR); |
2098 | memset(d, 0, sizeof(xfs_dqblk_t)); | 2108 | memset(d, 0, sizeof(xfs_dqblk_t)); |
2099 | 2109 | ||
2100 | d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); | 2110 | d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); |
2101 | d->dd_diskdq.d_version = XFS_DQUOT_VERSION; | 2111 | d->dd_diskdq.d_version = XFS_DQUOT_VERSION; |
2102 | d->dd_diskdq.d_flags = type; | 2112 | d->dd_diskdq.d_flags = type; |
2103 | d->dd_diskdq.d_id = cpu_to_be32(id); | 2113 | d->dd_diskdq.d_id = cpu_to_be32(id); |
2104 | 2114 | ||
2105 | return errs; | 2115 | return errs; |
2106 | } | 2116 | } |
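/*
 * [Editor's sketch -- not part of this commit.]  The invariant checked
 * three times above (blocks, inodes, realtime blocks) in one place:
 * once usage exceeds a nonzero soft limit, the matching grace-period
 * timer must have been started.  Names hypothetical.
 */
static int
softlimit_timer_ok(unsigned long long count,
		   unsigned long long softlimit,
		   unsigned int timer)
{
	if (softlimit && count > softlimit)
		return timer != 0;	/* over the soft limit: timer runs */
	return 1;			/* at or under the limit: no rule */
}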
2107 | 2117 | ||
2108 | /* | 2118 | /* |
2109 | * Perform a dquot buffer recovery. | 2119 | * Perform a dquot buffer recovery. |
2110 | * Simple algorithm: if we have found a QUOTAOFF logitem of the same type | 2120 | * Simple algorithm: if we have found a QUOTAOFF logitem of the same type |
2111 | * (i.e. USR or GRP), then just toss this buffer away; don't recover it. | 2121 | * (i.e. USR or GRP), then just toss this buffer away; don't recover it. |
2112 | * Else, treat it as a regular buffer and do recovery. | 2122 | * Else, treat it as a regular buffer and do recovery. |
2113 | */ | 2123 | */ |
2114 | STATIC void | 2124 | STATIC void |
2115 | xlog_recover_do_dquot_buffer( | 2125 | xlog_recover_do_dquot_buffer( |
2116 | struct xfs_mount *mp, | 2126 | struct xfs_mount *mp, |
2117 | struct xlog *log, | 2127 | struct xlog *log, |
2118 | struct xlog_recover_item *item, | 2128 | struct xlog_recover_item *item, |
2119 | struct xfs_buf *bp, | 2129 | struct xfs_buf *bp, |
2120 | struct xfs_buf_log_format *buf_f) | 2130 | struct xfs_buf_log_format *buf_f) |
2121 | { | 2131 | { |
2122 | uint type; | 2132 | uint type; |
2123 | 2133 | ||
2124 | trace_xfs_log_recover_buf_dquot_buf(log, buf_f); | 2134 | trace_xfs_log_recover_buf_dquot_buf(log, buf_f); |
2125 | 2135 | ||
2126 | /* | 2136 | /* |
2127 | * Filesystems are required to send in quota flags at mount time. | 2137 | * Filesystems are required to send in quota flags at mount time. |
2128 | */ | 2138 | */ |
2129 | if (mp->m_qflags == 0) { | 2139 | if (mp->m_qflags == 0) { |
2130 | return; | 2140 | return; |
2131 | } | 2141 | } |
2132 | 2142 | ||
2133 | type = 0; | 2143 | type = 0; |
2134 | if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) | 2144 | if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) |
2135 | type |= XFS_DQ_USER; | 2145 | type |= XFS_DQ_USER; |
2136 | if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) | 2146 | if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) |
2137 | type |= XFS_DQ_PROJ; | 2147 | type |= XFS_DQ_PROJ; |
2138 | if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) | 2148 | if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) |
2139 | type |= XFS_DQ_GROUP; | 2149 | type |= XFS_DQ_GROUP; |
2140 | /* | 2150 | /* |
2141 | * This type of quota was turned off, so ignore this buffer | 2151 | * This type of quota was turned off, so ignore this buffer |
2142 | */ | 2152 | */ |
2143 | if (log->l_quotaoffs_flag & type) | 2153 | if (log->l_quotaoffs_flag & type) |
2144 | return; | 2154 | return; |
2145 | 2155 | ||
2146 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); | 2156 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); |
2147 | } | 2157 | } |
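/*
 * [Editor's sketch -- not part of this commit.]  The skip decision above
 * as a pure function.  The bit values are copied here only for
 * self-containment and should be checked against xfs_buf_item.h and
 * xfs_quota.h in this tree.
 */
#define BLF_UDQUOT_BUF	(1 << 2)	/* assumed XFS_BLF_UDQUOT_BUF */
#define BLF_PDQUOT_BUF	(1 << 3)	/* assumed XFS_BLF_PDQUOT_BUF */
#define BLF_GDQUOT_BUF	(1 << 4)	/* assumed XFS_BLF_GDQUOT_BUF */
#define DQ_USER		0x0001		/* assumed XFS_DQ_USER */
#define DQ_PROJ		0x0002		/* assumed XFS_DQ_PROJ */
#define DQ_GROUP	0x0004		/* assumed XFS_DQ_GROUP */

static int
dquot_buf_skipped(unsigned int blf_flags, unsigned int quotaoffs_mask)
{
	unsigned int	type = 0;

	if (blf_flags & BLF_UDQUOT_BUF)
		type |= DQ_USER;
	if (blf_flags & BLF_PDQUOT_BUF)
		type |= DQ_PROJ;
	if (blf_flags & BLF_GDQUOT_BUF)
		type |= DQ_GROUP;
	return (quotaoffs_mask & type) != 0;	/* quota-off'd: skip */
}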
2148 | 2158 | ||
2149 | /* | 2159 | /* |
2150 | * This routine replays a modification made to a buffer at runtime. | 2160 | * This routine replays a modification made to a buffer at runtime. |
2151 | * There are actually two types of buffer, regular and inode, which | 2161 | * There are actually two types of buffer, regular and inode, which |
2152 | * are handled differently. Inode buffers are special in that we | 2162 | * are handled differently. Inode buffers are special in that we |
2153 | * only recover a specific set of data from them, namely | 2163 | * only recover a specific set of data from them, namely |
2154 | * the inode di_next_unlinked fields. This is because all other inode | 2164 | * the inode di_next_unlinked fields. This is because all other inode |
2155 | * data is actually logged via inode records and any data we replay | 2165 | * data is actually logged via inode records and any data we replay |
2156 | * here which overlaps that may be stale. | 2166 | * here which overlaps that may be stale. |
2157 | * | 2167 | * |
2158 | * When meta-data buffers are freed at run time we log a buffer item | 2168 | * When meta-data buffers are freed at run time we log a buffer item |
2159 | * with the XFS_BLF_CANCEL bit set to indicate that previous copies | 2169 | * with the XFS_BLF_CANCEL bit set to indicate that previous copies |
2160 | * of the buffer in the log should not be replayed at recovery time. | 2170 | * of the buffer in the log should not be replayed at recovery time. |
2161 | * This is so that if the blocks covered by the buffer are reused for | 2171 | * This is so that if the blocks covered by the buffer are reused for |
2162 | * file data before we crash we don't end up replaying old, freed | 2172 | * file data before we crash we don't end up replaying old, freed |
2163 | * meta-data into a user's file. | 2173 | * meta-data into a user's file. |
2164 | * | 2174 | * |
2165 | * To handle the cancellation of buffer log items, we make two passes | 2175 | * To handle the cancellation of buffer log items, we make two passes |
2166 | * over the log during recovery. During the first we build a table of | 2176 | * over the log during recovery. During the first we build a table of |
2167 | * those buffers which have been cancelled, and during the second we | 2177 | * those buffers which have been cancelled, and during the second we |
2168 | * only replay those buffers which do not have corresponding cancel | 2178 | * only replay those buffers which do not have corresponding cancel |
2169 | * records in the table. See xlog_recover_do_buffer_pass[1,2] above | 2179 | * records in the table. See xlog_recover_do_buffer_pass[1,2] above |
2170 | * for more details on the implementation of the table of cancel records. | 2180 | * for more details on the implementation of the table of cancel records. |
2171 | */ | 2181 | */ |
2172 | STATIC int | 2182 | STATIC int |
2173 | xlog_recover_buffer_pass2( | 2183 | xlog_recover_buffer_pass2( |
2174 | struct xlog *log, | 2184 | struct xlog *log, |
2175 | struct list_head *buffer_list, | 2185 | struct list_head *buffer_list, |
2176 | struct xlog_recover_item *item) | 2186 | struct xlog_recover_item *item) |
2177 | { | 2187 | { |
2178 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2188 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
2179 | xfs_mount_t *mp = log->l_mp; | 2189 | xfs_mount_t *mp = log->l_mp; |
2180 | xfs_buf_t *bp; | 2190 | xfs_buf_t *bp; |
2181 | int error; | 2191 | int error; |
2182 | uint buf_flags; | 2192 | uint buf_flags; |
2183 | 2193 | ||
2184 | /* | 2194 | /* |
2185 | * In this pass we only want to recover all the buffers which have | 2195 | * In this pass we only want to recover all the buffers which have |
2186 | * not been cancelled and are not cancellation buffers themselves. | 2196 | * not been cancelled and are not cancellation buffers themselves. |
2187 | */ | 2197 | */ |
2188 | if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, | 2198 | if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, |
2189 | buf_f->blf_len, buf_f->blf_flags)) { | 2199 | buf_f->blf_len, buf_f->blf_flags)) { |
2190 | trace_xfs_log_recover_buf_cancel(log, buf_f); | 2200 | trace_xfs_log_recover_buf_cancel(log, buf_f); |
2191 | return 0; | 2201 | return 0; |
2192 | } | 2202 | } |
2193 | 2203 | ||
2194 | trace_xfs_log_recover_buf_recover(log, buf_f); | 2204 | trace_xfs_log_recover_buf_recover(log, buf_f); |
2195 | 2205 | ||
2196 | buf_flags = 0; | 2206 | buf_flags = 0; |
2197 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) | 2207 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) |
2198 | buf_flags |= XBF_UNMAPPED; | 2208 | buf_flags |= XBF_UNMAPPED; |
2199 | 2209 | ||
2200 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, | 2210 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
2201 | buf_flags, NULL); | 2211 | buf_flags, NULL); |
2202 | if (!bp) | 2212 | if (!bp) |
2203 | return XFS_ERROR(ENOMEM); | 2213 | return XFS_ERROR(ENOMEM); |
2204 | error = bp->b_error; | 2214 | error = bp->b_error; |
2205 | if (error) { | 2215 | if (error) { |
2206 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); | 2216 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); |
2207 | xfs_buf_relse(bp); | 2217 | xfs_buf_relse(bp); |
2208 | return error; | 2218 | return error; |
2209 | } | 2219 | } |
2210 | 2220 | ||
2211 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { | 2221 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
2212 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2222 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2213 | } else if (buf_f->blf_flags & | 2223 | } else if (buf_f->blf_flags & |
2214 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { | 2224 | (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
2215 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); | 2225 | xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
2216 | } else { | 2226 | } else { |
2217 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); | 2227 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); |
2218 | } | 2228 | } |
2219 | if (error) | 2229 | if (error) |
2220 | return XFS_ERROR(error); | 2230 | return XFS_ERROR(error); |
2221 | 2231 | ||
2222 | /* | 2232 | /* |
2223 | * Perform delayed write on the buffer. Asynchronous writes will be | 2233 | * Perform delayed write on the buffer. Asynchronous writes will be |
2224 | * slower when taking into account all the buffers to be flushed. | 2234 | * slower when taking into account all the buffers to be flushed. |
2225 | * | 2235 | * |
2226 | * Also make sure that only inode buffers with good sizes stay in | 2236 | * Also make sure that only inode buffers with good sizes stay in |
2227 | * the buffer cache. The kernel moves inodes in buffers of 1 block | 2237 | * the buffer cache. The kernel moves inodes in buffers of 1 block |
2228 | * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode | 2238 | * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode |
2229 | * buffers in the log can be a different size if the log was generated | 2239 | * buffers in the log can be a different size if the log was generated |
2230 | * by an older kernel using unclustered inode buffers or a newer kernel | 2240 | * by an older kernel using unclustered inode buffers or a newer kernel |
2231 | * running with a different inode cluster size. Regardless, if the | 2241 | * running with a different inode cluster size. Regardless, if the |
2232 | * inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) | 2242 | * inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) |
2233 | * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep | 2243 | * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep |
2234 | * the buffer out of the buffer cache so that the buffer won't | 2244 | * the buffer out of the buffer cache so that the buffer won't |
2235 | * overlap with future reads of those inodes. | 2245 | * overlap with future reads of those inodes. |
2236 | */ | 2246 | */ |
2237 | if (XFS_DINODE_MAGIC == | 2247 | if (XFS_DINODE_MAGIC == |
2238 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && | 2248 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
2239 | (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, | 2249 | (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, |
2240 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { | 2250 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { |
2241 | xfs_buf_stale(bp); | 2251 | xfs_buf_stale(bp); |
2242 | error = xfs_bwrite(bp); | 2252 | error = xfs_bwrite(bp); |
2243 | } else { | 2253 | } else { |
2244 | ASSERT(bp->b_target->bt_mount == mp); | 2254 | ASSERT(bp->b_target->bt_mount == mp); |
2245 | bp->b_iodone = xlog_recover_iodone; | 2255 | bp->b_iodone = xlog_recover_iodone; |
2246 | xfs_buf_delwri_queue(bp, buffer_list); | 2256 | xfs_buf_delwri_queue(bp, buffer_list); |
2247 | } | 2257 | } |
2248 | 2258 | ||
2249 | xfs_buf_relse(bp); | 2259 | xfs_buf_relse(bp); |
2250 | return error; | 2260 | return error; |
2251 | } | 2261 | } |
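
The xlog_check_buffer_cancelled() call at the top of xlog_recover_buffer_pass2() consults the table described in the comment above: pass 1 records every buffer logged with XFS_BLF_CANCEL, and pass 2 skips anything it finds there. A minimal userspace sketch of that two-pass scheme, assuming a simple chained hash keyed by block number (the kernel's cancel-record structure and hash function differ, and it additionally reference-counts entries, which this sketch omits):

/* Illustrative cancel table; names and sizes are not xfs's own. */
#include <stdint.h>
#include <stdlib.h>

#define CANCEL_BUCKETS	64

struct cancel_rec {
	uint64_t		blkno;	/* first block of the cancelled buffer */
	uint32_t		len;	/* length in basic blocks */
	struct cancel_rec	*next;	/* hash chain */
};

static struct cancel_rec *cancel_tab[CANCEL_BUCKETS];

/* Pass 1: remember every buffer logged with the cancel flag set. */
static void pass1_note_cancel(uint64_t blkno, uint32_t len)
{
	struct cancel_rec *rec = malloc(sizeof(*rec));

	rec->blkno = blkno;
	rec->len = len;
	rec->next = cancel_tab[blkno % CANCEL_BUCKETS];
	cancel_tab[blkno % CANCEL_BUCKETS] = rec;
}

/* Pass 2: skip replay of any buffer found in the table. */
static int pass2_is_cancelled(uint64_t blkno, uint32_t len)
{
	struct cancel_rec *rec;

	for (rec = cancel_tab[blkno % CANCEL_BUCKETS]; rec; rec = rec->next)
		if (rec->blkno == blkno && rec->len == len)
			return 1;
	return 0;
}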
2252 | 2262 | ||
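The staling logic at the end of the function above is likewise only a size comparison: a recovered inode buffer may stay cached only if its size matches what this kernel would use, per the long comment in the function body. The decision as a freestanding helper, with MAX() spelled out (the sizes in the example below are illustrative):

#include <stdint.h>

#define MAX(a, b)	((a) > (b) ? (a) : (b))

/*
 * Nonzero when a recovered inode buffer must be written out and then
 * staled because its size does not match this kernel's inode buffers.
 */
static int inode_buf_mismatched(uint32_t buf_bytes, uint32_t blocksize,
				uint32_t inode_cluster_size)
{
	return buf_bytes != MAX(blocksize, inode_cluster_size);
}

For example, a 4096-byte inode buffer from an old log on a filesystem with 4096-byte blocks but an 8192-byte inode cluster is mismatched, so it is written once and kept out of the cache.
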
2253 | STATIC int | 2263 | STATIC int |
2254 | xlog_recover_inode_pass2( | 2264 | xlog_recover_inode_pass2( |
2255 | struct xlog *log, | 2265 | struct xlog *log, |
2256 | struct list_head *buffer_list, | 2266 | struct list_head *buffer_list, |
2257 | struct xlog_recover_item *item) | 2267 | struct xlog_recover_item *item) |
2258 | { | 2268 | { |
2259 | xfs_inode_log_format_t *in_f; | 2269 | xfs_inode_log_format_t *in_f; |
2260 | xfs_mount_t *mp = log->l_mp; | 2270 | xfs_mount_t *mp = log->l_mp; |
2261 | xfs_buf_t *bp; | 2271 | xfs_buf_t *bp; |
2262 | xfs_dinode_t *dip; | 2272 | xfs_dinode_t *dip; |
2263 | int len; | 2273 | int len; |
2264 | xfs_caddr_t src; | 2274 | xfs_caddr_t src; |
2265 | xfs_caddr_t dest; | 2275 | xfs_caddr_t dest; |
2266 | int error; | 2276 | int error; |
2267 | int attr_index; | 2277 | int attr_index; |
2268 | uint fields; | 2278 | uint fields; |
2269 | xfs_icdinode_t *dicp; | 2279 | xfs_icdinode_t *dicp; |
2270 | int need_free = 0; | 2280 | int need_free = 0; |
2271 | 2281 | ||
2272 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { | 2282 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { |
2273 | in_f = item->ri_buf[0].i_addr; | 2283 | in_f = item->ri_buf[0].i_addr; |
2274 | } else { | 2284 | } else { |
2275 | in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP); | 2285 | in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP); |
2276 | need_free = 1; | 2286 | need_free = 1; |
2277 | error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); | 2287 | error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); |
2278 | if (error) | 2288 | if (error) |
2279 | goto error; | 2289 | goto error; |
2280 | } | 2290 | } |
2281 | 2291 | ||
2282 | /* | 2292 | /* |
2283 | * Inode buffers can be freed; look out for that case, | 2293 | * Inode buffers can be freed; look out for that case, |
2284 | * and do not replay the inode. | 2294 | * and do not replay the inode. |
2285 | */ | 2295 | */ |
2286 | if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, | 2296 | if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, |
2287 | in_f->ilf_len, 0)) { | 2297 | in_f->ilf_len, 0)) { |
2288 | error = 0; | 2298 | error = 0; |
2289 | trace_xfs_log_recover_inode_cancel(log, in_f); | 2299 | trace_xfs_log_recover_inode_cancel(log, in_f); |
2290 | goto error; | 2300 | goto error; |
2291 | } | 2301 | } |
2292 | trace_xfs_log_recover_inode_recover(log, in_f); | 2302 | trace_xfs_log_recover_inode_recover(log, in_f); |
2293 | 2303 | ||
2294 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, | 2304 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, |
2295 | NULL); | 2305 | NULL); |
2296 | if (!bp) { | 2306 | if (!bp) { |
2297 | error = ENOMEM; | 2307 | error = ENOMEM; |
2298 | goto error; | 2308 | goto error; |
2299 | } | 2309 | } |
2300 | error = bp->b_error; | 2310 | error = bp->b_error; |
2301 | if (error) { | 2311 | if (error) { |
2302 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); | 2312 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); |
2303 | xfs_buf_relse(bp); | 2313 | xfs_buf_relse(bp); |
2304 | goto error; | 2314 | goto error; |
2305 | } | 2315 | } |
2306 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); | 2316 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); |
2307 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); | 2317 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); |
2308 | 2318 | ||
2309 | /* | 2319 | /* |
2310 | * Make sure the place we're flushing out to really looks | 2320 | * Make sure the place we're flushing out to really looks |
2311 | * like an inode! | 2321 | * like an inode! |
2312 | */ | 2322 | */ |
2313 | if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { | 2323 | if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { |
2314 | xfs_buf_relse(bp); | 2324 | xfs_buf_relse(bp); |
2315 | xfs_alert(mp, | 2325 | xfs_alert(mp, |
2316 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", | 2326 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", |
2317 | __func__, dip, bp, in_f->ilf_ino); | 2327 | __func__, dip, bp, in_f->ilf_ino); |
2318 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", | 2328 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
2319 | XFS_ERRLEVEL_LOW, mp); | 2329 | XFS_ERRLEVEL_LOW, mp); |
2320 | error = EFSCORRUPTED; | 2330 | error = EFSCORRUPTED; |
2321 | goto error; | 2331 | goto error; |
2322 | } | 2332 | } |
2323 | dicp = item->ri_buf[1].i_addr; | 2333 | dicp = item->ri_buf[1].i_addr; |
2324 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { | 2334 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { |
2325 | xfs_buf_relse(bp); | 2335 | xfs_buf_relse(bp); |
2326 | xfs_alert(mp, | 2336 | xfs_alert(mp, |
2327 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2337 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", |
2328 | __func__, item, in_f->ilf_ino); | 2338 | __func__, item, in_f->ilf_ino); |
2329 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", | 2339 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
2330 | XFS_ERRLEVEL_LOW, mp); | 2340 | XFS_ERRLEVEL_LOW, mp); |
2331 | error = EFSCORRUPTED; | 2341 | error = EFSCORRUPTED; |
2332 | goto error; | 2342 | goto error; |
2333 | } | 2343 | } |
2334 | 2344 | ||
2335 | /* Skip replay when the on disk inode is newer than the log one */ | 2345 | /* Skip replay when the on disk inode is newer than the log one */ |
2336 | if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | 2346 | if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { |
2337 | /* | 2347 | /* |
2338 | * Deal with the wrap case: DI_MAX_FLUSH on disk plus a | 2348 | * Deal with the wrap case: DI_MAX_FLUSH on disk plus a |
2339 | * much smaller count in the log means the counter wrapped | 2349 | * much smaller count in the log means the counter wrapped |
2340 | */ | 2350 | */ |
2341 | if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && | 2351 | if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && |
2342 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { | 2352 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { |
2343 | /* do nothing */ | 2353 | /* do nothing */ |
2344 | } else { | 2354 | } else { |
2345 | xfs_buf_relse(bp); | 2355 | xfs_buf_relse(bp); |
2346 | trace_xfs_log_recover_inode_skip(log, in_f); | 2356 | trace_xfs_log_recover_inode_skip(log, in_f); |
2347 | error = 0; | 2357 | error = 0; |
2348 | goto error; | 2358 | goto error; |
2349 | } | 2359 | } |
2350 | } | 2360 | } |
2351 | /* Take the opportunity to reset the flush iteration count */ | 2361 | /* Take the opportunity to reset the flush iteration count */ |
2352 | dicp->di_flushiter = 0; | 2362 | dicp->di_flushiter = 0; |
2353 | 2363 | ||
2354 | if (unlikely(S_ISREG(dicp->di_mode))) { | 2364 | if (unlikely(S_ISREG(dicp->di_mode))) { |
2355 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2365 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2356 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2366 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
2357 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", | 2367 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
2358 | XFS_ERRLEVEL_LOW, mp, dicp); | 2368 | XFS_ERRLEVEL_LOW, mp, dicp); |
2359 | xfs_buf_relse(bp); | 2369 | xfs_buf_relse(bp); |
2360 | xfs_alert(mp, | 2370 | xfs_alert(mp, |
2361 | "%s: Bad regular inode log record, rec ptr 0x%p, " | 2371 | "%s: Bad regular inode log record, rec ptr 0x%p, " |
2362 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2372 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2363 | __func__, item, dip, bp, in_f->ilf_ino); | 2373 | __func__, item, dip, bp, in_f->ilf_ino); |
2364 | error = EFSCORRUPTED; | 2374 | error = EFSCORRUPTED; |
2365 | goto error; | 2375 | goto error; |
2366 | } | 2376 | } |
2367 | } else if (unlikely(S_ISDIR(dicp->di_mode))) { | 2377 | } else if (unlikely(S_ISDIR(dicp->di_mode))) { |
2368 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2378 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
2369 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && | 2379 | (dicp->di_format != XFS_DINODE_FMT_BTREE) && |
2370 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2380 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
2371 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", | 2381 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
2372 | XFS_ERRLEVEL_LOW, mp, dicp); | 2382 | XFS_ERRLEVEL_LOW, mp, dicp); |
2373 | xfs_buf_relse(bp); | 2383 | xfs_buf_relse(bp); |
2374 | xfs_alert(mp, | 2384 | xfs_alert(mp, |
2375 | "%s: Bad dir inode log record, rec ptr 0x%p, " | 2385 | "%s: Bad dir inode log record, rec ptr 0x%p, " |
2376 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2386 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2377 | __func__, item, dip, bp, in_f->ilf_ino); | 2387 | __func__, item, dip, bp, in_f->ilf_ino); |
2378 | error = EFSCORRUPTED; | 2388 | error = EFSCORRUPTED; |
2379 | goto error; | 2389 | goto error; |
2380 | } | 2390 | } |
2381 | } | 2391 | } |
2382 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2392 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
2383 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", | 2393 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
2384 | XFS_ERRLEVEL_LOW, mp, dicp); | 2394 | XFS_ERRLEVEL_LOW, mp, dicp); |
2385 | xfs_buf_relse(bp); | 2395 | xfs_buf_relse(bp); |
2386 | xfs_alert(mp, | 2396 | xfs_alert(mp, |
2387 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " | 2397 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2388 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2398 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
2389 | __func__, item, dip, bp, in_f->ilf_ino, | 2399 | __func__, item, dip, bp, in_f->ilf_ino, |
2390 | dicp->di_nextents + dicp->di_anextents, | 2400 | dicp->di_nextents + dicp->di_anextents, |
2391 | dicp->di_nblocks); | 2401 | dicp->di_nblocks); |
2392 | error = EFSCORRUPTED; | 2402 | error = EFSCORRUPTED; |
2393 | goto error; | 2403 | goto error; |
2394 | } | 2404 | } |
2395 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2405 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
2396 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", | 2406 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
2397 | XFS_ERRLEVEL_LOW, mp, dicp); | 2407 | XFS_ERRLEVEL_LOW, mp, dicp); |
2398 | xfs_buf_relse(bp); | 2408 | xfs_buf_relse(bp); |
2399 | xfs_alert(mp, | 2409 | xfs_alert(mp, |
2400 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " | 2410 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2401 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, | 2411 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, |
2402 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); | 2412 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); |
2403 | error = EFSCORRUPTED; | 2413 | error = EFSCORRUPTED; |
2404 | goto error; | 2414 | goto error; |
2405 | } | 2415 | } |
2406 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { | 2416 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { |
2407 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", | 2417 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
2408 | XFS_ERRLEVEL_LOW, mp, dicp); | 2418 | XFS_ERRLEVEL_LOW, mp, dicp); |
2409 | xfs_buf_relse(bp); | 2419 | xfs_buf_relse(bp); |
2410 | xfs_alert(mp, | 2420 | xfs_alert(mp, |
2411 | "%s: Bad inode log record length %d, rec ptr 0x%p", | 2421 | "%s: Bad inode log record length %d, rec ptr 0x%p", |
2412 | __func__, item->ri_buf[1].i_len, item); | 2422 | __func__, item->ri_buf[1].i_len, item); |
2413 | error = EFSCORRUPTED; | 2423 | error = EFSCORRUPTED; |
2414 | goto error; | 2424 | goto error; |
2415 | } | 2425 | } |
2416 | 2426 | ||
2417 | /* The core is in in-core format */ | 2427 | /* The core is in in-core format */ |
2418 | xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); | 2428 | xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); |
2419 | 2429 | ||
2420 | /* the rest is in on-disk format */ | 2430 | /* the rest is in on-disk format */ |
2421 | if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { | 2431 | if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { |
2422 | memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), | 2432 | memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), |
2423 | item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), | 2433 | item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), |
2424 | item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); | 2434 | item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); |
2425 | } | 2435 | } |
2426 | 2436 | ||
2427 | fields = in_f->ilf_fields; | 2437 | fields = in_f->ilf_fields; |
2428 | switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { | 2438 | switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { |
2429 | case XFS_ILOG_DEV: | 2439 | case XFS_ILOG_DEV: |
2430 | xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); | 2440 | xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); |
2431 | break; | 2441 | break; |
2432 | case XFS_ILOG_UUID: | 2442 | case XFS_ILOG_UUID: |
2433 | memcpy(XFS_DFORK_DPTR(dip), | 2443 | memcpy(XFS_DFORK_DPTR(dip), |
2434 | &in_f->ilf_u.ilfu_uuid, | 2444 | &in_f->ilf_u.ilfu_uuid, |
2435 | sizeof(uuid_t)); | 2445 | sizeof(uuid_t)); |
2436 | break; | 2446 | break; |
2437 | } | 2447 | } |
2438 | 2448 | ||
2439 | if (in_f->ilf_size == 2) | 2449 | if (in_f->ilf_size == 2) |
2440 | goto write_inode_buffer; | 2450 | goto write_inode_buffer; |
2441 | len = item->ri_buf[2].i_len; | 2451 | len = item->ri_buf[2].i_len; |
2442 | src = item->ri_buf[2].i_addr; | 2452 | src = item->ri_buf[2].i_addr; |
2443 | ASSERT(in_f->ilf_size <= 4); | 2453 | ASSERT(in_f->ilf_size <= 4); |
2444 | ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); | 2454 | ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); |
2445 | ASSERT(!(fields & XFS_ILOG_DFORK) || | 2455 | ASSERT(!(fields & XFS_ILOG_DFORK) || |
2446 | (len == in_f->ilf_dsize)); | 2456 | (len == in_f->ilf_dsize)); |
2447 | 2457 | ||
2448 | switch (fields & XFS_ILOG_DFORK) { | 2458 | switch (fields & XFS_ILOG_DFORK) { |
2449 | case XFS_ILOG_DDATA: | 2459 | case XFS_ILOG_DDATA: |
2450 | case XFS_ILOG_DEXT: | 2460 | case XFS_ILOG_DEXT: |
2451 | memcpy(XFS_DFORK_DPTR(dip), src, len); | 2461 | memcpy(XFS_DFORK_DPTR(dip), src, len); |
2452 | break; | 2462 | break; |
2453 | 2463 | ||
2454 | case XFS_ILOG_DBROOT: | 2464 | case XFS_ILOG_DBROOT: |
2455 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, | 2465 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, |
2456 | (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), | 2466 | (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), |
2457 | XFS_DFORK_DSIZE(dip, mp)); | 2467 | XFS_DFORK_DSIZE(dip, mp)); |
2458 | break; | 2468 | break; |
2459 | 2469 | ||
2460 | default: | 2470 | default: |
2461 | /* | 2471 | /* |
2462 | * There are no data fork flags set. | 2472 | * There are no data fork flags set. |
2463 | */ | 2473 | */ |
2464 | ASSERT((fields & XFS_ILOG_DFORK) == 0); | 2474 | ASSERT((fields & XFS_ILOG_DFORK) == 0); |
2465 | break; | 2475 | break; |
2466 | } | 2476 | } |
2467 | 2477 | ||
2468 | /* | 2478 | /* |
2469 | * If we logged any attribute data, recover it. There may or | 2479 | * If we logged any attribute data, recover it. There may or |
2470 | * may not have been any other non-core data logged in this | 2480 | * may not have been any other non-core data logged in this |
2471 | * transaction. | 2481 | * transaction. |
2472 | */ | 2482 | */ |
2473 | if (in_f->ilf_fields & XFS_ILOG_AFORK) { | 2483 | if (in_f->ilf_fields & XFS_ILOG_AFORK) { |
2474 | if (in_f->ilf_fields & XFS_ILOG_DFORK) { | 2484 | if (in_f->ilf_fields & XFS_ILOG_DFORK) { |
2475 | attr_index = 3; | 2485 | attr_index = 3; |
2476 | } else { | 2486 | } else { |
2477 | attr_index = 2; | 2487 | attr_index = 2; |
2478 | } | 2488 | } |
2479 | len = item->ri_buf[attr_index].i_len; | 2489 | len = item->ri_buf[attr_index].i_len; |
2480 | src = item->ri_buf[attr_index].i_addr; | 2490 | src = item->ri_buf[attr_index].i_addr; |
2481 | ASSERT(len == in_f->ilf_asize); | 2491 | ASSERT(len == in_f->ilf_asize); |
2482 | 2492 | ||
2483 | switch (in_f->ilf_fields & XFS_ILOG_AFORK) { | 2493 | switch (in_f->ilf_fields & XFS_ILOG_AFORK) { |
2484 | case XFS_ILOG_ADATA: | 2494 | case XFS_ILOG_ADATA: |
2485 | case XFS_ILOG_AEXT: | 2495 | case XFS_ILOG_AEXT: |
2486 | dest = XFS_DFORK_APTR(dip); | 2496 | dest = XFS_DFORK_APTR(dip); |
2487 | ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); | 2497 | ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); |
2488 | memcpy(dest, src, len); | 2498 | memcpy(dest, src, len); |
2489 | break; | 2499 | break; |
2490 | 2500 | ||
2491 | case XFS_ILOG_ABROOT: | 2501 | case XFS_ILOG_ABROOT: |
2492 | dest = XFS_DFORK_APTR(dip); | 2502 | dest = XFS_DFORK_APTR(dip); |
2493 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, | 2503 | xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, |
2494 | len, (xfs_bmdr_block_t*)dest, | 2504 | len, (xfs_bmdr_block_t*)dest, |
2495 | XFS_DFORK_ASIZE(dip, mp)); | 2505 | XFS_DFORK_ASIZE(dip, mp)); |
2496 | break; | 2506 | break; |
2497 | 2507 | ||
2498 | default: | 2508 | default: |
2499 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); | 2509 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); |
2500 | ASSERT(0); | 2510 | ASSERT(0); |
2501 | xfs_buf_relse(bp); | 2511 | xfs_buf_relse(bp); |
2502 | error = EIO; | 2512 | error = EIO; |
2503 | goto error; | 2513 | goto error; |
2504 | } | 2514 | } |
2505 | } | 2515 | } |
2506 | 2516 | ||
2507 | write_inode_buffer: | 2517 | write_inode_buffer: |
2508 | ASSERT(bp->b_target->bt_mount == mp); | 2518 | ASSERT(bp->b_target->bt_mount == mp); |
2509 | bp->b_iodone = xlog_recover_iodone; | 2519 | bp->b_iodone = xlog_recover_iodone; |
2510 | xfs_buf_delwri_queue(bp, buffer_list); | 2520 | xfs_buf_delwri_queue(bp, buffer_list); |
2511 | xfs_buf_relse(bp); | 2521 | xfs_buf_relse(bp); |
2512 | error: | 2522 | error: |
2513 | if (need_free) | 2523 | if (need_free) |
2514 | kmem_free(in_f); | 2524 | kmem_free(in_f); |
2515 | return XFS_ERROR(error); | 2525 | return XFS_ERROR(error); |
2516 | } | 2526 | } |
2517 | 2527 | ||
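The di_flushiter handling in the middle of xlog_recover_inode_pass2() is easier to see with concrete numbers. Assuming DI_MAX_FLUSH is the 16-bit maximum 0xffff (as in contemporary xfs headers), an on-disk count sitting at the maximum paired with a small count in the log means the counter wrapped, so the log copy is actually newer. The same test restated on its own:

#include <stdint.h>

#define DI_MAX_FLUSH	0xffff	/* assumed: the 16-bit maximum */

/* Nonzero when the log copy of the inode is stale and must be skipped. */
static int skip_stale_inode(uint16_t log_iter, uint16_t disk_iter)
{
	if (log_iter >= disk_iter)
		return 0;		/* log copy is newer: replay it */
	/*
	 * Wrap case: disk_iter is pinned at the maximum while log_iter
	 * has already wrapped to a small value; the log copy still wins.
	 */
	if (disk_iter == DI_MAX_FLUSH && log_iter < (DI_MAX_FLUSH >> 1))
		return 0;
	return 1;			/* on-disk inode is newer: skip */
}

So skip_stale_inode(3, 0xffff) is 0 (replay, wrap case) while skip_stale_inode(3, 10) is 1 (the on-disk inode really is newer).
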
2518 | /* | 2528 | /* |
2519 | * Recover QUOTAOFF records. We simply make a note of them in the xlog | 2529 | * Recover QUOTAOFF records. We simply make a note of them in the xlog |
2520 | * structure, so that we know not to do any dquot item or dquot buffer recovery | 2530 | * structure, so that we know not to do any dquot item or dquot buffer recovery |
2521 | * of that type. | 2531 | * of that type. |
2522 | */ | 2532 | */ |
2523 | STATIC int | 2533 | STATIC int |
2524 | xlog_recover_quotaoff_pass1( | 2534 | xlog_recover_quotaoff_pass1( |
2525 | struct xlog *log, | 2535 | struct xlog *log, |
2526 | struct xlog_recover_item *item) | 2536 | struct xlog_recover_item *item) |
2527 | { | 2537 | { |
2528 | xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; | 2538 | xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; |
2529 | ASSERT(qoff_f); | 2539 | ASSERT(qoff_f); |
2530 | 2540 | ||
2531 | /* | 2541 | /* |
2532 | * The logitem format's flag tells us if this was user quotaoff, | 2542 | * The logitem format's flag tells us if this was user quotaoff, |
2533 | * group/project quotaoff or both. | 2543 | * group/project quotaoff or both. |
2534 | */ | 2544 | */ |
2535 | if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) | 2545 | if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) |
2536 | log->l_quotaoffs_flag |= XFS_DQ_USER; | 2546 | log->l_quotaoffs_flag |= XFS_DQ_USER; |
2537 | if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) | 2547 | if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) |
2538 | log->l_quotaoffs_flag |= XFS_DQ_PROJ; | 2548 | log->l_quotaoffs_flag |= XFS_DQ_PROJ; |
2539 | if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) | 2549 | if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) |
2540 | log->l_quotaoffs_flag |= XFS_DQ_GROUP; | 2550 | log->l_quotaoffs_flag |= XFS_DQ_GROUP; |
2541 | 2551 | ||
2542 | return (0); | 2552 | return (0); |
2543 | } | 2553 | } |
2544 | 2554 | ||
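This is the whole of pass 1 for quota: the bits accumulated in l_quotaoffs_flag are the only state the pass-2 paths consult before dropping dquot items and dquot buffers of a switched-off type, as xlog_recover_do_dquot_buffer() and xlog_recover_dquot_pass2() show. A toy model of that suppression, using made-up flag values rather than the kernel's XFS_DQ_* definitions:

#include <stdio.h>

/* Illustrative bits; the kernel defines its own XFS_DQ_* values. */
#define DQ_USER		0x1
#define DQ_PROJ		0x2
#define DQ_GROUP	0x4

static unsigned quotaoffs_flag;		/* models log->l_quotaoffs_flag */

static void pass1_quotaoff(unsigned types)
{
	quotaoffs_flag |= types;		/* remember the quotaoff */
}

static int pass2_should_replay(unsigned type)
{
	return !(quotaoffs_flag & type);	/* skip switched-off types */
}

int main(void)
{
	pass1_quotaoff(DQ_GROUP);			/* seen during pass 1 */
	printf("%d\n", pass2_should_replay(DQ_USER));	/* 1: replay */
	printf("%d\n", pass2_should_replay(DQ_GROUP));	/* 0: suppress */
	return 0;
}
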
2545 | /* | 2555 | /* |
2546 | * Recover a dquot record | 2556 | * Recover a dquot record |
2547 | */ | 2557 | */ |
2548 | STATIC int | 2558 | STATIC int |
2549 | xlog_recover_dquot_pass2( | 2559 | xlog_recover_dquot_pass2( |
2550 | struct xlog *log, | 2560 | struct xlog *log, |
2551 | struct list_head *buffer_list, | 2561 | struct list_head *buffer_list, |
2552 | struct xlog_recover_item *item) | 2562 | struct xlog_recover_item *item) |
2553 | { | 2563 | { |
2554 | xfs_mount_t *mp = log->l_mp; | 2564 | xfs_mount_t *mp = log->l_mp; |
2555 | xfs_buf_t *bp; | 2565 | xfs_buf_t *bp; |
2556 | struct xfs_disk_dquot *ddq, *recddq; | 2566 | struct xfs_disk_dquot *ddq, *recddq; |
2557 | int error; | 2567 | int error; |
2558 | xfs_dq_logformat_t *dq_f; | 2568 | xfs_dq_logformat_t *dq_f; |
2559 | uint type; | 2569 | uint type; |
2560 | 2570 | ||
2561 | 2571 | ||
2562 | /* | 2572 | /* |
2563 | * Filesystems are required to send in quota flags at mount time. | 2573 | * Filesystems are required to send in quota flags at mount time. |
2564 | */ | 2574 | */ |
2565 | if (mp->m_qflags == 0) | 2575 | if (mp->m_qflags == 0) |
2566 | return (0); | 2576 | return (0); |
2567 | 2577 | ||
2568 | recddq = item->ri_buf[1].i_addr; | 2578 | recddq = item->ri_buf[1].i_addr; |
2569 | if (recddq == NULL) { | 2579 | if (recddq == NULL) { |
2570 | xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); | 2580 | xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); |
2571 | return XFS_ERROR(EIO); | 2581 | return XFS_ERROR(EIO); |
2572 | } | 2582 | } |
2573 | if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { | 2583 | if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { |
2574 | xfs_alert(log->l_mp, "dquot too small (%d) in %s.", | 2584 | xfs_alert(log->l_mp, "dquot too small (%d) in %s.", |
2575 | item->ri_buf[1].i_len, __func__); | 2585 | item->ri_buf[1].i_len, __func__); |
2576 | return XFS_ERROR(EIO); | 2586 | return XFS_ERROR(EIO); |
2577 | } | 2587 | } |
2578 | 2588 | ||
2579 | /* | 2589 | /* |
2580 | * This type of quota was turned off, so ignore this record. | 2590 | * This type of quota was turned off, so ignore this record. |
2581 | */ | 2591 | */ |
2582 | type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); | 2592 | type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); |
2583 | ASSERT(type); | 2593 | ASSERT(type); |
2584 | if (log->l_quotaoffs_flag & type) | 2594 | if (log->l_quotaoffs_flag & type) |
2585 | return (0); | 2595 | return (0); |
2586 | 2596 | ||
2587 | /* | 2597 | /* |
2588 | * At this point we know that quota was _not_ turned off. | 2598 | * At this point we know that quota was _not_ turned off. |
2589 | * Since the mount flags do not indicate otherwise, this | 2599 | * Since the mount flags do not indicate otherwise, this |
2590 | * must mean that quota is on, and the dquot needs to be replayed. | 2600 | * must mean that quota is on, and the dquot needs to be replayed. |
2591 | * Remember that we may not have fully recovered the superblock yet, | 2601 | * Remember that we may not have fully recovered the superblock yet, |
2592 | * so we can't do the usual trick of looking at the SB quota bits. | 2602 | * so we can't do the usual trick of looking at the SB quota bits. |
2593 | * | 2603 | * |
2594 | * The other possibility, of course, is that the quota subsystem was | 2604 | * The other possibility, of course, is that the quota subsystem was |
2595 | * removed since the last mount - ENOSYS. | 2605 | * removed since the last mount - ENOSYS. |
2596 | */ | 2606 | */ |
2597 | dq_f = item->ri_buf[0].i_addr; | 2607 | dq_f = item->ri_buf[0].i_addr; |
2598 | ASSERT(dq_f); | 2608 | ASSERT(dq_f); |
2599 | error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, | 2609 | error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2600 | "xlog_recover_dquot_pass2 (log copy)"); | 2610 | "xlog_recover_dquot_pass2 (log copy)"); |
2601 | if (error) | 2611 | if (error) |
2602 | return XFS_ERROR(EIO); | 2612 | return XFS_ERROR(EIO); |
2603 | ASSERT(dq_f->qlf_len == 1); | 2613 | ASSERT(dq_f->qlf_len == 1); |
2604 | 2614 | ||
2605 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, | 2615 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, |
2606 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, | 2616 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, |
2607 | NULL); | 2617 | NULL); |
2608 | if (error) | 2618 | if (error) |
2609 | return error; | 2619 | return error; |
2610 | 2620 | ||
2611 | ASSERT(bp); | 2621 | ASSERT(bp); |
2612 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); | 2622 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); |
2613 | 2623 | ||
2614 | /* | 2624 | /* |
2615 | * At least the magic num portion should be on disk because this | 2625 | * At least the magic num portion should be on disk because this |
2616 | * was among a chunk of dquots created earlier, and we did some | 2626 | * was among a chunk of dquots created earlier, and we did some |
2617 | * minimal initialization then. | 2627 | * minimal initialization then. |
2618 | */ | 2628 | */ |
2619 | error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, | 2629 | error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, |
2620 | "xlog_recover_dquot_pass2"); | 2630 | "xlog_recover_dquot_pass2"); |
2621 | if (error) { | 2631 | if (error) { |
2622 | xfs_buf_relse(bp); | 2632 | xfs_buf_relse(bp); |
2623 | return XFS_ERROR(EIO); | 2633 | return XFS_ERROR(EIO); |
2624 | } | 2634 | } |
2625 | 2635 | ||
2626 | memcpy(ddq, recddq, item->ri_buf[1].i_len); | 2636 | memcpy(ddq, recddq, item->ri_buf[1].i_len); |
2627 | 2637 | ||
2628 | ASSERT(dq_f->qlf_size == 2); | 2638 | ASSERT(dq_f->qlf_size == 2); |
2629 | ASSERT(bp->b_target->bt_mount == mp); | 2639 | ASSERT(bp->b_target->bt_mount == mp); |
2630 | bp->b_iodone = xlog_recover_iodone; | 2640 | bp->b_iodone = xlog_recover_iodone; |
2631 | xfs_buf_delwri_queue(bp, buffer_list); | 2641 | xfs_buf_delwri_queue(bp, buffer_list); |
2632 | xfs_buf_relse(bp); | 2642 | xfs_buf_relse(bp); |
2633 | 2643 | ||
2634 | return (0); | 2644 | return (0); |
2635 | } | 2645 | } |
2636 | 2646 | ||
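This replay path copies the raw dquot into the buffer and requeues it; the verification data this commit adds lives in the on-disk quota block itself, in the previously reserved space of struct xfs_dqblk. A sketch of the resulting layout, using stand-in scalar types for the kernel's big-endian and uuid types (field names follow the commit; the stand-in sizes are assumptions):

#include <stdint.h>

struct disk_dquot {
	char	d_core[104];		/* stand-in for xfs_disk_dquot_t */
};

struct dqblk {
	struct disk_dquot dd_diskdq;	/* portion that also lives in core */
	char		dd_fill[4];	/* old padding, kept for layout */
	/* present only on filesystems with the CRC feature enabled: */
	uint32_t	dd_crc;		/* checksum over the quota block */
	uint64_t	dd_lsn;		/* LSN of the last modification */
	uint8_t		dd_uuid[16];	/* filesystem UUID, ties the block
					 * to this filesystem */
};

The intent is that a read verifier can reject a quota block whose CRC or UUID does not match before any of the per-dquot checks above run, and the write side stamps the LSN and recomputes the CRC before the buffer reaches disk.
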
2637 | /* | 2647 | /* |
2638 | * This routine is called to create an in-core extent free intent | 2648 | * This routine is called to create an in-core extent free intent |
2639 | * item from the efi format structure which was logged on disk. | 2649 | * item from the efi format structure which was logged on disk. |
2640 | * It allocates an in-core efi, copies the extents from the format | 2650 | * It allocates an in-core efi, copies the extents from the format |
2641 | * structure into it, and adds the efi to the AIL with the given | 2651 | * structure into it, and adds the efi to the AIL with the given |
2642 | * LSN. | 2652 | * LSN. |
2643 | */ | 2653 | */ |
2644 | STATIC int | 2654 | STATIC int |
2645 | xlog_recover_efi_pass2( | 2655 | xlog_recover_efi_pass2( |
2646 | struct xlog *log, | 2656 | struct xlog *log, |
2647 | struct xlog_recover_item *item, | 2657 | struct xlog_recover_item *item, |
2648 | xfs_lsn_t lsn) | 2658 | xfs_lsn_t lsn) |
2649 | { | 2659 | { |
2650 | int error; | 2660 | int error; |
2651 | xfs_mount_t *mp = log->l_mp; | 2661 | xfs_mount_t *mp = log->l_mp; |
2652 | xfs_efi_log_item_t *efip; | 2662 | xfs_efi_log_item_t *efip; |
2653 | xfs_efi_log_format_t *efi_formatp; | 2663 | xfs_efi_log_format_t *efi_formatp; |
2654 | 2664 | ||
2655 | efi_formatp = item->ri_buf[0].i_addr; | 2665 | efi_formatp = item->ri_buf[0].i_addr; |
2656 | 2666 | ||
2657 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); | 2667 | efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
2658 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), | 2668 | if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), |
2659 | &(efip->efi_format)))) { | 2669 | &(efip->efi_format)))) { |
2660 | xfs_efi_item_free(efip); | 2670 | xfs_efi_item_free(efip); |
2661 | return error; | 2671 | return error; |
2662 | } | 2672 | } |
2663 | atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); | 2673 | atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); |
2664 | 2674 | ||
2665 | spin_lock(&log->l_ailp->xa_lock); | 2675 | spin_lock(&log->l_ailp->xa_lock); |
2666 | /* | 2676 | /* |
2667 | * xfs_trans_ail_update() drops the AIL lock. | 2677 | * xfs_trans_ail_update() drops the AIL lock. |
2668 | */ | 2678 | */ |
2669 | xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); | 2679 | xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); |
2670 | return 0; | 2680 | return 0; |
2671 | } | 2681 | } |
2672 | 2682 | ||
2673 | 2683 | ||
2674 | /* | 2684 | /* |
2675 | * This routine is called when an efd format structure is found in | 2685 | * This routine is called when an efd format structure is found in |
2676 | * a committed transaction in the log. Its purpose is to cancel | 2686 | * a committed transaction in the log. Its purpose is to cancel |
2677 | * the corresponding efi if it was still in the log. To do this | 2687 | * the corresponding efi if it was still in the log. To do this |
2678 | * it searches the AIL for the efi with an id equal to that in the | 2688 | * it searches the AIL for the efi with an id equal to that in the |
2679 | * efd format structure. If we find it, we remove the efi from the | 2689 | * efd format structure. If we find it, we remove the efi from the |
2680 | * AIL and free it. | 2690 | * AIL and free it. |
2681 | */ | 2691 | */ |
2682 | STATIC int | 2692 | STATIC int |
2683 | xlog_recover_efd_pass2( | 2693 | xlog_recover_efd_pass2( |
2684 | struct xlog *log, | 2694 | struct xlog *log, |
2685 | struct xlog_recover_item *item) | 2695 | struct xlog_recover_item *item) |
2686 | { | 2696 | { |
2687 | xfs_efd_log_format_t *efd_formatp; | 2697 | xfs_efd_log_format_t *efd_formatp; |
2688 | xfs_efi_log_item_t *efip = NULL; | 2698 | xfs_efi_log_item_t *efip = NULL; |
2689 | xfs_log_item_t *lip; | 2699 | xfs_log_item_t *lip; |
2690 | __uint64_t efi_id; | 2700 | __uint64_t efi_id; |
2691 | struct xfs_ail_cursor cur; | 2701 | struct xfs_ail_cursor cur; |
2692 | struct xfs_ail *ailp = log->l_ailp; | 2702 | struct xfs_ail *ailp = log->l_ailp; |
2693 | 2703 | ||
2694 | efd_formatp = item->ri_buf[0].i_addr; | 2704 | efd_formatp = item->ri_buf[0].i_addr; |
2695 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + | 2705 | ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
2696 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || | 2706 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
2697 | (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + | 2707 | (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + |
2698 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); | 2708 | ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); |
2699 | efi_id = efd_formatp->efd_efi_id; | 2709 | efi_id = efd_formatp->efd_efi_id; |
2700 | 2710 | ||
2701 | /* | 2711 | /* |
2702 | * Search for the efi with the id in the efd format structure | 2712 | * Search for the efi with the id in the efd format structure |
2703 | * in the AIL. | 2713 | * in the AIL. |
2704 | */ | 2714 | */ |
2705 | spin_lock(&ailp->xa_lock); | 2715 | spin_lock(&ailp->xa_lock); |
2706 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); | 2716 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
2707 | while (lip != NULL) { | 2717 | while (lip != NULL) { |
2708 | if (lip->li_type == XFS_LI_EFI) { | 2718 | if (lip->li_type == XFS_LI_EFI) { |
2709 | efip = (xfs_efi_log_item_t *)lip; | 2719 | efip = (xfs_efi_log_item_t *)lip; |
2710 | if (efip->efi_format.efi_id == efi_id) { | 2720 | if (efip->efi_format.efi_id == efi_id) { |
2711 | /* | 2721 | /* |
2712 | * xfs_trans_ail_delete() drops the | 2722 | * xfs_trans_ail_delete() drops the |
2713 | * AIL lock. | 2723 | * AIL lock. |
2714 | */ | 2724 | */ |
2715 | xfs_trans_ail_delete(ailp, lip, | 2725 | xfs_trans_ail_delete(ailp, lip, |
2716 | SHUTDOWN_CORRUPT_INCORE); | 2726 | SHUTDOWN_CORRUPT_INCORE); |
2717 | xfs_efi_item_free(efip); | 2727 | xfs_efi_item_free(efip); |
2718 | spin_lock(&ailp->xa_lock); | 2728 | spin_lock(&ailp->xa_lock); |
2719 | break; | 2729 | break; |
2720 | } | 2730 | } |
2721 | } | 2731 | } |
2722 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 2732 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
2723 | } | 2733 | } |
2724 | xfs_trans_ail_cursor_done(ailp, &cur); | 2734 | xfs_trans_ail_cursor_done(ailp, &cur); |
2725 | spin_unlock(&ailp->xa_lock); | 2735 | spin_unlock(&ailp->xa_lock); |
2726 | 2736 | ||
2727 | return 0; | 2737 | return 0; |
2728 | } | 2738 | } |
2729 | 2739 | ||
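Taken together, the two routines above implement intent/done matching by id: every recovered EFI is parked in the AIL, and an EFD carrying the same id removes it again, so only unmatched intents survive to xlog_recover_process_efi() below. A list-based toy version of that pairing (the kernel walks the AIL under its lock with a cursor instead):

#include <stdint.h>
#include <stdlib.h>

struct efi {
	uint64_t	id;		/* corresponds to efi_format.efi_id */
	struct efi	*next;
};

static struct efi *pending;	/* models the unmatched intents in the AIL */

static void replay_efi(uint64_t id)		/* an XFS_LI_EFI in pass 2 */
{
	struct efi *e = malloc(sizeof(*e));

	e->id = id;
	e->next = pending;
	pending = e;
}

static void replay_efd(uint64_t efi_id)		/* an XFS_LI_EFD in pass 2 */
{
	struct efi **p;

	for (p = &pending; *p; p = &(*p)->next) {
		if ((*p)->id == efi_id) {
			struct efi *done = *p;

			*p = done->next;	/* intent completed: drop it */
			free(done);
			return;
		}
	}
}
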
2730 | /* | 2740 | /* |
2731 | * Free up any resources allocated by the transaction | 2741 | * Free up any resources allocated by the transaction |
2732 | * | 2742 | * |
2733 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | 2743 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. |
2734 | */ | 2744 | */ |
2735 | STATIC void | 2745 | STATIC void |
2736 | xlog_recover_free_trans( | 2746 | xlog_recover_free_trans( |
2737 | struct xlog_recover *trans) | 2747 | struct xlog_recover *trans) |
2738 | { | 2748 | { |
2739 | xlog_recover_item_t *item, *n; | 2749 | xlog_recover_item_t *item, *n; |
2740 | int i; | 2750 | int i; |
2741 | 2751 | ||
2742 | list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { | 2752 | list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { |
2743 | /* Free the regions in the item. */ | 2753 | /* Free the regions in the item. */ |
2744 | list_del(&item->ri_list); | 2754 | list_del(&item->ri_list); |
2745 | for (i = 0; i < item->ri_cnt; i++) | 2755 | for (i = 0; i < item->ri_cnt; i++) |
2746 | kmem_free(item->ri_buf[i].i_addr); | 2756 | kmem_free(item->ri_buf[i].i_addr); |
2747 | /* Free the item itself */ | 2757 | /* Free the item itself */ |
2748 | kmem_free(item->ri_buf); | 2758 | kmem_free(item->ri_buf); |
2749 | kmem_free(item); | 2759 | kmem_free(item); |
2750 | } | 2760 | } |
2751 | /* Free the transaction recover structure */ | 2761 | /* Free the transaction recover structure */ |
2752 | kmem_free(trans); | 2762 | kmem_free(trans); |
2753 | } | 2763 | } |
2754 | 2764 | ||
2755 | STATIC int | 2765 | STATIC int |
2756 | xlog_recover_commit_pass1( | 2766 | xlog_recover_commit_pass1( |
2757 | struct xlog *log, | 2767 | struct xlog *log, |
2758 | struct xlog_recover *trans, | 2768 | struct xlog_recover *trans, |
2759 | struct xlog_recover_item *item) | 2769 | struct xlog_recover_item *item) |
2760 | { | 2770 | { |
2761 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); | 2771 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); |
2762 | 2772 | ||
2763 | switch (ITEM_TYPE(item)) { | 2773 | switch (ITEM_TYPE(item)) { |
2764 | case XFS_LI_BUF: | 2774 | case XFS_LI_BUF: |
2765 | return xlog_recover_buffer_pass1(log, item); | 2775 | return xlog_recover_buffer_pass1(log, item); |
2766 | case XFS_LI_QUOTAOFF: | 2776 | case XFS_LI_QUOTAOFF: |
2767 | return xlog_recover_quotaoff_pass1(log, item); | 2777 | return xlog_recover_quotaoff_pass1(log, item); |
2768 | case XFS_LI_INODE: | 2778 | case XFS_LI_INODE: |
2769 | case XFS_LI_EFI: | 2779 | case XFS_LI_EFI: |
2770 | case XFS_LI_EFD: | 2780 | case XFS_LI_EFD: |
2771 | case XFS_LI_DQUOT: | 2781 | case XFS_LI_DQUOT: |
2772 | /* nothing to do in pass 1 */ | 2782 | /* nothing to do in pass 1 */ |
2773 | return 0; | 2783 | return 0; |
2774 | default: | 2784 | default: |
2775 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", | 2785 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", |
2776 | __func__, ITEM_TYPE(item)); | 2786 | __func__, ITEM_TYPE(item)); |
2777 | ASSERT(0); | 2787 | ASSERT(0); |
2778 | return XFS_ERROR(EIO); | 2788 | return XFS_ERROR(EIO); |
2779 | } | 2789 | } |
2780 | } | 2790 | } |
2781 | 2791 | ||
2782 | STATIC int | 2792 | STATIC int |
2783 | xlog_recover_commit_pass2( | 2793 | xlog_recover_commit_pass2( |
2784 | struct xlog *log, | 2794 | struct xlog *log, |
2785 | struct xlog_recover *trans, | 2795 | struct xlog_recover *trans, |
2786 | struct list_head *buffer_list, | 2796 | struct list_head *buffer_list, |
2787 | struct xlog_recover_item *item) | 2797 | struct xlog_recover_item *item) |
2788 | { | 2798 | { |
2789 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | 2799 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); |
2790 | 2800 | ||
2791 | switch (ITEM_TYPE(item)) { | 2801 | switch (ITEM_TYPE(item)) { |
2792 | case XFS_LI_BUF: | 2802 | case XFS_LI_BUF: |
2793 | return xlog_recover_buffer_pass2(log, buffer_list, item); | 2803 | return xlog_recover_buffer_pass2(log, buffer_list, item); |
2794 | case XFS_LI_INODE: | 2804 | case XFS_LI_INODE: |
2795 | return xlog_recover_inode_pass2(log, buffer_list, item); | 2805 | return xlog_recover_inode_pass2(log, buffer_list, item); |
2796 | case XFS_LI_EFI: | 2806 | case XFS_LI_EFI: |
2797 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | 2807 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); |
2798 | case XFS_LI_EFD: | 2808 | case XFS_LI_EFD: |
2799 | return xlog_recover_efd_pass2(log, item); | 2809 | return xlog_recover_efd_pass2(log, item); |
2800 | case XFS_LI_DQUOT: | 2810 | case XFS_LI_DQUOT: |
2801 | return xlog_recover_dquot_pass2(log, buffer_list, item); | 2811 | return xlog_recover_dquot_pass2(log, buffer_list, item); |
2802 | case XFS_LI_QUOTAOFF: | 2812 | case XFS_LI_QUOTAOFF: |
2803 | /* nothing to do in pass2 */ | 2813 | /* nothing to do in pass2 */ |
2804 | return 0; | 2814 | return 0; |
2805 | default: | 2815 | default: |
2806 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", | 2816 | xfs_warn(log->l_mp, "%s: invalid item type (%d)", |
2807 | __func__, ITEM_TYPE(item)); | 2817 | __func__, ITEM_TYPE(item)); |
2808 | ASSERT(0); | 2818 | ASSERT(0); |
2809 | return XFS_ERROR(EIO); | 2819 | return XFS_ERROR(EIO); |
2810 | } | 2820 | } |
2811 | } | 2821 | } |
2812 | 2822 | ||
2813 | /* | 2823 | /* |
2814 | * Perform the transaction. | 2824 | * Perform the transaction. |
2815 | * | 2825 | * |
2816 | * If the transaction modifies a buffer or inode, do it now. Otherwise, | 2826 | * If the transaction modifies a buffer or inode, do it now. Otherwise, |
2817 | * EFIs and EFDs get queued up by adding entries into the AIL for them. | 2827 | * EFIs and EFDs get queued up by adding entries into the AIL for them. |
2818 | */ | 2828 | */ |
2819 | STATIC int | 2829 | STATIC int |
2820 | xlog_recover_commit_trans( | 2830 | xlog_recover_commit_trans( |
2821 | struct xlog *log, | 2831 | struct xlog *log, |
2822 | struct xlog_recover *trans, | 2832 | struct xlog_recover *trans, |
2823 | int pass) | 2833 | int pass) |
2824 | { | 2834 | { |
2825 | int error = 0, error2; | 2835 | int error = 0, error2; |
2826 | xlog_recover_item_t *item; | 2836 | xlog_recover_item_t *item; |
2827 | LIST_HEAD (buffer_list); | 2837 | LIST_HEAD (buffer_list); |
2828 | 2838 | ||
2829 | hlist_del(&trans->r_list); | 2839 | hlist_del(&trans->r_list); |
2830 | 2840 | ||
2831 | error = xlog_recover_reorder_trans(log, trans, pass); | 2841 | error = xlog_recover_reorder_trans(log, trans, pass); |
2832 | if (error) | 2842 | if (error) |
2833 | return error; | 2843 | return error; |
2834 | 2844 | ||
2835 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | 2845 | list_for_each_entry(item, &trans->r_itemq, ri_list) { |
2836 | switch (pass) { | 2846 | switch (pass) { |
2837 | case XLOG_RECOVER_PASS1: | 2847 | case XLOG_RECOVER_PASS1: |
2838 | error = xlog_recover_commit_pass1(log, trans, item); | 2848 | error = xlog_recover_commit_pass1(log, trans, item); |
2839 | break; | 2849 | break; |
2840 | case XLOG_RECOVER_PASS2: | 2850 | case XLOG_RECOVER_PASS2: |
2841 | error = xlog_recover_commit_pass2(log, trans, | 2851 | error = xlog_recover_commit_pass2(log, trans, |
2842 | &buffer_list, item); | 2852 | &buffer_list, item); |
2843 | break; | 2853 | break; |
2844 | default: | 2854 | default: |
2845 | ASSERT(0); | 2855 | ASSERT(0); |
2846 | } | 2856 | } |
2847 | 2857 | ||
2848 | if (error) | 2858 | if (error) |
2849 | goto out; | 2859 | goto out; |
2850 | } | 2860 | } |
2851 | 2861 | ||
2852 | xlog_recover_free_trans(trans); | 2862 | xlog_recover_free_trans(trans); |
2853 | 2863 | ||
2854 | out: | 2864 | out: |
2855 | error2 = xfs_buf_delwri_submit(&buffer_list); | 2865 | error2 = xfs_buf_delwri_submit(&buffer_list); |
2856 | return error ? error : error2; | 2866 | return error ? error : error2; |
2857 | } | 2867 | } |
2858 | 2868 | ||
2859 | STATIC int | 2869 | STATIC int |
2860 | xlog_recover_unmount_trans( | 2870 | xlog_recover_unmount_trans( |
2861 | struct xlog *log, | 2871 | struct xlog *log, |
2862 | struct xlog_recover *trans) | 2872 | struct xlog_recover *trans) |
2863 | { | 2873 | { |
2864 | /* Do nothing now */ | 2874 | /* Do nothing now */ |
2865 | xfs_warn(log->l_mp, "%s: Unmount LR", __func__); | 2875 | xfs_warn(log->l_mp, "%s: Unmount LR", __func__); |
2866 | return 0; | 2876 | return 0; |
2867 | } | 2877 | } |
2868 | 2878 | ||
2869 | /* | 2879 | /* |
2870 | * There are two valid states of the r_state field. 0 indicates that the | 2880 | * There are two valid states of the r_state field. 0 indicates that the |
2871 | * transaction structure is in a normal state. We have either seen the | 2881 | * transaction structure is in a normal state. We have either seen the |
2872 | * start of the transaction or the last operation we added was not a partial | 2882 | * start of the transaction or the last operation we added was not a partial |
2873 | * operation. If the last operation we added to the transaction was a | 2883 | * operation. If the last operation we added to the transaction was a |
2874 | * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. | 2884 | * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. |
2875 | * | 2885 | * |
2876 | * NOTE: skip LRs with 0 data length. | 2886 | * NOTE: skip LRs with 0 data length. |
2877 | */ | 2887 | */ |
2878 | STATIC int | 2888 | STATIC int |
2879 | xlog_recover_process_data( | 2889 | xlog_recover_process_data( |
2880 | struct xlog *log, | 2890 | struct xlog *log, |
2881 | struct hlist_head rhash[], | 2891 | struct hlist_head rhash[], |
2882 | struct xlog_rec_header *rhead, | 2892 | struct xlog_rec_header *rhead, |
2883 | xfs_caddr_t dp, | 2893 | xfs_caddr_t dp, |
2884 | int pass) | 2894 | int pass) |
2885 | { | 2895 | { |
2886 | xfs_caddr_t lp; | 2896 | xfs_caddr_t lp; |
2887 | int num_logops; | 2897 | int num_logops; |
2888 | xlog_op_header_t *ohead; | 2898 | xlog_op_header_t *ohead; |
2889 | xlog_recover_t *trans; | 2899 | xlog_recover_t *trans; |
2890 | xlog_tid_t tid; | 2900 | xlog_tid_t tid; |
2891 | int error; | 2901 | int error; |
2892 | unsigned long hash; | 2902 | unsigned long hash; |
2893 | uint flags; | 2903 | uint flags; |
2894 | 2904 | ||
2895 | lp = dp + be32_to_cpu(rhead->h_len); | 2905 | lp = dp + be32_to_cpu(rhead->h_len); |
2896 | num_logops = be32_to_cpu(rhead->h_num_logops); | 2906 | num_logops = be32_to_cpu(rhead->h_num_logops); |
2897 | 2907 | ||
2898 | /* check the log format matches our own - else we can't recover */ | 2908 | /* check the log format matches our own - else we can't recover */ |
2899 | if (xlog_header_check_recover(log->l_mp, rhead)) | 2909 | if (xlog_header_check_recover(log->l_mp, rhead)) |
2900 | return (XFS_ERROR(EIO)); | 2910 | return (XFS_ERROR(EIO)); |
2901 | 2911 | ||
2902 | while ((dp < lp) && num_logops) { | 2912 | while ((dp < lp) && num_logops) { |
2903 | ASSERT(dp + sizeof(xlog_op_header_t) <= lp); | 2913 | ASSERT(dp + sizeof(xlog_op_header_t) <= lp); |
2904 | ohead = (xlog_op_header_t *)dp; | 2914 | ohead = (xlog_op_header_t *)dp; |
2905 | dp += sizeof(xlog_op_header_t); | 2915 | dp += sizeof(xlog_op_header_t); |
2906 | if (ohead->oh_clientid != XFS_TRANSACTION && | 2916 | if (ohead->oh_clientid != XFS_TRANSACTION && |
2907 | ohead->oh_clientid != XFS_LOG) { | 2917 | ohead->oh_clientid != XFS_LOG) { |
2908 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", | 2918 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", |
2909 | __func__, ohead->oh_clientid); | 2919 | __func__, ohead->oh_clientid); |
2910 | ASSERT(0); | 2920 | ASSERT(0); |
2911 | return (XFS_ERROR(EIO)); | 2921 | return (XFS_ERROR(EIO)); |
2912 | } | 2922 | } |
2913 | tid = be32_to_cpu(ohead->oh_tid); | 2923 | tid = be32_to_cpu(ohead->oh_tid); |
2914 | hash = XLOG_RHASH(tid); | 2924 | hash = XLOG_RHASH(tid); |
2915 | trans = xlog_recover_find_tid(&rhash[hash], tid); | 2925 | trans = xlog_recover_find_tid(&rhash[hash], tid); |
2916 | if (trans == NULL) { /* not found; add new tid */ | 2926 | if (trans == NULL) { /* not found; add new tid */ |
2917 | if (ohead->oh_flags & XLOG_START_TRANS) | 2927 | if (ohead->oh_flags & XLOG_START_TRANS) |
2918 | xlog_recover_new_tid(&rhash[hash], tid, | 2928 | xlog_recover_new_tid(&rhash[hash], tid, |
2919 | be64_to_cpu(rhead->h_lsn)); | 2929 | be64_to_cpu(rhead->h_lsn)); |
2920 | } else { | 2930 | } else { |
2921 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { | 2931 | if (dp + be32_to_cpu(ohead->oh_len) > lp) { |
2922 | xfs_warn(log->l_mp, "%s: bad length 0x%x", | 2932 | xfs_warn(log->l_mp, "%s: bad length 0x%x", |
2923 | __func__, be32_to_cpu(ohead->oh_len)); | 2933 | __func__, be32_to_cpu(ohead->oh_len)); |
2924 | WARN_ON(1); | 2934 | WARN_ON(1); |
2925 | return (XFS_ERROR(EIO)); | 2935 | return (XFS_ERROR(EIO)); |
2926 | } | 2936 | } |
2927 | flags = ohead->oh_flags & ~XLOG_END_TRANS; | 2937 | flags = ohead->oh_flags & ~XLOG_END_TRANS; |
2928 | if (flags & XLOG_WAS_CONT_TRANS) | 2938 | if (flags & XLOG_WAS_CONT_TRANS) |
2929 | flags &= ~XLOG_CONTINUE_TRANS; | 2939 | flags &= ~XLOG_CONTINUE_TRANS; |
2930 | switch (flags) { | 2940 | switch (flags) { |
2931 | case XLOG_COMMIT_TRANS: | 2941 | case XLOG_COMMIT_TRANS: |
2932 | error = xlog_recover_commit_trans(log, | 2942 | error = xlog_recover_commit_trans(log, |
2933 | trans, pass); | 2943 | trans, pass); |
2934 | break; | 2944 | break; |
2935 | case XLOG_UNMOUNT_TRANS: | 2945 | case XLOG_UNMOUNT_TRANS: |
2936 | error = xlog_recover_unmount_trans(log, trans); | 2946 | error = xlog_recover_unmount_trans(log, trans); |
2937 | break; | 2947 | break; |
2938 | case XLOG_WAS_CONT_TRANS: | 2948 | case XLOG_WAS_CONT_TRANS: |
2939 | error = xlog_recover_add_to_cont_trans(log, | 2949 | error = xlog_recover_add_to_cont_trans(log, |
2940 | trans, dp, | 2950 | trans, dp, |
2941 | be32_to_cpu(ohead->oh_len)); | 2951 | be32_to_cpu(ohead->oh_len)); |
2942 | break; | 2952 | break; |
2943 | case XLOG_START_TRANS: | 2953 | case XLOG_START_TRANS: |
2944 | xfs_warn(log->l_mp, "%s: bad transaction", | 2954 | xfs_warn(log->l_mp, "%s: bad transaction", |
2945 | __func__); | 2955 | __func__); |
2946 | ASSERT(0); | 2956 | ASSERT(0); |
2947 | error = XFS_ERROR(EIO); | 2957 | error = XFS_ERROR(EIO); |
2948 | break; | 2958 | break; |
2949 | case 0: | 2959 | case 0: |
2950 | case XLOG_CONTINUE_TRANS: | 2960 | case XLOG_CONTINUE_TRANS: |
2951 | error = xlog_recover_add_to_trans(log, trans, | 2961 | error = xlog_recover_add_to_trans(log, trans, |
2952 | dp, be32_to_cpu(ohead->oh_len)); | 2962 | dp, be32_to_cpu(ohead->oh_len)); |
2953 | break; | 2963 | break; |
2954 | default: | 2964 | default: |
2955 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", | 2965 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", |
2956 | __func__, flags); | 2966 | __func__, flags); |
2957 | ASSERT(0); | 2967 | ASSERT(0); |
2958 | error = XFS_ERROR(EIO); | 2968 | error = XFS_ERROR(EIO); |
2959 | break; | 2969 | break; |
2960 | } | 2970 | } |
2961 | if (error) | 2971 | if (error) |
2962 | return error; | 2972 | return error; |
2963 | } | 2973 | } |
2964 | dp += be32_to_cpu(ohead->oh_len); | 2974 | dp += be32_to_cpu(ohead->oh_len); |
2965 | num_logops--; | 2975 | num_logops--; |
2966 | } | 2976 | } |
2967 | return 0; | 2977 | return 0; |
2968 | } | 2978 | } |
2969 | 2979 | ||
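The loop above walks a flat byte stream: the caller hands in the record body, and each iteration consumes one operation header followed by oh_len payload bytes, num_logops times. A much-simplified userspace walker over the same shape (the real header is big-endian on disk and carries more fields than this stand-in):

#include <stddef.h>
#include <stdint.h>

struct op_header {		/* simplified stand-in for xlog_op_header_t */
	uint32_t	oh_tid;		/* transaction id */
	uint32_t	oh_len;		/* payload bytes after this header */
	uint8_t		oh_flags;	/* start/commit/continue/unmount */
};

/* Visit each (header, payload) pair in a record body of h_len bytes. */
static int walk_record(const char *dp, size_t h_len, uint32_t num_logops,
		       void (*visit)(const struct op_header *, const char *))
{
	const char *lp = dp + h_len;

	while (dp < lp && num_logops--) {
		const struct op_header *ohead;

		if ((size_t)(lp - dp) < sizeof(*ohead))
			return -1;	/* truncated header */
		ohead = (const void *)dp;
		dp += sizeof(*ohead);
		if ((size_t)(lp - dp) < ohead->oh_len)
			return -1;	/* bad length: corrupt record */
		visit(ohead, dp);	/* dp now points at the payload */
		dp += ohead->oh_len;
	}
	return 0;
}
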
2970 | /* | 2980 | /* |
2971 | * Process an extent free intent item that was recovered from | 2981 | * Process an extent free intent item that was recovered from |
2972 | * the log. We need to free the extents that it describes. | 2982 | * the log. We need to free the extents that it describes. |
2973 | */ | 2983 | */ |
2974 | STATIC int | 2984 | STATIC int |
2975 | xlog_recover_process_efi( | 2985 | xlog_recover_process_efi( |
2976 | xfs_mount_t *mp, | 2986 | xfs_mount_t *mp, |
2977 | xfs_efi_log_item_t *efip) | 2987 | xfs_efi_log_item_t *efip) |
2978 | { | 2988 | { |
2979 | xfs_efd_log_item_t *efdp; | 2989 | xfs_efd_log_item_t *efdp; |
2980 | xfs_trans_t *tp; | 2990 | xfs_trans_t *tp; |
2981 | int i; | 2991 | int i; |
2982 | int error = 0; | 2992 | int error = 0; |
2983 | xfs_extent_t *extp; | 2993 | xfs_extent_t *extp; |
2984 | xfs_fsblock_t startblock_fsb; | 2994 | xfs_fsblock_t startblock_fsb; |
2985 | 2995 | ||
2986 | ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); | 2996 | ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); |
2987 | 2997 | ||
2988 | /* | 2998 | /* |
2989 | * First check the validity of the extents described by the | 2999 | * First check the validity of the extents described by the |
2990 | * EFI. If any are bad, then assume that all are bad and | 3000 | * EFI. If any are bad, then assume that all are bad and |
2991 | * just toss the EFI. | 3001 | * just toss the EFI. |
2992 | */ | 3002 | */ |
2993 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { | 3003 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { |
2994 | extp = &(efip->efi_format.efi_extents[i]); | 3004 | extp = &(efip->efi_format.efi_extents[i]); |
2995 | startblock_fsb = XFS_BB_TO_FSB(mp, | 3005 | startblock_fsb = XFS_BB_TO_FSB(mp, |
2996 | XFS_FSB_TO_DADDR(mp, extp->ext_start)); | 3006 | XFS_FSB_TO_DADDR(mp, extp->ext_start)); |
2997 | if ((startblock_fsb == 0) || | 3007 | if ((startblock_fsb == 0) || |
2998 | (extp->ext_len == 0) || | 3008 | (extp->ext_len == 0) || |
2999 | (startblock_fsb >= mp->m_sb.sb_dblocks) || | 3009 | (startblock_fsb >= mp->m_sb.sb_dblocks) || |
3000 | (extp->ext_len >= mp->m_sb.sb_agblocks)) { | 3010 | (extp->ext_len >= mp->m_sb.sb_agblocks)) { |
3001 | /* | 3011 | /* |
3002 | * This will pull the EFI from the AIL and | 3012 | * This will pull the EFI from the AIL and |
3003 | * free the memory associated with it. | 3013 | * free the memory associated with it. |
3004 | */ | 3014 | */ |
3005 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); | 3015 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); |
3006 | xfs_efi_release(efip, efip->efi_format.efi_nextents); | 3016 | xfs_efi_release(efip, efip->efi_format.efi_nextents); |
3007 | return XFS_ERROR(EIO); | 3017 | return XFS_ERROR(EIO); |
3008 | } | 3018 | } |
3009 | } | 3019 | } |
3010 | 3020 | ||
3011 | tp = xfs_trans_alloc(mp, 0); | 3021 | tp = xfs_trans_alloc(mp, 0); |
3012 | error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); | 3022 | error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); |
3013 | if (error) | 3023 | if (error) |
3014 | goto abort_error; | 3024 | goto abort_error; |
3015 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); | 3025 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); |
3016 | 3026 | ||
3017 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { | 3027 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { |
3018 | extp = &(efip->efi_format.efi_extents[i]); | 3028 | extp = &(efip->efi_format.efi_extents[i]); |
3019 | error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); | 3029 | error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); |
3020 | if (error) | 3030 | if (error) |
3021 | goto abort_error; | 3031 | goto abort_error; |
3022 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, | 3032 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, |
3023 | extp->ext_len); | 3033 | extp->ext_len); |
3024 | } | 3034 | } |
3025 | 3035 | ||
3026 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); | 3036 | set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); |
3027 | error = xfs_trans_commit(tp, 0); | 3037 | error = xfs_trans_commit(tp, 0); |
3028 | return error; | 3038 | return error; |
3029 | 3039 | ||
3030 | abort_error: | 3040 | abort_error: |
3031 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3041 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
3032 | return error; | 3042 | return error; |
3033 | } | 3043 | } |
3034 | 3044 | ||
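
xlog_recover_process_efi() refuses to replay an intent whose extents look implausible: a start block of zero, a zero length, a start at or beyond the end of the data device, or a length of a whole allocation group or more all condemn the entire EFI. The same four tests as a stand-alone predicate, with the mount geometry reduced to two plain parameters and the basic-block/daddr round trip on the start block omitted (illustrative only, not the kernel interface):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Reject an extent that starts at block zero, has zero length, starts
 * at or beyond the end of the data device, or is as long as a whole
 * AG: the per-extent sanity tests applied above, with the geometry
 * passed in directly instead of read out of an xfs_mount.
 */
static bool extent_plausible(uint64_t start_fsb, uint32_t len,
			     uint64_t fs_dblocks, uint32_t ag_blocks)
{
	return start_fsb != 0 && len != 0 &&
	       start_fsb < fs_dblocks && len < ag_blocks;
}

int main(void)
{
	/* toy geometry: 1M-block filesystem, 64k blocks per AG */
	assert(extent_plausible(4096, 16, 1048576, 65536));
	assert(!extent_plausible(0, 16, 1048576, 65536));	/* start 0 */
	assert(!extent_plausible(4096, 0, 1048576, 65536));	/* len 0 */
	assert(!extent_plausible(2000000, 16, 1048576, 65536));	/* off end */
	return 0;
}
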
3035 | /* | 3045 | /* |
3036 | * When this is called, all of the EFIs which did not have | 3046 | * When this is called, all of the EFIs which did not have |
3037 | * corresponding EFDs should be in the AIL. What we do now | 3047 | * corresponding EFDs should be in the AIL. What we do now |
3038 | * is free the extents associated with each one. | 3048 | * is free the extents associated with each one. |
3039 | * | 3049 | * |
3040 | * Since we process the EFIs in normal transactions, they | 3050 | * Since we process the EFIs in normal transactions, they |
3041 | * will be removed at some point after the commit. This prevents | 3051 | * will be removed at some point after the commit. This prevents |
3042 | * us from just walking down the list processing each one. | 3052 | * us from just walking down the list processing each one. |
3043 | * We'll use a flag in the EFI to skip those that we've already | 3053 | * We'll use a flag in the EFI to skip those that we've already |
3044 | * processed and use the AIL iteration mechanism's generation | 3054 | * processed and use the AIL iteration mechanism's generation |
3045 | * count to try to speed this up at least a bit. | 3055 | * count to try to speed this up at least a bit. |
3046 | * | 3056 | * |
3047 | * When we start, we know that the EFIs are the only things in | 3057 | * When we start, we know that the EFIs are the only things in |
3048 | * the AIL. As we process them, however, other items are added | 3058 | * the AIL. As we process them, however, other items are added |
3049 | * to the AIL. Since everything added to the AIL must come after | 3059 | * to the AIL. Since everything added to the AIL must come after |
3050 | * everything already in the AIL, we stop processing as soon as | 3060 | * everything already in the AIL, we stop processing as soon as |
3051 | * we see something other than an EFI in the AIL. | 3061 | * we see something other than an EFI in the AIL. |
3052 | */ | 3062 | */ |
3053 | STATIC int | 3063 | STATIC int |
3054 | xlog_recover_process_efis( | 3064 | xlog_recover_process_efis( |
3055 | struct xlog *log) | 3065 | struct xlog *log) |
3056 | { | 3066 | { |
3057 | xfs_log_item_t *lip; | 3067 | xfs_log_item_t *lip; |
3058 | xfs_efi_log_item_t *efip; | 3068 | xfs_efi_log_item_t *efip; |
3059 | int error = 0; | 3069 | int error = 0; |
3060 | struct xfs_ail_cursor cur; | 3070 | struct xfs_ail_cursor cur; |
3061 | struct xfs_ail *ailp; | 3071 | struct xfs_ail *ailp; |
3062 | 3072 | ||
3063 | ailp = log->l_ailp; | 3073 | ailp = log->l_ailp; |
3064 | spin_lock(&ailp->xa_lock); | 3074 | spin_lock(&ailp->xa_lock); |
3065 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); | 3075 | lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
3066 | while (lip != NULL) { | 3076 | while (lip != NULL) { |
3067 | /* | 3077 | /* |
3068 | * We're done when we see something other than an EFI. | 3078 | * We're done when we see something other than an EFI. |
3069 | * There should be no EFIs left in the AIL now. | 3079 | * There should be no EFIs left in the AIL now. |
3070 | */ | 3080 | */ |
3071 | if (lip->li_type != XFS_LI_EFI) { | 3081 | if (lip->li_type != XFS_LI_EFI) { |
3072 | #ifdef DEBUG | 3082 | #ifdef DEBUG |
3073 | for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) | 3083 | for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) |
3074 | ASSERT(lip->li_type != XFS_LI_EFI); | 3084 | ASSERT(lip->li_type != XFS_LI_EFI); |
3075 | #endif | 3085 | #endif |
3076 | break; | 3086 | break; |
3077 | } | 3087 | } |
3078 | 3088 | ||
3079 | /* | 3089 | /* |
3080 | * Skip EFIs that we've already processed. | 3090 | * Skip EFIs that we've already processed. |
3081 | */ | 3091 | */ |
3082 | efip = (xfs_efi_log_item_t *)lip; | 3092 | efip = (xfs_efi_log_item_t *)lip; |
3083 | if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { | 3093 | if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { |
3084 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 3094 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
3085 | continue; | 3095 | continue; |
3086 | } | 3096 | } |
3087 | 3097 | ||
3088 | spin_unlock(&ailp->xa_lock); | 3098 | spin_unlock(&ailp->xa_lock); |
3089 | error = xlog_recover_process_efi(log->l_mp, efip); | 3099 | error = xlog_recover_process_efi(log->l_mp, efip); |
3090 | spin_lock(&ailp->xa_lock); | 3100 | spin_lock(&ailp->xa_lock); |
3091 | if (error) | 3101 | if (error) |
3092 | goto out; | 3102 | goto out; |
3093 | lip = xfs_trans_ail_cursor_next(ailp, &cur); | 3103 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
3094 | } | 3104 | } |
3095 | out: | 3105 | out: |
3096 | xfs_trans_ail_cursor_done(ailp, &cur); | 3106 | xfs_trans_ail_cursor_done(ailp, &cur); |
3097 | spin_unlock(&ailp->xa_lock); | 3107 | spin_unlock(&ailp->xa_lock); |
3098 | return error; | 3108 | return error; |
3099 | } | 3109 | } |
3100 | 3110 | ||
3101 | /* | 3111 | /* |
3102 | * This routine performs a transaction to null out a bad inode pointer | 3112 | * This routine performs a transaction to null out a bad inode pointer |
3103 | * in an agi unlinked inode hash bucket. | 3113 | * in an agi unlinked inode hash bucket. |
3104 | */ | 3114 | */ |
3105 | STATIC void | 3115 | STATIC void |
3106 | xlog_recover_clear_agi_bucket( | 3116 | xlog_recover_clear_agi_bucket( |
3107 | xfs_mount_t *mp, | 3117 | xfs_mount_t *mp, |
3108 | xfs_agnumber_t agno, | 3118 | xfs_agnumber_t agno, |
3109 | int bucket) | 3119 | int bucket) |
3110 | { | 3120 | { |
3111 | xfs_trans_t *tp; | 3121 | xfs_trans_t *tp; |
3112 | xfs_agi_t *agi; | 3122 | xfs_agi_t *agi; |
3113 | xfs_buf_t *agibp; | 3123 | xfs_buf_t *agibp; |
3114 | int offset; | 3124 | int offset; |
3115 | int error; | 3125 | int error; |
3116 | 3126 | ||
3117 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); | 3127 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); |
3118 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), | 3128 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), |
3119 | 0, 0, 0); | 3129 | 0, 0, 0); |
3120 | if (error) | 3130 | if (error) |
3121 | goto out_abort; | 3131 | goto out_abort; |
3122 | 3132 | ||
3123 | error = xfs_read_agi(mp, tp, agno, &agibp); | 3133 | error = xfs_read_agi(mp, tp, agno, &agibp); |
3124 | if (error) | 3134 | if (error) |
3125 | goto out_abort; | 3135 | goto out_abort; |
3126 | 3136 | ||
3127 | agi = XFS_BUF_TO_AGI(agibp); | 3137 | agi = XFS_BUF_TO_AGI(agibp); |
3128 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | 3138 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
3129 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 3139 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
3130 | (sizeof(xfs_agino_t) * bucket); | 3140 | (sizeof(xfs_agino_t) * bucket); |
3131 | xfs_trans_log_buf(tp, agibp, offset, | 3141 | xfs_trans_log_buf(tp, agibp, offset, |
3132 | (offset + sizeof(xfs_agino_t) - 1)); | 3142 | (offset + sizeof(xfs_agino_t) - 1)); |
3133 | 3143 | ||
3134 | error = xfs_trans_commit(tp, 0); | 3144 | error = xfs_trans_commit(tp, 0); |
3135 | if (error) | 3145 | if (error) |
3136 | goto out_error; | 3146 | goto out_error; |
3137 | return; | 3147 | return; |
3138 | 3148 | ||
3139 | out_abort: | 3149 | out_abort: |
3140 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3150 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); |
3141 | out_error: | 3151 | out_error: |
3142 | xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); | 3152 | xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); |
3143 | return; | 3153 | return; |
3144 | } | 3154 | } |
3145 | 3155 | ||
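
Note that xlog_recover_clear_agi_bucket() logs only the four bytes of the bucket slot it modified, not the whole AGI buffer: the first and last byte offsets handed to xfs_trans_log_buf() come from offsetof() plus the bucket index. The same arithmetic in a user-space sketch, with a toy struct standing in for the on-disk xfs_agi_t:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for the AGI header; the real xfs_agi_t has more fields. */
struct toy_agi {
	uint32_t magic;
	uint32_t version;
	uint32_t unlinked[64];		/* XFS_AGI_UNLINKED_BUCKETS */
};

int main(void)
{
	int bucket = 5;
	size_t first = offsetof(struct toy_agi, unlinked) +
		       sizeof(uint32_t) * bucket;
	size_t last = first + sizeof(uint32_t) - 1;

	/* these two offsets bound exactly the four bytes of bucket 5 */
	printf("log bytes %zu..%zu of the AGI buffer\n", first, last);
	return 0;
}
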
3146 | STATIC xfs_agino_t | 3156 | STATIC xfs_agino_t |
3147 | xlog_recover_process_one_iunlink( | 3157 | xlog_recover_process_one_iunlink( |
3148 | struct xfs_mount *mp, | 3158 | struct xfs_mount *mp, |
3149 | xfs_agnumber_t agno, | 3159 | xfs_agnumber_t agno, |
3150 | xfs_agino_t agino, | 3160 | xfs_agino_t agino, |
3151 | int bucket) | 3161 | int bucket) |
3152 | { | 3162 | { |
3153 | struct xfs_buf *ibp; | 3163 | struct xfs_buf *ibp; |
3154 | struct xfs_dinode *dip; | 3164 | struct xfs_dinode *dip; |
3155 | struct xfs_inode *ip; | 3165 | struct xfs_inode *ip; |
3156 | xfs_ino_t ino; | 3166 | xfs_ino_t ino; |
3157 | int error; | 3167 | int error; |
3158 | 3168 | ||
3159 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 3169 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
3160 | error = xfs_iget(mp, NULL, ino, 0, 0, &ip); | 3170 | error = xfs_iget(mp, NULL, ino, 0, 0, &ip); |
3161 | if (error) | 3171 | if (error) |
3162 | goto fail; | 3172 | goto fail; |
3163 | 3173 | ||
3164 | /* | 3174 | /* |
3165 | * Get the on disk inode to find the next inode in the bucket. | 3175 | * Get the on disk inode to find the next inode in the bucket. |
3166 | */ | 3176 | */ |
3167 | error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0); | 3177 | error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0); |
3168 | if (error) | 3178 | if (error) |
3169 | goto fail_iput; | 3179 | goto fail_iput; |
3170 | 3180 | ||
3171 | ASSERT(ip->i_d.di_nlink == 0); | 3181 | ASSERT(ip->i_d.di_nlink == 0); |
3172 | ASSERT(ip->i_d.di_mode != 0); | 3182 | ASSERT(ip->i_d.di_mode != 0); |
3173 | 3183 | ||
3174 | /* setup for the next pass */ | 3184 | /* setup for the next pass */ |
3175 | agino = be32_to_cpu(dip->di_next_unlinked); | 3185 | agino = be32_to_cpu(dip->di_next_unlinked); |
3176 | xfs_buf_relse(ibp); | 3186 | xfs_buf_relse(ibp); |
3177 | 3187 | ||
3178 | /* | 3188 | /* |
3179 | * Prevent any DMAPI event from being sent when the reference on | 3189 | * Prevent any DMAPI event from being sent when the reference on |
3180 | * the inode is dropped. | 3190 | * the inode is dropped. |
3181 | */ | 3191 | */ |
3182 | ip->i_d.di_dmevmask = 0; | 3192 | ip->i_d.di_dmevmask = 0; |
3183 | 3193 | ||
3184 | IRELE(ip); | 3194 | IRELE(ip); |
3185 | return agino; | 3195 | return agino; |
3186 | 3196 | ||
3187 | fail_iput: | 3197 | fail_iput: |
3188 | IRELE(ip); | 3198 | IRELE(ip); |
3189 | fail: | 3199 | fail: |
3190 | /* | 3200 | /* |
3191 | * We can't read in the inode this bucket points to, or this inode | 3201 | * We can't read in the inode this bucket points to, or this inode |
3192 | * is messed up. Just ditch this bucket of inodes. We will lose | 3202 | * is messed up. Just ditch this bucket of inodes. We will lose |
3193 | * some inodes and space, but at least we won't hang. | 3203 | * some inodes and space, but at least we won't hang. |
3194 | * | 3204 | * |
3195 | * Call xlog_recover_clear_agi_bucket() to perform a transaction to | 3205 | * Call xlog_recover_clear_agi_bucket() to perform a transaction to |
3196 | * clear the inode pointer in the bucket. | 3206 | * clear the inode pointer in the bucket. |
3197 | */ | 3207 | */ |
3198 | xlog_recover_clear_agi_bucket(mp, agno, bucket); | 3208 | xlog_recover_clear_agi_bucket(mp, agno, bucket); |
3199 | return NULLAGINO; | 3209 | return NULLAGINO; |
3200 | } | 3210 | } |
3201 | 3211 | ||
3202 | /* | 3212 | /* |
3203 | * xlog_iunlink_recover | 3213 | * xlog_iunlink_recover |
3204 | * | 3214 | * |
3205 | * This is called during recovery to process any inodes which | 3215 | * This is called during recovery to process any inodes which |
3206 | * we unlinked but not freed when the system crashed. These | 3216 | * we unlinked but not freed when the system crashed. These |
3207 | * inodes will be on the lists in the AGI blocks. What we do | 3217 | * inodes will be on the lists in the AGI blocks. What we do |
3208 | * here is scan all the AGIs and fully truncate and free any | 3218 | * here is scan all the AGIs and fully truncate and free any |
3209 | * inodes found on the lists. Each inode is removed from the | 3219 | * inodes found on the lists. Each inode is removed from the |
3210 | * lists when it has been fully truncated and is freed. The | 3220 | * lists when it has been fully truncated and is freed. The |
3211 | * freeing of the inode and its removal from the list must be | 3221 | * freeing of the inode and its removal from the list must be |
3212 | * atomic. | 3222 | * atomic. |
3213 | */ | 3223 | */ |
3214 | STATIC void | 3224 | STATIC void |
3215 | xlog_recover_process_iunlinks( | 3225 | xlog_recover_process_iunlinks( |
3216 | struct xlog *log) | 3226 | struct xlog *log) |
3217 | { | 3227 | { |
3218 | xfs_mount_t *mp; | 3228 | xfs_mount_t *mp; |
3219 | xfs_agnumber_t agno; | 3229 | xfs_agnumber_t agno; |
3220 | xfs_agi_t *agi; | 3230 | xfs_agi_t *agi; |
3221 | xfs_buf_t *agibp; | 3231 | xfs_buf_t *agibp; |
3222 | xfs_agino_t agino; | 3232 | xfs_agino_t agino; |
3223 | int bucket; | 3233 | int bucket; |
3224 | int error; | 3234 | int error; |
3225 | uint mp_dmevmask; | 3235 | uint mp_dmevmask; |
3226 | 3236 | ||
3227 | mp = log->l_mp; | 3237 | mp = log->l_mp; |
3228 | 3238 | ||
3229 | /* | 3239 | /* |
3230 | * Prevent any DMAPI event from being sent while in this function. | 3240 | * Prevent any DMAPI event from being sent while in this function. |
3231 | */ | 3241 | */ |
3232 | mp_dmevmask = mp->m_dmevmask; | 3242 | mp_dmevmask = mp->m_dmevmask; |
3233 | mp->m_dmevmask = 0; | 3243 | mp->m_dmevmask = 0; |
3234 | 3244 | ||
3235 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 3245 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
3236 | /* | 3246 | /* |
3237 | * Find the agi for this ag. | 3247 | * Find the agi for this ag. |
3238 | */ | 3248 | */ |
3239 | error = xfs_read_agi(mp, NULL, agno, &agibp); | 3249 | error = xfs_read_agi(mp, NULL, agno, &agibp); |
3240 | if (error) { | 3250 | if (error) { |
3241 | /* | 3251 | /* |
3242 | * AGI is b0rked. Don't process it. | 3252 | * AGI is b0rked. Don't process it. |
3243 | * | 3253 | * |
3244 | * We should probably mark the filesystem as corrupt | 3254 | * We should probably mark the filesystem as corrupt |
3245 | * after we've recovered all the AGs we can.... | 3255 | * after we've recovered all the AGs we can.... |
3246 | */ | 3256 | */ |
3247 | continue; | 3257 | continue; |
3248 | } | 3258 | } |
3249 | /* | 3259 | /* |
3250 | * Unlock the buffer so that it can be acquired in the normal | 3260 | * Unlock the buffer so that it can be acquired in the normal |
3251 | * course of the transaction to truncate and free each inode. | 3261 | * course of the transaction to truncate and free each inode. |
3252 | * Because we are not racing with anyone else here for the AGI | 3262 | * Because we are not racing with anyone else here for the AGI |
3253 | * buffer, we don't even need to hold it locked to read the | 3263 | * buffer, we don't even need to hold it locked to read the |
3254 | * initial unlinked bucket entries out of the buffer. We keep | 3264 | * initial unlinked bucket entries out of the buffer. We keep |
3255 | * a buffer reference though, so that it stays pinned in memory | 3265 | * a buffer reference though, so that it stays pinned in memory |
3256 | * while we need the buffer. | 3266 | * while we need the buffer. |
3257 | */ | 3267 | */ |
3258 | agi = XFS_BUF_TO_AGI(agibp); | 3268 | agi = XFS_BUF_TO_AGI(agibp); |
3259 | xfs_buf_unlock(agibp); | 3269 | xfs_buf_unlock(agibp); |
3260 | 3270 | ||
3261 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { | 3271 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { |
3262 | agino = be32_to_cpu(agi->agi_unlinked[bucket]); | 3272 | agino = be32_to_cpu(agi->agi_unlinked[bucket]); |
3263 | while (agino != NULLAGINO) { | 3273 | while (agino != NULLAGINO) { |
3264 | agino = xlog_recover_process_one_iunlink(mp, | 3274 | agino = xlog_recover_process_one_iunlink(mp, |
3265 | agno, agino, bucket); | 3275 | agno, agino, bucket); |
3266 | } | 3276 | } |
3267 | } | 3277 | } |
3268 | xfs_buf_rele(agibp); | 3278 | xfs_buf_rele(agibp); |
3269 | } | 3279 | } |
3270 | 3280 | ||
3271 | mp->m_dmevmask = mp_dmevmask; | 3281 | mp->m_dmevmask = mp_dmevmask; |
3272 | } | 3282 | } |
3273 | 3283 | ||
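
Each AGI bucket heads a singly linked list of unlinked inodes, chained through each inode's di_next_unlinked field and terminated by NULLAGINO; the loops above visit every bucket in every AG and chase each chain to its end. A compact model of one chain walk, with an in-memory array of next pointers standing in for on-disk inodes (hypothetical data, not the kernel structures):

#include <stdint.h>
#include <stdio.h>

#define NULLAGINO ((uint32_t)-1)

/* next[i] plays the role of inode i's di_next_unlinked pointer. */
static void walk_bucket(uint32_t head, const uint32_t *next)
{
	uint32_t agino = head;

	while (agino != NULLAGINO) {
		printf("recover unlinked inode %u\n", agino);
		agino = next[agino];	/* step to the next chain entry */
	}
}

int main(void)
{
	/* chain: inode 2 -> 7 -> 4 -> end; other slots unused */
	uint32_t next[8] = { [2] = 7, [7] = 4, [4] = NULLAGINO };

	walk_bucket(2, next);
	return 0;
}
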
3274 | /* | 3284 | /* |
3275 | * Unpack the log buffer data and CRC check it. If the check fails, issue a | 3285 | * Unpack the log buffer data and CRC check it. If the check fails, issue a |
3276 | * warning if and only if the CRC in the header is non-zero. This makes the | 3286 | * warning if and only if the CRC in the header is non-zero. This makes the |
3277 | * check an advisory warning, and the zero CRC check will prevent failure | 3287 | * check an advisory warning, and the zero CRC check will prevent failure |
3278 | * warnings from being emitted when upgrading the kernel from one that does not | 3288 | * warnings from being emitted when upgrading the kernel from one that does not |
3279 | * add CRCs by default. | 3289 | * add CRCs by default. |
3280 | * | 3290 | * |
3281 | * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log | 3291 | * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log |
3282 | * corruption failure. | 3292 | * corruption failure. |
3283 | */ | 3293 | */ |
3284 | STATIC int | 3294 | STATIC int |
3285 | xlog_unpack_data_crc( | 3295 | xlog_unpack_data_crc( |
3286 | struct xlog_rec_header *rhead, | 3296 | struct xlog_rec_header *rhead, |
3287 | xfs_caddr_t dp, | 3297 | xfs_caddr_t dp, |
3288 | struct xlog *log) | 3298 | struct xlog *log) |
3289 | { | 3299 | { |
3290 | __le32 crc; | 3300 | __le32 crc; |
3291 | 3301 | ||
3292 | crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); | 3302 | crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); |
3293 | if (crc != rhead->h_crc) { | 3303 | if (crc != rhead->h_crc) { |
3294 | if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { | 3304 | if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { |
3295 | xfs_alert(log->l_mp, | 3305 | xfs_alert(log->l_mp, |
3296 | "log record CRC mismatch: found 0x%x, expected 0x%x.\n", | 3306 | "log record CRC mismatch: found 0x%x, expected 0x%x.\n", |
3297 | le32_to_cpu(rhead->h_crc), | 3307 | le32_to_cpu(rhead->h_crc), |
3298 | le32_to_cpu(crc)); | 3308 | le32_to_cpu(crc)); |
3299 | xfs_hex_dump(dp, 32); | 3309 | xfs_hex_dump(dp, 32); |
3300 | } | 3310 | } |
3301 | 3311 | ||
3302 | /* | 3312 | /* |
3303 | * If we've detected a log record corruption, then we can't | 3313 | * If we've detected a log record corruption, then we can't |
3304 | * recover past this point. Abort recovery if we are enforcing | 3314 | * recover past this point. Abort recovery if we are enforcing |
3305 | * CRC protection by punting an error back up the stack. | 3315 | * CRC protection by punting an error back up the stack. |
3306 | */ | 3316 | */ |
3307 | if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) | 3317 | if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) |
3308 | return EFSCORRUPTED; | 3318 | return EFSCORRUPTED; |
3309 | } | 3319 | } |
3310 | 3320 | ||
3311 | return 0; | 3321 | return 0; |
3312 | } | 3322 | } |
3313 | 3323 | ||
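
The policy the comment above describes is a small decision matrix: a mismatch is silent when the stored CRC is zero on a non-CRC filesystem (old kernels wrote zero there), advisory otherwise, and fatal only when the filesystem is CRC-enabled. The same matrix as a stand-alone function (illustrative; a CRC_FAIL verdict implies the alert is emitted too):

#include <assert.h>
#include <stdbool.h>

enum crc_action { CRC_OK, CRC_WARN, CRC_FAIL };

/*
 * mismatch:   computed CRC differs from the one in the record header
 * stored_set: the header's h_crc field is non-zero
 * fs_has_crc: the filesystem was made with CRCs enabled
 */
static enum crc_action log_crc_policy(bool mismatch, bool stored_set,
				      bool fs_has_crc)
{
	if (!mismatch)
		return CRC_OK;
	if (fs_has_crc)
		return CRC_FAIL;	/* alert, then EFSCORRUPTED */
	if (stored_set)
		return CRC_WARN;	/* advisory alert only */
	return CRC_OK;			/* zero CRC from an old kernel */
}

int main(void)
{
	assert(log_crc_policy(true, false, false) == CRC_OK);
	assert(log_crc_policy(true, true, false) == CRC_WARN);
	assert(log_crc_policy(true, false, true) == CRC_FAIL);
	return 0;
}
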
3314 | STATIC int | 3324 | STATIC int |
3315 | xlog_unpack_data( | 3325 | xlog_unpack_data( |
3316 | struct xlog_rec_header *rhead, | 3326 | struct xlog_rec_header *rhead, |
3317 | xfs_caddr_t dp, | 3327 | xfs_caddr_t dp, |
3318 | struct xlog *log) | 3328 | struct xlog *log) |
3319 | { | 3329 | { |
3320 | int i, j, k; | 3330 | int i, j, k; |
3321 | int error; | 3331 | int error; |
3322 | 3332 | ||
3323 | error = xlog_unpack_data_crc(rhead, dp, log); | 3333 | error = xlog_unpack_data_crc(rhead, dp, log); |
3324 | if (error) | 3334 | if (error) |
3325 | return error; | 3335 | return error; |
3326 | 3336 | ||
3327 | for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && | 3337 | for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && |
3328 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { | 3338 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { |
3329 | *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; | 3339 | *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; |
3330 | dp += BBSIZE; | 3340 | dp += BBSIZE; |
3331 | } | 3341 | } |
3332 | 3342 | ||
3333 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 3343 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
3334 | xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; | 3344 | xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; |
3335 | for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { | 3345 | for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { |
3336 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | 3346 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); |
3337 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | 3347 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); |
3338 | *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; | 3348 | *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; |
3339 | dp += BBSIZE; | 3349 | dp += BBSIZE; |
3340 | } | 3350 | } |
3341 | } | 3351 | } |
3342 | 3352 | ||
3343 | return 0; | 3353 | return 0; |
3344 | } | 3354 | } |
3345 | 3355 | ||
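
For context on what xlog_unpack_data() undoes: when a record is written, the first four bytes of every 512-byte basic block are overwritten with the cycle number so torn writes can be detected, and the originals are stashed in h_cycle_data (spilling into the extended headers on v2 logs). Recovery must put them back before replay. A minimal round trip of that stash-and-restore scheme, with made-up buffer sizes:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define BBSIZE	512
#define NBLKS	4

/* Stash each block's first word, then stamp the cycle number over it. */
static void pack(uint8_t *data, uint32_t *stash, uint32_t cycle)
{
	for (int i = 0; i < NBLKS; i++) {
		memcpy(&stash[i], data + i * BBSIZE, sizeof(uint32_t));
		memcpy(data + i * BBSIZE, &cycle, sizeof(uint32_t));
	}
}

/* Restore the stashed words: the unpack step done during recovery. */
static void unpack(uint8_t *data, const uint32_t *stash)
{
	for (int i = 0; i < NBLKS; i++)
		memcpy(data + i * BBSIZE, &stash[i], sizeof(uint32_t));
}

int main(void)
{
	static uint8_t buf[NBLKS * BBSIZE];
	uint32_t stash[NBLKS];

	memset(buf, 0xab, sizeof(buf));
	pack(buf, stash, 17);		/* as done at write time */
	unpack(buf, stash);		/* as done at recovery time */
	assert(buf[0] == 0xab);		/* original first word restored */
	return 0;
}
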
3346 | STATIC int | 3356 | STATIC int |
3347 | xlog_valid_rec_header( | 3357 | xlog_valid_rec_header( |
3348 | struct xlog *log, | 3358 | struct xlog *log, |
3349 | struct xlog_rec_header *rhead, | 3359 | struct xlog_rec_header *rhead, |
3350 | xfs_daddr_t blkno) | 3360 | xfs_daddr_t blkno) |
3351 | { | 3361 | { |
3352 | int hlen; | 3362 | int hlen; |
3353 | 3363 | ||
3354 | if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { | 3364 | if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { |
3355 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", | 3365 | XFS_ERROR_REPORT("xlog_valid_rec_header(1)", |
3356 | XFS_ERRLEVEL_LOW, log->l_mp); | 3366 | XFS_ERRLEVEL_LOW, log->l_mp); |
3357 | return XFS_ERROR(EFSCORRUPTED); | 3367 | return XFS_ERROR(EFSCORRUPTED); |
3358 | } | 3368 | } |
3359 | if (unlikely( | 3369 | if (unlikely( |
3360 | (!rhead->h_version || | 3370 | (!rhead->h_version || |
3361 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { | 3371 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
3362 | xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", | 3372 | xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", |
3363 | __func__, be32_to_cpu(rhead->h_version)); | 3373 | __func__, be32_to_cpu(rhead->h_version)); |
3364 | return XFS_ERROR(EIO); | 3374 | return XFS_ERROR(EIO); |
3365 | } | 3375 | } |
3366 | 3376 | ||
3367 | /* LR body must have data or it wouldn't have been written */ | 3377 | /* LR body must have data or it wouldn't have been written */ |
3368 | hlen = be32_to_cpu(rhead->h_len); | 3378 | hlen = be32_to_cpu(rhead->h_len); |
3369 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { | 3379 | if (unlikely( hlen <= 0 || hlen > INT_MAX )) { |
3370 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", | 3380 | XFS_ERROR_REPORT("xlog_valid_rec_header(2)", |
3371 | XFS_ERRLEVEL_LOW, log->l_mp); | 3381 | XFS_ERRLEVEL_LOW, log->l_mp); |
3372 | return XFS_ERROR(EFSCORRUPTED); | 3382 | return XFS_ERROR(EFSCORRUPTED); |
3373 | } | 3383 | } |
3374 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { | 3384 | if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { |
3375 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", | 3385 | XFS_ERROR_REPORT("xlog_valid_rec_header(3)", |
3376 | XFS_ERRLEVEL_LOW, log->l_mp); | 3386 | XFS_ERRLEVEL_LOW, log->l_mp); |
3377 | return XFS_ERROR(EFSCORRUPTED); | 3387 | return XFS_ERROR(EFSCORRUPTED); |
3378 | } | 3388 | } |
3379 | return 0; | 3389 | return 0; |
3380 | } | 3390 | } |
3381 | 3391 | ||
3382 | /* | 3392 | /* |
3383 | * Read the log from tail to head and process the log records found. | 3393 | * Read the log from tail to head and process the log records found. |
3384 | * Handle the two cases where the tail and head are in the same cycle | 3394 | * Handle the two cases where the tail and head are in the same cycle |
3385 | * and where the active portion of the log wraps around the end of | 3395 | * and where the active portion of the log wraps around the end of |
3386 | * the physical log separately. The pass parameter is passed through | 3396 | * the physical log separately. The pass parameter is passed through |
3387 | * to the routines called to process the data and is not looked at | 3397 | * to the routines called to process the data and is not looked at |
3388 | * here. | 3398 | * here. |
3389 | */ | 3399 | */ |
3390 | STATIC int | 3400 | STATIC int |
3391 | xlog_do_recovery_pass( | 3401 | xlog_do_recovery_pass( |
3392 | struct xlog *log, | 3402 | struct xlog *log, |
3393 | xfs_daddr_t head_blk, | 3403 | xfs_daddr_t head_blk, |
3394 | xfs_daddr_t tail_blk, | 3404 | xfs_daddr_t tail_blk, |
3395 | int pass) | 3405 | int pass) |
3396 | { | 3406 | { |
3397 | xlog_rec_header_t *rhead; | 3407 | xlog_rec_header_t *rhead; |
3398 | xfs_daddr_t blk_no; | 3408 | xfs_daddr_t blk_no; |
3399 | xfs_caddr_t offset; | 3409 | xfs_caddr_t offset; |
3400 | xfs_buf_t *hbp, *dbp; | 3410 | xfs_buf_t *hbp, *dbp; |
3401 | int error = 0, h_size; | 3411 | int error = 0, h_size; |
3402 | int bblks, split_bblks; | 3412 | int bblks, split_bblks; |
3403 | int hblks, split_hblks, wrapped_hblks; | 3413 | int hblks, split_hblks, wrapped_hblks; |
3404 | struct hlist_head rhash[XLOG_RHASH_SIZE]; | 3414 | struct hlist_head rhash[XLOG_RHASH_SIZE]; |
3405 | 3415 | ||
3406 | ASSERT(head_blk != tail_blk); | 3416 | ASSERT(head_blk != tail_blk); |
3407 | 3417 | ||
3408 | /* | 3418 | /* |
3409 | * Read the header of the tail block and get the iclog buffer size from | 3419 | * Read the header of the tail block and get the iclog buffer size from |
3410 | * h_size. Use this to tell how many sectors make up the log header. | 3420 | * h_size. Use this to tell how many sectors make up the log header. |
3411 | */ | 3421 | */ |
3412 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | 3422 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
3413 | /* | 3423 | /* |
3414 | * When using variable length iclogs, read first sector of | 3424 | * When using variable length iclogs, read first sector of |
3415 | * iclog header and extract the header size from it. Get a | 3425 | * iclog header and extract the header size from it. Get a |
3416 | * new hbp that is the correct size. | 3426 | * new hbp that is the correct size. |
3417 | */ | 3427 | */ |
3418 | hbp = xlog_get_bp(log, 1); | 3428 | hbp = xlog_get_bp(log, 1); |
3419 | if (!hbp) | 3429 | if (!hbp) |
3420 | return ENOMEM; | 3430 | return ENOMEM; |
3421 | 3431 | ||
3422 | error = xlog_bread(log, tail_blk, 1, hbp, &offset); | 3432 | error = xlog_bread(log, tail_blk, 1, hbp, &offset); |
3423 | if (error) | 3433 | if (error) |
3424 | goto bread_err1; | 3434 | goto bread_err1; |
3425 | 3435 | ||
3426 | rhead = (xlog_rec_header_t *)offset; | 3436 | rhead = (xlog_rec_header_t *)offset; |
3427 | error = xlog_valid_rec_header(log, rhead, tail_blk); | 3437 | error = xlog_valid_rec_header(log, rhead, tail_blk); |
3428 | if (error) | 3438 | if (error) |
3429 | goto bread_err1; | 3439 | goto bread_err1; |
3430 | h_size = be32_to_cpu(rhead->h_size); | 3440 | h_size = be32_to_cpu(rhead->h_size); |
3431 | if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && | 3441 | if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && |
3432 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { | 3442 | (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
3433 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; | 3443 | hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
3434 | if (h_size % XLOG_HEADER_CYCLE_SIZE) | 3444 | if (h_size % XLOG_HEADER_CYCLE_SIZE) |
3435 | hblks++; | 3445 | hblks++; |
3436 | xlog_put_bp(hbp); | 3446 | xlog_put_bp(hbp); |
3437 | hbp = xlog_get_bp(log, hblks); | 3447 | hbp = xlog_get_bp(log, hblks); |
3438 | } else { | 3448 | } else { |
3439 | hblks = 1; | 3449 | hblks = 1; |
3440 | } | 3450 | } |
3441 | } else { | 3451 | } else { |
3442 | ASSERT(log->l_sectBBsize == 1); | 3452 | ASSERT(log->l_sectBBsize == 1); |
3443 | hblks = 1; | 3453 | hblks = 1; |
3444 | hbp = xlog_get_bp(log, 1); | 3454 | hbp = xlog_get_bp(log, 1); |
3445 | h_size = XLOG_BIG_RECORD_BSIZE; | 3455 | h_size = XLOG_BIG_RECORD_BSIZE; |
3446 | } | 3456 | } |
3447 | 3457 | ||
3448 | if (!hbp) | 3458 | if (!hbp) |
3449 | return ENOMEM; | 3459 | return ENOMEM; |
3450 | dbp = xlog_get_bp(log, BTOBB(h_size)); | 3460 | dbp = xlog_get_bp(log, BTOBB(h_size)); |
3451 | if (!dbp) { | 3461 | if (!dbp) { |
3452 | xlog_put_bp(hbp); | 3462 | xlog_put_bp(hbp); |
3453 | return ENOMEM; | 3463 | return ENOMEM; |
3454 | } | 3464 | } |
3455 | 3465 | ||
3456 | memset(rhash, 0, sizeof(rhash)); | 3466 | memset(rhash, 0, sizeof(rhash)); |
3457 | if (tail_blk <= head_blk) { | 3467 | if (tail_blk <= head_blk) { |
3458 | for (blk_no = tail_blk; blk_no < head_blk; ) { | 3468 | for (blk_no = tail_blk; blk_no < head_blk; ) { |
3459 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); | 3469 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); |
3460 | if (error) | 3470 | if (error) |
3461 | goto bread_err2; | 3471 | goto bread_err2; |
3462 | 3472 | ||
3463 | rhead = (xlog_rec_header_t *)offset; | 3473 | rhead = (xlog_rec_header_t *)offset; |
3464 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3474 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3465 | if (error) | 3475 | if (error) |
3466 | goto bread_err2; | 3476 | goto bread_err2; |
3467 | 3477 | ||
3468 | /* blocks in data section */ | 3478 | /* blocks in data section */ |
3469 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3479 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3470 | error = xlog_bread(log, blk_no + hblks, bblks, dbp, | 3480 | error = xlog_bread(log, blk_no + hblks, bblks, dbp, |
3471 | &offset); | 3481 | &offset); |
3472 | if (error) | 3482 | if (error) |
3473 | goto bread_err2; | 3483 | goto bread_err2; |
3474 | 3484 | ||
3475 | error = xlog_unpack_data(rhead, offset, log); | 3485 | error = xlog_unpack_data(rhead, offset, log); |
3476 | if (error) | 3486 | if (error) |
3477 | goto bread_err2; | 3487 | goto bread_err2; |
3478 | 3488 | ||
3479 | error = xlog_recover_process_data(log, | 3489 | error = xlog_recover_process_data(log, |
3480 | rhash, rhead, offset, pass); | 3490 | rhash, rhead, offset, pass); |
3481 | if (error) | 3491 | if (error) |
3482 | goto bread_err2; | 3492 | goto bread_err2; |
3483 | blk_no += bblks + hblks; | 3493 | blk_no += bblks + hblks; |
3484 | } | 3494 | } |
3485 | } else { | 3495 | } else { |
3486 | /* | 3496 | /* |
3487 | * Perform recovery around the end of the physical log. | 3497 | * Perform recovery around the end of the physical log. |
3488 | * When the head is not on the same cycle number as the tail, | 3498 | * When the head is not on the same cycle number as the tail, |
3489 | * we can't do a sequential recovery as above. | 3499 | * we can't do a sequential recovery as above. |
3490 | */ | 3500 | */ |
3491 | blk_no = tail_blk; | 3501 | blk_no = tail_blk; |
3492 | while (blk_no < log->l_logBBsize) { | 3502 | while (blk_no < log->l_logBBsize) { |
3493 | /* | 3503 | /* |
3494 | * Check for header wrapping around physical end-of-log | 3504 | * Check for header wrapping around physical end-of-log |
3495 | */ | 3505 | */ |
3496 | offset = hbp->b_addr; | 3506 | offset = hbp->b_addr; |
3497 | split_hblks = 0; | 3507 | split_hblks = 0; |
3498 | wrapped_hblks = 0; | 3508 | wrapped_hblks = 0; |
3499 | if (blk_no + hblks <= log->l_logBBsize) { | 3509 | if (blk_no + hblks <= log->l_logBBsize) { |
3500 | /* Read header in one read */ | 3510 | /* Read header in one read */ |
3501 | error = xlog_bread(log, blk_no, hblks, hbp, | 3511 | error = xlog_bread(log, blk_no, hblks, hbp, |
3502 | &offset); | 3512 | &offset); |
3503 | if (error) | 3513 | if (error) |
3504 | goto bread_err2; | 3514 | goto bread_err2; |
3505 | } else { | 3515 | } else { |
3506 | /* This LR is split across physical log end */ | 3516 | /* This LR is split across physical log end */ |
3507 | if (blk_no != log->l_logBBsize) { | 3517 | if (blk_no != log->l_logBBsize) { |
3508 | /* some data before physical log end */ | 3518 | /* some data before physical log end */ |
3509 | ASSERT(blk_no <= INT_MAX); | 3519 | ASSERT(blk_no <= INT_MAX); |
3510 | split_hblks = log->l_logBBsize - (int)blk_no; | 3520 | split_hblks = log->l_logBBsize - (int)blk_no; |
3511 | ASSERT(split_hblks > 0); | 3521 | ASSERT(split_hblks > 0); |
3512 | error = xlog_bread(log, blk_no, | 3522 | error = xlog_bread(log, blk_no, |
3513 | split_hblks, hbp, | 3523 | split_hblks, hbp, |
3514 | &offset); | 3524 | &offset); |
3515 | if (error) | 3525 | if (error) |
3516 | goto bread_err2; | 3526 | goto bread_err2; |
3517 | } | 3527 | } |
3518 | 3528 | ||
3519 | /* | 3529 | /* |
3520 | * Note: this black magic still works with | 3530 | * Note: this black magic still works with |
3521 | * large sector sizes (non-512) only because: | 3531 | * large sector sizes (non-512) only because: |
3522 | * - we increased the buffer size originally | 3532 | * - we increased the buffer size originally |
3523 | * by 1 sector giving us enough extra space | 3533 | * by 1 sector giving us enough extra space |
3524 | * for the second read; | 3534 | * for the second read; |
3525 | * - the log start is guaranteed to be sector | 3535 | * - the log start is guaranteed to be sector |
3526 | * aligned; | 3536 | * aligned; |
3527 | * - we read the log end (LR header start) | 3537 | * - we read the log end (LR header start) |
3528 | * _first_, then the log start (LR header end) | 3538 | * _first_, then the log start (LR header end) |
3529 | * - order is important. | 3539 | * - order is important. |
3530 | */ | 3540 | */ |
3531 | wrapped_hblks = hblks - split_hblks; | 3541 | wrapped_hblks = hblks - split_hblks; |
3532 | error = xlog_bread_offset(log, 0, | 3542 | error = xlog_bread_offset(log, 0, |
3533 | wrapped_hblks, hbp, | 3543 | wrapped_hblks, hbp, |
3534 | offset + BBTOB(split_hblks)); | 3544 | offset + BBTOB(split_hblks)); |
3535 | if (error) | 3545 | if (error) |
3536 | goto bread_err2; | 3546 | goto bread_err2; |
3537 | } | 3547 | } |
3538 | rhead = (xlog_rec_header_t *)offset; | 3548 | rhead = (xlog_rec_header_t *)offset; |
3539 | error = xlog_valid_rec_header(log, rhead, | 3549 | error = xlog_valid_rec_header(log, rhead, |
3540 | split_hblks ? blk_no : 0); | 3550 | split_hblks ? blk_no : 0); |
3541 | if (error) | 3551 | if (error) |
3542 | goto bread_err2; | 3552 | goto bread_err2; |
3543 | 3553 | ||
3544 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3554 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3545 | blk_no += hblks; | 3555 | blk_no += hblks; |
3546 | 3556 | ||
3547 | /* Read in data for log record */ | 3557 | /* Read in data for log record */ |
3548 | if (blk_no + bblks <= log->l_logBBsize) { | 3558 | if (blk_no + bblks <= log->l_logBBsize) { |
3549 | error = xlog_bread(log, blk_no, bblks, dbp, | 3559 | error = xlog_bread(log, blk_no, bblks, dbp, |
3550 | &offset); | 3560 | &offset); |
3551 | if (error) | 3561 | if (error) |
3552 | goto bread_err2; | 3562 | goto bread_err2; |
3553 | } else { | 3563 | } else { |
3554 | /* This log record is split across the | 3564 | /* This log record is split across the |
3555 | * physical end of log */ | 3565 | * physical end of log */ |
3556 | offset = dbp->b_addr; | 3566 | offset = dbp->b_addr; |
3557 | split_bblks = 0; | 3567 | split_bblks = 0; |
3558 | if (blk_no != log->l_logBBsize) { | 3568 | if (blk_no != log->l_logBBsize) { |
3559 | /* some data is before the physical | 3569 | /* some data is before the physical |
3560 | * end of log */ | 3570 | * end of log */ |
3561 | ASSERT(!wrapped_hblks); | 3571 | ASSERT(!wrapped_hblks); |
3562 | ASSERT(blk_no <= INT_MAX); | 3572 | ASSERT(blk_no <= INT_MAX); |
3563 | split_bblks = | 3573 | split_bblks = |
3564 | log->l_logBBsize - (int)blk_no; | 3574 | log->l_logBBsize - (int)blk_no; |
3565 | ASSERT(split_bblks > 0); | 3575 | ASSERT(split_bblks > 0); |
3566 | error = xlog_bread(log, blk_no, | 3576 | error = xlog_bread(log, blk_no, |
3567 | split_bblks, dbp, | 3577 | split_bblks, dbp, |
3568 | &offset); | 3578 | &offset); |
3569 | if (error) | 3579 | if (error) |
3570 | goto bread_err2; | 3580 | goto bread_err2; |
3571 | } | 3581 | } |
3572 | 3582 | ||
3573 | /* | 3583 | /* |
3574 | * Note: this black magic still works with | 3584 | * Note: this black magic still works with |
3575 | * large sector sizes (non-512) only because: | 3585 | * large sector sizes (non-512) only because: |
3576 | * - we increased the buffer size originally | 3586 | * - we increased the buffer size originally |
3577 | * by 1 sector giving us enough extra space | 3587 | * by 1 sector giving us enough extra space |
3578 | * for the second read; | 3588 | * for the second read; |
3579 | * - the log start is guaranteed to be sector | 3589 | * - the log start is guaranteed to be sector |
3580 | * aligned; | 3590 | * aligned; |
3581 | * - we read the log end (LR header start) | 3591 | * - we read the log end (LR header start) |
3582 | * _first_, then the log start (LR header end) | 3592 | * _first_, then the log start (LR header end) |
3583 | * - order is important. | 3593 | * - order is important. |
3584 | */ | 3594 | */ |
3585 | error = xlog_bread_offset(log, 0, | 3595 | error = xlog_bread_offset(log, 0, |
3586 | bblks - split_bblks, dbp, | 3596 | bblks - split_bblks, dbp, |
3587 | offset + BBTOB(split_bblks)); | 3597 | offset + BBTOB(split_bblks)); |
3588 | if (error) | 3598 | if (error) |
3589 | goto bread_err2; | 3599 | goto bread_err2; |
3590 | } | 3600 | } |
3591 | 3601 | ||
3592 | error = xlog_unpack_data(rhead, offset, log); | 3602 | error = xlog_unpack_data(rhead, offset, log); |
3593 | if (error) | 3603 | if (error) |
3594 | goto bread_err2; | 3604 | goto bread_err2; |
3595 | 3605 | ||
3596 | error = xlog_recover_process_data(log, rhash, | 3606 | error = xlog_recover_process_data(log, rhash, |
3597 | rhead, offset, pass); | 3607 | rhead, offset, pass); |
3598 | if (error) | 3608 | if (error) |
3599 | goto bread_err2; | 3609 | goto bread_err2; |
3600 | blk_no += bblks; | 3610 | blk_no += bblks; |
3601 | } | 3611 | } |
3602 | 3612 | ||
3603 | ASSERT(blk_no >= log->l_logBBsize); | 3613 | ASSERT(blk_no >= log->l_logBBsize); |
3604 | blk_no -= log->l_logBBsize; | 3614 | blk_no -= log->l_logBBsize; |
3605 | 3615 | ||
3606 | /* read first part of physical log */ | 3616 | /* read first part of physical log */ |
3607 | while (blk_no < head_blk) { | 3617 | while (blk_no < head_blk) { |
3608 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); | 3618 | error = xlog_bread(log, blk_no, hblks, hbp, &offset); |
3609 | if (error) | 3619 | if (error) |
3610 | goto bread_err2; | 3620 | goto bread_err2; |
3611 | 3621 | ||
3612 | rhead = (xlog_rec_header_t *)offset; | 3622 | rhead = (xlog_rec_header_t *)offset; |
3613 | error = xlog_valid_rec_header(log, rhead, blk_no); | 3623 | error = xlog_valid_rec_header(log, rhead, blk_no); |
3614 | if (error) | 3624 | if (error) |
3615 | goto bread_err2; | 3625 | goto bread_err2; |
3616 | 3626 | ||
3617 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); | 3627 | bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); |
3618 | error = xlog_bread(log, blk_no+hblks, bblks, dbp, | 3628 | error = xlog_bread(log, blk_no+hblks, bblks, dbp, |
3619 | &offset); | 3629 | &offset); |
3620 | if (error) | 3630 | if (error) |
3621 | goto bread_err2; | 3631 | goto bread_err2; |
3622 | 3632 | ||
3623 | error = xlog_unpack_data(rhead, offset, log); | 3633 | error = xlog_unpack_data(rhead, offset, log); |
3624 | if (error) | 3634 | if (error) |
3625 | goto bread_err2; | 3635 | goto bread_err2; |
3626 | 3636 | ||
3627 | error = xlog_recover_process_data(log, rhash, | 3637 | error = xlog_recover_process_data(log, rhash, |
3628 | rhead, offset, pass); | 3638 | rhead, offset, pass); |
3629 | if (error) | 3639 | if (error) |
3630 | goto bread_err2; | 3640 | goto bread_err2; |
3631 | blk_no += bblks + hblks; | 3641 | blk_no += bblks + hblks; |
3632 | } | 3642 | } |
3633 | } | 3643 | } |
3634 | 3644 | ||
3635 | bread_err2: | 3645 | bread_err2: |
3636 | xlog_put_bp(dbp); | 3646 | xlog_put_bp(dbp); |
3637 | bread_err1: | 3647 | bread_err1: |
3638 | xlog_put_bp(hbp); | 3648 | xlog_put_bp(hbp); |
3639 | return error; | 3649 | return error; |
3640 | } | 3650 | } |
3641 | 3651 | ||
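
The wrap handling in xlog_do_recovery_pass() splits any read that straddles the physical end of the log into two parts: split = l_logBBsize - blk_no blocks from the tail of the device, then the remaining wrapped blocks from block 0, landing in the same buffer at offset BBTOB(split), which is why the buffer was over-allocated by a sector. The arithmetic in isolation, with made-up numbers:

#include <stdio.h>

int main(void)
{
	long log_bbsize = 1000;	/* physical log size in basic blocks */
	long blk_no = 997;	/* record starts near the end */
	long nblks = 8;		/* blocks we need to read */

	if (blk_no + nblks <= log_bbsize) {
		printf("single read: %ld blocks at %ld\n", nblks, blk_no);
	} else {
		long split = log_bbsize - blk_no;	/* before wrap: 3 */
		long wrapped = nblks - split;		/* after wrap:  5 */

		printf("read %ld blocks at %ld, then %ld blocks at 0\n",
		       split, blk_no, wrapped);
		/* second read lands at buffer offset BBTOB(split) */
	}
	return 0;
}
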
3642 | /* | 3652 | /* |
3643 | * Do the recovery of the log. We actually do this in two phases. | 3653 | * Do the recovery of the log. We actually do this in two phases. |
3644 | * The two passes are necessary in order to implement the function | 3654 | * The two passes are necessary in order to implement the function |
3645 | * of cancelling a record written into the log. The first pass | 3655 | * of cancelling a record written into the log. The first pass |
3646 | * determines those things which have been cancelled, and the | 3656 | * determines those things which have been cancelled, and the |
3647 | * second pass replays log items normally except for those which | 3657 | * second pass replays log items normally except for those which |
3648 | * have been cancelled. The handling of the replay and cancellations | 3658 | * have been cancelled. The handling of the replay and cancellations |
3649 | * takes place in the log item type specific routines. | 3659 | * takes place in the log item type specific routines. |
3650 | * | 3660 | * |
3651 | * The table of items which have cancel records in the log is allocated | 3661 | * The table of items which have cancel records in the log is allocated |
3652 | * and freed at this level, since only here do we know when all of | 3662 | * and freed at this level, since only here do we know when all of |
3653 | * the log recovery has been completed. | 3663 | * the log recovery has been completed. |
3654 | */ | 3664 | */ |
3655 | STATIC int | 3665 | STATIC int |
3656 | xlog_do_log_recovery( | 3666 | xlog_do_log_recovery( |
3657 | struct xlog *log, | 3667 | struct xlog *log, |
3658 | xfs_daddr_t head_blk, | 3668 | xfs_daddr_t head_blk, |
3659 | xfs_daddr_t tail_blk) | 3669 | xfs_daddr_t tail_blk) |
3660 | { | 3670 | { |
3661 | int error, i; | 3671 | int error, i; |
3662 | 3672 | ||
3663 | ASSERT(head_blk != tail_blk); | 3673 | ASSERT(head_blk != tail_blk); |
3664 | 3674 | ||
3665 | /* | 3675 | /* |
3666 | * First do a pass to find all of the cancelled buf log items. | 3676 | * First do a pass to find all of the cancelled buf log items. |
3667 | * Store them in the buf_cancel_table for use in the second pass. | 3677 | * Store them in the buf_cancel_table for use in the second pass. |
3668 | */ | 3678 | */ |
3669 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * | 3679 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
3670 | sizeof(struct list_head), | 3680 | sizeof(struct list_head), |
3671 | KM_SLEEP); | 3681 | KM_SLEEP); |
3672 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3682 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
3673 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); | 3683 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); |
3674 | 3684 | ||
3675 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3685 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3676 | XLOG_RECOVER_PASS1); | 3686 | XLOG_RECOVER_PASS1); |
3677 | if (error != 0) { | 3687 | if (error != 0) { |
3678 | kmem_free(log->l_buf_cancel_table); | 3688 | kmem_free(log->l_buf_cancel_table); |
3679 | log->l_buf_cancel_table = NULL; | 3689 | log->l_buf_cancel_table = NULL; |
3680 | return error; | 3690 | return error; |
3681 | } | 3691 | } |
3682 | /* | 3692 | /* |
3683 | * Then do a second pass to actually recover the items in the log. | 3693 | * Then do a second pass to actually recover the items in the log. |
3684 | * When it is complete free the table of buf cancel items. | 3694 | * When it is complete free the table of buf cancel items. |
3685 | */ | 3695 | */ |
3686 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, | 3696 | error = xlog_do_recovery_pass(log, head_blk, tail_blk, |
3687 | XLOG_RECOVER_PASS2); | 3697 | XLOG_RECOVER_PASS2); |
3688 | #ifdef DEBUG | 3698 | #ifdef DEBUG |
3689 | if (!error) { | 3699 | if (!error) { |
3690 | int i; | 3700 | int i; |
3691 | 3701 | ||
3692 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) | 3702 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
3693 | ASSERT(list_empty(&log->l_buf_cancel_table[i])); | 3703 | ASSERT(list_empty(&log->l_buf_cancel_table[i])); |
3694 | } | 3704 | } |
3695 | #endif /* DEBUG */ | 3705 | #endif /* DEBUG */ |
3696 | 3706 | ||
3697 | kmem_free(log->l_buf_cancel_table); | 3707 | kmem_free(log->l_buf_cancel_table); |
3698 | log->l_buf_cancel_table = NULL; | 3708 | log->l_buf_cancel_table = NULL; |
3699 | 3709 | ||
3700 | return error; | 3710 | return error; |
3701 | } | 3711 | } |
3702 | 3712 | ||
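
The reason for two passes, as the comment above explains, is that a buffer cancelled later in the log must not be resurrected by replaying an earlier copy of it: pass 1 only populates the cancel table, pass 2 replays everything the table does not veto. A toy version of the scheme, with the hash table flattened to an array; this is a simplification, since the real table reference-counts its entries so a buffer can be validly re-logged after its cancel record:

#include <stdbool.h>
#include <stdio.h>

#define NITEMS 6

struct log_item { int blkno; bool is_cancel; };

int main(void)
{
	/* a replay of block 3 appears before its cancellation record */
	struct log_item log[NITEMS] = {
		{ 1, false }, { 3, false }, { 2, false },
		{ 3, true  }, { 2, false }, { 4, false },
	};
	bool cancelled[8] = { false };

	/* pass 1: note every cancelled buffer */
	for (int i = 0; i < NITEMS; i++)
		if (log[i].is_cancel)
			cancelled[log[i].blkno] = true;

	/* pass 2: replay only the non-cancelled items (block 3 is skipped) */
	for (int i = 0; i < NITEMS; i++)
		if (!log[i].is_cancel && !cancelled[log[i].blkno])
			printf("replay buffer at block %d\n", log[i].blkno);

	return 0;
}
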
3703 | /* | 3713 | /* |
3704 | * Do the actual recovery | 3714 | * Do the actual recovery |
3705 | */ | 3715 | */ |
3706 | STATIC int | 3716 | STATIC int |
3707 | xlog_do_recover( | 3717 | xlog_do_recover( |
3708 | struct xlog *log, | 3718 | struct xlog *log, |
3709 | xfs_daddr_t head_blk, | 3719 | xfs_daddr_t head_blk, |
3710 | xfs_daddr_t tail_blk) | 3720 | xfs_daddr_t tail_blk) |
3711 | { | 3721 | { |
3712 | int error; | 3722 | int error; |
3713 | xfs_buf_t *bp; | 3723 | xfs_buf_t *bp; |
3714 | xfs_sb_t *sbp; | 3724 | xfs_sb_t *sbp; |
3715 | 3725 | ||
3716 | /* | 3726 | /* |
3717 | * First replay the images in the log. | 3727 | * First replay the images in the log. |
3718 | */ | 3728 | */ |
3719 | error = xlog_do_log_recovery(log, head_blk, tail_blk); | 3729 | error = xlog_do_log_recovery(log, head_blk, tail_blk); |
3720 | if (error) | 3730 | if (error) |
3721 | return error; | 3731 | return error; |
3722 | 3732 | ||
3723 | /* | 3733 | /* |
3724 | * If IO errors happened during recovery, bail out. | 3734 | * If IO errors happened during recovery, bail out. |
3725 | */ | 3735 | */ |
3726 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) { | 3736 | if (XFS_FORCED_SHUTDOWN(log->l_mp)) { |
3727 | return (EIO); | 3737 | return (EIO); |
3728 | } | 3738 | } |
3729 | 3739 | ||
3730 | /* | 3740 | /* |
3731 | * We now update the tail_lsn since much of the recovery has completed | 3741 | * We now update the tail_lsn since much of the recovery has completed |
3732 | * and there may be space available to use. If there were no extent | 3742 | * and there may be space available to use. If there were no extent |
3733 | * or iunlinks, we can free up the entire log and set the tail_lsn to | 3743 | * or iunlinks, we can free up the entire log and set the tail_lsn to |
3734 | * be the last_sync_lsn. This was set in xlog_find_tail to be the | 3744 | * be the last_sync_lsn. This was set in xlog_find_tail to be the |
3735 | * lsn of the last known good LR on disk. If there are extent frees | 3745 | * lsn of the last known good LR on disk. If there are extent frees |
3736 | * or iunlinks they will have some entries in the AIL; so we look at | 3746 | * or iunlinks they will have some entries in the AIL; so we look at |
3737 | * the AIL to determine how to set the tail_lsn. | 3747 | * the AIL to determine how to set the tail_lsn. |
3738 | */ | 3748 | */ |
3739 | xlog_assign_tail_lsn(log->l_mp); | 3749 | xlog_assign_tail_lsn(log->l_mp); |
3740 | 3750 | ||
3741 | /* | 3751 | /* |
3742 | * Now that we've finished replaying all buffer and inode | 3752 | * Now that we've finished replaying all buffer and inode |
3743 | * updates, re-read in the superblock and reverify it. | 3753 | * updates, re-read in the superblock and reverify it. |
3744 | */ | 3754 | */ |
3745 | bp = xfs_getsb(log->l_mp, 0); | 3755 | bp = xfs_getsb(log->l_mp, 0); |
3746 | XFS_BUF_UNDONE(bp); | 3756 | XFS_BUF_UNDONE(bp); |
3747 | ASSERT(!(XFS_BUF_ISWRITE(bp))); | 3757 | ASSERT(!(XFS_BUF_ISWRITE(bp))); |
3748 | XFS_BUF_READ(bp); | 3758 | XFS_BUF_READ(bp); |
3749 | XFS_BUF_UNASYNC(bp); | 3759 | XFS_BUF_UNASYNC(bp); |
3750 | bp->b_ops = &xfs_sb_buf_ops; | 3760 | bp->b_ops = &xfs_sb_buf_ops; |
3751 | xfsbdstrat(log->l_mp, bp); | 3761 | xfsbdstrat(log->l_mp, bp); |
3752 | error = xfs_buf_iowait(bp); | 3762 | error = xfs_buf_iowait(bp); |
3753 | if (error) { | 3763 | if (error) { |
3754 | xfs_buf_ioerror_alert(bp, __func__); | 3764 | xfs_buf_ioerror_alert(bp, __func__); |
3755 | ASSERT(0); | 3765 | ASSERT(0); |
3756 | xfs_buf_relse(bp); | 3766 | xfs_buf_relse(bp); |
3757 | return error; | 3767 | return error; |
3758 | } | 3768 | } |
3759 | 3769 | ||
3760 | /* Convert superblock from on-disk format */ | 3770 | /* Convert superblock from on-disk format */ |
3761 | sbp = &log->l_mp->m_sb; | 3771 | sbp = &log->l_mp->m_sb; |
3762 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); | 3772 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); |
3763 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); | 3773 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); |
3764 | ASSERT(xfs_sb_good_version(sbp)); | 3774 | ASSERT(xfs_sb_good_version(sbp)); |
3765 | xfs_buf_relse(bp); | 3775 | xfs_buf_relse(bp); |
3766 | 3776 | ||
3767 | /* We've re-read the superblock so re-initialize per-cpu counters */ | 3777 | /* We've re-read the superblock so re-initialize per-cpu counters */ |
3768 | xfs_icsb_reinit_counters(log->l_mp); | 3778 | xfs_icsb_reinit_counters(log->l_mp); |
3769 | 3779 | ||
3770 | xlog_recover_check_summary(log); | 3780 | xlog_recover_check_summary(log); |
3771 | 3781 | ||
3772 | /* Normal transactions can now occur */ | 3782 | /* Normal transactions can now occur */ |
3773 | log->l_flags &= ~XLOG_ACTIVE_RECOVERY; | 3783 | log->l_flags &= ~XLOG_ACTIVE_RECOVERY; |
3774 | return 0; | 3784 | return 0; |
3775 | } | 3785 | } |
3776 | 3786 | ||
3777 | /* | 3787 | /* |
3778 | * Perform recovery and re-initialize some log variables in xlog_find_tail. | 3788 | * Perform recovery and re-initialize some log variables in xlog_find_tail. |
3779 | * | 3789 | * |
3780 | * Return error or zero. | 3790 | * Return error or zero. |
3781 | */ | 3791 | */ |
3782 | int | 3792 | int |
3783 | xlog_recover( | 3793 | xlog_recover( |
3784 | struct xlog *log) | 3794 | struct xlog *log) |
3785 | { | 3795 | { |
3786 | xfs_daddr_t head_blk, tail_blk; | 3796 | xfs_daddr_t head_blk, tail_blk; |
3787 | int error; | 3797 | int error; |
3788 | 3798 | ||
3789 | /* find the tail of the log */ | 3799 | /* find the tail of the log */ |
3790 | if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) | 3800 | if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) |
3791 | return error; | 3801 | return error; |
3792 | 3802 | ||
3793 | if (tail_blk != head_blk) { | 3803 | if (tail_blk != head_blk) { |
3794 | /* There used to be a comment here: | 3804 | /* There used to be a comment here: |
3795 | * | 3805 | * |
3796 | * disallow recovery on read-only mounts. note -- mount | 3806 | * disallow recovery on read-only mounts. note -- mount |
3797 | * checks for ENOSPC and turns it into an intelligent | 3807 | * checks for ENOSPC and turns it into an intelligent |
3798 | * error message. | 3808 | * error message. |
3799 | * ...but this is no longer true. Now, unless you specify | 3809 | * ...but this is no longer true. Now, unless you specify |
3800 | * NORECOVERY (in which case this function would never be | 3810 | * NORECOVERY (in which case this function would never be |
3801 | * called), we just go ahead and recover. We do this all | 3811 | * called), we just go ahead and recover. We do this all |
3802 | * under the vfs layer, so we can get away with it unless | 3812 | * under the vfs layer, so we can get away with it unless |
3803 | * the device itself is read-only, in which case we fail. | 3813 | * the device itself is read-only, in which case we fail. |
3804 | */ | 3814 | */ |
3805 | if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { | 3815 | if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { |
3806 | return error; | 3816 | return error; |
3807 | } | 3817 | } |
3808 | 3818 | ||
3809 | xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", | 3819 | xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", |
3810 | log->l_mp->m_logname ? log->l_mp->m_logname | 3820 | log->l_mp->m_logname ? log->l_mp->m_logname |
3811 | : "internal"); | 3821 | : "internal"); |
3812 | 3822 | ||
3813 | error = xlog_do_recover(log, head_blk, tail_blk); | 3823 | error = xlog_do_recover(log, head_blk, tail_blk); |
3814 | log->l_flags |= XLOG_RECOVERY_NEEDED; | 3824 | log->l_flags |= XLOG_RECOVERY_NEEDED; |
3815 | } | 3825 | } |
3816 | return error; | 3826 | return error; |
3817 | } | 3827 | } |
3818 | 3828 | ||
3819 | /* | 3829 | /* |
3820 | * In the first part of recovery we replay inodes and buffers and build | 3830 | * In the first part of recovery we replay inodes and buffers and build |
3821 | * up the list of extent free items which need to be processed. Here | 3831 | * up the list of extent free items which need to be processed. Here |
3822 | * we process the extent free items and clean up the on disk unlinked | 3832 | * we process the extent free items and clean up the on disk unlinked |
3823 | * inode lists. This is separated from the first part of recovery so | 3833 | * inode lists. This is separated from the first part of recovery so |
3824 | * that the root and real-time bitmap inodes can be read in from disk in | 3834 | * that the root and real-time bitmap inodes can be read in from disk in |
3825 | * between the two stages. This is necessary so that we can free space | 3835 | * between the two stages. This is necessary so that we can free space |
3826 | * in the real-time portion of the file system. | 3836 | * in the real-time portion of the file system. |
3827 | */ | 3837 | */ |
3828 | int | 3838 | int |
3829 | xlog_recover_finish( | 3839 | xlog_recover_finish( |
3830 | struct xlog *log) | 3840 | struct xlog *log) |
3831 | { | 3841 | { |
3832 | /* | 3842 | /* |
3833 | * Now we're ready to do the transactions needed for the | 3843 | * Now we're ready to do the transactions needed for the |
3834 | * rest of recovery. Start with completing all the extent | 3844 | * rest of recovery. Start with completing all the extent |
3835 | * free intent records and then process the unlinked inode | 3845 | * free intent records and then process the unlinked inode |
3836 | * lists. At this point, we essentially run in normal mode | 3846 | * lists. At this point, we essentially run in normal mode |
3837 | * except that we're still performing recovery actions | 3847 | * except that we're still performing recovery actions |
3838 | * rather than accepting new requests. | 3848 | * rather than accepting new requests. |
3839 | */ | 3849 | */ |
3840 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { | 3850 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { |
3841 | int error; | 3851 | int error; |
3842 | error = xlog_recover_process_efis(log); | 3852 | error = xlog_recover_process_efis(log); |
3843 | if (error) { | 3853 | if (error) { |
3844 | xfs_alert(log->l_mp, "Failed to recover EFIs"); | 3854 | xfs_alert(log->l_mp, "Failed to recover EFIs"); |
3845 | return error; | 3855 | return error; |
3846 | } | 3856 | } |
3847 | /* | 3857 | /* |
3848 | * Sync the log to get all the EFIs out of the AIL. | 3858 | * Sync the log to get all the EFIs out of the AIL. |
3849 | * This isn't absolutely necessary, but it helps in | 3859 | * This isn't absolutely necessary, but it helps in |
3850 | * case the unlink transactions have problems | 3860 | * case the unlink transactions have problems |
3851 | * pushing the EFIs out of the way. | 3861 | * pushing the EFIs out of the way. |
3852 | */ | 3862 | */ |
3853 | xfs_log_force(log->l_mp, XFS_LOG_SYNC); | 3863 | xfs_log_force(log->l_mp, XFS_LOG_SYNC); |
3854 | 3864 | ||
3855 | xlog_recover_process_iunlinks(log); | 3865 | xlog_recover_process_iunlinks(log); |
3856 | 3866 | ||
3857 | xlog_recover_check_summary(log); | 3867 | xlog_recover_check_summary(log); |
3858 | 3868 | ||
3859 | xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", | 3869 | xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", |
3860 | log->l_mp->m_logname ? log->l_mp->m_logname | 3870 | log->l_mp->m_logname ? log->l_mp->m_logname |
3861 | : "internal"); | 3871 | : "internal"); |
3862 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; | 3872 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; |
3863 | } else { | 3873 | } else { |
3864 | xfs_info(log->l_mp, "Ending clean mount"); | 3874 | xfs_info(log->l_mp, "Ending clean mount"); |
3865 | } | 3875 | } |
3866 | return 0; | 3876 | return 0; |
3867 | } | 3877 | } |
3868 | 3878 | ||
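The two-stage split described above is easiest to see as a call sequence. A schematic sketch, not the literal mount path (xlog_recover() is the stage-1 entry point; its tail is what appears at the top of this hunk):

	xlog_recover(log);		/* stage 1: replay buffers/inodes, note EFIs   */
	/* ... mount code reads the root and realtime bitmap inodes here ... */
	xlog_recover_finish(log);	/* stage 2: process EFIs, clean unlinked lists */

Stage 2 is where the deferred extent frees actually happen, and freeing realtime extents requires the realtime bitmap inode read in between the two stages.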
3869 | 3879 | ||
3870 | #if defined(DEBUG) | 3880 | #if defined(DEBUG) |
3871 | /* | 3881 | /* |
3872 | * Read all of the agf and agi counters and check that they | 3882 | * Read all of the agf and agi counters and check that they |
3873 | * are consistent with the superblock counters. | 3883 | * are consistent with the superblock counters. |
3874 | */ | 3884 | */ |
3875 | void | 3885 | void |
3876 | xlog_recover_check_summary( | 3886 | xlog_recover_check_summary( |
3877 | struct xlog *log) | 3887 | struct xlog *log) |
3878 | { | 3888 | { |
3879 | xfs_mount_t *mp; | 3889 | xfs_mount_t *mp; |
3880 | xfs_agf_t *agfp; | 3890 | xfs_agf_t *agfp; |
3881 | xfs_buf_t *agfbp; | 3891 | xfs_buf_t *agfbp; |
3882 | xfs_buf_t *agibp; | 3892 | xfs_buf_t *agibp; |
3883 | xfs_agnumber_t agno; | 3893 | xfs_agnumber_t agno; |
3884 | __uint64_t freeblks; | 3894 | __uint64_t freeblks; |
3885 | __uint64_t itotal; | 3895 | __uint64_t itotal; |
3886 | __uint64_t ifree; | 3896 | __uint64_t ifree; |
3887 | int error; | 3897 | int error; |
3888 | 3898 | ||
3889 | mp = log->l_mp; | 3899 | mp = log->l_mp; |
3890 | 3900 | ||
3891 | freeblks = 0LL; | 3901 | freeblks = 0LL; |
3892 | itotal = 0LL; | 3902 | itotal = 0LL; |
3893 | ifree = 0LL; | 3903 | ifree = 0LL; |
3894 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 3904 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
3895 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); | 3905 | error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); |
3896 | if (error) { | 3906 | if (error) { |
3897 | xfs_alert(mp, "%s agf read failed agno %d error %d", | 3907 | xfs_alert(mp, "%s agf read failed agno %d error %d", |
3898 | __func__, agno, error); | 3908 | __func__, agno, error); |
3899 | } else { | 3909 | } else { |
3900 | agfp = XFS_BUF_TO_AGF(agfbp); | 3910 | agfp = XFS_BUF_TO_AGF(agfbp); |
3901 | freeblks += be32_to_cpu(agfp->agf_freeblks) + | 3911 | freeblks += be32_to_cpu(agfp->agf_freeblks) + |
3902 | be32_to_cpu(agfp->agf_flcount); | 3912 | be32_to_cpu(agfp->agf_flcount); |
3903 | xfs_buf_relse(agfbp); | 3913 | xfs_buf_relse(agfbp); |
3904 | } | 3914 | } |
3905 | 3915 | ||
3906 | error = xfs_read_agi(mp, NULL, agno, &agibp); | 3916 | error = xfs_read_agi(mp, NULL, agno, &agibp); |
3907 | if (error) { | 3917 | if (error) { |
3908 | xfs_alert(mp, "%s agi read failed agno %d error %d", | 3918 | xfs_alert(mp, "%s agi read failed agno %d error %d", |
3909 | __func__, agno, error); | 3919 | __func__, agno, error); |
3910 | } else { | 3920 | } else { |
3911 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); | 3921 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); |
3912 | 3922 | ||
3913 | itotal += be32_to_cpu(agi->agi_count); | 3923 | itotal += be32_to_cpu(agi->agi_count); |
3914 | ifree += be32_to_cpu(agi->agi_freecount); | 3924 | ifree += be32_to_cpu(agi->agi_freecount); |
3915 | xfs_buf_relse(agibp); | 3925 | xfs_buf_relse(agibp); |
3916 | } | 3926 | } |
3917 | } | 3927 | } |
3918 | } | 3928 | } |
3919 | #endif /* DEBUG */ | 3929 | #endif /* DEBUG */ |
3920 | 3930 |
fs/xfs/xfs_qm.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | 20 | #include "xfs_bit.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
23 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
24 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
25 | #include "xfs_alloc.h" | 25 | #include "xfs_alloc.h" |
26 | #include "xfs_quota.h" | 26 | #include "xfs_quota.h" |
27 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
28 | #include "xfs_bmap_btree.h" | 28 | #include "xfs_bmap_btree.h" |
29 | #include "xfs_ialloc_btree.h" | 29 | #include "xfs_ialloc_btree.h" |
30 | #include "xfs_dinode.h" | 30 | #include "xfs_dinode.h" |
31 | #include "xfs_inode.h" | 31 | #include "xfs_inode.h" |
32 | #include "xfs_ialloc.h" | 32 | #include "xfs_ialloc.h" |
33 | #include "xfs_itable.h" | 33 | #include "xfs_itable.h" |
34 | #include "xfs_rtalloc.h" | 34 | #include "xfs_rtalloc.h" |
35 | #include "xfs_error.h" | 35 | #include "xfs_error.h" |
36 | #include "xfs_bmap.h" | 36 | #include "xfs_bmap.h" |
37 | #include "xfs_attr.h" | 37 | #include "xfs_attr.h" |
38 | #include "xfs_buf_item.h" | 38 | #include "xfs_buf_item.h" |
39 | #include "xfs_trans_space.h" | 39 | #include "xfs_trans_space.h" |
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | #include "xfs_qm.h" | 41 | #include "xfs_qm.h" |
42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
43 | #include "xfs_icache.h" | 43 | #include "xfs_icache.h" |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * The global quota manager. There is only one of these for the entire | 46 | * The global quota manager. There is only one of these for the entire |
47 | * system, _not_ one per file system. XQM keeps track of the overall | 47 | * system, _not_ one per file system. XQM keeps track of the overall |
48 | * quota functionality, including maintaining the freelist and hash | 48 | * quota functionality, including maintaining the freelist and hash |
49 | * tables of dquots. | 49 | * tables of dquots. |
50 | */ | 50 | */ |
51 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 51 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
52 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 52 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
53 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); | 53 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * We use the batch lookup interface to iterate over the dquots as it | 56 | * We use the batch lookup interface to iterate over the dquots as it |
57 | * currently is the only interface into the radix tree code that allows | 57 | * currently is the only interface into the radix tree code that allows |
58 | * fuzzy lookups instead of exact matches. Holding the lock over multiple | 58 | * fuzzy lookups instead of exact matches. Holding the lock over multiple |
59 | * operations is fine as all callers run only during mount/umount | 59 | * operations is fine as all callers run only during mount/umount |
60 | * or quotaoff. | 60 | * or quotaoff. |
61 | */ | 61 | */ |
62 | #define XFS_DQ_LOOKUP_BATCH 32 | 62 | #define XFS_DQ_LOOKUP_BATCH 32 |
63 | 63 | ||
64 | STATIC int | 64 | STATIC int |
65 | xfs_qm_dquot_walk( | 65 | xfs_qm_dquot_walk( |
66 | struct xfs_mount *mp, | 66 | struct xfs_mount *mp, |
67 | int type, | 67 | int type, |
68 | int (*execute)(struct xfs_dquot *dqp, void *data), | 68 | int (*execute)(struct xfs_dquot *dqp, void *data), |
69 | void *data) | 69 | void *data) |
70 | { | 70 | { |
71 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 71 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
72 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 72 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); |
73 | uint32_t next_index; | 73 | uint32_t next_index; |
74 | int last_error = 0; | 74 | int last_error = 0; |
75 | int skipped; | 75 | int skipped; |
76 | int nr_found; | 76 | int nr_found; |
77 | 77 | ||
78 | restart: | 78 | restart: |
79 | skipped = 0; | 79 | skipped = 0; |
80 | next_index = 0; | 80 | next_index = 0; |
81 | nr_found = 0; | 81 | nr_found = 0; |
82 | 82 | ||
83 | while (1) { | 83 | while (1) { |
84 | struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; | 84 | struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; |
85 | int error = 0; | 85 | int error = 0; |
86 | int i; | 86 | int i; |
87 | 87 | ||
88 | mutex_lock(&qi->qi_tree_lock); | 88 | mutex_lock(&qi->qi_tree_lock); |
89 | nr_found = radix_tree_gang_lookup(tree, (void **)batch, | 89 | nr_found = radix_tree_gang_lookup(tree, (void **)batch, |
90 | next_index, XFS_DQ_LOOKUP_BATCH); | 90 | next_index, XFS_DQ_LOOKUP_BATCH); |
91 | if (!nr_found) { | 91 | if (!nr_found) { |
92 | mutex_unlock(&qi->qi_tree_lock); | 92 | mutex_unlock(&qi->qi_tree_lock); |
93 | break; | 93 | break; |
94 | } | 94 | } |
95 | 95 | ||
96 | for (i = 0; i < nr_found; i++) { | 96 | for (i = 0; i < nr_found; i++) { |
97 | struct xfs_dquot *dqp = batch[i]; | 97 | struct xfs_dquot *dqp = batch[i]; |
98 | 98 | ||
99 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; | 99 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; |
100 | 100 | ||
101 | error = execute(batch[i], data); | 101 | error = execute(batch[i], data); |
102 | if (error == EAGAIN) { | 102 | if (error == EAGAIN) { |
103 | skipped++; | 103 | skipped++; |
104 | continue; | 104 | continue; |
105 | } | 105 | } |
106 | if (error && last_error != EFSCORRUPTED) | 106 | if (error && last_error != EFSCORRUPTED) |
107 | last_error = error; | 107 | last_error = error; |
108 | } | 108 | } |
109 | 109 | ||
110 | mutex_unlock(&qi->qi_tree_lock); | 110 | mutex_unlock(&qi->qi_tree_lock); |
111 | 111 | ||
112 | /* bail out if the filesystem is corrupted. */ | 112 | /* bail out if the filesystem is corrupted. */ |
113 | if (last_error == EFSCORRUPTED) { | 113 | if (last_error == EFSCORRUPTED) { |
114 | skipped = 0; | 114 | skipped = 0; |
115 | break; | 115 | break; |
116 | } | 116 | } |
117 | } | 117 | } |
118 | 118 | ||
119 | if (skipped) { | 119 | if (skipped) { |
120 | delay(1); | 120 | delay(1); |
121 | goto restart; | 121 | goto restart; |
122 | } | 122 | } |
123 | 123 | ||
124 | return last_error; | 124 | return last_error; |
125 | } | 125 | } |
126 | 126 | ||
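Distilled, xfs_qm_dquot_walk() above is a gang-lookup iteration with a retry pass: fetch a batch of entries keyed from next_index, advance next_index past the highest dquot id seen, and restart the whole walk after a short delay if any callback returned EAGAIN. A schematic skeleton, with the tree locking stripped and a hypothetical id_of() standing in for be32_to_cpu(dqp->q_core.d_id):

	restart:
		skipped = 0;
		next_index = 0;
		while ((nr_found = radix_tree_gang_lookup(tree, (void **)batch,
					next_index, XFS_DQ_LOOKUP_BATCH)) > 0) {
			for (i = 0; i < nr_found; i++) {
				next_index = id_of(batch[i]) + 1;  /* guarantees progress */
				if (execute(batch[i], data) == EAGAIN)
					skipped++;	/* revisit on the next full pass */
			}
		}
		if (skipped) {
			delay(1);
			goto restart;
		}

Because the dquot id is the radix-tree key, bumping next_index past the last id returned keeps the scan moving forward even when entries are inserted or removed between batches.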
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Purge a dquot from all tracking data structures and free it. | 129 | * Purge a dquot from all tracking data structures and free it. |
130 | */ | 130 | */ |
131 | STATIC int | 131 | STATIC int |
132 | xfs_qm_dqpurge( | 132 | xfs_qm_dqpurge( |
133 | struct xfs_dquot *dqp, | 133 | struct xfs_dquot *dqp, |
134 | void *data) | 134 | void *data) |
135 | { | 135 | { |
136 | struct xfs_mount *mp = dqp->q_mount; | 136 | struct xfs_mount *mp = dqp->q_mount; |
137 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 137 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
138 | struct xfs_dquot *gdqp = NULL; | 138 | struct xfs_dquot *gdqp = NULL; |
139 | 139 | ||
140 | xfs_dqlock(dqp); | 140 | xfs_dqlock(dqp); |
141 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { | 141 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { |
142 | xfs_dqunlock(dqp); | 142 | xfs_dqunlock(dqp); |
143 | return EAGAIN; | 143 | return EAGAIN; |
144 | } | 144 | } |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * If this quota has a group hint attached, prepare for releasing it | 147 | * If this quota has a group hint attached, prepare for releasing it |
148 | * now. | 148 | * now. |
149 | */ | 149 | */ |
150 | gdqp = dqp->q_gdquot; | 150 | gdqp = dqp->q_gdquot; |
151 | if (gdqp) { | 151 | if (gdqp) { |
152 | xfs_dqlock(gdqp); | 152 | xfs_dqlock(gdqp); |
153 | dqp->q_gdquot = NULL; | 153 | dqp->q_gdquot = NULL; |
154 | } | 154 | } |
155 | 155 | ||
156 | dqp->dq_flags |= XFS_DQ_FREEING; | 156 | dqp->dq_flags |= XFS_DQ_FREEING; |
157 | 157 | ||
158 | xfs_dqflock(dqp); | 158 | xfs_dqflock(dqp); |
159 | 159 | ||
160 | /* | 160 | /* |
161 | * If we are turning this type of quota off, we don't care | 161 | * If we are turning this type of quota off, we don't care |
162 | * about the dirty metadata sitting in this dquot. OTOH, if | 162 | * about the dirty metadata sitting in this dquot. OTOH, if |
163 | * we're unmounting, we do care, so we flush it and wait. | 163 | * we're unmounting, we do care, so we flush it and wait. |
164 | */ | 164 | */ |
165 | if (XFS_DQ_IS_DIRTY(dqp)) { | 165 | if (XFS_DQ_IS_DIRTY(dqp)) { |
166 | struct xfs_buf *bp = NULL; | 166 | struct xfs_buf *bp = NULL; |
167 | int error; | 167 | int error; |
168 | 168 | ||
169 | /* | 169 | /* |
170 | * We don't care about getting disk errors here. We need | 170 | * We don't care about getting disk errors here. We need |
171 | * to purge this dquot anyway, so we go ahead regardless. | 171 | * to purge this dquot anyway, so we go ahead regardless. |
172 | */ | 172 | */ |
173 | error = xfs_qm_dqflush(dqp, &bp); | 173 | error = xfs_qm_dqflush(dqp, &bp); |
174 | if (error) { | 174 | if (error) { |
175 | xfs_warn(mp, "%s: dquot %p flush failed", | 175 | xfs_warn(mp, "%s: dquot %p flush failed", |
176 | __func__, dqp); | 176 | __func__, dqp); |
177 | } else { | 177 | } else { |
178 | error = xfs_bwrite(bp); | 178 | error = xfs_bwrite(bp); |
179 | xfs_buf_relse(bp); | 179 | xfs_buf_relse(bp); |
180 | } | 180 | } |
181 | xfs_dqflock(dqp); | 181 | xfs_dqflock(dqp); |
182 | } | 182 | } |
183 | 183 | ||
184 | ASSERT(atomic_read(&dqp->q_pincount) == 0); | 184 | ASSERT(atomic_read(&dqp->q_pincount) == 0); |
185 | ASSERT(XFS_FORCED_SHUTDOWN(mp) || | 185 | ASSERT(XFS_FORCED_SHUTDOWN(mp) || |
186 | !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); | 186 | !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); |
187 | 187 | ||
188 | xfs_dqfunlock(dqp); | 188 | xfs_dqfunlock(dqp); |
189 | xfs_dqunlock(dqp); | 189 | xfs_dqunlock(dqp); |
190 | 190 | ||
191 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), | 191 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), |
192 | be32_to_cpu(dqp->q_core.d_id)); | 192 | be32_to_cpu(dqp->q_core.d_id)); |
193 | qi->qi_dquots--; | 193 | qi->qi_dquots--; |
194 | 194 | ||
195 | /* | 195 | /* |
196 | * We move dquots to the freelist as soon as their reference count | 196 | * We move dquots to the freelist as soon as their reference count |
197 | * hits zero, so it really should be on the freelist here. | 197 | * hits zero, so it really should be on the freelist here. |
198 | */ | 198 | */ |
199 | mutex_lock(&qi->qi_lru_lock); | 199 | mutex_lock(&qi->qi_lru_lock); |
200 | ASSERT(!list_empty(&dqp->q_lru)); | 200 | ASSERT(!list_empty(&dqp->q_lru)); |
201 | list_del_init(&dqp->q_lru); | 201 | list_del_init(&dqp->q_lru); |
202 | qi->qi_lru_count--; | 202 | qi->qi_lru_count--; |
203 | XFS_STATS_DEC(xs_qm_dquot_unused); | 203 | XFS_STATS_DEC(xs_qm_dquot_unused); |
204 | mutex_unlock(&qi->qi_lru_lock); | 204 | mutex_unlock(&qi->qi_lru_lock); |
205 | 205 | ||
206 | xfs_qm_dqdestroy(dqp); | 206 | xfs_qm_dqdestroy(dqp); |
207 | 207 | ||
208 | if (gdqp) | 208 | if (gdqp) |
209 | xfs_qm_dqput(gdqp); | 209 | xfs_qm_dqput(gdqp); |
210 | return 0; | 210 | return 0; |
211 | } | 211 | } |
212 | 212 | ||
213 | /* | 213 | /* |
214 | * Purge the dquot cache. | 214 | * Purge the dquot cache. |
215 | */ | 215 | */ |
216 | void | 216 | void |
217 | xfs_qm_dqpurge_all( | 217 | xfs_qm_dqpurge_all( |
218 | struct xfs_mount *mp, | 218 | struct xfs_mount *mp, |
219 | uint flags) | 219 | uint flags) |
220 | { | 220 | { |
221 | if (flags & XFS_QMOPT_UQUOTA) | 221 | if (flags & XFS_QMOPT_UQUOTA) |
222 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); | 222 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); |
223 | if (flags & XFS_QMOPT_GQUOTA) | 223 | if (flags & XFS_QMOPT_GQUOTA) |
224 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); | 224 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); |
225 | if (flags & XFS_QMOPT_PQUOTA) | 225 | if (flags & XFS_QMOPT_PQUOTA) |
226 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL); | 226 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL); |
227 | } | 227 | } |
228 | 228 | ||
229 | /* | 229 | /* |
230 | * Just destroy the quotainfo structure. | 230 | * Just destroy the quotainfo structure. |
231 | */ | 231 | */ |
232 | void | 232 | void |
233 | xfs_qm_unmount( | 233 | xfs_qm_unmount( |
234 | struct xfs_mount *mp) | 234 | struct xfs_mount *mp) |
235 | { | 235 | { |
236 | if (mp->m_quotainfo) { | 236 | if (mp->m_quotainfo) { |
237 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); | 237 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); |
238 | xfs_qm_destroy_quotainfo(mp); | 238 | xfs_qm_destroy_quotainfo(mp); |
239 | } | 239 | } |
240 | } | 240 | } |
241 | 241 | ||
242 | 242 | ||
243 | /* | 243 | /* |
244 | * This is called from xfs_mountfs to start quotas and initialize all | 244 | * This is called from xfs_mountfs to start quotas and initialize all |
245 | * necessary data structures like quotainfo. This is also responsible for | 245 | * necessary data structures like quotainfo. This is also responsible for |
246 | * running a quotacheck as necessary. We are guaranteed that the superblock | 246 | * running a quotacheck as necessary. We are guaranteed that the superblock |
247 | * is consistently read in at this point. | 247 | * is consistently read in at this point. |
248 | * | 248 | * |
249 | * If we fail here, the mount will continue with quota turned off. We don't | 249 | * If we fail here, the mount will continue with quota turned off. We don't |
250 | * need to indicate success or failure at all. | 250 | * need to indicate success or failure at all. |
251 | */ | 251 | */ |
252 | void | 252 | void |
253 | xfs_qm_mount_quotas( | 253 | xfs_qm_mount_quotas( |
254 | xfs_mount_t *mp) | 254 | xfs_mount_t *mp) |
255 | { | 255 | { |
256 | int error = 0; | 256 | int error = 0; |
257 | uint sbf; | 257 | uint sbf; |
258 | 258 | ||
259 | /* | 259 | /* |
260 | * If quotas on realtime volumes are not supported, we disable | 260 | * If quotas on realtime volumes are not supported, we disable |
261 | * quotas immediately. | 261 | * quotas immediately. |
262 | */ | 262 | */ |
263 | if (mp->m_sb.sb_rextents) { | 263 | if (mp->m_sb.sb_rextents) { |
264 | xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); | 264 | xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); |
265 | mp->m_qflags = 0; | 265 | mp->m_qflags = 0; |
266 | goto write_changes; | 266 | goto write_changes; |
267 | } | 267 | } |
268 | 268 | ||
269 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 269 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
270 | 270 | ||
271 | /* | 271 | /* |
272 | * Allocate the quotainfo structure inside the mount struct, and | 272 | * Allocate the quotainfo structure inside the mount struct, and |
273 | * create quotainode(s), and change/rev superblock if necessary. | 273 | * create quotainode(s), and change/rev superblock if necessary. |
274 | */ | 274 | */ |
275 | error = xfs_qm_init_quotainfo(mp); | 275 | error = xfs_qm_init_quotainfo(mp); |
276 | if (error) { | 276 | if (error) { |
277 | /* | 277 | /* |
278 | * We must turn off quotas. | 278 | * We must turn off quotas. |
279 | */ | 279 | */ |
280 | ASSERT(mp->m_quotainfo == NULL); | 280 | ASSERT(mp->m_quotainfo == NULL); |
281 | mp->m_qflags = 0; | 281 | mp->m_qflags = 0; |
282 | goto write_changes; | 282 | goto write_changes; |
283 | } | 283 | } |
284 | /* | 284 | /* |
285 | * If any of the quotas are not consistent, do a quotacheck. | 285 | * If any of the quotas are not consistent, do a quotacheck. |
286 | */ | 286 | */ |
287 | if (XFS_QM_NEED_QUOTACHECK(mp)) { | 287 | if (XFS_QM_NEED_QUOTACHECK(mp)) { |
288 | error = xfs_qm_quotacheck(mp); | 288 | error = xfs_qm_quotacheck(mp); |
289 | if (error) { | 289 | if (error) { |
290 | /* Quotacheck failed and disabled quotas. */ | 290 | /* Quotacheck failed and disabled quotas. */ |
291 | return; | 291 | return; |
292 | } | 292 | } |
293 | } | 293 | } |
294 | /* | 294 | /* |
295 | * If one type of quota is off, then it will lose its | 295 | * If one type of quota is off, then it will lose its |
296 | * quotachecked status, since we won't be doing accounting for | 296 | * quotachecked status, since we won't be doing accounting for |
297 | * that type anymore. | 297 | * that type anymore. |
298 | */ | 298 | */ |
299 | if (!XFS_IS_UQUOTA_ON(mp)) | 299 | if (!XFS_IS_UQUOTA_ON(mp)) |
300 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; | 300 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; |
301 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) | 301 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) |
302 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; | 302 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; |
303 | 303 | ||
304 | write_changes: | 304 | write_changes: |
305 | /* | 305 | /* |
306 | * We actually don't have to acquire the m_sb_lock at all. | 306 | * We actually don't have to acquire the m_sb_lock at all. |
307 | * This can only be called from mount, and that's single threaded. XXX | 307 | * This can only be called from mount, and that's single threaded. XXX |
308 | */ | 308 | */ |
309 | spin_lock(&mp->m_sb_lock); | 309 | spin_lock(&mp->m_sb_lock); |
310 | sbf = mp->m_sb.sb_qflags; | 310 | sbf = mp->m_sb.sb_qflags; |
311 | mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; | 311 | mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; |
312 | spin_unlock(&mp->m_sb_lock); | 312 | spin_unlock(&mp->m_sb_lock); |
313 | 313 | ||
314 | if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { | 314 | if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { |
315 | if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { | 315 | if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { |
316 | /* | 316 | /* |
317 | * We could only have been turning quotas off. | 317 | * We could only have been turning quotas off. |
318 | * We aren't in very good shape actually because | 318 | * We aren't in very good shape actually because |
319 | * the incore structures are convinced that quotas are | 319 | * the incore structures are convinced that quotas are |
320 | * off, but the on disk superblock doesn't know that! | 320 | * off, but the on disk superblock doesn't know that! |
321 | */ | 321 | */ |
322 | ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); | 322 | ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); |
323 | xfs_alert(mp, "%s: Superblock update failed!", | 323 | xfs_alert(mp, "%s: Superblock update failed!", |
324 | __func__); | 324 | __func__); |
325 | } | 325 | } |
326 | } | 326 | } |
327 | 327 | ||
328 | if (error) { | 328 | if (error) { |
329 | xfs_warn(mp, "Failed to initialize disk quotas."); | 329 | xfs_warn(mp, "Failed to initialize disk quotas."); |
330 | return; | 330 | return; |
331 | } | 331 | } |
332 | } | 332 | } |
333 | 333 | ||
334 | /* | 334 | /* |
335 | * Called from the vfsops layer. | 335 | * Called from the vfsops layer. |
336 | */ | 336 | */ |
337 | void | 337 | void |
338 | xfs_qm_unmount_quotas( | 338 | xfs_qm_unmount_quotas( |
339 | xfs_mount_t *mp) | 339 | xfs_mount_t *mp) |
340 | { | 340 | { |
341 | /* | 341 | /* |
342 | * Release the dquots that root inode, et al might be holding, | 342 | * Release the dquots that root inode, et al might be holding, |
343 | * before we flush quotas and blow away the quotainfo structure. | 343 | * before we flush quotas and blow away the quotainfo structure. |
344 | */ | 344 | */ |
345 | ASSERT(mp->m_rootip); | 345 | ASSERT(mp->m_rootip); |
346 | xfs_qm_dqdetach(mp->m_rootip); | 346 | xfs_qm_dqdetach(mp->m_rootip); |
347 | if (mp->m_rbmip) | 347 | if (mp->m_rbmip) |
348 | xfs_qm_dqdetach(mp->m_rbmip); | 348 | xfs_qm_dqdetach(mp->m_rbmip); |
349 | if (mp->m_rsumip) | 349 | if (mp->m_rsumip) |
350 | xfs_qm_dqdetach(mp->m_rsumip); | 350 | xfs_qm_dqdetach(mp->m_rsumip); |
351 | 351 | ||
352 | /* | 352 | /* |
353 | * Release the quota inodes. | 353 | * Release the quota inodes. |
354 | */ | 354 | */ |
355 | if (mp->m_quotainfo) { | 355 | if (mp->m_quotainfo) { |
356 | if (mp->m_quotainfo->qi_uquotaip) { | 356 | if (mp->m_quotainfo->qi_uquotaip) { |
357 | IRELE(mp->m_quotainfo->qi_uquotaip); | 357 | IRELE(mp->m_quotainfo->qi_uquotaip); |
358 | mp->m_quotainfo->qi_uquotaip = NULL; | 358 | mp->m_quotainfo->qi_uquotaip = NULL; |
359 | } | 359 | } |
360 | if (mp->m_quotainfo->qi_gquotaip) { | 360 | if (mp->m_quotainfo->qi_gquotaip) { |
361 | IRELE(mp->m_quotainfo->qi_gquotaip); | 361 | IRELE(mp->m_quotainfo->qi_gquotaip); |
362 | mp->m_quotainfo->qi_gquotaip = NULL; | 362 | mp->m_quotainfo->qi_gquotaip = NULL; |
363 | } | 363 | } |
364 | } | 364 | } |
365 | } | 365 | } |
366 | 366 | ||
367 | STATIC int | 367 | STATIC int |
368 | xfs_qm_dqattach_one( | 368 | xfs_qm_dqattach_one( |
369 | xfs_inode_t *ip, | 369 | xfs_inode_t *ip, |
370 | xfs_dqid_t id, | 370 | xfs_dqid_t id, |
371 | uint type, | 371 | uint type, |
372 | uint doalloc, | 372 | uint doalloc, |
373 | xfs_dquot_t *udqhint, /* hint */ | 373 | xfs_dquot_t *udqhint, /* hint */ |
374 | xfs_dquot_t **IO_idqpp) | 374 | xfs_dquot_t **IO_idqpp) |
375 | { | 375 | { |
376 | xfs_dquot_t *dqp; | 376 | xfs_dquot_t *dqp; |
377 | int error; | 377 | int error; |
378 | 378 | ||
379 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 379 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
380 | error = 0; | 380 | error = 0; |
381 | 381 | ||
382 | /* | 382 | /* |
383 | * See if we already have it in the inode itself. IO_idqpp is | 383 | * See if we already have it in the inode itself. IO_idqpp is |
384 | * &i_udquot or &i_gdquot. This made the code look weird, but | 384 | * &i_udquot or &i_gdquot. This made the code look weird, but |
385 | * made the logic a lot simpler. | 385 | * made the logic a lot simpler. |
386 | */ | 386 | */ |
387 | dqp = *IO_idqpp; | 387 | dqp = *IO_idqpp; |
388 | if (dqp) { | 388 | if (dqp) { |
389 | trace_xfs_dqattach_found(dqp); | 389 | trace_xfs_dqattach_found(dqp); |
390 | return 0; | 390 | return 0; |
391 | } | 391 | } |
392 | 392 | ||
393 | /* | 393 | /* |
394 | * udqhint is the i_udquot field in inode, and is non-NULL only | 394 | * udqhint is the i_udquot field in inode, and is non-NULL only |
395 | * when the type arg is group/project. Its purpose is to save a | 395 | * when the type arg is group/project. Its purpose is to save a |
396 | * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside | 396 | * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside |
397 | * the user dquot. | 397 | * the user dquot. |
398 | */ | 398 | */ |
399 | if (udqhint) { | 399 | if (udqhint) { |
400 | ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); | 400 | ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); |
401 | xfs_dqlock(udqhint); | 401 | xfs_dqlock(udqhint); |
402 | 402 | ||
403 | /* | 403 | /* |
404 | * No need to take dqlock to look at the id. | 404 | * No need to take dqlock to look at the id. |
405 | * | 405 | * |
406 | * The ID can't change until it gets reclaimed, and it won't | 406 | * The ID can't change until it gets reclaimed, and it won't |
407 | * be reclaimed as long as we have a ref from inode and we | 407 | * be reclaimed as long as we have a ref from inode and we |
408 | * hold the ilock. | 408 | * hold the ilock. |
409 | */ | 409 | */ |
410 | dqp = udqhint->q_gdquot; | 410 | dqp = udqhint->q_gdquot; |
411 | if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { | 411 | if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { |
412 | ASSERT(*IO_idqpp == NULL); | 412 | ASSERT(*IO_idqpp == NULL); |
413 | 413 | ||
414 | *IO_idqpp = xfs_qm_dqhold(dqp); | 414 | *IO_idqpp = xfs_qm_dqhold(dqp); |
415 | xfs_dqunlock(udqhint); | 415 | xfs_dqunlock(udqhint); |
416 | return 0; | 416 | return 0; |
417 | } | 417 | } |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * We can't hold a dquot lock when we call the dqget code. | 420 | * We can't hold a dquot lock when we call the dqget code. |
421 | * We'll deadlock in no time, because of (not conforming to) | 421 | * We'll deadlock in no time, because of (not conforming to) |
422 | * lock ordering - the inodelock comes before any dquot lock, | 422 | * lock ordering - the inodelock comes before any dquot lock, |
423 | * and we may drop and reacquire the ilock in xfs_qm_dqget(). | 423 | * and we may drop and reacquire the ilock in xfs_qm_dqget(). |
424 | */ | 424 | */ |
425 | xfs_dqunlock(udqhint); | 425 | xfs_dqunlock(udqhint); |
426 | } | 426 | } |
427 | 427 | ||
428 | /* | 428 | /* |
429 | * Find the dquot from somewhere. This bumps the | 429 | * Find the dquot from somewhere. This bumps the |
430 | * reference count of dquot and returns it locked. | 430 | * reference count of dquot and returns it locked. |
431 | * This can return ENOENT if dquot didn't exist on | 431 | * This can return ENOENT if dquot didn't exist on |
432 | * disk and we didn't ask it to allocate; | 432 | * disk and we didn't ask it to allocate; |
433 | * ESRCH if quotas got turned off suddenly. | 433 | * ESRCH if quotas got turned off suddenly. |
434 | */ | 434 | */ |
435 | error = xfs_qm_dqget(ip->i_mount, ip, id, type, | 435 | error = xfs_qm_dqget(ip->i_mount, ip, id, type, |
436 | doalloc | XFS_QMOPT_DOWARN, &dqp); | 436 | doalloc | XFS_QMOPT_DOWARN, &dqp); |
437 | if (error) | 437 | if (error) |
438 | return error; | 438 | return error; |
439 | 439 | ||
440 | trace_xfs_dqattach_get(dqp); | 440 | trace_xfs_dqattach_get(dqp); |
441 | 441 | ||
442 | /* | 442 | /* |
443 | * dqget may have dropped and re-acquired the ilock, but it guarantees | 443 | * dqget may have dropped and re-acquired the ilock, but it guarantees |
444 | * that the dquot returned is the one that should go in the inode. | 444 | * that the dquot returned is the one that should go in the inode. |
445 | */ | 445 | */ |
446 | *IO_idqpp = dqp; | 446 | *IO_idqpp = dqp; |
447 | xfs_dqunlock(dqp); | 447 | xfs_dqunlock(dqp); |
448 | return 0; | 448 | return 0; |
449 | } | 449 | } |
450 | 450 | ||
451 | 451 | ||
452 | /* | 452 | /* |
453 | * Given a udquot and gdquot, attach a ptr to the group dquot in the | 453 | * Given a udquot and gdquot, attach a ptr to the group dquot in the |
454 | * udquot as a hint for future lookups. | 454 | * udquot as a hint for future lookups. |
455 | */ | 455 | */ |
456 | STATIC void | 456 | STATIC void |
457 | xfs_qm_dqattach_grouphint( | 457 | xfs_qm_dqattach_grouphint( |
458 | xfs_dquot_t *udq, | 458 | xfs_dquot_t *udq, |
459 | xfs_dquot_t *gdq) | 459 | xfs_dquot_t *gdq) |
460 | { | 460 | { |
461 | xfs_dquot_t *tmp; | 461 | xfs_dquot_t *tmp; |
462 | 462 | ||
463 | xfs_dqlock(udq); | 463 | xfs_dqlock(udq); |
464 | 464 | ||
465 | tmp = udq->q_gdquot; | 465 | tmp = udq->q_gdquot; |
466 | if (tmp) { | 466 | if (tmp) { |
467 | if (tmp == gdq) | 467 | if (tmp == gdq) |
468 | goto done; | 468 | goto done; |
469 | 469 | ||
470 | udq->q_gdquot = NULL; | 470 | udq->q_gdquot = NULL; |
471 | xfs_qm_dqrele(tmp); | 471 | xfs_qm_dqrele(tmp); |
472 | } | 472 | } |
473 | 473 | ||
474 | udq->q_gdquot = xfs_qm_dqhold(gdq); | 474 | udq->q_gdquot = xfs_qm_dqhold(gdq); |
475 | done: | 475 | done: |
476 | xfs_dqunlock(udq); | 476 | xfs_dqunlock(udq); |
477 | } | 477 | } |
478 | 478 | ||
479 | static bool | 479 | static bool |
480 | xfs_qm_need_dqattach( | 480 | xfs_qm_need_dqattach( |
481 | struct xfs_inode *ip) | 481 | struct xfs_inode *ip) |
482 | { | 482 | { |
483 | struct xfs_mount *mp = ip->i_mount; | 483 | struct xfs_mount *mp = ip->i_mount; |
484 | 484 | ||
485 | if (!XFS_IS_QUOTA_RUNNING(mp)) | 485 | if (!XFS_IS_QUOTA_RUNNING(mp)) |
486 | return false; | 486 | return false; |
487 | if (!XFS_IS_QUOTA_ON(mp)) | 487 | if (!XFS_IS_QUOTA_ON(mp)) |
488 | return false; | 488 | return false; |
489 | if (!XFS_NOT_DQATTACHED(mp, ip)) | 489 | if (!XFS_NOT_DQATTACHED(mp, ip)) |
490 | return false; | 490 | return false; |
491 | if (ip->i_ino == mp->m_sb.sb_uquotino || | 491 | if (ip->i_ino == mp->m_sb.sb_uquotino || |
492 | ip->i_ino == mp->m_sb.sb_gquotino) | 492 | ip->i_ino == mp->m_sb.sb_gquotino) |
493 | return false; | 493 | return false; |
494 | return true; | 494 | return true; |
495 | } | 495 | } |
496 | 496 | ||
497 | /* | 497 | /* |
498 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON | 498 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON |
499 | * into account. | 499 | * into account. |
500 | * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. | 500 | * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. |
501 | * Inode may get unlocked and relocked in here, and the caller must deal with | 501 | * Inode may get unlocked and relocked in here, and the caller must deal with |
502 | * the consequences. | 502 | * the consequences. |
503 | */ | 503 | */ |
504 | int | 504 | int |
505 | xfs_qm_dqattach_locked( | 505 | xfs_qm_dqattach_locked( |
506 | xfs_inode_t *ip, | 506 | xfs_inode_t *ip, |
507 | uint flags) | 507 | uint flags) |
508 | { | 508 | { |
509 | xfs_mount_t *mp = ip->i_mount; | 509 | xfs_mount_t *mp = ip->i_mount; |
510 | uint nquotas = 0; | 510 | uint nquotas = 0; |
511 | int error = 0; | 511 | int error = 0; |
512 | 512 | ||
513 | if (!xfs_qm_need_dqattach(ip)) | 513 | if (!xfs_qm_need_dqattach(ip)) |
514 | return 0; | 514 | return 0; |
515 | 515 | ||
516 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 516 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
517 | 517 | ||
518 | if (XFS_IS_UQUOTA_ON(mp)) { | 518 | if (XFS_IS_UQUOTA_ON(mp)) { |
519 | error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, | 519 | error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, |
520 | flags & XFS_QMOPT_DQALLOC, | 520 | flags & XFS_QMOPT_DQALLOC, |
521 | NULL, &ip->i_udquot); | 521 | NULL, &ip->i_udquot); |
522 | if (error) | 522 | if (error) |
523 | goto done; | 523 | goto done; |
524 | nquotas++; | 524 | nquotas++; |
525 | } | 525 | } |
526 | 526 | ||
527 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 527 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
528 | if (XFS_IS_OQUOTA_ON(mp)) { | 528 | if (XFS_IS_OQUOTA_ON(mp)) { |
529 | error = XFS_IS_GQUOTA_ON(mp) ? | 529 | error = XFS_IS_GQUOTA_ON(mp) ? |
530 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, | 530 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, |
531 | flags & XFS_QMOPT_DQALLOC, | 531 | flags & XFS_QMOPT_DQALLOC, |
532 | ip->i_udquot, &ip->i_gdquot) : | 532 | ip->i_udquot, &ip->i_gdquot) : |
533 | xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, | 533 | xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, |
534 | flags & XFS_QMOPT_DQALLOC, | 534 | flags & XFS_QMOPT_DQALLOC, |
535 | ip->i_udquot, &ip->i_gdquot); | 535 | ip->i_udquot, &ip->i_gdquot); |
536 | /* | 536 | /* |
537 | * Don't worry about the udquot that we may have | 537 | * Don't worry about the udquot that we may have |
538 | * attached above. It'll get detached, if not already. | 538 | * attached above. It'll get detached, if not already. |
539 | */ | 539 | */ |
540 | if (error) | 540 | if (error) |
541 | goto done; | 541 | goto done; |
542 | nquotas++; | 542 | nquotas++; |
543 | } | 543 | } |
544 | 544 | ||
545 | /* | 545 | /* |
546 | * Attach this group quota to the user quota as a hint. | 546 | * Attach this group quota to the user quota as a hint. |
547 | * This WON'T, in general, result in a thrash. | 547 | * This WON'T, in general, result in a thrash. |
548 | */ | 548 | */ |
549 | if (nquotas == 2) { | 549 | if (nquotas == 2) { |
550 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 550 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
551 | ASSERT(ip->i_udquot); | 551 | ASSERT(ip->i_udquot); |
552 | ASSERT(ip->i_gdquot); | 552 | ASSERT(ip->i_gdquot); |
553 | 553 | ||
554 | /* | 554 | /* |
555 | * We do not have i_udquot locked at this point, but this check | 555 | * We do not have i_udquot locked at this point, but this check |
556 | * is OK since we don't depend on the i_gdquot to be accurate | 556 | * is OK since we don't depend on the i_gdquot to be accurate |
557 | * 100% all the time. It is just a hint, and this will | 557 | * 100% all the time. It is just a hint, and this will |
558 | * succeed in general. | 558 | * succeed in general. |
559 | */ | 559 | */ |
560 | if (ip->i_udquot->q_gdquot != ip->i_gdquot) | 560 | if (ip->i_udquot->q_gdquot != ip->i_gdquot) |
561 | xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); | 561 | xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); |
562 | } | 562 | } |
563 | 563 | ||
564 | done: | 564 | done: |
565 | #ifdef DEBUG | 565 | #ifdef DEBUG |
566 | if (!error) { | 566 | if (!error) { |
567 | if (XFS_IS_UQUOTA_ON(mp)) | 567 | if (XFS_IS_UQUOTA_ON(mp)) |
568 | ASSERT(ip->i_udquot); | 568 | ASSERT(ip->i_udquot); |
569 | if (XFS_IS_OQUOTA_ON(mp)) | 569 | if (XFS_IS_OQUOTA_ON(mp)) |
570 | ASSERT(ip->i_gdquot); | 570 | ASSERT(ip->i_gdquot); |
571 | } | 571 | } |
572 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 572 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
573 | #endif | 573 | #endif |
574 | return error; | 574 | return error; |
575 | } | 575 | } |
576 | 576 | ||
577 | int | 577 | int |
578 | xfs_qm_dqattach( | 578 | xfs_qm_dqattach( |
579 | struct xfs_inode *ip, | 579 | struct xfs_inode *ip, |
580 | uint flags) | 580 | uint flags) |
581 | { | 581 | { |
582 | int error; | 582 | int error; |
583 | 583 | ||
584 | if (!xfs_qm_need_dqattach(ip)) | 584 | if (!xfs_qm_need_dqattach(ip)) |
585 | return 0; | 585 | return 0; |
586 | 586 | ||
587 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 587 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
588 | error = xfs_qm_dqattach_locked(ip, flags); | 588 | error = xfs_qm_dqattach_locked(ip, flags); |
589 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 589 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
590 | 590 | ||
591 | return error; | 591 | return error; |
592 | } | 592 | } |
593 | 593 | ||
594 | /* | 594 | /* |
595 | * Release dquots (and their references) if any. | 595 | * Release dquots (and their references) if any. |
596 | * The inode should be locked EXCL except when this is called by | 596 | * The inode should be locked EXCL except when this is called by |
597 | * xfs_ireclaim. | 597 | * xfs_ireclaim. |
598 | */ | 598 | */ |
599 | void | 599 | void |
600 | xfs_qm_dqdetach( | 600 | xfs_qm_dqdetach( |
601 | xfs_inode_t *ip) | 601 | xfs_inode_t *ip) |
602 | { | 602 | { |
603 | if (!(ip->i_udquot || ip->i_gdquot)) | 603 | if (!(ip->i_udquot || ip->i_gdquot)) |
604 | return; | 604 | return; |
605 | 605 | ||
606 | trace_xfs_dquot_dqdetach(ip); | 606 | trace_xfs_dquot_dqdetach(ip); |
607 | 607 | ||
608 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); | 608 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); |
609 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); | 609 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); |
610 | if (ip->i_udquot) { | 610 | if (ip->i_udquot) { |
611 | xfs_qm_dqrele(ip->i_udquot); | 611 | xfs_qm_dqrele(ip->i_udquot); |
612 | ip->i_udquot = NULL; | 612 | ip->i_udquot = NULL; |
613 | } | 613 | } |
614 | if (ip->i_gdquot) { | 614 | if (ip->i_gdquot) { |
615 | xfs_qm_dqrele(ip->i_gdquot); | 615 | xfs_qm_dqrele(ip->i_gdquot); |
616 | ip->i_gdquot = NULL; | 616 | ip->i_gdquot = NULL; |
617 | } | 617 | } |
618 | } | 618 | } |
619 | 619 | ||
620 | int | ||
621 | xfs_qm_calc_dquots_per_chunk( | ||
622 | struct xfs_mount *mp, | ||
623 | unsigned int nbblks) /* basic block units */ | ||
624 | { | ||
625 | unsigned int ndquots; | ||
626 | |||
627 | ASSERT(nbblks > 0); | ||
628 | ndquots = BBTOB(nbblks); | ||
629 | do_div(ndquots, sizeof(xfs_dqblk_t)); | ||
630 | |||
631 | return ndquots; | ||
632 | } | ||
633 | |||
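xfs_qm_calc_dquots_per_chunk() centralises the dquots-per-chunk arithmetic that xfs_qm_init_quotainfo() used to open-code (and that xfs_qm_reset_dqcounts() still re-derives in its DEBUG assert further down). A minimal userspace sketch of the same calculation; the 512-byte basic block (BBTOB(x) == (x) << 9) and the 136-byte DQBLK_SIZE standing in for sizeof(xfs_dqblk_t) are assumptions here, not values taken from this tree:

	#include <stdio.h>

	#define BBTOB(bbs)	((unsigned int)(bbs) << 9)	/* basic block = 512 bytes */
	#define DQBLK_SIZE	136u				/* assumed dqblk size */

	static unsigned int calc_dquots_per_chunk(unsigned int nbblks)
	{
		return BBTOB(nbblks) / DQBLK_SIZE;	/* same shape as the helper above */
	}

	int main(void)
	{
		/* e.g. a 2-basic-block (1024-byte) chunk: 1024 / 136 = 7 dquots */
		printf("%u dquots per chunk\n", calc_dquots_per_chunk(2));
		return 0;
	}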
620 | /* | 634 | /* |
621 | * This initializes all the quota information that's kept in the | 635 | * This initializes all the quota information that's kept in the |
622 | * mount structure | 636 | * mount structure |
623 | */ | 637 | */ |
624 | STATIC int | 638 | STATIC int |
625 | xfs_qm_init_quotainfo( | 639 | xfs_qm_init_quotainfo( |
626 | xfs_mount_t *mp) | 640 | xfs_mount_t *mp) |
627 | { | 641 | { |
628 | xfs_quotainfo_t *qinf; | 642 | xfs_quotainfo_t *qinf; |
629 | int error; | 643 | int error; |
630 | xfs_dquot_t *dqp; | 644 | xfs_dquot_t *dqp; |
631 | 645 | ||
632 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 646 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
633 | 647 | ||
634 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); | 648 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); |
635 | 649 | ||
636 | /* | 650 | /* |
637 | * See if quotainodes are setup, and if not, allocate them, | 651 | * See if quotainodes are setup, and if not, allocate them, |
638 | * and change the superblock accordingly. | 652 | * and change the superblock accordingly. |
639 | */ | 653 | */ |
640 | if ((error = xfs_qm_init_quotainos(mp))) { | 654 | if ((error = xfs_qm_init_quotainos(mp))) { |
641 | kmem_free(qinf); | 655 | kmem_free(qinf); |
642 | mp->m_quotainfo = NULL; | 656 | mp->m_quotainfo = NULL; |
643 | return error; | 657 | return error; |
644 | } | 658 | } |
645 | 659 | ||
646 | INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); | 660 | INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); |
647 | INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); | 661 | INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); |
648 | mutex_init(&qinf->qi_tree_lock); | 662 | mutex_init(&qinf->qi_tree_lock); |
649 | 663 | ||
650 | INIT_LIST_HEAD(&qinf->qi_lru_list); | 664 | INIT_LIST_HEAD(&qinf->qi_lru_list); |
651 | qinf->qi_lru_count = 0; | 665 | qinf->qi_lru_count = 0; |
652 | mutex_init(&qinf->qi_lru_lock); | 666 | mutex_init(&qinf->qi_lru_lock); |
653 | 667 | ||
654 | /* mutex used to serialize quotaoffs */ | 668 | /* mutex used to serialize quotaoffs */ |
655 | mutex_init(&qinf->qi_quotaofflock); | 669 | mutex_init(&qinf->qi_quotaofflock); |
656 | 670 | ||
657 | /* Precalc some constants */ | 671 | /* Precalc some constants */ |
658 | qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); | 672 | qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); |
659 | ASSERT(qinf->qi_dqchunklen); | 673 | qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp, |
660 | qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); | 674 | qinf->qi_dqchunklen); |
661 | do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t)); | ||
662 | 675 | ||
663 | mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); | 676 | mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); |
664 | 677 | ||
665 | /* | 678 | /* |
666 | * We try to get the limits from the superuser's limits fields. | 679 | * We try to get the limits from the superuser's limits fields. |
667 | * This is quite hacky, but it is standard quota practice. | 680 | * This is quite hacky, but it is standard quota practice. |
668 | * | 681 | * |
669 | * We look at the USR dquot with id == 0 first, but if user quotas | 682 | * We look at the USR dquot with id == 0 first, but if user quotas |
670 | * are not enabled we goto the GRP dquot with id == 0. | 683 | * are not enabled we goto the GRP dquot with id == 0. |
671 | * We don't really care to keep separate default limits for user | 684 | * We don't really care to keep separate default limits for user |
672 | * and group quotas, at least not at this point. | 685 | * and group quotas, at least not at this point. |
673 | * | 686 | * |
674 | * Since we may not have done a quotacheck by this point, just read | 687 | * Since we may not have done a quotacheck by this point, just read |
675 | * the dquot without attaching it to any hashtables or lists. | 688 | * the dquot without attaching it to any hashtables or lists. |
676 | */ | 689 | */ |
677 | error = xfs_qm_dqread(mp, 0, | 690 | error = xfs_qm_dqread(mp, 0, |
678 | XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : | 691 | XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : |
679 | (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : | 692 | (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : |
680 | XFS_DQ_PROJ), | 693 | XFS_DQ_PROJ), |
681 | XFS_QMOPT_DOWARN, &dqp); | 694 | XFS_QMOPT_DOWARN, &dqp); |
682 | if (!error) { | 695 | if (!error) { |
683 | xfs_disk_dquot_t *ddqp = &dqp->q_core; | 696 | xfs_disk_dquot_t *ddqp = &dqp->q_core; |
684 | 697 | ||
685 | /* | 698 | /* |
686 | * The warnings and timers set the grace period a user or | 699 | * The warnings and timers set the grace period a user or |
687 | * group is given before further writes start to fail. If | 700 | * group is given before further writes start to fail. If |
688 | * it is zero, a default is used. | 701 | * it is zero, a default is used. |
689 | */ | 702 | */ |
690 | qinf->qi_btimelimit = ddqp->d_btimer ? | 703 | qinf->qi_btimelimit = ddqp->d_btimer ? |
691 | be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; | 704 | be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; |
692 | qinf->qi_itimelimit = ddqp->d_itimer ? | 705 | qinf->qi_itimelimit = ddqp->d_itimer ? |
693 | be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; | 706 | be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; |
694 | qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? | 707 | qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? |
695 | be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; | 708 | be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; |
696 | qinf->qi_bwarnlimit = ddqp->d_bwarns ? | 709 | qinf->qi_bwarnlimit = ddqp->d_bwarns ? |
697 | be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; | 710 | be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; |
698 | qinf->qi_iwarnlimit = ddqp->d_iwarns ? | 711 | qinf->qi_iwarnlimit = ddqp->d_iwarns ? |
699 | be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; | 712 | be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; |
700 | qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? | 713 | qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? |
701 | be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; | 714 | be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; |
702 | qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); | 715 | qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); |
703 | qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); | 716 | qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); |
704 | qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); | 717 | qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); |
705 | qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); | 718 | qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); |
706 | qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); | 719 | qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); |
707 | qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); | 720 | qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); |
708 | 721 | ||
709 | xfs_qm_dqdestroy(dqp); | 722 | xfs_qm_dqdestroy(dqp); |
710 | } else { | 723 | } else { |
711 | qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; | 724 | qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; |
712 | qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; | 725 | qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; |
713 | qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; | 726 | qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; |
714 | qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; | 727 | qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; |
715 | qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; | 728 | qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; |
716 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; | 729 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; |
717 | } | 730 | } |
718 | 731 | ||
719 | qinf->qi_shrinker.shrink = xfs_qm_shake; | 732 | qinf->qi_shrinker.shrink = xfs_qm_shake; |
720 | qinf->qi_shrinker.seeks = DEFAULT_SEEKS; | 733 | qinf->qi_shrinker.seeks = DEFAULT_SEEKS; |
721 | register_shrinker(&qinf->qi_shrinker); | 734 | register_shrinker(&qinf->qi_shrinker); |
722 | return 0; | 735 | return 0; |
723 | } | 736 | } |
724 | 737 | ||
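Every limit, timer and warning field in xfs_qm_init_quotainfo() follows the same rule: take the value stored in the id == 0 dquot if it is non-zero, else fall back to the compile-time default. Distilled with a hypothetical helper macro (glossing over the warn limits, which are 16-bit and use be16_to_cpu):

	/* hypothetical distillation of the fallback pattern, not code from this tree */
	#define LIMIT_OR_DEFAULT(disk, dflt) \
		((disk) ? be32_to_cpu(disk) : (dflt))

	qinf->qi_btimelimit = LIMIT_OR_DEFAULT(ddqp->d_btimer, XFS_QM_BTIMELIMIT);

The id == 0 dquot thus doubles as the on-disk store for administrator-set defaults, which is why it is read here without being attached to any hashtables or lists.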
725 | 738 | ||
726 | /* | 739 | /* |
727 | * Gets called when unmounting a filesystem or when all quotas get | 740 | * Gets called when unmounting a filesystem or when all quotas get |
728 | * turned off. | 741 | * turned off. |
729 | * This purges the quota inodes, destroys locks and frees itself. | 742 | * This purges the quota inodes, destroys locks and frees itself. |
730 | */ | 743 | */ |
731 | void | 744 | void |
732 | xfs_qm_destroy_quotainfo( | 745 | xfs_qm_destroy_quotainfo( |
733 | xfs_mount_t *mp) | 746 | xfs_mount_t *mp) |
734 | { | 747 | { |
735 | xfs_quotainfo_t *qi; | 748 | xfs_quotainfo_t *qi; |
736 | 749 | ||
737 | qi = mp->m_quotainfo; | 750 | qi = mp->m_quotainfo; |
738 | ASSERT(qi != NULL); | 751 | ASSERT(qi != NULL); |
739 | 752 | ||
740 | unregister_shrinker(&qi->qi_shrinker); | 753 | unregister_shrinker(&qi->qi_shrinker); |
741 | 754 | ||
742 | if (qi->qi_uquotaip) { | 755 | if (qi->qi_uquotaip) { |
743 | IRELE(qi->qi_uquotaip); | 756 | IRELE(qi->qi_uquotaip); |
744 | qi->qi_uquotaip = NULL; /* paranoia */ | 757 | qi->qi_uquotaip = NULL; /* paranoia */ |
745 | } | 758 | } |
746 | if (qi->qi_gquotaip) { | 759 | if (qi->qi_gquotaip) { |
747 | IRELE(qi->qi_gquotaip); | 760 | IRELE(qi->qi_gquotaip); |
748 | qi->qi_gquotaip = NULL; | 761 | qi->qi_gquotaip = NULL; |
749 | } | 762 | } |
750 | mutex_destroy(&qi->qi_quotaofflock); | 763 | mutex_destroy(&qi->qi_quotaofflock); |
751 | kmem_free(qi); | 764 | kmem_free(qi); |
752 | mp->m_quotainfo = NULL; | 765 | mp->m_quotainfo = NULL; |
753 | } | 766 | } |
754 | 767 | ||
755 | /* | 768 | /* |
756 | * Create an inode and return with a reference already taken, but unlocked | 769 | * Create an inode and return with a reference already taken, but unlocked |
757 | * This is how we create quota inodes | 770 | * This is how we create quota inodes |
758 | */ | 771 | */ |
759 | STATIC int | 772 | STATIC int |
760 | xfs_qm_qino_alloc( | 773 | xfs_qm_qino_alloc( |
761 | xfs_mount_t *mp, | 774 | xfs_mount_t *mp, |
762 | xfs_inode_t **ip, | 775 | xfs_inode_t **ip, |
763 | __int64_t sbfields, | 776 | __int64_t sbfields, |
764 | uint flags) | 777 | uint flags) |
765 | { | 778 | { |
766 | xfs_trans_t *tp; | 779 | xfs_trans_t *tp; |
767 | int error; | 780 | int error; |
768 | int committed; | 781 | int committed; |
769 | 782 | ||
770 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); | 783 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); |
771 | if ((error = xfs_trans_reserve(tp, | 784 | if ((error = xfs_trans_reserve(tp, |
772 | XFS_QM_QINOCREATE_SPACE_RES(mp), | 785 | XFS_QM_QINOCREATE_SPACE_RES(mp), |
773 | XFS_CREATE_LOG_RES(mp), 0, | 786 | XFS_CREATE_LOG_RES(mp), 0, |
774 | XFS_TRANS_PERM_LOG_RES, | 787 | XFS_TRANS_PERM_LOG_RES, |
775 | XFS_CREATE_LOG_COUNT))) { | 788 | XFS_CREATE_LOG_COUNT))) { |
776 | xfs_trans_cancel(tp, 0); | 789 | xfs_trans_cancel(tp, 0); |
777 | return error; | 790 | return error; |
778 | } | 791 | } |
779 | 792 | ||
780 | error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); | 793 | error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); |
781 | if (error) { | 794 | if (error) { |
782 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | | 795 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | |
783 | XFS_TRANS_ABORT); | 796 | XFS_TRANS_ABORT); |
784 | return error; | 797 | return error; |
785 | } | 798 | } |
786 | 799 | ||
787 | /* | 800 | /* |
788 | * Make the changes in the superblock, and log those too. | 801 | * Make the changes in the superblock, and log those too. |
789 | * sbfields arg may contain fields other than *QUOTINO; | 802 | * sbfields arg may contain fields other than *QUOTINO; |
790 | * VERSIONNUM for example. | 803 | * VERSIONNUM for example. |
791 | */ | 804 | */ |
792 | spin_lock(&mp->m_sb_lock); | 805 | spin_lock(&mp->m_sb_lock); |
793 | if (flags & XFS_QMOPT_SBVERSION) { | 806 | if (flags & XFS_QMOPT_SBVERSION) { |
794 | ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); | 807 | ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); |
795 | ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | | 808 | ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | |
796 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == | 809 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == |
797 | (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | | 810 | (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | |
798 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); | 811 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); |
799 | 812 | ||
800 | xfs_sb_version_addquota(&mp->m_sb); | 813 | xfs_sb_version_addquota(&mp->m_sb); |
801 | mp->m_sb.sb_uquotino = NULLFSINO; | 814 | mp->m_sb.sb_uquotino = NULLFSINO; |
802 | mp->m_sb.sb_gquotino = NULLFSINO; | 815 | mp->m_sb.sb_gquotino = NULLFSINO; |
803 | 816 | ||
804 | /* qflags will get updated _after_ quotacheck */ | 817 | /* qflags will get updated _after_ quotacheck */ |
805 | mp->m_sb.sb_qflags = 0; | 818 | mp->m_sb.sb_qflags = 0; |
806 | } | 819 | } |
807 | if (flags & XFS_QMOPT_UQUOTA) | 820 | if (flags & XFS_QMOPT_UQUOTA) |
808 | mp->m_sb.sb_uquotino = (*ip)->i_ino; | 821 | mp->m_sb.sb_uquotino = (*ip)->i_ino; |
809 | else | 822 | else |
810 | mp->m_sb.sb_gquotino = (*ip)->i_ino; | 823 | mp->m_sb.sb_gquotino = (*ip)->i_ino; |
811 | spin_unlock(&mp->m_sb_lock); | 824 | spin_unlock(&mp->m_sb_lock); |
812 | xfs_mod_sb(tp, sbfields); | 825 | xfs_mod_sb(tp, sbfields); |
813 | 826 | ||
814 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { | 827 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { |
815 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); | 828 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); |
816 | return error; | 829 | return error; |
817 | } | 830 | } |
818 | return 0; | 831 | return 0; |
819 | } | 832 | } |
820 | 833 | ||
821 | 834 | ||
822 | STATIC void | 835 | STATIC void |
823 | xfs_qm_reset_dqcounts( | 836 | xfs_qm_reset_dqcounts( |
824 | xfs_mount_t *mp, | 837 | xfs_mount_t *mp, |
825 | xfs_buf_t *bp, | 838 | xfs_buf_t *bp, |
826 | xfs_dqid_t id, | 839 | xfs_dqid_t id, |
827 | uint type) | 840 | uint type) |
828 | { | 841 | { |
829 | xfs_disk_dquot_t *ddq; | 842 | xfs_disk_dquot_t *ddq; |
830 | int j; | 843 | int j; |
831 | 844 | ||
832 | trace_xfs_reset_dqcounts(bp, _RET_IP_); | 845 | trace_xfs_reset_dqcounts(bp, _RET_IP_); |
833 | 846 | ||
834 | /* | 847 | /* |
835 | * Reset all counters and timers. They'll be | 848 | * Reset all counters and timers. They'll be |
836 | * started afresh by xfs_qm_quotacheck. | 849 | * started afresh by xfs_qm_quotacheck. |
837 | */ | 850 | */ |
838 | #ifdef DEBUG | 851 | #ifdef DEBUG |
839 | j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); | 852 | j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); |
840 | do_div(j, sizeof(xfs_dqblk_t)); | 853 | do_div(j, sizeof(xfs_dqblk_t)); |
841 | ASSERT(mp->m_quotainfo->qi_dqperchunk == j); | 854 | ASSERT(mp->m_quotainfo->qi_dqperchunk == j); |
842 | #endif | 855 | #endif |
843 | ddq = bp->b_addr; | 856 | ddq = bp->b_addr; |
844 | for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { | 857 | for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { |
845 | /* | 858 | /* |
846 | * Do a sanity check, and if needed, repair the dqblk. Don't | 859 | * Do a sanity check, and if needed, repair the dqblk. Don't |
847 | * output any warnings because it's perfectly possible to | 860 | * output any warnings because it's perfectly possible to |
848 | * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. | 861 | * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. |
849 | */ | 862 | */ |
850 | (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, | 863 | (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, |
851 | "xfs_quotacheck"); | 864 | "xfs_quotacheck"); |
852 | ddq->d_bcount = 0; | 865 | ddq->d_bcount = 0; |
853 | ddq->d_icount = 0; | 866 | ddq->d_icount = 0; |
854 | ddq->d_rtbcount = 0; | 867 | ddq->d_rtbcount = 0; |
855 | ddq->d_btimer = 0; | 868 | ddq->d_btimer = 0; |
856 | ddq->d_itimer = 0; | 869 | ddq->d_itimer = 0; |
857 | ddq->d_rtbtimer = 0; | 870 | ddq->d_rtbtimer = 0; |
858 | ddq->d_bwarns = 0; | 871 | ddq->d_bwarns = 0; |
859 | ddq->d_iwarns = 0; | 872 | ddq->d_iwarns = 0; |
860 | ddq->d_rtbwarns = 0; | 873 | ddq->d_rtbwarns = 0; |
861 | ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); | 874 | ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); |
862 | } | 875 | } |
863 | } | 876 | } |
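The DEBUG block in xfs_qm_reset_dqcounts() recomputes qi_dqperchunk from first principles: the byte size of one dquot cluster divided by the size of one on-disk xfs_dqblk_t. A minimal userspace sketch of that arithmetic, with assumed sizes standing in for the real on-disk layout:

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative stand-ins; the real layouts live in the XFS headers. */
    struct disk_dquot { uint8_t bytes[104]; };                     /* core record */
    struct dqblk      { struct disk_dquot dd; uint8_t tail[32]; }; /* + UUID/LSN/CRC tail */

    int main(void)
    {
            size_t cluster_bytes = 4 * 4096; /* assumed: 4 filesystem blocks of 4k */
            size_t dqperchunk = cluster_bytes / sizeof(struct dqblk);

            /*
             * The loop in xfs_qm_reset_dqcounts() strides by sizeof(xfs_dqblk_t),
             * not sizeof(xfs_disk_dquot_t) -- that is what the cast in
             * "ddq = (xfs_disk_dquot_t *)((xfs_dqblk_t *)ddq + 1)" buys: the
             * reserved tail of each entry is skipped and only the core record
             * is reset.
             */
            printf("%zu dquots per chunk, stride %zu bytes\n",
                   dqperchunk, sizeof(struct dqblk));
            return 0;
    }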
864 | 877 | ||
865 | STATIC int | 878 | STATIC int |
866 | xfs_qm_dqiter_bufs( | 879 | xfs_qm_dqiter_bufs( |
867 | struct xfs_mount *mp, | 880 | struct xfs_mount *mp, |
868 | xfs_dqid_t firstid, | 881 | xfs_dqid_t firstid, |
869 | xfs_fsblock_t bno, | 882 | xfs_fsblock_t bno, |
870 | xfs_filblks_t blkcnt, | 883 | xfs_filblks_t blkcnt, |
871 | uint flags, | 884 | uint flags, |
872 | struct list_head *buffer_list) | 885 | struct list_head *buffer_list) |
873 | { | 886 | { |
874 | struct xfs_buf *bp; | 887 | struct xfs_buf *bp; |
875 | int error; | 888 | int error; |
876 | int type; | 889 | int type; |
877 | 890 | ||
878 | ASSERT(blkcnt > 0); | 891 | ASSERT(blkcnt > 0); |
879 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : | 892 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : |
880 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); | 893 | (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); |
881 | error = 0; | 894 | error = 0; |
882 | 895 | ||
883 | /* | 896 | /* |
884 | * Blkcnt arg can be a very big number, and might even be | 897 | * Blkcnt arg can be a very big number, and might even be |
885 | * larger than the log itself. So, we have to break it up into | 898 | * larger than the log itself. So, we have to break it up into |
886 | * manageable-sized transactions. | 899 | * manageable-sized transactions. |
887 | * Note that we don't start a permanent transaction here; we might | 900 | * Note that we don't start a permanent transaction here; we might |
888 | * not be able to get a log reservation for the whole thing up front, | 901 | * not be able to get a log reservation for the whole thing up front, |
889 | * and we don't really care to either, because we just discard | 902 | * and we don't really care to either, because we just discard |
890 | * everything if we were to crash in the middle of this loop. | 903 | * everything if we were to crash in the middle of this loop. |
891 | */ | 904 | */ |
892 | while (blkcnt--) { | 905 | while (blkcnt--) { |
893 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, | 906 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, |
894 | XFS_FSB_TO_DADDR(mp, bno), | 907 | XFS_FSB_TO_DADDR(mp, bno), |
895 | mp->m_quotainfo->qi_dqchunklen, 0, &bp, | 908 | mp->m_quotainfo->qi_dqchunklen, 0, &bp, |
896 | &xfs_dquot_buf_ops); | 909 | &xfs_dquot_buf_ops); |
897 | if (error) | 910 | if (error) |
898 | break; | 911 | break; |
899 | 912 | ||
913 | /* | ||
914 | * XXX(hch): need to figure out if it makes sense to validate | ||
915 | * the CRC here. | ||
916 | */ | ||
900 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); | 917 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); |
901 | xfs_buf_delwri_queue(bp, buffer_list); | 918 | xfs_buf_delwri_queue(bp, buffer_list); |
902 | xfs_buf_relse(bp); | 919 | xfs_buf_relse(bp); |
903 | /* | 920 | /* |
904 | * Go to the next block. | 921 | * Go to the next block. |
905 | */ | 922 | */ |
906 | bno++; | 923 | bno++; |
907 | firstid += mp->m_quotainfo->qi_dqperchunk; | 924 | firstid += mp->m_quotainfo->qi_dqperchunk; |
908 | } | 925 | } |
909 | 926 | ||
910 | return error; | 927 | return error; |
911 | } | 928 | } |
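The XXX(hch) comment above asks whether the CRC should be re-validated at this point; the read already goes through &xfs_dquot_buf_ops, whose verifier runs at I/O completion, so a second check here would be belt-and-braces. For reference, the verify step follows the usual embedded-checksum pattern: CRC32c over the buffer with the stored field treated as zero, compared against the inverted stored value. A userspace sketch of that pattern -- crc32c() is a placeholder for any CRC32c implementation, and the inversion convention is an assumption borrowed from the xfs_cksum.h helpers:

    #include <stdint.h>
    #include <string.h>

    uint32_t crc32c(uint32_t seed, const void *buf, size_t len); /* any impl */

    /*
     * Verify a checksum embedded at cksum_offset inside buf: compute the
     * CRC with the stored field treated as zero, then compare against the
     * stored (inverted) value.
     */
    int blk_verify_cksum(const char *buf, size_t len, size_t cksum_offset)
    {
            uint32_t stored, zero = 0, crc;

            memcpy(&stored, buf + cksum_offset, sizeof(stored));

            crc = crc32c(~0U, buf, cksum_offset);
            crc = crc32c(crc, &zero, sizeof(zero));
            crc = crc32c(crc, buf + cksum_offset + sizeof(zero),
                         len - cksum_offset - sizeof(zero));

            return stored == ~crc; /* assumed: CRC stored bit-inverted */
    }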
912 | 929 | ||
913 | /* | 930 | /* |
914 | * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a | 931 | * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a |
915 | * caller-supplied function for every chunk of dquots that we find. | 932 | * caller-supplied function for every chunk of dquots that we find. |
916 | */ | 933 | */ |
917 | STATIC int | 934 | STATIC int |
918 | xfs_qm_dqiterate( | 935 | xfs_qm_dqiterate( |
919 | struct xfs_mount *mp, | 936 | struct xfs_mount *mp, |
920 | struct xfs_inode *qip, | 937 | struct xfs_inode *qip, |
921 | uint flags, | 938 | uint flags, |
922 | struct list_head *buffer_list) | 939 | struct list_head *buffer_list) |
923 | { | 940 | { |
924 | struct xfs_bmbt_irec *map; | 941 | struct xfs_bmbt_irec *map; |
925 | int i, nmaps; /* number of map entries */ | 942 | int i, nmaps; /* number of map entries */ |
926 | int error; /* return value */ | 943 | int error; /* return value */ |
927 | xfs_fileoff_t lblkno; | 944 | xfs_fileoff_t lblkno; |
928 | xfs_filblks_t maxlblkcnt; | 945 | xfs_filblks_t maxlblkcnt; |
929 | xfs_dqid_t firstid; | 946 | xfs_dqid_t firstid; |
930 | xfs_fsblock_t rablkno; | 947 | xfs_fsblock_t rablkno; |
931 | xfs_filblks_t rablkcnt; | 948 | xfs_filblks_t rablkcnt; |
932 | 949 | ||
933 | error = 0; | 950 | error = 0; |
934 | /* | 951 | /* |
935 | * This looks racy, but we can't keep an inode lock across a | 952 | * This looks racy, but we can't keep an inode lock across a |
936 | * trans_reserve. But this gets called during quotacheck, and that | 953 | * trans_reserve. But this gets called during quotacheck, and that |
937 | * happens only at mount time, which is single-threaded. | 954 | * happens only at mount time, which is single-threaded. |
938 | */ | 955 | */ |
939 | if (qip->i_d.di_nblocks == 0) | 956 | if (qip->i_d.di_nblocks == 0) |
940 | return 0; | 957 | return 0; |
941 | 958 | ||
942 | map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); | 959 | map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); |
943 | 960 | ||
944 | lblkno = 0; | 961 | lblkno = 0; |
945 | maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); | 962 | maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); |
946 | do { | 963 | do { |
947 | nmaps = XFS_DQITER_MAP_SIZE; | 964 | nmaps = XFS_DQITER_MAP_SIZE; |
948 | /* | 965 | /* |
949 | * We aren't changing the inode itself. Just changing | 966 | * We aren't changing the inode itself. Just changing |
950 | * some of its data. No new blocks are added here, and | 967 | * some of its data. No new blocks are added here, and |
951 | * the inode is never added to the transaction. | 968 | * the inode is never added to the transaction. |
952 | */ | 969 | */ |
953 | xfs_ilock(qip, XFS_ILOCK_SHARED); | 970 | xfs_ilock(qip, XFS_ILOCK_SHARED); |
954 | error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, | 971 | error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, |
955 | map, &nmaps, 0); | 972 | map, &nmaps, 0); |
956 | xfs_iunlock(qip, XFS_ILOCK_SHARED); | 973 | xfs_iunlock(qip, XFS_ILOCK_SHARED); |
957 | if (error) | 974 | if (error) |
958 | break; | 975 | break; |
959 | 976 | ||
960 | ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); | 977 | ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); |
961 | for (i = 0; i < nmaps; i++) { | 978 | for (i = 0; i < nmaps; i++) { |
962 | ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); | 979 | ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); |
963 | ASSERT(map[i].br_blockcount); | 980 | ASSERT(map[i].br_blockcount); |
964 | 981 | ||
965 | 982 | ||
966 | lblkno += map[i].br_blockcount; | 983 | lblkno += map[i].br_blockcount; |
967 | 984 | ||
968 | if (map[i].br_startblock == HOLESTARTBLOCK) | 985 | if (map[i].br_startblock == HOLESTARTBLOCK) |
969 | continue; | 986 | continue; |
970 | 987 | ||
971 | firstid = (xfs_dqid_t) map[i].br_startoff * | 988 | firstid = (xfs_dqid_t) map[i].br_startoff * |
972 | mp->m_quotainfo->qi_dqperchunk; | 989 | mp->m_quotainfo->qi_dqperchunk; |
973 | /* | 990 | /* |
974 | * Do a read-ahead on the next extent. | 991 | * Do a read-ahead on the next extent. |
975 | */ | 992 | */ |
976 | if ((i+1 < nmaps) && | 993 | if ((i+1 < nmaps) && |
977 | (map[i+1].br_startblock != HOLESTARTBLOCK)) { | 994 | (map[i+1].br_startblock != HOLESTARTBLOCK)) { |
978 | rablkcnt = map[i+1].br_blockcount; | 995 | rablkcnt = map[i+1].br_blockcount; |
979 | rablkno = map[i+1].br_startblock; | 996 | rablkno = map[i+1].br_startblock; |
980 | while (rablkcnt--) { | 997 | while (rablkcnt--) { |
981 | xfs_buf_readahead(mp->m_ddev_targp, | 998 | xfs_buf_readahead(mp->m_ddev_targp, |
982 | XFS_FSB_TO_DADDR(mp, rablkno), | 999 | XFS_FSB_TO_DADDR(mp, rablkno), |
983 | mp->m_quotainfo->qi_dqchunklen, | 1000 | mp->m_quotainfo->qi_dqchunklen, |
984 | NULL); | 1001 | NULL); |
985 | rablkno++; | 1002 | rablkno++; |
986 | } | 1003 | } |
987 | } | 1004 | } |
988 | /* | 1005 | /* |
989 | * Iterate thru all the blks in the extent and | 1006 | * Iterate thru all the blks in the extent and |
990 | * reset the counters of all the dquots inside them. | 1007 | * reset the counters of all the dquots inside them. |
991 | */ | 1008 | */ |
992 | error = xfs_qm_dqiter_bufs(mp, firstid, | 1009 | error = xfs_qm_dqiter_bufs(mp, firstid, |
993 | map[i].br_startblock, | 1010 | map[i].br_startblock, |
994 | map[i].br_blockcount, | 1011 | map[i].br_blockcount, |
995 | flags, buffer_list); | 1012 | flags, buffer_list); |
996 | if (error) | 1013 | if (error) |
997 | goto out; | 1014 | goto out; |
998 | } | 1015 | } |
999 | } while (nmaps > 0); | 1016 | } while (nmaps > 0); |
1000 | 1017 | ||
1001 | out: | 1018 | out: |
1002 | kmem_free(map); | 1019 | kmem_free(map); |
1003 | return error; | 1020 | return error; |
1004 | } | 1021 | } |
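The extent walk in xfs_qm_dqiterate() overlaps I/O with CPU work: while the buffers of extent i are being reset and queued, readahead for extent i+1 is already in flight. The shape of that pattern, with hypothetical helpers standing in for xfs_buf_readahead() and the per-buffer reset:

    struct mapping { long startblock; long blockcount; int is_hole; };

    extern void prefetch_blocks(long startblock, long count); /* async, returns at once */
    extern void process_blocks(long startblock, long count);  /* synchronous work */

    void iterate_with_readahead(struct mapping *map, int nmaps)
    {
            int i;

            for (i = 0; i < nmaps; i++) {
                    if (map[i].is_hole)
                            continue;
                    /* Kick off readahead for the *next* extent first ... */
                    if (i + 1 < nmaps && !map[i + 1].is_hole)
                            prefetch_blocks(map[i + 1].startblock,
                                            map[i + 1].blockcount);
                    /* ... then do the synchronous work on this one. */
                    process_blocks(map[i].startblock, map[i].blockcount);
            }
    }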
1005 | 1022 | ||
1006 | /* | 1023 | /* |
1007 | * Called by dqusage_adjust in doing a quotacheck. | 1024 | * Called by dqusage_adjust in doing a quotacheck. |
1008 | * | 1025 | * |
1009 | * Given the inode and a dquot id, this updates both the incore dquot as well | 1026 | * Given the inode and a dquot id, this updates both the incore dquot as well |
1010 | * as the buffer copy. This is so that once the quotacheck is done, we can | 1027 | * as the buffer copy. This is so that once the quotacheck is done, we can |
1011 | * just log all the buffers, as opposed to logging numerous updates to | 1028 | * just log all the buffers, as opposed to logging numerous updates to |
1012 | * individual dquots. | 1029 | * individual dquots. |
1013 | */ | 1030 | */ |
1014 | STATIC int | 1031 | STATIC int |
1015 | xfs_qm_quotacheck_dqadjust( | 1032 | xfs_qm_quotacheck_dqadjust( |
1016 | struct xfs_inode *ip, | 1033 | struct xfs_inode *ip, |
1017 | xfs_dqid_t id, | 1034 | xfs_dqid_t id, |
1018 | uint type, | 1035 | uint type, |
1019 | xfs_qcnt_t nblks, | 1036 | xfs_qcnt_t nblks, |
1020 | xfs_qcnt_t rtblks) | 1037 | xfs_qcnt_t rtblks) |
1021 | { | 1038 | { |
1022 | struct xfs_mount *mp = ip->i_mount; | 1039 | struct xfs_mount *mp = ip->i_mount; |
1023 | struct xfs_dquot *dqp; | 1040 | struct xfs_dquot *dqp; |
1024 | int error; | 1041 | int error; |
1025 | 1042 | ||
1026 | error = xfs_qm_dqget(mp, ip, id, type, | 1043 | error = xfs_qm_dqget(mp, ip, id, type, |
1027 | XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); | 1044 | XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); |
1028 | if (error) { | 1045 | if (error) { |
1029 | /* | 1046 | /* |
1030 | * Shouldn't be able to turn off quotas here. | 1047 | * Shouldn't be able to turn off quotas here. |
1031 | */ | 1048 | */ |
1032 | ASSERT(error != ESRCH); | 1049 | ASSERT(error != ESRCH); |
1033 | ASSERT(error != ENOENT); | 1050 | ASSERT(error != ENOENT); |
1034 | return error; | 1051 | return error; |
1035 | } | 1052 | } |
1036 | 1053 | ||
1037 | trace_xfs_dqadjust(dqp); | 1054 | trace_xfs_dqadjust(dqp); |
1038 | 1055 | ||
1039 | /* | 1056 | /* |
1040 | * Adjust the inode count and the block count to reflect this inode's | 1057 | * Adjust the inode count and the block count to reflect this inode's |
1041 | * resource usage. | 1058 | * resource usage. |
1042 | */ | 1059 | */ |
1043 | be64_add_cpu(&dqp->q_core.d_icount, 1); | 1060 | be64_add_cpu(&dqp->q_core.d_icount, 1); |
1044 | dqp->q_res_icount++; | 1061 | dqp->q_res_icount++; |
1045 | if (nblks) { | 1062 | if (nblks) { |
1046 | be64_add_cpu(&dqp->q_core.d_bcount, nblks); | 1063 | be64_add_cpu(&dqp->q_core.d_bcount, nblks); |
1047 | dqp->q_res_bcount += nblks; | 1064 | dqp->q_res_bcount += nblks; |
1048 | } | 1065 | } |
1049 | if (rtblks) { | 1066 | if (rtblks) { |
1050 | be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); | 1067 | be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); |
1051 | dqp->q_res_rtbcount += rtblks; | 1068 | dqp->q_res_rtbcount += rtblks; |
1052 | } | 1069 | } |
1053 | 1070 | ||
1054 | /* | 1071 | /* |
1055 | * Set default limits, adjust timers (since we changed usages) | 1072 | * Set default limits, adjust timers (since we changed usages) |
1056 | * | 1073 | * |
1057 | * There are no timers for the default values set in the root dquot. | 1074 | * There are no timers for the default values set in the root dquot. |
1058 | */ | 1075 | */ |
1059 | if (dqp->q_core.d_id) { | 1076 | if (dqp->q_core.d_id) { |
1060 | xfs_qm_adjust_dqlimits(mp, dqp); | 1077 | xfs_qm_adjust_dqlimits(mp, dqp); |
1061 | xfs_qm_adjust_dqtimers(mp, &dqp->q_core); | 1078 | xfs_qm_adjust_dqtimers(mp, &dqp->q_core); |
1062 | } | 1079 | } |
1063 | 1080 | ||
1064 | dqp->dq_flags |= XFS_DQ_DIRTY; | 1081 | dqp->dq_flags |= XFS_DQ_DIRTY; |
1065 | xfs_qm_dqput(dqp); | 1082 | xfs_qm_dqput(dqp); |
1066 | return 0; | 1083 | return 0; |
1067 | } | 1084 | } |
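The be64_add_cpu() calls above exist because the q_core counters live in on-disk (big-endian) byte order, while the q_res_* shadow counters are plain CPU-order integers; every on-disk adjustment is therefore a convert/add/convert round trip. The helper's effect, sketched with the glibc byte-order functions:

    #include <stdint.h>
    #include <endian.h> /* htobe64()/be64toh(), glibc */

    /* Sketch of be64_add_cpu(): adjust a big-endian field in place. */
    static inline void be64_add(uint64_t *be_field, int64_t delta)
    {
            *be_field = htobe64(be64toh(*be_field) + delta);
    }

So be64_add(&d_icount, 1) on a big-endian field has the same effect as the be64_add_cpu(&dqp->q_core.d_icount, 1) call above.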
1068 | 1085 | ||
1069 | STATIC int | 1086 | STATIC int |
1070 | xfs_qm_get_rtblks( | 1087 | xfs_qm_get_rtblks( |
1071 | xfs_inode_t *ip, | 1088 | xfs_inode_t *ip, |
1072 | xfs_qcnt_t *O_rtblks) | 1089 | xfs_qcnt_t *O_rtblks) |
1073 | { | 1090 | { |
1074 | xfs_filblks_t rtblks; /* total rt blks */ | 1091 | xfs_filblks_t rtblks; /* total rt blks */ |
1075 | xfs_extnum_t idx; /* extent record index */ | 1092 | xfs_extnum_t idx; /* extent record index */ |
1076 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1093 | xfs_ifork_t *ifp; /* inode fork pointer */ |
1077 | xfs_extnum_t nextents; /* number of extent entries */ | 1094 | xfs_extnum_t nextents; /* number of extent entries */ |
1078 | int error; | 1095 | int error; |
1079 | 1096 | ||
1080 | ASSERT(XFS_IS_REALTIME_INODE(ip)); | 1097 | ASSERT(XFS_IS_REALTIME_INODE(ip)); |
1081 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | 1098 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
1082 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | 1099 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { |
1083 | if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) | 1100 | if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) |
1084 | return error; | 1101 | return error; |
1085 | } | 1102 | } |
1086 | rtblks = 0; | 1103 | rtblks = 0; |
1087 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 1104 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
1088 | for (idx = 0; idx < nextents; idx++) | 1105 | for (idx = 0; idx < nextents; idx++) |
1089 | rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); | 1106 | rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); |
1090 | *O_rtblks = (xfs_qcnt_t)rtblks; | 1107 | *O_rtblks = (xfs_qcnt_t)rtblks; |
1091 | return 0; | 1108 | return 0; |
1092 | } | 1109 | } |
1093 | 1110 | ||
1094 | /* | 1111 | /* |
1095 | * Callback routine supplied to bulkstat(). Given an inumber, find its | 1112 | * Callback routine supplied to bulkstat(). Given an inumber, find its |
1096 | * dquots and update them to account for resources taken by that inode. | 1113 | * dquots and update them to account for resources taken by that inode. |
1097 | */ | 1114 | */ |
1098 | /* ARGSUSED */ | 1115 | /* ARGSUSED */ |
1099 | STATIC int | 1116 | STATIC int |
1100 | xfs_qm_dqusage_adjust( | 1117 | xfs_qm_dqusage_adjust( |
1101 | xfs_mount_t *mp, /* mount point for filesystem */ | 1118 | xfs_mount_t *mp, /* mount point for filesystem */ |
1102 | xfs_ino_t ino, /* inode number to get data for */ | 1119 | xfs_ino_t ino, /* inode number to get data for */ |
1103 | void __user *buffer, /* not used */ | 1120 | void __user *buffer, /* not used */ |
1104 | int ubsize, /* not used */ | 1121 | int ubsize, /* not used */ |
1105 | int *ubused, /* not used */ | 1122 | int *ubused, /* not used */ |
1106 | int *res) /* result code value */ | 1123 | int *res) /* result code value */ |
1107 | { | 1124 | { |
1108 | xfs_inode_t *ip; | 1125 | xfs_inode_t *ip; |
1109 | xfs_qcnt_t nblks, rtblks = 0; | 1126 | xfs_qcnt_t nblks, rtblks = 0; |
1110 | int error; | 1127 | int error; |
1111 | 1128 | ||
1112 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1129 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1113 | 1130 | ||
1114 | /* | 1131 | /* |
1115 | * rootino must have its resources accounted for; not so with the quota | 1132 | * rootino must have its resources accounted for; not so with the quota |
1116 | * inodes. | 1133 | * inodes. |
1117 | */ | 1134 | */ |
1118 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { | 1135 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { |
1119 | *res = BULKSTAT_RV_NOTHING; | 1136 | *res = BULKSTAT_RV_NOTHING; |
1120 | return XFS_ERROR(EINVAL); | 1137 | return XFS_ERROR(EINVAL); |
1121 | } | 1138 | } |
1122 | 1139 | ||
1123 | /* | 1140 | /* |
1124 | * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget | 1141 | * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget |
1125 | * interface expects the inode to be exclusively locked because that's | 1142 | * interface expects the inode to be exclusively locked because that's |
1126 | * the case in all other instances. It's OK that we do this because | 1143 | * the case in all other instances. It's OK that we do this because |
1127 | * quotacheck is done only at mount time. | 1144 | * quotacheck is done only at mount time. |
1128 | */ | 1145 | */ |
1129 | error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); | 1146 | error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); |
1130 | if (error) { | 1147 | if (error) { |
1131 | *res = BULKSTAT_RV_NOTHING; | 1148 | *res = BULKSTAT_RV_NOTHING; |
1132 | return error; | 1149 | return error; |
1133 | } | 1150 | } |
1134 | 1151 | ||
1135 | ASSERT(ip->i_delayed_blks == 0); | 1152 | ASSERT(ip->i_delayed_blks == 0); |
1136 | 1153 | ||
1137 | if (XFS_IS_REALTIME_INODE(ip)) { | 1154 | if (XFS_IS_REALTIME_INODE(ip)) { |
1138 | /* | 1155 | /* |
1139 | * Walk thru the extent list and count the realtime blocks. | 1156 | * Walk thru the extent list and count the realtime blocks. |
1140 | */ | 1157 | */ |
1141 | error = xfs_qm_get_rtblks(ip, &rtblks); | 1158 | error = xfs_qm_get_rtblks(ip, &rtblks); |
1142 | if (error) | 1159 | if (error) |
1143 | goto error0; | 1160 | goto error0; |
1144 | } | 1161 | } |
1145 | 1162 | ||
1146 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; | 1163 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; |
1147 | 1164 | ||
1148 | /* | 1165 | /* |
1149 | * Add the (disk blocks and inode) resources occupied by this | 1166 | * Add the (disk blocks and inode) resources occupied by this |
1150 | * inode to its dquots. We do this adjustment in the incore dquot, | 1167 | * inode to its dquots. We do this adjustment in the incore dquot, |
1151 | * and also copy the changes to its buffer. | 1168 | * and also copy the changes to its buffer. |
1152 | * We don't care about putting these changes in a transaction | 1169 | * We don't care about putting these changes in a transaction |
1153 | * envelope because if we crash in the middle of a 'quotacheck' | 1170 | * envelope because if we crash in the middle of a 'quotacheck' |
1154 | * we have to start from the beginning anyway. | 1171 | * we have to start from the beginning anyway. |
1155 | * Once we're done, we'll log all the dquot bufs. | 1172 | * Once we're done, we'll log all the dquot bufs. |
1156 | * | 1173 | * |
1157 | * The *QUOTA_ON checks below may look pretty racy, but quotachecks | 1174 | * The *QUOTA_ON checks below may look pretty racy, but quotachecks |
1158 | * and quotaoffs don't race. (Quotachecks happen at mount time only). | 1175 | * and quotaoffs don't race. (Quotachecks happen at mount time only). |
1159 | */ | 1176 | */ |
1160 | if (XFS_IS_UQUOTA_ON(mp)) { | 1177 | if (XFS_IS_UQUOTA_ON(mp)) { |
1161 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, | 1178 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, |
1162 | XFS_DQ_USER, nblks, rtblks); | 1179 | XFS_DQ_USER, nblks, rtblks); |
1163 | if (error) | 1180 | if (error) |
1164 | goto error0; | 1181 | goto error0; |
1165 | } | 1182 | } |
1166 | 1183 | ||
1167 | if (XFS_IS_GQUOTA_ON(mp)) { | 1184 | if (XFS_IS_GQUOTA_ON(mp)) { |
1168 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, | 1185 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, |
1169 | XFS_DQ_GROUP, nblks, rtblks); | 1186 | XFS_DQ_GROUP, nblks, rtblks); |
1170 | if (error) | 1187 | if (error) |
1171 | goto error0; | 1188 | goto error0; |
1172 | } | 1189 | } |
1173 | 1190 | ||
1174 | if (XFS_IS_PQUOTA_ON(mp)) { | 1191 | if (XFS_IS_PQUOTA_ON(mp)) { |
1175 | error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), | 1192 | error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), |
1176 | XFS_DQ_PROJ, nblks, rtblks); | 1193 | XFS_DQ_PROJ, nblks, rtblks); |
1177 | if (error) | 1194 | if (error) |
1178 | goto error0; | 1195 | goto error0; |
1179 | } | 1196 | } |
1180 | 1197 | ||
1181 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1198 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1182 | IRELE(ip); | 1199 | IRELE(ip); |
1183 | *res = BULKSTAT_RV_DIDONE; | 1200 | *res = BULKSTAT_RV_DIDONE; |
1184 | return 0; | 1201 | return 0; |
1185 | 1202 | ||
1186 | error0: | 1203 | error0: |
1187 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1204 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1188 | IRELE(ip); | 1205 | IRELE(ip); |
1189 | *res = BULKSTAT_RV_GIVEUP; | 1206 | *res = BULKSTAT_RV_GIVEUP; |
1190 | return error; | 1207 | return error; |
1191 | } | 1208 | } |
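xfs_qm_dqusage_adjust() has the shape every bulkstat callback shares: look the inode up, do the per-quota-type work, and report one of three result codes so the iterator knows whether the inode was skipped, counted, or whether to give up. A skeleton of that contract -- all names here are hypothetical; only the three-way result mirrors BULKSTAT_RV_NOTHING/DIDONE/GIVEUP:

    #include <stdint.h>

    enum cb_result { CB_NOTHING, CB_DIDONE, CB_GIVEUP };

    extern int  get_object(uint64_t ino, void **objp);  /* hypothetical lookup */
    extern int  adjust_all_types(void *obj);            /* hypothetical per-type work */
    extern void put_object(void *obj);

    int usage_adjust_cb(uint64_t ino, enum cb_result *res)
    {
            void *obj;
            int error;

            error = get_object(ino, &obj);
            if (error) {
                    *res = CB_NOTHING;  /* skipped: nothing was counted */
                    return error;
            }

            error = adjust_all_types(obj);
            put_object(obj);

            *res = error ? CB_GIVEUP : CB_DIDONE;
            return error;
    }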
1192 | 1209 | ||
1193 | STATIC int | 1210 | STATIC int |
1194 | xfs_qm_flush_one( | 1211 | xfs_qm_flush_one( |
1195 | struct xfs_dquot *dqp, | 1212 | struct xfs_dquot *dqp, |
1196 | void *data) | 1213 | void *data) |
1197 | { | 1214 | { |
1198 | struct list_head *buffer_list = data; | 1215 | struct list_head *buffer_list = data; |
1199 | struct xfs_buf *bp = NULL; | 1216 | struct xfs_buf *bp = NULL; |
1200 | int error = 0; | 1217 | int error = 0; |
1201 | 1218 | ||
1202 | xfs_dqlock(dqp); | 1219 | xfs_dqlock(dqp); |
1203 | if (dqp->dq_flags & XFS_DQ_FREEING) | 1220 | if (dqp->dq_flags & XFS_DQ_FREEING) |
1204 | goto out_unlock; | 1221 | goto out_unlock; |
1205 | if (!XFS_DQ_IS_DIRTY(dqp)) | 1222 | if (!XFS_DQ_IS_DIRTY(dqp)) |
1206 | goto out_unlock; | 1223 | goto out_unlock; |
1207 | 1224 | ||
1208 | xfs_dqflock(dqp); | 1225 | xfs_dqflock(dqp); |
1209 | error = xfs_qm_dqflush(dqp, &bp); | 1226 | error = xfs_qm_dqflush(dqp, &bp); |
1210 | if (error) | 1227 | if (error) |
1211 | goto out_unlock; | 1228 | goto out_unlock; |
1212 | 1229 | ||
1213 | xfs_buf_delwri_queue(bp, buffer_list); | 1230 | xfs_buf_delwri_queue(bp, buffer_list); |
1214 | xfs_buf_relse(bp); | 1231 | xfs_buf_relse(bp); |
1215 | out_unlock: | 1232 | out_unlock: |
1216 | xfs_dqunlock(dqp); | 1233 | xfs_dqunlock(dqp); |
1217 | return error; | 1234 | return error; |
1218 | } | 1235 | } |
1219 | 1236 | ||
1220 | /* | 1237 | /* |
1221 | * Walk thru all the filesystem inodes and construct a consistent view | 1238 | * Walk thru all the filesystem inodes and construct a consistent view |
1222 | * of the disk quota world. If the quotacheck fails, disable quotas. | 1239 | * of the disk quota world. If the quotacheck fails, disable quotas. |
1223 | */ | 1240 | */ |
1224 | int | 1241 | int |
1225 | xfs_qm_quotacheck( | 1242 | xfs_qm_quotacheck( |
1226 | xfs_mount_t *mp) | 1243 | xfs_mount_t *mp) |
1227 | { | 1244 | { |
1228 | int done, count, error, error2; | 1245 | int done, count, error, error2; |
1229 | xfs_ino_t lastino; | 1246 | xfs_ino_t lastino; |
1230 | size_t structsz; | 1247 | size_t structsz; |
1231 | xfs_inode_t *uip, *gip; | 1248 | xfs_inode_t *uip, *gip; |
1232 | uint flags; | 1249 | uint flags; |
1233 | LIST_HEAD (buffer_list); | 1250 | LIST_HEAD (buffer_list); |
1234 | 1251 | ||
1235 | count = INT_MAX; | 1252 | count = INT_MAX; |
1236 | structsz = 1; | 1253 | structsz = 1; |
1237 | lastino = 0; | 1254 | lastino = 0; |
1238 | flags = 0; | 1255 | flags = 0; |
1239 | 1256 | ||
1240 | ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); | 1257 | ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); |
1241 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1258 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1242 | 1259 | ||
1243 | xfs_notice(mp, "Quotacheck needed: Please wait."); | 1260 | xfs_notice(mp, "Quotacheck needed: Please wait."); |
1244 | 1261 | ||
1245 | /* | 1262 | /* |
1246 | * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset | 1263 | * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset |
1247 | * their counters to zero. We need a clean slate. | 1264 | * their counters to zero. We need a clean slate. |
1248 | * We don't log our changes till later. | 1265 | * We don't log our changes till later. |
1249 | */ | 1266 | */ |
1250 | uip = mp->m_quotainfo->qi_uquotaip; | 1267 | uip = mp->m_quotainfo->qi_uquotaip; |
1251 | if (uip) { | 1268 | if (uip) { |
1252 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, | 1269 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, |
1253 | &buffer_list); | 1270 | &buffer_list); |
1254 | if (error) | 1271 | if (error) |
1255 | goto error_return; | 1272 | goto error_return; |
1256 | flags |= XFS_UQUOTA_CHKD; | 1273 | flags |= XFS_UQUOTA_CHKD; |
1257 | } | 1274 | } |
1258 | 1275 | ||
1259 | gip = mp->m_quotainfo->qi_gquotaip; | 1276 | gip = mp->m_quotainfo->qi_gquotaip; |
1260 | if (gip) { | 1277 | if (gip) { |
1261 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? | 1278 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? |
1262 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, | 1279 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, |
1263 | &buffer_list); | 1280 | &buffer_list); |
1264 | if (error) | 1281 | if (error) |
1265 | goto error_return; | 1282 | goto error_return; |
1266 | flags |= XFS_OQUOTA_CHKD; | 1283 | flags |= XFS_OQUOTA_CHKD; |
1267 | } | 1284 | } |
1268 | 1285 | ||
1269 | do { | 1286 | do { |
1270 | /* | 1287 | /* |
1271 | * Iterate thru all the inodes in the file system, | 1288 | * Iterate thru all the inodes in the file system, |
1272 | * adjusting the corresponding dquot counters in core. | 1289 | * adjusting the corresponding dquot counters in core. |
1273 | */ | 1290 | */ |
1274 | error = xfs_bulkstat(mp, &lastino, &count, | 1291 | error = xfs_bulkstat(mp, &lastino, &count, |
1275 | xfs_qm_dqusage_adjust, | 1292 | xfs_qm_dqusage_adjust, |
1276 | structsz, NULL, &done); | 1293 | structsz, NULL, &done); |
1277 | if (error) | 1294 | if (error) |
1278 | break; | 1295 | break; |
1279 | 1296 | ||
1280 | } while (!done); | 1297 | } while (!done); |
1281 | 1298 | ||
1282 | /* | 1299 | /* |
1283 | * We've made all the changes that we need to make incore. Flush them | 1300 | * We've made all the changes that we need to make incore. Flush them |
1284 | * down to disk buffers if everything was updated successfully. | 1301 | * down to disk buffers if everything was updated successfully. |
1285 | */ | 1302 | */ |
1286 | if (XFS_IS_UQUOTA_ON(mp)) { | 1303 | if (XFS_IS_UQUOTA_ON(mp)) { |
1287 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one, | 1304 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one, |
1288 | &buffer_list); | 1305 | &buffer_list); |
1289 | } | 1306 | } |
1290 | if (XFS_IS_GQUOTA_ON(mp)) { | 1307 | if (XFS_IS_GQUOTA_ON(mp)) { |
1291 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one, | 1308 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one, |
1292 | &buffer_list); | 1309 | &buffer_list); |
1293 | if (!error) | 1310 | if (!error) |
1294 | error = error2; | 1311 | error = error2; |
1295 | } | 1312 | } |
1296 | if (XFS_IS_PQUOTA_ON(mp)) { | 1313 | if (XFS_IS_PQUOTA_ON(mp)) { |
1297 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one, | 1314 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one, |
1298 | &buffer_list); | 1315 | &buffer_list); |
1299 | if (!error) | 1316 | if (!error) |
1300 | error = error2; | 1317 | error = error2; |
1301 | } | 1318 | } |
1302 | 1319 | ||
1303 | error2 = xfs_buf_delwri_submit(&buffer_list); | 1320 | error2 = xfs_buf_delwri_submit(&buffer_list); |
1304 | if (!error) | 1321 | if (!error) |
1305 | error = error2; | 1322 | error = error2; |
1306 | 1323 | ||
1307 | /* | 1324 | /* |
1308 | * We can get this error if we couldn't do a dquot allocation inside | 1325 | * We can get this error if we couldn't do a dquot allocation inside |
1309 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the | 1326 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the |
1310 | * dirty dquots that might be cached, we just want to get rid of them | 1327 | * dirty dquots that might be cached, we just want to get rid of them |
1311 | * and turn quotaoff. The dquots won't be attached to any of the inodes | 1328 | * and turn quotaoff. The dquots won't be attached to any of the inodes |
1312 | * at this point (because we intentionally didn't in dqget_noattach). | 1329 | * at this point (because we intentionally didn't in dqget_noattach). |
1313 | */ | 1330 | */ |
1314 | if (error) { | 1331 | if (error) { |
1315 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); | 1332 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); |
1316 | goto error_return; | 1333 | goto error_return; |
1317 | } | 1334 | } |
1318 | 1335 | ||
1319 | /* | 1336 | /* |
1320 | * If one type of quota is off, then it will lose its | 1337 | * If one type of quota is off, then it will lose its |
1321 | * quotachecked status, since we won't be doing accounting for | 1338 | * quotachecked status, since we won't be doing accounting for |
1322 | * that type anymore. | 1339 | * that type anymore. |
1323 | */ | 1340 | */ |
1324 | mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD; | 1341 | mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD; |
1325 | mp->m_qflags |= flags; | 1342 | mp->m_qflags |= flags; |
1326 | 1343 | ||
1327 | error_return: | 1344 | error_return: |
1328 | while (!list_empty(&buffer_list)) { | 1345 | while (!list_empty(&buffer_list)) { |
1329 | struct xfs_buf *bp = | 1346 | struct xfs_buf *bp = |
1330 | list_first_entry(&buffer_list, struct xfs_buf, b_list); | 1347 | list_first_entry(&buffer_list, struct xfs_buf, b_list); |
1331 | list_del_init(&bp->b_list); | 1348 | list_del_init(&bp->b_list); |
1332 | xfs_buf_relse(bp); | 1349 | xfs_buf_relse(bp); |
1333 | } | 1350 | } |
1334 | 1351 | ||
1335 | if (error) { | 1352 | if (error) { |
1336 | xfs_warn(mp, | 1353 | xfs_warn(mp, |
1337 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", | 1354 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", |
1338 | error); | 1355 | error); |
1339 | /* | 1356 | /* |
1340 | * We must turn off quotas. | 1357 | * We must turn off quotas. |
1341 | */ | 1358 | */ |
1342 | ASSERT(mp->m_quotainfo != NULL); | 1359 | ASSERT(mp->m_quotainfo != NULL); |
1343 | xfs_qm_destroy_quotainfo(mp); | 1360 | xfs_qm_destroy_quotainfo(mp); |
1344 | if (xfs_mount_reset_sbqflags(mp)) { | 1361 | if (xfs_mount_reset_sbqflags(mp)) { |
1345 | xfs_warn(mp, | 1362 | xfs_warn(mp, |
1346 | "Quotacheck: Failed to reset quota flags."); | 1363 | "Quotacheck: Failed to reset quota flags."); |
1347 | } | 1364 | } |
1348 | } else | 1365 | } else |
1349 | xfs_notice(mp, "Quotacheck: Done."); | 1366 | xfs_notice(mp, "Quotacheck: Done."); |
1350 | return (error); | 1367 | return (error); |
1351 | } | 1368 | } |
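The flush-and-submit tail of xfs_qm_quotacheck() relies on a small but deliberate error idiom: every step still runs, but only the first failure is reported, so all dquot types get flushed and the delwri list still gets submitted even after an earlier error. In isolation (step functions hypothetical):

    extern int step_one(void), step_two(void), step_three(void);

    int first_error_wins(void)
    {
            int error, error2;

            error  = step_one();
            error2 = step_two();
            if (!error)             /* keep the earliest error ... */
                    error = error2;
            error2 = step_three();  /* ... but run every step regardless */
            if (!error)
                    error = error2;
            return error;
    }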
1352 | 1369 | ||
1353 | /* | 1370 | /* |
1354 | * This is called after the superblock has been read in and we're ready to | 1371 | * This is called after the superblock has been read in and we're ready to |
1355 | * iget the quota inodes. | 1372 | * iget the quota inodes. |
1356 | */ | 1373 | */ |
1357 | STATIC int | 1374 | STATIC int |
1358 | xfs_qm_init_quotainos( | 1375 | xfs_qm_init_quotainos( |
1359 | xfs_mount_t *mp) | 1376 | xfs_mount_t *mp) |
1360 | { | 1377 | { |
1361 | xfs_inode_t *uip, *gip; | 1378 | xfs_inode_t *uip, *gip; |
1362 | int error; | 1379 | int error; |
1363 | __int64_t sbflags; | 1380 | __int64_t sbflags; |
1364 | uint flags; | 1381 | uint flags; |
1365 | 1382 | ||
1366 | ASSERT(mp->m_quotainfo); | 1383 | ASSERT(mp->m_quotainfo); |
1367 | uip = gip = NULL; | 1384 | uip = gip = NULL; |
1368 | sbflags = 0; | 1385 | sbflags = 0; |
1369 | flags = 0; | 1386 | flags = 0; |
1370 | 1387 | ||
1371 | /* | 1388 | /* |
1372 | * Get the uquota and gquota inodes | 1389 | * Get the uquota and gquota inodes |
1373 | */ | 1390 | */ |
1374 | if (xfs_sb_version_hasquota(&mp->m_sb)) { | 1391 | if (xfs_sb_version_hasquota(&mp->m_sb)) { |
1375 | if (XFS_IS_UQUOTA_ON(mp) && | 1392 | if (XFS_IS_UQUOTA_ON(mp) && |
1376 | mp->m_sb.sb_uquotino != NULLFSINO) { | 1393 | mp->m_sb.sb_uquotino != NULLFSINO) { |
1377 | ASSERT(mp->m_sb.sb_uquotino > 0); | 1394 | ASSERT(mp->m_sb.sb_uquotino > 0); |
1378 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, | 1395 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, |
1379 | 0, 0, &uip))) | 1396 | 0, 0, &uip))) |
1380 | return XFS_ERROR(error); | 1397 | return XFS_ERROR(error); |
1381 | } | 1398 | } |
1382 | if (XFS_IS_OQUOTA_ON(mp) && | 1399 | if (XFS_IS_OQUOTA_ON(mp) && |
1383 | mp->m_sb.sb_gquotino != NULLFSINO) { | 1400 | mp->m_sb.sb_gquotino != NULLFSINO) { |
1384 | ASSERT(mp->m_sb.sb_gquotino > 0); | 1401 | ASSERT(mp->m_sb.sb_gquotino > 0); |
1385 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, | 1402 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, |
1386 | 0, 0, &gip))) { | 1403 | 0, 0, &gip))) { |
1387 | if (uip) | 1404 | if (uip) |
1388 | IRELE(uip); | 1405 | IRELE(uip); |
1389 | return XFS_ERROR(error); | 1406 | return XFS_ERROR(error); |
1390 | } | 1407 | } |
1391 | } | 1408 | } |
1392 | } else { | 1409 | } else { |
1393 | flags |= XFS_QMOPT_SBVERSION; | 1410 | flags |= XFS_QMOPT_SBVERSION; |
1394 | sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | | 1411 | sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | |
1395 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS); | 1412 | XFS_SB_GQUOTINO | XFS_SB_QFLAGS); |
1396 | } | 1413 | } |
1397 | 1414 | ||
1398 | /* | 1415 | /* |
1399 | * Create the two inodes, if they don't exist already. The changes | 1416 | * Create the two inodes, if they don't exist already. The changes |
1400 | * made above will get added to a transaction and logged in one of | 1417 | * made above will get added to a transaction and logged in one of |
1401 | * the qino_alloc calls below. If the device is readonly, | 1418 | * the qino_alloc calls below. If the device is readonly, |
1402 | * temporarily switch to read-write to do this. | 1419 | * temporarily switch to read-write to do this. |
1403 | */ | 1420 | */ |
1404 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { | 1421 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { |
1405 | if ((error = xfs_qm_qino_alloc(mp, &uip, | 1422 | if ((error = xfs_qm_qino_alloc(mp, &uip, |
1406 | sbflags | XFS_SB_UQUOTINO, | 1423 | sbflags | XFS_SB_UQUOTINO, |
1407 | flags | XFS_QMOPT_UQUOTA))) | 1424 | flags | XFS_QMOPT_UQUOTA))) |
1408 | return XFS_ERROR(error); | 1425 | return XFS_ERROR(error); |
1409 | 1426 | ||
1410 | flags &= ~XFS_QMOPT_SBVERSION; | 1427 | flags &= ~XFS_QMOPT_SBVERSION; |
1411 | } | 1428 | } |
1412 | if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { | 1429 | if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { |
1413 | flags |= (XFS_IS_GQUOTA_ON(mp) ? | 1430 | flags |= (XFS_IS_GQUOTA_ON(mp) ? |
1414 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); | 1431 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); |
1415 | error = xfs_qm_qino_alloc(mp, &gip, | 1432 | error = xfs_qm_qino_alloc(mp, &gip, |
1416 | sbflags | XFS_SB_GQUOTINO, flags); | 1433 | sbflags | XFS_SB_GQUOTINO, flags); |
1417 | if (error) { | 1434 | if (error) { |
1418 | if (uip) | 1435 | if (uip) |
1419 | IRELE(uip); | 1436 | IRELE(uip); |
1420 | 1437 | ||
1421 | return XFS_ERROR(error); | 1438 | return XFS_ERROR(error); |
1422 | } | 1439 | } |
1423 | } | 1440 | } |
1424 | 1441 | ||
1425 | mp->m_quotainfo->qi_uquotaip = uip; | 1442 | mp->m_quotainfo->qi_uquotaip = uip; |
1426 | mp->m_quotainfo->qi_gquotaip = gip; | 1443 | mp->m_quotainfo->qi_gquotaip = gip; |
1427 | 1444 | ||
1428 | return 0; | 1445 | return 0; |
1429 | } | 1446 | } |
1430 | 1447 | ||
1431 | STATIC void | 1448 | STATIC void |
1432 | xfs_qm_dqfree_one( | 1449 | xfs_qm_dqfree_one( |
1433 | struct xfs_dquot *dqp) | 1450 | struct xfs_dquot *dqp) |
1434 | { | 1451 | { |
1435 | struct xfs_mount *mp = dqp->q_mount; | 1452 | struct xfs_mount *mp = dqp->q_mount; |
1436 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 1453 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
1437 | 1454 | ||
1438 | mutex_lock(&qi->qi_tree_lock); | 1455 | mutex_lock(&qi->qi_tree_lock); |
1439 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), | 1456 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), |
1440 | be32_to_cpu(dqp->q_core.d_id)); | 1457 | be32_to_cpu(dqp->q_core.d_id)); |
1441 | 1458 | ||
1442 | qi->qi_dquots--; | 1459 | qi->qi_dquots--; |
1443 | mutex_unlock(&qi->qi_tree_lock); | 1460 | mutex_unlock(&qi->qi_tree_lock); |
1444 | 1461 | ||
1445 | xfs_qm_dqdestroy(dqp); | 1462 | xfs_qm_dqdestroy(dqp); |
1446 | } | 1463 | } |
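xfs_qm_dqfree_one() is the standard unlink-then-free split: removal from the radix tree and the counter update happen under qi_tree_lock, but the actual destruction happens after the lock drops, since by then no lookup can find the dquot. The idiom, with hypothetical names:

    struct obj { unsigned long id; /* ... */ };

    extern void lock_index(void);
    extern void unlock_index(void);
    extern void index_delete(unsigned long id); /* radix-tree-style removal */
    extern void obj_destroy(struct obj *o);     /* may sleep or do I/O */

    void free_one(struct obj *o)
    {
            lock_index();
            index_delete(o->id);    /* object is now unfindable */
            unlock_index();

            obj_destroy(o);         /* safe outside the lock */
    }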
1447 | 1464 | ||
1448 | STATIC void | 1465 | STATIC void |
1449 | xfs_qm_dqreclaim_one( | 1466 | xfs_qm_dqreclaim_one( |
1450 | struct xfs_dquot *dqp, | 1467 | struct xfs_dquot *dqp, |
1451 | struct list_head *buffer_list, | 1468 | struct list_head *buffer_list, |
1452 | struct list_head *dispose_list) | 1469 | struct list_head *dispose_list) |
1453 | { | 1470 | { |
1454 | struct xfs_mount *mp = dqp->q_mount; | 1471 | struct xfs_mount *mp = dqp->q_mount; |
1455 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 1472 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
1456 | int error; | 1473 | int error; |
1457 | 1474 | ||
1458 | if (!xfs_dqlock_nowait(dqp)) | 1475 | if (!xfs_dqlock_nowait(dqp)) |
1459 | goto out_move_tail; | 1476 | goto out_move_tail; |
1460 | 1477 | ||
1461 | /* | 1478 | /* |
1462 | * This dquot has acquired a reference in the meantime; remove it from | 1479 | * This dquot has acquired a reference in the meantime; remove it from |
1463 | * the freelist and try again. | 1480 | * the freelist and try again. |
1464 | */ | 1481 | */ |
1465 | if (dqp->q_nrefs) { | 1482 | if (dqp->q_nrefs) { |
1466 | xfs_dqunlock(dqp); | 1483 | xfs_dqunlock(dqp); |
1467 | 1484 | ||
1468 | trace_xfs_dqreclaim_want(dqp); | 1485 | trace_xfs_dqreclaim_want(dqp); |
1469 | XFS_STATS_INC(xs_qm_dqwants); | 1486 | XFS_STATS_INC(xs_qm_dqwants); |
1470 | 1487 | ||
1471 | list_del_init(&dqp->q_lru); | 1488 | list_del_init(&dqp->q_lru); |
1472 | qi->qi_lru_count--; | 1489 | qi->qi_lru_count--; |
1473 | XFS_STATS_DEC(xs_qm_dquot_unused); | 1490 | XFS_STATS_DEC(xs_qm_dquot_unused); |
1474 | return; | 1491 | return; |
1475 | } | 1492 | } |
1476 | 1493 | ||
1477 | /* | 1494 | /* |
1478 | * Try to grab the flush lock. If this dquot is in the process of | 1495 | * Try to grab the flush lock. If this dquot is in the process of |
1479 | * getting flushed to disk, we don't want to reclaim it. | 1496 | * getting flushed to disk, we don't want to reclaim it. |
1480 | */ | 1497 | */ |
1481 | if (!xfs_dqflock_nowait(dqp)) | 1498 | if (!xfs_dqflock_nowait(dqp)) |
1482 | goto out_unlock_move_tail; | 1499 | goto out_unlock_move_tail; |
1483 | 1500 | ||
1484 | if (XFS_DQ_IS_DIRTY(dqp)) { | 1501 | if (XFS_DQ_IS_DIRTY(dqp)) { |
1485 | struct xfs_buf *bp = NULL; | 1502 | struct xfs_buf *bp = NULL; |
1486 | 1503 | ||
1487 | trace_xfs_dqreclaim_dirty(dqp); | 1504 | trace_xfs_dqreclaim_dirty(dqp); |
1488 | 1505 | ||
1489 | error = xfs_qm_dqflush(dqp, &bp); | 1506 | error = xfs_qm_dqflush(dqp, &bp); |
1490 | if (error) { | 1507 | if (error) { |
1491 | xfs_warn(mp, "%s: dquot %p flush failed", | 1508 | xfs_warn(mp, "%s: dquot %p flush failed", |
1492 | __func__, dqp); | 1509 | __func__, dqp); |
1493 | goto out_unlock_move_tail; | 1510 | goto out_unlock_move_tail; |
1494 | } | 1511 | } |
1495 | 1512 | ||
1496 | xfs_buf_delwri_queue(bp, buffer_list); | 1513 | xfs_buf_delwri_queue(bp, buffer_list); |
1497 | xfs_buf_relse(bp); | 1514 | xfs_buf_relse(bp); |
1498 | /* | 1515 | /* |
1499 | * Give the dquot another try on the freelist, as the | 1516 | * Give the dquot another try on the freelist, as the |
1500 | * flushing will take some time. | 1517 | * flushing will take some time. |
1501 | */ | 1518 | */ |
1502 | goto out_unlock_move_tail; | 1519 | goto out_unlock_move_tail; |
1503 | } | 1520 | } |
1504 | xfs_dqfunlock(dqp); | 1521 | xfs_dqfunlock(dqp); |
1505 | 1522 | ||
1506 | /* | 1523 | /* |
1507 | * Prevent lookups now that we are past the point of no return. | 1524 | * Prevent lookups now that we are past the point of no return. |
1508 | */ | 1525 | */ |
1509 | dqp->dq_flags |= XFS_DQ_FREEING; | 1526 | dqp->dq_flags |= XFS_DQ_FREEING; |
1510 | xfs_dqunlock(dqp); | 1527 | xfs_dqunlock(dqp); |
1511 | 1528 | ||
1512 | ASSERT(dqp->q_nrefs == 0); | 1529 | ASSERT(dqp->q_nrefs == 0); |
1513 | list_move_tail(&dqp->q_lru, dispose_list); | 1530 | list_move_tail(&dqp->q_lru, dispose_list); |
1514 | qi->qi_lru_count--; | 1531 | qi->qi_lru_count--; |
1515 | XFS_STATS_DEC(xs_qm_dquot_unused); | 1532 | XFS_STATS_DEC(xs_qm_dquot_unused); |
1516 | 1533 | ||
1517 | trace_xfs_dqreclaim_done(dqp); | 1534 | trace_xfs_dqreclaim_done(dqp); |
1518 | XFS_STATS_INC(xs_qm_dqreclaims); | 1535 | XFS_STATS_INC(xs_qm_dqreclaims); |
1519 | return; | 1536 | return; |
1520 | 1537 | ||
1521 | /* | 1538 | /* |
1522 | * Move the dquot to the tail of the list so that we don't spin on it. | 1539 | * Move the dquot to the tail of the list so that we don't spin on it. |
1523 | */ | 1540 | */ |
1524 | out_unlock_move_tail: | 1541 | out_unlock_move_tail: |
1525 | xfs_dqunlock(dqp); | 1542 | xfs_dqunlock(dqp); |
1526 | out_move_tail: | 1543 | out_move_tail: |
1527 | list_move_tail(&dqp->q_lru, &qi->qi_lru_list); | 1544 | list_move_tail(&dqp->q_lru, &qi->qi_lru_list); |
1528 | trace_xfs_dqreclaim_busy(dqp); | 1545 | trace_xfs_dqreclaim_busy(dqp); |
1529 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | 1546 | XFS_STATS_INC(xs_qm_dqreclaim_misses); |
1530 | } | 1547 | } |
1531 | 1548 | ||
1532 | STATIC int | 1549 | STATIC int |
1533 | xfs_qm_shake( | 1550 | xfs_qm_shake( |
1534 | struct shrinker *shrink, | 1551 | struct shrinker *shrink, |
1535 | struct shrink_control *sc) | 1552 | struct shrink_control *sc) |
1536 | { | 1553 | { |
1537 | struct xfs_quotainfo *qi = | 1554 | struct xfs_quotainfo *qi = |
1538 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); | 1555 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); |
1539 | int nr_to_scan = sc->nr_to_scan; | 1556 | int nr_to_scan = sc->nr_to_scan; |
1540 | LIST_HEAD (buffer_list); | 1557 | LIST_HEAD (buffer_list); |
1541 | LIST_HEAD (dispose_list); | 1558 | LIST_HEAD (dispose_list); |
1542 | struct xfs_dquot *dqp; | 1559 | struct xfs_dquot *dqp; |
1543 | int error; | 1560 | int error; |
1544 | 1561 | ||
1545 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | 1562 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) |
1546 | return 0; | 1563 | return 0; |
1547 | if (!nr_to_scan) | 1564 | if (!nr_to_scan) |
1548 | goto out; | 1565 | goto out; |
1549 | 1566 | ||
1550 | mutex_lock(&qi->qi_lru_lock); | 1567 | mutex_lock(&qi->qi_lru_lock); |
1551 | while (!list_empty(&qi->qi_lru_list)) { | 1568 | while (!list_empty(&qi->qi_lru_list)) { |
1552 | if (nr_to_scan-- <= 0) | 1569 | if (nr_to_scan-- <= 0) |
1553 | break; | 1570 | break; |
1554 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, | 1571 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, |
1555 | q_lru); | 1572 | q_lru); |
1556 | xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); | 1573 | xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); |
1557 | } | 1574 | } |
1558 | mutex_unlock(&qi->qi_lru_lock); | 1575 | mutex_unlock(&qi->qi_lru_lock); |
1559 | 1576 | ||
1560 | error = xfs_buf_delwri_submit(&buffer_list); | 1577 | error = xfs_buf_delwri_submit(&buffer_list); |
1561 | if (error) | 1578 | if (error) |
1562 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | 1579 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); |
1563 | 1580 | ||
1564 | while (!list_empty(&dispose_list)) { | 1581 | while (!list_empty(&dispose_list)) { |
1565 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); | 1582 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); |
1566 | list_del_init(&dqp->q_lru); | 1583 | list_del_init(&dqp->q_lru); |
1567 | xfs_qm_dqfree_one(dqp); | 1584 | xfs_qm_dqfree_one(dqp); |
1568 | } | 1585 | } |
1569 | 1586 | ||
1570 | out: | 1587 | out: |
1571 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; | 1588 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; |
1572 | } | 1589 | } |
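The shrinker's return value, (qi_lru_count / 100) * sysctl_vfs_cache_pressure, means that at the default cache pressure of 100 it simply reports the number of unused dquots on the LRU, and raising the sysctl scales that figure up. Worked out:

    /* With lru_count = 2500:
     *   pressure 100 (default) -> (2500 / 100) * 100 = 2500
     *   pressure 200           -> (2500 / 100) * 200 = 5000 (reclaim more eagerly)
     *   pressure  50           -> (2500 / 100) *  50 = 1250
     */
    long shrinker_count(long lru_count, unsigned int pressure)
    {
            return (lru_count / 100) * pressure;
    }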
1573 | 1590 | ||
1574 | /* | 1591 | /* |
1575 | * Start a transaction and write the incore superblock changes to | 1592 | * Start a transaction and write the incore superblock changes to |
1576 | * disk. The flags parameter indicates which fields have changed. | 1593 | * disk. The flags parameter indicates which fields have changed. |
1577 | */ | 1594 | */ |
1578 | int | 1595 | int |
1579 | xfs_qm_write_sb_changes( | 1596 | xfs_qm_write_sb_changes( |
1580 | xfs_mount_t *mp, | 1597 | xfs_mount_t *mp, |
1581 | __int64_t flags) | 1598 | __int64_t flags) |
1582 | { | 1599 | { |
1583 | xfs_trans_t *tp; | 1600 | xfs_trans_t *tp; |
1584 | int error; | 1601 | int error; |
1585 | 1602 | ||
1586 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | 1603 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); |
1587 | error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp), | 1604 | error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp), |
1588 | 0, 0, XFS_DEFAULT_LOG_COUNT); | 1605 | 0, 0, XFS_DEFAULT_LOG_COUNT); |
1589 | if (error) { | 1606 | if (error) { |
1590 | xfs_trans_cancel(tp, 0); | 1607 | xfs_trans_cancel(tp, 0); |
1591 | return error; | 1608 | return error; |
1592 | } | 1609 | } |
1593 | 1610 | ||
1594 | xfs_mod_sb(tp, flags); | 1611 | xfs_mod_sb(tp, flags); |
1595 | error = xfs_trans_commit(tp, 0); | 1612 | error = xfs_trans_commit(tp, 0); |
1596 | 1613 | ||
1597 | return error; | 1614 | return error; |
1598 | } | 1615 | } |
1599 | 1616 | ||
1600 | 1617 | ||
1601 | /* --------------- utility functions for vnodeops ---------------- */ | 1618 | /* --------------- utility functions for vnodeops ---------------- */ |
1602 | 1619 | ||
1603 | 1620 | ||
1604 | /* | 1621 | /* |
1605 | * Given an inode, a uid, gid and prid, make sure that we have | 1622 | * Given an inode, a uid, gid and prid, make sure that we have |
1606 | * allocated relevant dquot(s) on disk, and that we won't exceed inode | 1623 | * allocated relevant dquot(s) on disk, and that we won't exceed inode |
1607 | * quotas by creating this file. | 1624 | * quotas by creating this file. |
1608 | * This also attaches dquot(s) to the given inode after locking it, | 1625 | * This also attaches dquot(s) to the given inode after locking it, |
1609 | * and returns the dquots corresponding to the uid and/or gid. | 1626 | * and returns the dquots corresponding to the uid and/or gid. |
1610 | * | 1627 | * |
1611 | * in : inode (unlocked) | 1628 | * in : inode (unlocked) |
1612 | * out : udquot, gdquot with references taken and unlocked | 1629 | * out : udquot, gdquot with references taken and unlocked |
1613 | */ | 1630 | */ |
1614 | int | 1631 | int |
1615 | xfs_qm_vop_dqalloc( | 1632 | xfs_qm_vop_dqalloc( |
1616 | struct xfs_inode *ip, | 1633 | struct xfs_inode *ip, |
1617 | uid_t uid, | 1634 | uid_t uid, |
1618 | gid_t gid, | 1635 | gid_t gid, |
1619 | prid_t prid, | 1636 | prid_t prid, |
1620 | uint flags, | 1637 | uint flags, |
1621 | struct xfs_dquot **O_udqpp, | 1638 | struct xfs_dquot **O_udqpp, |
1622 | struct xfs_dquot **O_gdqpp) | 1639 | struct xfs_dquot **O_gdqpp) |
1623 | { | 1640 | { |
1624 | struct xfs_mount *mp = ip->i_mount; | 1641 | struct xfs_mount *mp = ip->i_mount; |
1625 | struct xfs_dquot *uq, *gq; | 1642 | struct xfs_dquot *uq, *gq; |
1626 | int error; | 1643 | int error; |
1627 | uint lockflags; | 1644 | uint lockflags; |
1628 | 1645 | ||
1629 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) | 1646 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) |
1630 | return 0; | 1647 | return 0; |
1631 | 1648 | ||
1632 | lockflags = XFS_ILOCK_EXCL; | 1649 | lockflags = XFS_ILOCK_EXCL; |
1633 | xfs_ilock(ip, lockflags); | 1650 | xfs_ilock(ip, lockflags); |
1634 | 1651 | ||
1635 | if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) | 1652 | if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) |
1636 | gid = ip->i_d.di_gid; | 1653 | gid = ip->i_d.di_gid; |
1637 | 1654 | ||
1638 | /* | 1655 | /* |
1639 | * Attach the dquot(s) to this inode, doing a dquot allocation | 1656 | * Attach the dquot(s) to this inode, doing a dquot allocation |
1640 | * if necessary. The dquot(s) will not be locked. | 1657 | * if necessary. The dquot(s) will not be locked. |
1641 | */ | 1658 | */ |
1642 | if (XFS_NOT_DQATTACHED(mp, ip)) { | 1659 | if (XFS_NOT_DQATTACHED(mp, ip)) { |
1643 | error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); | 1660 | error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); |
1644 | if (error) { | 1661 | if (error) { |
1645 | xfs_iunlock(ip, lockflags); | 1662 | xfs_iunlock(ip, lockflags); |
1646 | return error; | 1663 | return error; |
1647 | } | 1664 | } |
1648 | } | 1665 | } |
1649 | 1666 | ||
1650 | uq = gq = NULL; | 1667 | uq = gq = NULL; |
1651 | if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { | 1668 | if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { |
1652 | if (ip->i_d.di_uid != uid) { | 1669 | if (ip->i_d.di_uid != uid) { |
1653 | /* | 1670 | /* |
1654 | * What we need is the dquot that has this uid, and | 1671 | * What we need is the dquot that has this uid, and |
1655 | * if we send the inode to dqget, the uid of the inode | 1672 | * if we send the inode to dqget, the uid of the inode |
1656 | * takes priority over what's sent in the uid argument. | 1673 | * takes priority over what's sent in the uid argument. |
1657 | * We must unlock inode here before calling dqget if | 1674 | * We must unlock inode here before calling dqget if |
1658 | * we're not sending the inode, because otherwise | 1675 | * we're not sending the inode, because otherwise |
1659 | * we'll deadlock by doing trans_reserve while | 1676 | * we'll deadlock by doing trans_reserve while |
1660 | * holding ilock. | 1677 | * holding ilock. |
1661 | */ | 1678 | */ |
1662 | xfs_iunlock(ip, lockflags); | 1679 | xfs_iunlock(ip, lockflags); |
1663 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, | 1680 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, |
1664 | XFS_DQ_USER, | 1681 | XFS_DQ_USER, |
1665 | XFS_QMOPT_DQALLOC | | 1682 | XFS_QMOPT_DQALLOC | |
1666 | XFS_QMOPT_DOWARN, | 1683 | XFS_QMOPT_DOWARN, |
1667 | &uq))) { | 1684 | &uq))) { |
1668 | ASSERT(error != ENOENT); | 1685 | ASSERT(error != ENOENT); |
1669 | return error; | 1686 | return error; |
1670 | } | 1687 | } |
1671 | /* | 1688 | /* |
1672 | * Get the ilock in the right order. | 1689 | * Get the ilock in the right order. |
1673 | */ | 1690 | */ |
1674 | xfs_dqunlock(uq); | 1691 | xfs_dqunlock(uq); |
1675 | lockflags = XFS_ILOCK_SHARED; | 1692 | lockflags = XFS_ILOCK_SHARED; |
1676 | xfs_ilock(ip, lockflags); | 1693 | xfs_ilock(ip, lockflags); |
1677 | } else { | 1694 | } else { |
1678 | /* | 1695 | /* |
1679 | * Take an extra reference, because we'll return | 1696 | * Take an extra reference, because we'll return |
1680 | * this to the caller | 1697 | * this to the caller |
1681 | */ | 1698 | */ |
1682 | ASSERT(ip->i_udquot); | 1699 | ASSERT(ip->i_udquot); |
1683 | uq = xfs_qm_dqhold(ip->i_udquot); | 1700 | uq = xfs_qm_dqhold(ip->i_udquot); |
1684 | } | 1701 | } |
1685 | } | 1702 | } |
1686 | if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { | 1703 | if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { |
1687 | if (ip->i_d.di_gid != gid) { | 1704 | if (ip->i_d.di_gid != gid) { |
1688 | xfs_iunlock(ip, lockflags); | 1705 | xfs_iunlock(ip, lockflags); |
1689 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, | 1706 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, |
1690 | XFS_DQ_GROUP, | 1707 | XFS_DQ_GROUP, |
1691 | XFS_QMOPT_DQALLOC | | 1708 | XFS_QMOPT_DQALLOC | |
1692 | XFS_QMOPT_DOWARN, | 1709 | XFS_QMOPT_DOWARN, |
1693 | &gq))) { | 1710 | &gq))) { |
1694 | if (uq) | 1711 | if (uq) |
1695 | xfs_qm_dqrele(uq); | 1712 | xfs_qm_dqrele(uq); |
1696 | ASSERT(error != ENOENT); | 1713 | ASSERT(error != ENOENT); |
1697 | return error; | 1714 | return error; |
1698 | } | 1715 | } |
1699 | xfs_dqunlock(gq); | 1716 | xfs_dqunlock(gq); |
1700 | lockflags = XFS_ILOCK_SHARED; | 1717 | lockflags = XFS_ILOCK_SHARED; |
1701 | xfs_ilock(ip, lockflags); | 1718 | xfs_ilock(ip, lockflags); |
1702 | } else { | 1719 | } else { |
1703 | ASSERT(ip->i_gdquot); | 1720 | ASSERT(ip->i_gdquot); |
1704 | gq = xfs_qm_dqhold(ip->i_gdquot); | 1721 | gq = xfs_qm_dqhold(ip->i_gdquot); |
1705 | } | 1722 | } |
1706 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { | 1723 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { |
1707 | if (xfs_get_projid(ip) != prid) { | 1724 | if (xfs_get_projid(ip) != prid) { |
1708 | xfs_iunlock(ip, lockflags); | 1725 | xfs_iunlock(ip, lockflags); |
1709 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, | 1726 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, |
1710 | XFS_DQ_PROJ, | 1727 | XFS_DQ_PROJ, |
1711 | XFS_QMOPT_DQALLOC | | 1728 | XFS_QMOPT_DQALLOC | |
1712 | XFS_QMOPT_DOWARN, | 1729 | XFS_QMOPT_DOWARN, |
1713 | &gq))) { | 1730 | &gq))) { |
1714 | if (uq) | 1731 | if (uq) |
1715 | xfs_qm_dqrele(uq); | 1732 | xfs_qm_dqrele(uq); |
1716 | ASSERT(error != ENOENT); | 1733 | ASSERT(error != ENOENT); |
1717 | return (error); | 1734 | return (error); |
1718 | } | 1735 | } |
1719 | xfs_dqunlock(gq); | 1736 | xfs_dqunlock(gq); |
1720 | lockflags = XFS_ILOCK_SHARED; | 1737 | lockflags = XFS_ILOCK_SHARED; |
1721 | xfs_ilock(ip, lockflags); | 1738 | xfs_ilock(ip, lockflags); |
1722 | } else { | 1739 | } else { |
1723 | ASSERT(ip->i_gdquot); | 1740 | ASSERT(ip->i_gdquot); |
1724 | gq = xfs_qm_dqhold(ip->i_gdquot); | 1741 | gq = xfs_qm_dqhold(ip->i_gdquot); |
1725 | } | 1742 | } |
1726 | } | 1743 | } |
1727 | if (uq) | 1744 | if (uq) |
1728 | trace_xfs_dquot_dqalloc(ip); | 1745 | trace_xfs_dquot_dqalloc(ip); |
1729 | 1746 | ||
1730 | xfs_iunlock(ip, lockflags); | 1747 | xfs_iunlock(ip, lockflags); |
1731 | if (O_udqpp) | 1748 | if (O_udqpp) |
1732 | *O_udqpp = uq; | 1749 | *O_udqpp = uq; |
1733 | else if (uq) | 1750 | else if (uq) |
1734 | xfs_qm_dqrele(uq); | 1751 | xfs_qm_dqrele(uq); |
1735 | if (O_gdqpp) | 1752 | if (O_gdqpp) |
1736 | *O_gdqpp = gq; | 1753 | *O_gdqpp = gq; |
1737 | else if (gq) | 1754 | else if (gq) |
1738 | xfs_qm_dqrele(gq); | 1755 | xfs_qm_dqrele(gq); |
1739 | return 0; | 1756 | return 0; |
1740 | } | 1757 | } |
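All three branches of xfs_qm_vop_dqalloc() perform the same unlock/dqget/relock dance, because xfs_qm_dqget() with XFS_QMOPT_DQALLOC may reserve log space, and that cannot happen while the inode lock is held; the relock afterwards only needs shared mode. Boiled down, with hypothetical names:

    struct inode_like;
    struct dq;

    extern void iunlock_excl(struct inode_like *ip);
    extern void ilock_shared(struct inode_like *ip);
    extern int  dqget_alloc(unsigned int id, struct dq **dqpp); /* may sleep */

    int alloc_with_lock_dance(struct inode_like *ip, unsigned int id,
                              struct dq **dqpp)
    {
            int error;

            iunlock_excl(ip);       /* no trans_reserve under the inode lock */
            error = dqget_alloc(id, dqpp);
            if (error)
                    return error;   /* error path returns without relocking */
            ilock_shared(ip);       /* shared mode is enough from here on */
            return 0;
    }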
1741 | 1758 | ||
1742 | /* | 1759 | /* |
1743 | * Actually transfer ownership, and do dquot modifications. | 1760 | * Actually transfer ownership, and do dquot modifications. |
1744 | * These were already reserved. | 1761 | * These were already reserved. |
1745 | */ | 1762 | */ |
1746 | xfs_dquot_t * | 1763 | xfs_dquot_t * |
1747 | xfs_qm_vop_chown( | 1764 | xfs_qm_vop_chown( |
1748 | xfs_trans_t *tp, | 1765 | xfs_trans_t *tp, |
1749 | xfs_inode_t *ip, | 1766 | xfs_inode_t *ip, |
1750 | xfs_dquot_t **IO_olddq, | 1767 | xfs_dquot_t **IO_olddq, |
1751 | xfs_dquot_t *newdq) | 1768 | xfs_dquot_t *newdq) |
1752 | { | 1769 | { |
1753 | xfs_dquot_t *prevdq; | 1770 | xfs_dquot_t *prevdq; |
1754 | uint bfield = XFS_IS_REALTIME_INODE(ip) ? | 1771 | uint bfield = XFS_IS_REALTIME_INODE(ip) ? |
1755 | XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; | 1772 | XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; |
1756 | 1773 | ||
1757 | 1774 | ||
1758 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 1775 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
1759 | ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); | 1776 | ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); |
1760 | 1777 | ||
1761 | /* old dquot */ | 1778 | /* old dquot */ |
1762 | prevdq = *IO_olddq; | 1779 | prevdq = *IO_olddq; |
1763 | ASSERT(prevdq); | 1780 | ASSERT(prevdq); |
1764 | ASSERT(prevdq != newdq); | 1781 | ASSERT(prevdq != newdq); |
1765 | 1782 | ||
1766 | xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); | 1783 | xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); |
1767 | xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); | 1784 | xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); |
1768 | 1785 | ||
1769 | /* the sparkling new dquot */ | 1786 | /* the sparkling new dquot */ |
1770 | xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); | 1787 | xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); |
1771 | xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); | 1788 | xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); |
1772 | 1789 | ||
1773 | /* | 1790 | /* |
1774 | * Take an extra reference, because the inode is going to keep | 1791 | * Take an extra reference, because the inode is going to keep |
1775 | * this dquot pointer even after the trans_commit. | 1792 | * this dquot pointer even after the trans_commit. |
1776 | */ | 1793 | */ |
1777 | *IO_olddq = xfs_qm_dqhold(newdq); | 1794 | *IO_olddq = xfs_qm_dqhold(newdq); |
1778 | 1795 | ||
1779 | return prevdq; | 1796 | return prevdq; |
1780 | } | 1797 | } |
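
The "extra reference" comment above matters on the caller side: the dquot returned by xfs_qm_vop_chown() carries the reference the inode used to hold, and dropping it is the caller's job. A minimal sketch (hypothetical helper, assuming the quota reservation was already taken via xfs_qm_vop_chown_reserve(); real callers release the old dquot only after the transaction commits):

	/*
	 * Hypothetical sketch of the caller side: swap the inode's user
	 * dquot and release the reference xfs_qm_vop_chown() hands back.
	 */
	static void
	example_chown_udquot(struct xfs_trans *tp, struct xfs_inode *ip,
			     struct xfs_dquot *newudq)
	{
		struct xfs_dquot	*olddq;

		olddq = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, newudq);
		xfs_qm_dqrele(olddq);	/* drop the transferred reference */
	}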
1781 | 1798 | ||
1782 | /* | 1799 | /* |
1783 | * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). | 1800 | * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). |
1784 | */ | 1801 | */ |
1785 | int | 1802 | int |
1786 | xfs_qm_vop_chown_reserve( | 1803 | xfs_qm_vop_chown_reserve( |
1787 | xfs_trans_t *tp, | 1804 | xfs_trans_t *tp, |
1788 | xfs_inode_t *ip, | 1805 | xfs_inode_t *ip, |
1789 | xfs_dquot_t *udqp, | 1806 | xfs_dquot_t *udqp, |
1790 | xfs_dquot_t *gdqp, | 1807 | xfs_dquot_t *gdqp, |
1791 | uint flags) | 1808 | uint flags) |
1792 | { | 1809 | { |
1793 | xfs_mount_t *mp = ip->i_mount; | 1810 | xfs_mount_t *mp = ip->i_mount; |
1794 | uint delblks, blkflags, prjflags = 0; | 1811 | uint delblks, blkflags, prjflags = 0; |
1795 | xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; | 1812 | xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; |
1796 | int error; | 1813 | int error; |
1797 | 1814 | ||
1798 | 1815 | ||
1799 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 1816 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
1800 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1817 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1801 | 1818 | ||
1802 | delblks = ip->i_delayed_blks; | 1819 | delblks = ip->i_delayed_blks; |
1803 | delblksudq = delblksgdq = unresudq = unresgdq = NULL; | 1820 | delblksudq = delblksgdq = unresudq = unresgdq = NULL; |
1804 | blkflags = XFS_IS_REALTIME_INODE(ip) ? | 1821 | blkflags = XFS_IS_REALTIME_INODE(ip) ? |
1805 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; | 1822 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; |
1806 | 1823 | ||
1807 | if (XFS_IS_UQUOTA_ON(mp) && udqp && | 1824 | if (XFS_IS_UQUOTA_ON(mp) && udqp && |
1808 | ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { | 1825 | ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { |
1809 | delblksudq = udqp; | 1826 | delblksudq = udqp; |
1810 | /* | 1827 | /* |
1811 | * If there are delayed allocation blocks, then we have to | 1828 | * If there are delayed allocation blocks, then we have to |
1812 | * unreserve those from the old dquot, and add them to the | 1829 | * unreserve those from the old dquot, and add them to the |
1813 | * new dquot. | 1830 | * new dquot. |
1814 | */ | 1831 | */ |
1815 | if (delblks) { | 1832 | if (delblks) { |
1816 | ASSERT(ip->i_udquot); | 1833 | ASSERT(ip->i_udquot); |
1817 | unresudq = ip->i_udquot; | 1834 | unresudq = ip->i_udquot; |
1818 | } | 1835 | } |
1819 | } | 1836 | } |
1820 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { | 1837 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { |
1821 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && | 1838 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && |
1822 | xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) | 1839 | xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) |
1823 | prjflags = XFS_QMOPT_ENOSPC; | 1840 | prjflags = XFS_QMOPT_ENOSPC; |
1824 | 1841 | ||
1825 | if (prjflags || | 1842 | if (prjflags || |
1826 | (XFS_IS_GQUOTA_ON(ip->i_mount) && | 1843 | (XFS_IS_GQUOTA_ON(ip->i_mount) && |
1827 | ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { | 1844 | ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { |
1828 | delblksgdq = gdqp; | 1845 | delblksgdq = gdqp; |
1829 | if (delblks) { | 1846 | if (delblks) { |
1830 | ASSERT(ip->i_gdquot); | 1847 | ASSERT(ip->i_gdquot); |
1831 | unresgdq = ip->i_gdquot; | 1848 | unresgdq = ip->i_gdquot; |
1832 | } | 1849 | } |
1833 | } | 1850 | } |
1834 | } | 1851 | } |
1835 | 1852 | ||
1836 | if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, | 1853 | if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, |
1837 | delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, | 1854 | delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, |
1838 | flags | blkflags | prjflags))) | 1855 | flags | blkflags | prjflags))) |
1839 | return (error); | 1856 | return (error); |
1840 | 1857 | ||
1841 | /* | 1858 | /* |
1842 | * Do the delayed blks reservations/unreservations now. Since these | 1859 | * Do the delayed blks reservations/unreservations now. Since these |
1843 | * are done without the help of a transaction, if a reservation fails, | 1860 | * are done without the help of a transaction, if a reservation fails, |
1844 | * its previous reservations won't be automatically undone by trans | 1861 | * its previous reservations won't be automatically undone by trans |
1845 | * code. So, we have to do it manually here. | 1862 | * code. So, we have to do it manually here. |
1846 | */ | 1863 | */ |
1847 | if (delblks) { | 1864 | if (delblks) { |
1848 | /* | 1865 | /* |
1849 | * Do the reservations first. Unreservation can't fail. | 1866 | * Do the reservations first. Unreservation can't fail. |
1850 | */ | 1867 | */ |
1851 | ASSERT(delblksudq || delblksgdq); | 1868 | ASSERT(delblksudq || delblksgdq); |
1852 | ASSERT(unresudq || unresgdq); | 1869 | ASSERT(unresudq || unresgdq); |
1853 | if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, | 1870 | if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, |
1854 | delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, | 1871 | delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, |
1855 | flags | blkflags | prjflags))) | 1872 | flags | blkflags | prjflags))) |
1856 | return (error); | 1873 | return (error); |
1857 | xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, | 1874 | xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, |
1858 | unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, | 1875 | unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, |
1859 | blkflags); | 1876 | blkflags); |
1860 | } | 1877 | } |
1861 | 1878 | ||
1862 | return (0); | 1879 | return (0); |
1863 | } | 1880 | } |
1864 | 1881 | ||
1865 | int | 1882 | int |
1866 | xfs_qm_vop_rename_dqattach( | 1883 | xfs_qm_vop_rename_dqattach( |
1867 | struct xfs_inode **i_tab) | 1884 | struct xfs_inode **i_tab) |
1868 | { | 1885 | { |
1869 | struct xfs_mount *mp = i_tab[0]->i_mount; | 1886 | struct xfs_mount *mp = i_tab[0]->i_mount; |
1870 | int i; | 1887 | int i; |
1871 | 1888 | ||
1872 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) | 1889 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) |
1873 | return 0; | 1890 | return 0; |
1874 | 1891 | ||
1875 | for (i = 0; (i < 4 && i_tab[i]); i++) { | 1892 | for (i = 0; (i < 4 && i_tab[i]); i++) { |
1876 | struct xfs_inode *ip = i_tab[i]; | 1893 | struct xfs_inode *ip = i_tab[i]; |
1877 | int error; | 1894 | int error; |
1878 | 1895 | ||
1879 | /* | 1896 | /* |
1880 | * Watch out for duplicate entries in the table. | 1897 | * Watch out for duplicate entries in the table. |
1881 | */ | 1898 | */ |
1882 | if (i == 0 || ip != i_tab[i-1]) { | 1899 | if (i == 0 || ip != i_tab[i-1]) { |
1883 | if (XFS_NOT_DQATTACHED(mp, ip)) { | 1900 | if (XFS_NOT_DQATTACHED(mp, ip)) { |
1884 | error = xfs_qm_dqattach(ip, 0); | 1901 | error = xfs_qm_dqattach(ip, 0); |
1885 | if (error) | 1902 | if (error) |
1886 | return error; | 1903 | return error; |
1887 | } | 1904 | } |
1888 | } | 1905 | } |
1889 | } | 1906 | } |
1890 | return 0; | 1907 | return 0; |
1891 | } | 1908 | } |
1892 | 1909 | ||
1893 | void | 1910 | void |
1894 | xfs_qm_vop_create_dqattach( | 1911 | xfs_qm_vop_create_dqattach( |
1895 | struct xfs_trans *tp, | 1912 | struct xfs_trans *tp, |
1896 | struct xfs_inode *ip, | 1913 | struct xfs_inode *ip, |
1897 | struct xfs_dquot *udqp, | 1914 | struct xfs_dquot *udqp, |
1898 | struct xfs_dquot *gdqp) | 1915 | struct xfs_dquot *gdqp) |
1899 | { | 1916 | { |
1900 | struct xfs_mount *mp = tp->t_mountp; | 1917 | struct xfs_mount *mp = tp->t_mountp; |
1901 | 1918 | ||
1902 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) | 1919 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) |
1903 | return; | 1920 | return; |
1904 | 1921 | ||
1905 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 1922 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
1906 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1923 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1907 | 1924 | ||
1908 | if (udqp) { | 1925 | if (udqp) { |
1909 | ASSERT(ip->i_udquot == NULL); | 1926 | ASSERT(ip->i_udquot == NULL); |
1910 | ASSERT(XFS_IS_UQUOTA_ON(mp)); | 1927 | ASSERT(XFS_IS_UQUOTA_ON(mp)); |
1911 | ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); | 1928 | ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); |
1912 | 1929 | ||
1913 | ip->i_udquot = xfs_qm_dqhold(udqp); | 1930 | ip->i_udquot = xfs_qm_dqhold(udqp); |
1914 | xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); | 1931 | xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); |
1915 | } | 1932 | } |
1916 | if (gdqp) { | 1933 | if (gdqp) { |
1917 | ASSERT(ip->i_gdquot == NULL); | 1934 | ASSERT(ip->i_gdquot == NULL); |
1918 | ASSERT(XFS_IS_OQUOTA_ON(mp)); | 1935 | ASSERT(XFS_IS_OQUOTA_ON(mp)); |
1919 | ASSERT((XFS_IS_GQUOTA_ON(mp) ? | 1936 | ASSERT((XFS_IS_GQUOTA_ON(mp) ? |
1920 | ip->i_d.di_gid : xfs_get_projid(ip)) == | 1937 | ip->i_d.di_gid : xfs_get_projid(ip)) == |
1921 | be32_to_cpu(gdqp->q_core.d_id)); | 1938 | be32_to_cpu(gdqp->q_core.d_id)); |
1922 | 1939 | ||
1923 | ip->i_gdquot = xfs_qm_dqhold(gdqp); | 1940 | ip->i_gdquot = xfs_qm_dqhold(gdqp); |
1924 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); | 1941 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); |
1925 | } | 1942 | } |
1926 | } | 1943 | } |
1927 | 1944 |
fs/xfs/xfs_qm.h
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #ifndef __XFS_QM_H__ | 18 | #ifndef __XFS_QM_H__ |
19 | #define __XFS_QM_H__ | 19 | #define __XFS_QM_H__ |
20 | 20 | ||
21 | #include "xfs_dquot_item.h" | 21 | #include "xfs_dquot_item.h" |
22 | #include "xfs_dquot.h" | 22 | #include "xfs_dquot.h" |
23 | #include "xfs_quota_priv.h" | 23 | #include "xfs_quota_priv.h" |
24 | 24 | ||
25 | struct xfs_inode; | 25 | struct xfs_inode; |
26 | 26 | ||
27 | extern struct kmem_zone *xfs_qm_dqtrxzone; | 27 | extern struct kmem_zone *xfs_qm_dqtrxzone; |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * This defines the unit of allocation of dquots. | 30 | * This defines the unit of allocation of dquots. |
31 | * Currently, it is just one file system block; a 4K block holds 30 | 31 | * Currently, it is just one file system block; a 4K block holds 30 |
32 | * 136-byte dquots (136 * 30 = 4080). It's probably not worth trying to make | 32 | * 136-byte dquots (136 * 30 = 4080). It's probably not worth trying to make |
33 | * this more dynamic. | 33 | * this more dynamic. |
34 | * XXXsup However, if this number is changed, we have to make sure that we don't | 34 | * XXXsup However, if this number is changed, we have to make sure that we don't |
35 | * implicitly assume that we do allocations in chunks of a single filesystem | 35 | * implicitly assume that we do allocations in chunks of a single filesystem |
36 | * block in the dquot/xqm code. | 36 | * block in the dquot/xqm code. |
37 | */ | 37 | */ |
38 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 | 38 | #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * Various quota information for individual filesystems. | 41 | * Various quota information for individual filesystems. |
42 | * The mount structure keeps a pointer to this. | 42 | * The mount structure keeps a pointer to this. |
43 | */ | 43 | */ |
44 | typedef struct xfs_quotainfo { | 44 | typedef struct xfs_quotainfo { |
45 | struct radix_tree_root qi_uquota_tree; | 45 | struct radix_tree_root qi_uquota_tree; |
46 | struct radix_tree_root qi_gquota_tree; | 46 | struct radix_tree_root qi_gquota_tree; |
47 | struct mutex qi_tree_lock; | 47 | struct mutex qi_tree_lock; |
48 | xfs_inode_t *qi_uquotaip; /* user quota inode */ | 48 | xfs_inode_t *qi_uquotaip; /* user quota inode */ |
49 | xfs_inode_t *qi_gquotaip; /* group quota inode */ | 49 | xfs_inode_t *qi_gquotaip; /* group quota inode */ |
50 | struct list_head qi_lru_list; | 50 | struct list_head qi_lru_list; |
51 | struct mutex qi_lru_lock; | 51 | struct mutex qi_lru_lock; |
52 | int qi_lru_count; | 52 | int qi_lru_count; |
53 | int qi_dquots; | 53 | int qi_dquots; |
54 | time_t qi_btimelimit; /* limit for blks timer */ | 54 | time_t qi_btimelimit; /* limit for blks timer */ |
55 | time_t qi_itimelimit; /* limit for inodes timer */ | 55 | time_t qi_itimelimit; /* limit for inodes timer */ |
56 | time_t qi_rtbtimelimit;/* limit for rt blks timer */ | 56 | time_t qi_rtbtimelimit;/* limit for rt blks timer */ |
57 | xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ | 57 | xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ |
58 | xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ | 58 | xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ |
59 | xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ | 59 | xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ |
60 | struct mutex qi_quotaofflock;/* to serialize quotaoff */ | 60 | struct mutex qi_quotaofflock;/* to serialize quotaoff */ |
61 | xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ | 61 | xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ |
62 | uint qi_dqperchunk; /* # ondisk dqs in above chunk */ | 62 | uint qi_dqperchunk; /* # ondisk dqs in above chunk */ |
63 | xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ | 63 | xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ |
64 | xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ | 64 | xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ |
65 | xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ | 65 | xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ |
66 | xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ | 66 | xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ |
67 | xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ | 67 | xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ |
68 | xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ | 68 | xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ |
69 | struct shrinker qi_shrinker; | 69 | struct shrinker qi_shrinker; |
70 | } xfs_quotainfo_t; | 70 | } xfs_quotainfo_t; |
71 | 71 | ||
72 | #define XFS_DQUOT_TREE(qi, type) \ | 72 | #define XFS_DQUOT_TREE(qi, type) \ |
73 | ((type & XFS_DQ_USER) ? \ | 73 | ((type & XFS_DQ_USER) ? \ |
74 | &((qi)->qi_uquota_tree) : \ | 74 | &((qi)->qi_uquota_tree) : \ |
75 | &((qi)->qi_gquota_tree)) | 75 | &((qi)->qi_gquota_tree)) |
76 | 76 | ||
77 | 77 | ||
78 | extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, | ||
79 | unsigned int nbblks); | ||
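
xfs_qm_calc_dquots_per_chunk() becomes the single home for the dquots-per-chunk arithmetic described in the comment above. Its body is not shown in these hunks; the following is a plausible implementation sketched as an assumption from the declaration and the on-disk sizes (BBTOB() converts 512-byte basic blocks to bytes; mp is presumably there for per-mount variation):

	/*
	 * Sketch only: how many on-disk dquots fit in a chunk of nbblks
	 * basic blocks.  With sizeof(xfs_dqblk_t) == 136, one 4K
	 * filesystem block yields 4096 / 136 = 30 dquots, matching the
	 * comment above.
	 */
	int
	xfs_qm_calc_dquots_per_chunk(
		struct xfs_mount	*mp,
		unsigned int		nbblks)	/* basic block units */
	{
		ASSERT(nbblks > 0);
		return BBTOB(nbblks) / sizeof(xfs_dqblk_t);
	}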
78 | extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); | 80 | extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); |
79 | extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, | 81 | extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, |
80 | xfs_dquot_t *, xfs_dquot_t *, long, long, uint); | 82 | xfs_dquot_t *, xfs_dquot_t *, long, long, uint); |
81 | extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); | 83 | extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); |
82 | extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); | 84 | extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); |
83 | 85 | ||
84 | /* | 86 | /* |
85 | * We keep the usr and grp dquots separately so that locking will be easier | 87 | * We keep the usr and grp dquots separately so that locking will be easier |
86 | * to do at commit time. All transactions that we know of at this point | 88 | * to do at commit time. All transactions that we know of at this point |
87 | * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. | 89 | * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. |
88 | */ | 90 | */ |
89 | #define XFS_QM_TRANS_MAXDQS 2 | 91 | #define XFS_QM_TRANS_MAXDQS 2 |
90 | typedef struct xfs_dquot_acct { | 92 | typedef struct xfs_dquot_acct { |
91 | xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; | 93 | xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; |
92 | xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; | 94 | xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; |
93 | } xfs_dquot_acct_t; | 95 | } xfs_dquot_acct_t; |
94 | 96 | ||
95 | /* | 97 | /* |
96 | * Users are allowed to have a usage exceeding their softlimit for | 98 | * Users are allowed to have a usage exceeding their softlimit for |
97 | * a period this long. | 99 | * a period this long. |
98 | */ | 100 | */ |
99 | #define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ | 101 | #define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ |
100 | #define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ | 102 | #define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ |
101 | #define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ | 103 | #define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ |
102 | 104 | ||
103 | #define XFS_QM_BWARNLIMIT 5 | 105 | #define XFS_QM_BWARNLIMIT 5 |
104 | #define XFS_QM_IWARNLIMIT 5 | 106 | #define XFS_QM_IWARNLIMIT 5 |
105 | #define XFS_QM_RTBWARNLIMIT 5 | 107 | #define XFS_QM_RTBWARNLIMIT 5 |
106 | 108 | ||
107 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 109 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); |
108 | extern int xfs_qm_quotacheck(xfs_mount_t *); | 110 | extern int xfs_qm_quotacheck(xfs_mount_t *); |
109 | extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); | 111 | extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); |
110 | 112 | ||
111 | /* dquot stuff */ | 113 | /* dquot stuff */ |
112 | extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); | 114 | extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); |
113 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); | 115 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); |
114 | 116 | ||
115 | /* quota ops */ | 117 | /* quota ops */ |
116 | extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); | 118 | extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); |
117 | extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, | 119 | extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, |
118 | fs_disk_quota_t *); | 120 | fs_disk_quota_t *); |
119 | extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, | 121 | extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, |
120 | fs_disk_quota_t *); | 122 | fs_disk_quota_t *); |
121 | extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); | 123 | extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); |
122 | extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); | 124 | extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); |
123 | extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); | 125 | extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); |
124 | 126 | ||
125 | #endif /* __XFS_QM_H__ */ | 127 | #endif /* __XFS_QM_H__ */ |
126 | 128 |
fs/xfs/xfs_quota.h
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | 3 | * All Rights Reserved. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | 6 | * modify it under the terms of the GNU General Public License as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it would be useful, | 9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | 15 | * along with this program; if not, write the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #ifndef __XFS_QUOTA_H__ | 18 | #ifndef __XFS_QUOTA_H__ |
19 | #define __XFS_QUOTA_H__ | 19 | #define __XFS_QUOTA_H__ |
20 | 20 | ||
21 | struct xfs_trans; | 21 | struct xfs_trans; |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * The ondisk form of a dquot structure. | 24 | * The ondisk form of a dquot structure. |
25 | */ | 25 | */ |
26 | #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ | 26 | #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ |
27 | #define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */ | 27 | #define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */ |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * uid_t and gid_t are hard-coded to 32 bits in the inode. | 30 | * uid_t and gid_t are hard-coded to 32 bits in the inode. |
31 | * Hence, an 'id' in a dquot is 32 bits. | 31 | * Hence, an 'id' in a dquot is 32 bits. |
32 | */ | 32 | */ |
33 | typedef __uint32_t xfs_dqid_t; | 33 | typedef __uint32_t xfs_dqid_t; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * Even though users may not have quota limits occupying all 64-bits, | 36 | * Even though users may not have quota limits occupying all 64-bits, |
37 | * they may need 64-bit accounting. Hence, 64-bit quota-counters, | 37 | * they may need 64-bit accounting. Hence, 64-bit quota-counters, |
38 | * and quota-limits. This is a waste in the common case, but hey ... | 38 | * and quota-limits. This is a waste in the common case, but hey ... |
39 | */ | 39 | */ |
40 | typedef __uint64_t xfs_qcnt_t; | 40 | typedef __uint64_t xfs_qcnt_t; |
41 | typedef __uint16_t xfs_qwarncnt_t; | 41 | typedef __uint16_t xfs_qwarncnt_t; |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * This is the main portion of the on-disk representation of quota | 44 | * This is the main portion of the on-disk representation of quota |
45 | * information for a user. This is the q_core of the xfs_dquot_t that | 45 | * information for a user. This is the q_core of the xfs_dquot_t that |
46 | * is kept in kernel memory. We pad this with some more expansion room | 46 | * is kept in kernel memory. We pad this with some more expansion room |
47 | * to construct the on disk structure. | 47 | * to construct the on disk structure. |
48 | */ | 48 | */ |
49 | typedef struct xfs_disk_dquot { | 49 | typedef struct xfs_disk_dquot { |
50 | __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ | 50 | __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ |
51 | __u8 d_version; /* dquot version */ | 51 | __u8 d_version; /* dquot version */ |
52 | __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */ | 52 | __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */ |
53 | __be32 d_id; /* user,project,group id */ | 53 | __be32 d_id; /* user,project,group id */ |
54 | __be64 d_blk_hardlimit;/* absolute limit on disk blks */ | 54 | __be64 d_blk_hardlimit;/* absolute limit on disk blks */ |
55 | __be64 d_blk_softlimit;/* preferred limit on disk blks */ | 55 | __be64 d_blk_softlimit;/* preferred limit on disk blks */ |
56 | __be64 d_ino_hardlimit;/* maximum # allocated inodes */ | 56 | __be64 d_ino_hardlimit;/* maximum # allocated inodes */ |
57 | __be64 d_ino_softlimit;/* preferred inode limit */ | 57 | __be64 d_ino_softlimit;/* preferred inode limit */ |
58 | __be64 d_bcount; /* disk blocks owned by the user */ | 58 | __be64 d_bcount; /* disk blocks owned by the user */ |
59 | __be64 d_icount; /* inodes owned by the user */ | 59 | __be64 d_icount; /* inodes owned by the user */ |
60 | __be32 d_itimer; /* zero if within inode limits; if not, | 60 | __be32 d_itimer; /* zero if within inode limits; if not, |
61 | this is when we refuse service */ | 61 | this is when we refuse service */ |
62 | __be32 d_btimer; /* similar to above; for disk blocks */ | 62 | __be32 d_btimer; /* similar to above; for disk blocks */ |
63 | __be16 d_iwarns; /* warnings issued wrt num inodes */ | 63 | __be16 d_iwarns; /* warnings issued wrt num inodes */ |
64 | __be16 d_bwarns; /* warnings issued wrt disk blocks */ | 64 | __be16 d_bwarns; /* warnings issued wrt disk blocks */ |
65 | __be32 d_pad0; /* 64 bit align */ | 65 | __be32 d_pad0; /* 64 bit align */ |
66 | __be64 d_rtb_hardlimit;/* absolute limit on realtime blks */ | 66 | __be64 d_rtb_hardlimit;/* absolute limit on realtime blks */ |
67 | __be64 d_rtb_softlimit;/* preferred limit on RT disk blks */ | 67 | __be64 d_rtb_softlimit;/* preferred limit on RT disk blks */ |
68 | __be64 d_rtbcount; /* realtime blocks owned */ | 68 | __be64 d_rtbcount; /* realtime blocks owned */ |
69 | __be32 d_rtbtimer; /* similar to above; for RT disk blocks */ | 69 | __be32 d_rtbtimer; /* similar to above; for RT disk blocks */ |
70 | __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */ | 70 | __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */ |
71 | __be16 d_pad; | 71 | __be16 d_pad; |
72 | } xfs_disk_dquot_t; | 72 | } xfs_disk_dquot_t; |
73 | 73 | ||
74 | /* | 74 | /* |
75 | * This is what goes on disk. This is separated from the xfs_disk_dquot because | 75 | * This is what goes on disk. This is separated from the xfs_disk_dquot because |
76 | * carrying the unnecessary padding would be a waste of memory. | 76 | * carrying the unnecessary padding would be a waste of memory. |
77 | */ | 77 | */ |
78 | typedef struct xfs_dqblk { | 78 | typedef struct xfs_dqblk { |
79 | xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ | 79 | xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ |
80 | char dd_fill[32]; /* filling for posterity */ | 80 | char dd_fill[4]; /* filling for posterity */ |
81 | |||
82 | /* | ||
83 | * These two are only present on filesystems with the CRC bits set. | ||
84 | */ | ||
85 | __be32 dd_crc; /* checksum */ | ||
86 | __be64 dd_lsn; /* last modification in log */ | ||
87 | uuid_t dd_uuid; /* location information */ | ||
81 | } xfs_dqblk_t; | 88 | } xfs_dqblk_t; |
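
Note that the layout arithmetic still works out: xfs_disk_dquot is 104 bytes, so the old layout was 104 + 32 bytes of fill = 136, and the new one is 104 + 4 (fill) + 4 (dd_crc) + 8 (dd_lsn) + 16 (dd_uuid) = 136. sizeof(xfs_dqblk_t) is unchanged, and so is the dquots-per-block math. A hedged sketch of how per-dquot verification could use the new trailer, assuming the crc32c helpers from xfs_cksum.h and a checksum taken over the whole dqblk with dd_crc as the checksum field (the actual verifier lives in the .c hunks, not this header):

	/*
	 * Assumed verification sketch: checksum the 136-byte dqblk up to
	 * the dd_crc field, then match the UUID against the superblock.
	 * Pre-CRC filesystems have no trailer to check.
	 */
	static bool
	example_dqblk_verify(
		struct xfs_mount	*mp,
		struct xfs_dqblk	*d)
	{
		if (!xfs_sb_version_hascrc(&mp->m_sb))
			return true;
		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
				      offsetof(struct xfs_dqblk, dd_crc)))
			return false;
		return uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid);
	}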
82 | 89 | ||
83 | /* | 90 | /* |
84 | * flags for q_flags field in the dquot. | 91 | * flags for q_flags field in the dquot. |
85 | */ | 92 | */ |
86 | #define XFS_DQ_USER 0x0001 /* a user quota */ | 93 | #define XFS_DQ_USER 0x0001 /* a user quota */ |
87 | #define XFS_DQ_PROJ 0x0002 /* project quota */ | 94 | #define XFS_DQ_PROJ 0x0002 /* project quota */ |
88 | #define XFS_DQ_GROUP 0x0004 /* a group quota */ | 95 | #define XFS_DQ_GROUP 0x0004 /* a group quota */ |
89 | #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ | 96 | #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ |
90 | #define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ | 97 | #define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ |
91 | 98 | ||
92 | #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) | 99 | #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) |
93 | 100 | ||
94 | #define XFS_DQ_FLAGS \ | 101 | #define XFS_DQ_FLAGS \ |
95 | { XFS_DQ_USER, "USER" }, \ | 102 | { XFS_DQ_USER, "USER" }, \ |
96 | { XFS_DQ_PROJ, "PROJ" }, \ | 103 | { XFS_DQ_PROJ, "PROJ" }, \ |
97 | { XFS_DQ_GROUP, "GROUP" }, \ | 104 | { XFS_DQ_GROUP, "GROUP" }, \ |
98 | { XFS_DQ_DIRTY, "DIRTY" }, \ | 105 | { XFS_DQ_DIRTY, "DIRTY" }, \ |
99 | { XFS_DQ_FREEING, "FREEING" } | 106 | { XFS_DQ_FREEING, "FREEING" } |
100 | 107 | ||
101 | /* | 108 | /* |
102 | * In the worst case, when both user and group quotas are on, | 109 | * In the worst case, when both user and group quotas are on, |
103 | * we can have a max of three dquots changing in a single transaction. | 110 | * we can have a max of three dquots changing in a single transaction. |
104 | */ | 111 | */ |
105 | #define XFS_DQUOT_LOGRES(mp) (sizeof(xfs_disk_dquot_t) * 3) | 112 | #define XFS_DQUOT_LOGRES(mp) (sizeof(xfs_disk_dquot_t) * 3) |
106 | 113 | ||
107 | 114 | ||
108 | /* | 115 | /* |
109 | * These are the structures used to lay out dquots and quotaoff | 116 | * These are the structures used to lay out dquots and quotaoff |
110 | * records on the log. Quite similar to those of inodes. | 117 | * records on the log. Quite similar to those of inodes. |
111 | */ | 118 | */ |
112 | 119 | ||
113 | /* | 120 | /* |
114 | * log format struct for dquots. | 121 | * log format struct for dquots. |
115 | * The first two fields must be the type and size fitting into | 122 | * The first two fields must be the type and size fitting into |
116 | * 32 bits : log_recovery code assumes that. | 123 | * 32 bits : log_recovery code assumes that. |
117 | */ | 124 | */ |
118 | typedef struct xfs_dq_logformat { | 125 | typedef struct xfs_dq_logformat { |
119 | __uint16_t qlf_type; /* dquot log item type */ | 126 | __uint16_t qlf_type; /* dquot log item type */ |
120 | __uint16_t qlf_size; /* size of this item */ | 127 | __uint16_t qlf_size; /* size of this item */ |
121 | xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ | 128 | xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ |
122 | __int64_t qlf_blkno; /* blkno of dquot buffer */ | 129 | __int64_t qlf_blkno; /* blkno of dquot buffer */ |
123 | __int32_t qlf_len; /* len of dquot buffer */ | 130 | __int32_t qlf_len; /* len of dquot buffer */ |
124 | __uint32_t qlf_boffset; /* off of dquot in buffer */ | 131 | __uint32_t qlf_boffset; /* off of dquot in buffer */ |
125 | } xfs_dq_logformat_t; | 132 | } xfs_dq_logformat_t; |
126 | 133 | ||
127 | /* | 134 | /* |
128 | * log format struct for QUOTAOFF records. | 135 | * log format struct for QUOTAOFF records. |
129 | * The first two fields must be the type and size fitting into | 136 | * The first two fields must be the type and size fitting into |
130 | * 32 bits : log_recovery code assumes that. | 137 | * 32 bits : log_recovery code assumes that. |
131 | * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer | 138 | * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer |
132 | * to the first and ensures that the first logitem is taken out of the AIL | 139 | * to the first and ensures that the first logitem is taken out of the AIL |
133 | * only when the last one is securely committed. | 140 | * only when the last one is securely committed. |
134 | */ | 141 | */ |
135 | typedef struct xfs_qoff_logformat { | 142 | typedef struct xfs_qoff_logformat { |
136 | unsigned short qf_type; /* quotaoff log item type */ | 143 | unsigned short qf_type; /* quotaoff log item type */ |
137 | unsigned short qf_size; /* size of this item */ | 144 | unsigned short qf_size; /* size of this item */ |
138 | unsigned int qf_flags; /* USR and/or GRP */ | 145 | unsigned int qf_flags; /* USR and/or GRP */ |
139 | char qf_pad[12]; /* padding for future */ | 146 | char qf_pad[12]; /* padding for future */ |
140 | } xfs_qoff_logformat_t; | 147 | } xfs_qoff_logformat_t; |
141 | 148 | ||
142 | 149 | ||
143 | /* | 150 | /* |
145 | * Disk quota status in m_qflags, and also sb_qflags. 16 bits. | 152 | * Disk quota status in m_qflags, and also sb_qflags. 16 bits. |
145 | */ | 152 | */ |
146 | #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ | 153 | #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ |
147 | #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ | 154 | #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ |
148 | #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ | 155 | #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ |
149 | #define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ | 156 | #define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ |
150 | #define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */ | 157 | #define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */ |
151 | #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ | 158 | #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ |
152 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ | 159 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ |
153 | 160 | ||
154 | /* | 161 | /* |
155 | * Quota Accounting/Enforcement flags | 162 | * Quota Accounting/Enforcement flags |
156 | */ | 163 | */ |
157 | #define XFS_ALL_QUOTA_ACCT \ | 164 | #define XFS_ALL_QUOTA_ACCT \ |
158 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) | 165 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) |
159 | #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) | 166 | #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) |
160 | #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) | 167 | #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) |
161 | 168 | ||
162 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) | 169 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) |
163 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) | 170 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) |
164 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) | 171 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) |
165 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) | 172 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) |
166 | #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) | 173 | #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) |
167 | #define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD) | 174 | #define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD) |
168 | 175 | ||
169 | /* | 176 | /* |
170 | * Incore only flags for quotaoff - these bits get cleared when quota(s) | 177 | * Incore only flags for quotaoff - these bits get cleared when quota(s) |
171 | * are in the process of getting turned off. These flags are in m_qflags but | 178 | * are in the process of getting turned off. These flags are in m_qflags but |
172 | * never in sb_qflags. | 179 | * never in sb_qflags. |
173 | */ | 180 | */ |
174 | #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ | 181 | #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ |
175 | #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ | 182 | #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ |
176 | #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ | 183 | #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ |
177 | #define XFS_ALL_QUOTA_ACTIVE \ | 184 | #define XFS_ALL_QUOTA_ACTIVE \ |
178 | (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) | 185 | (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) |
179 | 186 | ||
180 | /* | 187 | /* |
181 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees | 188 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees |
182 | * quota will not be switched off as long as that inode lock is held. | 189 | * quota will not be switched off as long as that inode lock is held. |
183 | */ | 190 | */ |
184 | #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ | 191 | #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ |
185 | XFS_GQUOTA_ACTIVE | \ | 192 | XFS_GQUOTA_ACTIVE | \ |
186 | XFS_PQUOTA_ACTIVE)) | 193 | XFS_PQUOTA_ACTIVE)) |
187 | #define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ | 194 | #define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ |
188 | XFS_PQUOTA_ACTIVE)) | 195 | XFS_PQUOTA_ACTIVE)) |
189 | #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) | 196 | #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) |
190 | #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) | 197 | #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) |
191 | #define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) | 198 | #define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) |
192 | 199 | ||
193 | /* | 200 | /* |
194 | * Flags to tell various functions what to do. Not all of these are meaningful | 201 | * Flags to tell various functions what to do. Not all of these are meaningful |
195 | * to a single function. None of these XFS_QMOPT_* flags are meant to have | 202 | * to a single function. None of these XFS_QMOPT_* flags are meant to have |
196 | * persistent values (i.e. their values can and will change between versions) | 203 | * persistent values (i.e. their values can and will change between versions) |
197 | */ | 204 | */ |
198 | #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ | 205 | #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ |
199 | #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ | 206 | #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ |
200 | #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ | 207 | #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ |
201 | #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ | 208 | #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ |
202 | #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ | 209 | #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ |
203 | #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ | 210 | #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ |
204 | #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ | 211 | #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ |
205 | #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ | 212 | #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ |
206 | #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ | 213 | #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ |
207 | 214 | ||
208 | /* | 215 | /* |
209 | * flags to xfs_trans_mod_dquot to indicate which field needs to be | 216 | * flags to xfs_trans_mod_dquot to indicate which field needs to be |
210 | * modified. | 217 | * modified. |
211 | */ | 218 | */ |
212 | #define XFS_QMOPT_RES_REGBLKS 0x0010000 | 219 | #define XFS_QMOPT_RES_REGBLKS 0x0010000 |
213 | #define XFS_QMOPT_RES_RTBLKS 0x0020000 | 220 | #define XFS_QMOPT_RES_RTBLKS 0x0020000 |
214 | #define XFS_QMOPT_BCOUNT 0x0040000 | 221 | #define XFS_QMOPT_BCOUNT 0x0040000 |
215 | #define XFS_QMOPT_ICOUNT 0x0080000 | 222 | #define XFS_QMOPT_ICOUNT 0x0080000 |
216 | #define XFS_QMOPT_RTBCOUNT 0x0100000 | 223 | #define XFS_QMOPT_RTBCOUNT 0x0100000 |
217 | #define XFS_QMOPT_DELBCOUNT 0x0200000 | 224 | #define XFS_QMOPT_DELBCOUNT 0x0200000 |
218 | #define XFS_QMOPT_DELRTBCOUNT 0x0400000 | 225 | #define XFS_QMOPT_DELRTBCOUNT 0x0400000 |
219 | #define XFS_QMOPT_RES_INOS 0x0800000 | 226 | #define XFS_QMOPT_RES_INOS 0x0800000 |
220 | 227 | ||
221 | /* | 228 | /* |
222 | * flags for dqalloc. | 229 | * flags for dqalloc. |
223 | */ | 230 | */ |
224 | #define XFS_QMOPT_INHERIT 0x1000000 | 231 | #define XFS_QMOPT_INHERIT 0x1000000 |
225 | 232 | ||
226 | /* | 233 | /* |
227 | * flags to xfs_trans_mod_dquot. | 234 | * flags to xfs_trans_mod_dquot. |
228 | */ | 235 | */ |
229 | #define XFS_TRANS_DQ_RES_BLKS XFS_QMOPT_RES_REGBLKS | 236 | #define XFS_TRANS_DQ_RES_BLKS XFS_QMOPT_RES_REGBLKS |
230 | #define XFS_TRANS_DQ_RES_RTBLKS XFS_QMOPT_RES_RTBLKS | 237 | #define XFS_TRANS_DQ_RES_RTBLKS XFS_QMOPT_RES_RTBLKS |
231 | #define XFS_TRANS_DQ_RES_INOS XFS_QMOPT_RES_INOS | 238 | #define XFS_TRANS_DQ_RES_INOS XFS_QMOPT_RES_INOS |
232 | #define XFS_TRANS_DQ_BCOUNT XFS_QMOPT_BCOUNT | 239 | #define XFS_TRANS_DQ_BCOUNT XFS_QMOPT_BCOUNT |
233 | #define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT | 240 | #define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT |
234 | #define XFS_TRANS_DQ_ICOUNT XFS_QMOPT_ICOUNT | 241 | #define XFS_TRANS_DQ_ICOUNT XFS_QMOPT_ICOUNT |
235 | #define XFS_TRANS_DQ_RTBCOUNT XFS_QMOPT_RTBCOUNT | 242 | #define XFS_TRANS_DQ_RTBCOUNT XFS_QMOPT_RTBCOUNT |
236 | #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT | 243 | #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT |
237 | 244 | ||
238 | 245 | ||
239 | #define XFS_QMOPT_QUOTALL \ | 246 | #define XFS_QMOPT_QUOTALL \ |
240 | (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) | 247 | (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) |
241 | #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) | 248 | #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) |
242 | 249 | ||
243 | #ifdef __KERNEL__ | 250 | #ifdef __KERNEL__ |
244 | /* | 251 | /* |
245 | * This check is done typically without holding the inode lock; | 252 | * This check is done typically without holding the inode lock; |
246 | * that may seem racy, but it is harmless in the context that it is used. | 253 | * that may seem racy, but it is harmless in the context that it is used. |
247 | * The inode cannot go inactive as long as a reference is kept, and | 254 | * The inode cannot go inactive as long as a reference is kept, and |
248 | * therefore if dquot(s) were attached, they'll stay consistent. | 255 | * therefore if dquot(s) were attached, they'll stay consistent. |
249 | * If, for example, the ownership of the inode changes while | 256 | * If, for example, the ownership of the inode changes while |
250 | * we didn't have the inode locked, the appropriate dquot(s) will be | 257 | * we didn't have the inode locked, the appropriate dquot(s) will be |
251 | * attached atomically. | 258 | * attached atomically. |
252 | */ | 259 | */ |
253 | #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ | 260 | #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ |
254 | (ip)->i_udquot == NULL) || \ | 261 | (ip)->i_udquot == NULL) || \ |
255 | (XFS_IS_OQUOTA_ON(mp) && \ | 262 | (XFS_IS_OQUOTA_ON(mp) && \ |
256 | (ip)->i_gdquot == NULL)) | 263 | (ip)->i_gdquot == NULL)) |
257 | 264 | ||
258 | #define XFS_QM_NEED_QUOTACHECK(mp) \ | 265 | #define XFS_QM_NEED_QUOTACHECK(mp) \ |
259 | ((XFS_IS_UQUOTA_ON(mp) && \ | 266 | ((XFS_IS_UQUOTA_ON(mp) && \ |
260 | (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ | 267 | (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ |
261 | (XFS_IS_GQUOTA_ON(mp) && \ | 268 | (XFS_IS_GQUOTA_ON(mp) && \ |
262 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ | 269 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ |
263 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ | 270 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ |
264 | (XFS_IS_PQUOTA_ON(mp) && \ | 271 | (XFS_IS_PQUOTA_ON(mp) && \ |
265 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ | 272 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ |
266 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) | 273 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) |
267 | 274 | ||
268 | #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | 275 | #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ |
269 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ | 276 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ |
270 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) | 277 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) |
271 | 278 | ||
272 | #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | 279 | #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ |
273 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ | 280 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ |
274 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) | 281 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) |
275 | 282 | ||
276 | #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | 283 | #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ |
277 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ | 284 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ |
278 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ | 285 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ |
279 | XFS_GQUOTA_ACCT) | 286 | XFS_GQUOTA_ACCT) |
280 | 287 | ||
281 | 288 | ||
282 | /* | 289 | /* |
283 | * The structure kept inside the xfs_trans_t keeps track of dquot changes | 290 | * The structure kept inside the xfs_trans_t keeps track of dquot changes |
284 | * within a transaction so they can be applied later. | 291 | * within a transaction so they can be applied later. |
285 | */ | 292 | */ |
286 | typedef struct xfs_dqtrx { | 293 | typedef struct xfs_dqtrx { |
287 | struct xfs_dquot *qt_dquot; /* the dquot this refers to */ | 294 | struct xfs_dquot *qt_dquot; /* the dquot this refers to */ |
288 | ulong qt_blk_res; /* blks reserved on a dquot */ | 295 | ulong qt_blk_res; /* blks reserved on a dquot */ |
289 | ulong qt_blk_res_used; /* blks used from the reservation */ | 296 | ulong qt_blk_res_used; /* blks used from the reservation */ |
290 | ulong qt_ino_res; /* inode reserved on a dquot */ | 297 | ulong qt_ino_res; /* inode reserved on a dquot */ |
291 | ulong qt_ino_res_used; /* inodes used from the reservation */ | 298 | ulong qt_ino_res_used; /* inodes used from the reservation */ |
292 | long qt_bcount_delta; /* dquot blk count changes */ | 299 | long qt_bcount_delta; /* dquot blk count changes */ |
293 | long qt_delbcnt_delta; /* delayed dquot blk count changes */ | 300 | long qt_delbcnt_delta; /* delayed dquot blk count changes */ |
294 | long qt_icount_delta; /* dquot inode count changes */ | 301 | long qt_icount_delta; /* dquot inode count changes */ |
295 | ulong qt_rtblk_res; /* # blks reserved on a dquot */ | 302 | ulong qt_rtblk_res; /* # blks reserved on a dquot */ |
296 | ulong qt_rtblk_res_used;/* # blks used from reservation */ | 303 | ulong qt_rtblk_res_used;/* # blks used from reservation */ |
297 | long qt_rtbcount_delta;/* dquot realtime blk changes */ | 304 | long qt_rtbcount_delta;/* dquot realtime blk changes */ |
298 | long qt_delrtb_delta; /* delayed RT blk count changes */ | 305 | long qt_delrtb_delta; /* delayed RT blk count changes */ |
299 | } xfs_dqtrx_t; | 306 | } xfs_dqtrx_t; |
300 | 307 | ||
301 | #ifdef CONFIG_XFS_QUOTA | 308 | #ifdef CONFIG_XFS_QUOTA |
302 | extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); | 309 | extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); |
303 | extern void xfs_trans_free_dqinfo(struct xfs_trans *); | 310 | extern void xfs_trans_free_dqinfo(struct xfs_trans *); |
304 | extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, | 311 | extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, |
305 | uint, long); | 312 | uint, long); |
306 | extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); | 313 | extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); |
307 | extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); | 314 | extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); |
308 | extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, | 315 | extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, |
309 | struct xfs_inode *, long, long, uint); | 316 | struct xfs_inode *, long, long, uint); |
310 | extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, | 317 | extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, |
311 | struct xfs_mount *, struct xfs_dquot *, | 318 | struct xfs_mount *, struct xfs_dquot *, |
312 | struct xfs_dquot *, long, long, uint); | 319 | struct xfs_dquot *, long, long, uint); |
313 | 320 | ||
314 | extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, | 321 | extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, |
315 | struct xfs_dquot **, struct xfs_dquot **); | 322 | struct xfs_dquot **, struct xfs_dquot **); |
316 | extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, | 323 | extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, |
317 | struct xfs_dquot *, struct xfs_dquot *); | 324 | struct xfs_dquot *, struct xfs_dquot *); |
318 | extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); | 325 | extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); |
319 | extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, | 326 | extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, |
320 | struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); | 327 | struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); |
321 | extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, | 328 | extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, |
322 | struct xfs_dquot *, struct xfs_dquot *, uint); | 329 | struct xfs_dquot *, struct xfs_dquot *, uint); |
323 | extern int xfs_qm_dqattach(struct xfs_inode *, uint); | 330 | extern int xfs_qm_dqattach(struct xfs_inode *, uint); |
324 | extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); | 331 | extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); |
325 | extern void xfs_qm_dqdetach(struct xfs_inode *); | 332 | extern void xfs_qm_dqdetach(struct xfs_inode *); |
326 | extern void xfs_qm_dqrele(struct xfs_dquot *); | 333 | extern void xfs_qm_dqrele(struct xfs_dquot *); |
327 | extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); | 334 | extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); |
328 | extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); | 335 | extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); |
329 | extern void xfs_qm_mount_quotas(struct xfs_mount *); | 336 | extern void xfs_qm_mount_quotas(struct xfs_mount *); |
330 | extern void xfs_qm_unmount(struct xfs_mount *); | 337 | extern void xfs_qm_unmount(struct xfs_mount *); |
331 | extern void xfs_qm_unmount_quotas(struct xfs_mount *); | 338 | extern void xfs_qm_unmount_quotas(struct xfs_mount *); |
332 | 339 | ||
333 | #else | 340 | #else |
334 | static inline int | 341 | static inline int |
335 | xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, | 342 | xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, |
336 | uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) | 343 | uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) |
337 | { | 344 | { |
338 | *udqp = NULL; | 345 | *udqp = NULL; |
339 | *gdqp = NULL; | 346 | *gdqp = NULL; |
340 | return 0; | 347 | return 0; |
341 | } | 348 | } |
342 | #define xfs_trans_dup_dqinfo(tp, tp2) | 349 | #define xfs_trans_dup_dqinfo(tp, tp2) |
343 | #define xfs_trans_free_dqinfo(tp) | 350 | #define xfs_trans_free_dqinfo(tp) |
344 | #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) | 351 | #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) |
345 | #define xfs_trans_apply_dquot_deltas(tp) | 352 | #define xfs_trans_apply_dquot_deltas(tp) |
346 | #define xfs_trans_unreserve_and_mod_dquots(tp) | 353 | #define xfs_trans_unreserve_and_mod_dquots(tp) |
347 | static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, | 354 | static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, |
348 | struct xfs_inode *ip, long nblks, long ninos, uint flags) | 355 | struct xfs_inode *ip, long nblks, long ninos, uint flags) |
349 | { | 356 | { |
350 | return 0; | 357 | return 0; |
351 | } | 358 | } |
352 | static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, | 359 | static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, |
353 | struct xfs_mount *mp, struct xfs_dquot *udqp, | 360 | struct xfs_mount *mp, struct xfs_dquot *udqp, |
354 | struct xfs_dquot *gdqp, long nblks, long nions, uint flags) | 361 | struct xfs_dquot *gdqp, long nblks, long nions, uint flags) |
355 | { | 362 | { |
356 | return 0; | 363 | return 0; |
357 | } | 364 | } |
358 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g) | 365 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g) |
359 | #define xfs_qm_vop_rename_dqattach(it) (0) | 366 | #define xfs_qm_vop_rename_dqattach(it) (0) |
360 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) | 367 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) |
361 | #define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) | 368 | #define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) |
362 | #define xfs_qm_dqattach(ip, fl) (0) | 369 | #define xfs_qm_dqattach(ip, fl) (0) |
363 | #define xfs_qm_dqattach_locked(ip, fl) (0) | 370 | #define xfs_qm_dqattach_locked(ip, fl) (0) |
364 | #define xfs_qm_dqdetach(ip) | 371 | #define xfs_qm_dqdetach(ip) |
365 | #define xfs_qm_dqrele(d) | 372 | #define xfs_qm_dqrele(d) |
366 | #define xfs_qm_statvfs(ip, s) | 373 | #define xfs_qm_statvfs(ip, s) |
367 | #define xfs_qm_newmount(mp, a, b) (0) | 374 | #define xfs_qm_newmount(mp, a, b) (0) |
368 | #define xfs_qm_mount_quotas(mp) | 375 | #define xfs_qm_mount_quotas(mp) |
369 | #define xfs_qm_unmount(mp) | 376 | #define xfs_qm_unmount(mp) |
370 | #define xfs_qm_unmount_quotas(mp) | 377 | #define xfs_qm_unmount_quotas(mp) |
371 | #endif /* CONFIG_XFS_QUOTA */ | 378 | #endif /* CONFIG_XFS_QUOTA */ |
372 | 379 | ||
373 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ | 380 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ |
374 | xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) | 381 | xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) |
375 | #define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ | 382 | #define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ |
376 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ | 383 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ |
377 | f | XFS_QMOPT_RES_REGBLKS) | 384 | f | XFS_QMOPT_RES_REGBLKS) |
378 | 385 | ||
379 | extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, | 386 | extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, |
380 | xfs_dqid_t, uint, uint, char *); | 387 | xfs_dqid_t, uint, uint, char *); |
381 | extern int xfs_mount_reset_sbqflags(struct xfs_mount *); | 388 | extern int xfs_mount_reset_sbqflags(struct xfs_mount *); |
389 | |||
390 | extern const struct xfs_buf_ops xfs_dquot_buf_ops; | ||
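
xfs_dquot_buf_ops exports the new read/write verifiers so that every path pulling a dquot buffer off disk gets the CRC and UUID checks. A usage sketch (hypothetical wrapper; the call shape mirrors how XFS read paths pass verifier ops to xfs_trans_read_buf()):

	/*
	 * Sketch: reading a dquot chunk with the verifier attached, so the
	 * CRCs are checked as the buffer comes off disk.
	 */
	static int
	example_read_dqchunk(
		struct xfs_mount	*mp,
		struct xfs_trans	*tp,
		struct xfs_dquot	*dqp,
		struct xfs_buf		**bpp)
	{
		return xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
					  dqp->q_blkno,
					  mp->m_quotainfo->qi_dqchunklen,
					  0, bpp, &xfs_dquot_buf_ops);
	}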
382 | 391 | ||
383 | #endif /* __KERNEL__ */ | 392 | #endif /* __KERNEL__ */ |
384 | #endif /* __XFS_QUOTA_H__ */ | 393 | #endif /* __XFS_QUOTA_H__ */ |
385 | 394 |