Commit 3211949f8998dde71d9fe2e063de045ece5e0473

Authored by Sunil Mushran
Committed by Joel Becker
1 parent 692684e19e

ocfs2: Do not initialize lvb in ocfs2_orphan_scan_lock_res_init()

We don't access the LVB in our ocfs2_*_lock_res_init() functions.

Since the LVB can become invalid during some cluster recovery
operations, the dlmglue must be able to handle an uninitialized
LVB.

For the orphan scan lock, we initialize an uninitialized LVB with our
scan sequence number plus one.  This starts a normal orphan scan
cycle.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

Showing 2 changed files with 4 additions and 4 deletions Inline Diff

1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dlmglue.c 4 * dlmglue.c
5 * 5 *
6 * Code which implements an OCFS2 specific interface to our DLM. 6 * Code which implements an OCFS2 specific interface to our DLM.
7 * 7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/types.h> 26 #include <linux/types.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/highmem.h> 28 #include <linux/highmem.h>
29 #include <linux/mm.h> 29 #include <linux/mm.h>
30 #include <linux/kthread.h> 30 #include <linux/kthread.h>
31 #include <linux/pagemap.h> 31 #include <linux/pagemap.h>
32 #include <linux/debugfs.h> 32 #include <linux/debugfs.h>
33 #include <linux/seq_file.h> 33 #include <linux/seq_file.h>
34 #include <linux/time.h> 34 #include <linux/time.h>
35 #include <linux/quotaops.h> 35 #include <linux/quotaops.h>
36 36
37 #define MLOG_MASK_PREFIX ML_DLM_GLUE 37 #define MLOG_MASK_PREFIX ML_DLM_GLUE
38 #include <cluster/masklog.h> 38 #include <cluster/masklog.h>
39 39
40 #include "ocfs2.h" 40 #include "ocfs2.h"
41 #include "ocfs2_lockingver.h" 41 #include "ocfs2_lockingver.h"
42 42
43 #include "alloc.h" 43 #include "alloc.h"
44 #include "dcache.h" 44 #include "dcache.h"
45 #include "dlmglue.h" 45 #include "dlmglue.h"
46 #include "extent_map.h" 46 #include "extent_map.h"
47 #include "file.h" 47 #include "file.h"
48 #include "heartbeat.h" 48 #include "heartbeat.h"
49 #include "inode.h" 49 #include "inode.h"
50 #include "journal.h" 50 #include "journal.h"
51 #include "stackglue.h" 51 #include "stackglue.h"
52 #include "slot_map.h" 52 #include "slot_map.h"
53 #include "super.h" 53 #include "super.h"
54 #include "uptodate.h" 54 #include "uptodate.h"
55 #include "quota.h" 55 #include "quota.h"
56 56
57 #include "buffer_head_io.h" 57 #include "buffer_head_io.h"
58 58
/*
 * A task waiting for a lock resource's state to change.  mw_mask and
 * mw_goal presumably describe the l_flags condition being waited for
 * (mask of interesting bits, wanted value) -- confirm against the
 * mask-waiter helpers later in this file.
 */
struct ocfs2_mask_waiter {
	struct list_head	mw_item;	/* linkage on the lockres' waiter list */
	int			mw_status;	/* result handed back to the waiter */
	struct completion	mw_complete;	/* signalled when the wait is satisfied */
	unsigned long		mw_mask;
	unsigned long		mw_goal;
#ifdef CONFIG_OCFS2_FS_STATS
	unsigned long long	mw_lock_start;	/* ns timestamp when the wait began;
						 * set by ocfs2_init_start_time() */
#endif
};
69 69
/* Per-lock-type ways of recovering the ocfs2_super from ->l_priv. */
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);

/*
 * Return value from ->downconvert_worker functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock()
 *
 */
enum ocfs2_unblock_action {
	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
				      * ->post_unlock callback */
	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
				      * ->post_unlock() callback. */
};

struct ocfs2_unblock_ctl {
	int requeue;				/* nonzero: retry the downconvert later */
	enum ocfs2_unblock_action unblock_action;
};

static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);

static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Convenience wrapper so LVB dumps record where they were requested. */
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
111 111
/* This aids in debugging situations where a bad LVB might be involved. */
static void ocfs2_dump_meta_lvb_info(u64 level,
				     const char *function,
				     unsigned int line,
				     struct ocfs2_lock_res *lockres)
{
	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);

	/* @function/@line identify the caller (see mlog_meta_lvb()).
	 * All lvb_* fields are stored big-endian on disk/wire, hence
	 * the be*_to_cpu() conversions below. */
	mlog(level, "LVB information for %s (called from %s:%u):\n",
	     lockres->l_name, function, line);
	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
	     be32_to_cpu(lvb->lvb_igeneration));
	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
	     be16_to_cpu(lvb->lvb_imode));
	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
	     be32_to_cpu(lvb->lvb_iattr));
}
136 136
137 137
/*
 * OCFS2 Lock Resource Operations
 *
 * These fine tune the behavior of the generic dlmglue locking infrastructure.
 *
 * The most basic of lock types can point ->l_priv to their respective
 * struct ocfs2_super and allow the default actions to manage things.
 *
 * Right now, each lock type also needs to implement an init function,
 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
 * should be called when the lock is no longer needed (i.e., object
 * destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
	 * this callback if ->l_priv is not an ocfs2_super pointer
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optionally called in the downconvert thread after a
	 * successful downconvert. The lockres will not be referenced
	 * after this callback is called, so it is safe to free
	 * memory, etc.
	 *
	 * The exact semantics of when this is called are controlled
	 * by ->downconvert_worker()
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Allow a lock type to add checks to determine whether it is
	 * safe to downconvert a lock. Return 0 to re-queue the
	 * downconvert at a later time, nonzero to continue.
	 *
	 * For most locks, the default checks that there are no
	 * incompatible holders are sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Allows a lock type to populate the lock value block. This
	 * is called on downconvert, and when we drop a lock.
	 *
	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
	 * in the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread when it is determined
	 * that a lock will be downconverted. This is called without
	 * any locks held so the function can do work that might
	 * schedule (syncing out data, etc).
	 *
	 * This should return any one of the ocfs2_unblock_action
	 * values, depending on what it wants the thread to do.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags which describe the specific requirements
	 * of a lock type. Descriptions of each individual flag follow.
	 */
	int flags;
};

/*
 * Some locks want to "refresh" potentially stale data when a
 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
 * individual lockres l_flags member from the ast function. It is
 * expected that the locking wrapper will clear the
 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * Indicate that a lock type makes use of the lock value block. The
 * ->set_lvb lock type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB 0x2
225 225
/*
 * Per-lock-type operation tables.  Types with an all-default behavior
 * leave every callback NULL and set .flags = 0.
 */
static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

/* Inode metadata lock: carries inode fields in its LVB and must
 * refresh the inode from disk on first meaningful acquisition. */
static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.check_downconvert = ocfs2_check_meta_downconvert,
	.set_lvb	= ocfs2_set_meta_lvb,
	.downconvert_worker = ocfs2_data_convert_worker,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_super_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
};

static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
	.flags		= 0,
};

/* Orphan scan lock: uses its LVB but defines no ->set_lvb, so dlmglue
 * must tolerate an uninitialized LVB for this type. */
static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
	.get_osb	= ocfs2_get_dentry_osb,
	.post_unlock	= ocfs2_dentry_post_unlock,
	.downconvert_worker = ocfs2_dentry_convert_worker,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
	.get_osb	= ocfs2_get_file_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
	.set_lvb	= ocfs2_set_qinfo_lvb,
	.get_osb	= ocfs2_get_qinfo_osb,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};
277 277
278 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 278 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
279 { 279 {
280 return lockres->l_type == OCFS2_LOCK_TYPE_META || 280 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
281 lockres->l_type == OCFS2_LOCK_TYPE_RW || 281 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
282 lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 282 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
283 } 283 }
284 284
/* Fetch the inode backing an inode-type lockres; BUG on misuse. */
static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
{
	/* Only the meta/rw/open lock types stash an inode in l_priv. */
	BUG_ON(!ocfs2_is_inode_lock(lockres));

	return (struct inode *) lockres->l_priv;
}
291 291
/* Fetch the dentry-lock object behind a dentry-type lockres; BUG on misuse. */
static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);

	return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
298 298
/* Fetch the quota-info object behind a qinfo-type lockres; BUG on misuse. */
static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);

	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
}
305 305
/*
 * Resolve the ocfs2_super for any lockres: use the type's ->get_osb
 * callback if one is defined, otherwise l_priv holds the osb directly
 * (the default documented in struct ocfs2_lock_res_ops).
 */
static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
	if (lockres->l_ops->get_osb)
		return lockres->l_ops->get_osb(lockres);

	return (struct ocfs2_super *)lockres->l_priv;
}
313 313
/* Forward declarations for the generic lock machinery defined below. */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags);
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted);
static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
				 struct ocfs2_lock_res *lockres,
				 int level);
static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert);
/* Dentry lock names embed a binary inode number past
 * OCFS2_DENTRY_LOCK_INO_START, so they get a different (length-bounded)
 * format than the plain-string names of every other lock type. */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     _err, _func, _lockres->l_name);					\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name,	\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level,
				  int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);
358 358
359 359
/*
 * Build the canonical lock resource name into @name: one lock-type
 * character, pad, 16 hex digits of block number, 8 hex digits of
 * generation.  The result is always exactly OCFS2_LOCK_ID_MAX_LEN - 1
 * characters (enforced by the BUG_ON below).
 */
static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
				  u64 blkno,
				  u32 generation,
				  char *name)
{
	int len;

	mlog_entry_void();

	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);

	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
		       (long long)blkno, generation);

	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));

	mlog(0, "built lock resource with name: %s\n", name);

	mlog_exit_void();
}
381 381
/* Protects every d_lockres_tracking list across all mounts. */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);

/* Publish @res on the per-mount debug list for the dlm debug interface. */
static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
				       struct ocfs2_dlm_debug *dlm_debug)
{
	mlog(0, "Add tracking for lockres %s\n", res->l_name);

	spin_lock(&ocfs2_dlm_tracking_lock);
	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
	spin_unlock(&ocfs2_dlm_tracking_lock);
}

static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
{
	spin_lock(&ocfs2_dlm_tracking_lock);
	/* The list_empty() check makes removal a no-op if the lockres
	 * was never added or was already removed. */
	if (!list_empty(&res->l_debug_list))
		list_del_init(&res->l_debug_list);
	spin_unlock(&ocfs2_dlm_tracking_lock);
}
401 401
#ifdef CONFIG_OCFS2_FS_STATS
/* Reset all acquisition counters/timers for a (re)initialized lockres. */
static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
	res->l_lock_num_prmode = 0;
	res->l_lock_num_prmode_failed = 0;
	res->l_lock_total_prmode = 0;
	res->l_lock_max_prmode = 0;
	res->l_lock_num_exmode = 0;
	res->l_lock_num_exmode_failed = 0;
	res->l_lock_total_exmode = 0;
	res->l_lock_max_exmode = 0;
	res->l_lock_refresh = 0;
}

/*
 * Record one completed lock attempt: bump the count for @level, add
 * the elapsed wait time (ns since mw->mw_lock_start), track the
 * maximum, and count failures when @ret is nonzero.  Levels other
 * than PR/EX are ignored.
 */
static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
				    struct ocfs2_mask_waiter *mw, int ret)
{
	unsigned long long *num, *sum;
	unsigned int *max, *failed;
	struct timespec ts = current_kernel_time();
	unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start;

	if (level == LKM_PRMODE) {
		num = &res->l_lock_num_prmode;
		sum = &res->l_lock_total_prmode;
		max = &res->l_lock_max_prmode;
		failed = &res->l_lock_num_prmode_failed;
	} else if (level == LKM_EXMODE) {
		num = &res->l_lock_num_exmode;
		sum = &res->l_lock_total_exmode;
		max = &res->l_lock_max_exmode;
		failed = &res->l_lock_num_exmode_failed;
	} else
		return;

	(*num)++;
	(*sum) += time;
	/* NOTE(review): *max is unsigned int while time is unsigned
	 * long long ns, so the assignment truncates waits longer than
	 * ~4.29s (2^32 ns).  Presumably acceptable for debug stats --
	 * confirm. */
	if (time > *max)
		*max = time;
	if (ret)
		(*failed)++;
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
	lockres->l_lock_refresh++;
}

/* Stamp the start of a wait, consumed later by ocfs2_update_lock_stats(). */
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
	struct timespec ts = current_kernel_time();
	mw->mw_lock_start = timespec_to_ns(&ts);
}
#else
/* !CONFIG_OCFS2_FS_STATS: stats collection compiles away to nothing. */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
}
static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level, struct ocfs2_mask_waiter *mw, int ret)
{
}
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
#endif
470 470
471 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 471 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
472 struct ocfs2_lock_res *res, 472 struct ocfs2_lock_res *res,
473 enum ocfs2_lock_type type, 473 enum ocfs2_lock_type type,
474 struct ocfs2_lock_res_ops *ops, 474 struct ocfs2_lock_res_ops *ops,
475 void *priv) 475 void *priv)
476 { 476 {
477 res->l_type = type; 477 res->l_type = type;
478 res->l_ops = ops; 478 res->l_ops = ops;
479 res->l_priv = priv; 479 res->l_priv = priv;
480 480
481 res->l_level = DLM_LOCK_IV; 481 res->l_level = DLM_LOCK_IV;
482 res->l_requested = DLM_LOCK_IV; 482 res->l_requested = DLM_LOCK_IV;
483 res->l_blocking = DLM_LOCK_IV; 483 res->l_blocking = DLM_LOCK_IV;
484 res->l_action = OCFS2_AST_INVALID; 484 res->l_action = OCFS2_AST_INVALID;
485 res->l_unlock_action = OCFS2_UNLOCK_INVALID; 485 res->l_unlock_action = OCFS2_UNLOCK_INVALID;
486 486
487 res->l_flags = OCFS2_LOCK_INITIALIZED; 487 res->l_flags = OCFS2_LOCK_INITIALIZED;
488 488
489 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 489 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
490 490
491 ocfs2_init_lock_stats(res); 491 ocfs2_init_lock_stats(res);
492 } 492 }
493 493
494 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 494 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
495 { 495 {
496 /* This also clears out the lock status block */ 496 /* This also clears out the lock status block */
497 memset(res, 0, sizeof(struct ocfs2_lock_res)); 497 memset(res, 0, sizeof(struct ocfs2_lock_res));
498 spin_lock_init(&res->l_lock); 498 spin_lock_init(&res->l_lock);
499 init_waitqueue_head(&res->l_event); 499 init_waitqueue_head(&res->l_event);
500 INIT_LIST_HEAD(&res->l_blocked_list); 500 INIT_LIST_HEAD(&res->l_blocked_list);
501 INIT_LIST_HEAD(&res->l_mask_waiters); 501 INIT_LIST_HEAD(&res->l_mask_waiters);
502 } 502 }
503 503
504 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 504 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
505 enum ocfs2_lock_type type, 505 enum ocfs2_lock_type type,
506 unsigned int generation, 506 unsigned int generation,
507 struct inode *inode) 507 struct inode *inode)
508 { 508 {
509 struct ocfs2_lock_res_ops *ops; 509 struct ocfs2_lock_res_ops *ops;
510 510
511 switch(type) { 511 switch(type) {
512 case OCFS2_LOCK_TYPE_RW: 512 case OCFS2_LOCK_TYPE_RW:
513 ops = &ocfs2_inode_rw_lops; 513 ops = &ocfs2_inode_rw_lops;
514 break; 514 break;
515 case OCFS2_LOCK_TYPE_META: 515 case OCFS2_LOCK_TYPE_META:
516 ops = &ocfs2_inode_inode_lops; 516 ops = &ocfs2_inode_inode_lops;
517 break; 517 break;
518 case OCFS2_LOCK_TYPE_OPEN: 518 case OCFS2_LOCK_TYPE_OPEN:
519 ops = &ocfs2_inode_open_lops; 519 ops = &ocfs2_inode_open_lops;
520 break; 520 break;
521 default: 521 default:
522 mlog_bug_on_msg(1, "type: %d\n", type); 522 mlog_bug_on_msg(1, "type: %d\n", type);
523 ops = NULL; /* thanks, gcc */ 523 ops = NULL; /* thanks, gcc */
524 break; 524 break;
525 }; 525 };
526 526
527 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 527 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
528 generation, res->l_name); 528 generation, res->l_name);
529 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 529 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
530 } 530 }
531 531
532 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 532 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
533 { 533 {
534 struct inode *inode = ocfs2_lock_res_inode(lockres); 534 struct inode *inode = ocfs2_lock_res_inode(lockres);
535 535
536 return OCFS2_SB(inode->i_sb); 536 return OCFS2_SB(inode->i_sb);
537 } 537 }
538 538
539 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 539 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
540 { 540 {
541 struct ocfs2_mem_dqinfo *info = lockres->l_priv; 541 struct ocfs2_mem_dqinfo *info = lockres->l_priv;
542 542
543 return OCFS2_SB(info->dqi_gi.dqi_sb); 543 return OCFS2_SB(info->dqi_gi.dqi_sb);
544 } 544 }
545 545
546 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 546 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
547 { 547 {
548 struct ocfs2_file_private *fp = lockres->l_priv; 548 struct ocfs2_file_private *fp = lockres->l_priv;
549 549
550 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 550 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
551 } 551 }
552 552
553 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 553 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
554 { 554 {
555 __be64 inode_blkno_be; 555 __be64 inode_blkno_be;
556 556
557 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 557 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
558 sizeof(__be64)); 558 sizeof(__be64));
559 559
560 return be64_to_cpu(inode_blkno_be); 560 return be64_to_cpu(inode_blkno_be);
561 } 561 }
562 562
563 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 563 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
564 { 564 {
565 struct ocfs2_dentry_lock *dl = lockres->l_priv; 565 struct ocfs2_dentry_lock *dl = lockres->l_priv;
566 566
567 return OCFS2_SB(dl->dl_inode->i_sb); 567 return OCFS2_SB(dl->dl_inode->i_sb);
568 } 568 }
569 569
570 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 570 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
571 u64 parent, struct inode *inode) 571 u64 parent, struct inode *inode)
572 { 572 {
573 int len; 573 int len;
574 u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 574 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
575 __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 575 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
576 struct ocfs2_lock_res *lockres = &dl->dl_lockres; 576 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
577 577
578 ocfs2_lock_res_init_once(lockres); 578 ocfs2_lock_res_init_once(lockres);
579 579
580 /* 580 /*
581 * Unfortunately, the standard lock naming scheme won't work 581 * Unfortunately, the standard lock naming scheme won't work
582 * here because we have two 16 byte values to use. Instead, 582 * here because we have two 16 byte values to use. Instead,
583 * we'll stuff the inode number as a binary value. We still 583 * we'll stuff the inode number as a binary value. We still
584 * want error prints to show something without garbling the 584 * want error prints to show something without garbling the
585 * display, so drop a null byte in there before the inode 585 * display, so drop a null byte in there before the inode
586 * number. A future version of OCFS2 will likely use all 586 * number. A future version of OCFS2 will likely use all
587 * binary lock names. The stringified names have been a 587 * binary lock names. The stringified names have been a
588 * tremendous aid in debugging, but now that the debugfs 588 * tremendous aid in debugging, but now that the debugfs
589 * interface exists, we can mangle things there if need be. 589 * interface exists, we can mangle things there if need be.
590 * 590 *
591 * NOTE: We also drop the standard "pad" value (the total lock 591 * NOTE: We also drop the standard "pad" value (the total lock
592 * name size stays the same though - the last part is all 592 * name size stays the same though - the last part is all
593 * zeros due to the memset in ocfs2_lock_res_init_once() 593 * zeros due to the memset in ocfs2_lock_res_init_once()
594 */ 594 */
595 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 595 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
596 "%c%016llx", 596 "%c%016llx",
597 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 597 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
598 (long long)parent); 598 (long long)parent);
599 599
600 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 600 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
601 601
602 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 602 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
603 sizeof(__be64)); 603 sizeof(__be64));
604 604
605 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 605 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
606 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 606 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
607 dl); 607 dl);
608 } 608 }
609 609
610 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 610 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
611 struct ocfs2_super *osb) 611 struct ocfs2_super *osb)
612 { 612 {
613 /* Superblock lockres doesn't come from a slab so we call init 613 /* Superblock lockres doesn't come from a slab so we call init
614 * once on it manually. */ 614 * once on it manually. */
615 ocfs2_lock_res_init_once(res); 615 ocfs2_lock_res_init_once(res);
616 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 616 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
617 0, res->l_name); 617 0, res->l_name);
618 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 618 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
619 &ocfs2_super_lops, osb); 619 &ocfs2_super_lops, osb);
620 } 620 }
621 621
622 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 622 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
623 struct ocfs2_super *osb) 623 struct ocfs2_super *osb)
624 { 624 {
625 /* Rename lockres doesn't come from a slab so we call init 625 /* Rename lockres doesn't come from a slab so we call init
626 * once on it manually. */ 626 * once on it manually. */
627 ocfs2_lock_res_init_once(res); 627 ocfs2_lock_res_init_once(res);
628 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 628 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
629 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 629 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
630 &ocfs2_rename_lops, osb); 630 &ocfs2_rename_lops, osb);
631 } 631 }
632 632
633 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 633 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
634 struct ocfs2_super *osb) 634 struct ocfs2_super *osb)
635 { 635 {
636 /* nfs_sync lockres doesn't come from a slab so we call init 636 /* nfs_sync lockres doesn't come from a slab so we call init
637 * once on it manually. */ 637 * once on it manually. */
638 ocfs2_lock_res_init_once(res); 638 ocfs2_lock_res_init_once(res);
639 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 639 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
640 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 640 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
641 &ocfs2_nfs_sync_lops, osb); 641 &ocfs2_nfs_sync_lops, osb);
642 } 642 }
643 643
644 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 644 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
645 struct ocfs2_super *osb) 645 struct ocfs2_super *osb)
646 { 646 {
647 struct ocfs2_orphan_scan_lvb *lvb;
648
649 ocfs2_lock_res_init_once(res); 647 ocfs2_lock_res_init_once(res);
650 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 648 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
651 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 649 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
652 &ocfs2_orphan_scan_lops, osb); 650 &ocfs2_orphan_scan_lops, osb);
653 lvb = ocfs2_dlm_lvb(&res->l_lksb);
654 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
655 } 651 }
656 652
657 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 653 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
658 struct ocfs2_file_private *fp) 654 struct ocfs2_file_private *fp)
659 { 655 {
660 struct inode *inode = fp->fp_file->f_mapping->host; 656 struct inode *inode = fp->fp_file->f_mapping->host;
661 struct ocfs2_inode_info *oi = OCFS2_I(inode); 657 struct ocfs2_inode_info *oi = OCFS2_I(inode);
662 658
663 ocfs2_lock_res_init_once(lockres); 659 ocfs2_lock_res_init_once(lockres);
664 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 660 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
665 inode->i_generation, lockres->l_name); 661 inode->i_generation, lockres->l_name);
666 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 662 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
667 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 663 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
668 fp); 664 fp);
669 lockres->l_flags |= OCFS2_LOCK_NOCACHE; 665 lockres->l_flags |= OCFS2_LOCK_NOCACHE;
670 } 666 }
671 667
672 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 668 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
673 struct ocfs2_mem_dqinfo *info) 669 struct ocfs2_mem_dqinfo *info)
674 { 670 {
675 ocfs2_lock_res_init_once(lockres); 671 ocfs2_lock_res_init_once(lockres);
676 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, 672 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
677 0, lockres->l_name); 673 0, lockres->l_name);
678 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 674 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
679 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 675 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
680 info); 676 info);
681 } 677 }
682 678
683 void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 679 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
684 { 680 {
685 mlog_entry_void(); 681 mlog_entry_void();
686 682
687 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 683 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
688 return; 684 return;
689 685
690 ocfs2_remove_lockres_tracking(res); 686 ocfs2_remove_lockres_tracking(res);
691 687
692 mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 688 mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
693 "Lockres %s is on the blocked list\n", 689 "Lockres %s is on the blocked list\n",
694 res->l_name); 690 res->l_name);
695 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 691 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
696 "Lockres %s has mask waiters pending\n", 692 "Lockres %s has mask waiters pending\n",
697 res->l_name); 693 res->l_name);
698 mlog_bug_on_msg(spin_is_locked(&res->l_lock), 694 mlog_bug_on_msg(spin_is_locked(&res->l_lock),
699 "Lockres %s is locked\n", 695 "Lockres %s is locked\n",
700 res->l_name); 696 res->l_name);
701 mlog_bug_on_msg(res->l_ro_holders, 697 mlog_bug_on_msg(res->l_ro_holders,
702 "Lockres %s has %u ro holders\n", 698 "Lockres %s has %u ro holders\n",
703 res->l_name, res->l_ro_holders); 699 res->l_name, res->l_ro_holders);
704 mlog_bug_on_msg(res->l_ex_holders, 700 mlog_bug_on_msg(res->l_ex_holders,
705 "Lockres %s has %u ex holders\n", 701 "Lockres %s has %u ex holders\n",
706 res->l_name, res->l_ex_holders); 702 res->l_name, res->l_ex_holders);
707 703
708 /* Need to clear out the lock status block for the dlm */ 704 /* Need to clear out the lock status block for the dlm */
709 memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 705 memset(&res->l_lksb, 0, sizeof(res->l_lksb));
710 706
711 res->l_flags = 0UL; 707 res->l_flags = 0UL;
712 mlog_exit_void(); 708 mlog_exit_void();
713 } 709 }
714 710
715 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 711 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
716 int level) 712 int level)
717 { 713 {
718 mlog_entry_void(); 714 mlog_entry_void();
719 715
720 BUG_ON(!lockres); 716 BUG_ON(!lockres);
721 717
722 switch(level) { 718 switch(level) {
723 case DLM_LOCK_EX: 719 case DLM_LOCK_EX:
724 lockres->l_ex_holders++; 720 lockres->l_ex_holders++;
725 break; 721 break;
726 case DLM_LOCK_PR: 722 case DLM_LOCK_PR:
727 lockres->l_ro_holders++; 723 lockres->l_ro_holders++;
728 break; 724 break;
729 default: 725 default:
730 BUG(); 726 BUG();
731 } 727 }
732 728
733 mlog_exit_void(); 729 mlog_exit_void();
734 } 730 }
735 731
736 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 732 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
737 int level) 733 int level)
738 { 734 {
739 mlog_entry_void(); 735 mlog_entry_void();
740 736
741 BUG_ON(!lockres); 737 BUG_ON(!lockres);
742 738
743 switch(level) { 739 switch(level) {
744 case DLM_LOCK_EX: 740 case DLM_LOCK_EX:
745 BUG_ON(!lockres->l_ex_holders); 741 BUG_ON(!lockres->l_ex_holders);
746 lockres->l_ex_holders--; 742 lockres->l_ex_holders--;
747 break; 743 break;
748 case DLM_LOCK_PR: 744 case DLM_LOCK_PR:
749 BUG_ON(!lockres->l_ro_holders); 745 BUG_ON(!lockres->l_ro_holders);
750 lockres->l_ro_holders--; 746 lockres->l_ro_holders--;
751 break; 747 break;
752 default: 748 default:
753 BUG(); 749 BUG();
754 } 750 }
755 mlog_exit_void(); 751 mlog_exit_void();
756 } 752 }
757 753
758 /* WARNING: This function lives in a world where the only three lock 754 /* WARNING: This function lives in a world where the only three lock
759 * levels are EX, PR, and NL. It *will* have to be adjusted when more 755 * levels are EX, PR, and NL. It *will* have to be adjusted when more
760 * lock types are added. */ 756 * lock types are added. */
761 static inline int ocfs2_highest_compat_lock_level(int level) 757 static inline int ocfs2_highest_compat_lock_level(int level)
762 { 758 {
763 int new_level = DLM_LOCK_EX; 759 int new_level = DLM_LOCK_EX;
764 760
765 if (level == DLM_LOCK_EX) 761 if (level == DLM_LOCK_EX)
766 new_level = DLM_LOCK_NL; 762 new_level = DLM_LOCK_NL;
767 else if (level == DLM_LOCK_PR) 763 else if (level == DLM_LOCK_PR)
768 new_level = DLM_LOCK_PR; 764 new_level = DLM_LOCK_PR;
769 return new_level; 765 return new_level;
770 } 766 }
771 767
772 static void lockres_set_flags(struct ocfs2_lock_res *lockres, 768 static void lockres_set_flags(struct ocfs2_lock_res *lockres,
773 unsigned long newflags) 769 unsigned long newflags)
774 { 770 {
775 struct ocfs2_mask_waiter *mw, *tmp; 771 struct ocfs2_mask_waiter *mw, *tmp;
776 772
777 assert_spin_locked(&lockres->l_lock); 773 assert_spin_locked(&lockres->l_lock);
778 774
779 lockres->l_flags = newflags; 775 lockres->l_flags = newflags;
780 776
781 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 777 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
782 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 778 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
783 continue; 779 continue;
784 780
785 list_del_init(&mw->mw_item); 781 list_del_init(&mw->mw_item);
786 mw->mw_status = 0; 782 mw->mw_status = 0;
787 complete(&mw->mw_complete); 783 complete(&mw->mw_complete);
788 } 784 }
789 } 785 }
790 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 786 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
791 { 787 {
792 lockres_set_flags(lockres, lockres->l_flags | or); 788 lockres_set_flags(lockres, lockres->l_flags | or);
793 } 789 }
794 static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 790 static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
795 unsigned long clear) 791 unsigned long clear)
796 { 792 {
797 lockres_set_flags(lockres, lockres->l_flags & ~clear); 793 lockres_set_flags(lockres, lockres->l_flags & ~clear);
798 } 794 }
799 795
800 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 796 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
801 { 797 {
802 mlog_entry_void(); 798 mlog_entry_void();
803 799
804 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 800 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
805 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 801 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
806 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 802 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
807 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 803 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
808 804
809 lockres->l_level = lockres->l_requested; 805 lockres->l_level = lockres->l_requested;
810 if (lockres->l_level <= 806 if (lockres->l_level <=
811 ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 807 ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
812 lockres->l_blocking = DLM_LOCK_NL; 808 lockres->l_blocking = DLM_LOCK_NL;
813 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 809 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
814 } 810 }
815 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 811 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
816 812
817 mlog_exit_void(); 813 mlog_exit_void();
818 } 814 }
819 815
820 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 816 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
821 { 817 {
822 mlog_entry_void(); 818 mlog_entry_void();
823 819
824 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 820 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
825 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 821 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
826 822
827 /* Convert from RO to EX doesn't really need anything as our 823 /* Convert from RO to EX doesn't really need anything as our
828 * information is already up to data. Convert from NL to 824 * information is already up to data. Convert from NL to
829 * *anything* however should mark ourselves as needing an 825 * *anything* however should mark ourselves as needing an
830 * update */ 826 * update */
831 if (lockres->l_level == DLM_LOCK_NL && 827 if (lockres->l_level == DLM_LOCK_NL &&
832 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 828 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
833 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 829 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
834 830
835 lockres->l_level = lockres->l_requested; 831 lockres->l_level = lockres->l_requested;
836 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 832 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
837 833
838 mlog_exit_void(); 834 mlog_exit_void();
839 } 835 }
840 836
841 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 837 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
842 { 838 {
843 mlog_entry_void(); 839 mlog_entry_void();
844 840
845 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 841 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
846 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 842 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
847 843
848 if (lockres->l_requested > DLM_LOCK_NL && 844 if (lockres->l_requested > DLM_LOCK_NL &&
849 !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 845 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
850 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 846 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
851 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 847 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
852 848
853 lockres->l_level = lockres->l_requested; 849 lockres->l_level = lockres->l_requested;
854 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 850 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
855 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 851 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
856 852
857 mlog_exit_void(); 853 mlog_exit_void();
858 } 854 }
859 855
860 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 856 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
861 int level) 857 int level)
862 { 858 {
863 int needs_downconvert = 0; 859 int needs_downconvert = 0;
864 mlog_entry_void(); 860 mlog_entry_void();
865 861
866 assert_spin_locked(&lockres->l_lock); 862 assert_spin_locked(&lockres->l_lock);
867 863
868 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 864 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
869 865
870 if (level > lockres->l_blocking) { 866 if (level > lockres->l_blocking) {
871 /* only schedule a downconvert if we haven't already scheduled 867 /* only schedule a downconvert if we haven't already scheduled
872 * one that goes low enough to satisfy the level we're 868 * one that goes low enough to satisfy the level we're
873 * blocking. this also catches the case where we get 869 * blocking. this also catches the case where we get
874 * duplicate BASTs */ 870 * duplicate BASTs */
875 if (ocfs2_highest_compat_lock_level(level) < 871 if (ocfs2_highest_compat_lock_level(level) <
876 ocfs2_highest_compat_lock_level(lockres->l_blocking)) 872 ocfs2_highest_compat_lock_level(lockres->l_blocking))
877 needs_downconvert = 1; 873 needs_downconvert = 1;
878 874
879 lockres->l_blocking = level; 875 lockres->l_blocking = level;
880 } 876 }
881 877
882 mlog_exit(needs_downconvert); 878 mlog_exit(needs_downconvert);
883 return needs_downconvert; 879 return needs_downconvert;
884 } 880 }
885 881
/*
 * OCFS2_LOCK_PENDING and l_pending_gen.
 *
 * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
 * for more details on the race.
 *
 * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
 * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
 * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
 * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
 * the caller is going to try to clear PENDING again.  If nothing else is
 * happening, __lockres_clear_pending() sees PENDING is unset and does
 * nothing.
 *
 * But what if another path (eg downconvert thread) has just started a
 * new locking action?  The other path has re-set PENDING.  Our path
 * cannot clear PENDING, because that will re-open the original race
 * window.
 *
 * [Example]
 *
 * ocfs2_meta_lock()
 *  ocfs2_cluster_lock()
 *   set BUSY
 *   set PENDING
 *   drop l_lock
 *   ocfs2_dlm_lock()
 *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
 *     clear PENDING			 ocfs2_unblock_lock()
 *					  take_l_lock
 *					  !BUSY
 *					  ocfs2_prepare_downconvert()
 *					   set BUSY
 *					   set PENDING
 *					  drop l_lock
 *   take l_lock
 *   clear PENDING
 *   drop l_lock
 *			<window>
 *					  ocfs2_dlm_lock()
 *
 * So as you can see, we now have a window where l_lock is not held,
 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
 *
 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
 * set by ocfs2_prepare_downconvert().  That wasn't nice.
 *
 * To solve this we introduce l_pending_gen.  A call to
 * lockres_clear_pending() will only do so when it is passed a generation
 * number that matches the lockres.  lockres_set_pending() will return the
 * current generation number.  When ocfs2_cluster_lock() goes to clear
 * PENDING, it passes the generation it got from set_pending().  In our
 * example above, the generation numbers will *not* match.  Thus,
 * ocfs2_cluster_lock() will not clear the PENDING set by
 * ocfs2_prepare_downconvert().
 */

943 939
944 /* Unlocked version for ocfs2_locking_ast() */ 940 /* Unlocked version for ocfs2_locking_ast() */
945 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 941 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
946 unsigned int generation, 942 unsigned int generation,
947 struct ocfs2_super *osb) 943 struct ocfs2_super *osb)
948 { 944 {
949 assert_spin_locked(&lockres->l_lock); 945 assert_spin_locked(&lockres->l_lock);
950 946
951 /* 947 /*
952 * The ast and locking functions can race us here. The winner 948 * The ast and locking functions can race us here. The winner
953 * will clear pending, the loser will not. 949 * will clear pending, the loser will not.
954 */ 950 */
955 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 951 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
956 (lockres->l_pending_gen != generation)) 952 (lockres->l_pending_gen != generation))
957 return; 953 return;
958 954
959 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 955 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
960 lockres->l_pending_gen++; 956 lockres->l_pending_gen++;
961 957
962 /* 958 /*
963 * The downconvert thread may have skipped us because we 959 * The downconvert thread may have skipped us because we
964 * were PENDING. Wake it up. 960 * were PENDING. Wake it up.
965 */ 961 */
966 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 962 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
967 ocfs2_wake_downconvert_thread(osb); 963 ocfs2_wake_downconvert_thread(osb);
968 } 964 }
969 965
970 /* Locked version for callers of ocfs2_dlm_lock() */ 966 /* Locked version for callers of ocfs2_dlm_lock() */
971 static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 967 static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
972 unsigned int generation, 968 unsigned int generation,
973 struct ocfs2_super *osb) 969 struct ocfs2_super *osb)
974 { 970 {
975 unsigned long flags; 971 unsigned long flags;
976 972
977 spin_lock_irqsave(&lockres->l_lock, flags); 973 spin_lock_irqsave(&lockres->l_lock, flags);
978 __lockres_clear_pending(lockres, generation, osb); 974 __lockres_clear_pending(lockres, generation, osb);
979 spin_unlock_irqrestore(&lockres->l_lock, flags); 975 spin_unlock_irqrestore(&lockres->l_lock, flags);
980 } 976 }
981 977
982 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 978 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
983 { 979 {
984 assert_spin_locked(&lockres->l_lock); 980 assert_spin_locked(&lockres->l_lock);
985 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 981 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
986 982
987 lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 983 lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
988 984
989 return lockres->l_pending_gen; 985 return lockres->l_pending_gen;
990 } 986 }
991 987
992 988
993 static void ocfs2_blocking_ast(void *opaque, int level) 989 static void ocfs2_blocking_ast(void *opaque, int level)
994 { 990 {
995 struct ocfs2_lock_res *lockres = opaque; 991 struct ocfs2_lock_res *lockres = opaque;
996 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 992 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
997 int needs_downconvert; 993 int needs_downconvert;
998 unsigned long flags; 994 unsigned long flags;
999 995
1000 BUG_ON(level <= DLM_LOCK_NL); 996 BUG_ON(level <= DLM_LOCK_NL);
1001 997
1002 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 998 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
1003 lockres->l_name, level, lockres->l_level, 999 lockres->l_name, level, lockres->l_level,
1004 ocfs2_lock_type_string(lockres->l_type)); 1000 ocfs2_lock_type_string(lockres->l_type));
1005 1001
1006 /* 1002 /*
1007 * We can skip the bast for locks which don't enable caching - 1003 * We can skip the bast for locks which don't enable caching -
1008 * they'll be dropped at the earliest possible time anyway. 1004 * they'll be dropped at the earliest possible time anyway.
1009 */ 1005 */
1010 if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1006 if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1011 return; 1007 return;
1012 1008
1013 spin_lock_irqsave(&lockres->l_lock, flags); 1009 spin_lock_irqsave(&lockres->l_lock, flags);
1014 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1010 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1015 if (needs_downconvert) 1011 if (needs_downconvert)
1016 ocfs2_schedule_blocked_lock(osb, lockres); 1012 ocfs2_schedule_blocked_lock(osb, lockres);
1017 spin_unlock_irqrestore(&lockres->l_lock, flags); 1013 spin_unlock_irqrestore(&lockres->l_lock, flags);
1018 1014
1019 wake_up(&lockres->l_event); 1015 wake_up(&lockres->l_event);
1020 1016
1021 ocfs2_wake_downconvert_thread(osb); 1017 ocfs2_wake_downconvert_thread(osb);
1022 } 1018 }
1023 1019
/*
 * AST callback for requests we issued through ocfs2_dlm_lock() (see
 * ocfs2_lock_create()/ocfs2_cluster_lock(), which pass the lockres as
 * the callback argument).  Dispatches on the l_action recorded when
 * the request was made, then clears the PENDING state.
 */
static void ocfs2_locking_ast(void *opaque)
{
	struct ocfs2_lock_res *lockres = opaque;
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	int status;

	spin_lock_irqsave(&lockres->l_lock, flags);

	status = ocfs2_dlm_lock_status(&lockres->l_lksb);

	/* Request wasn't granted (e.g. a NOQUEUE attempt); drop BUSY and
	 * fall through to the common PENDING/wakeup handling below. */
	if (status == -EAGAIN) {
		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
		goto out;
	}

	if (status) {
		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
		     lockres->l_name, status);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		return;
	}

	switch(lockres->l_action) {
	case OCFS2_AST_ATTACH:
		ocfs2_generic_handle_attach_action(lockres);
		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
		break;
	case OCFS2_AST_CONVERT:
		ocfs2_generic_handle_convert_action(lockres);
		break;
	case OCFS2_AST_DOWNCONVERT:
		ocfs2_generic_handle_downconvert_action(lockres);
		break;
	default:
		mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
		     "lockres flags = 0x%lx, unlock action: %u\n",
		     lockres->l_name, lockres->l_action, lockres->l_flags,
		     lockres->l_unlock_action);
		BUG();
	}
out:
	/* set it to something invalid so if we get called again we
	 * can catch it. */
	lockres->l_action = OCFS2_AST_INVALID;

	/* Did we try to cancel this lock?  Clear that state */
	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;

	/*
	 * We may have beaten the locking functions here.  We certainly
	 * know that dlm_lock() has been called :-)
	 * Because we can't have two lock calls in flight at once, we
	 * can use lockres->l_pending_gen.
	 */
	__lockres_clear_pending(lockres, lockres->l_pending_gen, osb);

	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}
1085 1081
1086 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1082 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1087 int convert) 1083 int convert)
1088 { 1084 {
1089 unsigned long flags; 1085 unsigned long flags;
1090 1086
1091 mlog_entry_void(); 1087 mlog_entry_void();
1092 spin_lock_irqsave(&lockres->l_lock, flags); 1088 spin_lock_irqsave(&lockres->l_lock, flags);
1093 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1089 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1094 if (convert) 1090 if (convert)
1095 lockres->l_action = OCFS2_AST_INVALID; 1091 lockres->l_action = OCFS2_AST_INVALID;
1096 else 1092 else
1097 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1093 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1098 spin_unlock_irqrestore(&lockres->l_lock, flags); 1094 spin_unlock_irqrestore(&lockres->l_lock, flags);
1099 1095
1100 wake_up(&lockres->l_event); 1096 wake_up(&lockres->l_event);
1101 mlog_exit_void(); 1097 mlog_exit_void();
1102 } 1098 }
1103 1099
/* Note: If we detect another process working on the lock (i.e.,
 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
 * to do the right thing in that case.
 */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags)
{
	int ret = 0;
	unsigned long flags;
	unsigned int gen;

	mlog_entry_void();

	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
	     dlm_flags);

	spin_lock_irqsave(&lockres->l_lock, flags);
	/* Already attached, or a call is in flight: nothing to create. */
	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	lockres->l_action = OCFS2_AST_ATTACH;
	lockres->l_requested = level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	/* Snapshot the pending generation so the ast can race us safely. */
	gen = lockres_set_pending(lockres);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_dlm_lock(osb->cconn,
			     level,
			     &lockres->l_lksb,
			     dlm_flags,
			     lockres->l_name,
			     OCFS2_LOCK_ID_MAX_LEN - 1,
			     lockres);
	lockres_clear_pending(lockres, gen, osb);
	if (ret) {
		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
		ocfs2_recover_from_dlm_error(lockres, 1);
	}

	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);

bail:
	mlog_exit(ret);
	return ret;
}
1154 1150
1155 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1151 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1156 int flag) 1152 int flag)
1157 { 1153 {
1158 unsigned long flags; 1154 unsigned long flags;
1159 int ret; 1155 int ret;
1160 1156
1161 spin_lock_irqsave(&lockres->l_lock, flags); 1157 spin_lock_irqsave(&lockres->l_lock, flags);
1162 ret = lockres->l_flags & flag; 1158 ret = lockres->l_flags & flag;
1163 spin_unlock_irqrestore(&lockres->l_lock, flags); 1159 spin_unlock_irqrestore(&lockres->l_lock, flags);
1164 1160
1165 return ret; 1161 return ret;
1166 } 1162 }
1167 1163
/* Sleep on l_event until OCFS2_LOCK_BUSY clears, i.e. until no dlm
 * call is in flight on this lockres. */
static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
}
1174 1170
/* Sleep on l_event until OCFS2_LOCK_REFRESHING clears. */
static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
}
1181 1177
1182 /* predict what lock level we'll be dropping down to on behalf 1178 /* predict what lock level we'll be dropping down to on behalf
1183 * of another node, and return true if the currently wanted 1179 * of another node, and return true if the currently wanted
1184 * level will be compatible with it. */ 1180 * level will be compatible with it. */
1185 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1181 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1186 int wanted) 1182 int wanted)
1187 { 1183 {
1188 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1184 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1189 1185
1190 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1186 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1191 } 1187 }
1192 1188
1193 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1189 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1194 { 1190 {
1195 INIT_LIST_HEAD(&mw->mw_item); 1191 INIT_LIST_HEAD(&mw->mw_item);
1196 init_completion(&mw->mw_complete); 1192 init_completion(&mw->mw_complete);
1197 ocfs2_init_start_time(mw); 1193 ocfs2_init_start_time(mw);
1198 } 1194 }
1199 1195
1200 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1196 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1201 { 1197 {
1202 wait_for_completion(&mw->mw_complete); 1198 wait_for_completion(&mw->mw_complete);
1203 /* Re-arm the completion in case we want to wait on it again */ 1199 /* Re-arm the completion in case we want to wait on it again */
1204 INIT_COMPLETION(mw->mw_complete); 1200 INIT_COMPLETION(mw->mw_complete);
1205 return mw->mw_status; 1201 return mw->mw_status;
1206 } 1202 }
1207 1203
1208 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1204 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1209 struct ocfs2_mask_waiter *mw, 1205 struct ocfs2_mask_waiter *mw,
1210 unsigned long mask, 1206 unsigned long mask,
1211 unsigned long goal) 1207 unsigned long goal)
1212 { 1208 {
1213 BUG_ON(!list_empty(&mw->mw_item)); 1209 BUG_ON(!list_empty(&mw->mw_item));
1214 1210
1215 assert_spin_locked(&lockres->l_lock); 1211 assert_spin_locked(&lockres->l_lock);
1216 1212
1217 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1213 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1218 mw->mw_mask = mask; 1214 mw->mw_mask = mask;
1219 mw->mw_goal = goal; 1215 mw->mw_goal = goal;
1220 } 1216 }
1221 1217
1222 /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1218 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1223 * if the mask still hadn't reached its goal */ 1219 * if the mask still hadn't reached its goal */
1224 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1220 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1225 struct ocfs2_mask_waiter *mw) 1221 struct ocfs2_mask_waiter *mw)
1226 { 1222 {
1227 unsigned long flags; 1223 unsigned long flags;
1228 int ret = 0; 1224 int ret = 0;
1229 1225
1230 spin_lock_irqsave(&lockres->l_lock, flags); 1226 spin_lock_irqsave(&lockres->l_lock, flags);
1231 if (!list_empty(&mw->mw_item)) { 1227 if (!list_empty(&mw->mw_item)) {
1232 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1228 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1233 ret = -EBUSY; 1229 ret = -EBUSY;
1234 1230
1235 list_del_init(&mw->mw_item); 1231 list_del_init(&mw->mw_item);
1236 init_completion(&mw->mw_complete); 1232 init_completion(&mw->mw_complete);
1237 } 1233 }
1238 spin_unlock_irqrestore(&lockres->l_lock, flags); 1234 spin_unlock_irqrestore(&lockres->l_lock, flags);
1239 1235
1240 return ret; 1236 return ret;
1241 1237
1242 } 1238 }
1243 1239
1244 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1240 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1245 struct ocfs2_lock_res *lockres) 1241 struct ocfs2_lock_res *lockres)
1246 { 1242 {
1247 int ret; 1243 int ret;
1248 1244
1249 ret = wait_for_completion_interruptible(&mw->mw_complete); 1245 ret = wait_for_completion_interruptible(&mw->mw_complete);
1250 if (ret) 1246 if (ret)
1251 lockres_remove_mask_waiter(lockres, mw); 1247 lockres_remove_mask_waiter(lockres, mw);
1252 else 1248 else
1253 ret = mw->mw_status; 1249 ret = mw->mw_status;
1254 /* Re-arm the completion in case we want to wait on it again */ 1250 /* Re-arm the completion in case we want to wait on it again */
1255 INIT_COMPLETION(mw->mw_complete); 1251 INIT_COMPLETION(mw->mw_complete);
1256 return ret; 1252 return ret;
1257 } 1253 }
1258 1254
/*
 * Acquire (or upconvert to) @level on @lockres, retrying until the
 * cluster grants it.  Waits out in-flight dlm calls and blocked
 * downconverts via mask waiters; with OCFS2_LOCK_NONBLOCK in
 * @arg_flags it returns -EAGAIN instead of sleeping.
 */
static int ocfs2_cluster_lock(struct ocfs2_super *osb,
			      struct ocfs2_lock_res *lockres,
			      int level,
			      u32 lkm_flags,
			      int arg_flags)
{
	struct ocfs2_mask_waiter mw;
	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
	unsigned long flags;
	unsigned int gen;
	int noqueue_attempted = 0;

	mlog_entry_void();

	ocfs2_init_mask_waiter(&mw);

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

again:
	wait = 0;

	if (catch_signals && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto out;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);

	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
			"Cluster lock called on freeing lockres %s! flags "
			"0x%lx\n", lockres->l_name, lockres->l_flags);

	/* We only compare against the currently granted level
	 * here. If the lock is blocked waiting on a downconvert,
	 * we'll get caught below. */
	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
	    level > lockres->l_level) {
		/* is someone sitting in dlm_lock? If so, wait on
		 * them. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		wait = 1;
		goto unlock;
	}

	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
		/* is the lock is currently blocked on behalf of
		 * another node */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
		wait = 1;
		goto unlock;
	}

	if (level > lockres->l_level) {
		/* A NOQUEUE request already failed once; don't loop. */
		if (noqueue_attempted > 0) {
			ret = -EAGAIN;
			goto unlock;
		}
		if (lkm_flags & DLM_LKF_NOQUEUE)
			noqueue_attempted = 1;

		if (lockres->l_action != OCFS2_AST_INVALID)
			mlog(ML_ERROR, "lockres %s has action %u pending\n",
			     lockres->l_name, lockres->l_action);

		/* First call attaches the lock; later calls convert it. */
		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
			lockres->l_action = OCFS2_AST_ATTACH;
			lkm_flags &= ~DLM_LKF_CONVERT;
		} else {
			lockres->l_action = OCFS2_AST_CONVERT;
			lkm_flags |= DLM_LKF_CONVERT;
		}

		lockres->l_requested = level;
		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
		/* Snapshot the pending generation before dropping l_lock. */
		gen = lockres_set_pending(lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		mlog(0, "lock %s, convert from %d to level = %d\n",
		     lockres->l_name, lockres->l_level, level);

		/* call dlm_lock to upgrade lock now */
		ret = ocfs2_dlm_lock(osb->cconn,
				     level,
				     &lockres->l_lksb,
				     lkm_flags,
				     lockres->l_name,
				     OCFS2_LOCK_ID_MAX_LEN - 1,
				     lockres);
		lockres_clear_pending(lockres, gen, osb);
		if (ret) {
			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
			    (ret != -EAGAIN)) {
				ocfs2_log_dlm_error("ocfs2_dlm_lock",
						    ret, lockres);
			}
			ocfs2_recover_from_dlm_error(lockres, 1);
			goto out;
		}

		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
		     lockres->l_name);

		/* At this point we've gone inside the dlm and need to
		 * complete our work regardless. */
		catch_signals = 0;

		/* wait for busy to clear and carry on */
		goto again;
	}

	/* Ok, if we get here then we're good to go. */
	ocfs2_inc_holders(lockres, level);

	ret = 0;
unlock:
	spin_unlock_irqrestore(&lockres->l_lock, flags);
out:
	/*
	 * This is helping work around a lock inversion between the page lock
	 * and dlm locks.  One path holds the page lock while calling aops
	 * which block acquiring dlm locks.  The voting thread holds dlm
	 * locks while acquiring page locks while down converting data locks.
	 * This block is helping an aop path notice the inversion and back
	 * off to unlock its page lock before trying the dlm lock again.
	 */
	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
		wait = 0;
		if (lockres_remove_mask_waiter(lockres, &mw))
			ret = -EAGAIN;
		else
			goto again;
	}
	if (wait) {
		ret = ocfs2_wait_for_mask(&mw);
		if (ret == 0)
			goto again;
		mlog_errno(ret);
	}
	ocfs2_update_lock_stats(lockres, level, &mw, ret);

	mlog_exit(ret);
	return ret;
}
1409 1405
1410 static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1406 static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1411 struct ocfs2_lock_res *lockres, 1407 struct ocfs2_lock_res *lockres,
1412 int level) 1408 int level)
1413 { 1409 {
1414 unsigned long flags; 1410 unsigned long flags;
1415 1411
1416 mlog_entry_void(); 1412 mlog_entry_void();
1417 spin_lock_irqsave(&lockres->l_lock, flags); 1413 spin_lock_irqsave(&lockres->l_lock, flags);
1418 ocfs2_dec_holders(lockres, level); 1414 ocfs2_dec_holders(lockres, level);
1419 ocfs2_downconvert_on_unlock(osb, lockres); 1415 ocfs2_downconvert_on_unlock(osb, lockres);
1420 spin_unlock_irqrestore(&lockres->l_lock, flags); 1416 spin_unlock_irqrestore(&lockres->l_lock, flags);
1421 mlog_exit_void(); 1417 mlog_exit_void();
1422 } 1418 }
1423 1419
1424 static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1420 static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1425 struct ocfs2_lock_res *lockres, 1421 struct ocfs2_lock_res *lockres,
1426 int ex, 1422 int ex,
1427 int local) 1423 int local)
1428 { 1424 {
1429 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1425 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1430 unsigned long flags; 1426 unsigned long flags;
1431 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1427 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1432 1428
1433 spin_lock_irqsave(&lockres->l_lock, flags); 1429 spin_lock_irqsave(&lockres->l_lock, flags);
1434 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1430 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1435 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1431 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1436 spin_unlock_irqrestore(&lockres->l_lock, flags); 1432 spin_unlock_irqrestore(&lockres->l_lock, flags);
1437 1433
1438 return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1434 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1439 } 1435 }
1440 1436
1441 /* Grants us an EX lock on the data and metadata resources, skipping 1437 /* Grants us an EX lock on the data and metadata resources, skipping
1442 * the normal cluster directory lookup. Use this ONLY on newly created 1438 * the normal cluster directory lookup. Use this ONLY on newly created
1443 * inodes which other nodes can't possibly see, and which haven't been 1439 * inodes which other nodes can't possibly see, and which haven't been
1444 * hashed in the inode hash yet. This can give us a good performance 1440 * hashed in the inode hash yet. This can give us a good performance
1445 * increase as it'll skip the network broadcast normally associated 1441 * increase as it'll skip the network broadcast normally associated
1446 * with creating a new lock resource. */ 1442 * with creating a new lock resource. */
1447 int ocfs2_create_new_inode_locks(struct inode *inode) 1443 int ocfs2_create_new_inode_locks(struct inode *inode)
1448 { 1444 {
1449 int ret; 1445 int ret;
1450 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1451 1447
1452 BUG_ON(!inode); 1448 BUG_ON(!inode);
1453 BUG_ON(!ocfs2_inode_is_new(inode)); 1449 BUG_ON(!ocfs2_inode_is_new(inode));
1454 1450
1455 mlog_entry_void(); 1451 mlog_entry_void();
1456 1452
1457 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1453 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1458 1454
1459 /* NOTE: That we don't increment any of the holder counts, nor 1455 /* NOTE: That we don't increment any of the holder counts, nor
1460 * do we add anything to a journal handle. Since this is 1456 * do we add anything to a journal handle. Since this is
1461 * supposed to be a new inode which the cluster doesn't know 1457 * supposed to be a new inode which the cluster doesn't know
1462 * about yet, there is no need to. As far as the LVB handling 1458 * about yet, there is no need to. As far as the LVB handling
1463 * is concerned, this is basically like acquiring an EX lock 1459 * is concerned, this is basically like acquiring an EX lock
1464 * on a resource which has an invalid one -- we'll set it 1460 * on a resource which has an invalid one -- we'll set it
1465 * valid when we release the EX. */ 1461 * valid when we release the EX. */
1466 1462
1467 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1463 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1468 if (ret) { 1464 if (ret) {
1469 mlog_errno(ret); 1465 mlog_errno(ret);
1470 goto bail; 1466 goto bail;
1471 } 1467 }
1472 1468
1473 /* 1469 /*
1474 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 1470 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
1475 * don't use a generation in their lock names. 1471 * don't use a generation in their lock names.
1476 */ 1472 */
1477 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1473 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1478 if (ret) { 1474 if (ret) {
1479 mlog_errno(ret); 1475 mlog_errno(ret);
1480 goto bail; 1476 goto bail;
1481 } 1477 }
1482 1478
1483 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 1479 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
1484 if (ret) { 1480 if (ret) {
1485 mlog_errno(ret); 1481 mlog_errno(ret);
1486 goto bail; 1482 goto bail;
1487 } 1483 }
1488 1484
1489 bail: 1485 bail:
1490 mlog_exit(ret); 1486 mlog_exit(ret);
1491 return ret; 1487 return ret;
1492 } 1488 }
1493 1489
1494 int ocfs2_rw_lock(struct inode *inode, int write) 1490 int ocfs2_rw_lock(struct inode *inode, int write)
1495 { 1491 {
1496 int status, level; 1492 int status, level;
1497 struct ocfs2_lock_res *lockres; 1493 struct ocfs2_lock_res *lockres;
1498 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1494 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1499 1495
1500 BUG_ON(!inode); 1496 BUG_ON(!inode);
1501 1497
1502 mlog_entry_void(); 1498 mlog_entry_void();
1503 1499
1504 mlog(0, "inode %llu take %s RW lock\n", 1500 mlog(0, "inode %llu take %s RW lock\n",
1505 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1501 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1506 write ? "EXMODE" : "PRMODE"); 1502 write ? "EXMODE" : "PRMODE");
1507 1503
1508 if (ocfs2_mount_local(osb)) 1504 if (ocfs2_mount_local(osb))
1509 return 0; 1505 return 0;
1510 1506
1511 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1507 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1512 1508
1513 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1509 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1514 1510
1515 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1511 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1516 0); 1512 0);
1517 if (status < 0) 1513 if (status < 0)
1518 mlog_errno(status); 1514 mlog_errno(status);
1519 1515
1520 mlog_exit(status); 1516 mlog_exit(status);
1521 return status; 1517 return status;
1522 } 1518 }
1523 1519
1524 void ocfs2_rw_unlock(struct inode *inode, int write) 1520 void ocfs2_rw_unlock(struct inode *inode, int write)
1525 { 1521 {
1526 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1522 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1527 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1523 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1528 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1524 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1529 1525
1530 mlog_entry_void(); 1526 mlog_entry_void();
1531 1527
1532 mlog(0, "inode %llu drop %s RW lock\n", 1528 mlog(0, "inode %llu drop %s RW lock\n",
1533 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1529 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1534 write ? "EXMODE" : "PRMODE"); 1530 write ? "EXMODE" : "PRMODE");
1535 1531
1536 if (!ocfs2_mount_local(osb)) 1532 if (!ocfs2_mount_local(osb))
1537 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1533 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1538 1534
1539 mlog_exit_void(); 1535 mlog_exit_void();
1540 } 1536 }
1541 1537
1542 /* 1538 /*
1543 * ocfs2_open_lock always get PR mode lock. 1539 * ocfs2_open_lock always get PR mode lock.
1544 */ 1540 */
1545 int ocfs2_open_lock(struct inode *inode) 1541 int ocfs2_open_lock(struct inode *inode)
1546 { 1542 {
1547 int status = 0; 1543 int status = 0;
1548 struct ocfs2_lock_res *lockres; 1544 struct ocfs2_lock_res *lockres;
1549 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1545 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1550 1546
1551 BUG_ON(!inode); 1547 BUG_ON(!inode);
1552 1548
1553 mlog_entry_void(); 1549 mlog_entry_void();
1554 1550
1555 mlog(0, "inode %llu take PRMODE open lock\n", 1551 mlog(0, "inode %llu take PRMODE open lock\n",
1556 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1552 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1557 1553
1558 if (ocfs2_mount_local(osb)) 1554 if (ocfs2_mount_local(osb))
1559 goto out; 1555 goto out;
1560 1556
1561 lockres = &OCFS2_I(inode)->ip_open_lockres; 1557 lockres = &OCFS2_I(inode)->ip_open_lockres;
1562 1558
1563 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1559 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1564 DLM_LOCK_PR, 0, 0); 1560 DLM_LOCK_PR, 0, 0);
1565 if (status < 0) 1561 if (status < 0)
1566 mlog_errno(status); 1562 mlog_errno(status);
1567 1563
1568 out: 1564 out:
1569 mlog_exit(status); 1565 mlog_exit(status);
1570 return status; 1566 return status;
1571 } 1567 }
1572 1568
1573 int ocfs2_try_open_lock(struct inode *inode, int write) 1569 int ocfs2_try_open_lock(struct inode *inode, int write)
1574 { 1570 {
1575 int status = 0, level; 1571 int status = 0, level;
1576 struct ocfs2_lock_res *lockres; 1572 struct ocfs2_lock_res *lockres;
1577 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1578 1574
1579 BUG_ON(!inode); 1575 BUG_ON(!inode);
1580 1576
1581 mlog_entry_void(); 1577 mlog_entry_void();
1582 1578
1583 mlog(0, "inode %llu try to take %s open lock\n", 1579 mlog(0, "inode %llu try to take %s open lock\n",
1584 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1580 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1585 write ? "EXMODE" : "PRMODE"); 1581 write ? "EXMODE" : "PRMODE");
1586 1582
1587 if (ocfs2_mount_local(osb)) 1583 if (ocfs2_mount_local(osb))
1588 goto out; 1584 goto out;
1589 1585
1590 lockres = &OCFS2_I(inode)->ip_open_lockres; 1586 lockres = &OCFS2_I(inode)->ip_open_lockres;
1591 1587
1592 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1588 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1593 1589
1594 /* 1590 /*
1595 * The file system may already holding a PRMODE/EXMODE open lock. 1591 * The file system may already holding a PRMODE/EXMODE open lock.
1596 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 1592 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
1597 * other nodes and the -EAGAIN will indicate to the caller that 1593 * other nodes and the -EAGAIN will indicate to the caller that
1598 * this inode is still in use. 1594 * this inode is still in use.
1599 */ 1595 */
1600 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1596 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1601 level, DLM_LKF_NOQUEUE, 0); 1597 level, DLM_LKF_NOQUEUE, 0);
1602 1598
1603 out: 1599 out:
1604 mlog_exit(status); 1600 mlog_exit(status);
1605 return status; 1601 return status;
1606 } 1602 }
1607 1603
1608 /* 1604 /*
1609 * ocfs2_open_unlock unlock PR and EX mode open locks. 1605 * ocfs2_open_unlock unlock PR and EX mode open locks.
1610 */ 1606 */
1611 void ocfs2_open_unlock(struct inode *inode) 1607 void ocfs2_open_unlock(struct inode *inode)
1612 { 1608 {
1613 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 1609 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1614 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1610 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1615 1611
1616 mlog_entry_void(); 1612 mlog_entry_void();
1617 1613
1618 mlog(0, "inode %llu drop open lock\n", 1614 mlog(0, "inode %llu drop open lock\n",
1619 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1615 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1620 1616
1621 if (ocfs2_mount_local(osb)) 1617 if (ocfs2_mount_local(osb))
1622 goto out; 1618 goto out;
1623 1619
1624 if(lockres->l_ro_holders) 1620 if(lockres->l_ro_holders)
1625 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1621 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1626 DLM_LOCK_PR); 1622 DLM_LOCK_PR);
1627 if(lockres->l_ex_holders) 1623 if(lockres->l_ex_holders)
1628 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1624 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1629 DLM_LOCK_EX); 1625 DLM_LOCK_EX);
1630 1626
1631 out: 1627 out:
1632 mlog_exit_void(); 1628 mlog_exit_void();
1633 } 1629 }
1634 1630
1635 static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1631 static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1636 int level) 1632 int level)
1637 { 1633 {
1638 int ret; 1634 int ret;
1639 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1635 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1640 unsigned long flags; 1636 unsigned long flags;
1641 struct ocfs2_mask_waiter mw; 1637 struct ocfs2_mask_waiter mw;
1642 1638
1643 ocfs2_init_mask_waiter(&mw); 1639 ocfs2_init_mask_waiter(&mw);
1644 1640
1645 retry_cancel: 1641 retry_cancel:
1646 spin_lock_irqsave(&lockres->l_lock, flags); 1642 spin_lock_irqsave(&lockres->l_lock, flags);
1647 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1643 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1648 ret = ocfs2_prepare_cancel_convert(osb, lockres); 1644 ret = ocfs2_prepare_cancel_convert(osb, lockres);
1649 if (ret) { 1645 if (ret) {
1650 spin_unlock_irqrestore(&lockres->l_lock, flags); 1646 spin_unlock_irqrestore(&lockres->l_lock, flags);
1651 ret = ocfs2_cancel_convert(osb, lockres); 1647 ret = ocfs2_cancel_convert(osb, lockres);
1652 if (ret < 0) { 1648 if (ret < 0) {
1653 mlog_errno(ret); 1649 mlog_errno(ret);
1654 goto out; 1650 goto out;
1655 } 1651 }
1656 goto retry_cancel; 1652 goto retry_cancel;
1657 } 1653 }
1658 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1654 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1659 spin_unlock_irqrestore(&lockres->l_lock, flags); 1655 spin_unlock_irqrestore(&lockres->l_lock, flags);
1660 1656
1661 ocfs2_wait_for_mask(&mw); 1657 ocfs2_wait_for_mask(&mw);
1662 goto retry_cancel; 1658 goto retry_cancel;
1663 } 1659 }
1664 1660
1665 ret = -ERESTARTSYS; 1661 ret = -ERESTARTSYS;
1666 /* 1662 /*
1667 * We may still have gotten the lock, in which case there's no 1663 * We may still have gotten the lock, in which case there's no
1668 * point to restarting the syscall. 1664 * point to restarting the syscall.
1669 */ 1665 */
1670 if (lockres->l_level == level) 1666 if (lockres->l_level == level)
1671 ret = 0; 1667 ret = 0;
1672 1668
1673 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1669 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1674 lockres->l_flags, lockres->l_level, lockres->l_action); 1670 lockres->l_flags, lockres->l_level, lockres->l_action);
1675 1671
1676 spin_unlock_irqrestore(&lockres->l_lock, flags); 1672 spin_unlock_irqrestore(&lockres->l_lock, flags);
1677 1673
1678 out: 1674 out:
1679 return ret; 1675 return ret;
1680 } 1676 }
1681 1677
1682 /* 1678 /*
1683 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1679 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1684 * flock() calls. The locking approach this requires is sufficiently 1680 * flock() calls. The locking approach this requires is sufficiently
1685 * different from all other cluster lock types that we implement a 1681 * different from all other cluster lock types that we implement a
1686 * seperate path to the "low-level" dlm calls. In particular: 1682 * seperate path to the "low-level" dlm calls. In particular:
1687 * 1683 *
1688 * - No optimization of lock levels is done - we take at exactly 1684 * - No optimization of lock levels is done - we take at exactly
1689 * what's been requested. 1685 * what's been requested.
1690 * 1686 *
1691 * - No lock caching is employed. We immediately downconvert to 1687 * - No lock caching is employed. We immediately downconvert to
1692 * no-lock at unlock time. This also means flock locks never go on 1688 * no-lock at unlock time. This also means flock locks never go on
1693 * the blocking list). 1689 * the blocking list).
1694 * 1690 *
1695 * - Since userspace can trivially deadlock itself with flock, we make 1691 * - Since userspace can trivially deadlock itself with flock, we make
1696 * sure to allow cancellation of a misbehaving applications flock() 1692 * sure to allow cancellation of a misbehaving applications flock()
1697 * request. 1693 * request.
1698 * 1694 *
1699 * - Access to any flock lockres doesn't require concurrency, so we 1695 * - Access to any flock lockres doesn't require concurrency, so we
1700 * can simplify the code by requiring the caller to guarantee 1696 * can simplify the code by requiring the caller to guarantee
1701 * serialization of dlmglue flock calls. 1697 * serialization of dlmglue flock calls.
1702 */ 1698 */
1703 int ocfs2_file_lock(struct file *file, int ex, int trylock) 1699 int ocfs2_file_lock(struct file *file, int ex, int trylock)
1704 { 1700 {
1705 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1701 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1706 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; 1702 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1707 unsigned long flags; 1703 unsigned long flags;
1708 struct ocfs2_file_private *fp = file->private_data; 1704 struct ocfs2_file_private *fp = file->private_data;
1709 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1705 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1710 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1706 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1711 struct ocfs2_mask_waiter mw; 1707 struct ocfs2_mask_waiter mw;
1712 1708
1713 ocfs2_init_mask_waiter(&mw); 1709 ocfs2_init_mask_waiter(&mw);
1714 1710
1715 if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1711 if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1716 (lockres->l_level > DLM_LOCK_NL)) { 1712 (lockres->l_level > DLM_LOCK_NL)) {
1717 mlog(ML_ERROR, 1713 mlog(ML_ERROR,
1718 "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1714 "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1719 "level: %u\n", lockres->l_name, lockres->l_flags, 1715 "level: %u\n", lockres->l_name, lockres->l_flags,
1720 lockres->l_level); 1716 lockres->l_level);
1721 return -EINVAL; 1717 return -EINVAL;
1722 } 1718 }
1723 1719
1724 spin_lock_irqsave(&lockres->l_lock, flags); 1720 spin_lock_irqsave(&lockres->l_lock, flags);
1725 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1721 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1726 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1722 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1727 spin_unlock_irqrestore(&lockres->l_lock, flags); 1723 spin_unlock_irqrestore(&lockres->l_lock, flags);
1728 1724
1729 /* 1725 /*
1730 * Get the lock at NLMODE to start - that way we 1726 * Get the lock at NLMODE to start - that way we
1731 * can cancel the upconvert request if need be. 1727 * can cancel the upconvert request if need be.
1732 */ 1728 */
1733 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1729 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1734 if (ret < 0) { 1730 if (ret < 0) {
1735 mlog_errno(ret); 1731 mlog_errno(ret);
1736 goto out; 1732 goto out;
1737 } 1733 }
1738 1734
1739 ret = ocfs2_wait_for_mask(&mw); 1735 ret = ocfs2_wait_for_mask(&mw);
1740 if (ret) { 1736 if (ret) {
1741 mlog_errno(ret); 1737 mlog_errno(ret);
1742 goto out; 1738 goto out;
1743 } 1739 }
1744 spin_lock_irqsave(&lockres->l_lock, flags); 1740 spin_lock_irqsave(&lockres->l_lock, flags);
1745 } 1741 }
1746 1742
1747 lockres->l_action = OCFS2_AST_CONVERT; 1743 lockres->l_action = OCFS2_AST_CONVERT;
1748 lkm_flags |= DLM_LKF_CONVERT; 1744 lkm_flags |= DLM_LKF_CONVERT;
1749 lockres->l_requested = level; 1745 lockres->l_requested = level;
1750 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1746 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1751 1747
1752 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1748 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1753 spin_unlock_irqrestore(&lockres->l_lock, flags); 1749 spin_unlock_irqrestore(&lockres->l_lock, flags);
1754 1750
1755 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1751 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1756 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 1752 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
1757 lockres); 1753 lockres);
1758 if (ret) { 1754 if (ret) {
1759 if (!trylock || (ret != -EAGAIN)) { 1755 if (!trylock || (ret != -EAGAIN)) {
1760 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1756 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1761 ret = -EINVAL; 1757 ret = -EINVAL;
1762 } 1758 }
1763 1759
1764 ocfs2_recover_from_dlm_error(lockres, 1); 1760 ocfs2_recover_from_dlm_error(lockres, 1);
1765 lockres_remove_mask_waiter(lockres, &mw); 1761 lockres_remove_mask_waiter(lockres, &mw);
1766 goto out; 1762 goto out;
1767 } 1763 }
1768 1764
1769 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1765 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1770 if (ret == -ERESTARTSYS) { 1766 if (ret == -ERESTARTSYS) {
1771 /* 1767 /*
1772 * Userspace can cause deadlock itself with 1768 * Userspace can cause deadlock itself with
1773 * flock(). Current behavior locally is to allow the 1769 * flock(). Current behavior locally is to allow the
1774 * deadlock, but abort the system call if a signal is 1770 * deadlock, but abort the system call if a signal is
1775 * received. We follow this example, otherwise a 1771 * received. We follow this example, otherwise a
1776 * poorly written program could sit in kernel until 1772 * poorly written program could sit in kernel until
1777 * reboot. 1773 * reboot.
1778 * 1774 *
1779 * Handling this is a bit more complicated for Ocfs2 1775 * Handling this is a bit more complicated for Ocfs2
1780 * though. We can't exit this function with an 1776 * though. We can't exit this function with an
1781 * outstanding lock request, so a cancel convert is 1777 * outstanding lock request, so a cancel convert is
1782 * required. We intentionally overwrite 'ret' - if the 1778 * required. We intentionally overwrite 'ret' - if the
1783 * cancel fails and the lock was granted, it's easier 1779 * cancel fails and the lock was granted, it's easier
1784 * to just bubble sucess back up to the user. 1780 * to just bubble sucess back up to the user.
1785 */ 1781 */
1786 ret = ocfs2_flock_handle_signal(lockres, level); 1782 ret = ocfs2_flock_handle_signal(lockres, level);
1787 } else if (!ret && (level > lockres->l_level)) { 1783 } else if (!ret && (level > lockres->l_level)) {
1788 /* Trylock failed asynchronously */ 1784 /* Trylock failed asynchronously */
1789 BUG_ON(!trylock); 1785 BUG_ON(!trylock);
1790 ret = -EAGAIN; 1786 ret = -EAGAIN;
1791 } 1787 }
1792 1788
1793 out: 1789 out:
1794 1790
1795 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1791 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
1796 lockres->l_name, ex, trylock, ret); 1792 lockres->l_name, ex, trylock, ret);
1797 return ret; 1793 return ret;
1798 } 1794 }
1799 1795
1800 void ocfs2_file_unlock(struct file *file) 1796 void ocfs2_file_unlock(struct file *file)
1801 { 1797 {
1802 int ret; 1798 int ret;
1803 unsigned int gen; 1799 unsigned int gen;
1804 unsigned long flags; 1800 unsigned long flags;
1805 struct ocfs2_file_private *fp = file->private_data; 1801 struct ocfs2_file_private *fp = file->private_data;
1806 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1802 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1807 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1803 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1808 struct ocfs2_mask_waiter mw; 1804 struct ocfs2_mask_waiter mw;
1809 1805
1810 ocfs2_init_mask_waiter(&mw); 1806 ocfs2_init_mask_waiter(&mw);
1811 1807
1812 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1808 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1813 return; 1809 return;
1814 1810
1815 if (lockres->l_level == DLM_LOCK_NL) 1811 if (lockres->l_level == DLM_LOCK_NL)
1816 return; 1812 return;
1817 1813
1818 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1814 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1819 lockres->l_name, lockres->l_flags, lockres->l_level, 1815 lockres->l_name, lockres->l_flags, lockres->l_level,
1820 lockres->l_action); 1816 lockres->l_action);
1821 1817
1822 spin_lock_irqsave(&lockres->l_lock, flags); 1818 spin_lock_irqsave(&lockres->l_lock, flags);
1823 /* 1819 /*
1824 * Fake a blocking ast for the downconvert code. 1820 * Fake a blocking ast for the downconvert code.
1825 */ 1821 */
1826 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1822 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1827 lockres->l_blocking = DLM_LOCK_EX; 1823 lockres->l_blocking = DLM_LOCK_EX;
1828 1824
1829 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 1825 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
1830 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1826 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1831 spin_unlock_irqrestore(&lockres->l_lock, flags); 1827 spin_unlock_irqrestore(&lockres->l_lock, flags);
1832 1828
1833 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 1829 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
1834 if (ret) { 1830 if (ret) {
1835 mlog_errno(ret); 1831 mlog_errno(ret);
1836 return; 1832 return;
1837 } 1833 }
1838 1834
1839 ret = ocfs2_wait_for_mask(&mw); 1835 ret = ocfs2_wait_for_mask(&mw);
1840 if (ret) 1836 if (ret)
1841 mlog_errno(ret); 1837 mlog_errno(ret);
1842 } 1838 }
1843 1839
1844 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1840 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1845 struct ocfs2_lock_res *lockres) 1841 struct ocfs2_lock_res *lockres)
1846 { 1842 {
1847 int kick = 0; 1843 int kick = 0;
1848 1844
1849 mlog_entry_void(); 1845 mlog_entry_void();
1850 1846
1851 /* If we know that another node is waiting on our lock, kick 1847 /* If we know that another node is waiting on our lock, kick
1852 * the downconvert thread * pre-emptively when we reach a release 1848 * the downconvert thread * pre-emptively when we reach a release
1853 * condition. */ 1849 * condition. */
1854 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1850 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1855 switch(lockres->l_blocking) { 1851 switch(lockres->l_blocking) {
1856 case DLM_LOCK_EX: 1852 case DLM_LOCK_EX:
1857 if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1853 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1858 kick = 1; 1854 kick = 1;
1859 break; 1855 break;
1860 case DLM_LOCK_PR: 1856 case DLM_LOCK_PR:
1861 if (!lockres->l_ex_holders) 1857 if (!lockres->l_ex_holders)
1862 kick = 1; 1858 kick = 1;
1863 break; 1859 break;
1864 default: 1860 default:
1865 BUG(); 1861 BUG();
1866 } 1862 }
1867 } 1863 }
1868 1864
1869 if (kick) 1865 if (kick)
1870 ocfs2_wake_downconvert_thread(osb); 1866 ocfs2_wake_downconvert_thread(osb);
1871 1867
1872 mlog_exit_void(); 1868 mlog_exit_void();
1873 } 1869 }
1874 1870
1875 #define OCFS2_SEC_BITS 34 1871 #define OCFS2_SEC_BITS 34
1876 #define OCFS2_SEC_SHIFT (64 - 34) 1872 #define OCFS2_SEC_SHIFT (64 - 34)
1877 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1873 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)
1878 1874
1879 /* LVB only has room for 64 bits of time here so we pack it for 1875 /* LVB only has room for 64 bits of time here so we pack it for
1880 * now. */ 1876 * now. */
1881 static u64 ocfs2_pack_timespec(struct timespec *spec) 1877 static u64 ocfs2_pack_timespec(struct timespec *spec)
1882 { 1878 {
1883 u64 res; 1879 u64 res;
1884 u64 sec = spec->tv_sec; 1880 u64 sec = spec->tv_sec;
1885 u32 nsec = spec->tv_nsec; 1881 u32 nsec = spec->tv_nsec;
1886 1882
1887 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1883 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
1888 1884
1889 return res; 1885 return res;
1890 } 1886 }
1891 1887
1892 /* Call this with the lockres locked. I am reasonably sure we don't 1888 /* Call this with the lockres locked. I am reasonably sure we don't
1893 * need ip_lock in this function as anyone who would be changing those 1889 * need ip_lock in this function as anyone who would be changing those
1894 * values is supposed to be blocked in ocfs2_inode_lock right now. */ 1890 * values is supposed to be blocked in ocfs2_inode_lock right now. */
1895 static void __ocfs2_stuff_meta_lvb(struct inode *inode) 1891 static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1896 { 1892 {
1897 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1893 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1898 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1894 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
1899 struct ocfs2_meta_lvb *lvb; 1895 struct ocfs2_meta_lvb *lvb;
1900 1896
1901 mlog_entry_void(); 1897 mlog_entry_void();
1902 1898
1903 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1899 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1904 1900
1905 /* 1901 /*
1906 * Invalidate the LVB of a deleted inode - this way other 1902 * Invalidate the LVB of a deleted inode - this way other
1907 * nodes are forced to go to disk and discover the new inode 1903 * nodes are forced to go to disk and discover the new inode
1908 * status. 1904 * status.
1909 */ 1905 */
1910 if (oi->ip_flags & OCFS2_INODE_DELETED) { 1906 if (oi->ip_flags & OCFS2_INODE_DELETED) {
1911 lvb->lvb_version = 0; 1907 lvb->lvb_version = 0;
1912 goto out; 1908 goto out;
1913 } 1909 }
1914 1910
1915 lvb->lvb_version = OCFS2_LVB_VERSION; 1911 lvb->lvb_version = OCFS2_LVB_VERSION;
1916 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1912 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1917 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1913 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
1918 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1914 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
1919 lvb->lvb_igid = cpu_to_be32(inode->i_gid); 1915 lvb->lvb_igid = cpu_to_be32(inode->i_gid);
1920 lvb->lvb_imode = cpu_to_be16(inode->i_mode); 1916 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
1921 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 1917 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
1922 lvb->lvb_iatime_packed = 1918 lvb->lvb_iatime_packed =
1923 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 1919 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
1924 lvb->lvb_ictime_packed = 1920 lvb->lvb_ictime_packed =
1925 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1921 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
1926 lvb->lvb_imtime_packed = 1922 lvb->lvb_imtime_packed =
1927 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1923 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1928 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 1924 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1929 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 1925 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
1930 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1926 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1931 1927
1932 out: 1928 out:
1933 mlog_meta_lvb(0, lockres); 1929 mlog_meta_lvb(0, lockres);
1934 1930
1935 mlog_exit_void(); 1931 mlog_exit_void();
1936 } 1932 }
1937 1933
1938 static void ocfs2_unpack_timespec(struct timespec *spec, 1934 static void ocfs2_unpack_timespec(struct timespec *spec,
1939 u64 packed_time) 1935 u64 packed_time)
1940 { 1936 {
1941 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 1937 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
1942 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 1938 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
1943 } 1939 }
1944 1940
1945 static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 1941 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1946 { 1942 {
1947 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1943 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1948 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1944 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
1949 struct ocfs2_meta_lvb *lvb; 1945 struct ocfs2_meta_lvb *lvb;
1950 1946
1951 mlog_entry_void(); 1947 mlog_entry_void();
1952 1948
1953 mlog_meta_lvb(0, lockres); 1949 mlog_meta_lvb(0, lockres);
1954 1950
1955 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1951 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1956 1952
1957 /* We're safe here without the lockres lock... */ 1953 /* We're safe here without the lockres lock... */
1958 spin_lock(&oi->ip_lock); 1954 spin_lock(&oi->ip_lock);
1959 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1955 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
1960 i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1956 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
1961 1957
1962 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 1958 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
1963 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 1959 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
1964 ocfs2_set_inode_flags(inode); 1960 ocfs2_set_inode_flags(inode);
1965 1961
1966 /* fast-symlinks are a special case */ 1962 /* fast-symlinks are a special case */
1967 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1963 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
1968 inode->i_blocks = 0; 1964 inode->i_blocks = 0;
1969 else 1965 else
1970 inode->i_blocks = ocfs2_inode_sector_count(inode); 1966 inode->i_blocks = ocfs2_inode_sector_count(inode);
1971 1967
1972 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 1968 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
1973 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 1969 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
1974 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 1970 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
1975 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 1971 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
1976 ocfs2_unpack_timespec(&inode->i_atime, 1972 ocfs2_unpack_timespec(&inode->i_atime,
1977 be64_to_cpu(lvb->lvb_iatime_packed)); 1973 be64_to_cpu(lvb->lvb_iatime_packed));
1978 ocfs2_unpack_timespec(&inode->i_mtime, 1974 ocfs2_unpack_timespec(&inode->i_mtime,
1979 be64_to_cpu(lvb->lvb_imtime_packed)); 1975 be64_to_cpu(lvb->lvb_imtime_packed));
1980 ocfs2_unpack_timespec(&inode->i_ctime, 1976 ocfs2_unpack_timespec(&inode->i_ctime,
1981 be64_to_cpu(lvb->lvb_ictime_packed)); 1977 be64_to_cpu(lvb->lvb_ictime_packed));
1982 spin_unlock(&oi->ip_lock); 1978 spin_unlock(&oi->ip_lock);
1983 1979
1984 mlog_exit_void(); 1980 mlog_exit_void();
1985 } 1981 }
1986 1982
1987 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1983 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1988 struct ocfs2_lock_res *lockres) 1984 struct ocfs2_lock_res *lockres)
1989 { 1985 {
1990 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1986 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1991 1987
1992 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 1988 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
1993 && lvb->lvb_version == OCFS2_LVB_VERSION 1989 && lvb->lvb_version == OCFS2_LVB_VERSION
1994 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1990 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1995 return 1; 1991 return 1;
1996 return 0; 1992 return 0;
1997 } 1993 }
1998 1994
1999 /* Determine whether a lock resource needs to be refreshed, and 1995 /* Determine whether a lock resource needs to be refreshed, and
2000 * arbitrate who gets to refresh it. 1996 * arbitrate who gets to refresh it.
2001 * 1997 *
2002 * 0 means no refresh needed. 1998 * 0 means no refresh needed.
2003 * 1999 *
2004 * > 0 means you need to refresh this and you MUST call 2000 * > 0 means you need to refresh this and you MUST call
2005 * ocfs2_complete_lock_res_refresh afterwards. */ 2001 * ocfs2_complete_lock_res_refresh afterwards. */
2006 static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2002 static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2007 { 2003 {
2008 unsigned long flags; 2004 unsigned long flags;
2009 int status = 0; 2005 int status = 0;
2010 2006
2011 mlog_entry_void(); 2007 mlog_entry_void();
2012 2008
2013 refresh_check: 2009 refresh_check:
2014 spin_lock_irqsave(&lockres->l_lock, flags); 2010 spin_lock_irqsave(&lockres->l_lock, flags);
2015 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2011 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2016 spin_unlock_irqrestore(&lockres->l_lock, flags); 2012 spin_unlock_irqrestore(&lockres->l_lock, flags);
2017 goto bail; 2013 goto bail;
2018 } 2014 }
2019 2015
2020 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2016 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2021 spin_unlock_irqrestore(&lockres->l_lock, flags); 2017 spin_unlock_irqrestore(&lockres->l_lock, flags);
2022 2018
2023 ocfs2_wait_on_refreshing_lock(lockres); 2019 ocfs2_wait_on_refreshing_lock(lockres);
2024 goto refresh_check; 2020 goto refresh_check;
2025 } 2021 }
2026 2022
2027 /* Ok, I'll be the one to refresh this lock. */ 2023 /* Ok, I'll be the one to refresh this lock. */
2028 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2024 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2029 spin_unlock_irqrestore(&lockres->l_lock, flags); 2025 spin_unlock_irqrestore(&lockres->l_lock, flags);
2030 2026
2031 status = 1; 2027 status = 1;
2032 bail: 2028 bail:
2033 mlog_exit(status); 2029 mlog_exit(status);
2034 return status; 2030 return status;
2035 } 2031 }
2036 2032
2037 /* If status is non zero, I'll mark it as not being in refresh 2033 /* If status is non zero, I'll mark it as not being in refresh
2038 * anymroe, but i won't clear the needs refresh flag. */ 2034 * anymroe, but i won't clear the needs refresh flag. */
2039 static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2035 static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2040 int status) 2036 int status)
2041 { 2037 {
2042 unsigned long flags; 2038 unsigned long flags;
2043 mlog_entry_void(); 2039 mlog_entry_void();
2044 2040
2045 spin_lock_irqsave(&lockres->l_lock, flags); 2041 spin_lock_irqsave(&lockres->l_lock, flags);
2046 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2042 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2047 if (!status) 2043 if (!status)
2048 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2044 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2049 spin_unlock_irqrestore(&lockres->l_lock, flags); 2045 spin_unlock_irqrestore(&lockres->l_lock, flags);
2050 2046
2051 wake_up(&lockres->l_event); 2047 wake_up(&lockres->l_event);
2052 2048
2053 mlog_exit_void(); 2049 mlog_exit_void();
2054 } 2050 }
2055 2051
2056 /* may or may not return a bh if it went to disk. */ 2052 /* may or may not return a bh if it went to disk. */
2057 static int ocfs2_inode_lock_update(struct inode *inode, 2053 static int ocfs2_inode_lock_update(struct inode *inode,
2058 struct buffer_head **bh) 2054 struct buffer_head **bh)
2059 { 2055 {
2060 int status = 0; 2056 int status = 0;
2061 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2057 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2062 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2058 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2063 struct ocfs2_dinode *fe; 2059 struct ocfs2_dinode *fe;
2064 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2060 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2065 2061
2066 mlog_entry_void(); 2062 mlog_entry_void();
2067 2063
2068 if (ocfs2_mount_local(osb)) 2064 if (ocfs2_mount_local(osb))
2069 goto bail; 2065 goto bail;
2070 2066
2071 spin_lock(&oi->ip_lock); 2067 spin_lock(&oi->ip_lock);
2072 if (oi->ip_flags & OCFS2_INODE_DELETED) { 2068 if (oi->ip_flags & OCFS2_INODE_DELETED) {
2073 mlog(0, "Orphaned inode %llu was deleted while we " 2069 mlog(0, "Orphaned inode %llu was deleted while we "
2074 "were waiting on a lock. ip_flags = 0x%x\n", 2070 "were waiting on a lock. ip_flags = 0x%x\n",
2075 (unsigned long long)oi->ip_blkno, oi->ip_flags); 2071 (unsigned long long)oi->ip_blkno, oi->ip_flags);
2076 spin_unlock(&oi->ip_lock); 2072 spin_unlock(&oi->ip_lock);
2077 status = -ENOENT; 2073 status = -ENOENT;
2078 goto bail; 2074 goto bail;
2079 } 2075 }
2080 spin_unlock(&oi->ip_lock); 2076 spin_unlock(&oi->ip_lock);
2081 2077
2082 if (!ocfs2_should_refresh_lock_res(lockres)) 2078 if (!ocfs2_should_refresh_lock_res(lockres))
2083 goto bail; 2079 goto bail;
2084 2080
2085 /* This will discard any caching information we might have had 2081 /* This will discard any caching information we might have had
2086 * for the inode metadata. */ 2082 * for the inode metadata. */
2087 ocfs2_metadata_cache_purge(inode); 2083 ocfs2_metadata_cache_purge(inode);
2088 2084
2089 ocfs2_extent_map_trunc(inode, 0); 2085 ocfs2_extent_map_trunc(inode, 0);
2090 2086
2091 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2087 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2092 mlog(0, "Trusting LVB on inode %llu\n", 2088 mlog(0, "Trusting LVB on inode %llu\n",
2093 (unsigned long long)oi->ip_blkno); 2089 (unsigned long long)oi->ip_blkno);
2094 ocfs2_refresh_inode_from_lvb(inode); 2090 ocfs2_refresh_inode_from_lvb(inode);
2095 } else { 2091 } else {
2096 /* Boo, we have to go to disk. */ 2092 /* Boo, we have to go to disk. */
2097 /* read bh, cast, ocfs2_refresh_inode */ 2093 /* read bh, cast, ocfs2_refresh_inode */
2098 status = ocfs2_read_inode_block(inode, bh); 2094 status = ocfs2_read_inode_block(inode, bh);
2099 if (status < 0) { 2095 if (status < 0) {
2100 mlog_errno(status); 2096 mlog_errno(status);
2101 goto bail_refresh; 2097 goto bail_refresh;
2102 } 2098 }
2103 fe = (struct ocfs2_dinode *) (*bh)->b_data; 2099 fe = (struct ocfs2_dinode *) (*bh)->b_data;
2104 2100
2105 /* This is a good chance to make sure we're not 2101 /* This is a good chance to make sure we're not
2106 * locking an invalid object. ocfs2_read_inode_block() 2102 * locking an invalid object. ocfs2_read_inode_block()
2107 * already checked that the inode block is sane. 2103 * already checked that the inode block is sane.
2108 * 2104 *
2109 * We bug on a stale inode here because we checked 2105 * We bug on a stale inode here because we checked
2110 * above whether it was wiped from disk. The wiping 2106 * above whether it was wiped from disk. The wiping
2111 * node provides a guarantee that we receive that 2107 * node provides a guarantee that we receive that
2112 * message and can mark the inode before dropping any 2108 * message and can mark the inode before dropping any
2113 * locks associated with it. */ 2109 * locks associated with it. */
2114 mlog_bug_on_msg(inode->i_generation != 2110 mlog_bug_on_msg(inode->i_generation !=
2115 le32_to_cpu(fe->i_generation), 2111 le32_to_cpu(fe->i_generation),
2116 "Invalid dinode %llu disk generation: %u " 2112 "Invalid dinode %llu disk generation: %u "
2117 "inode->i_generation: %u\n", 2113 "inode->i_generation: %u\n",
2118 (unsigned long long)oi->ip_blkno, 2114 (unsigned long long)oi->ip_blkno,
2119 le32_to_cpu(fe->i_generation), 2115 le32_to_cpu(fe->i_generation),
2120 inode->i_generation); 2116 inode->i_generation);
2121 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2117 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2122 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2118 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2123 "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2119 "Stale dinode %llu dtime: %llu flags: 0x%x\n",
2124 (unsigned long long)oi->ip_blkno, 2120 (unsigned long long)oi->ip_blkno,
2125 (unsigned long long)le64_to_cpu(fe->i_dtime), 2121 (unsigned long long)le64_to_cpu(fe->i_dtime),
2126 le32_to_cpu(fe->i_flags)); 2122 le32_to_cpu(fe->i_flags));
2127 2123
2128 ocfs2_refresh_inode(inode, fe); 2124 ocfs2_refresh_inode(inode, fe);
2129 ocfs2_track_lock_refresh(lockres); 2125 ocfs2_track_lock_refresh(lockres);
2130 } 2126 }
2131 2127
2132 status = 0; 2128 status = 0;
2133 bail_refresh: 2129 bail_refresh:
2134 ocfs2_complete_lock_res_refresh(lockres, status); 2130 ocfs2_complete_lock_res_refresh(lockres, status);
2135 bail: 2131 bail:
2136 mlog_exit(status); 2132 mlog_exit(status);
2137 return status; 2133 return status;
2138 } 2134 }
2139 2135
/* Hand the caller a buffer_head for @inode's dinode: reuse
 * @passed_bh (taking an extra reference) when the lock update already
 * read it, otherwise read the inode block from disk. */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		/* Nothing was read for us - fetch the block ourselves. */
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);
		return status;
	}

	/* Ok, the update went to disk for us, use the returned bh. */
	*ret_bh = passed_bh;
	get_bh(*ret_bh);

	return 0;
}
2161 2157
2162 /* 2158 /*
2163 * returns < 0 error if the callback will never be called, otherwise 2159 * returns < 0 error if the callback will never be called, otherwise
2164 * the result of the lock will be communicated via the callback. 2160 * the result of the lock will be communicated via the callback.
2165 */ 2161 */
2166 int ocfs2_inode_lock_full(struct inode *inode, 2162 int ocfs2_inode_lock_full(struct inode *inode,
2167 struct buffer_head **ret_bh, 2163 struct buffer_head **ret_bh,
2168 int ex, 2164 int ex,
2169 int arg_flags) 2165 int arg_flags)
2170 { 2166 {
2171 int status, level, acquired; 2167 int status, level, acquired;
2172 u32 dlm_flags; 2168 u32 dlm_flags;
2173 struct ocfs2_lock_res *lockres = NULL; 2169 struct ocfs2_lock_res *lockres = NULL;
2174 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2170 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2175 struct buffer_head *local_bh = NULL; 2171 struct buffer_head *local_bh = NULL;
2176 2172
2177 BUG_ON(!inode); 2173 BUG_ON(!inode);
2178 2174
2179 mlog_entry_void(); 2175 mlog_entry_void();
2180 2176
2181 mlog(0, "inode %llu, take %s META lock\n", 2177 mlog(0, "inode %llu, take %s META lock\n",
2182 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2178 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2183 ex ? "EXMODE" : "PRMODE"); 2179 ex ? "EXMODE" : "PRMODE");
2184 2180
2185 status = 0; 2181 status = 0;
2186 acquired = 0; 2182 acquired = 0;
2187 /* We'll allow faking a readonly metadata lock for 2183 /* We'll allow faking a readonly metadata lock for
2188 * rodevices. */ 2184 * rodevices. */
2189 if (ocfs2_is_hard_readonly(osb)) { 2185 if (ocfs2_is_hard_readonly(osb)) {
2190 if (ex) 2186 if (ex)
2191 status = -EROFS; 2187 status = -EROFS;
2192 goto bail; 2188 goto bail;
2193 } 2189 }
2194 2190
2195 if (ocfs2_mount_local(osb)) 2191 if (ocfs2_mount_local(osb))
2196 goto local; 2192 goto local;
2197 2193
2198 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2194 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2199 ocfs2_wait_for_recovery(osb); 2195 ocfs2_wait_for_recovery(osb);
2200 2196
2201 lockres = &OCFS2_I(inode)->ip_inode_lockres; 2197 lockres = &OCFS2_I(inode)->ip_inode_lockres;
2202 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2198 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2203 dlm_flags = 0; 2199 dlm_flags = 0;
2204 if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2200 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2205 dlm_flags |= DLM_LKF_NOQUEUE; 2201 dlm_flags |= DLM_LKF_NOQUEUE;
2206 2202
2207 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2203 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
2208 if (status < 0) { 2204 if (status < 0) {
2209 if (status != -EAGAIN && status != -EIOCBRETRY) 2205 if (status != -EAGAIN && status != -EIOCBRETRY)
2210 mlog_errno(status); 2206 mlog_errno(status);
2211 goto bail; 2207 goto bail;
2212 } 2208 }
2213 2209
2214 /* Notify the error cleanup path to drop the cluster lock. */ 2210 /* Notify the error cleanup path to drop the cluster lock. */
2215 acquired = 1; 2211 acquired = 1;
2216 2212
2217 /* We wait twice because a node may have died while we were in 2213 /* We wait twice because a node may have died while we were in
2218 * the lower dlm layers. The second time though, we've 2214 * the lower dlm layers. The second time though, we've
2219 * committed to owning this lock so we don't allow signals to 2215 * committed to owning this lock so we don't allow signals to
2220 * abort the operation. */ 2216 * abort the operation. */
2221 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2217 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2222 ocfs2_wait_for_recovery(osb); 2218 ocfs2_wait_for_recovery(osb);
2223 2219
2224 local: 2220 local:
2225 /* 2221 /*
2226 * We only see this flag if we're being called from 2222 * We only see this flag if we're being called from
2227 * ocfs2_read_locked_inode(). It means we're locking an inode 2223 * ocfs2_read_locked_inode(). It means we're locking an inode
2228 * which hasn't been populated yet, so clear the refresh flag 2224 * which hasn't been populated yet, so clear the refresh flag
2229 * and let the caller handle it. 2225 * and let the caller handle it.
2230 */ 2226 */
2231 if (inode->i_state & I_NEW) { 2227 if (inode->i_state & I_NEW) {
2232 status = 0; 2228 status = 0;
2233 if (lockres) 2229 if (lockres)
2234 ocfs2_complete_lock_res_refresh(lockres, 0); 2230 ocfs2_complete_lock_res_refresh(lockres, 0);
2235 goto bail; 2231 goto bail;
2236 } 2232 }
2237 2233
2238 /* This is fun. The caller may want a bh back, or it may 2234 /* This is fun. The caller may want a bh back, or it may
2239 * not. ocfs2_inode_lock_update definitely wants one in, but 2235 * not. ocfs2_inode_lock_update definitely wants one in, but
2240 * may or may not read one, depending on what's in the 2236 * may or may not read one, depending on what's in the
2241 * LVB. The result of all of this is that we've *only* gone to 2237 * LVB. The result of all of this is that we've *only* gone to
2242 * disk if we have to, so the complexity is worthwhile. */ 2238 * disk if we have to, so the complexity is worthwhile. */
2243 status = ocfs2_inode_lock_update(inode, &local_bh); 2239 status = ocfs2_inode_lock_update(inode, &local_bh);
2244 if (status < 0) { 2240 if (status < 0) {
2245 if (status != -ENOENT) 2241 if (status != -ENOENT)
2246 mlog_errno(status); 2242 mlog_errno(status);
2247 goto bail; 2243 goto bail;
2248 } 2244 }
2249 2245
2250 if (ret_bh) { 2246 if (ret_bh) {
2251 status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2247 status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2252 if (status < 0) { 2248 if (status < 0) {
2253 mlog_errno(status); 2249 mlog_errno(status);
2254 goto bail; 2250 goto bail;
2255 } 2251 }
2256 } 2252 }
2257 2253
2258 bail: 2254 bail:
2259 if (status < 0) { 2255 if (status < 0) {
2260 if (ret_bh && (*ret_bh)) { 2256 if (ret_bh && (*ret_bh)) {
2261 brelse(*ret_bh); 2257 brelse(*ret_bh);
2262 *ret_bh = NULL; 2258 *ret_bh = NULL;
2263 } 2259 }
2264 if (acquired) 2260 if (acquired)
2265 ocfs2_inode_unlock(inode, ex); 2261 ocfs2_inode_unlock(inode, ex);
2266 } 2262 }
2267 2263
2268 if (local_bh) 2264 if (local_bh)
2269 brelse(local_bh); 2265 brelse(local_bh);
2270 2266
2271 mlog_exit(status); 2267 mlog_exit(status);
2272 return status; 2268 return status;
2273 } 2269 }
2274 2270
2275 /* 2271 /*
2276 * This is working around a lock inversion between tasks acquiring DLM 2272 * This is working around a lock inversion between tasks acquiring DLM
2277 * locks while holding a page lock and the downconvert thread which 2273 * locks while holding a page lock and the downconvert thread which
2278 * blocks dlm lock acquiry while acquiring page locks. 2274 * blocks dlm lock acquiry while acquiring page locks.
2279 * 2275 *
2280 * ** These _with_page variantes are only intended to be called from aop 2276 * ** These _with_page variantes are only intended to be called from aop
2281 * methods that hold page locks and return a very specific *positive* error 2277 * methods that hold page locks and return a very specific *positive* error
2282 * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2278 * code that aop methods pass up to the VFS -- test for errors with != 0. **
2283 * 2279 *
2284 * The DLM is called such that it returns -EAGAIN if it would have 2280 * The DLM is called such that it returns -EAGAIN if it would have
2285 * blocked waiting for the downconvert thread. In that case we unlock 2281 * blocked waiting for the downconvert thread. In that case we unlock
2286 * our page so the downconvert thread can make progress. Once we've 2282 * our page so the downconvert thread can make progress. Once we've
2287 * done this we have to return AOP_TRUNCATED_PAGE so the aop method 2283 * done this we have to return AOP_TRUNCATED_PAGE so the aop method
2288 * that called us can bubble that back up into the VFS who will then 2284 * that called us can bubble that back up into the VFS who will then
2289 * immediately retry the aop call. 2285 * immediately retry the aop call.
2290 * 2286 *
2291 * We do a blocking lock and immediate unlock before returning, though, so that 2287 * We do a blocking lock and immediate unlock before returning, though, so that
2292 * the lock has a great chance of being cached on this node by the time the VFS 2288 * the lock has a great chance of being cached on this node by the time the VFS
2293 * calls back to retry the aop. This has a potential to livelock as nodes 2289 * calls back to retry the aop. This has a potential to livelock as nodes
2294 * ping locks back and forth, but that's a risk we're willing to take to avoid 2290 * ping locks back and forth, but that's a risk we're willing to take to avoid
2295 * the lock inversion simply. 2291 * the lock inversion simply.
2296 */ 2292 */
2297 int ocfs2_inode_lock_with_page(struct inode *inode, 2293 int ocfs2_inode_lock_with_page(struct inode *inode,
2298 struct buffer_head **ret_bh, 2294 struct buffer_head **ret_bh,
2299 int ex, 2295 int ex,
2300 struct page *page) 2296 struct page *page)
2301 { 2297 {
2302 int ret; 2298 int ret;
2303 2299
2304 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2300 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2305 if (ret == -EAGAIN) { 2301 if (ret == -EAGAIN) {
2306 unlock_page(page); 2302 unlock_page(page);
2307 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2303 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2308 ocfs2_inode_unlock(inode, ex); 2304 ocfs2_inode_unlock(inode, ex);
2309 ret = AOP_TRUNCATED_PAGE; 2305 ret = AOP_TRUNCATED_PAGE;
2310 } 2306 }
2311 2307
2312 return ret; 2308 return ret;
2313 } 2309 }
2314 2310
2315 int ocfs2_inode_lock_atime(struct inode *inode, 2311 int ocfs2_inode_lock_atime(struct inode *inode,
2316 struct vfsmount *vfsmnt, 2312 struct vfsmount *vfsmnt,
2317 int *level) 2313 int *level)
2318 { 2314 {
2319 int ret; 2315 int ret;
2320 2316
2321 mlog_entry_void(); 2317 mlog_entry_void();
2322 ret = ocfs2_inode_lock(inode, NULL, 0); 2318 ret = ocfs2_inode_lock(inode, NULL, 0);
2323 if (ret < 0) { 2319 if (ret < 0) {
2324 mlog_errno(ret); 2320 mlog_errno(ret);
2325 return ret; 2321 return ret;
2326 } 2322 }
2327 2323
2328 /* 2324 /*
2329 * If we should update atime, we will get EX lock, 2325 * If we should update atime, we will get EX lock,
2330 * otherwise we just get PR lock. 2326 * otherwise we just get PR lock.
2331 */ 2327 */
2332 if (ocfs2_should_update_atime(inode, vfsmnt)) { 2328 if (ocfs2_should_update_atime(inode, vfsmnt)) {
2333 struct buffer_head *bh = NULL; 2329 struct buffer_head *bh = NULL;
2334 2330
2335 ocfs2_inode_unlock(inode, 0); 2331 ocfs2_inode_unlock(inode, 0);
2336 ret = ocfs2_inode_lock(inode, &bh, 1); 2332 ret = ocfs2_inode_lock(inode, &bh, 1);
2337 if (ret < 0) { 2333 if (ret < 0) {
2338 mlog_errno(ret); 2334 mlog_errno(ret);
2339 return ret; 2335 return ret;
2340 } 2336 }
2341 *level = 1; 2337 *level = 1;
2342 if (ocfs2_should_update_atime(inode, vfsmnt)) 2338 if (ocfs2_should_update_atime(inode, vfsmnt))
2343 ocfs2_update_inode_atime(inode, bh); 2339 ocfs2_update_inode_atime(inode, bh);
2344 if (bh) 2340 if (bh)
2345 brelse(bh); 2341 brelse(bh);
2346 } else 2342 } else
2347 *level = 0; 2343 *level = 0;
2348 2344
2349 mlog_exit(ret); 2345 mlog_exit(ret);
2350 return ret; 2346 return ret;
2351 } 2347 }
2352 2348
2353 void ocfs2_inode_unlock(struct inode *inode, 2349 void ocfs2_inode_unlock(struct inode *inode,
2354 int ex) 2350 int ex)
2355 { 2351 {
2356 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2352 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2357 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2353 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2358 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2354 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2359 2355
2360 mlog_entry_void(); 2356 mlog_entry_void();
2361 2357
2362 mlog(0, "inode %llu drop %s META lock\n", 2358 mlog(0, "inode %llu drop %s META lock\n",
2363 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2359 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2364 ex ? "EXMODE" : "PRMODE"); 2360 ex ? "EXMODE" : "PRMODE");
2365 2361
2366 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2362 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2367 !ocfs2_mount_local(osb)) 2363 !ocfs2_mount_local(osb))
2368 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2364 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2369 2365
2370 mlog_exit_void(); 2366 mlog_exit_void();
2371 } 2367 }
2372 2368
2373 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) 2369 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
2374 { 2370 {
2375 struct ocfs2_lock_res *lockres; 2371 struct ocfs2_lock_res *lockres;
2376 struct ocfs2_orphan_scan_lvb *lvb; 2372 struct ocfs2_orphan_scan_lvb *lvb;
2377 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2373 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2378 int status = 0; 2374 int status = 0;
2379 2375
2380 lockres = &osb->osb_orphan_scan.os_lockres; 2376 lockres = &osb->osb_orphan_scan.os_lockres;
2381 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2377 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2382 if (status < 0) 2378 if (status < 0)
2383 return status; 2379 return status;
2384 2380
2385 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2381 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2386 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 2382 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2387 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 2383 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
2388 *seqno = be32_to_cpu(lvb->lvb_os_seqno); 2384 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
2385 else
2386 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2387
2389 return status; 2388 return status;
2390 } 2389 }
2391 2390
2392 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) 2391 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex)
2393 { 2392 {
2394 struct ocfs2_lock_res *lockres; 2393 struct ocfs2_lock_res *lockres;
2395 struct ocfs2_orphan_scan_lvb *lvb; 2394 struct ocfs2_orphan_scan_lvb *lvb;
2396 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2395 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2397 2396
2398 lockres = &osb->osb_orphan_scan.os_lockres; 2397 lockres = &osb->osb_orphan_scan.os_lockres;
2399 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2398 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2400 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 2399 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2401 lvb->lvb_os_seqno = cpu_to_be32(seqno); 2400 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2402 ocfs2_cluster_unlock(osb, lockres, level); 2401 ocfs2_cluster_unlock(osb, lockres, level);
2403 } 2402 }
2404 2403
2405 int ocfs2_super_lock(struct ocfs2_super *osb, 2404 int ocfs2_super_lock(struct ocfs2_super *osb,
2406 int ex) 2405 int ex)
2407 { 2406 {
2408 int status = 0; 2407 int status = 0;
2409 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2408 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2410 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2409 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2411 2410
2412 mlog_entry_void(); 2411 mlog_entry_void();
2413 2412
2414 if (ocfs2_is_hard_readonly(osb)) 2413 if (ocfs2_is_hard_readonly(osb))
2415 return -EROFS; 2414 return -EROFS;
2416 2415
2417 if (ocfs2_mount_local(osb)) 2416 if (ocfs2_mount_local(osb))
2418 goto bail; 2417 goto bail;
2419 2418
2420 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2419 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2421 if (status < 0) { 2420 if (status < 0) {
2422 mlog_errno(status); 2421 mlog_errno(status);
2423 goto bail; 2422 goto bail;
2424 } 2423 }
2425 2424
2426 /* The super block lock path is really in the best position to 2425 /* The super block lock path is really in the best position to
2427 * know when resources covered by the lock need to be 2426 * know when resources covered by the lock need to be
2428 * refreshed, so we do it here. Of course, making sense of 2427 * refreshed, so we do it here. Of course, making sense of
2429 * everything is up to the caller :) */ 2428 * everything is up to the caller :) */
2430 status = ocfs2_should_refresh_lock_res(lockres); 2429 status = ocfs2_should_refresh_lock_res(lockres);
2431 if (status < 0) { 2430 if (status < 0) {
2432 mlog_errno(status); 2431 mlog_errno(status);
2433 goto bail; 2432 goto bail;
2434 } 2433 }
2435 if (status) { 2434 if (status) {
2436 status = ocfs2_refresh_slot_info(osb); 2435 status = ocfs2_refresh_slot_info(osb);
2437 2436
2438 ocfs2_complete_lock_res_refresh(lockres, status); 2437 ocfs2_complete_lock_res_refresh(lockres, status);
2439 2438
2440 if (status < 0) 2439 if (status < 0)
2441 mlog_errno(status); 2440 mlog_errno(status);
2442 ocfs2_track_lock_refresh(lockres); 2441 ocfs2_track_lock_refresh(lockres);
2443 } 2442 }
2444 bail: 2443 bail:
2445 mlog_exit(status); 2444 mlog_exit(status);
2446 return status; 2445 return status;
2447 } 2446 }
2448 2447
2449 void ocfs2_super_unlock(struct ocfs2_super *osb, 2448 void ocfs2_super_unlock(struct ocfs2_super *osb,
2450 int ex) 2449 int ex)
2451 { 2450 {
2452 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2451 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2453 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2452 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2454 2453
2455 if (!ocfs2_mount_local(osb)) 2454 if (!ocfs2_mount_local(osb))
2456 ocfs2_cluster_unlock(osb, lockres, level); 2455 ocfs2_cluster_unlock(osb, lockres, level);
2457 } 2456 }
2458 2457
2459 int ocfs2_rename_lock(struct ocfs2_super *osb) 2458 int ocfs2_rename_lock(struct ocfs2_super *osb)
2460 { 2459 {
2461 int status; 2460 int status;
2462 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2461 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2463 2462
2464 if (ocfs2_is_hard_readonly(osb)) 2463 if (ocfs2_is_hard_readonly(osb))
2465 return -EROFS; 2464 return -EROFS;
2466 2465
2467 if (ocfs2_mount_local(osb)) 2466 if (ocfs2_mount_local(osb))
2468 return 0; 2467 return 0;
2469 2468
2470 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2469 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2471 if (status < 0) 2470 if (status < 0)
2472 mlog_errno(status); 2471 mlog_errno(status);
2473 2472
2474 return status; 2473 return status;
2475 } 2474 }
2476 2475
2477 void ocfs2_rename_unlock(struct ocfs2_super *osb) 2476 void ocfs2_rename_unlock(struct ocfs2_super *osb)
2478 { 2477 {
2479 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2478 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2480 2479
2481 if (!ocfs2_mount_local(osb)) 2480 if (!ocfs2_mount_local(osb))
2482 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2481 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2483 } 2482 }
2484 2483
2485 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 2484 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
2486 { 2485 {
2487 int status; 2486 int status;
2488 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2487 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2489 2488
2490 if (ocfs2_is_hard_readonly(osb)) 2489 if (ocfs2_is_hard_readonly(osb))
2491 return -EROFS; 2490 return -EROFS;
2492 2491
2493 if (ocfs2_mount_local(osb)) 2492 if (ocfs2_mount_local(osb))
2494 return 0; 2493 return 0;
2495 2494
2496 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 2495 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
2497 0, 0); 2496 0, 0);
2498 if (status < 0) 2497 if (status < 0)
2499 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 2498 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
2500 2499
2501 return status; 2500 return status;
2502 } 2501 }
2503 2502
2504 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 2503 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
2505 { 2504 {
2506 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2505 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2507 2506
2508 if (!ocfs2_mount_local(osb)) 2507 if (!ocfs2_mount_local(osb))
2509 ocfs2_cluster_unlock(osb, lockres, 2508 ocfs2_cluster_unlock(osb, lockres,
2510 ex ? LKM_EXMODE : LKM_PRMODE); 2509 ex ? LKM_EXMODE : LKM_PRMODE);
2511 } 2510 }
2512 2511
2513 int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2512 int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2514 { 2513 {
2515 int ret; 2514 int ret;
2516 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2515 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2517 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2516 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2518 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2517 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2519 2518
2520 BUG_ON(!dl); 2519 BUG_ON(!dl);
2521 2520
2522 if (ocfs2_is_hard_readonly(osb)) 2521 if (ocfs2_is_hard_readonly(osb))
2523 return -EROFS; 2522 return -EROFS;
2524 2523
2525 if (ocfs2_mount_local(osb)) 2524 if (ocfs2_mount_local(osb))
2526 return 0; 2525 return 0;
2527 2526
2528 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2527 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2529 if (ret < 0) 2528 if (ret < 0)
2530 mlog_errno(ret); 2529 mlog_errno(ret);
2531 2530
2532 return ret; 2531 return ret;
2533 } 2532 }
2534 2533
2535 void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2534 void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2536 { 2535 {
2537 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2536 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2538 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2537 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2539 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2538 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2540 2539
2541 if (!ocfs2_mount_local(osb)) 2540 if (!ocfs2_mount_local(osb))
2542 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2541 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2543 } 2542 }
2544 2543
2545 /* Reference counting of the dlm debug structure. We want this because 2544 /* Reference counting of the dlm debug structure. We want this because
2546 * open references on the debug inodes can live on after a mount, so 2545 * open references on the debug inodes can live on after a mount, so
2547 * we can't rely on the ocfs2_super to always exist. */ 2546 * we can't rely on the ocfs2_super to always exist. */
2548 static void ocfs2_dlm_debug_free(struct kref *kref) 2547 static void ocfs2_dlm_debug_free(struct kref *kref)
2549 { 2548 {
2550 struct ocfs2_dlm_debug *dlm_debug; 2549 struct ocfs2_dlm_debug *dlm_debug;
2551 2550
2552 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2551 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2553 2552
2554 kfree(dlm_debug); 2553 kfree(dlm_debug);
2555 } 2554 }
2556 2555
2557 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2556 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2558 { 2557 {
2559 if (dlm_debug) 2558 if (dlm_debug)
2560 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2559 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2561 } 2560 }
2562 2561
2563 static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2562 static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2564 { 2563 {
2565 kref_get(&debug->d_refcnt); 2564 kref_get(&debug->d_refcnt);
2566 } 2565 }
2567 2566
2568 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2567 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2569 { 2568 {
2570 struct ocfs2_dlm_debug *dlm_debug; 2569 struct ocfs2_dlm_debug *dlm_debug;
2571 2570
2572 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2571 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2573 if (!dlm_debug) { 2572 if (!dlm_debug) {
2574 mlog_errno(-ENOMEM); 2573 mlog_errno(-ENOMEM);
2575 goto out; 2574 goto out;
2576 } 2575 }
2577 2576
2578 kref_init(&dlm_debug->d_refcnt); 2577 kref_init(&dlm_debug->d_refcnt);
2579 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2578 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2580 dlm_debug->d_locking_state = NULL; 2579 dlm_debug->d_locking_state = NULL;
2581 out: 2580 out:
2582 return dlm_debug; 2581 return dlm_debug;
2583 } 2582 }
2584 2583
2585 /* Access to this is arbitrated for us via seq_file->sem. */ 2584 /* Access to this is arbitrated for us via seq_file->sem. */
2586 struct ocfs2_dlm_seq_priv { 2585 struct ocfs2_dlm_seq_priv {
2587 struct ocfs2_dlm_debug *p_dlm_debug; 2586 struct ocfs2_dlm_debug *p_dlm_debug;
2588 struct ocfs2_lock_res p_iter_res; 2587 struct ocfs2_lock_res p_iter_res;
2589 struct ocfs2_lock_res p_tmp_res; 2588 struct ocfs2_lock_res p_tmp_res;
2590 }; 2589 };
2591 2590
2592 static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2591 static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2593 struct ocfs2_dlm_seq_priv *priv) 2592 struct ocfs2_dlm_seq_priv *priv)
2594 { 2593 {
2595 struct ocfs2_lock_res *iter, *ret = NULL; 2594 struct ocfs2_lock_res *iter, *ret = NULL;
2596 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2595 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2597 2596
2598 assert_spin_locked(&ocfs2_dlm_tracking_lock); 2597 assert_spin_locked(&ocfs2_dlm_tracking_lock);
2599 2598
2600 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2599 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2601 /* discover the head of the list */ 2600 /* discover the head of the list */
2602 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2601 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2603 mlog(0, "End of list found, %p\n", ret); 2602 mlog(0, "End of list found, %p\n", ret);
2604 break; 2603 break;
2605 } 2604 }
2606 2605
2607 /* We track our "dummy" iteration lockres' by a NULL 2606 /* We track our "dummy" iteration lockres' by a NULL
2608 * l_ops field. */ 2607 * l_ops field. */
2609 if (iter->l_ops != NULL) { 2608 if (iter->l_ops != NULL) {
2610 ret = iter; 2609 ret = iter;
2611 break; 2610 break;
2612 } 2611 }
2613 } 2612 }
2614 2613
2615 return ret; 2614 return ret;
2616 } 2615 }
2617 2616
2618 static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2617 static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2619 { 2618 {
2620 struct ocfs2_dlm_seq_priv *priv = m->private; 2619 struct ocfs2_dlm_seq_priv *priv = m->private;
2621 struct ocfs2_lock_res *iter; 2620 struct ocfs2_lock_res *iter;
2622 2621
2623 spin_lock(&ocfs2_dlm_tracking_lock); 2622 spin_lock(&ocfs2_dlm_tracking_lock);
2624 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2623 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2625 if (iter) { 2624 if (iter) {
2626 /* Since lockres' have the lifetime of their container 2625 /* Since lockres' have the lifetime of their container
2627 * (which can be inodes, ocfs2_supers, etc) we want to 2626 * (which can be inodes, ocfs2_supers, etc) we want to
2628 * copy this out to a temporary lockres while still 2627 * copy this out to a temporary lockres while still
2629 * under the spinlock. Obviously after this we can't 2628 * under the spinlock. Obviously after this we can't
2630 * trust any pointers on the copy returned, but that's 2629 * trust any pointers on the copy returned, but that's
2631 * ok as the information we want isn't typically held 2630 * ok as the information we want isn't typically held
2632 * in them. */ 2631 * in them. */
2633 priv->p_tmp_res = *iter; 2632 priv->p_tmp_res = *iter;
2634 iter = &priv->p_tmp_res; 2633 iter = &priv->p_tmp_res;
2635 } 2634 }
2636 spin_unlock(&ocfs2_dlm_tracking_lock); 2635 spin_unlock(&ocfs2_dlm_tracking_lock);
2637 2636
2638 return iter; 2637 return iter;
2639 } 2638 }
2640 2639
/* seq_file ->stop(): nothing to release — ->start() dropped the
 * tracking lock before returning. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
2644 2643
2645 static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2644 static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2646 { 2645 {
2647 struct ocfs2_dlm_seq_priv *priv = m->private; 2646 struct ocfs2_dlm_seq_priv *priv = m->private;
2648 struct ocfs2_lock_res *iter = v; 2647 struct ocfs2_lock_res *iter = v;
2649 struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2648 struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2650 2649
2651 spin_lock(&ocfs2_dlm_tracking_lock); 2650 spin_lock(&ocfs2_dlm_tracking_lock);
2652 iter = ocfs2_dlm_next_res(iter, priv); 2651 iter = ocfs2_dlm_next_res(iter, priv);
2653 list_del_init(&dummy->l_debug_list); 2652 list_del_init(&dummy->l_debug_list);
2654 if (iter) { 2653 if (iter) {
2655 list_add(&dummy->l_debug_list, &iter->l_debug_list); 2654 list_add(&dummy->l_debug_list, &iter->l_debug_list);
2656 priv->p_tmp_res = *iter; 2655 priv->p_tmp_res = *iter;
2657 iter = &priv->p_tmp_res; 2656 iter = &priv->p_tmp_res;
2658 } 2657 }
2659 spin_unlock(&ocfs2_dlm_tracking_lock); 2658 spin_unlock(&ocfs2_dlm_tracking_lock);
2660 2659
2661 return iter; 2660 return iter;
2662 } 2661 }
2663 2662
2664 /* So that debugfs.ocfs2 can determine which format is being used */ 2663 /* So that debugfs.ocfs2 can determine which format is being used */
2665 #define OCFS2_DLM_DEBUG_STR_VERSION 2 2664 #define OCFS2_DLM_DEBUG_STR_VERSION 2
2666 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2665 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2667 { 2666 {
2668 int i; 2667 int i;
2669 char *lvb; 2668 char *lvb;
2670 struct ocfs2_lock_res *lockres = v; 2669 struct ocfs2_lock_res *lockres = v;
2671 2670
2672 if (!lockres) 2671 if (!lockres)
2673 return -EINVAL; 2672 return -EINVAL;
2674 2673
2675 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2674 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2676 2675
2677 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2676 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2678 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2677 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2679 lockres->l_name, 2678 lockres->l_name,
2680 (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2679 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2681 else 2680 else
2682 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2681 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2683 2682
2684 seq_printf(m, "%d\t" 2683 seq_printf(m, "%d\t"
2685 "0x%lx\t" 2684 "0x%lx\t"
2686 "0x%x\t" 2685 "0x%x\t"
2687 "0x%x\t" 2686 "0x%x\t"
2688 "%u\t" 2687 "%u\t"
2689 "%u\t" 2688 "%u\t"
2690 "%d\t" 2689 "%d\t"
2691 "%d\t", 2690 "%d\t",
2692 lockres->l_level, 2691 lockres->l_level,
2693 lockres->l_flags, 2692 lockres->l_flags,
2694 lockres->l_action, 2693 lockres->l_action,
2695 lockres->l_unlock_action, 2694 lockres->l_unlock_action,
2696 lockres->l_ro_holders, 2695 lockres->l_ro_holders,
2697 lockres->l_ex_holders, 2696 lockres->l_ex_holders,
2698 lockres->l_requested, 2697 lockres->l_requested,
2699 lockres->l_blocking); 2698 lockres->l_blocking);
2700 2699
2701 /* Dump the raw LVB */ 2700 /* Dump the raw LVB */
2702 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2701 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2703 for(i = 0; i < DLM_LVB_LEN; i++) 2702 for(i = 0; i < DLM_LVB_LEN; i++)
2704 seq_printf(m, "0x%x\t", lvb[i]); 2703 seq_printf(m, "0x%x\t", lvb[i]);
2705 2704
2706 #ifdef CONFIG_OCFS2_FS_STATS 2705 #ifdef CONFIG_OCFS2_FS_STATS
2707 # define lock_num_prmode(_l) (_l)->l_lock_num_prmode 2706 # define lock_num_prmode(_l) (_l)->l_lock_num_prmode
2708 # define lock_num_exmode(_l) (_l)->l_lock_num_exmode 2707 # define lock_num_exmode(_l) (_l)->l_lock_num_exmode
2709 # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed 2708 # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed
2710 # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed 2709 # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed
2711 # define lock_total_prmode(_l) (_l)->l_lock_total_prmode 2710 # define lock_total_prmode(_l) (_l)->l_lock_total_prmode
2712 # define lock_total_exmode(_l) (_l)->l_lock_total_exmode 2711 # define lock_total_exmode(_l) (_l)->l_lock_total_exmode
2713 # define lock_max_prmode(_l) (_l)->l_lock_max_prmode 2712 # define lock_max_prmode(_l) (_l)->l_lock_max_prmode
2714 # define lock_max_exmode(_l) (_l)->l_lock_max_exmode 2713 # define lock_max_exmode(_l) (_l)->l_lock_max_exmode
2715 # define lock_refresh(_l) (_l)->l_lock_refresh 2714 # define lock_refresh(_l) (_l)->l_lock_refresh
2716 #else 2715 #else
2717 # define lock_num_prmode(_l) (0ULL) 2716 # define lock_num_prmode(_l) (0ULL)
2718 # define lock_num_exmode(_l) (0ULL) 2717 # define lock_num_exmode(_l) (0ULL)
2719 # define lock_num_prmode_failed(_l) (0) 2718 # define lock_num_prmode_failed(_l) (0)
2720 # define lock_num_exmode_failed(_l) (0) 2719 # define lock_num_exmode_failed(_l) (0)
2721 # define lock_total_prmode(_l) (0ULL) 2720 # define lock_total_prmode(_l) (0ULL)
2722 # define lock_total_exmode(_l) (0ULL) 2721 # define lock_total_exmode(_l) (0ULL)
2723 # define lock_max_prmode(_l) (0) 2722 # define lock_max_prmode(_l) (0)
2724 # define lock_max_exmode(_l) (0) 2723 # define lock_max_exmode(_l) (0)
2725 # define lock_refresh(_l) (0) 2724 # define lock_refresh(_l) (0)
2726 #endif 2725 #endif
2727 /* The following seq_print was added in version 2 of this output */ 2726 /* The following seq_print was added in version 2 of this output */
2728 seq_printf(m, "%llu\t" 2727 seq_printf(m, "%llu\t"
2729 "%llu\t" 2728 "%llu\t"
2730 "%u\t" 2729 "%u\t"
2731 "%u\t" 2730 "%u\t"
2732 "%llu\t" 2731 "%llu\t"
2733 "%llu\t" 2732 "%llu\t"
2734 "%u\t" 2733 "%u\t"
2735 "%u\t" 2734 "%u\t"
2736 "%u\t", 2735 "%u\t",
2737 lock_num_prmode(lockres), 2736 lock_num_prmode(lockres),
2738 lock_num_exmode(lockres), 2737 lock_num_exmode(lockres),
2739 lock_num_prmode_failed(lockres), 2738 lock_num_prmode_failed(lockres),
2740 lock_num_exmode_failed(lockres), 2739 lock_num_exmode_failed(lockres),
2741 lock_total_prmode(lockres), 2740 lock_total_prmode(lockres),
2742 lock_total_exmode(lockres), 2741 lock_total_exmode(lockres),
2743 lock_max_prmode(lockres), 2742 lock_max_prmode(lockres),
2744 lock_max_exmode(lockres), 2743 lock_max_exmode(lockres),
2745 lock_refresh(lockres)); 2744 lock_refresh(lockres));
2746 2745
2747 /* End the line */ 2746 /* End the line */
2748 seq_printf(m, "\n"); 2747 seq_printf(m, "\n");
2749 return 0; 2748 return 0;
2750 } 2749 }
2751 2750
2752 static const struct seq_operations ocfs2_dlm_seq_ops = { 2751 static const struct seq_operations ocfs2_dlm_seq_ops = {
2753 .start = ocfs2_dlm_seq_start, 2752 .start = ocfs2_dlm_seq_start,
2754 .stop = ocfs2_dlm_seq_stop, 2753 .stop = ocfs2_dlm_seq_stop,
2755 .next = ocfs2_dlm_seq_next, 2754 .next = ocfs2_dlm_seq_next,
2756 .show = ocfs2_dlm_seq_show, 2755 .show = ocfs2_dlm_seq_show,
2757 }; 2756 };
2758 2757
2759 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2758 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2760 { 2759 {
2761 struct seq_file *seq = (struct seq_file *) file->private_data; 2760 struct seq_file *seq = (struct seq_file *) file->private_data;
2762 struct ocfs2_dlm_seq_priv *priv = seq->private; 2761 struct ocfs2_dlm_seq_priv *priv = seq->private;
2763 struct ocfs2_lock_res *res = &priv->p_iter_res; 2762 struct ocfs2_lock_res *res = &priv->p_iter_res;
2764 2763
2765 ocfs2_remove_lockres_tracking(res); 2764 ocfs2_remove_lockres_tracking(res);
2766 ocfs2_put_dlm_debug(priv->p_dlm_debug); 2765 ocfs2_put_dlm_debug(priv->p_dlm_debug);
2767 return seq_release_private(inode, file); 2766 return seq_release_private(inode, file);
2768 } 2767 }
2769 2768
2770 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2769 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2771 { 2770 {
2772 int ret; 2771 int ret;
2773 struct ocfs2_dlm_seq_priv *priv; 2772 struct ocfs2_dlm_seq_priv *priv;
2774 struct seq_file *seq; 2773 struct seq_file *seq;
2775 struct ocfs2_super *osb; 2774 struct ocfs2_super *osb;
2776 2775
2777 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2776 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL);
2778 if (!priv) { 2777 if (!priv) {
2779 ret = -ENOMEM; 2778 ret = -ENOMEM;
2780 mlog_errno(ret); 2779 mlog_errno(ret);
2781 goto out; 2780 goto out;
2782 } 2781 }
2783 osb = inode->i_private; 2782 osb = inode->i_private;
2784 ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2783 ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2785 priv->p_dlm_debug = osb->osb_dlm_debug; 2784 priv->p_dlm_debug = osb->osb_dlm_debug;
2786 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2785 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2787 2786
2788 ret = seq_open(file, &ocfs2_dlm_seq_ops); 2787 ret = seq_open(file, &ocfs2_dlm_seq_ops);
2789 if (ret) { 2788 if (ret) {
2790 kfree(priv); 2789 kfree(priv);
2791 mlog_errno(ret); 2790 mlog_errno(ret);
2792 goto out; 2791 goto out;
2793 } 2792 }
2794 2793
2795 seq = (struct seq_file *) file->private_data; 2794 seq = (struct seq_file *) file->private_data;
2796 seq->private = priv; 2795 seq->private = priv;
2797 2796
2798 ocfs2_add_lockres_tracking(&priv->p_iter_res, 2797 ocfs2_add_lockres_tracking(&priv->p_iter_res,
2799 priv->p_dlm_debug); 2798 priv->p_dlm_debug);
2800 2799
2801 out: 2800 out:
2802 return ret; 2801 return ret;
2803 } 2802 }
2804 2803
2805 static const struct file_operations ocfs2_dlm_debug_fops = { 2804 static const struct file_operations ocfs2_dlm_debug_fops = {
2806 .open = ocfs2_dlm_debug_open, 2805 .open = ocfs2_dlm_debug_open,
2807 .release = ocfs2_dlm_debug_release, 2806 .release = ocfs2_dlm_debug_release,
2808 .read = seq_read, 2807 .read = seq_read,
2809 .llseek = seq_lseek, 2808 .llseek = seq_lseek,
2810 }; 2809 };
2811 2810
2812 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2811 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2813 { 2812 {
2814 int ret = 0; 2813 int ret = 0;
2815 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2814 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2816 2815
2817 dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2816 dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2818 S_IFREG|S_IRUSR, 2817 S_IFREG|S_IRUSR,
2819 osb->osb_debug_root, 2818 osb->osb_debug_root,
2820 osb, 2819 osb,
2821 &ocfs2_dlm_debug_fops); 2820 &ocfs2_dlm_debug_fops);
2822 if (!dlm_debug->d_locking_state) { 2821 if (!dlm_debug->d_locking_state) {
2823 ret = -EINVAL; 2822 ret = -EINVAL;
2824 mlog(ML_ERROR, 2823 mlog(ML_ERROR,
2825 "Unable to create locking state debugfs file.\n"); 2824 "Unable to create locking state debugfs file.\n");
2826 goto out; 2825 goto out;
2827 } 2826 }
2828 2827
2829 ocfs2_get_dlm_debug(dlm_debug); 2828 ocfs2_get_dlm_debug(dlm_debug);
2830 out: 2829 out:
2831 return ret; 2830 return ret;
2832 } 2831 }
2833 2832
2834 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2833 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2835 { 2834 {
2836 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2835 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2837 2836
2838 if (dlm_debug) { 2837 if (dlm_debug) {
2839 debugfs_remove(dlm_debug->d_locking_state); 2838 debugfs_remove(dlm_debug->d_locking_state);
2840 ocfs2_put_dlm_debug(dlm_debug); 2839 ocfs2_put_dlm_debug(dlm_debug);
2841 } 2840 }
2842 } 2841 }
2843 2842
2844 int ocfs2_dlm_init(struct ocfs2_super *osb) 2843 int ocfs2_dlm_init(struct ocfs2_super *osb)
2845 { 2844 {
2846 int status = 0; 2845 int status = 0;
2847 struct ocfs2_cluster_connection *conn = NULL; 2846 struct ocfs2_cluster_connection *conn = NULL;
2848 2847
2849 mlog_entry_void(); 2848 mlog_entry_void();
2850 2849
2851 if (ocfs2_mount_local(osb)) { 2850 if (ocfs2_mount_local(osb)) {
2852 osb->node_num = 0; 2851 osb->node_num = 0;
2853 goto local; 2852 goto local;
2854 } 2853 }
2855 2854
2856 status = ocfs2_dlm_init_debug(osb); 2855 status = ocfs2_dlm_init_debug(osb);
2857 if (status < 0) { 2856 if (status < 0) {
2858 mlog_errno(status); 2857 mlog_errno(status);
2859 goto bail; 2858 goto bail;
2860 } 2859 }
2861 2860
2862 /* launch downconvert thread */ 2861 /* launch downconvert thread */
2863 osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 2862 osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
2864 if (IS_ERR(osb->dc_task)) { 2863 if (IS_ERR(osb->dc_task)) {
2865 status = PTR_ERR(osb->dc_task); 2864 status = PTR_ERR(osb->dc_task);
2866 osb->dc_task = NULL; 2865 osb->dc_task = NULL;
2867 mlog_errno(status); 2866 mlog_errno(status);
2868 goto bail; 2867 goto bail;
2869 } 2868 }
2870 2869
2871 /* for now, uuid == domain */ 2870 /* for now, uuid == domain */
2872 status = ocfs2_cluster_connect(osb->osb_cluster_stack, 2871 status = ocfs2_cluster_connect(osb->osb_cluster_stack,
2873 osb->uuid_str, 2872 osb->uuid_str,
2874 strlen(osb->uuid_str), 2873 strlen(osb->uuid_str),
2875 ocfs2_do_node_down, osb, 2874 ocfs2_do_node_down, osb,
2876 &conn); 2875 &conn);
2877 if (status) { 2876 if (status) {
2878 mlog_errno(status); 2877 mlog_errno(status);
2879 goto bail; 2878 goto bail;
2880 } 2879 }
2881 2880
2882 status = ocfs2_cluster_this_node(&osb->node_num); 2881 status = ocfs2_cluster_this_node(&osb->node_num);
2883 if (status < 0) { 2882 if (status < 0) {
2884 mlog_errno(status); 2883 mlog_errno(status);
2885 mlog(ML_ERROR, 2884 mlog(ML_ERROR,
2886 "could not find this host's node number\n"); 2885 "could not find this host's node number\n");
2887 ocfs2_cluster_disconnect(conn, 0); 2886 ocfs2_cluster_disconnect(conn, 0);
2888 goto bail; 2887 goto bail;
2889 } 2888 }
2890 2889
2891 local: 2890 local:
2892 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2891 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
2893 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2892 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
2894 ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 2893 ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
2895 ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); 2894 ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
2896 2895
2897 osb->cconn = conn; 2896 osb->cconn = conn;
2898 2897
2899 status = 0; 2898 status = 0;
2900 bail: 2899 bail:
2901 if (status < 0) { 2900 if (status < 0) {
2902 ocfs2_dlm_shutdown_debug(osb); 2901 ocfs2_dlm_shutdown_debug(osb);
2903 if (osb->dc_task) 2902 if (osb->dc_task)
2904 kthread_stop(osb->dc_task); 2903 kthread_stop(osb->dc_task);
2905 } 2904 }
2906 2905
2907 mlog_exit(status); 2906 mlog_exit(status);
2908 return status; 2907 return status;
2909 } 2908 }
2910 2909
2911 void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 2910 void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
2912 int hangup_pending) 2911 int hangup_pending)
2913 { 2912 {
2914 mlog_entry_void(); 2913 mlog_entry_void();
2915 2914
2916 ocfs2_drop_osb_locks(osb); 2915 ocfs2_drop_osb_locks(osb);
2917 2916
2918 /* 2917 /*
2919 * Now that we have dropped all locks and ocfs2_dismount_volume() 2918 * Now that we have dropped all locks and ocfs2_dismount_volume()
2920 * has disabled recovery, the DLM won't be talking to us. It's 2919 * has disabled recovery, the DLM won't be talking to us. It's
2921 * safe to tear things down before disconnecting the cluster. 2920 * safe to tear things down before disconnecting the cluster.
2922 */ 2921 */
2923 2922
2924 if (osb->dc_task) { 2923 if (osb->dc_task) {
2925 kthread_stop(osb->dc_task); 2924 kthread_stop(osb->dc_task);
2926 osb->dc_task = NULL; 2925 osb->dc_task = NULL;
2927 } 2926 }
2928 2927
2929 ocfs2_lock_res_free(&osb->osb_super_lockres); 2928 ocfs2_lock_res_free(&osb->osb_super_lockres);
2930 ocfs2_lock_res_free(&osb->osb_rename_lockres); 2929 ocfs2_lock_res_free(&osb->osb_rename_lockres);
2931 ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); 2930 ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
2932 ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); 2931 ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
2933 2932
2934 ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 2933 ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
2935 osb->cconn = NULL; 2934 osb->cconn = NULL;
2936 2935
2937 ocfs2_dlm_shutdown_debug(osb); 2936 ocfs2_dlm_shutdown_debug(osb);
2938 2937
2939 mlog_exit_void(); 2938 mlog_exit_void();
2940 } 2939 }
2941 2940
2942 static void ocfs2_unlock_ast(void *opaque, int error) 2941 static void ocfs2_unlock_ast(void *opaque, int error)
2943 { 2942 {
2944 struct ocfs2_lock_res *lockres = opaque; 2943 struct ocfs2_lock_res *lockres = opaque;
2945 unsigned long flags; 2944 unsigned long flags;
2946 2945
2947 mlog_entry_void(); 2946 mlog_entry_void();
2948 2947
2949 mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 2948 mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name,
2950 lockres->l_unlock_action); 2949 lockres->l_unlock_action);
2951 2950
2952 spin_lock_irqsave(&lockres->l_lock, flags); 2951 spin_lock_irqsave(&lockres->l_lock, flags);
2953 if (error) { 2952 if (error) {
2954 mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 2953 mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
2955 "unlock_action %d\n", error, lockres->l_name, 2954 "unlock_action %d\n", error, lockres->l_name,
2956 lockres->l_unlock_action); 2955 lockres->l_unlock_action);
2957 spin_unlock_irqrestore(&lockres->l_lock, flags); 2956 spin_unlock_irqrestore(&lockres->l_lock, flags);
2958 return; 2957 return;
2959 } 2958 }
2960 2959
2961 switch(lockres->l_unlock_action) { 2960 switch(lockres->l_unlock_action) {
2962 case OCFS2_UNLOCK_CANCEL_CONVERT: 2961 case OCFS2_UNLOCK_CANCEL_CONVERT:
2963 mlog(0, "Cancel convert success for %s\n", lockres->l_name); 2962 mlog(0, "Cancel convert success for %s\n", lockres->l_name);
2964 lockres->l_action = OCFS2_AST_INVALID; 2963 lockres->l_action = OCFS2_AST_INVALID;
2965 /* Downconvert thread may have requeued this lock, we 2964 /* Downconvert thread may have requeued this lock, we
2966 * need to wake it. */ 2965 * need to wake it. */
2967 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 2966 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
2968 ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); 2967 ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
2969 break; 2968 break;
2970 case OCFS2_UNLOCK_DROP_LOCK: 2969 case OCFS2_UNLOCK_DROP_LOCK:
2971 lockres->l_level = DLM_LOCK_IV; 2970 lockres->l_level = DLM_LOCK_IV;
2972 break; 2971 break;
2973 default: 2972 default:
2974 BUG(); 2973 BUG();
2975 } 2974 }
2976 2975
2977 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2976 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
2978 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2977 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
2979 wake_up(&lockres->l_event); 2978 wake_up(&lockres->l_event);
2980 spin_unlock_irqrestore(&lockres->l_lock, flags); 2979 spin_unlock_irqrestore(&lockres->l_lock, flags);
2981 2980
2982 mlog_exit_void(); 2981 mlog_exit_void();
2983 } 2982 }
2984 2983
2985 static int ocfs2_drop_lock(struct ocfs2_super *osb, 2984 static int ocfs2_drop_lock(struct ocfs2_super *osb,
2986 struct ocfs2_lock_res *lockres) 2985 struct ocfs2_lock_res *lockres)
2987 { 2986 {
2988 int ret; 2987 int ret;
2989 unsigned long flags; 2988 unsigned long flags;
2990 u32 lkm_flags = 0; 2989 u32 lkm_flags = 0;
2991 2990
2992 /* We didn't get anywhere near actually using this lockres. */ 2991 /* We didn't get anywhere near actually using this lockres. */
2993 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2992 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
2994 goto out; 2993 goto out;
2995 2994
2996 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 2995 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
2997 lkm_flags |= DLM_LKF_VALBLK; 2996 lkm_flags |= DLM_LKF_VALBLK;
2998 2997
2999 spin_lock_irqsave(&lockres->l_lock, flags); 2998 spin_lock_irqsave(&lockres->l_lock, flags);
3000 2999
3001 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 3000 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3002 "lockres %s, flags 0x%lx\n", 3001 "lockres %s, flags 0x%lx\n",
3003 lockres->l_name, lockres->l_flags); 3002 lockres->l_name, lockres->l_flags);
3004 3003
3005 while (lockres->l_flags & OCFS2_LOCK_BUSY) { 3004 while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3006 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 3005 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3007 "%u, unlock_action = %u\n", 3006 "%u, unlock_action = %u\n",
3008 lockres->l_name, lockres->l_flags, lockres->l_action, 3007 lockres->l_name, lockres->l_flags, lockres->l_action,
3009 lockres->l_unlock_action); 3008 lockres->l_unlock_action);
3010 3009
3011 spin_unlock_irqrestore(&lockres->l_lock, flags); 3010 spin_unlock_irqrestore(&lockres->l_lock, flags);
3012 3011
3013 /* XXX: Today we just wait on any busy 3012 /* XXX: Today we just wait on any busy
3014 * locks... Perhaps we need to cancel converts in the 3013 * locks... Perhaps we need to cancel converts in the
3015 * future? */ 3014 * future? */
3016 ocfs2_wait_on_busy_lock(lockres); 3015 ocfs2_wait_on_busy_lock(lockres);
3017 3016
3018 spin_lock_irqsave(&lockres->l_lock, flags); 3017 spin_lock_irqsave(&lockres->l_lock, flags);
3019 } 3018 }
3020 3019
3021 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3020 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3022 if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 3021 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3023 lockres->l_level == DLM_LOCK_EX && 3022 lockres->l_level == DLM_LOCK_EX &&
3024 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 3023 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3025 lockres->l_ops->set_lvb(lockres); 3024 lockres->l_ops->set_lvb(lockres);
3026 } 3025 }
3027 3026
3028 if (lockres->l_flags & OCFS2_LOCK_BUSY) 3027 if (lockres->l_flags & OCFS2_LOCK_BUSY)
3029 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 3028 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3030 lockres->l_name); 3029 lockres->l_name);
3031 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3030 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3032 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 3031 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3033 3032
3034 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 3033 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3035 spin_unlock_irqrestore(&lockres->l_lock, flags); 3034 spin_unlock_irqrestore(&lockres->l_lock, flags);
3036 goto out; 3035 goto out;
3037 } 3036 }
3038 3037
3039 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 3038 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3040 3039
3041 /* make sure we never get here while waiting for an ast to 3040 /* make sure we never get here while waiting for an ast to
3042 * fire. */ 3041 * fire. */
3043 BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 3042 BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3044 3043
3045 /* is this necessary? */ 3044 /* is this necessary? */
3046 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3045 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3047 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 3046 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3048 spin_unlock_irqrestore(&lockres->l_lock, flags); 3047 spin_unlock_irqrestore(&lockres->l_lock, flags);
3049 3048
3050 mlog(0, "lock %s\n", lockres->l_name); 3049 mlog(0, "lock %s\n", lockres->l_name);
3051 3050
3052 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, 3051 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags,
3053 lockres); 3052 lockres);
3054 if (ret) { 3053 if (ret) {
3055 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3054 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3056 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 3055 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3057 ocfs2_dlm_dump_lksb(&lockres->l_lksb); 3056 ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3058 BUG(); 3057 BUG();
3059 } 3058 }
3060 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", 3059 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3061 lockres->l_name); 3060 lockres->l_name);
3062 3061
3063 ocfs2_wait_on_busy_lock(lockres); 3062 ocfs2_wait_on_busy_lock(lockres);
3064 out: 3063 out:
3065 mlog_exit(0); 3064 mlog_exit(0);
3066 return 0; 3065 return 0;
3067 } 3066 }
3068 3067
3069 /* Mark the lockres as being dropped. It will no longer be 3068 /* Mark the lockres as being dropped. It will no longer be
3070 * queued if blocking, but we still may have to wait on it 3069 * queued if blocking, but we still may have to wait on it
3071 * being dequeued from the downconvert thread before we can consider 3070 * being dequeued from the downconvert thread before we can consider
3072 * it safe to drop. 3071 * it safe to drop.
3073 * 3072 *
3074 * You can *not* attempt to call cluster_lock on this lockres anymore. */ 3073 * You can *not* attempt to call cluster_lock on this lockres anymore. */
3075 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3074 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
3076 { 3075 {
3077 int status; 3076 int status;
3078 struct ocfs2_mask_waiter mw; 3077 struct ocfs2_mask_waiter mw;
3079 unsigned long flags; 3078 unsigned long flags;
3080 3079
3081 ocfs2_init_mask_waiter(&mw); 3080 ocfs2_init_mask_waiter(&mw);
3082 3081
3083 spin_lock_irqsave(&lockres->l_lock, flags); 3082 spin_lock_irqsave(&lockres->l_lock, flags);
3084 lockres->l_flags |= OCFS2_LOCK_FREEING; 3083 lockres->l_flags |= OCFS2_LOCK_FREEING;
3085 while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3084 while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3086 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3085 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3087 spin_unlock_irqrestore(&lockres->l_lock, flags); 3086 spin_unlock_irqrestore(&lockres->l_lock, flags);
3088 3087
3089 mlog(0, "Waiting on lockres %s\n", lockres->l_name); 3088 mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3090 3089
3091 status = ocfs2_wait_for_mask(&mw); 3090 status = ocfs2_wait_for_mask(&mw);
3092 if (status) 3091 if (status)
3093 mlog_errno(status); 3092 mlog_errno(status);
3094 3093
3095 spin_lock_irqsave(&lockres->l_lock, flags); 3094 spin_lock_irqsave(&lockres->l_lock, flags);
3096 } 3095 }
3097 spin_unlock_irqrestore(&lockres->l_lock, flags); 3096 spin_unlock_irqrestore(&lockres->l_lock, flags);
3098 } 3097 }
3099 3098
/* Convenience wrapper: mark @lockres FREEING then drop it. */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int ret;

	ocfs2_mark_lockres_freeing(lockres);
	ret = ocfs2_drop_lock(osb, lockres);
	if (ret)
		mlog_errno(ret);
}
3110 3109
3111 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3110 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3112 { 3111 {
3113 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3112 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3114 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 3113 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
3115 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 3114 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
3116 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3115 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3117 } 3116 }
3118 3117
3119 int ocfs2_drop_inode_locks(struct inode *inode) 3118 int ocfs2_drop_inode_locks(struct inode *inode)
3120 { 3119 {
3121 int status, err; 3120 int status, err;
3122 3121
3123 mlog_entry_void(); 3122 mlog_entry_void();
3124 3123
3125 /* No need to call ocfs2_mark_lockres_freeing here - 3124 /* No need to call ocfs2_mark_lockres_freeing here -
3126 * ocfs2_clear_inode has done it for us. */ 3125 * ocfs2_clear_inode has done it for us. */
3127 3126
3128 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3127 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3129 &OCFS2_I(inode)->ip_open_lockres); 3128 &OCFS2_I(inode)->ip_open_lockres);
3130 if (err < 0) 3129 if (err < 0)
3131 mlog_errno(err); 3130 mlog_errno(err);
3132 3131
3133 status = err; 3132 status = err;
3134 3133
3135 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3134 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3136 &OCFS2_I(inode)->ip_inode_lockres); 3135 &OCFS2_I(inode)->ip_inode_lockres);
3137 if (err < 0) 3136 if (err < 0)
3138 mlog_errno(err); 3137 mlog_errno(err);
3139 if (err < 0 && !status) 3138 if (err < 0 && !status)
3140 status = err; 3139 status = err;
3141 3140
3142 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3141 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3143 &OCFS2_I(inode)->ip_rw_lockres); 3142 &OCFS2_I(inode)->ip_rw_lockres);
3144 if (err < 0) 3143 if (err < 0)
3145 mlog_errno(err); 3144 mlog_errno(err);
3146 if (err < 0 && !status) 3145 if (err < 0 && !status)
3147 status = err; 3146 status = err;
3148 3147
3149 mlog_exit(status); 3148 mlog_exit(status);
3150 return status; 3149 return status;
3151 } 3150 }
3152 3151
3153 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3152 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3154 int new_level) 3153 int new_level)
3155 { 3154 {
3156 assert_spin_locked(&lockres->l_lock); 3155 assert_spin_locked(&lockres->l_lock);
3157 3156
3158 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3157 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3159 3158
3160 if (lockres->l_level <= new_level) { 3159 if (lockres->l_level <= new_level) {
3161 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", 3160 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n",
3162 lockres->l_level, new_level); 3161 lockres->l_level, new_level);
3163 BUG(); 3162 BUG();
3164 } 3163 }
3165 3164
3166 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 3165 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n",
3167 lockres->l_name, new_level, lockres->l_blocking); 3166 lockres->l_name, new_level, lockres->l_blocking);
3168 3167
3169 lockres->l_action = OCFS2_AST_DOWNCONVERT; 3168 lockres->l_action = OCFS2_AST_DOWNCONVERT;
3170 lockres->l_requested = new_level; 3169 lockres->l_requested = new_level;
3171 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3170 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3172 return lockres_set_pending(lockres); 3171 return lockres_set_pending(lockres);
3173 } 3172 }
3174 3173
3175 static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3174 static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3176 struct ocfs2_lock_res *lockres, 3175 struct ocfs2_lock_res *lockres,
3177 int new_level, 3176 int new_level,
3178 int lvb, 3177 int lvb,
3179 unsigned int generation) 3178 unsigned int generation)
3180 { 3179 {
3181 int ret; 3180 int ret;
3182 u32 dlm_flags = DLM_LKF_CONVERT; 3181 u32 dlm_flags = DLM_LKF_CONVERT;
3183 3182
3184 mlog_entry_void(); 3183 mlog_entry_void();
3185 3184
3186 if (lvb) 3185 if (lvb)
3187 dlm_flags |= DLM_LKF_VALBLK; 3186 dlm_flags |= DLM_LKF_VALBLK;
3188 3187
3189 ret = ocfs2_dlm_lock(osb->cconn, 3188 ret = ocfs2_dlm_lock(osb->cconn,
3190 new_level, 3189 new_level,
3191 &lockres->l_lksb, 3190 &lockres->l_lksb,
3192 dlm_flags, 3191 dlm_flags,
3193 lockres->l_name, 3192 lockres->l_name,
3194 OCFS2_LOCK_ID_MAX_LEN - 1, 3193 OCFS2_LOCK_ID_MAX_LEN - 1,
3195 lockres); 3194 lockres);
3196 lockres_clear_pending(lockres, generation, osb); 3195 lockres_clear_pending(lockres, generation, osb);
3197 if (ret) { 3196 if (ret) {
3198 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3197 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3199 ocfs2_recover_from_dlm_error(lockres, 1); 3198 ocfs2_recover_from_dlm_error(lockres, 1);
3200 goto bail; 3199 goto bail;
3201 } 3200 }
3202 3201
3203 ret = 0; 3202 ret = 0;
3204 bail: 3203 bail:
3205 mlog_exit(ret); 3204 mlog_exit(ret);
3206 return ret; 3205 return ret;
3207 } 3206 }
3208 3207
3209 /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3208 /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3210 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3209 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3211 struct ocfs2_lock_res *lockres) 3210 struct ocfs2_lock_res *lockres)
3212 { 3211 {
3213 assert_spin_locked(&lockres->l_lock); 3212 assert_spin_locked(&lockres->l_lock);
3214 3213
3215 mlog_entry_void(); 3214 mlog_entry_void();
3216 mlog(0, "lock %s\n", lockres->l_name); 3215 mlog(0, "lock %s\n", lockres->l_name);
3217 3216
3218 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3217 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3219 /* If we're already trying to cancel a lock conversion 3218 /* If we're already trying to cancel a lock conversion
3220 * then just drop the spinlock and allow the caller to 3219 * then just drop the spinlock and allow the caller to
3221 * requeue this lock. */ 3220 * requeue this lock. */
3222 3221
3223 mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 3222 mlog(0, "Lockres %s, skip convert\n", lockres->l_name);
3224 return 0; 3223 return 0;
3225 } 3224 }
3226 3225
3227 /* were we in a convert when we got the bast fire? */ 3226 /* were we in a convert when we got the bast fire? */
3228 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3227 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3229 lockres->l_action != OCFS2_AST_DOWNCONVERT); 3228 lockres->l_action != OCFS2_AST_DOWNCONVERT);
3230 /* set things up for the unlockast to know to just 3229 /* set things up for the unlockast to know to just
3231 * clear out the ast_action and unset busy, etc. */ 3230 * clear out the ast_action and unset busy, etc. */
3232 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3231 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3233 3232
3234 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3233 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3235 "lock %s, invalid flags: 0x%lx\n", 3234 "lock %s, invalid flags: 0x%lx\n",
3236 lockres->l_name, lockres->l_flags); 3235 lockres->l_name, lockres->l_flags);
3237 3236
3238 return 1; 3237 return 1;
3239 } 3238 }
3240 3239
3241 static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3240 static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3242 struct ocfs2_lock_res *lockres) 3241 struct ocfs2_lock_res *lockres)
3243 { 3242 {
3244 int ret; 3243 int ret;
3245 3244
3246 mlog_entry_void(); 3245 mlog_entry_void();
3247 mlog(0, "lock %s\n", lockres->l_name); 3246 mlog(0, "lock %s\n", lockres->l_name);
3248 3247
3249 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3248 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3250 DLM_LKF_CANCEL, lockres); 3249 DLM_LKF_CANCEL, lockres);
3251 if (ret) { 3250 if (ret) {
3252 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3251 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3253 ocfs2_recover_from_dlm_error(lockres, 0); 3252 ocfs2_recover_from_dlm_error(lockres, 0);
3254 } 3253 }
3255 3254
3256 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3255 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name);
3257 3256
3258 mlog_exit(ret); 3257 mlog_exit(ret);
3259 return ret; 3258 return ret;
3260 } 3259 }
3261 3260
3262 static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3261 static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3263 struct ocfs2_lock_res *lockres, 3262 struct ocfs2_lock_res *lockres,
3264 struct ocfs2_unblock_ctl *ctl) 3263 struct ocfs2_unblock_ctl *ctl)
3265 { 3264 {
3266 unsigned long flags; 3265 unsigned long flags;
3267 int blocking; 3266 int blocking;
3268 int new_level; 3267 int new_level;
3269 int ret = 0; 3268 int ret = 0;
3270 int set_lvb = 0; 3269 int set_lvb = 0;
3271 unsigned int gen; 3270 unsigned int gen;
3272 3271
3273 mlog_entry_void(); 3272 mlog_entry_void();
3274 3273
3275 spin_lock_irqsave(&lockres->l_lock, flags); 3274 spin_lock_irqsave(&lockres->l_lock, flags);
3276 3275
3277 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 3276 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
3278 3277
3279 recheck: 3278 recheck:
3280 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3279 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3281 /* XXX 3280 /* XXX
3282 * This is a *big* race. The OCFS2_LOCK_PENDING flag 3281 * This is a *big* race. The OCFS2_LOCK_PENDING flag
3283 * exists entirely for one reason - another thread has set 3282 * exists entirely for one reason - another thread has set
3284 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3283 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3285 * 3284 *
3286 * If we do ocfs2_cancel_convert() before the other thread 3285 * If we do ocfs2_cancel_convert() before the other thread
3287 * calls dlm_lock(), our cancel will do nothing. We will 3286 * calls dlm_lock(), our cancel will do nothing. We will
3288 * get no ast, and we will have no way of knowing the 3287 * get no ast, and we will have no way of knowing the
3289 * cancel failed. Meanwhile, the other thread will call 3288 * cancel failed. Meanwhile, the other thread will call
3290 * into dlm_lock() and wait...forever. 3289 * into dlm_lock() and wait...forever.
3291 * 3290 *
3292 * Why forever? Because another node has asked for the 3291 * Why forever? Because another node has asked for the
3293 * lock first; that's why we're here in unblock_lock(). 3292 * lock first; that's why we're here in unblock_lock().
3294 * 3293 *
3295 * The solution is OCFS2_LOCK_PENDING. When PENDING is 3294 * The solution is OCFS2_LOCK_PENDING. When PENDING is
3296 * set, we just requeue the unblock. Only when the other 3295 * set, we just requeue the unblock. Only when the other
3297 * thread has called dlm_lock() and cleared PENDING will 3296 * thread has called dlm_lock() and cleared PENDING will
3298 * we then cancel their request. 3297 * we then cancel their request.
3299 * 3298 *
3300 * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3299 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3301 * at the same time they set OCFS2_DLM_BUSY. They must 3300 * at the same time they set OCFS2_DLM_BUSY. They must
3302 * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3301 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3303 */ 3302 */
3304 if (lockres->l_flags & OCFS2_LOCK_PENDING) 3303 if (lockres->l_flags & OCFS2_LOCK_PENDING)
3305 goto leave_requeue; 3304 goto leave_requeue;
3306 3305
3307 ctl->requeue = 1; 3306 ctl->requeue = 1;
3308 ret = ocfs2_prepare_cancel_convert(osb, lockres); 3307 ret = ocfs2_prepare_cancel_convert(osb, lockres);
3309 spin_unlock_irqrestore(&lockres->l_lock, flags); 3308 spin_unlock_irqrestore(&lockres->l_lock, flags);
3310 if (ret) { 3309 if (ret) {
3311 ret = ocfs2_cancel_convert(osb, lockres); 3310 ret = ocfs2_cancel_convert(osb, lockres);
3312 if (ret < 0) 3311 if (ret < 0)
3313 mlog_errno(ret); 3312 mlog_errno(ret);
3314 } 3313 }
3315 goto leave; 3314 goto leave;
3316 } 3315 }
3317 3316
3318 /* if we're blocking an exclusive and we have *any* holders, 3317 /* if we're blocking an exclusive and we have *any* holders,
3319 * then requeue. */ 3318 * then requeue. */
3320 if ((lockres->l_blocking == DLM_LOCK_EX) 3319 if ((lockres->l_blocking == DLM_LOCK_EX)
3321 && (lockres->l_ex_holders || lockres->l_ro_holders)) 3320 && (lockres->l_ex_holders || lockres->l_ro_holders))
3322 goto leave_requeue; 3321 goto leave_requeue;
3323 3322
3324 /* If it's a PR we're blocking, then only 3323 /* If it's a PR we're blocking, then only
3325 * requeue if we've got any EX holders */ 3324 * requeue if we've got any EX holders */
3326 if (lockres->l_blocking == DLM_LOCK_PR && 3325 if (lockres->l_blocking == DLM_LOCK_PR &&
3327 lockres->l_ex_holders) 3326 lockres->l_ex_holders)
3328 goto leave_requeue; 3327 goto leave_requeue;
3329 3328
3330 /* 3329 /*
3331 * Can we get a lock in this state if the holder counts are 3330 * Can we get a lock in this state if the holder counts are
3332 * zero? The meta data unblock code used to check this. 3331 * zero? The meta data unblock code used to check this.
3333 */ 3332 */
3334 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3333 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
3335 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3334 && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
3336 goto leave_requeue; 3335 goto leave_requeue;
3337 3336
3338 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 3337 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
3339 3338
3340 if (lockres->l_ops->check_downconvert 3339 if (lockres->l_ops->check_downconvert
3341 && !lockres->l_ops->check_downconvert(lockres, new_level)) 3340 && !lockres->l_ops->check_downconvert(lockres, new_level))
3342 goto leave_requeue; 3341 goto leave_requeue;
3343 3342
3344 /* If we get here, then we know that there are no more 3343 /* If we get here, then we know that there are no more
3345 * incompatible holders (and anyone asking for an incompatible 3344 * incompatible holders (and anyone asking for an incompatible
3346 * lock is blocked). We can now downconvert the lock */ 3345 * lock is blocked). We can now downconvert the lock */
3347 if (!lockres->l_ops->downconvert_worker) 3346 if (!lockres->l_ops->downconvert_worker)
3348 goto downconvert; 3347 goto downconvert;
3349 3348
3350 /* Some lockres types want to do a bit of work before 3349 /* Some lockres types want to do a bit of work before
3351 * downconverting a lock. Allow that here. The worker function 3350 * downconverting a lock. Allow that here. The worker function
3352 * may sleep, so we save off a copy of what we're blocking as 3351 * may sleep, so we save off a copy of what we're blocking as
3353 * it may change while we're not holding the spin lock. */ 3352 * it may change while we're not holding the spin lock. */
3354 blocking = lockres->l_blocking; 3353 blocking = lockres->l_blocking;
3355 spin_unlock_irqrestore(&lockres->l_lock, flags); 3354 spin_unlock_irqrestore(&lockres->l_lock, flags);
3356 3355
3357 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3356 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3358 3357
3359 if (ctl->unblock_action == UNBLOCK_STOP_POST) 3358 if (ctl->unblock_action == UNBLOCK_STOP_POST)
3360 goto leave; 3359 goto leave;
3361 3360
3362 spin_lock_irqsave(&lockres->l_lock, flags); 3361 spin_lock_irqsave(&lockres->l_lock, flags);
3363 if (blocking != lockres->l_blocking) { 3362 if (blocking != lockres->l_blocking) {
3364 /* If this changed underneath us, then we can't drop 3363 /* If this changed underneath us, then we can't drop
3365 * it just yet. */ 3364 * it just yet. */
3366 goto recheck; 3365 goto recheck;
3367 } 3366 }
3368 3367
3369 downconvert: 3368 downconvert:
3370 ctl->requeue = 0; 3369 ctl->requeue = 0;
3371 3370
3372 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3371 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3373 if (lockres->l_level == DLM_LOCK_EX) 3372 if (lockres->l_level == DLM_LOCK_EX)
3374 set_lvb = 1; 3373 set_lvb = 1;
3375 3374
3376 /* 3375 /*
3377 * We only set the lvb if the lock has been fully 3376 * We only set the lvb if the lock has been fully
3378 * refreshed - otherwise we risk setting stale 3377 * refreshed - otherwise we risk setting stale
3379 * data. Otherwise, there's no need to actually clear 3378 * data. Otherwise, there's no need to actually clear
3380 * out the lvb here as it's value is still valid. 3379 * out the lvb here as it's value is still valid.
3381 */ 3380 */
3382 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 3381 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3383 lockres->l_ops->set_lvb(lockres); 3382 lockres->l_ops->set_lvb(lockres);
3384 } 3383 }
3385 3384
3386 gen = ocfs2_prepare_downconvert(lockres, new_level); 3385 gen = ocfs2_prepare_downconvert(lockres, new_level);
3387 spin_unlock_irqrestore(&lockres->l_lock, flags); 3386 spin_unlock_irqrestore(&lockres->l_lock, flags);
3388 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3387 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3389 gen); 3388 gen);
3390 3389
3391 leave: 3390 leave:
3392 mlog_exit(ret); 3391 mlog_exit(ret);
3393 return ret; 3392 return ret;
3394 3393
3395 leave_requeue: 3394 leave_requeue:
3396 spin_unlock_irqrestore(&lockres->l_lock, flags); 3395 spin_unlock_irqrestore(&lockres->l_lock, flags);
3397 ctl->requeue = 1; 3396 ctl->requeue = 1;
3398 3397
3399 mlog_exit(0); 3398 mlog_exit(0);
3400 return 0; 3399 return 0;
3401 } 3400 }
3402 3401
3403 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3402 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3404 int blocking) 3403 int blocking)
3405 { 3404 {
3406 struct inode *inode; 3405 struct inode *inode;
3407 struct address_space *mapping; 3406 struct address_space *mapping;
3408 3407
3409 inode = ocfs2_lock_res_inode(lockres); 3408 inode = ocfs2_lock_res_inode(lockres);
3410 mapping = inode->i_mapping; 3409 mapping = inode->i_mapping;
3411 3410
3412 if (!S_ISREG(inode->i_mode)) 3411 if (!S_ISREG(inode->i_mode))
3413 goto out; 3412 goto out;
3414 3413
3415 /* 3414 /*
3416 * We need this before the filemap_fdatawrite() so that it can 3415 * We need this before the filemap_fdatawrite() so that it can
3417 * transfer the dirty bit from the PTE to the 3416 * transfer the dirty bit from the PTE to the
3418 * page. Unfortunately this means that even for EX->PR 3417 * page. Unfortunately this means that even for EX->PR
3419 * downconverts, we'll lose our mappings and have to build 3418 * downconverts, we'll lose our mappings and have to build
3420 * them up again. 3419 * them up again.
3421 */ 3420 */
3422 unmap_mapping_range(mapping, 0, 0, 0); 3421 unmap_mapping_range(mapping, 0, 0, 0);
3423 3422
3424 if (filemap_fdatawrite(mapping)) { 3423 if (filemap_fdatawrite(mapping)) {
3425 mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3424 mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3426 (unsigned long long)OCFS2_I(inode)->ip_blkno); 3425 (unsigned long long)OCFS2_I(inode)->ip_blkno);
3427 } 3426 }
3428 sync_mapping_buffers(mapping); 3427 sync_mapping_buffers(mapping);
3429 if (blocking == DLM_LOCK_EX) { 3428 if (blocking == DLM_LOCK_EX) {
3430 truncate_inode_pages(mapping, 0); 3429 truncate_inode_pages(mapping, 0);
3431 } else { 3430 } else {
3432 /* We only need to wait on the I/O if we're not also 3431 /* We only need to wait on the I/O if we're not also
3433 * truncating pages because truncate_inode_pages waits 3432 * truncating pages because truncate_inode_pages waits
3434 * for us above. We don't truncate pages if we're 3433 * for us above. We don't truncate pages if we're
3435 * blocking anything < EXMODE because we want to keep 3434 * blocking anything < EXMODE because we want to keep
3436 * them around in that case. */ 3435 * them around in that case. */
3437 filemap_fdatawait(mapping); 3436 filemap_fdatawait(mapping);
3438 } 3437 }
3439 3438
3440 out: 3439 out:
3441 return UNBLOCK_CONTINUE; 3440 return UNBLOCK_CONTINUE;
3442 } 3441 }
3443 3442
3444 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3443 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
3445 int new_level) 3444 int new_level)
3446 { 3445 {
3447 struct inode *inode = ocfs2_lock_res_inode(lockres); 3446 struct inode *inode = ocfs2_lock_res_inode(lockres);
3448 int checkpointed = ocfs2_inode_fully_checkpointed(inode); 3447 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
3449 3448
3450 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3449 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3451 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3450 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3452 3451
3453 if (checkpointed) 3452 if (checkpointed)
3454 return 1; 3453 return 1;
3455 3454
3456 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 3455 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
3457 return 0; 3456 return 0;
3458 } 3457 }
3459 3458
3460 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3459 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
3461 { 3460 {
3462 struct inode *inode = ocfs2_lock_res_inode(lockres); 3461 struct inode *inode = ocfs2_lock_res_inode(lockres);
3463 3462
3464 __ocfs2_stuff_meta_lvb(inode); 3463 __ocfs2_stuff_meta_lvb(inode);
3465 } 3464 }
3466 3465
3467 /* 3466 /*
3468 * Does the final reference drop on our dentry lock. Right now this 3467 * Does the final reference drop on our dentry lock. Right now this
3469 * happens in the downconvert thread, but we could choose to simplify the 3468 * happens in the downconvert thread, but we could choose to simplify the
3470 * dlmglue API and push these off to the ocfs2_wq in the future. 3469 * dlmglue API and push these off to the ocfs2_wq in the future.
3471 */ 3470 */
3472 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3471 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
3473 struct ocfs2_lock_res *lockres) 3472 struct ocfs2_lock_res *lockres)
3474 { 3473 {
3475 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3474 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3476 ocfs2_dentry_lock_put(osb, dl); 3475 ocfs2_dentry_lock_put(osb, dl);
3477 } 3476 }
3478 3477
3479 /* 3478 /*
3480 * d_delete() matching dentries before the lock downconvert. 3479 * d_delete() matching dentries before the lock downconvert.
3481 * 3480 *
3482 * At this point, any process waiting to destroy the 3481 * At this point, any process waiting to destroy the
3483 * dentry_lock due to last ref count is stopped by the 3482 * dentry_lock due to last ref count is stopped by the
3484 * OCFS2_LOCK_QUEUED flag. 3483 * OCFS2_LOCK_QUEUED flag.
3485 * 3484 *
3486 * We have two potential problems 3485 * We have two potential problems
3487 * 3486 *
3488 * 1) If we do the last reference drop on our dentry_lock (via dput) 3487 * 1) If we do the last reference drop on our dentry_lock (via dput)
3489 * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3488 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
3490 * the downconvert to finish. Instead we take an elevated 3489 * the downconvert to finish. Instead we take an elevated
3491 * reference and push the drop until after we've completed our 3490 * reference and push the drop until after we've completed our
3492 * unblock processing. 3491 * unblock processing.
3493 * 3492 *
3494 * 2) There might be another process with a final reference, 3493 * 2) There might be another process with a final reference,
3495 * waiting on us to finish processing. If this is the case, we 3494 * waiting on us to finish processing. If this is the case, we
3496 * detect it and exit out - there's no more dentries anyway. 3495 * detect it and exit out - there's no more dentries anyway.
3497 */ 3496 */
3498 static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3497 static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3499 int blocking) 3498 int blocking)
3500 { 3499 {
3501 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3500 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3502 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3501 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
3503 struct dentry *dentry; 3502 struct dentry *dentry;
3504 unsigned long flags; 3503 unsigned long flags;
3505 int extra_ref = 0; 3504 int extra_ref = 0;
3506 3505
3507 /* 3506 /*
3508 * This node is blocking another node from getting a read 3507 * This node is blocking another node from getting a read
3509 * lock. This happens when we've renamed within a 3508 * lock. This happens when we've renamed within a
3510 * directory. We've forced the other nodes to d_delete(), but 3509 * directory. We've forced the other nodes to d_delete(), but
3511 * we never actually dropped our lock because it's still 3510 * we never actually dropped our lock because it's still
3512 * valid. The downconvert code will retain a PR for this node, 3511 * valid. The downconvert code will retain a PR for this node,
3513 * so there's no further work to do. 3512 * so there's no further work to do.
3514 */ 3513 */
3515 if (blocking == DLM_LOCK_PR) 3514 if (blocking == DLM_LOCK_PR)
3516 return UNBLOCK_CONTINUE; 3515 return UNBLOCK_CONTINUE;
3517 3516
3518 /* 3517 /*
3519 * Mark this inode as potentially orphaned. The code in 3518 * Mark this inode as potentially orphaned. The code in
3520 * ocfs2_delete_inode() will figure out whether it actually 3519 * ocfs2_delete_inode() will figure out whether it actually
3521 * needs to be freed or not. 3520 * needs to be freed or not.
3522 */ 3521 */
3523 spin_lock(&oi->ip_lock); 3522 spin_lock(&oi->ip_lock);
3524 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3523 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
3525 spin_unlock(&oi->ip_lock); 3524 spin_unlock(&oi->ip_lock);
3526 3525
3527 /* 3526 /*
3528 * Yuck. We need to make sure however that the check of 3527 * Yuck. We need to make sure however that the check of
3529 * OCFS2_LOCK_FREEING and the extra reference are atomic with 3528 * OCFS2_LOCK_FREEING and the extra reference are atomic with
3530 * respect to a reference decrement or the setting of that 3529 * respect to a reference decrement or the setting of that
3531 * flag. 3530 * flag.
3532 */ 3531 */
3533 spin_lock_irqsave(&lockres->l_lock, flags); 3532 spin_lock_irqsave(&lockres->l_lock, flags);
3534 spin_lock(&dentry_attach_lock); 3533 spin_lock(&dentry_attach_lock);
3535 if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3534 if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
3536 && dl->dl_count) { 3535 && dl->dl_count) {
3537 dl->dl_count++; 3536 dl->dl_count++;
3538 extra_ref = 1; 3537 extra_ref = 1;
3539 } 3538 }
3540 spin_unlock(&dentry_attach_lock); 3539 spin_unlock(&dentry_attach_lock);
3541 spin_unlock_irqrestore(&lockres->l_lock, flags); 3540 spin_unlock_irqrestore(&lockres->l_lock, flags);
3542 3541
3543 mlog(0, "extra_ref = %d\n", extra_ref); 3542 mlog(0, "extra_ref = %d\n", extra_ref);
3544 3543
3545 /* 3544 /*
3546 * We have a process waiting on us in ocfs2_dentry_iput(), 3545 * We have a process waiting on us in ocfs2_dentry_iput(),
3547 * which means we can't have any more outstanding 3546 * which means we can't have any more outstanding
3548 * aliases. There's no need to do any more work. 3547 * aliases. There's no need to do any more work.
3549 */ 3548 */
3550 if (!extra_ref) 3549 if (!extra_ref)
3551 return UNBLOCK_CONTINUE; 3550 return UNBLOCK_CONTINUE;
3552 3551
3553 spin_lock(&dentry_attach_lock); 3552 spin_lock(&dentry_attach_lock);
3554 while (1) { 3553 while (1) {
3555 dentry = ocfs2_find_local_alias(dl->dl_inode, 3554 dentry = ocfs2_find_local_alias(dl->dl_inode,
3556 dl->dl_parent_blkno, 1); 3555 dl->dl_parent_blkno, 1);
3557 if (!dentry) 3556 if (!dentry)
3558 break; 3557 break;
3559 spin_unlock(&dentry_attach_lock); 3558 spin_unlock(&dentry_attach_lock);
3560 3559
3561 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 3560 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
3562 dentry->d_name.name); 3561 dentry->d_name.name);
3563 3562
3564 /* 3563 /*
3565 * The following dcache calls may do an 3564 * The following dcache calls may do an
3566 * iput(). Normally we don't want that from the 3565 * iput(). Normally we don't want that from the
3567 * downconverting thread, but in this case it's ok 3566 * downconverting thread, but in this case it's ok
3568 * because the requesting node already has an 3567 * because the requesting node already has an
3569 * exclusive lock on the inode, so it can't be queued 3568 * exclusive lock on the inode, so it can't be queued
3570 * for a downconvert. 3569 * for a downconvert.
3571 */ 3570 */
3572 d_delete(dentry); 3571 d_delete(dentry);
3573 dput(dentry); 3572 dput(dentry);
3574 3573
3575 spin_lock(&dentry_attach_lock); 3574 spin_lock(&dentry_attach_lock);
3576 } 3575 }
3577 spin_unlock(&dentry_attach_lock); 3576 spin_unlock(&dentry_attach_lock);
3578 3577
3579 /* 3578 /*
3580 * If we are the last holder of this dentry lock, there is no 3579 * If we are the last holder of this dentry lock, there is no
3581 * reason to downconvert so skip straight to the unlock. 3580 * reason to downconvert so skip straight to the unlock.
3582 */ 3581 */
3583 if (dl->dl_count == 1) 3582 if (dl->dl_count == 1)
3584 return UNBLOCK_STOP_POST; 3583 return UNBLOCK_STOP_POST;
3585 3584
3586 return UNBLOCK_CONTINUE_POST; 3585 return UNBLOCK_CONTINUE_POST;
3587 } 3586 }
3588 3587
3589 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 3588 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3590 { 3589 {
3591 struct ocfs2_qinfo_lvb *lvb; 3590 struct ocfs2_qinfo_lvb *lvb;
3592 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 3591 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
3593 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 3592 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3594 oinfo->dqi_gi.dqi_type); 3593 oinfo->dqi_gi.dqi_type);
3595 3594
3596 mlog_entry_void(); 3595 mlog_entry_void();
3597 3596
3598 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 3597 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3599 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 3598 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
3600 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 3599 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
3601 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 3600 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
3602 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 3601 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
3603 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 3602 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
3604 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 3603 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
3605 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 3604 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
3606 3605
3607 mlog_exit_void(); 3606 mlog_exit_void();
3608 } 3607 }
3609 3608
3610 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 3609 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3611 { 3610 {
3612 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3611 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3613 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 3612 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3614 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3613 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3615 3614
3616 mlog_entry_void(); 3615 mlog_entry_void();
3617 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 3616 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3618 ocfs2_cluster_unlock(osb, lockres, level); 3617 ocfs2_cluster_unlock(osb, lockres, level);
3619 mlog_exit_void(); 3618 mlog_exit_void();
3620 } 3619 }
3621 3620
3622 static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 3621 static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3623 { 3622 {
3624 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 3623 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3625 oinfo->dqi_gi.dqi_type); 3624 oinfo->dqi_gi.dqi_type);
3626 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3625 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3627 struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 3626 struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3628 struct buffer_head *bh = NULL; 3627 struct buffer_head *bh = NULL;
3629 struct ocfs2_global_disk_dqinfo *gdinfo; 3628 struct ocfs2_global_disk_dqinfo *gdinfo;
3630 int status = 0; 3629 int status = 0;
3631 3630
3632 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 3631 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
3633 lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 3632 lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
3634 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 3633 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
3635 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 3634 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
3636 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 3635 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
3637 oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 3636 oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
3638 oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 3637 oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
3639 oinfo->dqi_gi.dqi_free_entry = 3638 oinfo->dqi_gi.dqi_free_entry =
3640 be32_to_cpu(lvb->lvb_free_entry); 3639 be32_to_cpu(lvb->lvb_free_entry);
3641 } else { 3640 } else {
3642 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); 3641 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
3643 if (status) { 3642 if (status) {
3644 mlog_errno(status); 3643 mlog_errno(status);
3645 goto bail; 3644 goto bail;
3646 } 3645 }
3647 gdinfo = (struct ocfs2_global_disk_dqinfo *) 3646 gdinfo = (struct ocfs2_global_disk_dqinfo *)
3648 (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 3647 (bh->b_data + OCFS2_GLOBAL_INFO_OFF);
3649 info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 3648 info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
3650 info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); 3649 info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
3651 oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 3650 oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
3652 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 3651 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
3653 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 3652 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
3654 oinfo->dqi_gi.dqi_free_entry = 3653 oinfo->dqi_gi.dqi_free_entry =
3655 le32_to_cpu(gdinfo->dqi_free_entry); 3654 le32_to_cpu(gdinfo->dqi_free_entry);
3656 brelse(bh); 3655 brelse(bh);
3657 ocfs2_track_lock_refresh(lockres); 3656 ocfs2_track_lock_refresh(lockres);
3658 } 3657 }
3659 3658
3660 bail: 3659 bail:
3661 return status; 3660 return status;
3662 } 3661 }
3663 3662
3664 /* Lock quota info, this function expects at least shared lock on the quota file 3663 /* Lock quota info, this function expects at least shared lock on the quota file
3665 * so that we can safely refresh quota info from disk. */ 3664 * so that we can safely refresh quota info from disk. */
3666 int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 3665 int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3667 { 3666 {
3668 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3667 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3669 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 3668 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3670 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3669 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3671 int status = 0; 3670 int status = 0;
3672 3671
3673 mlog_entry_void(); 3672 mlog_entry_void();
3674 3673
3675 /* On RO devices, locking really isn't needed... */ 3674 /* On RO devices, locking really isn't needed... */
3676 if (ocfs2_is_hard_readonly(osb)) { 3675 if (ocfs2_is_hard_readonly(osb)) {
3677 if (ex) 3676 if (ex)
3678 status = -EROFS; 3677 status = -EROFS;
3679 goto bail; 3678 goto bail;
3680 } 3679 }
3681 if (ocfs2_mount_local(osb)) 3680 if (ocfs2_mount_local(osb))
3682 goto bail; 3681 goto bail;
3683 3682
3684 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 3683 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3685 if (status < 0) { 3684 if (status < 0) {
3686 mlog_errno(status); 3685 mlog_errno(status);
3687 goto bail; 3686 goto bail;
3688 } 3687 }
3689 if (!ocfs2_should_refresh_lock_res(lockres)) 3688 if (!ocfs2_should_refresh_lock_res(lockres))
3690 goto bail; 3689 goto bail;
3691 /* OK, we have the lock but we need to refresh the quota info */ 3690 /* OK, we have the lock but we need to refresh the quota info */
3692 status = ocfs2_refresh_qinfo(oinfo); 3691 status = ocfs2_refresh_qinfo(oinfo);
3693 if (status) 3692 if (status)
3694 ocfs2_qinfo_unlock(oinfo, ex); 3693 ocfs2_qinfo_unlock(oinfo, ex);
3695 ocfs2_complete_lock_res_refresh(lockres, status); 3694 ocfs2_complete_lock_res_refresh(lockres, status);
3696 bail: 3695 bail:
3697 mlog_exit(status); 3696 mlog_exit(status);
3698 return status; 3697 return status;
3699 } 3698 }
3700 3699
3701 /* 3700 /*
3702 * This is the filesystem locking protocol. It provides the lock handling 3701 * This is the filesystem locking protocol. It provides the lock handling
3703 * hooks for the underlying DLM. It has a maximum version number. 3702 * hooks for the underlying DLM. It has a maximum version number.
3704 * The version number allows interoperability with systems running at 3703 * The version number allows interoperability with systems running at
3705 * the same major number and an equal or smaller minor number. 3704 * the same major number and an equal or smaller minor number.
3706 * 3705 *
3707 * Whenever the filesystem does new things with locks (adds or removes a 3706 * Whenever the filesystem does new things with locks (adds or removes a
3708 * lock, orders them differently, does different things underneath a lock), 3707 * lock, orders them differently, does different things underneath a lock),
3709 * the version must be changed. The protocol is negotiated when joining 3708 * the version must be changed. The protocol is negotiated when joining
3710 * the dlm domain. A node may join the domain if its major version is 3709 * the dlm domain. A node may join the domain if its major version is
3711 * identical to all other nodes and its minor version is greater than 3710 * identical to all other nodes and its minor version is greater than
3712 * or equal to all other nodes. When its minor version is greater than 3711 * or equal to all other nodes. When its minor version is greater than
3713 * the other nodes, it will run at the minor version specified by the 3712 * the other nodes, it will run at the minor version specified by the
3714 * other nodes. 3713 * other nodes.
3715 * 3714 *
3716 * If a locking change is made that will not be compatible with older 3715 * If a locking change is made that will not be compatible with older
3717 * versions, the major number must be increased and the minor version set 3716 * versions, the major number must be increased and the minor version set
3718 * to zero. If a change merely adds a behavior that can be disabled when 3717 * to zero. If a change merely adds a behavior that can be disabled when
3719 * speaking to older versions, the minor version must be increased. If a 3718 * speaking to older versions, the minor version must be increased. If a
3720 * change adds a fully backwards compatible change (eg, LVB changes that 3719 * change adds a fully backwards compatible change (eg, LVB changes that
3721 * are just ignored by older versions), the version does not need to be 3720 * are just ignored by older versions), the version does not need to be
3722 * updated. 3721 * updated.
3723 */ 3722 */
3724 static struct ocfs2_locking_protocol lproto = { 3723 static struct ocfs2_locking_protocol lproto = {
3725 .lp_max_version = { 3724 .lp_max_version = {
3726 .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 3725 .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
3727 .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 3726 .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
3728 }, 3727 },
3729 .lp_lock_ast = ocfs2_locking_ast, 3728 .lp_lock_ast = ocfs2_locking_ast,
3730 .lp_blocking_ast = ocfs2_blocking_ast, 3729 .lp_blocking_ast = ocfs2_blocking_ast,
3731 .lp_unlock_ast = ocfs2_unlock_ast, 3730 .lp_unlock_ast = ocfs2_unlock_ast,
3732 }; 3731 };
3733 3732
3734 void ocfs2_set_locking_protocol(void) 3733 void ocfs2_set_locking_protocol(void)
3735 { 3734 {
3736 ocfs2_stack_glue_set_locking_protocol(&lproto); 3735 ocfs2_stack_glue_set_locking_protocol(&lproto);
3737 } 3736 }
3738 3737
3739 3738
3740 static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3739 static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3741 struct ocfs2_lock_res *lockres) 3740 struct ocfs2_lock_res *lockres)
3742 { 3741 {
3743 int status; 3742 int status;
3744 struct ocfs2_unblock_ctl ctl = {0, 0,}; 3743 struct ocfs2_unblock_ctl ctl = {0, 0,};
3745 unsigned long flags; 3744 unsigned long flags;
3746 3745
3747 /* Our reference to the lockres in this function can be 3746 /* Our reference to the lockres in this function can be
3748 * considered valid until we remove the OCFS2_LOCK_QUEUED 3747 * considered valid until we remove the OCFS2_LOCK_QUEUED
3749 * flag. */ 3748 * flag. */
3750 3749
3751 mlog_entry_void(); 3750 mlog_entry_void();
3752 3751
3753 BUG_ON(!lockres); 3752 BUG_ON(!lockres);
3754 BUG_ON(!lockres->l_ops); 3753 BUG_ON(!lockres->l_ops);
3755 3754
3756 mlog(0, "lockres %s blocked.\n", lockres->l_name); 3755 mlog(0, "lockres %s blocked.\n", lockres->l_name);
3757 3756
3758 /* Detect whether a lock has been marked as going away while 3757 /* Detect whether a lock has been marked as going away while
3759 * the downconvert thread was processing other things. A lock can 3758 * the downconvert thread was processing other things. A lock can
3760 * still be marked with OCFS2_LOCK_FREEING after this check, 3759 * still be marked with OCFS2_LOCK_FREEING after this check,
3761 * but short circuiting here will still save us some 3760 * but short circuiting here will still save us some
3762 * performance. */ 3761 * performance. */
3763 spin_lock_irqsave(&lockres->l_lock, flags); 3762 spin_lock_irqsave(&lockres->l_lock, flags);
3764 if (lockres->l_flags & OCFS2_LOCK_FREEING) 3763 if (lockres->l_flags & OCFS2_LOCK_FREEING)
3765 goto unqueue; 3764 goto unqueue;
3766 spin_unlock_irqrestore(&lockres->l_lock, flags); 3765 spin_unlock_irqrestore(&lockres->l_lock, flags);
3767 3766
3768 status = ocfs2_unblock_lock(osb, lockres, &ctl); 3767 status = ocfs2_unblock_lock(osb, lockres, &ctl);
3769 if (status < 0) 3768 if (status < 0)
3770 mlog_errno(status); 3769 mlog_errno(status);
3771 3770
3772 spin_lock_irqsave(&lockres->l_lock, flags); 3771 spin_lock_irqsave(&lockres->l_lock, flags);
3773 unqueue: 3772 unqueue:
3774 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3773 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
3775 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3774 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
3776 } else 3775 } else
3777 ocfs2_schedule_blocked_lock(osb, lockres); 3776 ocfs2_schedule_blocked_lock(osb, lockres);
3778 3777
3779 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 3778 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
3780 ctl.requeue ? "yes" : "no"); 3779 ctl.requeue ? "yes" : "no");
3781 spin_unlock_irqrestore(&lockres->l_lock, flags); 3780 spin_unlock_irqrestore(&lockres->l_lock, flags);
3782 3781
3783 if (ctl.unblock_action != UNBLOCK_CONTINUE 3782 if (ctl.unblock_action != UNBLOCK_CONTINUE
3784 && lockres->l_ops->post_unlock) 3783 && lockres->l_ops->post_unlock)
3785 lockres->l_ops->post_unlock(osb, lockres); 3784 lockres->l_ops->post_unlock(osb, lockres);
3786 3785
3787 mlog_exit_void(); 3786 mlog_exit_void();
3788 } 3787 }
3789 3788
3790 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3789 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3791 struct ocfs2_lock_res *lockres) 3790 struct ocfs2_lock_res *lockres)
3792 { 3791 {
3793 mlog_entry_void(); 3792 mlog_entry_void();
3794 3793
3795 assert_spin_locked(&lockres->l_lock); 3794 assert_spin_locked(&lockres->l_lock);
3796 3795
3797 if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3796 if (lockres->l_flags & OCFS2_LOCK_FREEING) {
3798 /* Do not schedule a lock for downconvert when it's on 3797 /* Do not schedule a lock for downconvert when it's on
3799 * the way to destruction - any nodes wanting access 3798 * the way to destruction - any nodes wanting access
3800 * to the resource will get it soon. */ 3799 * to the resource will get it soon. */
3801 mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 3800 mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n",
3802 lockres->l_name, lockres->l_flags); 3801 lockres->l_name, lockres->l_flags);
3803 return; 3802 return;
3804 } 3803 }
3805 3804
3806 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3805 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
3807 3806
3808 spin_lock(&osb->dc_task_lock); 3807 spin_lock(&osb->dc_task_lock);
3809 if (list_empty(&lockres->l_blocked_list)) { 3808 if (list_empty(&lockres->l_blocked_list)) {
3810 list_add_tail(&lockres->l_blocked_list, 3809 list_add_tail(&lockres->l_blocked_list,
3811 &osb->blocked_lock_list); 3810 &osb->blocked_lock_list);
3812 osb->blocked_lock_count++; 3811 osb->blocked_lock_count++;
3813 } 3812 }
3814 spin_unlock(&osb->dc_task_lock); 3813 spin_unlock(&osb->dc_task_lock);
3815 3814
3816 mlog_exit_void(); 3815 mlog_exit_void();
3817 } 3816 }
3818 3817
3819 static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 3818 static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
3820 { 3819 {
3821 unsigned long processed; 3820 unsigned long processed;
3822 struct ocfs2_lock_res *lockres; 3821 struct ocfs2_lock_res *lockres;
3823 3822
3824 mlog_entry_void(); 3823 mlog_entry_void();
3825 3824
3826 spin_lock(&osb->dc_task_lock); 3825 spin_lock(&osb->dc_task_lock);
3827 /* grab this early so we know to try again if a state change and 3826 /* grab this early so we know to try again if a state change and
3828 * wake happens part-way through our work */ 3827 * wake happens part-way through our work */
3829 osb->dc_work_sequence = osb->dc_wake_sequence; 3828 osb->dc_work_sequence = osb->dc_wake_sequence;
3830 3829
3831 processed = osb->blocked_lock_count; 3830 processed = osb->blocked_lock_count;
3832 while (processed) { 3831 while (processed) {
3833 BUG_ON(list_empty(&osb->blocked_lock_list)); 3832 BUG_ON(list_empty(&osb->blocked_lock_list));
3834 3833
3835 lockres = list_entry(osb->blocked_lock_list.next, 3834 lockres = list_entry(osb->blocked_lock_list.next,
3836 struct ocfs2_lock_res, l_blocked_list); 3835 struct ocfs2_lock_res, l_blocked_list);
3837 list_del_init(&lockres->l_blocked_list); 3836 list_del_init(&lockres->l_blocked_list);
3838 osb->blocked_lock_count--; 3837 osb->blocked_lock_count--;
3839 spin_unlock(&osb->dc_task_lock); 3838 spin_unlock(&osb->dc_task_lock);
3840 3839
3841 BUG_ON(!processed); 3840 BUG_ON(!processed);
3842 processed--; 3841 processed--;
3843 3842
3844 ocfs2_process_blocked_lock(osb, lockres); 3843 ocfs2_process_blocked_lock(osb, lockres);
3845 3844
3846 spin_lock(&osb->dc_task_lock); 3845 spin_lock(&osb->dc_task_lock);
3847 } 3846 }
3848 spin_unlock(&osb->dc_task_lock); 3847 spin_unlock(&osb->dc_task_lock);
3849 3848
3850 mlog_exit_void(); 3849 mlog_exit_void();
3851 } 3850 }
3852 3851
3853 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 3852 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
3854 { 3853 {
3855 int empty = 0; 3854 int empty = 0;
3856 3855
3857 spin_lock(&osb->dc_task_lock); 3856 spin_lock(&osb->dc_task_lock);
3858 if (list_empty(&osb->blocked_lock_list)) 3857 if (list_empty(&osb->blocked_lock_list))
3859 empty = 1; 3858 empty = 1;
3860 3859
3861 spin_unlock(&osb->dc_task_lock); 3860 spin_unlock(&osb->dc_task_lock);
3862 return empty; 3861 return empty;
3863 } 3862 }
3864 3863
3865 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 3864 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
3866 { 3865 {
3867 int should_wake = 0; 3866 int should_wake = 0;
3868 3867
3869 spin_lock(&osb->dc_task_lock); 3868 spin_lock(&osb->dc_task_lock);
3870 if (osb->dc_work_sequence != osb->dc_wake_sequence) 3869 if (osb->dc_work_sequence != osb->dc_wake_sequence)
3871 should_wake = 1; 3870 should_wake = 1;
3872 spin_unlock(&osb->dc_task_lock); 3871 spin_unlock(&osb->dc_task_lock);
3873 3872
3874 return should_wake; 3873 return should_wake;
3875 } 3874 }
3876 3875
3877 static int ocfs2_downconvert_thread(void *arg) 3876 static int ocfs2_downconvert_thread(void *arg)
3878 { 3877 {
3879 int status = 0; 3878 int status = 0;
3880 struct ocfs2_super *osb = arg; 3879 struct ocfs2_super *osb = arg;
3881 3880
3882 /* only quit once we've been asked to stop and there is no more 3881 /* only quit once we've been asked to stop and there is no more
3883 * work available */ 3882 * work available */
3884 while (!(kthread_should_stop() && 3883 while (!(kthread_should_stop() &&
3885 ocfs2_downconvert_thread_lists_empty(osb))) { 3884 ocfs2_downconvert_thread_lists_empty(osb))) {
3886 3885
3887 wait_event_interruptible(osb->dc_event, 3886 wait_event_interruptible(osb->dc_event,
3888 ocfs2_downconvert_thread_should_wake(osb) || 3887 ocfs2_downconvert_thread_should_wake(osb) ||
3889 kthread_should_stop()); 3888 kthread_should_stop());
3890 3889
3891 mlog(0, "downconvert_thread: awoken\n"); 3890 mlog(0, "downconvert_thread: awoken\n");
3892 3891
3893 ocfs2_downconvert_thread_do_work(osb); 3892 ocfs2_downconvert_thread_do_work(osb);
3894 } 3893 }
3895 3894
3896 osb->dc_task = NULL; 3895 osb->dc_task = NULL;
3897 return status; 3896 return status;
3898 } 3897 }
3899 3898
3900 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 3899 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
3901 { 3900 {
3902 spin_lock(&osb->dc_task_lock); 3901 spin_lock(&osb->dc_task_lock);
3903 /* make sure the voting thread gets a swipe at whatever changes 3902 /* make sure the voting thread gets a swipe at whatever changes
3904 * the caller may have made to the voting state */ 3903 * the caller may have made to the voting state */
3905 osb->dc_wake_sequence++; 3904 osb->dc_wake_sequence++;
3906 spin_unlock(&osb->dc_task_lock); 3905 spin_unlock(&osb->dc_task_lock);
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * journal.c 4 * journal.c
5 * 5 *
6 * Defines functions of journalling api 6 * Defines functions of journalling api
7 * 7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/types.h> 27 #include <linux/types.h>
28 #include <linux/slab.h> 28 #include <linux/slab.h>
29 #include <linux/highmem.h> 29 #include <linux/highmem.h>
30 #include <linux/kthread.h> 30 #include <linux/kthread.h>
31 #include <linux/time.h> 31 #include <linux/time.h>
32 #include <linux/random.h> 32 #include <linux/random.h>
33 33
34 #define MLOG_MASK_PREFIX ML_JOURNAL 34 #define MLOG_MASK_PREFIX ML_JOURNAL
35 #include <cluster/masklog.h> 35 #include <cluster/masklog.h>
36 36
37 #include "ocfs2.h" 37 #include "ocfs2.h"
38 38
39 #include "alloc.h" 39 #include "alloc.h"
40 #include "blockcheck.h" 40 #include "blockcheck.h"
41 #include "dir.h" 41 #include "dir.h"
42 #include "dlmglue.h" 42 #include "dlmglue.h"
43 #include "extent_map.h" 43 #include "extent_map.h"
44 #include "heartbeat.h" 44 #include "heartbeat.h"
45 #include "inode.h" 45 #include "inode.h"
46 #include "journal.h" 46 #include "journal.h"
47 #include "localalloc.h" 47 #include "localalloc.h"
48 #include "slot_map.h" 48 #include "slot_map.h"
49 #include "super.h" 49 #include "super.h"
50 #include "sysfile.h" 50 #include "sysfile.h"
51 #include "quota.h" 51 #include "quota.h"
52 52
53 #include "buffer_head_io.h" 53 #include "buffer_head_io.h"
54 54
55 DEFINE_SPINLOCK(trans_inc_lock); 55 DEFINE_SPINLOCK(trans_inc_lock);
56 56
57 #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 57 #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
58 58
59 static int ocfs2_force_read_journal(struct inode *inode); 59 static int ocfs2_force_read_journal(struct inode *inode);
60 static int ocfs2_recover_node(struct ocfs2_super *osb, 60 static int ocfs2_recover_node(struct ocfs2_super *osb,
61 int node_num, int slot_num); 61 int node_num, int slot_num);
62 static int __ocfs2_recovery_thread(void *arg); 62 static int __ocfs2_recovery_thread(void *arg);
63 static int ocfs2_commit_cache(struct ocfs2_super *osb); 63 static int ocfs2_commit_cache(struct ocfs2_super *osb);
64 static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota); 64 static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
65 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 65 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
66 int dirty, int replayed); 66 int dirty, int replayed);
67 static int ocfs2_trylock_journal(struct ocfs2_super *osb, 67 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
68 int slot_num); 68 int slot_num);
69 static int ocfs2_recover_orphans(struct ocfs2_super *osb, 69 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
70 int slot); 70 int slot);
71 static int ocfs2_commit_thread(void *arg); 71 static int ocfs2_commit_thread(void *arg);
72 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 72 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
73 int slot_num, 73 int slot_num,
74 struct ocfs2_dinode *la_dinode, 74 struct ocfs2_dinode *la_dinode,
75 struct ocfs2_dinode *tl_dinode, 75 struct ocfs2_dinode *tl_dinode,
76 struct ocfs2_quota_recovery *qrec); 76 struct ocfs2_quota_recovery *qrec);
77 77
/* Wait on mount completion without waiting for quotas (quota arg = 0). */
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
	return __ocfs2_wait_on_mount(osb, 0);
}
82 82
/* Wait on mount completion including quotas (quota arg = 1). */
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
	return __ocfs2_wait_on_mount(osb, 1);
}
87 87
88 /* 88 /*
89 * This replay_map is to track online/offline slots, so we could recover 89 * This replay_map is to track online/offline slots, so we could recover
90 * offline slots during recovery and mount 90 * offline slots during recovery and mount
91 */ 91 */
92 92
enum ocfs2_replay_state {
	REPLAY_UNNEEDED = 0,	/* Replay is not needed, so ignore this map */
	REPLAY_NEEDED,		/* Replay slots marked in rm_replay_slots */
	REPLAY_DONE		/* Replay was already queued */
};

struct ocfs2_replay_map {
	unsigned int rm_slots;			/* entries in rm_replay_slots */
	enum ocfs2_replay_state rm_state;
	/* One byte per slot, nonzero when that slot needs replay.  Use a
	 * C99 flexible array member rather than the old [0] GNU hack. */
	unsigned char rm_replay_slots[];
};
104 104
105 void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state) 105 void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
106 { 106 {
107 if (!osb->replay_map) 107 if (!osb->replay_map)
108 return; 108 return;
109 109
110 /* If we've already queued the replay, we don't have any more to do */ 110 /* If we've already queued the replay, we don't have any more to do */
111 if (osb->replay_map->rm_state == REPLAY_DONE) 111 if (osb->replay_map->rm_state == REPLAY_DONE)
112 return; 112 return;
113 113
114 osb->replay_map->rm_state = state; 114 osb->replay_map->rm_state = state;
115 } 115 }
116 116
117 int ocfs2_compute_replay_slots(struct ocfs2_super *osb) 117 int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
118 { 118 {
119 struct ocfs2_replay_map *replay_map; 119 struct ocfs2_replay_map *replay_map;
120 int i, node_num; 120 int i, node_num;
121 121
122 /* If replay map is already set, we don't do it again */ 122 /* If replay map is already set, we don't do it again */
123 if (osb->replay_map) 123 if (osb->replay_map)
124 return 0; 124 return 0;
125 125
126 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) + 126 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
127 (osb->max_slots * sizeof(char)), GFP_KERNEL); 127 (osb->max_slots * sizeof(char)), GFP_KERNEL);
128 128
129 if (!replay_map) { 129 if (!replay_map) {
130 mlog_errno(-ENOMEM); 130 mlog_errno(-ENOMEM);
131 return -ENOMEM; 131 return -ENOMEM;
132 } 132 }
133 133
134 spin_lock(&osb->osb_lock); 134 spin_lock(&osb->osb_lock);
135 135
136 replay_map->rm_slots = osb->max_slots; 136 replay_map->rm_slots = osb->max_slots;
137 replay_map->rm_state = REPLAY_UNNEEDED; 137 replay_map->rm_state = REPLAY_UNNEEDED;
138 138
139 /* set rm_replay_slots for offline slot(s) */ 139 /* set rm_replay_slots for offline slot(s) */
140 for (i = 0; i < replay_map->rm_slots; i++) { 140 for (i = 0; i < replay_map->rm_slots; i++) {
141 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT) 141 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
142 replay_map->rm_replay_slots[i] = 1; 142 replay_map->rm_replay_slots[i] = 1;
143 } 143 }
144 144
145 osb->replay_map = replay_map; 145 osb->replay_map = replay_map;
146 spin_unlock(&osb->osb_lock); 146 spin_unlock(&osb->osb_lock);
147 return 0; 147 return 0;
148 } 148 }
149 149
150 void ocfs2_queue_replay_slots(struct ocfs2_super *osb) 150 void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
151 { 151 {
152 struct ocfs2_replay_map *replay_map = osb->replay_map; 152 struct ocfs2_replay_map *replay_map = osb->replay_map;
153 int i; 153 int i;
154 154
155 if (!replay_map) 155 if (!replay_map)
156 return; 156 return;
157 157
158 if (replay_map->rm_state != REPLAY_NEEDED) 158 if (replay_map->rm_state != REPLAY_NEEDED)
159 return; 159 return;
160 160
161 for (i = 0; i < replay_map->rm_slots; i++) 161 for (i = 0; i < replay_map->rm_slots; i++)
162 if (replay_map->rm_replay_slots[i]) 162 if (replay_map->rm_replay_slots[i])
163 ocfs2_queue_recovery_completion(osb->journal, i, NULL, 163 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
164 NULL, NULL); 164 NULL, NULL);
165 replay_map->rm_state = REPLAY_DONE; 165 replay_map->rm_state = REPLAY_DONE;
166 } 166 }
167 167
168 void ocfs2_free_replay_slots(struct ocfs2_super *osb) 168 void ocfs2_free_replay_slots(struct ocfs2_super *osb)
169 { 169 {
170 struct ocfs2_replay_map *replay_map = osb->replay_map; 170 struct ocfs2_replay_map *replay_map = osb->replay_map;
171 171
172 if (!osb->replay_map) 172 if (!osb->replay_map)
173 return; 173 return;
174 174
175 kfree(replay_map); 175 kfree(replay_map);
176 osb->replay_map = NULL; 176 osb->replay_map = NULL;
177 } 177 }
178 178
179 int ocfs2_recovery_init(struct ocfs2_super *osb) 179 int ocfs2_recovery_init(struct ocfs2_super *osb)
180 { 180 {
181 struct ocfs2_recovery_map *rm; 181 struct ocfs2_recovery_map *rm;
182 182
183 mutex_init(&osb->recovery_lock); 183 mutex_init(&osb->recovery_lock);
184 osb->disable_recovery = 0; 184 osb->disable_recovery = 0;
185 osb->recovery_thread_task = NULL; 185 osb->recovery_thread_task = NULL;
186 init_waitqueue_head(&osb->recovery_event); 186 init_waitqueue_head(&osb->recovery_event);
187 187
188 rm = kzalloc(sizeof(struct ocfs2_recovery_map) + 188 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
189 osb->max_slots * sizeof(unsigned int), 189 osb->max_slots * sizeof(unsigned int),
190 GFP_KERNEL); 190 GFP_KERNEL);
191 if (!rm) { 191 if (!rm) {
192 mlog_errno(-ENOMEM); 192 mlog_errno(-ENOMEM);
193 return -ENOMEM; 193 return -ENOMEM;
194 } 194 }
195 195
196 rm->rm_entries = (unsigned int *)((char *)rm + 196 rm->rm_entries = (unsigned int *)((char *)rm +
197 sizeof(struct ocfs2_recovery_map)); 197 sizeof(struct ocfs2_recovery_map));
198 osb->recovery_map = rm; 198 osb->recovery_map = rm;
199 199
200 return 0; 200 return 0;
201 } 201 }
202 202
203 /* we can't grab the goofy sem lock from inside wait_event, so we use 203 /* we can't grab the goofy sem lock from inside wait_event, so we use
204 * memory barriers to make sure that we'll see the null task before 204 * memory barriers to make sure that we'll see the null task before
205 * being woken up */ 205 * being woken up */
206 static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) 206 static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
207 { 207 {
208 mb(); 208 mb();
209 return osb->recovery_thread_task != NULL; 209 return osb->recovery_thread_task != NULL;
210 } 210 }
211 211
212 void ocfs2_recovery_exit(struct ocfs2_super *osb) 212 void ocfs2_recovery_exit(struct ocfs2_super *osb)
213 { 213 {
214 struct ocfs2_recovery_map *rm; 214 struct ocfs2_recovery_map *rm;
215 215
216 /* disable any new recovery threads and wait for any currently 216 /* disable any new recovery threads and wait for any currently
217 * running ones to exit. Do this before setting the vol_state. */ 217 * running ones to exit. Do this before setting the vol_state. */
218 mutex_lock(&osb->recovery_lock); 218 mutex_lock(&osb->recovery_lock);
219 osb->disable_recovery = 1; 219 osb->disable_recovery = 1;
220 mutex_unlock(&osb->recovery_lock); 220 mutex_unlock(&osb->recovery_lock);
221 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); 221 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
222 222
223 /* At this point, we know that no more recovery threads can be 223 /* At this point, we know that no more recovery threads can be
224 * launched, so wait for any recovery completion work to 224 * launched, so wait for any recovery completion work to
225 * complete. */ 225 * complete. */
226 flush_workqueue(ocfs2_wq); 226 flush_workqueue(ocfs2_wq);
227 227
228 /* 228 /*
229 * Now that recovery is shut down, and the osb is about to be 229 * Now that recovery is shut down, and the osb is about to be
230 * freed, the osb_lock is not taken here. 230 * freed, the osb_lock is not taken here.
231 */ 231 */
232 rm = osb->recovery_map; 232 rm = osb->recovery_map;
233 /* XXX: Should we bug if there are dirty entries? */ 233 /* XXX: Should we bug if there are dirty entries? */
234 234
235 kfree(rm); 235 kfree(rm);
236 } 236 }
237 237
238 static int __ocfs2_recovery_map_test(struct ocfs2_super *osb, 238 static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
239 unsigned int node_num) 239 unsigned int node_num)
240 { 240 {
241 int i; 241 int i;
242 struct ocfs2_recovery_map *rm = osb->recovery_map; 242 struct ocfs2_recovery_map *rm = osb->recovery_map;
243 243
244 assert_spin_locked(&osb->osb_lock); 244 assert_spin_locked(&osb->osb_lock);
245 245
246 for (i = 0; i < rm->rm_used; i++) { 246 for (i = 0; i < rm->rm_used; i++) {
247 if (rm->rm_entries[i] == node_num) 247 if (rm->rm_entries[i] == node_num)
248 return 1; 248 return 1;
249 } 249 }
250 250
251 return 0; 251 return 0;
252 } 252 }
253 253
/* Behaves like test-and-set. Returns the previous value */
static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
				  unsigned int node_num)
{
	struct ocfs2_recovery_map *rm = osb->recovery_map;

	spin_lock(&osb->osb_lock);
	if (__ocfs2_recovery_map_test(osb, node_num)) {
		/* Already marked - another thread owns this recovery. */
		spin_unlock(&osb->osb_lock);
		return 1;
	}

	/* XXX: Can this be exploited? Not from o2dlm... */
	BUG_ON(rm->rm_used >= osb->max_slots);

	/* Append the node and publish it under the lock. */
	rm->rm_entries[rm->rm_used] = node_num;
	rm->rm_used++;
	spin_unlock(&osb->osb_lock);

	return 0;
}
275 275
276 static void ocfs2_recovery_map_clear(struct ocfs2_super *osb, 276 static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
277 unsigned int node_num) 277 unsigned int node_num)
278 { 278 {
279 int i; 279 int i;
280 struct ocfs2_recovery_map *rm = osb->recovery_map; 280 struct ocfs2_recovery_map *rm = osb->recovery_map;
281 281
282 spin_lock(&osb->osb_lock); 282 spin_lock(&osb->osb_lock);
283 283
284 for (i = 0; i < rm->rm_used; i++) { 284 for (i = 0; i < rm->rm_used; i++) {
285 if (rm->rm_entries[i] == node_num) 285 if (rm->rm_entries[i] == node_num)
286 break; 286 break;
287 } 287 }
288 288
289 if (i < rm->rm_used) { 289 if (i < rm->rm_used) {
290 /* XXX: be careful with the pointer math */ 290 /* XXX: be careful with the pointer math */
291 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), 291 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
292 (rm->rm_used - i - 1) * sizeof(unsigned int)); 292 (rm->rm_used - i - 1) * sizeof(unsigned int));
293 rm->rm_used--; 293 rm->rm_used--;
294 } 294 }
295 295
296 spin_unlock(&osb->osb_lock); 296 spin_unlock(&osb->osb_lock);
297 } 297 }
298 298
/*
 * Flush all pending transactions and checkpoint the journal.
 *
 * Takes j_trans_barrier exclusively so no new transactions can start,
 * flushes the jbd2 journal, bumps the transaction id (which tells
 * dlmglue that previously-journaled cluster locks are now safe to
 * drop), then wakes the downconvert thread and checkpoint waiters.
 * Returns 0 on success or a negative jbd2 error.
 */
static int ocfs2_commit_cache(struct ocfs2_super *osb)
{
	int status = 0;
	unsigned int flushed;
	unsigned long old_id;
	struct ocfs2_journal *journal = NULL;

	mlog_entry_void();

	journal = osb->journal;

	/* Flush all pending commits and checkpoint the journal. */
	down_write(&journal->j_trans_barrier);

	if (atomic_read(&journal->j_num_trans) == 0) {
		up_write(&journal->j_trans_barrier);
		mlog(0, "No transactions for me to flush!\n");
		goto finally;
	}

	jbd2_journal_lock_updates(journal->j_journal);
	status = jbd2_journal_flush(journal->j_journal);
	jbd2_journal_unlock_updates(journal->j_journal);
	if (status < 0) {
		up_write(&journal->j_trans_barrier);
		mlog_errno(status);
		goto finally;
	}

	/* New trans id marks everything up to here as checkpointed. */
	old_id = ocfs2_inc_trans_id(journal);

	flushed = atomic_read(&journal->j_num_trans);
	atomic_set(&journal->j_num_trans, 0);
	up_write(&journal->j_trans_barrier);

	mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n",
	     journal->j_trans_id, flushed);

	ocfs2_wake_downconvert_thread(osb);
	wake_up(&journal->j_checkpointed);
finally:
	mlog_exit(status);
	return status;
}
343 343
344 /* pass it NULL and it will allocate a new handle object for you. If 344 /* pass it NULL and it will allocate a new handle object for you. If
345 * you pass it a handle however, it may still return error, in which 345 * you pass it a handle however, it may still return error, in which
346 * case it has free'd the passed handle for you. */ 346 * case it has free'd the passed handle for you. */
347 handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) 347 handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
348 { 348 {
349 journal_t *journal = osb->journal->j_journal; 349 journal_t *journal = osb->journal->j_journal;
350 handle_t *handle; 350 handle_t *handle;
351 351
352 BUG_ON(!osb || !osb->journal->j_journal); 352 BUG_ON(!osb || !osb->journal->j_journal);
353 353
354 if (ocfs2_is_hard_readonly(osb)) 354 if (ocfs2_is_hard_readonly(osb))
355 return ERR_PTR(-EROFS); 355 return ERR_PTR(-EROFS);
356 356
357 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); 357 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
358 BUG_ON(max_buffs <= 0); 358 BUG_ON(max_buffs <= 0);
359 359
360 /* Nested transaction? Just return the handle... */ 360 /* Nested transaction? Just return the handle... */
361 if (journal_current_handle()) 361 if (journal_current_handle())
362 return jbd2_journal_start(journal, max_buffs); 362 return jbd2_journal_start(journal, max_buffs);
363 363
364 down_read(&osb->journal->j_trans_barrier); 364 down_read(&osb->journal->j_trans_barrier);
365 365
366 handle = jbd2_journal_start(journal, max_buffs); 366 handle = jbd2_journal_start(journal, max_buffs);
367 if (IS_ERR(handle)) { 367 if (IS_ERR(handle)) {
368 up_read(&osb->journal->j_trans_barrier); 368 up_read(&osb->journal->j_trans_barrier);
369 369
370 mlog_errno(PTR_ERR(handle)); 370 mlog_errno(PTR_ERR(handle));
371 371
372 if (is_journal_aborted(journal)) { 372 if (is_journal_aborted(journal)) {
373 ocfs2_abort(osb->sb, "Detected aborted journal"); 373 ocfs2_abort(osb->sb, "Detected aborted journal");
374 handle = ERR_PTR(-EROFS); 374 handle = ERR_PTR(-EROFS);
375 } 375 }
376 } else { 376 } else {
377 if (!ocfs2_mount_local(osb)) 377 if (!ocfs2_mount_local(osb))
378 atomic_inc(&(osb->journal->j_num_trans)); 378 atomic_inc(&(osb->journal->j_num_trans));
379 } 379 }
380 380
381 return handle; 381 return handle;
382 } 382 }
383 383
384 int ocfs2_commit_trans(struct ocfs2_super *osb, 384 int ocfs2_commit_trans(struct ocfs2_super *osb,
385 handle_t *handle) 385 handle_t *handle)
386 { 386 {
387 int ret, nested; 387 int ret, nested;
388 struct ocfs2_journal *journal = osb->journal; 388 struct ocfs2_journal *journal = osb->journal;
389 389
390 BUG_ON(!handle); 390 BUG_ON(!handle);
391 391
392 nested = handle->h_ref > 1; 392 nested = handle->h_ref > 1;
393 ret = jbd2_journal_stop(handle); 393 ret = jbd2_journal_stop(handle);
394 if (ret < 0) 394 if (ret < 0)
395 mlog_errno(ret); 395 mlog_errno(ret);
396 396
397 if (!nested) 397 if (!nested)
398 up_read(&journal->j_trans_barrier); 398 up_read(&journal->j_trans_barrier);
399 399
400 return ret; 400 return ret;
401 } 401 }
402 402
/*
 * 'nblocks' is what you want to add to the current
 * transaction. extend_trans will either extend the current handle by
 * nblocks, or commit it and start a new one with nblocks credits.
 *
 * This might call jbd2_journal_restart() which will commit dirty buffers
 * and then restart the transaction. Before calling
 * ocfs2_extend_trans(), any changed blocks should have been
 * dirtied. After calling it, all blocks which need to be changed must
 * go through another set of journal_access/journal_dirty calls.
 *
 * WARNING: This will not release any semaphores or disk locks taken
 * during the transaction, so make sure they were taken *before*
 * start_trans or we'll have ordering deadlocks.
 *
 * WARNING2: Note that we do *not* drop j_trans_barrier here. This is
 * good because transaction ids haven't yet been recorded on the
 * cluster locks associated with this handle.
 */
int ocfs2_extend_trans(handle_t *handle, int nblocks)
{
	int status;

	BUG_ON(!handle);
	BUG_ON(!nblocks);

	mlog_entry_void();

	mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);

#ifdef CONFIG_OCFS2_DEBUG_FS
	/* Debug builds skip the extend and always take the restart
	 * path below, so it gets exercised. */
	status = 1;
#else
	status = jbd2_journal_extend(handle, nblocks);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
#endif

	/* status > 0: the extend could not be granted - commit the
	 * current transaction and restart with fresh credits. */
	if (status > 0) {
		mlog(0,
		     "jbd2_journal_extend failed, trying "
		     "jbd2_journal_restart\n");
		status = jbd2_journal_restart(handle, nblocks);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	status = 0;
bail:

	mlog_exit(status);
	return status;
}
460 460
/*
 * Per-block-type ecc trigger descriptor.  ot_offset is the byte offset
 * of the struct ocfs2_block_check inside the block, for block types
 * where that offset is fixed.
 */
struct ocfs2_triggers {
	struct jbd2_buffer_trigger_type ot_triggers;
	int ot_offset;
};

/* Recover the ocfs2 wrapper from the embedded jbd2 trigger type. */
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
	return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
470 470
/* Commit-time ecc trigger for block types with a fixed check offset. */
static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
				 struct buffer_head *bh,
				 void *data, size_t size)
{
	struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);

	/*
	 * We aren't guaranteed to have the superblock here, so we
	 * must unconditionally compute the ecc data.
	 * __ocfs2_journal_access() will only set the triggers if
	 * metaecc is enabled.
	 */
	ocfs2_block_check_compute(data, size, data + ot->ot_offset);
}
485 485
/*
 * Quota blocks have their own trigger because the struct ocfs2_block_check
 * offset depends on the blocksize.
 */
static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
				    struct buffer_head *bh,
				    void *data, size_t size)
{
	/* Locate the trailer from the blocksize rather than ot_offset. */
	struct ocfs2_disk_dqtrailer *dqt =
		ocfs2_block_dqtrailer(size, data);

	/*
	 * We aren't guaranteed to have the superblock here, so we
	 * must unconditionally compute the ecc data.
	 * __ocfs2_journal_access() will only set the triggers if
	 * metaecc is enabled.
	 */
	ocfs2_block_check_compute(data, size, &dqt->dq_check);
}
505 505
/*
 * Directory blocks also have their own trigger because the
 * struct ocfs2_block_check offset depends on the blocksize.
 */
static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
				    struct buffer_head *bh,
				    void *data, size_t size)
{
	/* Locate the trailer from the blocksize rather than ot_offset. */
	struct ocfs2_dir_block_trailer *trailer =
		ocfs2_dir_trailer_from_size(size, data);

	/*
	 * We aren't guaranteed to have the superblock here, so we
	 * must unconditionally compute the ecc data.
	 * __ocfs2_journal_access() will only set the triggers if
	 * metaecc is enabled.
	 */
	ocfs2_block_check_compute(data, size, &trailer->db_check);
}
525 525
/* Called by jbd2 when the journal aborts; take the filesystem down. */
static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
				struct buffer_head *bh)
{
	mlog(ML_ERROR,
	     "ocfs2_abort_trigger called by JBD2.  bh = 0x%lx, "
	     "bh->b_blocknr = %llu\n",
	     (unsigned long)bh,
	     (unsigned long long)bh->b_blocknr);

	/* We aren't guaranteed to have the superblock here - but if we
	 * don't, it'll just crash. */
	ocfs2_error(bh->b_assoc_map->host->i_sb,
		    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
540 540
/*
 * One trigger table per metadata block type.  Types with a fixed ecc
 * offset use the generic commit trigger plus ot_offset; directory and
 * quota blocks compute the trailer location from the blocksize in
 * their own commit triggers and leave ot_offset unset.
 */
static struct ocfs2_triggers di_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
};

static struct ocfs2_triggers eb_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
};

static struct ocfs2_triggers gd_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
};

static struct ocfs2_triggers db_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_db_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
};

static struct ocfs2_triggers xb_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
};

static struct ocfs2_triggers dq_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_dq_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
};

static struct ocfs2_triggers dr_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
};

static struct ocfs2_triggers dl_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
};
602 602
603 static int __ocfs2_journal_access(handle_t *handle, 603 static int __ocfs2_journal_access(handle_t *handle,
604 struct inode *inode, 604 struct inode *inode,
605 struct buffer_head *bh, 605 struct buffer_head *bh,
606 struct ocfs2_triggers *triggers, 606 struct ocfs2_triggers *triggers,
607 int type) 607 int type)
608 { 608 {
609 int status; 609 int status;
610 610
611 BUG_ON(!inode); 611 BUG_ON(!inode);
612 BUG_ON(!handle); 612 BUG_ON(!handle);
613 BUG_ON(!bh); 613 BUG_ON(!bh);
614 614
615 mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", 615 mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n",
616 (unsigned long long)bh->b_blocknr, type, 616 (unsigned long long)bh->b_blocknr, type,
617 (type == OCFS2_JOURNAL_ACCESS_CREATE) ? 617 (type == OCFS2_JOURNAL_ACCESS_CREATE) ?
618 "OCFS2_JOURNAL_ACCESS_CREATE" : 618 "OCFS2_JOURNAL_ACCESS_CREATE" :
619 "OCFS2_JOURNAL_ACCESS_WRITE", 619 "OCFS2_JOURNAL_ACCESS_WRITE",
620 bh->b_size); 620 bh->b_size);
621 621
622 /* we can safely remove this assertion after testing. */ 622 /* we can safely remove this assertion after testing. */
623 if (!buffer_uptodate(bh)) { 623 if (!buffer_uptodate(bh)) {
624 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n"); 624 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
625 mlog(ML_ERROR, "b_blocknr=%llu\n", 625 mlog(ML_ERROR, "b_blocknr=%llu\n",
626 (unsigned long long)bh->b_blocknr); 626 (unsigned long long)bh->b_blocknr);
627 BUG(); 627 BUG();
628 } 628 }
629 629
630 /* Set the current transaction information on the inode so 630 /* Set the current transaction information on the inode so
631 * that the locking code knows whether it can drop it's locks 631 * that the locking code knows whether it can drop it's locks
632 * on this inode or not. We're protected from the commit 632 * on this inode or not. We're protected from the commit
633 * thread updating the current transaction id until 633 * thread updating the current transaction id until
634 * ocfs2_commit_trans() because ocfs2_start_trans() took 634 * ocfs2_commit_trans() because ocfs2_start_trans() took
635 * j_trans_barrier for us. */ 635 * j_trans_barrier for us. */
636 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); 636 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode);
637 637
638 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 638 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
639 switch (type) { 639 switch (type) {
640 case OCFS2_JOURNAL_ACCESS_CREATE: 640 case OCFS2_JOURNAL_ACCESS_CREATE:
641 case OCFS2_JOURNAL_ACCESS_WRITE: 641 case OCFS2_JOURNAL_ACCESS_WRITE:
642 status = jbd2_journal_get_write_access(handle, bh); 642 status = jbd2_journal_get_write_access(handle, bh);
643 break; 643 break;
644 644
645 case OCFS2_JOURNAL_ACCESS_UNDO: 645 case OCFS2_JOURNAL_ACCESS_UNDO:
646 status = jbd2_journal_get_undo_access(handle, bh); 646 status = jbd2_journal_get_undo_access(handle, bh);
647 break; 647 break;
648 648
649 default: 649 default:
650 status = -EINVAL; 650 status = -EINVAL;
651 mlog(ML_ERROR, "Uknown access type!\n"); 651 mlog(ML_ERROR, "Uknown access type!\n");
652 } 652 }
653 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers) 653 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
654 jbd2_journal_set_triggers(bh, &triggers->ot_triggers); 654 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
655 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 655 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
656 656
657 if (status < 0) 657 if (status < 0)
658 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 658 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
659 status, type); 659 status, type);
660 660
661 mlog_exit(status); 661 mlog_exit(status);
662 return status; 662 return status;
663 } 663 }
664 664
665 int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, 665 int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
666 struct buffer_head *bh, int type) 666 struct buffer_head *bh, int type)
667 { 667 {
668 return __ocfs2_journal_access(handle, inode, bh, &di_triggers, 668 return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
669 type); 669 type);
670 } 670 }
671 671
672 int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, 672 int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
673 struct buffer_head *bh, int type) 673 struct buffer_head *bh, int type)
674 { 674 {
675 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers, 675 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
676 type); 676 type);
677 } 677 }
678 678
679 int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, 679 int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
680 struct buffer_head *bh, int type) 680 struct buffer_head *bh, int type)
681 { 681 {
682 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers, 682 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
683 type); 683 type);
684 } 684 }
685 685
686 int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, 686 int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
687 struct buffer_head *bh, int type) 687 struct buffer_head *bh, int type)
688 { 688 {
689 return __ocfs2_journal_access(handle, inode, bh, &db_triggers, 689 return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
690 type); 690 type);
691 } 691 }
692 692
693 int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, 693 int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
694 struct buffer_head *bh, int type) 694 struct buffer_head *bh, int type)
695 { 695 {
696 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers, 696 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
697 type); 697 type);
698 } 698 }
699 699
700 int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, 700 int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
701 struct buffer_head *bh, int type) 701 struct buffer_head *bh, int type)
702 { 702 {
703 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers, 703 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
704 type); 704 type);
705 } 705 }
706 706
707 int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, 707 int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
708 struct buffer_head *bh, int type) 708 struct buffer_head *bh, int type)
709 { 709 {
710 return __ocfs2_journal_access(handle, inode, bh, &dr_triggers, 710 return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
711 type); 711 type);
712 } 712 }
713 713
714 int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, 714 int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
715 struct buffer_head *bh, int type) 715 struct buffer_head *bh, int type)
716 { 716 {
717 return __ocfs2_journal_access(handle, inode, bh, &dl_triggers, 717 return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
718 type); 718 type);
719 } 719 }
720 720
721 int ocfs2_journal_access(handle_t *handle, struct inode *inode, 721 int ocfs2_journal_access(handle_t *handle, struct inode *inode,
722 struct buffer_head *bh, int type) 722 struct buffer_head *bh, int type)
723 { 723 {
724 return __ocfs2_journal_access(handle, inode, bh, NULL, type); 724 return __ocfs2_journal_access(handle, inode, bh, NULL, type);
725 } 725 }
726 726
727 int ocfs2_journal_dirty(handle_t *handle, 727 int ocfs2_journal_dirty(handle_t *handle,
728 struct buffer_head *bh) 728 struct buffer_head *bh)
729 { 729 {
730 int status; 730 int status;
731 731
732 mlog_entry("(bh->b_blocknr=%llu)\n", 732 mlog_entry("(bh->b_blocknr=%llu)\n",
733 (unsigned long long)bh->b_blocknr); 733 (unsigned long long)bh->b_blocknr);
734 734
735 status = jbd2_journal_dirty_metadata(handle, bh); 735 status = jbd2_journal_dirty_metadata(handle, bh);
736 if (status < 0) 736 if (status < 0)
737 mlog(ML_ERROR, "Could not dirty metadata buffer. " 737 mlog(ML_ERROR, "Could not dirty metadata buffer. "
738 "(bh->b_blocknr=%llu)\n", 738 "(bh->b_blocknr=%llu)\n",
739 (unsigned long long)bh->b_blocknr); 739 (unsigned long long)bh->b_blocknr);
740 740
741 mlog_exit(status); 741 mlog_exit(status);
742 return status; 742 return status;
743 } 743 }
744 744
/* Default commit interval mirrors the jbd2 default, in jiffies. */
#define OCFS2_DEFAULT_COMMIT_INTERVAL	(HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)

/*
 * Push the mount options (commit interval, barrier flag) into the
 * live jbd2 journal.  The journal fields are protected by
 * j_state_lock.
 */
void ocfs2_set_journal_params(struct ocfs2_super *osb)
{
	journal_t *journal = osb->journal->j_journal;
	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;

	/* A commit interval given at mount time overrides the default. */
	if (osb->osb_commit_interval)
		commit_interval = osb->osb_commit_interval;

	spin_lock(&journal->j_state_lock);
	journal->j_commit_interval = commit_interval;
	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
		journal->j_flags |= JBD2_BARRIER;
	else
		journal->j_flags &= ~JBD2_BARRIER;
	spin_unlock(&journal->j_state_lock);
}
763 763
764 int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) 764 int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
765 { 765 {
766 int status = -1; 766 int status = -1;
767 struct inode *inode = NULL; /* the journal inode */ 767 struct inode *inode = NULL; /* the journal inode */
768 journal_t *j_journal = NULL; 768 journal_t *j_journal = NULL;
769 struct ocfs2_dinode *di = NULL; 769 struct ocfs2_dinode *di = NULL;
770 struct buffer_head *bh = NULL; 770 struct buffer_head *bh = NULL;
771 struct ocfs2_super *osb; 771 struct ocfs2_super *osb;
772 int inode_lock = 0; 772 int inode_lock = 0;
773 773
774 mlog_entry_void(); 774 mlog_entry_void();
775 775
776 BUG_ON(!journal); 776 BUG_ON(!journal);
777 777
778 osb = journal->j_osb; 778 osb = journal->j_osb;
779 779
780 /* already have the inode for our journal */ 780 /* already have the inode for our journal */
781 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 781 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
782 osb->slot_num); 782 osb->slot_num);
783 if (inode == NULL) { 783 if (inode == NULL) {
784 status = -EACCES; 784 status = -EACCES;
785 mlog_errno(status); 785 mlog_errno(status);
786 goto done; 786 goto done;
787 } 787 }
788 if (is_bad_inode(inode)) { 788 if (is_bad_inode(inode)) {
789 mlog(ML_ERROR, "access error (bad inode)\n"); 789 mlog(ML_ERROR, "access error (bad inode)\n");
790 iput(inode); 790 iput(inode);
791 inode = NULL; 791 inode = NULL;
792 status = -EACCES; 792 status = -EACCES;
793 goto done; 793 goto done;
794 } 794 }
795 795
796 SET_INODE_JOURNAL(inode); 796 SET_INODE_JOURNAL(inode);
797 OCFS2_I(inode)->ip_open_count++; 797 OCFS2_I(inode)->ip_open_count++;
798 798
799 /* Skip recovery waits here - journal inode metadata never 799 /* Skip recovery waits here - journal inode metadata never
800 * changes in a live cluster so it can be considered an 800 * changes in a live cluster so it can be considered an
801 * exception to the rule. */ 801 * exception to the rule. */
802 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 802 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
803 if (status < 0) { 803 if (status < 0) {
804 if (status != -ERESTARTSYS) 804 if (status != -ERESTARTSYS)
805 mlog(ML_ERROR, "Could not get lock on journal!\n"); 805 mlog(ML_ERROR, "Could not get lock on journal!\n");
806 goto done; 806 goto done;
807 } 807 }
808 808
809 inode_lock = 1; 809 inode_lock = 1;
810 di = (struct ocfs2_dinode *)bh->b_data; 810 di = (struct ocfs2_dinode *)bh->b_data;
811 811
812 if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { 812 if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) {
813 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", 813 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
814 inode->i_size); 814 inode->i_size);
815 status = -EINVAL; 815 status = -EINVAL;
816 goto done; 816 goto done;
817 } 817 }
818 818
819 mlog(0, "inode->i_size = %lld\n", inode->i_size); 819 mlog(0, "inode->i_size = %lld\n", inode->i_size);
820 mlog(0, "inode->i_blocks = %llu\n", 820 mlog(0, "inode->i_blocks = %llu\n",
821 (unsigned long long)inode->i_blocks); 821 (unsigned long long)inode->i_blocks);
822 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); 822 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
823 823
824 /* call the kernels journal init function now */ 824 /* call the kernels journal init function now */
825 j_journal = jbd2_journal_init_inode(inode); 825 j_journal = jbd2_journal_init_inode(inode);
826 if (j_journal == NULL) { 826 if (j_journal == NULL) {
827 mlog(ML_ERROR, "Linux journal layer error\n"); 827 mlog(ML_ERROR, "Linux journal layer error\n");
828 status = -EINVAL; 828 status = -EINVAL;
829 goto done; 829 goto done;
830 } 830 }
831 831
832 mlog(0, "Returned from jbd2_journal_init_inode\n"); 832 mlog(0, "Returned from jbd2_journal_init_inode\n");
833 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); 833 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
834 834
835 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & 835 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
836 OCFS2_JOURNAL_DIRTY_FL); 836 OCFS2_JOURNAL_DIRTY_FL);
837 837
838 journal->j_journal = j_journal; 838 journal->j_journal = j_journal;
839 journal->j_inode = inode; 839 journal->j_inode = inode;
840 journal->j_bh = bh; 840 journal->j_bh = bh;
841 841
842 ocfs2_set_journal_params(osb); 842 ocfs2_set_journal_params(osb);
843 843
844 journal->j_state = OCFS2_JOURNAL_LOADED; 844 journal->j_state = OCFS2_JOURNAL_LOADED;
845 845
846 status = 0; 846 status = 0;
847 done: 847 done:
848 if (status < 0) { 848 if (status < 0) {
849 if (inode_lock) 849 if (inode_lock)
850 ocfs2_inode_unlock(inode, 1); 850 ocfs2_inode_unlock(inode, 1);
851 brelse(bh); 851 brelse(bh);
852 if (inode) { 852 if (inode) {
853 OCFS2_I(inode)->ip_open_count--; 853 OCFS2_I(inode)->ip_open_count--;
854 iput(inode); 854 iput(inode);
855 } 855 }
856 } 856 }
857 857
858 mlog_exit(status); 858 mlog_exit(status);
859 return status; 859 return status;
860 } 860 }
861 861
862 static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di) 862 static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
863 { 863 {
864 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1); 864 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
865 } 865 }
866 866
867 static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di) 867 static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
868 { 868 {
869 return le32_to_cpu(di->id1.journal1.ij_recovery_generation); 869 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
870 } 870 }
871 871
/*
 * Set or clear OCFS2_JOURNAL_DIRTY_FL in this node's journal inode and
 * write the inode block back to disk.  When @replayed is set, the
 * recovery generation is bumped so other nodes can see that this
 * journal has been replayed.
 *
 * Returns 0 on success or the negative error from the block write.
 */
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
				      int dirty, int replayed)
{
	int status;
	unsigned int flags;
	struct ocfs2_journal *journal = osb->journal;
	struct buffer_head *bh = journal->j_bh;
	struct ocfs2_dinode *fe;

	mlog_entry_void();

	fe = (struct ocfs2_dinode *)bh->b_data;

	/* The journal bh on the osb always comes from ocfs2_journal_init()
	 * and was validated there inside ocfs2_inode_lock_full().  It's a
	 * code bug if we mess it up. */
	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));

	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	if (dirty)
		flags |= OCFS2_JOURNAL_DIRTY_FL;
	else
		flags &= ~OCFS2_JOURNAL_DIRTY_FL;
	fe->id1.journal1.ij_flags = cpu_to_le32(flags);

	if (replayed)
		ocfs2_bump_recovery_generation(fe);

	/* Recompute the metadata ECC before the block hits disk. */
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
	status = ocfs2_write_block(osb, bh, journal->j_inode);
	if (status < 0)
		mlog_errno(status);

	mlog_exit(status);
	return status;
}
908 908
/*
 * If the journal has been kmalloc'd it needs to be freed after this
 * call.
 *
 * Tears down this node's journal: stops the commit thread, flushes
 * remaining transactions, marks the journal clean on disk, destroys the
 * jbd2 journal and drops the cluster lock taken at init time.
 */
void ocfs2_journal_shutdown(struct ocfs2_super *osb)
{
	struct ocfs2_journal *journal = NULL;
	int status = 0;
	struct inode *inode = NULL;
	int num_running_trans = 0;

	mlog_entry_void();

	BUG_ON(!osb);

	journal = osb->journal;
	if (!journal)
		goto done;

	inode = journal->j_inode;

	if (journal->j_state != OCFS2_JOURNAL_LOADED)
		goto done;

	/* need to inc inode use count - jbd2_journal_destroy will iput. */
	if (!igrab(inode))
		BUG();

	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
	if (num_running_trans > 0)
		mlog(0, "Shutting down journal: must wait on %d "
		     "running transactions!\n",
		     num_running_trans);

	/* Do a commit_cache here. It will flush our journal, *and*
	 * release any locks that are still held.
	 * set the SHUTDOWN flag and release the trans lock.
	 * the commit thread will take the trans lock for us below. */
	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;

	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
	 * drop the trans_lock (which we want to hold until we
	 * completely destroy the journal. */
	if (osb->commit_task) {
		/* Wait for the commit thread */
		mlog(0, "Waiting for ocfs2commit to exit....\n");
		kthread_stop(osb->commit_task);
		osb->commit_task = NULL;
	}

	/* After the commit thread is gone, no transactions may remain. */
	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);

	if (ocfs2_mount_local(osb)) {
		/* Local mounts have no commit thread; flush synchronously. */
		jbd2_journal_lock_updates(journal->j_journal);
		status = jbd2_journal_flush(journal->j_journal);
		jbd2_journal_unlock_updates(journal->j_journal);
		if (status < 0)
			mlog_errno(status);
	}

	if (status == 0) {
		/*
		 * Do not toggle if flush was unsuccessful otherwise
		 * will leave dirty metadata in a "clean" journal
		 */
		status = ocfs2_journal_toggle_dirty(osb, 0, 0);
		if (status < 0)
			mlog_errno(status);
	}

	/* Shutdown the kernel journal system */
	jbd2_journal_destroy(journal->j_journal);
	journal->j_journal = NULL;

	OCFS2_I(inode)->ip_open_count--;

	/* unlock our journal */
	ocfs2_inode_unlock(inode, 1);

	brelse(journal->j_bh);
	journal->j_bh = NULL;

	journal->j_state = OCFS2_JOURNAL_FREE;

//	up_write(&journal->j_trans_barrier);
done:
	if (inode)
		iput(inode);
	mlog_exit_void();
}
999 999
1000 static void ocfs2_clear_journal_error(struct super_block *sb, 1000 static void ocfs2_clear_journal_error(struct super_block *sb,
1001 journal_t *journal, 1001 journal_t *journal,
1002 int slot) 1002 int slot)
1003 { 1003 {
1004 int olderr; 1004 int olderr;
1005 1005
1006 olderr = jbd2_journal_errno(journal); 1006 olderr = jbd2_journal_errno(journal);
1007 if (olderr) { 1007 if (olderr) {
1008 mlog(ML_ERROR, "File system error %d recorded in " 1008 mlog(ML_ERROR, "File system error %d recorded in "
1009 "journal %u.\n", olderr, slot); 1009 "journal %u.\n", olderr, slot);
1010 mlog(ML_ERROR, "File system on device %s needs checking.\n", 1010 mlog(ML_ERROR, "File system on device %s needs checking.\n",
1011 sb->s_id); 1011 sb->s_id);
1012 1012
1013 jbd2_journal_ack_err(journal); 1013 jbd2_journal_ack_err(journal);
1014 jbd2_journal_clear_err(journal); 1014 jbd2_journal_clear_err(journal);
1015 } 1015 }
1016 } 1016 }
1017 1017
/*
 * Load the journal off disk, clear any prior fs error recorded in it,
 * mark it dirty (bumping the recovery generation if @replayed) and,
 * for cluster mounts (!@local), start the ocfs2cmt commit thread.
 *
 * Returns 0 on success or a negative errno.
 */
int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
{
	int status = 0;
	struct ocfs2_super *osb;

	mlog_entry_void();

	BUG_ON(!journal);

	osb = journal->j_osb;

	status = jbd2_journal_load(journal->j_journal);
	if (status < 0) {
		mlog(ML_ERROR, "Failed to load journal!\n");
		goto done;
	}

	/* Report/ack any error a previous mount left in the journal. */
	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);

	status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	/* Launch the commit thread */
	if (!local) {
		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
					       "ocfs2cmt");
		if (IS_ERR(osb->commit_task)) {
			status = PTR_ERR(osb->commit_task);
			osb->commit_task = NULL;
			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
			     "error=%d", status);
			goto done;
		}
	} else
		osb->commit_task = NULL;

done:
	mlog_exit(status);
	return status;
}
1061 1061
1062 1062
1063 /* 'full' flag tells us whether we clear out all blocks or if we just 1063 /* 'full' flag tells us whether we clear out all blocks or if we just
1064 * mark the journal clean */ 1064 * mark the journal clean */
1065 int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) 1065 int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1066 { 1066 {
1067 int status; 1067 int status;
1068 1068
1069 mlog_entry_void(); 1069 mlog_entry_void();
1070 1070
1071 BUG_ON(!journal); 1071 BUG_ON(!journal);
1072 1072
1073 status = jbd2_journal_wipe(journal->j_journal, full); 1073 status = jbd2_journal_wipe(journal->j_journal, full);
1074 if (status < 0) { 1074 if (status < 0) {
1075 mlog_errno(status); 1075 mlog_errno(status);
1076 goto bail; 1076 goto bail;
1077 } 1077 }
1078 1078
1079 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0); 1079 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
1080 if (status < 0) 1080 if (status < 0)
1081 mlog_errno(status); 1081 mlog_errno(status);
1082 1082
1083 bail: 1083 bail:
1084 mlog_exit(status); 1084 mlog_exit(status);
1085 return status; 1085 return status;
1086 } 1086 }
1087 1087
1088 static int ocfs2_recovery_completed(struct ocfs2_super *osb) 1088 static int ocfs2_recovery_completed(struct ocfs2_super *osb)
1089 { 1089 {
1090 int empty; 1090 int empty;
1091 struct ocfs2_recovery_map *rm = osb->recovery_map; 1091 struct ocfs2_recovery_map *rm = osb->recovery_map;
1092 1092
1093 spin_lock(&osb->osb_lock); 1093 spin_lock(&osb->osb_lock);
1094 empty = (rm->rm_used == 0); 1094 empty = (rm->rm_used == 0);
1095 spin_unlock(&osb->osb_lock); 1095 spin_unlock(&osb->osb_lock);
1096 1096
1097 return empty; 1097 return empty;
1098 } 1098 }
1099 1099
/*
 * Sleep until the recovery map is empty, i.e. all dead nodes have been
 * recovered.  Woken via osb->recovery_event.
 */
void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
{
	wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
}
1104 1104
/*
 * JBD might read a cached version of another node's journal file.  We
 * don't want this as this file changes often and we get no
 * notification on those changes.  The only way to be sure that we've
 * got the most up to date version of those blocks then is to force
 * read them off disk.  Just searching through the buffer cache won't
 * work as there may be pages backing this file which are still marked
 * up to date.  We know things can't change on this file underneath us
 * as we have the lock by now :)
 */
static int ocfs2_force_read_journal(struct inode *inode)
{
	int status = 0;
	int i;
	u64 v_blkno, p_blkno, p_blocks, num_blocks;
/* Number of journal blocks read per batch. */
#define CONCURRENT_JOURNAL_FILL 32ULL
	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];

	mlog_entry_void();

	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);

	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
	v_blkno = 0;
	while (v_blkno < num_blocks) {
		/* Map the next virtual block to a physical extent. */
		status = ocfs2_extent_map_get_blocks(inode, v_blkno,
						     &p_blkno, &p_blocks, NULL);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		if (p_blocks > CONCURRENT_JOURNAL_FILL)
			p_blocks = CONCURRENT_JOURNAL_FILL;

		/* We are reading journal data which should not
		 * be put in the uptodate cache */
		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
						p_blkno, p_blocks, bhs);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* Drop the buffers immediately; the read itself was the
		 * point, the data is not used here. */
		for(i = 0; i < p_blocks; i++) {
			brelse(bhs[i]);
			bhs[i] = NULL;
		}

		v_blkno += p_blocks;
	}

bail:
	/* brelse(NULL) is a no-op, so releasing the whole array is safe. */
	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
		brelse(bhs[i]);
	mlog_exit(status);
	return status;
}
1163 1163
/*
 * Work item describing second-stage cleanup for one recovered slot.
 * Queued on journal->j_la_cleanups and consumed by
 * ocfs2_complete_recovery().
 */
struct ocfs2_la_recovery_item {
	struct list_head lri_list;		/* entry on j_la_cleanups */
	int lri_slot;				/* slot that was recovered */
	struct ocfs2_dinode *lri_la_dinode;	/* local alloc copy; kfree'd after use */
	struct ocfs2_dinode *lri_tl_dinode;	/* truncate log copy; kfree'd after use */
	struct ocfs2_quota_recovery *lri_qrec;	/* quota recovery state */
};
1171 1171
/* Does the second half of the recovery process. By this point, the
 * node is marked clean and can actually be considered recovered,
 * hence it's no longer in the recovery map, but there's still some
 * cleanup we can do which shouldn't happen within the recovery thread
 * as locking in that context becomes very difficult if we are to take
 * recovering nodes into account.
 *
 * NOTE: This function can and will sleep on recovery of other nodes
 * during cluster locking, just like any other ocfs2 process.
 */
void ocfs2_complete_recovery(struct work_struct *work)
{
	int ret;
	struct ocfs2_journal *journal =
		container_of(work, struct ocfs2_journal, j_recovery_work);
	struct ocfs2_super *osb = journal->j_osb;
	struct ocfs2_dinode *la_dinode, *tl_dinode;
	struct ocfs2_la_recovery_item *item, *n;
	struct ocfs2_quota_recovery *qrec;
	LIST_HEAD(tmp_la_list);

	mlog_entry_void();

	mlog(0, "completing recovery from keventd\n");

	/* Splice pending items onto a private list so j_lock is not held
	 * across the (sleeping) cleanup work below. */
	spin_lock(&journal->j_lock);
	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
	spin_unlock(&journal->j_lock);

	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
		list_del_init(&item->lri_list);

		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);

		ocfs2_wait_on_quotas(osb);

		la_dinode = item->lri_la_dinode;
		if (la_dinode) {
			mlog(0, "Clean up local alloc %llu\n",
			     (unsigned long long)le64_to_cpu(la_dinode->i_blkno));

			ret = ocfs2_complete_local_alloc_recovery(osb,
								  la_dinode);
			if (ret < 0)
				mlog_errno(ret);

			kfree(la_dinode);
		}

		tl_dinode = item->lri_tl_dinode;
		if (tl_dinode) {
			mlog(0, "Clean up truncate log %llu\n",
			     (unsigned long long)le64_to_cpu(tl_dinode->i_blkno));

			ret = ocfs2_complete_truncate_log_recovery(osb,
								   tl_dinode);
			if (ret < 0)
				mlog_errno(ret);

			kfree(tl_dinode);
		}

		ret = ocfs2_recover_orphans(osb, item->lri_slot);
		if (ret < 0)
			mlog_errno(ret);

		qrec = item->lri_qrec;
		if (qrec) {
			mlog(0, "Recovering quota files");
			ret = ocfs2_finish_quota_recovery(osb, qrec,
							  item->lri_slot);
			if (ret < 0)
				mlog_errno(ret);
			/* Recovery info is already freed now */
		}

		kfree(item);
	}

	mlog(0, "Recovery completion\n");
	mlog_exit_void();
}
1254 1254
1255 /* NOTE: This function always eats your references to la_dinode and 1255 /* NOTE: This function always eats your references to la_dinode and
1256 * tl_dinode, either manually on error, or by passing them to 1256 * tl_dinode, either manually on error, or by passing them to
1257 * ocfs2_complete_recovery */ 1257 * ocfs2_complete_recovery */
1258 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 1258 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1259 int slot_num, 1259 int slot_num,
1260 struct ocfs2_dinode *la_dinode, 1260 struct ocfs2_dinode *la_dinode,
1261 struct ocfs2_dinode *tl_dinode, 1261 struct ocfs2_dinode *tl_dinode,
1262 struct ocfs2_quota_recovery *qrec) 1262 struct ocfs2_quota_recovery *qrec)
1263 { 1263 {
1264 struct ocfs2_la_recovery_item *item; 1264 struct ocfs2_la_recovery_item *item;
1265 1265
1266 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); 1266 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
1267 if (!item) { 1267 if (!item) {
1268 /* Though we wish to avoid it, we are in fact safe in 1268 /* Though we wish to avoid it, we are in fact safe in
1269 * skipping local alloc cleanup as fsck.ocfs2 is more 1269 * skipping local alloc cleanup as fsck.ocfs2 is more
1270 * than capable of reclaiming unused space. */ 1270 * than capable of reclaiming unused space. */
1271 if (la_dinode) 1271 if (la_dinode)
1272 kfree(la_dinode); 1272 kfree(la_dinode);
1273 1273
1274 if (tl_dinode) 1274 if (tl_dinode)
1275 kfree(tl_dinode); 1275 kfree(tl_dinode);
1276 1276
1277 if (qrec) 1277 if (qrec)
1278 ocfs2_free_quota_recovery(qrec); 1278 ocfs2_free_quota_recovery(qrec);
1279 1279
1280 mlog_errno(-ENOMEM); 1280 mlog_errno(-ENOMEM);
1281 return; 1281 return;
1282 } 1282 }
1283 1283
1284 INIT_LIST_HEAD(&item->lri_list); 1284 INIT_LIST_HEAD(&item->lri_list);
1285 item->lri_la_dinode = la_dinode; 1285 item->lri_la_dinode = la_dinode;
1286 item->lri_slot = slot_num; 1286 item->lri_slot = slot_num;
1287 item->lri_tl_dinode = tl_dinode; 1287 item->lri_tl_dinode = tl_dinode;
1288 item->lri_qrec = qrec; 1288 item->lri_qrec = qrec;
1289 1289
1290 spin_lock(&journal->j_lock); 1290 spin_lock(&journal->j_lock);
1291 list_add_tail(&item->lri_list, &journal->j_la_cleanups); 1291 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
1292 queue_work(ocfs2_wq, &journal->j_recovery_work); 1292 queue_work(ocfs2_wq, &journal->j_recovery_work);
1293 spin_unlock(&journal->j_lock); 1293 spin_unlock(&journal->j_lock);
1294 } 1294 }
1295 1295
/* Called by the mount code to queue recovery the last part of
 * recovery for its own and offline slot(s). */
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
{
	struct ocfs2_journal *journal = osb->journal;

	/* No need to queue up our truncate_log as regular cleanup will catch
	 * that */
	ocfs2_queue_recovery_completion(journal, osb->slot_num,
					osb->local_alloc_copy, NULL, NULL);
	ocfs2_schedule_truncate_log_flush(osb, 0);

	/* ocfs2_queue_recovery_completion() now owns local_alloc_copy. */
	osb->local_alloc_copy = NULL;
	osb->dirty = 0;

	/* queue to recover orphan slots for all offline slots */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
	ocfs2_queue_replay_slots(osb);
	ocfs2_free_replay_slots(osb);
}
1316 1316
1317 void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) 1317 void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1318 { 1318 {
1319 if (osb->quota_rec) { 1319 if (osb->quota_rec) {
1320 ocfs2_queue_recovery_completion(osb->journal, 1320 ocfs2_queue_recovery_completion(osb->journal,
1321 osb->slot_num, 1321 osb->slot_num,
1322 NULL, 1322 NULL,
1323 NULL, 1323 NULL,
1324 osb->quota_rec); 1324 osb->quota_rec);
1325 osb->quota_rec = NULL; 1325 osb->quota_rec = NULL;
1326 } 1326 }
1327 } 1327 }
1328 1328
1329 static int __ocfs2_recovery_thread(void *arg) 1329 static int __ocfs2_recovery_thread(void *arg)
1330 { 1330 {
1331 int status, node_num, slot_num; 1331 int status, node_num, slot_num;
1332 struct ocfs2_super *osb = arg; 1332 struct ocfs2_super *osb = arg;
1333 struct ocfs2_recovery_map *rm = osb->recovery_map; 1333 struct ocfs2_recovery_map *rm = osb->recovery_map;
1334 int *rm_quota = NULL; 1334 int *rm_quota = NULL;
1335 int rm_quota_used = 0, i; 1335 int rm_quota_used = 0, i;
1336 struct ocfs2_quota_recovery *qrec; 1336 struct ocfs2_quota_recovery *qrec;
1337 1337
1338 mlog_entry_void(); 1338 mlog_entry_void();
1339 1339
1340 status = ocfs2_wait_on_mount(osb); 1340 status = ocfs2_wait_on_mount(osb);
1341 if (status < 0) { 1341 if (status < 0) {
1342 goto bail; 1342 goto bail;
1343 } 1343 }
1344 1344
1345 rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS); 1345 rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
1346 if (!rm_quota) { 1346 if (!rm_quota) {
1347 status = -ENOMEM; 1347 status = -ENOMEM;
1348 goto bail; 1348 goto bail;
1349 } 1349 }
1350 restart: 1350 restart:
1351 status = ocfs2_super_lock(osb, 1); 1351 status = ocfs2_super_lock(osb, 1);
1352 if (status < 0) { 1352 if (status < 0) {
1353 mlog_errno(status); 1353 mlog_errno(status);
1354 goto bail; 1354 goto bail;
1355 } 1355 }
1356 1356
1357 status = ocfs2_compute_replay_slots(osb); 1357 status = ocfs2_compute_replay_slots(osb);
1358 if (status < 0) 1358 if (status < 0)
1359 mlog_errno(status); 1359 mlog_errno(status);
1360 1360
1361 /* queue recovery for our own slot */ 1361 /* queue recovery for our own slot */
1362 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, 1362 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1363 NULL, NULL); 1363 NULL, NULL);
1364 1364
1365 spin_lock(&osb->osb_lock); 1365 spin_lock(&osb->osb_lock);
1366 while (rm->rm_used) { 1366 while (rm->rm_used) {
1367 /* It's always safe to remove entry zero, as we won't 1367 /* It's always safe to remove entry zero, as we won't
1368 * clear it until ocfs2_recover_node() has succeeded. */ 1368 * clear it until ocfs2_recover_node() has succeeded. */
1369 node_num = rm->rm_entries[0]; 1369 node_num = rm->rm_entries[0];
1370 spin_unlock(&osb->osb_lock); 1370 spin_unlock(&osb->osb_lock);
1371 mlog(0, "checking node %d\n", node_num); 1371 mlog(0, "checking node %d\n", node_num);
1372 slot_num = ocfs2_node_num_to_slot(osb, node_num); 1372 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1373 if (slot_num == -ENOENT) { 1373 if (slot_num == -ENOENT) {
1374 status = 0; 1374 status = 0;
1375 mlog(0, "no slot for this node, so no recovery" 1375 mlog(0, "no slot for this node, so no recovery"
1376 "required.\n"); 1376 "required.\n");
1377 goto skip_recovery; 1377 goto skip_recovery;
1378 } 1378 }
1379 mlog(0, "node %d was using slot %d\n", node_num, slot_num); 1379 mlog(0, "node %d was using slot %d\n", node_num, slot_num);
1380 1380
1381 /* It is a bit subtle with quota recovery. We cannot do it 1381 /* It is a bit subtle with quota recovery. We cannot do it
1382 * immediately because we have to obtain cluster locks from 1382 * immediately because we have to obtain cluster locks from
1383 * quota files and we also don't want to just skip it because 1383 * quota files and we also don't want to just skip it because
1384 * then quota usage would be out of sync until some node takes 1384 * then quota usage would be out of sync until some node takes
1385 * the slot. So we remember which nodes need quota recovery 1385 * the slot. So we remember which nodes need quota recovery
1386 * and when everything else is done, we recover quotas. */ 1386 * and when everything else is done, we recover quotas. */
1387 for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); 1387 for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
1388 if (i == rm_quota_used) 1388 if (i == rm_quota_used)
1389 rm_quota[rm_quota_used++] = slot_num; 1389 rm_quota[rm_quota_used++] = slot_num;
1390 1390
1391 status = ocfs2_recover_node(osb, node_num, slot_num); 1391 status = ocfs2_recover_node(osb, node_num, slot_num);
1392 skip_recovery: 1392 skip_recovery:
1393 if (!status) { 1393 if (!status) {
1394 ocfs2_recovery_map_clear(osb, node_num); 1394 ocfs2_recovery_map_clear(osb, node_num);
1395 } else { 1395 } else {
1396 mlog(ML_ERROR, 1396 mlog(ML_ERROR,
1397 "Error %d recovering node %d on device (%u,%u)!\n", 1397 "Error %d recovering node %d on device (%u,%u)!\n",
1398 status, node_num, 1398 status, node_num,
1399 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1399 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1400 mlog(ML_ERROR, "Volume requires unmount.\n"); 1400 mlog(ML_ERROR, "Volume requires unmount.\n");
1401 } 1401 }
1402 1402
1403 spin_lock(&osb->osb_lock); 1403 spin_lock(&osb->osb_lock);
1404 } 1404 }
1405 spin_unlock(&osb->osb_lock); 1405 spin_unlock(&osb->osb_lock);
1406 mlog(0, "All nodes recovered\n"); 1406 mlog(0, "All nodes recovered\n");
1407 1407
1408 /* Refresh all journal recovery generations from disk */ 1408 /* Refresh all journal recovery generations from disk */
1409 status = ocfs2_check_journals_nolocks(osb); 1409 status = ocfs2_check_journals_nolocks(osb);
1410 status = (status == -EROFS) ? 0 : status; 1410 status = (status == -EROFS) ? 0 : status;
1411 if (status < 0) 1411 if (status < 0)
1412 mlog_errno(status); 1412 mlog_errno(status);
1413 1413
1414 /* Now it is right time to recover quotas... We have to do this under 1414 /* Now it is right time to recover quotas... We have to do this under
1415 * superblock lock so that noone can start using the slot (and crash) 1415 * superblock lock so that noone can start using the slot (and crash)
1416 * before we recover it */ 1416 * before we recover it */
1417 for (i = 0; i < rm_quota_used; i++) { 1417 for (i = 0; i < rm_quota_used; i++) {
1418 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); 1418 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
1419 if (IS_ERR(qrec)) { 1419 if (IS_ERR(qrec)) {
1420 status = PTR_ERR(qrec); 1420 status = PTR_ERR(qrec);
1421 mlog_errno(status); 1421 mlog_errno(status);
1422 continue; 1422 continue;
1423 } 1423 }
1424 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], 1424 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
1425 NULL, NULL, qrec); 1425 NULL, NULL, qrec);
1426 } 1426 }
1427 1427
1428 ocfs2_super_unlock(osb, 1); 1428 ocfs2_super_unlock(osb, 1);
1429 1429
1430 /* queue recovery for offline slots */ 1430 /* queue recovery for offline slots */
1431 ocfs2_queue_replay_slots(osb); 1431 ocfs2_queue_replay_slots(osb);
1432 1432
1433 bail: 1433 bail:
1434 mutex_lock(&osb->recovery_lock); 1434 mutex_lock(&osb->recovery_lock);
1435 if (!status && !ocfs2_recovery_completed(osb)) { 1435 if (!status && !ocfs2_recovery_completed(osb)) {
1436 mutex_unlock(&osb->recovery_lock); 1436 mutex_unlock(&osb->recovery_lock);
1437 goto restart; 1437 goto restart;
1438 } 1438 }
1439 1439
1440 ocfs2_free_replay_slots(osb); 1440 ocfs2_free_replay_slots(osb);
1441 osb->recovery_thread_task = NULL; 1441 osb->recovery_thread_task = NULL;
1442 mb(); /* sync with ocfs2_recovery_thread_running */ 1442 mb(); /* sync with ocfs2_recovery_thread_running */
1443 wake_up(&osb->recovery_event); 1443 wake_up(&osb->recovery_event);
1444 1444
1445 mutex_unlock(&osb->recovery_lock); 1445 mutex_unlock(&osb->recovery_lock);
1446 1446
1447 if (rm_quota) 1447 if (rm_quota)
1448 kfree(rm_quota); 1448 kfree(rm_quota);
1449 1449
1450 mlog_exit(status); 1450 mlog_exit(status);
1451 /* no one is callint kthread_stop() for us so the kthread() api 1451 /* no one is callint kthread_stop() for us so the kthread() api
1452 * requires that we call do_exit(). And it isn't exported, but 1452 * requires that we call do_exit(). And it isn't exported, but
1453 * complete_and_exit() seems to be a minimal wrapper around it. */ 1453 * complete_and_exit() seems to be a minimal wrapper around it. */
1454 complete_and_exit(NULL, status); 1454 complete_and_exit(NULL, status);
1455 return status; 1455 return status;
1456 } 1456 }
1457 1457
/*
 * Request recovery of @node_num's slot. Marks the node in the recovery
 * map and, if no recovery thread is already running, spawns the
 * "ocfs2rec" kthread (__ocfs2_recovery_thread) to do the actual work.
 * The whole sequence is serialized by osb->recovery_lock, and it is a
 * no-op once osb->disable_recovery is set (presumably during unmount —
 * confirm against the setter of disable_recovery).
 */
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
		   node_num, osb->node_num);

	mutex_lock(&osb->recovery_lock);
	if (osb->disable_recovery)
		goto out;

	/* People waiting on recovery will wait on
	 * the recovery map to empty. */
	if (ocfs2_recovery_map_set(osb, node_num))
		mlog(0, "node %d already in recovery map.\n", node_num);

	mlog(0, "starting recovery thread...\n");

	/* Only one recovery thread at a time; it will pick up any nodes
	 * added to the recovery map above. */
	if (osb->recovery_thread_task)
		goto out;

	osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
						"ocfs2rec");
	if (IS_ERR(osb->recovery_thread_task)) {
		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
		osb->recovery_thread_task = NULL;
	}

out:
	mutex_unlock(&osb->recovery_lock);
	/* Wake anyone waiting on recovery state changes, even on the
	 * early-exit paths. */
	wake_up(&osb->recovery_event);

	mlog_exit_void();
}
1490 1490
1491 static int ocfs2_read_journal_inode(struct ocfs2_super *osb, 1491 static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1492 int slot_num, 1492 int slot_num,
1493 struct buffer_head **bh, 1493 struct buffer_head **bh,
1494 struct inode **ret_inode) 1494 struct inode **ret_inode)
1495 { 1495 {
1496 int status = -EACCES; 1496 int status = -EACCES;
1497 struct inode *inode = NULL; 1497 struct inode *inode = NULL;
1498 1498
1499 BUG_ON(slot_num >= osb->max_slots); 1499 BUG_ON(slot_num >= osb->max_slots);
1500 1500
1501 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1501 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1502 slot_num); 1502 slot_num);
1503 if (!inode || is_bad_inode(inode)) { 1503 if (!inode || is_bad_inode(inode)) {
1504 mlog_errno(status); 1504 mlog_errno(status);
1505 goto bail; 1505 goto bail;
1506 } 1506 }
1507 SET_INODE_JOURNAL(inode); 1507 SET_INODE_JOURNAL(inode);
1508 1508
1509 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE); 1509 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1510 if (status < 0) { 1510 if (status < 0) {
1511 mlog_errno(status); 1511 mlog_errno(status);
1512 goto bail; 1512 goto bail;
1513 } 1513 }
1514 1514
1515 status = 0; 1515 status = 0;
1516 1516
1517 bail: 1517 bail:
1518 if (inode) { 1518 if (inode) {
1519 if (status || !ret_inode) 1519 if (status || !ret_inode)
1520 iput(inode); 1520 iput(inode);
1521 else 1521 else
1522 *ret_inode = inode; 1522 *ret_inode = inode;
1523 } 1523 }
1524 return status; 1524 return status;
1525 } 1525 }
1526 1526
/* Does the actual journal replay and marks the journal inode as
 * clean. Will only replay if the journal inode is marked dirty. */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
				int node_num,
				int slot_num)
{
	int status;
	int got_lock = 0;	/* set once we hold the journal cluster lock */
	unsigned int flags;
	struct inode *inode = NULL;
	struct ocfs2_dinode *fe;
	journal_t *journal = NULL;
	struct buffer_head *bh = NULL;
	u32 slot_reco_gen;

	/* Dirty read (no cluster lock yet) of the journal inode, just to
	 * sample the slot's current recovery generation. */
	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
	if (status) {
		mlog_errno(status);
		goto done;
	}

	fe = (struct ocfs2_dinode *)bh->b_data;
	slot_reco_gen = ocfs2_get_recovery_generation(fe);
	brelse(bh);
	bh = NULL;

	/*
	 * As the fs recovery is asynchronous, there is a small chance that
	 * another node mounted (and recovered) the slot before the recovery
	 * thread could get the lock. To handle that, we dirty read the journal
	 * inode for that slot to get the recovery generation. If it is
	 * different than what we expected, the slot has been recovered.
	 * If not, it needs recovery.
	 */
	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
		mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		/* -EBUSY is treated by the caller as "someone else did it". */
		status = -EBUSY;
		goto done;
	}

	/* Continue with recovery as the journal has not yet been recovered */

	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
	if (status < 0) {
		mlog(0, "status returned from ocfs2_inode_lock=%d\n", status);
		if (status != -ERESTARTSYS)
			mlog(ML_ERROR, "Could not lock journal!\n");
		goto done;
	}
	got_lock = 1;

	/* bh was refreshed under the lock; re-derive the dinode pointer. */
	fe = (struct ocfs2_dinode *) bh->b_data;

	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	slot_reco_gen = ocfs2_get_recovery_generation(fe);

	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
		mlog(0, "No recovery required for node %d\n", node_num);
		/* Refresh recovery generation for the slot */
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		goto done;
	}

	/* we need to run complete recovery for offline orphan slots */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);

	mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
	     node_num, slot_num,
	     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));

	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);

	/* Pull the whole journal file into memory before replaying it. */
	status = ocfs2_force_read_journal(inode);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	mlog(0, "calling journal_init_inode\n");
	journal = jbd2_journal_init_inode(inode);
	if (journal == NULL) {
		mlog(ML_ERROR, "Linux journal layer error\n");
		status = -EIO;
		goto done;
	}

	/* jbd2_journal_load performs the actual replay of the dirty log. */
	status = jbd2_journal_load(journal);
	if (status < 0) {
		mlog_errno(status);
		/* NOTE(review): the extra igrab appears to balance an iput
		 * done inside jbd2_journal_destroy so our iput at "done:"
		 * stays valid — confirm against jbd2. */
		if (!igrab(inode))
			BUG();
		jbd2_journal_destroy(journal);
		goto done;
	}

	ocfs2_clear_journal_error(osb->sb, journal, slot_num);

	/* wipe the journal */
	mlog(0, "flushing the journal.\n");
	jbd2_journal_lock_updates(journal);
	status = jbd2_journal_flush(journal);
	jbd2_journal_unlock_updates(journal);
	if (status < 0)
		mlog_errno(status);

	/* This will mark the node clean */
	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
	fe->id1.journal1.ij_flags = cpu_to_le32(flags);

	/* Increment recovery generation to indicate successful recovery */
	ocfs2_bump_recovery_generation(fe);
	osb->slot_recovery_generations[slot_num] =
					ocfs2_get_recovery_generation(fe);

	/* Recompute metadata ECC before writing the updated dinode back. */
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
	status = ocfs2_write_block(osb, bh, inode);
	if (status < 0)
		mlog_errno(status);

	/* See the NOTE above: keep a reference across journal teardown. */
	if (!igrab(inode))
		BUG();

	jbd2_journal_destroy(journal);

done:
	/* drop the lock on this nodes journal */
	if (got_lock)
		ocfs2_inode_unlock(inode, 1);

	if (inode)
		iput(inode);

	brelse(bh);

	mlog_exit(status);
	return status;
}
1667 1667
/*
 * Do the most important parts of node recovery:
 *  - Replay its journal
 *  - Stamp a clean local allocator file
 *  - Stamp a clean truncate log
 *  - Mark the node clean
 *
 * If this function completes without error, a node in OCFS2 can be
 * said to have been safely recovered. As a result, failure during the
 * second part of a nodes recovery process (local alloc recovery) is
 * far less concerning.
 */
static int ocfs2_recover_node(struct ocfs2_super *osb,
			      int node_num, int slot_num)
{
	int status = 0;
	struct ocfs2_dinode *la_copy = NULL;
	struct ocfs2_dinode *tl_copy = NULL;

	mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
		   node_num, slot_num, osb->node_num);

	/* Should not ever be called to recover ourselves -- in that
	 * case we should've called ocfs2_journal_load instead. */
	BUG_ON(osb->node_num == node_num);

	status = ocfs2_replay_journal(osb, node_num, slot_num);
	if (status < 0) {
		/* -EBUSY means another node raced us and already
		 * recovered this slot; that is success for us. */
		if (status == -EBUSY) {
			mlog(0, "Skipping recovery for slot %u (node %u) "
			     "as another node has recovered it\n", slot_num,
			     node_num);
			status = 0;
			goto done;
		}
		mlog_errno(status);
		goto done;
	}

	/* Stamp a clean local alloc file AFTER recovering the journal... */
	status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	/* An error from begin_truncate_log_recovery is not
	 * serious enough to warrant halting the rest of
	 * recovery. */
	status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
	if (status < 0)
		mlog_errno(status);

	/* Likewise, this would be a strange but ultimately not so
	 * harmful place to get an error... */
	status = ocfs2_clear_slot(osb, slot_num);
	if (status < 0)
		mlog_errno(status);

	/* This will kfree the memory pointed to by la_copy and tl_copy */
	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
					tl_copy, NULL);

	status = 0;
done:

	mlog_exit(status);
	return status;
}
1737 1737
1738 /* Test node liveness by trylocking his journal. If we get the lock, 1738 /* Test node liveness by trylocking his journal. If we get the lock,
1739 * we drop it here. Return 0 if we got the lock, -EAGAIN if node is 1739 * we drop it here. Return 0 if we got the lock, -EAGAIN if node is
1740 * still alive (we couldn't get the lock) and < 0 on error. */ 1740 * still alive (we couldn't get the lock) and < 0 on error. */
1741 static int ocfs2_trylock_journal(struct ocfs2_super *osb, 1741 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
1742 int slot_num) 1742 int slot_num)
1743 { 1743 {
1744 int status, flags; 1744 int status, flags;
1745 struct inode *inode = NULL; 1745 struct inode *inode = NULL;
1746 1746
1747 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1747 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1748 slot_num); 1748 slot_num);
1749 if (inode == NULL) { 1749 if (inode == NULL) {
1750 mlog(ML_ERROR, "access error\n"); 1750 mlog(ML_ERROR, "access error\n");
1751 status = -EACCES; 1751 status = -EACCES;
1752 goto bail; 1752 goto bail;
1753 } 1753 }
1754 if (is_bad_inode(inode)) { 1754 if (is_bad_inode(inode)) {
1755 mlog(ML_ERROR, "access error (bad inode)\n"); 1755 mlog(ML_ERROR, "access error (bad inode)\n");
1756 iput(inode); 1756 iput(inode);
1757 inode = NULL; 1757 inode = NULL;
1758 status = -EACCES; 1758 status = -EACCES;
1759 goto bail; 1759 goto bail;
1760 } 1760 }
1761 SET_INODE_JOURNAL(inode); 1761 SET_INODE_JOURNAL(inode);
1762 1762
1763 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; 1763 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
1764 status = ocfs2_inode_lock_full(inode, NULL, 1, flags); 1764 status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
1765 if (status < 0) { 1765 if (status < 0) {
1766 if (status != -EAGAIN) 1766 if (status != -EAGAIN)
1767 mlog_errno(status); 1767 mlog_errno(status);
1768 goto bail; 1768 goto bail;
1769 } 1769 }
1770 1770
1771 ocfs2_inode_unlock(inode, 1); 1771 ocfs2_inode_unlock(inode, 1);
1772 bail: 1772 bail:
1773 if (inode) 1773 if (inode)
1774 iput(inode); 1774 iput(inode);
1775 1775
1776 return status; 1776 return status;
1777 } 1777 }
1778 1778
/* Call this underneath ocfs2_super_lock. It also assumes that the
 * slot info struct has been updated from disk. */
int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
{
	unsigned int node_num;
	int status, i;
	u32 gen;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *di;

	/* This is called with the super block cluster lock, so we
	 * know that the slot map can't change underneath us. */

	for (i = 0; i < osb->max_slots; i++) {
		/* Read journal inode to get the recovery generation */
		status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
		di = (struct ocfs2_dinode *)bh->b_data;
		gen = ocfs2_get_recovery_generation(di);
		brelse(bh);
		bh = NULL;

		/* osb_lock protects slot_recovery_generations and the
		 * slot-to-node lookup below. */
		spin_lock(&osb->osb_lock);
		osb->slot_recovery_generations[i] = gen;

		mlog(0, "Slot %u recovery generation is %u\n", i,
		     osb->slot_recovery_generations[i]);

		/* Our own slot never needs liveness probing. */
		if (i == osb->slot_num) {
			spin_unlock(&osb->osb_lock);
			continue;
		}

		/* Empty slot: nothing to recover. */
		status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
		if (status == -ENOENT) {
			spin_unlock(&osb->osb_lock);
			continue;
		}

		/* Already queued for recovery; don't probe it again. */
		if (__ocfs2_recovery_map_test(osb, node_num)) {
			spin_unlock(&osb->osb_lock);
			continue;
		}
		spin_unlock(&osb->osb_lock);

		/* Ok, we have a slot occupied by another node which
		 * is not in the recovery map. We trylock his journal
		 * file here to test if he's alive. */
		status = ocfs2_trylock_journal(osb, i);
		if (!status) {
			/* Since we're called from mount, we know that
			 * the recovery thread can't race us on
			 * setting / checking the recovery bits. */
			ocfs2_recovery_thread(osb, node_num);
		} else if ((status < 0) && (status != -EAGAIN)) {
			mlog_errno(status);
			goto bail;
		}
	}

	status = 0;
bail:
	mlog_exit(status);
	return status;
}
1847 1847
1848 /* 1848 /*
1849 * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some 1849 * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
1850 * randomness to the timeout to minimize multple nodes firing the timer at the 1850 * randomness to the timeout to minimize multple nodes firing the timer at the
1851 * same time. 1851 * same time.
1852 */ 1852 */
1853 static inline unsigned long ocfs2_orphan_scan_timeout(void) 1853 static inline unsigned long ocfs2_orphan_scan_timeout(void)
1854 { 1854 {
1855 unsigned long time; 1855 unsigned long time;
1856 1856
1857 get_random_bytes(&time, sizeof(time)); 1857 get_random_bytes(&time, sizeof(time));
1858 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000); 1858 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1859 return msecs_to_jiffies(time); 1859 return msecs_to_jiffies(time);
1860 } 1860 }
1861 1861
/*
 * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
 * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
 * is done to catch any orphans that are left over in orphan directories.
 *
 * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
 * seconds. It gets an EX lock on os_lockres and checks sequence number
 * stored in LVB. If the sequence number has changed, it means some other
 * node has done the scan. This node skips the scan and tracks the
 * sequence number. If the sequence number didn't change, it means a scan
 * hasn't happened. The node queues a scan and increments the
 * sequence number in the LVB.
 */
void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
{
	struct ocfs2_orphan_scan *os;
	int status, i;
	u32 seqno = 0;

	os = &osb->osb_orphan_scan;

	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
		goto out;

	/* EX lock also reads the cluster-wide sequence number into seqno. */
	status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
	if (status < 0) {
		if (status != -EAGAIN)
			mlog_errno(status);
		goto out;
	}

	/* Do no queue the tasks if the volume is being umounted */
	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
		goto unlock;

	/* Sequence number moved since our last look: some other node
	 * already scanned; just remember the new value and skip. */
	if (os->os_seqno != seqno) {
		os->os_seqno = seqno;
		goto unlock;
	}

	for (i = 0; i < osb->max_slots; i++)
		ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
						NULL);
	/*
	 * We queued a recovery on orphan slots, increment the sequence
	 * number and update LVB so other node will skip the scan for a while
	 */
	seqno++;
	os->os_count++;
	os->os_scantime = CURRENT_TIME;
unlock:
	/* The unlock writes seqno back to the LVB for other nodes. */
	ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
out:
	return;
}
1917 1917
/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
void ocfs2_orphan_scan_work(struct work_struct *work)
{
	struct ocfs2_orphan_scan *os;
	struct ocfs2_super *osb;

	os = container_of(work, struct ocfs2_orphan_scan,
			  os_orphan_scan_work.work);
	osb = os->os_osb;

	/* os_lock serializes this worker against ocfs2_orphan_scan_stop(),
	 * so we never re-arm the delayed work after stop has cancelled it. */
	mutex_lock(&os->os_lock);
	ocfs2_queue_orphan_scan(osb);
	/* Re-arm only while the scan is still active. */
	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
		schedule_delayed_work(&os->os_orphan_scan_work,
				      ocfs2_orphan_scan_timeout());
	mutex_unlock(&os->os_lock);
}
1935 1935
1936 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) 1936 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1937 { 1937 {
1938 struct ocfs2_orphan_scan *os; 1938 struct ocfs2_orphan_scan *os;
1939 1939
1940 os = &osb->osb_orphan_scan; 1940 os = &osb->osb_orphan_scan;
1941 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1941 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1942 mutex_lock(&os->os_lock); 1942 mutex_lock(&os->os_lock);
1943 cancel_delayed_work(&os->os_orphan_scan_work); 1943 cancel_delayed_work(&os->os_orphan_scan_work);
1944 mutex_unlock(&os->os_lock); 1944 mutex_unlock(&os->os_lock);
1945 } 1945 }
1946 1946
1947 int ocfs2_orphan_scan_init(struct ocfs2_super *osb) 1947 int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1948 { 1948 {
1949 struct ocfs2_orphan_scan *os; 1949 struct ocfs2_orphan_scan *os;
1950 1950
1951 os = &osb->osb_orphan_scan; 1951 os = &osb->osb_orphan_scan;
1952 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); 1952 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1953 os->os_osb = osb; 1953 os->os_osb = osb;
1954 os->os_count = 0; 1954 os->os_count = 0;
1955 os->os_seqno = 0;
1955 os->os_scantime = CURRENT_TIME; 1956 os->os_scantime = CURRENT_TIME;
1956 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1957 1958
1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, 1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work,
1959 ocfs2_orphan_scan_work); 1960 ocfs2_orphan_scan_work);
1960 schedule_delayed_work(&os->os_orphan_scan_work, 1961 schedule_delayed_work(&os->os_orphan_scan_work,
1961 ocfs2_orphan_scan_timeout()); 1962 ocfs2_orphan_scan_timeout());
1962 return 0; 1963 return 0;
1963 } 1964 }
1964 1965
/* Context handed to ocfs2_orphan_filldir() while iterating an orphan dir. */
struct ocfs2_orphan_filldir_priv {
	struct inode *head;	/* head of list linked through ip_next_orphan */
	struct ocfs2_super *osb;
};
1969 1970
1970 static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, 1971 static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
1971 loff_t pos, u64 ino, unsigned type) 1972 loff_t pos, u64 ino, unsigned type)
1972 { 1973 {
1973 struct ocfs2_orphan_filldir_priv *p = priv; 1974 struct ocfs2_orphan_filldir_priv *p = priv;
1974 struct inode *iter; 1975 struct inode *iter;
1975 1976
1976 if (name_len == 1 && !strncmp(".", name, 1)) 1977 if (name_len == 1 && !strncmp(".", name, 1))
1977 return 0; 1978 return 0;
1978 if (name_len == 2 && !strncmp("..", name, 2)) 1979 if (name_len == 2 && !strncmp("..", name, 2))
1979 return 0; 1980 return 0;
1980 1981
1981 /* Skip bad inodes so that recovery can continue */ 1982 /* Skip bad inodes so that recovery can continue */
1982 iter = ocfs2_iget(p->osb, ino, 1983 iter = ocfs2_iget(p->osb, ino,
1983 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0); 1984 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
1984 if (IS_ERR(iter)) 1985 if (IS_ERR(iter))
1985 return 0; 1986 return 0;
1986 1987
1987 mlog(0, "queue orphan %llu\n", 1988 mlog(0, "queue orphan %llu\n",
1988 (unsigned long long)OCFS2_I(iter)->ip_blkno); 1989 (unsigned long long)OCFS2_I(iter)->ip_blkno);
1989 /* No locking is required for the next_orphan queue as there 1990 /* No locking is required for the next_orphan queue as there
1990 * is only ever a single process doing orphan recovery. */ 1991 * is only ever a single process doing orphan recovery. */
1991 OCFS2_I(iter)->ip_next_orphan = p->head; 1992 OCFS2_I(iter)->ip_next_orphan = p->head;
1992 p->head = iter; 1993 p->head = iter;
1993 1994
1994 return 0; 1995 return 0;
1995 } 1996 }
1996 1997
/*
 * Walk a slot's orphan directory under the cluster inode lock and
 * collect every orphan inode (via ocfs2_orphan_filldir) onto *head.
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_queue_orphans(struct ocfs2_super *osb,
			       int slot,
			       struct inode **head)
{
	int status;
	struct inode *orphan_dir_inode = NULL;
	struct ocfs2_orphan_filldir_priv priv;
	loff_t pos = 0;

	priv.osb = osb;
	priv.head = *head;

	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
						       ORPHAN_DIR_SYSTEM_INODE,
						       slot);
	if (!orphan_dir_inode) {
		status = -ENOENT;
		mlog_errno(status);
		return status;
	}

	/* i_mutex before the cluster lock; both held across the walk so
	 * entries cannot change while we iget() them. */
	mutex_lock(&orphan_dir_inode->i_mutex);
	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
	if (status < 0) {
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
				   ocfs2_orphan_filldir);
	if (status) {
		mlog_errno(status);
		goto out_cluster;
	}

	/* Hand the (possibly extended) list back to the caller. */
	*head = priv.head;

out_cluster:
	ocfs2_inode_unlock(orphan_dir_inode, 0);
out:
	mutex_unlock(&orphan_dir_inode->i_mutex);
	iput(orphan_dir_inode);
	return status;
}
2041 2042
2042 static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, 2043 static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
2043 int slot) 2044 int slot)
2044 { 2045 {
2045 int ret; 2046 int ret;
2046 2047
2047 spin_lock(&osb->osb_lock); 2048 spin_lock(&osb->osb_lock);
2048 ret = !osb->osb_orphan_wipes[slot]; 2049 ret = !osb->osb_orphan_wipes[slot];
2049 spin_unlock(&osb->osb_lock); 2050 spin_unlock(&osb->osb_lock);
2050 return ret; 2051 return ret;
2051 } 2052 }
2052 2053
/*
 * Advertise that this slot's orphan dir is being recovered and wait
 * for any in-flight orphan wipes on it to drain.
 */
static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
					     int slot)
{
	spin_lock(&osb->osb_lock);
	/* Mark ourselves such that new processes in delete_inode()
	 * know to quit early. */
	ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
	while (osb->osb_orphan_wipes[slot]) {
		/* If any processes are already in the middle of an
		 * orphan wipe on this dir, then we need to wait for
		 * them. */
		spin_unlock(&osb->osb_lock);
		/* Drop the lock while sleeping; recheck the counter under
		 * the lock each time we wake. */
		wait_event_interruptible(osb->osb_wipe_event,
					 ocfs2_orphan_recovery_can_continue(osb, slot));
		spin_lock(&osb->osb_lock);
	}
	spin_unlock(&osb->osb_lock);
}
2071 2072
/* Undo ocfs2_mark_recovering_orphan_dir(): let delete_inode() proceed
 * against this slot's orphan dir again. */
static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
					      int slot)
{
	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
}
2077 2078
2078 /* 2079 /*
2079 * Orphan recovery. Each mounted node has it's own orphan dir which we 2080 * Orphan recovery. Each mounted node has it's own orphan dir which we
2080 * must run during recovery. Our strategy here is to build a list of 2081 * must run during recovery. Our strategy here is to build a list of
2081 * the inodes in the orphan dir and iget/iput them. The VFS does 2082 * the inodes in the orphan dir and iget/iput them. The VFS does
2082 * (most) of the rest of the work. 2083 * (most) of the rest of the work.
2083 * 2084 *
2084 * Orphan recovery can happen at any time, not just mount so we have a 2085 * Orphan recovery can happen at any time, not just mount so we have a
2085 * couple of extra considerations. 2086 * couple of extra considerations.
2086 * 2087 *
2087 * - We grab as many inodes as we can under the orphan dir lock - 2088 * - We grab as many inodes as we can under the orphan dir lock -
2088 * doing iget() outside the orphan dir risks getting a reference on 2089 * doing iget() outside the orphan dir risks getting a reference on
2089 * an invalid inode. 2090 * an invalid inode.
2090 * - We must be sure not to deadlock with other processes on the 2091 * - We must be sure not to deadlock with other processes on the
2091 * system wanting to run delete_inode(). This can happen when they go 2092 * system wanting to run delete_inode(). This can happen when they go
2092 * to lock the orphan dir and the orphan recovery process attempts to 2093 * to lock the orphan dir and the orphan recovery process attempts to
2093 * iget() inside the orphan dir lock. This can be avoided by 2094 * iget() inside the orphan dir lock. This can be avoided by
2094 * advertising our state to ocfs2_delete_inode(). 2095 * advertising our state to ocfs2_delete_inode().
2095 */ 2096 */
2096 static int ocfs2_recover_orphans(struct ocfs2_super *osb, 2097 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2097 int slot) 2098 int slot)
2098 { 2099 {
2099 int ret = 0; 2100 int ret = 0;
2100 struct inode *inode = NULL; 2101 struct inode *inode = NULL;
2101 struct inode *iter; 2102 struct inode *iter;
2102 struct ocfs2_inode_info *oi; 2103 struct ocfs2_inode_info *oi;
2103 2104
2104 mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); 2105 mlog(0, "Recover inodes from orphan dir in slot %d\n", slot);
2105 2106
2106 ocfs2_mark_recovering_orphan_dir(osb, slot); 2107 ocfs2_mark_recovering_orphan_dir(osb, slot);
2107 ret = ocfs2_queue_orphans(osb, slot, &inode); 2108 ret = ocfs2_queue_orphans(osb, slot, &inode);
2108 ocfs2_clear_recovering_orphan_dir(osb, slot); 2109 ocfs2_clear_recovering_orphan_dir(osb, slot);
2109 2110
2110 /* Error here should be noted, but we want to continue with as 2111 /* Error here should be noted, but we want to continue with as
2111 * many queued inodes as we've got. */ 2112 * many queued inodes as we've got. */
2112 if (ret) 2113 if (ret)
2113 mlog_errno(ret); 2114 mlog_errno(ret);
2114 2115
2115 while (inode) { 2116 while (inode) {
2116 oi = OCFS2_I(inode); 2117 oi = OCFS2_I(inode);
2117 mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); 2118 mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno);
2118 2119
2119 iter = oi->ip_next_orphan; 2120 iter = oi->ip_next_orphan;
2120 2121
2121 spin_lock(&oi->ip_lock); 2122 spin_lock(&oi->ip_lock);
2122 /* The remote delete code may have set these on the 2123 /* The remote delete code may have set these on the
2123 * assumption that the other node would wipe them 2124 * assumption that the other node would wipe them
2124 * successfully. If they are still in the node's 2125 * successfully. If they are still in the node's
2125 * orphan dir, we need to reset that state. */ 2126 * orphan dir, we need to reset that state. */
2126 oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); 2127 oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
2127 2128
2128 /* Set the proper information to get us going into 2129 /* Set the proper information to get us going into
2129 * ocfs2_delete_inode. */ 2130 * ocfs2_delete_inode. */
2130 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 2131 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2131 spin_unlock(&oi->ip_lock); 2132 spin_unlock(&oi->ip_lock);
2132 2133
2133 iput(inode); 2134 iput(inode);
2134 2135
2135 inode = iter; 2136 inode = iter;
2136 } 2137 }
2137 2138
2138 return ret; 2139 return ret;
2139 } 2140 }
2140 2141
/*
 * Block until the volume reaches a settled state.  With @quota set we
 * additionally wait for quotas to come up (VOLUME_MOUNTED_QUOTAS).
 * Returns 0 when mounted, -EBUSY if the mount was disabled.
 */
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
{
	/* This check is good because ocfs2 will wait on our recovery
	 * thread before changing it to something other than MOUNTED
	 * or DISABLED. */
	wait_event(osb->osb_mount_event,
		   (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
		   atomic_read(&osb->vol_state) == VOLUME_DISABLED);

	/* If there's an error on mount, then we may never get to the
	 * MOUNTED flag, but this is set right before
	 * dismount_volume() so we can trust it. */
	if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
		mlog(0, "mount error, exiting!\n");
		return -EBUSY;
	}

	return 0;
}
2161 2162
/*
 * Kernel thread that checkpoints the journal whenever transactions
 * accumulate.  Keeps running on shutdown until j_num_trans drains to
 * zero, since final transactions may still be queued during unmount.
 */
static int ocfs2_commit_thread(void *arg)
{
	int status;
	struct ocfs2_super *osb = arg;
	struct ocfs2_journal *journal = osb->journal;

	/* we can trust j_num_trans here because _should_stop() is only set in
	 * shutdown and nobody other than ourselves should be able to start
	 * transactions.  committing on shutdown might take a few iterations
	 * as final transactions put deleted inodes on the list */
	while (!(kthread_should_stop() &&
		 atomic_read(&journal->j_num_trans) == 0)) {

		/* Sleep until there is work or we are asked to stop. */
		wait_event_interruptible(osb->checkpoint_event,
					 atomic_read(&journal->j_num_trans)
					 || kthread_should_stop());

		status = ocfs2_commit_cache(osb);
		if (status < 0)
			mlog_errno(status);

		if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
			mlog(ML_KTHREAD,
			     "commit_thread: %u transactions pending on "
			     "shutdown\n",
			     atomic_read(&journal->j_num_trans));
		}
	}

	return 0;
}
2193 2194
/* Reads all the journal inodes without taking any cluster locks. Used
 * for hard readonly access to determine whether any journal requires
 * recovery.  Also used to refresh the recovery generation numbers after
 * a journal has been recovered by another node.
 */
int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
{
	int ret = 0;
	unsigned int slot;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int journal_dirty = 0;

	for(slot = 0; slot < osb->max_slots; slot++) {
		ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		di = (struct ocfs2_dinode *) di_bh->b_data;

		/* Refresh this slot's recovery generation from disk. */
		osb->slot_recovery_generations[slot] =
			ocfs2_get_recovery_generation(di);

		/* Remember if any slot's journal was left dirty. */
		if (le32_to_cpu(di->id1.journal1.ij_flags) &
		    OCFS2_JOURNAL_DIRTY_FL)
			journal_dirty = 1;

		brelse(di_bh);
		di_bh = NULL;
	}

out:
	/* A dirty journal means recovery is needed; report -EROFS even
	 * if a later slot read failed. */
	if (journal_dirty)
		ret = -EROFS;
	return ret;
}
2232 2233