Commit 5bc970e803ad2b1f26771f39376a79dbf0f5bf64

Authored by Sunil Mushran
Committed by Joel Becker
1 parent 0cc9d52578

ocfs2: Use hrtimer to track ocfs2 fs lock stats

Patch makes use of the hrtimer to track times in ocfs2 lock stats.

The patch is a bit involved to ensure no additional impact on the memory
footprint. The size of ocfs2_inode_cache remains 1280 bytes on 32-bit systems.

A related change was to modify the unit of the max wait time from nanoseconds
to microseconds, allowing us to track max wait times larger than 4 seconds.
This change necessitated bumping the output version of the debugfs file,
locking_state, from 2 to 3.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <jlbec@evilplan.org>

Showing 2 changed files with 64 additions and 56 deletions Inline Diff

1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dlmglue.c 4 * dlmglue.c
5 * 5 *
6 * Code which implements an OCFS2 specific interface to our DLM. 6 * Code which implements an OCFS2 specific interface to our DLM.
7 * 7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/types.h> 26 #include <linux/types.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/highmem.h> 28 #include <linux/highmem.h>
29 #include <linux/mm.h> 29 #include <linux/mm.h>
30 #include <linux/kthread.h> 30 #include <linux/kthread.h>
31 #include <linux/pagemap.h> 31 #include <linux/pagemap.h>
32 #include <linux/debugfs.h> 32 #include <linux/debugfs.h>
33 #include <linux/seq_file.h> 33 #include <linux/seq_file.h>
34 #include <linux/time.h> 34 #include <linux/time.h>
35 #include <linux/quotaops.h> 35 #include <linux/quotaops.h>
36 36
37 #define MLOG_MASK_PREFIX ML_DLM_GLUE 37 #define MLOG_MASK_PREFIX ML_DLM_GLUE
38 #include <cluster/masklog.h> 38 #include <cluster/masklog.h>
39 39
40 #include "ocfs2.h" 40 #include "ocfs2.h"
41 #include "ocfs2_lockingver.h" 41 #include "ocfs2_lockingver.h"
42 42
43 #include "alloc.h" 43 #include "alloc.h"
44 #include "dcache.h" 44 #include "dcache.h"
45 #include "dlmglue.h" 45 #include "dlmglue.h"
46 #include "extent_map.h" 46 #include "extent_map.h"
47 #include "file.h" 47 #include "file.h"
48 #include "heartbeat.h" 48 #include "heartbeat.h"
49 #include "inode.h" 49 #include "inode.h"
50 #include "journal.h" 50 #include "journal.h"
51 #include "stackglue.h" 51 #include "stackglue.h"
52 #include "slot_map.h" 52 #include "slot_map.h"
53 #include "super.h" 53 #include "super.h"
54 #include "uptodate.h" 54 #include "uptodate.h"
55 #include "quota.h" 55 #include "quota.h"
56 #include "refcounttree.h" 56 #include "refcounttree.h"
57 57
58 #include "buffer_head_io.h" 58 #include "buffer_head_io.h"
59 59
/*
 * Entry for a task waiting on a lock resource.  NOTE(review): mw_mask /
 * mw_goal appear to describe the lockres flag state being waited for, and
 * mw_status carries the wake-up result — the wait/wake protocol itself is
 * outside this hunk; confirm against the mask-wait users in dlmglue.c.
 */
struct ocfs2_mask_waiter {
	struct list_head	mw_item;
	int			mw_status;
	struct completion	mw_complete;
	unsigned long		mw_mask;
	unsigned long		mw_goal;
#ifdef CONFIG_OCFS2_FS_STATS
	/* wait start timestamp, taken via ktime_get() in ocfs2_init_start_time() */
	ktime_t			mw_lock_start;
#endif
};
70 70
71 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 71 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
72 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 72 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
73 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 73 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
74 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); 74 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
75 75
76 /* 76 /*
77 * Return value from ->downconvert_worker functions. 77 * Return value from ->downconvert_worker functions.
78 * 78 *
79 * These control the precise actions of ocfs2_unblock_lock() 79 * These control the precise actions of ocfs2_unblock_lock()
80 * and ocfs2_process_blocked_lock() 80 * and ocfs2_process_blocked_lock()
81 * 81 *
82 */ 82 */
83 enum ocfs2_unblock_action { 83 enum ocfs2_unblock_action {
84 UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 84 UNBLOCK_CONTINUE = 0, /* Continue downconvert */
85 UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 85 UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire
86 * ->post_unlock callback */ 86 * ->post_unlock callback */
87 UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 87 UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire
88 * ->post_unlock() callback. */ 88 * ->post_unlock() callback. */
89 }; 89 };
90 90
91 struct ocfs2_unblock_ctl { 91 struct ocfs2_unblock_ctl {
92 int requeue; 92 int requeue;
93 enum ocfs2_unblock_action unblock_action; 93 enum ocfs2_unblock_action unblock_action;
94 }; 94 };
95 95
96 /* Lockdep class keys */ 96 /* Lockdep class keys */
97 struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; 97 struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
98 98
99 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 99 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
100 int new_level); 100 int new_level);
101 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 101 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
102 102
103 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 103 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
104 int blocking); 104 int blocking);
105 105
106 static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 106 static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
107 int blocking); 107 int blocking);
108 108
109 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 109 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
110 struct ocfs2_lock_res *lockres); 110 struct ocfs2_lock_res *lockres);
111 111
112 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 112 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
113 113
114 static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 114 static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
115 int new_level); 115 int new_level);
116 static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 116 static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
117 int blocking); 117 int blocking);
118 118
119 #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 119 #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
120 120
121 /* This aids in debugging situations where a bad LVB might be involved. */ 121 /* This aids in debugging situations where a bad LVB might be involved. */
122 static void ocfs2_dump_meta_lvb_info(u64 level, 122 static void ocfs2_dump_meta_lvb_info(u64 level,
123 const char *function, 123 const char *function,
124 unsigned int line, 124 unsigned int line,
125 struct ocfs2_lock_res *lockres) 125 struct ocfs2_lock_res *lockres)
126 { 126 {
127 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 127 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
128 128
129 mlog(level, "LVB information for %s (called from %s:%u):\n", 129 mlog(level, "LVB information for %s (called from %s:%u):\n",
130 lockres->l_name, function, line); 130 lockres->l_name, function, line);
131 mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 131 mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
132 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 132 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
133 be32_to_cpu(lvb->lvb_igeneration)); 133 be32_to_cpu(lvb->lvb_igeneration));
134 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 134 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
135 (unsigned long long)be64_to_cpu(lvb->lvb_isize), 135 (unsigned long long)be64_to_cpu(lvb->lvb_isize),
136 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 136 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
137 be16_to_cpu(lvb->lvb_imode)); 137 be16_to_cpu(lvb->lvb_imode));
138 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 138 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
139 "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 139 "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
140 (long long)be64_to_cpu(lvb->lvb_iatime_packed), 140 (long long)be64_to_cpu(lvb->lvb_iatime_packed),
141 (long long)be64_to_cpu(lvb->lvb_ictime_packed), 141 (long long)be64_to_cpu(lvb->lvb_ictime_packed),
142 (long long)be64_to_cpu(lvb->lvb_imtime_packed), 142 (long long)be64_to_cpu(lvb->lvb_imtime_packed),
143 be32_to_cpu(lvb->lvb_iattr)); 143 be32_to_cpu(lvb->lvb_iattr));
144 } 144 }
145 145
146 146
147 /* 147 /*
148 * OCFS2 Lock Resource Operations 148 * OCFS2 Lock Resource Operations
149 * 149 *
150 * These fine tune the behavior of the generic dlmglue locking infrastructure. 150 * These fine tune the behavior of the generic dlmglue locking infrastructure.
151 * 151 *
152 * The most basic of lock types can point ->l_priv to their respective 152 * The most basic of lock types can point ->l_priv to their respective
153 * struct ocfs2_super and allow the default actions to manage things. 153 * struct ocfs2_super and allow the default actions to manage things.
154 * 154 *
155 * Right now, each lock type also needs to implement an init function, 155 * Right now, each lock type also needs to implement an init function,
156 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 156 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
157 * should be called when the lock is no longer needed (i.e., object 157 * should be called when the lock is no longer needed (i.e., object
158 * destruction time). 158 * destruction time).
159 */ 159 */
160 struct ocfs2_lock_res_ops { 160 struct ocfs2_lock_res_ops {
161 /* 161 /*
162 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 162 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
163 * this callback if ->l_priv is not an ocfs2_super pointer 163 * this callback if ->l_priv is not an ocfs2_super pointer
164 */ 164 */
165 struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 165 struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
166 166
167 /* 167 /*
168 * Optionally called in the downconvert thread after a 168 * Optionally called in the downconvert thread after a
169 * successful downconvert. The lockres will not be referenced 169 * successful downconvert. The lockres will not be referenced
170 * after this callback is called, so it is safe to free 170 * after this callback is called, so it is safe to free
171 * memory, etc. 171 * memory, etc.
172 * 172 *
173 * The exact semantics of when this is called are controlled 173 * The exact semantics of when this is called are controlled
174 * by ->downconvert_worker() 174 * by ->downconvert_worker()
175 */ 175 */
176 void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 176 void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
177 177
178 /* 178 /*
179 * Allow a lock type to add checks to determine whether it is 179 * Allow a lock type to add checks to determine whether it is
180 * safe to downconvert a lock. Return 0 to re-queue the 180 * safe to downconvert a lock. Return 0 to re-queue the
181 * downconvert at a later time, nonzero to continue. 181 * downconvert at a later time, nonzero to continue.
182 * 182 *
183 * For most locks, the default checks that there are no 183 * For most locks, the default checks that there are no
184 * incompatible holders are sufficient. 184 * incompatible holders are sufficient.
185 * 185 *
186 * Called with the lockres spinlock held. 186 * Called with the lockres spinlock held.
187 */ 187 */
188 int (*check_downconvert)(struct ocfs2_lock_res *, int); 188 int (*check_downconvert)(struct ocfs2_lock_res *, int);
189 189
190 /* 190 /*
191 * Allows a lock type to populate the lock value block. This 191 * Allows a lock type to populate the lock value block. This
192 * is called on downconvert, and when we drop a lock. 192 * is called on downconvert, and when we drop a lock.
193 * 193 *
194 * Locks that want to use this should set LOCK_TYPE_USES_LVB 194 * Locks that want to use this should set LOCK_TYPE_USES_LVB
195 * in the flags field. 195 * in the flags field.
196 * 196 *
197 * Called with the lockres spinlock held. 197 * Called with the lockres spinlock held.
198 */ 198 */
199 void (*set_lvb)(struct ocfs2_lock_res *); 199 void (*set_lvb)(struct ocfs2_lock_res *);
200 200
201 /* 201 /*
202 * Called from the downconvert thread when it is determined 202 * Called from the downconvert thread when it is determined
203 * that a lock will be downconverted. This is called without 203 * that a lock will be downconverted. This is called without
204 * any locks held so the function can do work that might 204 * any locks held so the function can do work that might
205 * schedule (syncing out data, etc). 205 * schedule (syncing out data, etc).
206 * 206 *
207 * This should return any one of the ocfs2_unblock_action 207 * This should return any one of the ocfs2_unblock_action
208 * values, depending on what it wants the thread to do. 208 * values, depending on what it wants the thread to do.
209 */ 209 */
210 int (*downconvert_worker)(struct ocfs2_lock_res *, int); 210 int (*downconvert_worker)(struct ocfs2_lock_res *, int);
211 211
212 /* 212 /*
213 * LOCK_TYPE_* flags which describe the specific requirements 213 * LOCK_TYPE_* flags which describe the specific requirements
214 * of a lock type. Descriptions of each individual flag follow. 214 * of a lock type. Descriptions of each individual flag follow.
215 */ 215 */
216 int flags; 216 int flags;
217 }; 217 };
218 218
219 /* 219 /*
220 * Some locks want to "refresh" potentially stale data when a 220 * Some locks want to "refresh" potentially stale data when a
221 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this 221 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
222 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 222 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
223 * individual lockres l_flags member from the ast function. It is 223 * individual lockres l_flags member from the ast function. It is
224 * expected that the locking wrapper will clear the 224 * expected that the locking wrapper will clear the
225 * OCFS2_LOCK_NEEDS_REFRESH flag when done. 225 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
226 */ 226 */
227 #define LOCK_TYPE_REQUIRES_REFRESH 0x1 227 #define LOCK_TYPE_REQUIRES_REFRESH 0x1
228 228
229 /* 229 /*
230 * Indicate that a lock type makes use of the lock value block. The 230 * Indicate that a lock type makes use of the lock value block. The
231 * ->set_lvb lock type callback must be defined. 231 * ->set_lvb lock type callback must be defined.
232 */ 232 */
233 #define LOCK_TYPE_USES_LVB 0x2 233 #define LOCK_TYPE_USES_LVB 0x2
234 234
235 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 235 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
236 .get_osb = ocfs2_get_inode_osb, 236 .get_osb = ocfs2_get_inode_osb,
237 .flags = 0, 237 .flags = 0,
238 }; 238 };
239 239
240 static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 240 static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
241 .get_osb = ocfs2_get_inode_osb, 241 .get_osb = ocfs2_get_inode_osb,
242 .check_downconvert = ocfs2_check_meta_downconvert, 242 .check_downconvert = ocfs2_check_meta_downconvert,
243 .set_lvb = ocfs2_set_meta_lvb, 243 .set_lvb = ocfs2_set_meta_lvb,
244 .downconvert_worker = ocfs2_data_convert_worker, 244 .downconvert_worker = ocfs2_data_convert_worker,
245 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 245 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
246 }; 246 };
247 247
248 static struct ocfs2_lock_res_ops ocfs2_super_lops = { 248 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
249 .flags = LOCK_TYPE_REQUIRES_REFRESH, 249 .flags = LOCK_TYPE_REQUIRES_REFRESH,
250 }; 250 };
251 251
252 static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 252 static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
253 .flags = 0, 253 .flags = 0,
254 }; 254 };
255 255
256 static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { 256 static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
257 .flags = 0, 257 .flags = 0,
258 }; 258 };
259 259
260 static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { 260 static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
261 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 261 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
262 }; 262 };
263 263
264 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 264 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
265 .get_osb = ocfs2_get_dentry_osb, 265 .get_osb = ocfs2_get_dentry_osb,
266 .post_unlock = ocfs2_dentry_post_unlock, 266 .post_unlock = ocfs2_dentry_post_unlock,
267 .downconvert_worker = ocfs2_dentry_convert_worker, 267 .downconvert_worker = ocfs2_dentry_convert_worker,
268 .flags = 0, 268 .flags = 0,
269 }; 269 };
270 270
271 static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 271 static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
272 .get_osb = ocfs2_get_inode_osb, 272 .get_osb = ocfs2_get_inode_osb,
273 .flags = 0, 273 .flags = 0,
274 }; 274 };
275 275
276 static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 276 static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
277 .get_osb = ocfs2_get_file_osb, 277 .get_osb = ocfs2_get_file_osb,
278 .flags = 0, 278 .flags = 0,
279 }; 279 };
280 280
281 static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { 281 static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
282 .set_lvb = ocfs2_set_qinfo_lvb, 282 .set_lvb = ocfs2_set_qinfo_lvb,
283 .get_osb = ocfs2_get_qinfo_osb, 283 .get_osb = ocfs2_get_qinfo_osb,
284 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 284 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
285 }; 285 };
286 286
287 static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { 287 static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
288 .check_downconvert = ocfs2_check_refcount_downconvert, 288 .check_downconvert = ocfs2_check_refcount_downconvert,
289 .downconvert_worker = ocfs2_refcount_convert_worker, 289 .downconvert_worker = ocfs2_refcount_convert_worker,
290 .flags = 0, 290 .flags = 0,
291 }; 291 };
292 292
293 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 293 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
294 { 294 {
295 return lockres->l_type == OCFS2_LOCK_TYPE_META || 295 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
296 lockres->l_type == OCFS2_LOCK_TYPE_RW || 296 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
297 lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 297 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
298 } 298 }
299 299
300 static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) 300 static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
301 { 301 {
302 return container_of(lksb, struct ocfs2_lock_res, l_lksb); 302 return container_of(lksb, struct ocfs2_lock_res, l_lksb);
303 } 303 }
304 304
305 static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 305 static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
306 { 306 {
307 BUG_ON(!ocfs2_is_inode_lock(lockres)); 307 BUG_ON(!ocfs2_is_inode_lock(lockres));
308 308
309 return (struct inode *) lockres->l_priv; 309 return (struct inode *) lockres->l_priv;
310 } 310 }
311 311
312 static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 312 static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
313 { 313 {
314 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 314 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
315 315
316 return (struct ocfs2_dentry_lock *)lockres->l_priv; 316 return (struct ocfs2_dentry_lock *)lockres->l_priv;
317 } 317 }
318 318
319 static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) 319 static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
320 { 320 {
321 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); 321 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
322 322
323 return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 323 return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
324 } 324 }
325 325
326 static inline struct ocfs2_refcount_tree * 326 static inline struct ocfs2_refcount_tree *
327 ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) 327 ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
328 { 328 {
329 return container_of(res, struct ocfs2_refcount_tree, rf_lockres); 329 return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
330 } 330 }
331 331
332 static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 332 static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
333 { 333 {
334 if (lockres->l_ops->get_osb) 334 if (lockres->l_ops->get_osb)
335 return lockres->l_ops->get_osb(lockres); 335 return lockres->l_ops->get_osb(lockres);
336 336
337 return (struct ocfs2_super *)lockres->l_priv; 337 return (struct ocfs2_super *)lockres->l_priv;
338 } 338 }
339 339
340 static int ocfs2_lock_create(struct ocfs2_super *osb, 340 static int ocfs2_lock_create(struct ocfs2_super *osb,
341 struct ocfs2_lock_res *lockres, 341 struct ocfs2_lock_res *lockres,
342 int level, 342 int level,
343 u32 dlm_flags); 343 u32 dlm_flags);
344 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 344 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
345 int wanted); 345 int wanted);
346 static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 346 static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
347 struct ocfs2_lock_res *lockres, 347 struct ocfs2_lock_res *lockres,
348 int level, unsigned long caller_ip); 348 int level, unsigned long caller_ip);
349 static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, 349 static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
350 struct ocfs2_lock_res *lockres, 350 struct ocfs2_lock_res *lockres,
351 int level) 351 int level)
352 { 352 {
353 __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); 353 __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
354 } 354 }
355 355
356 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 356 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
357 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 357 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
358 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 358 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
359 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 359 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
360 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 360 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
361 struct ocfs2_lock_res *lockres); 361 struct ocfs2_lock_res *lockres);
362 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 362 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
363 int convert); 363 int convert);
364 #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 364 #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \
365 if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ 365 if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \
366 mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 366 mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
367 _err, _func, _lockres->l_name); \ 367 _err, _func, _lockres->l_name); \
368 else \ 368 else \
369 mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \ 369 mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \
370 _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ 370 _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \
371 (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ 371 (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \
372 } while (0) 372 } while (0)
373 static int ocfs2_downconvert_thread(void *arg); 373 static int ocfs2_downconvert_thread(void *arg);
374 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 374 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
375 struct ocfs2_lock_res *lockres); 375 struct ocfs2_lock_res *lockres);
376 static int ocfs2_inode_lock_update(struct inode *inode, 376 static int ocfs2_inode_lock_update(struct inode *inode,
377 struct buffer_head **bh); 377 struct buffer_head **bh);
378 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 378 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
379 static inline int ocfs2_highest_compat_lock_level(int level); 379 static inline int ocfs2_highest_compat_lock_level(int level);
380 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 380 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
381 int new_level); 381 int new_level);
382 static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 382 static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
383 struct ocfs2_lock_res *lockres, 383 struct ocfs2_lock_res *lockres,
384 int new_level, 384 int new_level,
385 int lvb, 385 int lvb,
386 unsigned int generation); 386 unsigned int generation);
387 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 387 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
388 struct ocfs2_lock_res *lockres); 388 struct ocfs2_lock_res *lockres);
389 static int ocfs2_cancel_convert(struct ocfs2_super *osb, 389 static int ocfs2_cancel_convert(struct ocfs2_super *osb,
390 struct ocfs2_lock_res *lockres); 390 struct ocfs2_lock_res *lockres);
391 391
392 392
393 static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 393 static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
394 u64 blkno, 394 u64 blkno,
395 u32 generation, 395 u32 generation,
396 char *name) 396 char *name)
397 { 397 {
398 int len; 398 int len;
399 399
400 mlog_entry_void(); 400 mlog_entry_void();
401 401
402 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 402 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
403 403
404 len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 404 len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
405 ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 405 ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
406 (long long)blkno, generation); 406 (long long)blkno, generation);
407 407
408 BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 408 BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
409 409
410 mlog(0, "built lock resource with name: %s\n", name); 410 mlog(0, "built lock resource with name: %s\n", name);
411 411
412 mlog_exit_void(); 412 mlog_exit_void();
413 } 413 }
414 414
415 static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 415 static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
416 416
417 static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 417 static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
418 struct ocfs2_dlm_debug *dlm_debug) 418 struct ocfs2_dlm_debug *dlm_debug)
419 { 419 {
420 mlog(0, "Add tracking for lockres %s\n", res->l_name); 420 mlog(0, "Add tracking for lockres %s\n", res->l_name);
421 421
422 spin_lock(&ocfs2_dlm_tracking_lock); 422 spin_lock(&ocfs2_dlm_tracking_lock);
423 list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 423 list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
424 spin_unlock(&ocfs2_dlm_tracking_lock); 424 spin_unlock(&ocfs2_dlm_tracking_lock);
425 } 425 }
426 426
427 static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 427 static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
428 { 428 {
429 spin_lock(&ocfs2_dlm_tracking_lock); 429 spin_lock(&ocfs2_dlm_tracking_lock);
430 if (!list_empty(&res->l_debug_list)) 430 if (!list_empty(&res->l_debug_list))
431 list_del_init(&res->l_debug_list); 431 list_del_init(&res->l_debug_list);
432 spin_unlock(&ocfs2_dlm_tracking_lock); 432 spin_unlock(&ocfs2_dlm_tracking_lock);
433 } 433 }
434 434
435 #ifdef CONFIG_OCFS2_FS_STATS 435 #ifdef CONFIG_OCFS2_FS_STATS
436 static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 436 static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
437 { 437 {
438 res->l_lock_num_prmode = 0;
439 res->l_lock_num_prmode_failed = 0;
440 res->l_lock_total_prmode = 0;
441 res->l_lock_max_prmode = 0;
442 res->l_lock_num_exmode = 0;
443 res->l_lock_num_exmode_failed = 0;
444 res->l_lock_total_exmode = 0;
445 res->l_lock_max_exmode = 0;
446 res->l_lock_refresh = 0; 438 res->l_lock_refresh = 0;
439 memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
440 memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
447 } 441 }
448 442
449 static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 443 static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
450 struct ocfs2_mask_waiter *mw, int ret) 444 struct ocfs2_mask_waiter *mw, int ret)
451 { 445 {
452 unsigned long long *num, *sum; 446 u32 usec;
453 unsigned int *max, *failed; 447 ktime_t kt;
454 struct timespec ts = current_kernel_time(); 448 struct ocfs2_lock_stats *stats;
455 unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start;
456 449
457 if (level == LKM_PRMODE) { 450 if (level == LKM_PRMODE)
458 num = &res->l_lock_num_prmode; 451 stats = &res->l_lock_prmode;
459 sum = &res->l_lock_total_prmode; 452 else if (level == LKM_EXMODE)
460 max = &res->l_lock_max_prmode; 453 stats = &res->l_lock_exmode;
461 failed = &res->l_lock_num_prmode_failed; 454 else
462 } else if (level == LKM_EXMODE) {
463 num = &res->l_lock_num_exmode;
464 sum = &res->l_lock_total_exmode;
465 max = &res->l_lock_max_exmode;
466 failed = &res->l_lock_num_exmode_failed;
467 } else
468 return; 455 return;
469 456
470 (*num)++; 457 kt = ktime_sub(ktime_get(), mw->mw_lock_start);
471 (*sum) += time; 458 usec = ktime_to_us(kt);
472 if (time > *max) 459
473 *max = time; 460 stats->ls_gets++;
461 stats->ls_total += ktime_to_ns(kt);
462 /* overflow */
463 if (unlikely(stats->ls_gets) == 0) {
464 stats->ls_gets++;
465 stats->ls_total = ktime_to_ns(kt);
466 }
467
468 if (stats->ls_max < usec)
469 stats->ls_max = usec;
470
474 if (ret) 471 if (ret)
475 (*failed)++; 472 stats->ls_fail++;
476 } 473 }
477 474
478 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 475 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
479 { 476 {
480 lockres->l_lock_refresh++; 477 lockres->l_lock_refresh++;
481 } 478 }
482 479
483 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 480 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
484 { 481 {
485 struct timespec ts = current_kernel_time(); 482 mw->mw_lock_start = ktime_get();
486 mw->mw_lock_start = timespec_to_ns(&ts);
487 } 483 }
488 #else 484 #else
489 static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 485 static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
490 { 486 {
491 } 487 }
492 static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 488 static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
493 int level, struct ocfs2_mask_waiter *mw, int ret) 489 int level, struct ocfs2_mask_waiter *mw, int ret)
494 { 490 {
495 } 491 }
496 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 492 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
497 { 493 {
498 } 494 }
499 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 495 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
500 { 496 {
501 } 497 }
502 #endif 498 #endif
503 499
504 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 500 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
505 struct ocfs2_lock_res *res, 501 struct ocfs2_lock_res *res,
506 enum ocfs2_lock_type type, 502 enum ocfs2_lock_type type,
507 struct ocfs2_lock_res_ops *ops, 503 struct ocfs2_lock_res_ops *ops,
508 void *priv) 504 void *priv)
509 { 505 {
510 res->l_type = type; 506 res->l_type = type;
511 res->l_ops = ops; 507 res->l_ops = ops;
512 res->l_priv = priv; 508 res->l_priv = priv;
513 509
514 res->l_level = DLM_LOCK_IV; 510 res->l_level = DLM_LOCK_IV;
515 res->l_requested = DLM_LOCK_IV; 511 res->l_requested = DLM_LOCK_IV;
516 res->l_blocking = DLM_LOCK_IV; 512 res->l_blocking = DLM_LOCK_IV;
517 res->l_action = OCFS2_AST_INVALID; 513 res->l_action = OCFS2_AST_INVALID;
518 res->l_unlock_action = OCFS2_UNLOCK_INVALID; 514 res->l_unlock_action = OCFS2_UNLOCK_INVALID;
519 515
520 res->l_flags = OCFS2_LOCK_INITIALIZED; 516 res->l_flags = OCFS2_LOCK_INITIALIZED;
521 517
522 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 518 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
523 519
524 ocfs2_init_lock_stats(res); 520 ocfs2_init_lock_stats(res);
525 #ifdef CONFIG_DEBUG_LOCK_ALLOC 521 #ifdef CONFIG_DEBUG_LOCK_ALLOC
526 if (type != OCFS2_LOCK_TYPE_OPEN) 522 if (type != OCFS2_LOCK_TYPE_OPEN)
527 lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], 523 lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
528 &lockdep_keys[type], 0); 524 &lockdep_keys[type], 0);
529 else 525 else
530 res->l_lockdep_map.key = NULL; 526 res->l_lockdep_map.key = NULL;
531 #endif 527 #endif
532 } 528 }
533 529
534 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 530 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
535 { 531 {
536 /* This also clears out the lock status block */ 532 /* This also clears out the lock status block */
537 memset(res, 0, sizeof(struct ocfs2_lock_res)); 533 memset(res, 0, sizeof(struct ocfs2_lock_res));
538 spin_lock_init(&res->l_lock); 534 spin_lock_init(&res->l_lock);
539 init_waitqueue_head(&res->l_event); 535 init_waitqueue_head(&res->l_event);
540 INIT_LIST_HEAD(&res->l_blocked_list); 536 INIT_LIST_HEAD(&res->l_blocked_list);
541 INIT_LIST_HEAD(&res->l_mask_waiters); 537 INIT_LIST_HEAD(&res->l_mask_waiters);
542 } 538 }
543 539
544 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 540 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
545 enum ocfs2_lock_type type, 541 enum ocfs2_lock_type type,
546 unsigned int generation, 542 unsigned int generation,
547 struct inode *inode) 543 struct inode *inode)
548 { 544 {
549 struct ocfs2_lock_res_ops *ops; 545 struct ocfs2_lock_res_ops *ops;
550 546
551 switch(type) { 547 switch(type) {
552 case OCFS2_LOCK_TYPE_RW: 548 case OCFS2_LOCK_TYPE_RW:
553 ops = &ocfs2_inode_rw_lops; 549 ops = &ocfs2_inode_rw_lops;
554 break; 550 break;
555 case OCFS2_LOCK_TYPE_META: 551 case OCFS2_LOCK_TYPE_META:
556 ops = &ocfs2_inode_inode_lops; 552 ops = &ocfs2_inode_inode_lops;
557 break; 553 break;
558 case OCFS2_LOCK_TYPE_OPEN: 554 case OCFS2_LOCK_TYPE_OPEN:
559 ops = &ocfs2_inode_open_lops; 555 ops = &ocfs2_inode_open_lops;
560 break; 556 break;
561 default: 557 default:
562 mlog_bug_on_msg(1, "type: %d\n", type); 558 mlog_bug_on_msg(1, "type: %d\n", type);
563 ops = NULL; /* thanks, gcc */ 559 ops = NULL; /* thanks, gcc */
564 break; 560 break;
565 }; 561 };
566 562
567 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 563 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
568 generation, res->l_name); 564 generation, res->l_name);
569 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 565 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
570 } 566 }
571 567
572 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 568 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
573 { 569 {
574 struct inode *inode = ocfs2_lock_res_inode(lockres); 570 struct inode *inode = ocfs2_lock_res_inode(lockres);
575 571
576 return OCFS2_SB(inode->i_sb); 572 return OCFS2_SB(inode->i_sb);
577 } 573 }
578 574
579 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 575 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
580 { 576 {
581 struct ocfs2_mem_dqinfo *info = lockres->l_priv; 577 struct ocfs2_mem_dqinfo *info = lockres->l_priv;
582 578
583 return OCFS2_SB(info->dqi_gi.dqi_sb); 579 return OCFS2_SB(info->dqi_gi.dqi_sb);
584 } 580 }
585 581
586 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 582 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
587 { 583 {
588 struct ocfs2_file_private *fp = lockres->l_priv; 584 struct ocfs2_file_private *fp = lockres->l_priv;
589 585
590 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 586 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
591 } 587 }
592 588
593 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 589 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
594 { 590 {
595 __be64 inode_blkno_be; 591 __be64 inode_blkno_be;
596 592
597 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 593 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
598 sizeof(__be64)); 594 sizeof(__be64));
599 595
600 return be64_to_cpu(inode_blkno_be); 596 return be64_to_cpu(inode_blkno_be);
601 } 597 }
602 598
603 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 599 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
604 { 600 {
605 struct ocfs2_dentry_lock *dl = lockres->l_priv; 601 struct ocfs2_dentry_lock *dl = lockres->l_priv;
606 602
607 return OCFS2_SB(dl->dl_inode->i_sb); 603 return OCFS2_SB(dl->dl_inode->i_sb);
608 } 604 }
609 605
610 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 606 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
611 u64 parent, struct inode *inode) 607 u64 parent, struct inode *inode)
612 { 608 {
613 int len; 609 int len;
614 u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 610 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
615 __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 611 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
616 struct ocfs2_lock_res *lockres = &dl->dl_lockres; 612 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
617 613
618 ocfs2_lock_res_init_once(lockres); 614 ocfs2_lock_res_init_once(lockres);
619 615
620 /* 616 /*
621 * Unfortunately, the standard lock naming scheme won't work 617 * Unfortunately, the standard lock naming scheme won't work
622 * here because we have two 16 byte values to use. Instead, 618 * here because we have two 16 byte values to use. Instead,
623 * we'll stuff the inode number as a binary value. We still 619 * we'll stuff the inode number as a binary value. We still
624 * want error prints to show something without garbling the 620 * want error prints to show something without garbling the
625 * display, so drop a null byte in there before the inode 621 * display, so drop a null byte in there before the inode
626 * number. A future version of OCFS2 will likely use all 622 * number. A future version of OCFS2 will likely use all
627 * binary lock names. The stringified names have been a 623 * binary lock names. The stringified names have been a
628 * tremendous aid in debugging, but now that the debugfs 624 * tremendous aid in debugging, but now that the debugfs
629 * interface exists, we can mangle things there if need be. 625 * interface exists, we can mangle things there if need be.
630 * 626 *
631 * NOTE: We also drop the standard "pad" value (the total lock 627 * NOTE: We also drop the standard "pad" value (the total lock
632 * name size stays the same though - the last part is all 628 * name size stays the same though - the last part is all
633 * zeros due to the memset in ocfs2_lock_res_init_once() 629 * zeros due to the memset in ocfs2_lock_res_init_once()
634 */ 630 */
635 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 631 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
636 "%c%016llx", 632 "%c%016llx",
637 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 633 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
638 (long long)parent); 634 (long long)parent);
639 635
640 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 636 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
641 637
642 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 638 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
643 sizeof(__be64)); 639 sizeof(__be64));
644 640
645 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 641 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
646 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 642 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
647 dl); 643 dl);
648 } 644 }
649 645
650 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 646 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
651 struct ocfs2_super *osb) 647 struct ocfs2_super *osb)
652 { 648 {
653 /* Superblock lockres doesn't come from a slab so we call init 649 /* Superblock lockres doesn't come from a slab so we call init
654 * once on it manually. */ 650 * once on it manually. */
655 ocfs2_lock_res_init_once(res); 651 ocfs2_lock_res_init_once(res);
656 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 652 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
657 0, res->l_name); 653 0, res->l_name);
658 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 654 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
659 &ocfs2_super_lops, osb); 655 &ocfs2_super_lops, osb);
660 } 656 }
661 657
662 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 658 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
663 struct ocfs2_super *osb) 659 struct ocfs2_super *osb)
664 { 660 {
665 /* Rename lockres doesn't come from a slab so we call init 661 /* Rename lockres doesn't come from a slab so we call init
666 * once on it manually. */ 662 * once on it manually. */
667 ocfs2_lock_res_init_once(res); 663 ocfs2_lock_res_init_once(res);
668 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 664 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
669 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 665 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
670 &ocfs2_rename_lops, osb); 666 &ocfs2_rename_lops, osb);
671 } 667 }
672 668
673 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 669 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
674 struct ocfs2_super *osb) 670 struct ocfs2_super *osb)
675 { 671 {
676 /* nfs_sync lockres doesn't come from a slab so we call init 672 /* nfs_sync lockres doesn't come from a slab so we call init
677 * once on it manually. */ 673 * once on it manually. */
678 ocfs2_lock_res_init_once(res); 674 ocfs2_lock_res_init_once(res);
679 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 675 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
680 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 676 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
681 &ocfs2_nfs_sync_lops, osb); 677 &ocfs2_nfs_sync_lops, osb);
682 } 678 }
683 679
684 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 680 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
685 struct ocfs2_super *osb) 681 struct ocfs2_super *osb)
686 { 682 {
687 ocfs2_lock_res_init_once(res); 683 ocfs2_lock_res_init_once(res);
688 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 684 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
689 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 685 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
690 &ocfs2_orphan_scan_lops, osb); 686 &ocfs2_orphan_scan_lops, osb);
691 } 687 }
692 688
693 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 689 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
694 struct ocfs2_file_private *fp) 690 struct ocfs2_file_private *fp)
695 { 691 {
696 struct inode *inode = fp->fp_file->f_mapping->host; 692 struct inode *inode = fp->fp_file->f_mapping->host;
697 struct ocfs2_inode_info *oi = OCFS2_I(inode); 693 struct ocfs2_inode_info *oi = OCFS2_I(inode);
698 694
699 ocfs2_lock_res_init_once(lockres); 695 ocfs2_lock_res_init_once(lockres);
700 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 696 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
701 inode->i_generation, lockres->l_name); 697 inode->i_generation, lockres->l_name);
702 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 698 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
703 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 699 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
704 fp); 700 fp);
705 lockres->l_flags |= OCFS2_LOCK_NOCACHE; 701 lockres->l_flags |= OCFS2_LOCK_NOCACHE;
706 } 702 }
707 703
708 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 704 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
709 struct ocfs2_mem_dqinfo *info) 705 struct ocfs2_mem_dqinfo *info)
710 { 706 {
711 ocfs2_lock_res_init_once(lockres); 707 ocfs2_lock_res_init_once(lockres);
712 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, 708 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
713 0, lockres->l_name); 709 0, lockres->l_name);
714 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 710 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
715 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 711 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
716 info); 712 info);
717 } 713 }
718 714
719 void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, 715 void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
720 struct ocfs2_super *osb, u64 ref_blkno, 716 struct ocfs2_super *osb, u64 ref_blkno,
721 unsigned int generation) 717 unsigned int generation)
722 { 718 {
723 ocfs2_lock_res_init_once(lockres); 719 ocfs2_lock_res_init_once(lockres);
724 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, 720 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
725 generation, lockres->l_name); 721 generation, lockres->l_name);
726 ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, 722 ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
727 &ocfs2_refcount_block_lops, osb); 723 &ocfs2_refcount_block_lops, osb);
728 } 724 }
729 725
730 void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 726 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
731 { 727 {
732 mlog_entry_void(); 728 mlog_entry_void();
733 729
734 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 730 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
735 return; 731 return;
736 732
737 ocfs2_remove_lockres_tracking(res); 733 ocfs2_remove_lockres_tracking(res);
738 734
739 mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 735 mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
740 "Lockres %s is on the blocked list\n", 736 "Lockres %s is on the blocked list\n",
741 res->l_name); 737 res->l_name);
742 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 738 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
743 "Lockres %s has mask waiters pending\n", 739 "Lockres %s has mask waiters pending\n",
744 res->l_name); 740 res->l_name);
745 mlog_bug_on_msg(spin_is_locked(&res->l_lock), 741 mlog_bug_on_msg(spin_is_locked(&res->l_lock),
746 "Lockres %s is locked\n", 742 "Lockres %s is locked\n",
747 res->l_name); 743 res->l_name);
748 mlog_bug_on_msg(res->l_ro_holders, 744 mlog_bug_on_msg(res->l_ro_holders,
749 "Lockres %s has %u ro holders\n", 745 "Lockres %s has %u ro holders\n",
750 res->l_name, res->l_ro_holders); 746 res->l_name, res->l_ro_holders);
751 mlog_bug_on_msg(res->l_ex_holders, 747 mlog_bug_on_msg(res->l_ex_holders,
752 "Lockres %s has %u ex holders\n", 748 "Lockres %s has %u ex holders\n",
753 res->l_name, res->l_ex_holders); 749 res->l_name, res->l_ex_holders);
754 750
755 /* Need to clear out the lock status block for the dlm */ 751 /* Need to clear out the lock status block for the dlm */
756 memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 752 memset(&res->l_lksb, 0, sizeof(res->l_lksb));
757 753
758 res->l_flags = 0UL; 754 res->l_flags = 0UL;
759 mlog_exit_void(); 755 mlog_exit_void();
760 } 756 }
761 757
762 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 758 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
763 int level) 759 int level)
764 { 760 {
765 mlog_entry_void(); 761 mlog_entry_void();
766 762
767 BUG_ON(!lockres); 763 BUG_ON(!lockres);
768 764
769 switch(level) { 765 switch(level) {
770 case DLM_LOCK_EX: 766 case DLM_LOCK_EX:
771 lockres->l_ex_holders++; 767 lockres->l_ex_holders++;
772 break; 768 break;
773 case DLM_LOCK_PR: 769 case DLM_LOCK_PR:
774 lockres->l_ro_holders++; 770 lockres->l_ro_holders++;
775 break; 771 break;
776 default: 772 default:
777 BUG(); 773 BUG();
778 } 774 }
779 775
780 mlog_exit_void(); 776 mlog_exit_void();
781 } 777 }
782 778
783 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 779 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
784 int level) 780 int level)
785 { 781 {
786 mlog_entry_void(); 782 mlog_entry_void();
787 783
788 BUG_ON(!lockres); 784 BUG_ON(!lockres);
789 785
790 switch(level) { 786 switch(level) {
791 case DLM_LOCK_EX: 787 case DLM_LOCK_EX:
792 BUG_ON(!lockres->l_ex_holders); 788 BUG_ON(!lockres->l_ex_holders);
793 lockres->l_ex_holders--; 789 lockres->l_ex_holders--;
794 break; 790 break;
795 case DLM_LOCK_PR: 791 case DLM_LOCK_PR:
796 BUG_ON(!lockres->l_ro_holders); 792 BUG_ON(!lockres->l_ro_holders);
797 lockres->l_ro_holders--; 793 lockres->l_ro_holders--;
798 break; 794 break;
799 default: 795 default:
800 BUG(); 796 BUG();
801 } 797 }
802 mlog_exit_void(); 798 mlog_exit_void();
803 } 799 }
804 800
805 /* WARNING: This function lives in a world where the only three lock 801 /* WARNING: This function lives in a world where the only three lock
806 * levels are EX, PR, and NL. It *will* have to be adjusted when more 802 * levels are EX, PR, and NL. It *will* have to be adjusted when more
807 * lock types are added. */ 803 * lock types are added. */
808 static inline int ocfs2_highest_compat_lock_level(int level) 804 static inline int ocfs2_highest_compat_lock_level(int level)
809 { 805 {
810 int new_level = DLM_LOCK_EX; 806 int new_level = DLM_LOCK_EX;
811 807
812 if (level == DLM_LOCK_EX) 808 if (level == DLM_LOCK_EX)
813 new_level = DLM_LOCK_NL; 809 new_level = DLM_LOCK_NL;
814 else if (level == DLM_LOCK_PR) 810 else if (level == DLM_LOCK_PR)
815 new_level = DLM_LOCK_PR; 811 new_level = DLM_LOCK_PR;
816 return new_level; 812 return new_level;
817 } 813 }
818 814
819 static void lockres_set_flags(struct ocfs2_lock_res *lockres, 815 static void lockres_set_flags(struct ocfs2_lock_res *lockres,
820 unsigned long newflags) 816 unsigned long newflags)
821 { 817 {
822 struct ocfs2_mask_waiter *mw, *tmp; 818 struct ocfs2_mask_waiter *mw, *tmp;
823 819
824 assert_spin_locked(&lockres->l_lock); 820 assert_spin_locked(&lockres->l_lock);
825 821
826 lockres->l_flags = newflags; 822 lockres->l_flags = newflags;
827 823
828 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 824 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
829 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 825 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
830 continue; 826 continue;
831 827
832 list_del_init(&mw->mw_item); 828 list_del_init(&mw->mw_item);
833 mw->mw_status = 0; 829 mw->mw_status = 0;
834 complete(&mw->mw_complete); 830 complete(&mw->mw_complete);
835 } 831 }
836 } 832 }
837 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 833 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
838 { 834 {
839 lockres_set_flags(lockres, lockres->l_flags | or); 835 lockres_set_flags(lockres, lockres->l_flags | or);
840 } 836 }
841 static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 837 static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
842 unsigned long clear) 838 unsigned long clear)
843 { 839 {
844 lockres_set_flags(lockres, lockres->l_flags & ~clear); 840 lockres_set_flags(lockres, lockres->l_flags & ~clear);
845 } 841 }
846 842
847 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 843 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
848 { 844 {
849 mlog_entry_void(); 845 mlog_entry_void();
850 846
851 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 847 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
852 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 848 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
853 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 849 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
854 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 850 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
855 851
856 lockres->l_level = lockres->l_requested; 852 lockres->l_level = lockres->l_requested;
857 if (lockres->l_level <= 853 if (lockres->l_level <=
858 ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 854 ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
859 lockres->l_blocking = DLM_LOCK_NL; 855 lockres->l_blocking = DLM_LOCK_NL;
860 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 856 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
861 } 857 }
862 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 858 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
863 859
864 mlog_exit_void(); 860 mlog_exit_void();
865 } 861 }
866 862
867 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 863 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
868 { 864 {
869 mlog_entry_void(); 865 mlog_entry_void();
870 866
871 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 867 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
872 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 868 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
873 869
874 /* Convert from RO to EX doesn't really need anything as our 870 /* Convert from RO to EX doesn't really need anything as our
875 * information is already up to data. Convert from NL to 871 * information is already up to data. Convert from NL to
876 * *anything* however should mark ourselves as needing an 872 * *anything* however should mark ourselves as needing an
877 * update */ 873 * update */
878 if (lockres->l_level == DLM_LOCK_NL && 874 if (lockres->l_level == DLM_LOCK_NL &&
879 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 875 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
880 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 876 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
881 877
882 lockres->l_level = lockres->l_requested; 878 lockres->l_level = lockres->l_requested;
883 879
884 /* 880 /*
885 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing 881 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
886 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from 882 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
887 * downconverting the lock before the upconvert has fully completed. 883 * downconverting the lock before the upconvert has fully completed.
888 */ 884 */
889 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 885 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
890 886
891 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 887 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
892 888
893 mlog_exit_void(); 889 mlog_exit_void();
894 } 890 }
895 891
896 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 892 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
897 { 893 {
898 mlog_entry_void(); 894 mlog_entry_void();
899 895
900 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 896 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
901 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 897 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
902 898
903 if (lockres->l_requested > DLM_LOCK_NL && 899 if (lockres->l_requested > DLM_LOCK_NL &&
904 !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 900 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
905 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 901 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
906 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 902 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
907 903
908 lockres->l_level = lockres->l_requested; 904 lockres->l_level = lockres->l_requested;
909 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 905 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
910 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 906 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
911 907
912 mlog_exit_void(); 908 mlog_exit_void();
913 } 909 }
914 910
915 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 911 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
916 int level) 912 int level)
917 { 913 {
918 int needs_downconvert = 0; 914 int needs_downconvert = 0;
919 mlog_entry_void(); 915 mlog_entry_void();
920 916
921 assert_spin_locked(&lockres->l_lock); 917 assert_spin_locked(&lockres->l_lock);
922 918
923 if (level > lockres->l_blocking) { 919 if (level > lockres->l_blocking) {
924 /* only schedule a downconvert if we haven't already scheduled 920 /* only schedule a downconvert if we haven't already scheduled
925 * one that goes low enough to satisfy the level we're 921 * one that goes low enough to satisfy the level we're
926 * blocking. this also catches the case where we get 922 * blocking. this also catches the case where we get
927 * duplicate BASTs */ 923 * duplicate BASTs */
928 if (ocfs2_highest_compat_lock_level(level) < 924 if (ocfs2_highest_compat_lock_level(level) <
929 ocfs2_highest_compat_lock_level(lockres->l_blocking)) 925 ocfs2_highest_compat_lock_level(lockres->l_blocking))
930 needs_downconvert = 1; 926 needs_downconvert = 1;
931 927
932 lockres->l_blocking = level; 928 lockres->l_blocking = level;
933 } 929 }
934 930
935 mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", 931 mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
936 lockres->l_name, level, lockres->l_level, lockres->l_blocking, 932 lockres->l_name, level, lockres->l_level, lockres->l_blocking,
937 needs_downconvert); 933 needs_downconvert);
938 934
939 if (needs_downconvert) 935 if (needs_downconvert)
940 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 936 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
941 937
942 mlog_exit(needs_downconvert); 938 mlog_exit(needs_downconvert);
943 return needs_downconvert; 939 return needs_downconvert;
944 } 940 }
945 941
946 /* 942 /*
947 * OCFS2_LOCK_PENDING and l_pending_gen. 943 * OCFS2_LOCK_PENDING and l_pending_gen.
948 * 944 *
949 * Why does OCFS2_LOCK_PENDING exist? To close a race between setting 945 * Why does OCFS2_LOCK_PENDING exist? To close a race between setting
950 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 946 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock()
951 * for more details on the race. 947 * for more details on the race.
952 * 948 *
953 * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 949 * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces
954 * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() 950 * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock()
955 * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear 951 * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear
956 * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, 952 * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns,
957 * the caller is going to try to clear PENDING again. If nothing else is 953 * the caller is going to try to clear PENDING again. If nothing else is
958 * happening, __lockres_clear_pending() sees PENDING is unset and does 954 * happening, __lockres_clear_pending() sees PENDING is unset and does
959 * nothing. 955 * nothing.
960 * 956 *
961 * But what if another path (eg downconvert thread) has just started a 957 * But what if another path (eg downconvert thread) has just started a
962 * new locking action? The other path has re-set PENDING. Our path 958 * new locking action? The other path has re-set PENDING. Our path
963 * cannot clear PENDING, because that will re-open the original race 959 * cannot clear PENDING, because that will re-open the original race
964 * window. 960 * window.
965 * 961 *
966 * [Example] 962 * [Example]
967 * 963 *
968 * ocfs2_meta_lock() 964 * ocfs2_meta_lock()
969 * ocfs2_cluster_lock() 965 * ocfs2_cluster_lock()
970 * set BUSY 966 * set BUSY
971 * set PENDING 967 * set PENDING
972 * drop l_lock 968 * drop l_lock
973 * ocfs2_dlm_lock() 969 * ocfs2_dlm_lock()
974 * ocfs2_locking_ast() ocfs2_downconvert_thread() 970 * ocfs2_locking_ast() ocfs2_downconvert_thread()
975 * clear PENDING ocfs2_unblock_lock() 971 * clear PENDING ocfs2_unblock_lock()
976 * take_l_lock 972 * take_l_lock
977 * !BUSY 973 * !BUSY
978 * ocfs2_prepare_downconvert() 974 * ocfs2_prepare_downconvert()
979 * set BUSY 975 * set BUSY
980 * set PENDING 976 * set PENDING
981 * drop l_lock 977 * drop l_lock
982 * take l_lock 978 * take l_lock
983 * clear PENDING 979 * clear PENDING
984 * drop l_lock 980 * drop l_lock
985 * <window> 981 * <window>
986 * ocfs2_dlm_lock() 982 * ocfs2_dlm_lock()
987 * 983 *
988 * So as you can see, we now have a window where l_lock is not held, 984 * So as you can see, we now have a window where l_lock is not held,
989 * PENDING is not set, and ocfs2_dlm_lock() has not been called. 985 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
990 * 986 *
991 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING 987 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
992 * set by ocfs2_prepare_downconvert(). That wasn't nice. 988 * set by ocfs2_prepare_downconvert(). That wasn't nice.
993 * 989 *
994 * To solve this we introduce l_pending_gen. A call to 990 * To solve this we introduce l_pending_gen. A call to
995 * lockres_clear_pending() will only do so when it is passed a generation 991 * lockres_clear_pending() will only do so when it is passed a generation
996 * number that matches the lockres. lockres_set_pending() will return the 992 * number that matches the lockres. lockres_set_pending() will return the
997 * current generation number. When ocfs2_cluster_lock() goes to clear 993 * current generation number. When ocfs2_cluster_lock() goes to clear
998 * PENDING, it passes the generation it got from set_pending(). In our 994 * PENDING, it passes the generation it got from set_pending(). In our
999 * example above, the generation numbers will *not* match. Thus, 995 * example above, the generation numbers will *not* match. Thus,
1000 * ocfs2_cluster_lock() will not clear the PENDING set by 996 * ocfs2_cluster_lock() will not clear the PENDING set by
1001 * ocfs2_prepare_downconvert(). 997 * ocfs2_prepare_downconvert().
1002 */ 998 */
1003 999
1004 /* Unlocked version for ocfs2_locking_ast() */ 1000 /* Unlocked version for ocfs2_locking_ast() */
1005 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 1001 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
1006 unsigned int generation, 1002 unsigned int generation,
1007 struct ocfs2_super *osb) 1003 struct ocfs2_super *osb)
1008 { 1004 {
1009 assert_spin_locked(&lockres->l_lock); 1005 assert_spin_locked(&lockres->l_lock);
1010 1006
1011 /* 1007 /*
1012 * The ast and locking functions can race us here. The winner 1008 * The ast and locking functions can race us here. The winner
1013 * will clear pending, the loser will not. 1009 * will clear pending, the loser will not.
1014 */ 1010 */
1015 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 1011 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
1016 (lockres->l_pending_gen != generation)) 1012 (lockres->l_pending_gen != generation))
1017 return; 1013 return;
1018 1014
1019 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 1015 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
1020 lockres->l_pending_gen++; 1016 lockres->l_pending_gen++;
1021 1017
1022 /* 1018 /*
1023 * The downconvert thread may have skipped us because we 1019 * The downconvert thread may have skipped us because we
1024 * were PENDING. Wake it up. 1020 * were PENDING. Wake it up.
1025 */ 1021 */
1026 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1022 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1027 ocfs2_wake_downconvert_thread(osb); 1023 ocfs2_wake_downconvert_thread(osb);
1028 } 1024 }
1029 1025
1030 /* Locked version for callers of ocfs2_dlm_lock() */ 1026 /* Locked version for callers of ocfs2_dlm_lock() */
1031 static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 1027 static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1032 unsigned int generation, 1028 unsigned int generation,
1033 struct ocfs2_super *osb) 1029 struct ocfs2_super *osb)
1034 { 1030 {
1035 unsigned long flags; 1031 unsigned long flags;
1036 1032
1037 spin_lock_irqsave(&lockres->l_lock, flags); 1033 spin_lock_irqsave(&lockres->l_lock, flags);
1038 __lockres_clear_pending(lockres, generation, osb); 1034 __lockres_clear_pending(lockres, generation, osb);
1039 spin_unlock_irqrestore(&lockres->l_lock, flags); 1035 spin_unlock_irqrestore(&lockres->l_lock, flags);
1040 } 1036 }
1041 1037
1042 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 1038 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1043 { 1039 {
1044 assert_spin_locked(&lockres->l_lock); 1040 assert_spin_locked(&lockres->l_lock);
1045 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 1041 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1046 1042
1047 lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 1043 lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1048 1044
1049 return lockres->l_pending_gen; 1045 return lockres->l_pending_gen;
1050 } 1046 }
1051 1047
1052 static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) 1048 static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1053 { 1049 {
1054 struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1050 struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1055 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1051 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1056 int needs_downconvert; 1052 int needs_downconvert;
1057 unsigned long flags; 1053 unsigned long flags;
1058 1054
1059 BUG_ON(level <= DLM_LOCK_NL); 1055 BUG_ON(level <= DLM_LOCK_NL);
1060 1056
1061 mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " 1057 mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
1062 "type %s\n", lockres->l_name, level, lockres->l_level, 1058 "type %s\n", lockres->l_name, level, lockres->l_level,
1063 ocfs2_lock_type_string(lockres->l_type)); 1059 ocfs2_lock_type_string(lockres->l_type));
1064 1060
1065 /* 1061 /*
1066 * We can skip the bast for locks which don't enable caching - 1062 * We can skip the bast for locks which don't enable caching -
1067 * they'll be dropped at the earliest possible time anyway. 1063 * they'll be dropped at the earliest possible time anyway.
1068 */ 1064 */
1069 if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1065 if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1070 return; 1066 return;
1071 1067
1072 spin_lock_irqsave(&lockres->l_lock, flags); 1068 spin_lock_irqsave(&lockres->l_lock, flags);
1073 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1069 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1074 if (needs_downconvert) 1070 if (needs_downconvert)
1075 ocfs2_schedule_blocked_lock(osb, lockres); 1071 ocfs2_schedule_blocked_lock(osb, lockres);
1076 spin_unlock_irqrestore(&lockres->l_lock, flags); 1072 spin_unlock_irqrestore(&lockres->l_lock, flags);
1077 1073
1078 wake_up(&lockres->l_event); 1074 wake_up(&lockres->l_event);
1079 1075
1080 ocfs2_wake_downconvert_thread(osb); 1076 ocfs2_wake_downconvert_thread(osb);
1081 } 1077 }
1082 1078
/*
 * Locking AST: fires when a lock or convert request completes.
 * Reads the dlm status from the lksb, dispatches on l_action
 * (attach/convert/downconvert) and clears the PENDING state set by
 * the ocfs2_dlm_lock() caller.  Runs with l_lock held throughout.
 */
static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	int status;

	spin_lock_irqsave(&lockres->l_lock, flags);

	status = ocfs2_dlm_lock_status(&lockres->l_lksb);

	/* -EAGAIN: request was not granted (e.g. NOQUEUE); just drop BUSY
	 * and still fall through to clear PENDING and wake waiters. */
	if (status == -EAGAIN) {
		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
		goto out;
	}

	/* Any other non-zero status is unexpected; log and leave the
	 * lockres state untouched (no PENDING clear, no wakeup). */
	if (status) {
		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
		     lockres->l_name, status);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		return;
	}

	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
	     "level %d => %d\n", lockres->l_name, lockres->l_action,
	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);

	switch(lockres->l_action) {
	case OCFS2_AST_ATTACH:
		ocfs2_generic_handle_attach_action(lockres);
		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
		break;
	case OCFS2_AST_CONVERT:
		ocfs2_generic_handle_convert_action(lockres);
		break;
	case OCFS2_AST_DOWNCONVERT:
		ocfs2_generic_handle_downconvert_action(lockres);
		break;
	default:
		/* An AST with no lock call in flight is a fatal
		 * inconsistency in the lockres state machine. */
		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
		     "flags 0x%lx, unlock: %u\n",
		     lockres->l_name, lockres->l_action, lockres->l_flags,
		     lockres->l_unlock_action);
		BUG();
	}
out:
	/* set it to something invalid so if we get called again we
	 * can catch it. */
	lockres->l_action = OCFS2_AST_INVALID;

	/* Did we try to cancel this lock?  Clear that state */
	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;

	/*
	 * We may have beaten the locking functions here.  We certainly
	 * know that dlm_lock() has been called :-)
	 * Because we can't have two lock calls in flight at once, we
	 * can use lockres->l_pending_gen.
	 */
	__lockres_clear_pending(lockres, lockres->l_pending_gen, osb);

	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}
1148 1144
/*
 * Unlock AST: fires when an unlock or cancel-convert request completes.
 * Dispatches on l_unlock_action, clears BUSY and wakes l_event waiters.
 * A non-zero @error leaves the lockres state untouched.
 */
static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	unsigned long flags;

	mlog_entry_void();

	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
	     lockres->l_name, lockres->l_unlock_action);

	spin_lock_irqsave(&lockres->l_lock, flags);
	if (error) {
		/* On error we bail without clearing BUSY or waking
		 * waiters -- the unlock did not complete. */
		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
		     "unlock_action %d\n", error, lockres->l_name,
		     lockres->l_unlock_action);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		mlog_exit_void();
		return;
	}

	switch(lockres->l_unlock_action) {
	case OCFS2_UNLOCK_CANCEL_CONVERT:
		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
		lockres->l_action = OCFS2_AST_INVALID;
		/* Downconvert thread may have requeued this lock, we
		 * need to wake it. */
		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
		break;
	case OCFS2_UNLOCK_DROP_LOCK:
		/* Lock fully dropped; mark the level invalid. */
		lockres->l_level = DLM_LOCK_IV;
		break;
	default:
		/* Unlock AST with no unlock in flight is fatal. */
		BUG();
	}

	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog_exit_void();
}
1192 1188
1193 /* 1189 /*
1194 * This is the filesystem locking protocol. It provides the lock handling 1190 * This is the filesystem locking protocol. It provides the lock handling
1195 * hooks for the underlying DLM. It has a maximum version number. 1191 * hooks for the underlying DLM. It has a maximum version number.
1196 * The version number allows interoperability with systems running at 1192 * The version number allows interoperability with systems running at
1197 * the same major number and an equal or smaller minor number. 1193 * the same major number and an equal or smaller minor number.
1198 * 1194 *
1199 * Whenever the filesystem does new things with locks (adds or removes a 1195 * Whenever the filesystem does new things with locks (adds or removes a
1200 * lock, orders them differently, does different things underneath a lock), 1196 * lock, orders them differently, does different things underneath a lock),
1201 * the version must be changed. The protocol is negotiated when joining 1197 * the version must be changed. The protocol is negotiated when joining
1202 * the dlm domain. A node may join the domain if its major version is 1198 * the dlm domain. A node may join the domain if its major version is
1203 * identical to all other nodes and its minor version is greater than 1199 * identical to all other nodes and its minor version is greater than
1204 * or equal to all other nodes. When its minor version is greater than 1200 * or equal to all other nodes. When its minor version is greater than
1205 * the other nodes, it will run at the minor version specified by the 1201 * the other nodes, it will run at the minor version specified by the
1206 * other nodes. 1202 * other nodes.
1207 * 1203 *
1208 * If a locking change is made that will not be compatible with older 1204 * If a locking change is made that will not be compatible with older
1209 * versions, the major number must be increased and the minor version set 1205 * versions, the major number must be increased and the minor version set
1210 * to zero. If a change merely adds a behavior that can be disabled when 1206 * to zero. If a change merely adds a behavior that can be disabled when
1211 * speaking to older versions, the minor version must be increased. If a 1207 * speaking to older versions, the minor version must be increased. If a
1212 * change adds a fully backwards compatible change (eg, LVB changes that 1208 * change adds a fully backwards compatible change (eg, LVB changes that
1213 * are just ignored by older versions), the version does not need to be 1209 * are just ignored by older versions), the version does not need to be
1214 * updated. 1210 * updated.
1215 */ 1211 */
/* Filesystem locking protocol handed to the cluster stack; versioning
 * rules are described in the comment block above. */
static struct ocfs2_locking_protocol lproto = {
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	.lp_lock_ast = ocfs2_locking_ast,	/* lock/convert granted */
	.lp_blocking_ast = ocfs2_blocking_ast,	/* another node wants in */
	.lp_unlock_ast = ocfs2_unlock_ast,	/* unlock/cancel complete */
};
1225 1221
/* Publish lproto's maximum version to the stack glue at init time. */
void ocfs2_set_locking_protocol(void)
{
	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
}
1230 1226
1231 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1227 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1232 int convert) 1228 int convert)
1233 { 1229 {
1234 unsigned long flags; 1230 unsigned long flags;
1235 1231
1236 mlog_entry_void(); 1232 mlog_entry_void();
1237 spin_lock_irqsave(&lockres->l_lock, flags); 1233 spin_lock_irqsave(&lockres->l_lock, flags);
1238 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1234 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1239 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1235 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1240 if (convert) 1236 if (convert)
1241 lockres->l_action = OCFS2_AST_INVALID; 1237 lockres->l_action = OCFS2_AST_INVALID;
1242 else 1238 else
1243 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1239 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1244 spin_unlock_irqrestore(&lockres->l_lock, flags); 1240 spin_unlock_irqrestore(&lockres->l_lock, flags);
1245 1241
1246 wake_up(&lockres->l_event); 1242 wake_up(&lockres->l_event);
1247 mlog_exit_void(); 1243 mlog_exit_void();
1248 } 1244 }
1249 1245
1250 /* Note: If we detect another process working on the lock (i.e., 1246 /* Note: If we detect another process working on the lock (i.e.,
1251 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1247 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1252 * to do the right thing in that case. 1248 * to do the right thing in that case.
1253 */ 1249 */
1254 static int ocfs2_lock_create(struct ocfs2_super *osb, 1250 static int ocfs2_lock_create(struct ocfs2_super *osb,
1255 struct ocfs2_lock_res *lockres, 1251 struct ocfs2_lock_res *lockres,
1256 int level, 1252 int level,
1257 u32 dlm_flags) 1253 u32 dlm_flags)
1258 { 1254 {
1259 int ret = 0; 1255 int ret = 0;
1260 unsigned long flags; 1256 unsigned long flags;
1261 unsigned int gen; 1257 unsigned int gen;
1262 1258
1263 mlog_entry_void(); 1259 mlog_entry_void();
1264 1260
1265 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1261 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1266 dlm_flags); 1262 dlm_flags);
1267 1263
1268 spin_lock_irqsave(&lockres->l_lock, flags); 1264 spin_lock_irqsave(&lockres->l_lock, flags);
1269 if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1265 if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1270 (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1266 (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1271 spin_unlock_irqrestore(&lockres->l_lock, flags); 1267 spin_unlock_irqrestore(&lockres->l_lock, flags);
1272 goto bail; 1268 goto bail;
1273 } 1269 }
1274 1270
1275 lockres->l_action = OCFS2_AST_ATTACH; 1271 lockres->l_action = OCFS2_AST_ATTACH;
1276 lockres->l_requested = level; 1272 lockres->l_requested = level;
1277 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1273 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1278 gen = lockres_set_pending(lockres); 1274 gen = lockres_set_pending(lockres);
1279 spin_unlock_irqrestore(&lockres->l_lock, flags); 1275 spin_unlock_irqrestore(&lockres->l_lock, flags);
1280 1276
1281 ret = ocfs2_dlm_lock(osb->cconn, 1277 ret = ocfs2_dlm_lock(osb->cconn,
1282 level, 1278 level,
1283 &lockres->l_lksb, 1279 &lockres->l_lksb,
1284 dlm_flags, 1280 dlm_flags,
1285 lockres->l_name, 1281 lockres->l_name,
1286 OCFS2_LOCK_ID_MAX_LEN - 1); 1282 OCFS2_LOCK_ID_MAX_LEN - 1);
1287 lockres_clear_pending(lockres, gen, osb); 1283 lockres_clear_pending(lockres, gen, osb);
1288 if (ret) { 1284 if (ret) {
1289 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1285 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1290 ocfs2_recover_from_dlm_error(lockres, 1); 1286 ocfs2_recover_from_dlm_error(lockres, 1);
1291 } 1287 }
1292 1288
1293 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1289 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1294 1290
1295 bail: 1291 bail:
1296 mlog_exit(ret); 1292 mlog_exit(ret);
1297 return ret; 1293 return ret;
1298 } 1294 }
1299 1295
1300 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1296 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1301 int flag) 1297 int flag)
1302 { 1298 {
1303 unsigned long flags; 1299 unsigned long flags;
1304 int ret; 1300 int ret;
1305 1301
1306 spin_lock_irqsave(&lockres->l_lock, flags); 1302 spin_lock_irqsave(&lockres->l_lock, flags);
1307 ret = lockres->l_flags & flag; 1303 ret = lockres->l_flags & flag;
1308 spin_unlock_irqrestore(&lockres->l_lock, flags); 1304 spin_unlock_irqrestore(&lockres->l_lock, flags);
1309 1305
1310 return ret; 1306 return ret;
1311 } 1307 }
1312 1308
/* Sleep on l_event until OCFS2_LOCK_BUSY is clear. */
static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
}
1319 1315
/* Sleep on l_event until OCFS2_LOCK_REFRESHING is clear. */
static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
}
1326 1322
1327 /* predict what lock level we'll be dropping down to on behalf 1323 /* predict what lock level we'll be dropping down to on behalf
1328 * of another node, and return true if the currently wanted 1324 * of another node, and return true if the currently wanted
1329 * level will be compatible with it. */ 1325 * level will be compatible with it. */
1330 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1326 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1331 int wanted) 1327 int wanted)
1332 { 1328 {
1333 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1329 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1334 1330
1335 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1331 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1336 } 1332 }
1337 1333
1338 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1334 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1339 { 1335 {
1340 INIT_LIST_HEAD(&mw->mw_item); 1336 INIT_LIST_HEAD(&mw->mw_item);
1341 init_completion(&mw->mw_complete); 1337 init_completion(&mw->mw_complete);
1342 ocfs2_init_start_time(mw); 1338 ocfs2_init_start_time(mw);
1343 } 1339 }
1344 1340
1345 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1341 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1346 { 1342 {
1347 wait_for_completion(&mw->mw_complete); 1343 wait_for_completion(&mw->mw_complete);
1348 /* Re-arm the completion in case we want to wait on it again */ 1344 /* Re-arm the completion in case we want to wait on it again */
1349 INIT_COMPLETION(mw->mw_complete); 1345 INIT_COMPLETION(mw->mw_complete);
1350 return mw->mw_status; 1346 return mw->mw_status;
1351 } 1347 }
1352 1348
1353 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1349 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1354 struct ocfs2_mask_waiter *mw, 1350 struct ocfs2_mask_waiter *mw,
1355 unsigned long mask, 1351 unsigned long mask,
1356 unsigned long goal) 1352 unsigned long goal)
1357 { 1353 {
1358 BUG_ON(!list_empty(&mw->mw_item)); 1354 BUG_ON(!list_empty(&mw->mw_item));
1359 1355
1360 assert_spin_locked(&lockres->l_lock); 1356 assert_spin_locked(&lockres->l_lock);
1361 1357
1362 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1358 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1363 mw->mw_mask = mask; 1359 mw->mw_mask = mask;
1364 mw->mw_goal = goal; 1360 mw->mw_goal = goal;
1365 } 1361 }
1366 1362
1367 /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1363 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1368 * if the mask still hadn't reached its goal */ 1364 * if the mask still hadn't reached its goal */
1369 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1365 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1370 struct ocfs2_mask_waiter *mw) 1366 struct ocfs2_mask_waiter *mw)
1371 { 1367 {
1372 unsigned long flags; 1368 unsigned long flags;
1373 int ret = 0; 1369 int ret = 0;
1374 1370
1375 spin_lock_irqsave(&lockres->l_lock, flags); 1371 spin_lock_irqsave(&lockres->l_lock, flags);
1376 if (!list_empty(&mw->mw_item)) { 1372 if (!list_empty(&mw->mw_item)) {
1377 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1373 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1378 ret = -EBUSY; 1374 ret = -EBUSY;
1379 1375
1380 list_del_init(&mw->mw_item); 1376 list_del_init(&mw->mw_item);
1381 init_completion(&mw->mw_complete); 1377 init_completion(&mw->mw_complete);
1382 } 1378 }
1383 spin_unlock_irqrestore(&lockres->l_lock, flags); 1379 spin_unlock_irqrestore(&lockres->l_lock, flags);
1384 1380
1385 return ret; 1381 return ret;
1386 1382
1387 } 1383 }
1388 1384
1389 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1385 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1390 struct ocfs2_lock_res *lockres) 1386 struct ocfs2_lock_res *lockres)
1391 { 1387 {
1392 int ret; 1388 int ret;
1393 1389
1394 ret = wait_for_completion_interruptible(&mw->mw_complete); 1390 ret = wait_for_completion_interruptible(&mw->mw_complete);
1395 if (ret) 1391 if (ret)
1396 lockres_remove_mask_waiter(lockres, mw); 1392 lockres_remove_mask_waiter(lockres, mw);
1397 else 1393 else
1398 ret = mw->mw_status; 1394 ret = mw->mw_status;
1399 /* Re-arm the completion in case we want to wait on it again */ 1395 /* Re-arm the completion in case we want to wait on it again */
1400 INIT_COMPLETION(mw->mw_complete); 1396 INIT_COMPLETION(mw->mw_complete);
1401 return ret; 1397 return ret;
1402 } 1398 }
1403 1399
1404 static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1400 static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1405 struct ocfs2_lock_res *lockres, 1401 struct ocfs2_lock_res *lockres,
1406 int level, 1402 int level,
1407 u32 lkm_flags, 1403 u32 lkm_flags,
1408 int arg_flags, 1404 int arg_flags,
1409 int l_subclass, 1405 int l_subclass,
1410 unsigned long caller_ip) 1406 unsigned long caller_ip)
1411 { 1407 {
1412 struct ocfs2_mask_waiter mw; 1408 struct ocfs2_mask_waiter mw;
1413 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1409 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1414 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1410 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1415 unsigned long flags; 1411 unsigned long flags;
1416 unsigned int gen; 1412 unsigned int gen;
1417 int noqueue_attempted = 0; 1413 int noqueue_attempted = 0;
1418 1414
1419 mlog_entry_void(); 1415 mlog_entry_void();
1420 1416
1421 ocfs2_init_mask_waiter(&mw); 1417 ocfs2_init_mask_waiter(&mw);
1422 1418
1423 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1419 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1424 lkm_flags |= DLM_LKF_VALBLK; 1420 lkm_flags |= DLM_LKF_VALBLK;
1425 1421
1426 again: 1422 again:
1427 wait = 0; 1423 wait = 0;
1428 1424
1429 spin_lock_irqsave(&lockres->l_lock, flags); 1425 spin_lock_irqsave(&lockres->l_lock, flags);
1430 1426
1431 if (catch_signals && signal_pending(current)) { 1427 if (catch_signals && signal_pending(current)) {
1432 ret = -ERESTARTSYS; 1428 ret = -ERESTARTSYS;
1433 goto unlock; 1429 goto unlock;
1434 } 1430 }
1435 1431
1436 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1432 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1437 "Cluster lock called on freeing lockres %s! flags " 1433 "Cluster lock called on freeing lockres %s! flags "
1438 "0x%lx\n", lockres->l_name, lockres->l_flags); 1434 "0x%lx\n", lockres->l_name, lockres->l_flags);
1439 1435
1440 /* We only compare against the currently granted level 1436 /* We only compare against the currently granted level
1441 * here. If the lock is blocked waiting on a downconvert, 1437 * here. If the lock is blocked waiting on a downconvert,
1442 * we'll get caught below. */ 1438 * we'll get caught below. */
1443 if (lockres->l_flags & OCFS2_LOCK_BUSY && 1439 if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1444 level > lockres->l_level) { 1440 level > lockres->l_level) {
1445 /* is someone sitting in dlm_lock? If so, wait on 1441 /* is someone sitting in dlm_lock? If so, wait on
1446 * them. */ 1442 * them. */
1447 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1443 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1448 wait = 1; 1444 wait = 1;
1449 goto unlock; 1445 goto unlock;
1450 } 1446 }
1451 1447
1452 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { 1448 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1453 /* 1449 /*
1454 * We've upconverted. If the lock now has a level we can 1450 * We've upconverted. If the lock now has a level we can
1455 * work with, we take it. If, however, the lock is not at the 1451 * work with, we take it. If, however, the lock is not at the
1456 * required level, we go thru the full cycle. One way this could 1452 * required level, we go thru the full cycle. One way this could
1457 * happen is if a process requesting an upconvert to PR is 1453 * happen is if a process requesting an upconvert to PR is
1458 * closely followed by another requesting upconvert to an EX. 1454 * closely followed by another requesting upconvert to an EX.
1459 * If the process requesting EX lands here, we want it to 1455 * If the process requesting EX lands here, we want it to
1460 * continue attempting to upconvert and let the process 1456 * continue attempting to upconvert and let the process
1461 * requesting PR take the lock. 1457 * requesting PR take the lock.
1462 * If multiple processes request upconvert to PR, the first one 1458 * If multiple processes request upconvert to PR, the first one
1463 * here will take the lock. The others will have to go thru the 1459 * here will take the lock. The others will have to go thru the
1464 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending 1460 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1465 * downconvert request. 1461 * downconvert request.
1466 */ 1462 */
1467 if (level <= lockres->l_level) 1463 if (level <= lockres->l_level)
1468 goto update_holders; 1464 goto update_holders;
1469 } 1465 }
1470 1466
1471 if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1467 if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1472 !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1468 !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1473 /* is the lock is currently blocked on behalf of 1469 /* is the lock is currently blocked on behalf of
1474 * another node */ 1470 * another node */
1475 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1471 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1476 wait = 1; 1472 wait = 1;
1477 goto unlock; 1473 goto unlock;
1478 } 1474 }
1479 1475
1480 if (level > lockres->l_level) { 1476 if (level > lockres->l_level) {
1481 if (noqueue_attempted > 0) { 1477 if (noqueue_attempted > 0) {
1482 ret = -EAGAIN; 1478 ret = -EAGAIN;
1483 goto unlock; 1479 goto unlock;
1484 } 1480 }
1485 if (lkm_flags & DLM_LKF_NOQUEUE) 1481 if (lkm_flags & DLM_LKF_NOQUEUE)
1486 noqueue_attempted = 1; 1482 noqueue_attempted = 1;
1487 1483
1488 if (lockres->l_action != OCFS2_AST_INVALID) 1484 if (lockres->l_action != OCFS2_AST_INVALID)
1489 mlog(ML_ERROR, "lockres %s has action %u pending\n", 1485 mlog(ML_ERROR, "lockres %s has action %u pending\n",
1490 lockres->l_name, lockres->l_action); 1486 lockres->l_name, lockres->l_action);
1491 1487
1492 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1488 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1493 lockres->l_action = OCFS2_AST_ATTACH; 1489 lockres->l_action = OCFS2_AST_ATTACH;
1494 lkm_flags &= ~DLM_LKF_CONVERT; 1490 lkm_flags &= ~DLM_LKF_CONVERT;
1495 } else { 1491 } else {
1496 lockres->l_action = OCFS2_AST_CONVERT; 1492 lockres->l_action = OCFS2_AST_CONVERT;
1497 lkm_flags |= DLM_LKF_CONVERT; 1493 lkm_flags |= DLM_LKF_CONVERT;
1498 } 1494 }
1499 1495
1500 lockres->l_requested = level; 1496 lockres->l_requested = level;
1501 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1497 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1502 gen = lockres_set_pending(lockres); 1498 gen = lockres_set_pending(lockres);
1503 spin_unlock_irqrestore(&lockres->l_lock, flags); 1499 spin_unlock_irqrestore(&lockres->l_lock, flags);
1504 1500
1505 BUG_ON(level == DLM_LOCK_IV); 1501 BUG_ON(level == DLM_LOCK_IV);
1506 BUG_ON(level == DLM_LOCK_NL); 1502 BUG_ON(level == DLM_LOCK_NL);
1507 1503
1508 mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", 1504 mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1509 lockres->l_name, lockres->l_level, level); 1505 lockres->l_name, lockres->l_level, level);
1510 1506
1511 /* call dlm_lock to upgrade lock now */ 1507 /* call dlm_lock to upgrade lock now */
1512 ret = ocfs2_dlm_lock(osb->cconn, 1508 ret = ocfs2_dlm_lock(osb->cconn,
1513 level, 1509 level,
1514 &lockres->l_lksb, 1510 &lockres->l_lksb,
1515 lkm_flags, 1511 lkm_flags,
1516 lockres->l_name, 1512 lockres->l_name,
1517 OCFS2_LOCK_ID_MAX_LEN - 1); 1513 OCFS2_LOCK_ID_MAX_LEN - 1);
1518 lockres_clear_pending(lockres, gen, osb); 1514 lockres_clear_pending(lockres, gen, osb);
1519 if (ret) { 1515 if (ret) {
1520 if (!(lkm_flags & DLM_LKF_NOQUEUE) || 1516 if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
1521 (ret != -EAGAIN)) { 1517 (ret != -EAGAIN)) {
1522 ocfs2_log_dlm_error("ocfs2_dlm_lock", 1518 ocfs2_log_dlm_error("ocfs2_dlm_lock",
1523 ret, lockres); 1519 ret, lockres);
1524 } 1520 }
1525 ocfs2_recover_from_dlm_error(lockres, 1); 1521 ocfs2_recover_from_dlm_error(lockres, 1);
1526 goto out; 1522 goto out;
1527 } 1523 }
1528 1524
1529 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1525 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1530 lockres->l_name); 1526 lockres->l_name);
1531 1527
1532 /* At this point we've gone inside the dlm and need to 1528 /* At this point we've gone inside the dlm and need to
1533 * complete our work regardless. */ 1529 * complete our work regardless. */
1534 catch_signals = 0; 1530 catch_signals = 0;
1535 1531
1536 /* wait for busy to clear and carry on */ 1532 /* wait for busy to clear and carry on */
1537 goto again; 1533 goto again;
1538 } 1534 }
1539 1535
1540 update_holders: 1536 update_holders:
1541 /* Ok, if we get here then we're good to go. */ 1537 /* Ok, if we get here then we're good to go. */
1542 ocfs2_inc_holders(lockres, level); 1538 ocfs2_inc_holders(lockres, level);
1543 1539
1544 ret = 0; 1540 ret = 0;
1545 unlock: 1541 unlock:
1546 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1542 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1547 1543
1548 spin_unlock_irqrestore(&lockres->l_lock, flags); 1544 spin_unlock_irqrestore(&lockres->l_lock, flags);
1549 out: 1545 out:
1550 /* 1546 /*
1551 * This is helping work around a lock inversion between the page lock 1547 * This is helping work around a lock inversion between the page lock
1552 * and dlm locks. One path holds the page lock while calling aops 1548 * and dlm locks. One path holds the page lock while calling aops
1553 * which block acquiring dlm locks. The voting thread holds dlm 1549 * which block acquiring dlm locks. The voting thread holds dlm
1554 * locks while acquiring page locks while down converting data locks. 1550 * locks while acquiring page locks while down converting data locks.
1555 * This block is helping an aop path notice the inversion and back 1551 * This block is helping an aop path notice the inversion and back
1556 * off to unlock its page lock before trying the dlm lock again. 1552 * off to unlock its page lock before trying the dlm lock again.
1557 */ 1553 */
1558 if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1554 if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1559 mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1555 mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1560 wait = 0; 1556 wait = 0;
1561 if (lockres_remove_mask_waiter(lockres, &mw)) 1557 if (lockres_remove_mask_waiter(lockres, &mw))
1562 ret = -EAGAIN; 1558 ret = -EAGAIN;
1563 else 1559 else
1564 goto again; 1560 goto again;
1565 } 1561 }
1566 if (wait) { 1562 if (wait) {
1567 ret = ocfs2_wait_for_mask(&mw); 1563 ret = ocfs2_wait_for_mask(&mw);
1568 if (ret == 0) 1564 if (ret == 0)
1569 goto again; 1565 goto again;
1570 mlog_errno(ret); 1566 mlog_errno(ret);
1571 } 1567 }
1572 ocfs2_update_lock_stats(lockres, level, &mw, ret); 1568 ocfs2_update_lock_stats(lockres, level, &mw, ret);
1573 1569
1574 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1570 #ifdef CONFIG_DEBUG_LOCK_ALLOC
1575 if (!ret && lockres->l_lockdep_map.key != NULL) { 1571 if (!ret && lockres->l_lockdep_map.key != NULL) {
1576 if (level == DLM_LOCK_PR) 1572 if (level == DLM_LOCK_PR)
1577 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, 1573 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1578 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1574 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1579 caller_ip); 1575 caller_ip);
1580 else 1576 else
1581 rwsem_acquire(&lockres->l_lockdep_map, l_subclass, 1577 rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1582 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1578 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1583 caller_ip); 1579 caller_ip);
1584 } 1580 }
1585 #endif 1581 #endif
1586 mlog_exit(ret); 1582 mlog_exit(ret);
1587 return ret; 1583 return ret;
1588 } 1584 }
1589 1585
1590 static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, 1586 static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1591 struct ocfs2_lock_res *lockres, 1587 struct ocfs2_lock_res *lockres,
1592 int level, 1588 int level,
1593 u32 lkm_flags, 1589 u32 lkm_flags,
1594 int arg_flags) 1590 int arg_flags)
1595 { 1591 {
1596 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, 1592 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1597 0, _RET_IP_); 1593 0, _RET_IP_);
1598 } 1594 }
1599 1595
1600 1596
/*
 * Drop one holder reference on @lockres at @level and, while still under
 * l_lock, give the downconvert logic a chance to run if another node is
 * blocked on this resource.  @caller_ip is reported to lockdep as the
 * release site.
 */
static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
				   struct ocfs2_lock_res *lockres,
				   int level,
				   unsigned long caller_ip)
{
	unsigned long flags;

	mlog_entry_void();
	spin_lock_irqsave(&lockres->l_lock, flags);
	ocfs2_dec_holders(lockres, level);
	/* May kick the downconvert thread if we were the last holder
	 * and the lock is blocking another node. */
	ocfs2_downconvert_on_unlock(osb, lockres);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (lockres->l_lockdep_map.key != NULL)
		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
#endif
	mlog_exit_void();
}
1619 1615
1620 static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1616 static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1621 struct ocfs2_lock_res *lockres, 1617 struct ocfs2_lock_res *lockres,
1622 int ex, 1618 int ex,
1623 int local) 1619 int local)
1624 { 1620 {
1625 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1621 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1626 unsigned long flags; 1622 unsigned long flags;
1627 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1623 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1628 1624
1629 spin_lock_irqsave(&lockres->l_lock, flags); 1625 spin_lock_irqsave(&lockres->l_lock, flags);
1630 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1626 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1631 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1627 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1632 spin_unlock_irqrestore(&lockres->l_lock, flags); 1628 spin_unlock_irqrestore(&lockres->l_lock, flags);
1633 1629
1634 return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1630 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1635 } 1631 }
1636 1632
1637 /* Grants us an EX lock on the data and metadata resources, skipping 1633 /* Grants us an EX lock on the data and metadata resources, skipping
1638 * the normal cluster directory lookup. Use this ONLY on newly created 1634 * the normal cluster directory lookup. Use this ONLY on newly created
1639 * inodes which other nodes can't possibly see, and which haven't been 1635 * inodes which other nodes can't possibly see, and which haven't been
1640 * hashed in the inode hash yet. This can give us a good performance 1636 * hashed in the inode hash yet. This can give us a good performance
1641 * increase as it'll skip the network broadcast normally associated 1637 * increase as it'll skip the network broadcast normally associated
1642 * with creating a new lock resource. */ 1638 * with creating a new lock resource. */
/*
 * Locally create the rw, inode (meta) and open locks for a freshly
 * allocated inode.  Returns 0 on success or a negative errno from the
 * first lock creation that fails; locks created before a failure are
 * left in place for the normal teardown paths to release.
 */
int ocfs2_create_new_inode_locks(struct inode *inode)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	BUG_ON(!inode);
	BUG_ON(!ocfs2_inode_is_new(inode));

	mlog_entry_void();

	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);

	/* NOTE: That we don't increment any of the holder counts, nor
	 * do we add anything to a journal handle. Since this is
	 * supposed to be a new inode which the cluster doesn't know
	 * about yet, there is no need to. As far as the LVB handling
	 * is concerned, this is basically like acquiring an EX lock
	 * on a resource which has an invalid one -- we'll set it
	 * valid when we release the EX. */

	/* rw lock: EX, and DLM_LKF_LOCAL since its name includes the
	 * inode generation. */
	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
	if (ret) {
		mlog_errno(ret);
		goto bail;
	}

	/*
	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
	 * don't use a generation in their lock names.
	 */
	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
	if (ret) {
		mlog_errno(ret);
		goto bail;
	}

	/* open lock: PR (ex = 0), non-local. */
	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
	if (ret) {
		mlog_errno(ret);
		goto bail;
	}

bail:
	mlog_exit(ret);
	return ret;
}
1689 1685
1690 int ocfs2_rw_lock(struct inode *inode, int write) 1686 int ocfs2_rw_lock(struct inode *inode, int write)
1691 { 1687 {
1692 int status, level; 1688 int status, level;
1693 struct ocfs2_lock_res *lockres; 1689 struct ocfs2_lock_res *lockres;
1694 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1690 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1695 1691
1696 BUG_ON(!inode); 1692 BUG_ON(!inode);
1697 1693
1698 mlog_entry_void(); 1694 mlog_entry_void();
1699 1695
1700 mlog(0, "inode %llu take %s RW lock\n", 1696 mlog(0, "inode %llu take %s RW lock\n",
1701 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1697 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1702 write ? "EXMODE" : "PRMODE"); 1698 write ? "EXMODE" : "PRMODE");
1703 1699
1704 if (ocfs2_mount_local(osb)) { 1700 if (ocfs2_mount_local(osb)) {
1705 mlog_exit(0); 1701 mlog_exit(0);
1706 return 0; 1702 return 0;
1707 } 1703 }
1708 1704
1709 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1705 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1710 1706
1711 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1707 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1712 1708
1713 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1709 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1714 0); 1710 0);
1715 if (status < 0) 1711 if (status < 0)
1716 mlog_errno(status); 1712 mlog_errno(status);
1717 1713
1718 mlog_exit(status); 1714 mlog_exit(status);
1719 return status; 1715 return status;
1720 } 1716 }
1721 1717
1722 void ocfs2_rw_unlock(struct inode *inode, int write) 1718 void ocfs2_rw_unlock(struct inode *inode, int write)
1723 { 1719 {
1724 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1720 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1725 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1721 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1726 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1722 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1727 1723
1728 mlog_entry_void(); 1724 mlog_entry_void();
1729 1725
1730 mlog(0, "inode %llu drop %s RW lock\n", 1726 mlog(0, "inode %llu drop %s RW lock\n",
1731 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1727 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1732 write ? "EXMODE" : "PRMODE"); 1728 write ? "EXMODE" : "PRMODE");
1733 1729
1734 if (!ocfs2_mount_local(osb)) 1730 if (!ocfs2_mount_local(osb))
1735 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1731 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1736 1732
1737 mlog_exit_void(); 1733 mlog_exit_void();
1738 } 1734 }
1739 1735
1740 /* 1736 /*
1741 * ocfs2_open_lock always get PR mode lock. 1737 * ocfs2_open_lock always get PR mode lock.
1742 */ 1738 */
1743 int ocfs2_open_lock(struct inode *inode) 1739 int ocfs2_open_lock(struct inode *inode)
1744 { 1740 {
1745 int status = 0; 1741 int status = 0;
1746 struct ocfs2_lock_res *lockres; 1742 struct ocfs2_lock_res *lockres;
1747 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1743 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1748 1744
1749 BUG_ON(!inode); 1745 BUG_ON(!inode);
1750 1746
1751 mlog_entry_void(); 1747 mlog_entry_void();
1752 1748
1753 mlog(0, "inode %llu take PRMODE open lock\n", 1749 mlog(0, "inode %llu take PRMODE open lock\n",
1754 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1750 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1755 1751
1756 if (ocfs2_mount_local(osb)) 1752 if (ocfs2_mount_local(osb))
1757 goto out; 1753 goto out;
1758 1754
1759 lockres = &OCFS2_I(inode)->ip_open_lockres; 1755 lockres = &OCFS2_I(inode)->ip_open_lockres;
1760 1756
1761 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1757 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1762 DLM_LOCK_PR, 0, 0); 1758 DLM_LOCK_PR, 0, 0);
1763 if (status < 0) 1759 if (status < 0)
1764 mlog_errno(status); 1760 mlog_errno(status);
1765 1761
1766 out: 1762 out:
1767 mlog_exit(status); 1763 mlog_exit(status);
1768 return status; 1764 return status;
1769 } 1765 }
1770 1766
1771 int ocfs2_try_open_lock(struct inode *inode, int write) 1767 int ocfs2_try_open_lock(struct inode *inode, int write)
1772 { 1768 {
1773 int status = 0, level; 1769 int status = 0, level;
1774 struct ocfs2_lock_res *lockres; 1770 struct ocfs2_lock_res *lockres;
1775 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1771 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1776 1772
1777 BUG_ON(!inode); 1773 BUG_ON(!inode);
1778 1774
1779 mlog_entry_void(); 1775 mlog_entry_void();
1780 1776
1781 mlog(0, "inode %llu try to take %s open lock\n", 1777 mlog(0, "inode %llu try to take %s open lock\n",
1782 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1778 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1783 write ? "EXMODE" : "PRMODE"); 1779 write ? "EXMODE" : "PRMODE");
1784 1780
1785 if (ocfs2_mount_local(osb)) 1781 if (ocfs2_mount_local(osb))
1786 goto out; 1782 goto out;
1787 1783
1788 lockres = &OCFS2_I(inode)->ip_open_lockres; 1784 lockres = &OCFS2_I(inode)->ip_open_lockres;
1789 1785
1790 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1786 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1791 1787
1792 /* 1788 /*
1793 * The file system may already holding a PRMODE/EXMODE open lock. 1789 * The file system may already holding a PRMODE/EXMODE open lock.
1794 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 1790 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
1795 * other nodes and the -EAGAIN will indicate to the caller that 1791 * other nodes and the -EAGAIN will indicate to the caller that
1796 * this inode is still in use. 1792 * this inode is still in use.
1797 */ 1793 */
1798 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1794 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1799 level, DLM_LKF_NOQUEUE, 0); 1795 level, DLM_LKF_NOQUEUE, 0);
1800 1796
1801 out: 1797 out:
1802 mlog_exit(status); 1798 mlog_exit(status);
1803 return status; 1799 return status;
1804 } 1800 }
1805 1801
1806 /* 1802 /*
1807 * ocfs2_open_unlock unlock PR and EX mode open locks. 1803 * ocfs2_open_unlock unlock PR and EX mode open locks.
1808 */ 1804 */
1809 void ocfs2_open_unlock(struct inode *inode) 1805 void ocfs2_open_unlock(struct inode *inode)
1810 { 1806 {
1811 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 1807 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1812 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1808 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1813 1809
1814 mlog_entry_void(); 1810 mlog_entry_void();
1815 1811
1816 mlog(0, "inode %llu drop open lock\n", 1812 mlog(0, "inode %llu drop open lock\n",
1817 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1813 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1818 1814
1819 if (ocfs2_mount_local(osb)) 1815 if (ocfs2_mount_local(osb))
1820 goto out; 1816 goto out;
1821 1817
1822 if(lockres->l_ro_holders) 1818 if(lockres->l_ro_holders)
1823 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1819 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1824 DLM_LOCK_PR); 1820 DLM_LOCK_PR);
1825 if(lockres->l_ex_holders) 1821 if(lockres->l_ex_holders)
1826 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1822 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1827 DLM_LOCK_EX); 1823 DLM_LOCK_EX);
1828 1824
1829 out: 1825 out:
1830 mlog_exit_void(); 1826 mlog_exit_void();
1831 } 1827 }
1832 1828
/*
 * A signal interrupted an flock() conversion.  Cancel any in-flight
 * convert, then decide whether to restart the syscall: returns
 * -ERESTARTSYS unless the lock was granted anyway (l_level == level),
 * in which case we report success.
 */
static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
				     int level)
{
	int ret;
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	struct ocfs2_mask_waiter mw;

	ocfs2_init_mask_waiter(&mw);

retry_cancel:
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
		/* Nonzero from prepare means a cancel is needed and safe
		 * to issue; issue it without holding l_lock. */
		ret = ocfs2_prepare_cancel_convert(osb, lockres);
		if (ret) {
			spin_unlock_irqrestore(&lockres->l_lock, flags);
			ret = ocfs2_cancel_convert(osb, lockres);
			if (ret < 0) {
				mlog_errno(ret);
				goto out;
			}
			goto retry_cancel;
		}
		/* Cancel not possible right now - wait for BUSY to clear
		 * and re-examine the lockres. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		ocfs2_wait_for_mask(&mw);
		goto retry_cancel;
	}

	ret = -ERESTARTSYS;
	/*
	 * We may still have gotten the lock, in which case there's no
	 * point to restarting the syscall.
	 */
	if (lockres->l_level == level)
		ret = 0;

	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
	     lockres->l_flags, lockres->l_level, lockres->l_action);

	spin_unlock_irqrestore(&lockres->l_lock, flags);

out:
	return ret;
}
1879 1875
1880 /* 1876 /*
1881 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1877 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1882 * flock() calls. The locking approach this requires is sufficiently 1878 * flock() calls. The locking approach this requires is sufficiently
1883 * different from all other cluster lock types that we implement a 1879 * different from all other cluster lock types that we implement a
1884 * separate path to the "low-level" dlm calls. In particular: 1880 * separate path to the "low-level" dlm calls. In particular:
1885 * 1881 *
1886 * - No optimization of lock levels is done - we take at exactly 1882 * - No optimization of lock levels is done - we take at exactly
1887 * what's been requested. 1883 * what's been requested.
1888 * 1884 *
1889 * - No lock caching is employed. We immediately downconvert to 1885 * - No lock caching is employed. We immediately downconvert to
1890 * no-lock at unlock time. This also means flock locks never go on 1886 * no-lock at unlock time. This also means flock locks never go on
1891 * the blocking list). 1887 * the blocking list).
1892 * 1888 *
1893 * - Since userspace can trivially deadlock itself with flock, we make 1889 * - Since userspace can trivially deadlock itself with flock, we make
1894 * sure to allow cancellation of a misbehaving applications flock() 1890 * sure to allow cancellation of a misbehaving applications flock()
1895 * request. 1891 * request.
1896 * 1892 *
1897 * - Access to any flock lockres doesn't require concurrency, so we 1893 * - Access to any flock lockres doesn't require concurrency, so we
1898 * can simplify the code by requiring the caller to guarantee 1894 * can simplify the code by requiring the caller to guarantee
1899 * serialization of dlmglue flock calls. 1895 * serialization of dlmglue flock calls.
1900 */ 1896 */
/*
 * Take a per-file-descriptor flock() lock: EX if @ex, else PR.
 * With @trylock, DLM_LKF_NOQUEUE is used and -EAGAIN is returned if
 * the lock cannot be granted immediately.  The caller guarantees
 * serialization of flock calls on this lockres (see comment block
 * above), so unlocked reads of l_flags/l_level on entry are safe.
 */
int ocfs2_file_lock(struct file *file, int ex, int trylock)
{
	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
	unsigned long flags;
	struct ocfs2_file_private *fp = file->private_data;
	struct ocfs2_lock_res *lockres = &fp->fp_flock;
	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
	struct ocfs2_mask_waiter mw;

	ocfs2_init_mask_waiter(&mw);

	/* Serialization by the caller means the lockres must be idle
	 * here; anything else is a bug. */
	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
	    (lockres->l_level > DLM_LOCK_NL)) {
		mlog(ML_ERROR,
		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
		     "level: %u\n", lockres->l_name, lockres->l_flags,
		     lockres->l_level);
		return -EINVAL;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);
	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		/*
		 * Get the lock at NLMODE to start - that way we
		 * can cancel the upconvert request if need be.
		 */
		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_wait_for_mask(&mw);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		spin_lock_irqsave(&lockres->l_lock, flags);
	}

	/* Convert the NL lock up to the requested level. */
	lockres->l_action = OCFS2_AST_CONVERT;
	lkm_flags |= DLM_LKF_CONVERT;
	lockres->l_requested = level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);

	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
	if (ret) {
		if (!trylock || (ret != -EAGAIN)) {
			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
			ret = -EINVAL;
		}

		ocfs2_recover_from_dlm_error(lockres, 1);
		lockres_remove_mask_waiter(lockres, &mw);
		goto out;
	}

	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
	if (ret == -ERESTARTSYS) {
		/*
		 * Userspace can cause deadlock itself with
		 * flock(). Current behavior locally is to allow the
		 * deadlock, but abort the system call if a signal is
		 * received. We follow this example, otherwise a
		 * poorly written program could sit in kernel until
		 * reboot.
		 *
		 * Handling this is a bit more complicated for Ocfs2
		 * though. We can't exit this function with an
		 * outstanding lock request, so a cancel convert is
		 * required. We intentionally overwrite 'ret' - if the
		 * cancel fails and the lock was granted, it's easier
		 * to just bubble success back up to the user.
		 */
		ret = ocfs2_flock_handle_signal(lockres, level);
	} else if (!ret && (level > lockres->l_level)) {
		/* Trylock failed asynchronously */
		BUG_ON(!trylock);
		ret = -EAGAIN;
	}

out:

	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
	     lockres->l_name, ex, trylock, ret);
	return ret;
}
1996 1992
/*
 * Drop a file's flock cluster lock by downconverting it all the way to
 * DLM_LOCK_NL.  Called without the lockres spinlock held; this routine
 * takes and releases it itself.
 */
void ocfs2_file_unlock(struct file *file)
{
	int ret;
	unsigned int gen;
	unsigned long flags;
	struct ocfs2_file_private *fp = file->private_data;
	struct ocfs2_lock_res *lockres = &fp->fp_flock;
	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
	struct ocfs2_mask_waiter mw;

	ocfs2_init_mask_waiter(&mw);

	/* No cluster lock was ever attached to this flock resource -
	 * nothing to drop. */
	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
		return;

	/* Already at no-lock level; nothing to downconvert. */
	if (lockres->l_level == DLM_LOCK_NL)
		return;

	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
	     lockres->l_name, lockres->l_flags, lockres->l_level,
	     lockres->l_action);

	spin_lock_irqsave(&lockres->l_lock, flags);
	/*
	 * Fake a blocking ast for the downconvert code.
	 */
	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
	lockres->l_blocking = DLM_LOCK_EX;

	/* Stage the downconvert to NL and register a waiter that is
	 * released once OCFS2_LOCK_BUSY clears, i.e. once the convert
	 * completes. */
	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
	if (ret) {
		mlog_errno(ret);
		return;
	}

	/* Wait for the downconvert to finish before returning. */
	ret = ocfs2_wait_for_mask(&mw);
	if (ret)
		mlog_errno(ret);
}
2040 2036
2041 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 2037 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2042 struct ocfs2_lock_res *lockres) 2038 struct ocfs2_lock_res *lockres)
2043 { 2039 {
2044 int kick = 0; 2040 int kick = 0;
2045 2041
2046 mlog_entry_void(); 2042 mlog_entry_void();
2047 2043
2048 /* If we know that another node is waiting on our lock, kick 2044 /* If we know that another node is waiting on our lock, kick
2049 * the downconvert thread * pre-emptively when we reach a release 2045 * the downconvert thread * pre-emptively when we reach a release
2050 * condition. */ 2046 * condition. */
2051 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 2047 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
2052 switch(lockres->l_blocking) { 2048 switch(lockres->l_blocking) {
2053 case DLM_LOCK_EX: 2049 case DLM_LOCK_EX:
2054 if (!lockres->l_ex_holders && !lockres->l_ro_holders) 2050 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
2055 kick = 1; 2051 kick = 1;
2056 break; 2052 break;
2057 case DLM_LOCK_PR: 2053 case DLM_LOCK_PR:
2058 if (!lockres->l_ex_holders) 2054 if (!lockres->l_ex_holders)
2059 kick = 1; 2055 kick = 1;
2060 break; 2056 break;
2061 default: 2057 default:
2062 BUG(); 2058 BUG();
2063 } 2059 }
2064 } 2060 }
2065 2061
2066 if (kick) 2062 if (kick)
2067 ocfs2_wake_downconvert_thread(osb); 2063 ocfs2_wake_downconvert_thread(osb);
2068 2064
2069 mlog_exit_void(); 2065 mlog_exit_void();
2070 } 2066 }
2071 2067
2072 #define OCFS2_SEC_BITS 34 2068 #define OCFS2_SEC_BITS 34
2073 #define OCFS2_SEC_SHIFT (64 - 34) 2069 #define OCFS2_SEC_SHIFT (64 - 34)
2074 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 2070 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)
2075 2071
2076 /* LVB only has room for 64 bits of time here so we pack it for 2072 /* LVB only has room for 64 bits of time here so we pack it for
2077 * now. */ 2073 * now. */
2078 static u64 ocfs2_pack_timespec(struct timespec *spec) 2074 static u64 ocfs2_pack_timespec(struct timespec *spec)
2079 { 2075 {
2080 u64 res; 2076 u64 res;
2081 u64 sec = spec->tv_sec; 2077 u64 sec = spec->tv_sec;
2082 u32 nsec = spec->tv_nsec; 2078 u32 nsec = spec->tv_nsec;
2083 2079
2084 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 2080 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2085 2081
2086 return res; 2082 return res;
2087 } 2083 }
2088 2084
/* Call this with the lockres locked. I am reasonably sure we don't
 * need ip_lock in this function as anyone who would be changing those
 * values is supposed to be blocked in ocfs2_inode_lock right now.
 *
 * Copies the inode's metadata into the lock value block so other nodes
 * can pick it up without a disk read.  All fields are stored
 * big-endian; timestamps are packed into a u64 each. */
static void __ocfs2_stuff_meta_lvb(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_meta_lvb *lvb;

	mlog_entry_void();

	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);

	/*
	 * Invalidate the LVB of a deleted inode - this way other
	 * nodes are forced to go to disk and discover the new inode
	 * status.
	 */
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		/* version 0 marks the LVB as untrustworthy, see
		 * ocfs2_meta_lvb_is_trustable(). */
		lvb->lvb_version = 0;
		goto out;
	}

	lvb->lvb_version = OCFS2_LVB_VERSION;
	lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
	lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
	lvb->lvb_igid = cpu_to_be32(inode->i_gid);
	lvb->lvb_imode = cpu_to_be16(inode->i_mode);
	lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
	lvb->lvb_iatime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
	lvb->lvb_ictime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
	lvb->lvb_imtime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
	lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);

out:
	mlog_meta_lvb(0, lockres);

	mlog_exit_void();
}
2134 2130
2135 static void ocfs2_unpack_timespec(struct timespec *spec, 2131 static void ocfs2_unpack_timespec(struct timespec *spec,
2136 u64 packed_time) 2132 u64 packed_time)
2137 { 2133 {
2138 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 2134 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2139 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 2135 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2140 } 2136 }
2141 2137
/*
 * Refresh the in-memory inode from the metadata another node stashed in
 * the lock value block, avoiding a disk read.  Counterpart of
 * __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_meta_lvb *lvb;

	mlog_entry_void();

	mlog_meta_lvb(0, lockres);

	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);

	/* We're safe here without the lockres lock... */
	spin_lock(&oi->ip_lock);
	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));

	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
	/* propagate attribute bits (immutable etc.) to the VFS inode */
	ocfs2_set_inode_flags(inode);

	/* fast-symlinks are a special case */
	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
		inode->i_blocks = 0;
	else
		inode->i_blocks = ocfs2_inode_sector_count(inode);

	inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
	inode->i_gid = be32_to_cpu(lvb->lvb_igid);
	inode->i_mode = be16_to_cpu(lvb->lvb_imode);
	inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
	/* timestamps travel packed into a u64 each - see
	 * ocfs2_pack_timespec() */
	ocfs2_unpack_timespec(&inode->i_atime,
			      be64_to_cpu(lvb->lvb_iatime_packed));
	ocfs2_unpack_timespec(&inode->i_mtime,
			      be64_to_cpu(lvb->lvb_imtime_packed));
	ocfs2_unpack_timespec(&inode->i_ctime,
			      be64_to_cpu(lvb->lvb_ictime_packed));
	spin_unlock(&oi->ip_lock);

	mlog_exit_void();
}
2183 2179
2184 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2180 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2185 struct ocfs2_lock_res *lockres) 2181 struct ocfs2_lock_res *lockres)
2186 { 2182 {
2187 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2183 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2188 2184
2189 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 2185 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
2190 && lvb->lvb_version == OCFS2_LVB_VERSION 2186 && lvb->lvb_version == OCFS2_LVB_VERSION
2191 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2187 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2192 return 1; 2188 return 1;
2193 return 0; 2189 return 0;
2194 } 2190 }
2195 2191
/* Determine whether a lock resource needs to be refreshed, and
 * arbitrate who gets to refresh it.
 *
 * 0 means no refresh needed.
 *
 * > 0 means you need to refresh this and you MUST call
 * ocfs2_complete_lock_res_refresh afterwards. */
static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
{
	unsigned long flags;
	int status = 0;

	mlog_entry_void();

refresh_check:
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	/* Somebody else is refreshing right now - wait for them to
	 * finish, then re-check from the top (they may have cleared
	 * NEEDS_REFRESH, or a new refresh may be needed again). */
	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		ocfs2_wait_on_refreshing_lock(lockres);
		goto refresh_check;
	}

	/* Ok, I'll be the one to refresh this lock. */
	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = 1;
bail:
	mlog_exit(status);
	return status;
}
2233 2229
/* If status is non zero, I'll mark it as not being in refresh
 * anymroe, but i won't clear the needs refresh flag.
 *
 * Pairs with a successful ocfs2_should_refresh_lock_res(): clears
 * REFRESHING unconditionally, clears NEEDS_REFRESH only on success
 * (status == 0), and wakes anyone waiting in
 * ocfs2_wait_on_refreshing_lock(). */
static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
						   int status)
{
	unsigned long flags;
	mlog_entry_void();

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
	if (!status)
		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	wake_up(&lockres->l_event);

	mlog_exit_void();
}
2252 2248
/* may or may not return a bh if it went to disk.
 *
 * After winning an inode cluster lock, bring the in-memory inode back
 * in sync: purge stale cached metadata, then refresh either from the
 * LVB (cheap) or from the on-disk dinode (read into *bh).
 *
 * Returns 0 on success, -ENOENT if the inode was deleted while we
 * waited for the lock, or a negative error from the block read. */
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh)
{
	int status = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_dinode *fe;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	/* Local (single node) mounts have no remote writers, so no
	 * refresh is ever needed. */
	if (ocfs2_mount_local(osb))
		goto bail;

	spin_lock(&oi->ip_lock);
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		mlog(0, "Orphaned inode %llu was deleted while we "
		     "were waiting on a lock. ip_flags = 0x%x\n",
		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
		spin_unlock(&oi->ip_lock);
		status = -ENOENT;
		goto bail;
	}
	spin_unlock(&oi->ip_lock);

	/* Arbitrates so only one task does the refresh; if we win we
	 * must call ocfs2_complete_lock_res_refresh() below. */
	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;

	/* This will discard any caching information we might have had
	 * for the inode metadata. */
	ocfs2_metadata_cache_purge(INODE_CACHE(inode));

	ocfs2_extent_map_trunc(inode, 0);

	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
		mlog(0, "Trusting LVB on inode %llu\n",
		     (unsigned long long)oi->ip_blkno);
		ocfs2_refresh_inode_from_lvb(inode);
	} else {
		/* Boo, we have to go to disk. */
		/* read bh, cast, ocfs2_refresh_inode */
		status = ocfs2_read_inode_block(inode, bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail_refresh;
		}
		fe = (struct ocfs2_dinode *) (*bh)->b_data;

		/* This is a good chance to make sure we're not
		 * locking an invalid object. ocfs2_read_inode_block()
		 * already checked that the inode block is sane.
		 *
		 * We bug on a stale inode here because we checked
		 * above whether it was wiped from disk. The wiping
		 * node provides a guarantee that we receive that
		 * message and can mark the inode before dropping any
		 * locks associated with it. */
		mlog_bug_on_msg(inode->i_generation !=
				le32_to_cpu(fe->i_generation),
				"Invalid dinode %llu disk generation: %u "
				"inode->i_generation: %u\n",
				(unsigned long long)oi->ip_blkno,
				le32_to_cpu(fe->i_generation),
				inode->i_generation);
		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
				(unsigned long long)oi->ip_blkno,
				(unsigned long long)le64_to_cpu(fe->i_dtime),
				le32_to_cpu(fe->i_flags));

		ocfs2_refresh_inode(inode, fe);
		ocfs2_track_lock_refresh(lockres);
	}

	status = 0;
bail_refresh:
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}
2336 2332
/*
 * Hand the caller a buffer_head for the inode block.  If the lock
 * update already read one for us (passed_bh), reuse it with an extra
 * reference; otherwise read the block from disk.  The caller owns the
 * reference in *ret_bh on success.
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (passed_bh) {
		/* The update already went to disk for us - take our
		 * own reference on the bh it returned. */
		get_bh(passed_bh);
		*ret_bh = passed_bh;
		return 0;
	}

	status = ocfs2_read_inode_block(inode, ret_bh);
	if (status < 0)
		mlog_errno(status);

	return status;
}
2358 2354
/*
 * returns < 0 error if the callback will never be called, otherwise
 * the result of the lock will be communicated via the callback.
 *
 * Main entry point for taking an inode's metadata cluster lock.
 * @ret_bh: optional; on success receives a referenced buffer_head for
 *          the inode block (caller must brelse it).
 * @ex: non-zero for an exclusive (EX) lock, zero for protected-read (PR).
 * @arg_flags: OCFS2_META_LOCK_* modifiers (NOQUEUE, RECOVERY, ...).
 * @subclass: lockdep nesting subclass.
 */
int ocfs2_inode_lock_full_nested(struct inode *inode,
				 struct buffer_head **ret_bh,
				 int ex,
				 int arg_flags,
				 int subclass)
{
	int status, level, acquired;
	u32 dlm_flags;
	struct ocfs2_lock_res *lockres = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *local_bh = NULL;

	BUG_ON(!inode);

	mlog_entry_void();

	mlog(0, "inode %llu, take %s META lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     ex ? "EXMODE" : "PRMODE");

	status = 0;
	acquired = 0;
	/* We'll allow faking a readonly metadata lock for
	 * rodevices. */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}

	/* Single-node mount: skip the DLM entirely. */
	if (ocfs2_mount_local(osb))
		goto local;

	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		ocfs2_wait_for_recovery(osb);

	lockres = &OCFS2_I(inode)->ip_inode_lockres;
	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	dlm_flags = 0;
	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
		dlm_flags |= DLM_LKF_NOQUEUE;

	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
				      arg_flags, subclass, _RET_IP_);
	if (status < 0) {
		/* -EAGAIN (NOQUEUE/NONBLOCK) and -EIOCBRETRY are
		 * expected outcomes, not errors worth logging. */
		if (status != -EAGAIN && status != -EIOCBRETRY)
			mlog_errno(status);
		goto bail;
	}

	/* Notify the error cleanup path to drop the cluster lock. */
	acquired = 1;

	/* We wait twice because a node may have died while we were in
	 * the lower dlm layers. The second time though, we've
	 * committed to owning this lock so we don't allow signals to
	 * abort the operation. */
	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		ocfs2_wait_for_recovery(osb);

local:
	/*
	 * We only see this flag if we're being called from
	 * ocfs2_read_locked_inode(). It means we're locking an inode
	 * which hasn't been populated yet, so clear the refresh flag
	 * and let the caller handle it.
	 */
	if (inode->i_state & I_NEW) {
		status = 0;
		if (lockres)
			ocfs2_complete_lock_res_refresh(lockres, 0);
		goto bail;
	}

	/* This is fun. The caller may want a bh back, or it may
	 * not. ocfs2_inode_lock_update definitely wants one in, but
	 * may or may not read one, depending on what's in the
	 * LVB. The result of all of this is that we've *only* gone to
	 * disk if we have to, so the complexity is worthwhile. */
	status = ocfs2_inode_lock_update(inode, &local_bh);
	if (status < 0) {
		/* -ENOENT just means the inode was deleted under us;
		 * no log spam for that. */
		if (status != -ENOENT)
			mlog_errno(status);
		goto bail;
	}

	if (ret_bh) {
		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

bail:
	if (status < 0) {
		/* Error cleanup: release anything we handed out or
		 * acquired above. */
		if (ret_bh && (*ret_bh)) {
			brelse(*ret_bh);
			*ret_bh = NULL;
		}
		if (acquired)
			ocfs2_inode_unlock(inode, ex);
	}

	if (local_bh)
		brelse(local_bh);

	mlog_exit(status);
	return status;
}
2473 2469
2474 /* 2470 /*
2475 * This is working around a lock inversion between tasks acquiring DLM 2471 * This is working around a lock inversion between tasks acquiring DLM
2476 * locks while holding a page lock and the downconvert thread which 2472 * locks while holding a page lock and the downconvert thread which
2477 * blocks dlm lock acquiry while acquiring page locks. 2473 * blocks dlm lock acquiry while acquiring page locks.
2478 * 2474 *
2479 * ** These _with_page variantes are only intended to be called from aop 2475 * ** These _with_page variantes are only intended to be called from aop
2480 * methods that hold page locks and return a very specific *positive* error 2476 * methods that hold page locks and return a very specific *positive* error
2481 * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2477 * code that aop methods pass up to the VFS -- test for errors with != 0. **
2482 * 2478 *
2483 * The DLM is called such that it returns -EAGAIN if it would have 2479 * The DLM is called such that it returns -EAGAIN if it would have
2484 * blocked waiting for the downconvert thread. In that case we unlock 2480 * blocked waiting for the downconvert thread. In that case we unlock
2485 * our page so the downconvert thread can make progress. Once we've 2481 * our page so the downconvert thread can make progress. Once we've
2486 * done this we have to return AOP_TRUNCATED_PAGE so the aop method 2482 * done this we have to return AOP_TRUNCATED_PAGE so the aop method
2487 * that called us can bubble that back up into the VFS who will then 2483 * that called us can bubble that back up into the VFS who will then
2488 * immediately retry the aop call. 2484 * immediately retry the aop call.
2489 * 2485 *
2490 * We do a blocking lock and immediate unlock before returning, though, so that 2486 * We do a blocking lock and immediate unlock before returning, though, so that
2491 * the lock has a great chance of being cached on this node by the time the VFS 2487 * the lock has a great chance of being cached on this node by the time the VFS
2492 * calls back to retry the aop. This has a potential to livelock as nodes 2488 * calls back to retry the aop. This has a potential to livelock as nodes
2493 * ping locks back and forth, but that's a risk we're willing to take to avoid 2489 * ping locks back and forth, but that's a risk we're willing to take to avoid
2494 * the lock inversion simply. 2490 * the lock inversion simply.
2495 */ 2491 */
2496 int ocfs2_inode_lock_with_page(struct inode *inode, 2492 int ocfs2_inode_lock_with_page(struct inode *inode,
2497 struct buffer_head **ret_bh, 2493 struct buffer_head **ret_bh,
2498 int ex, 2494 int ex,
2499 struct page *page) 2495 struct page *page)
2500 { 2496 {
2501 int ret; 2497 int ret;
2502 2498
2503 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2499 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2504 if (ret == -EAGAIN) { 2500 if (ret == -EAGAIN) {
2505 unlock_page(page); 2501 unlock_page(page);
2506 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2502 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2507 ocfs2_inode_unlock(inode, ex); 2503 ocfs2_inode_unlock(inode, ex);
2508 ret = AOP_TRUNCATED_PAGE; 2504 ret = AOP_TRUNCATED_PAGE;
2509 } 2505 }
2510 2506
2511 return ret; 2507 return ret;
2512 } 2508 }
2513 2509
2514 int ocfs2_inode_lock_atime(struct inode *inode, 2510 int ocfs2_inode_lock_atime(struct inode *inode,
2515 struct vfsmount *vfsmnt, 2511 struct vfsmount *vfsmnt,
2516 int *level) 2512 int *level)
2517 { 2513 {
2518 int ret; 2514 int ret;
2519 2515
2520 mlog_entry_void(); 2516 mlog_entry_void();
2521 ret = ocfs2_inode_lock(inode, NULL, 0); 2517 ret = ocfs2_inode_lock(inode, NULL, 0);
2522 if (ret < 0) { 2518 if (ret < 0) {
2523 mlog_errno(ret); 2519 mlog_errno(ret);
2524 return ret; 2520 return ret;
2525 } 2521 }
2526 2522
2527 /* 2523 /*
2528 * If we should update atime, we will get EX lock, 2524 * If we should update atime, we will get EX lock,
2529 * otherwise we just get PR lock. 2525 * otherwise we just get PR lock.
2530 */ 2526 */
2531 if (ocfs2_should_update_atime(inode, vfsmnt)) { 2527 if (ocfs2_should_update_atime(inode, vfsmnt)) {
2532 struct buffer_head *bh = NULL; 2528 struct buffer_head *bh = NULL;
2533 2529
2534 ocfs2_inode_unlock(inode, 0); 2530 ocfs2_inode_unlock(inode, 0);
2535 ret = ocfs2_inode_lock(inode, &bh, 1); 2531 ret = ocfs2_inode_lock(inode, &bh, 1);
2536 if (ret < 0) { 2532 if (ret < 0) {
2537 mlog_errno(ret); 2533 mlog_errno(ret);
2538 return ret; 2534 return ret;
2539 } 2535 }
2540 *level = 1; 2536 *level = 1;
2541 if (ocfs2_should_update_atime(inode, vfsmnt)) 2537 if (ocfs2_should_update_atime(inode, vfsmnt))
2542 ocfs2_update_inode_atime(inode, bh); 2538 ocfs2_update_inode_atime(inode, bh);
2543 if (bh) 2539 if (bh)
2544 brelse(bh); 2540 brelse(bh);
2545 } else 2541 } else
2546 *level = 0; 2542 *level = 0;
2547 2543
2548 mlog_exit(ret); 2544 mlog_exit(ret);
2549 return ret; 2545 return ret;
2550 } 2546 }
2551 2547
2552 void ocfs2_inode_unlock(struct inode *inode, 2548 void ocfs2_inode_unlock(struct inode *inode,
2553 int ex) 2549 int ex)
2554 { 2550 {
2555 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2551 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2556 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2552 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2557 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2553 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2558 2554
2559 mlog_entry_void(); 2555 mlog_entry_void();
2560 2556
2561 mlog(0, "inode %llu drop %s META lock\n", 2557 mlog(0, "inode %llu drop %s META lock\n",
2562 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2558 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2563 ex ? "EXMODE" : "PRMODE"); 2559 ex ? "EXMODE" : "PRMODE");
2564 2560
2565 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2561 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2566 !ocfs2_mount_local(osb)) 2562 !ocfs2_mount_local(osb))
2567 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2563 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2568 2564
2569 mlog_exit_void(); 2565 mlog_exit_void();
2570 } 2566 }
2571 2567
2572 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 2568 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
2573 { 2569 {
2574 struct ocfs2_lock_res *lockres; 2570 struct ocfs2_lock_res *lockres;
2575 struct ocfs2_orphan_scan_lvb *lvb; 2571 struct ocfs2_orphan_scan_lvb *lvb;
2576 int status = 0; 2572 int status = 0;
2577 2573
2578 if (ocfs2_is_hard_readonly(osb)) 2574 if (ocfs2_is_hard_readonly(osb))
2579 return -EROFS; 2575 return -EROFS;
2580 2576
2581 if (ocfs2_mount_local(osb)) 2577 if (ocfs2_mount_local(osb))
2582 return 0; 2578 return 0;
2583 2579
2584 lockres = &osb->osb_orphan_scan.os_lockres; 2580 lockres = &osb->osb_orphan_scan.os_lockres;
2585 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2581 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2586 if (status < 0) 2582 if (status < 0)
2587 return status; 2583 return status;
2588 2584
2589 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2585 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2590 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 2586 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2591 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 2587 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
2592 *seqno = be32_to_cpu(lvb->lvb_os_seqno); 2588 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
2593 else 2589 else
2594 *seqno = osb->osb_orphan_scan.os_seqno + 1; 2590 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2595 2591
2596 return status; 2592 return status;
2597 } 2593 }
2598 2594
2599 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) 2595 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
2600 { 2596 {
2601 struct ocfs2_lock_res *lockres; 2597 struct ocfs2_lock_res *lockres;
2602 struct ocfs2_orphan_scan_lvb *lvb; 2598 struct ocfs2_orphan_scan_lvb *lvb;
2603 2599
2604 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { 2600 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
2605 lockres = &osb->osb_orphan_scan.os_lockres; 2601 lockres = &osb->osb_orphan_scan.os_lockres;
2606 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2602 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2607 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 2603 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2608 lvb->lvb_os_seqno = cpu_to_be32(seqno); 2604 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2609 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2605 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2610 } 2606 }
2611 } 2607 }
2612 2608
2613 int ocfs2_super_lock(struct ocfs2_super *osb, 2609 int ocfs2_super_lock(struct ocfs2_super *osb,
2614 int ex) 2610 int ex)
2615 { 2611 {
2616 int status = 0; 2612 int status = 0;
2617 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2613 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2618 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2614 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2619 2615
2620 mlog_entry_void(); 2616 mlog_entry_void();
2621 2617
2622 if (ocfs2_is_hard_readonly(osb)) 2618 if (ocfs2_is_hard_readonly(osb))
2623 return -EROFS; 2619 return -EROFS;
2624 2620
2625 if (ocfs2_mount_local(osb)) 2621 if (ocfs2_mount_local(osb))
2626 goto bail; 2622 goto bail;
2627 2623
2628 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2624 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2629 if (status < 0) { 2625 if (status < 0) {
2630 mlog_errno(status); 2626 mlog_errno(status);
2631 goto bail; 2627 goto bail;
2632 } 2628 }
2633 2629
2634 /* The super block lock path is really in the best position to 2630 /* The super block lock path is really in the best position to
2635 * know when resources covered by the lock need to be 2631 * know when resources covered by the lock need to be
2636 * refreshed, so we do it here. Of course, making sense of 2632 * refreshed, so we do it here. Of course, making sense of
2637 * everything is up to the caller :) */ 2633 * everything is up to the caller :) */
2638 status = ocfs2_should_refresh_lock_res(lockres); 2634 status = ocfs2_should_refresh_lock_res(lockres);
2639 if (status < 0) { 2635 if (status < 0) {
2640 mlog_errno(status); 2636 mlog_errno(status);
2641 goto bail; 2637 goto bail;
2642 } 2638 }
2643 if (status) { 2639 if (status) {
2644 status = ocfs2_refresh_slot_info(osb); 2640 status = ocfs2_refresh_slot_info(osb);
2645 2641
2646 ocfs2_complete_lock_res_refresh(lockres, status); 2642 ocfs2_complete_lock_res_refresh(lockres, status);
2647 2643
2648 if (status < 0) 2644 if (status < 0)
2649 mlog_errno(status); 2645 mlog_errno(status);
2650 ocfs2_track_lock_refresh(lockres); 2646 ocfs2_track_lock_refresh(lockres);
2651 } 2647 }
2652 bail: 2648 bail:
2653 mlog_exit(status); 2649 mlog_exit(status);
2654 return status; 2650 return status;
2655 } 2651 }
2656 2652
2657 void ocfs2_super_unlock(struct ocfs2_super *osb, 2653 void ocfs2_super_unlock(struct ocfs2_super *osb,
2658 int ex) 2654 int ex)
2659 { 2655 {
2660 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2656 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2661 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2657 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2662 2658
2663 if (!ocfs2_mount_local(osb)) 2659 if (!ocfs2_mount_local(osb))
2664 ocfs2_cluster_unlock(osb, lockres, level); 2660 ocfs2_cluster_unlock(osb, lockres, level);
2665 } 2661 }
2666 2662
2667 int ocfs2_rename_lock(struct ocfs2_super *osb) 2663 int ocfs2_rename_lock(struct ocfs2_super *osb)
2668 { 2664 {
2669 int status; 2665 int status;
2670 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2666 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2671 2667
2672 if (ocfs2_is_hard_readonly(osb)) 2668 if (ocfs2_is_hard_readonly(osb))
2673 return -EROFS; 2669 return -EROFS;
2674 2670
2675 if (ocfs2_mount_local(osb)) 2671 if (ocfs2_mount_local(osb))
2676 return 0; 2672 return 0;
2677 2673
2678 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2674 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2679 if (status < 0) 2675 if (status < 0)
2680 mlog_errno(status); 2676 mlog_errno(status);
2681 2677
2682 return status; 2678 return status;
2683 } 2679 }
2684 2680
2685 void ocfs2_rename_unlock(struct ocfs2_super *osb) 2681 void ocfs2_rename_unlock(struct ocfs2_super *osb)
2686 { 2682 {
2687 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2683 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2688 2684
2689 if (!ocfs2_mount_local(osb)) 2685 if (!ocfs2_mount_local(osb))
2690 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2686 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2691 } 2687 }
2692 2688
2693 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 2689 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
2694 { 2690 {
2695 int status; 2691 int status;
2696 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2692 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2697 2693
2698 if (ocfs2_is_hard_readonly(osb)) 2694 if (ocfs2_is_hard_readonly(osb))
2699 return -EROFS; 2695 return -EROFS;
2700 2696
2701 if (ocfs2_mount_local(osb)) 2697 if (ocfs2_mount_local(osb))
2702 return 0; 2698 return 0;
2703 2699
2704 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 2700 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
2705 0, 0); 2701 0, 0);
2706 if (status < 0) 2702 if (status < 0)
2707 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 2703 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
2708 2704
2709 return status; 2705 return status;
2710 } 2706 }
2711 2707
2712 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 2708 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
2713 { 2709 {
2714 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2710 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2715 2711
2716 if (!ocfs2_mount_local(osb)) 2712 if (!ocfs2_mount_local(osb))
2717 ocfs2_cluster_unlock(osb, lockres, 2713 ocfs2_cluster_unlock(osb, lockres,
2718 ex ? LKM_EXMODE : LKM_PRMODE); 2714 ex ? LKM_EXMODE : LKM_PRMODE);
2719 } 2715 }
2720 2716
2721 int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2717 int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2722 { 2718 {
2723 int ret; 2719 int ret;
2724 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2720 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2725 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2721 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2726 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2722 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2727 2723
2728 BUG_ON(!dl); 2724 BUG_ON(!dl);
2729 2725
2730 if (ocfs2_is_hard_readonly(osb)) 2726 if (ocfs2_is_hard_readonly(osb))
2731 return -EROFS; 2727 return -EROFS;
2732 2728
2733 if (ocfs2_mount_local(osb)) 2729 if (ocfs2_mount_local(osb))
2734 return 0; 2730 return 0;
2735 2731
2736 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2732 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2737 if (ret < 0) 2733 if (ret < 0)
2738 mlog_errno(ret); 2734 mlog_errno(ret);
2739 2735
2740 return ret; 2736 return ret;
2741 } 2737 }
2742 2738
2743 void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2739 void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2744 { 2740 {
2745 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2741 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2746 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2742 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2747 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2743 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2748 2744
2749 if (!ocfs2_mount_local(osb)) 2745 if (!ocfs2_mount_local(osb))
2750 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2746 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2751 } 2747 }
2752 2748
2753 /* Reference counting of the dlm debug structure. We want this because 2749 /* Reference counting of the dlm debug structure. We want this because
2754 * open references on the debug inodes can live on after a mount, so 2750 * open references on the debug inodes can live on after a mount, so
2755 * we can't rely on the ocfs2_super to always exist. */ 2751 * we can't rely on the ocfs2_super to always exist. */
2756 static void ocfs2_dlm_debug_free(struct kref *kref) 2752 static void ocfs2_dlm_debug_free(struct kref *kref)
2757 { 2753 {
2758 struct ocfs2_dlm_debug *dlm_debug; 2754 struct ocfs2_dlm_debug *dlm_debug;
2759 2755
2760 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2756 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2761 2757
2762 kfree(dlm_debug); 2758 kfree(dlm_debug);
2763 } 2759 }
2764 2760
2765 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2761 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2766 { 2762 {
2767 if (dlm_debug) 2763 if (dlm_debug)
2768 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2764 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2769 } 2765 }
2770 2766
2771 static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2767 static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2772 { 2768 {
2773 kref_get(&debug->d_refcnt); 2769 kref_get(&debug->d_refcnt);
2774 } 2770 }
2775 2771
2776 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2772 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2777 { 2773 {
2778 struct ocfs2_dlm_debug *dlm_debug; 2774 struct ocfs2_dlm_debug *dlm_debug;
2779 2775
2780 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2776 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2781 if (!dlm_debug) { 2777 if (!dlm_debug) {
2782 mlog_errno(-ENOMEM); 2778 mlog_errno(-ENOMEM);
2783 goto out; 2779 goto out;
2784 } 2780 }
2785 2781
2786 kref_init(&dlm_debug->d_refcnt); 2782 kref_init(&dlm_debug->d_refcnt);
2787 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2783 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2788 dlm_debug->d_locking_state = NULL; 2784 dlm_debug->d_locking_state = NULL;
2789 out: 2785 out:
2790 return dlm_debug; 2786 return dlm_debug;
2791 } 2787 }
2792 2788
2793 /* Access to this is arbitrated for us via seq_file->sem. */ 2789 /* Access to this is arbitrated for us via seq_file->sem. */
2794 struct ocfs2_dlm_seq_priv { 2790 struct ocfs2_dlm_seq_priv {
2795 struct ocfs2_dlm_debug *p_dlm_debug; 2791 struct ocfs2_dlm_debug *p_dlm_debug;
2796 struct ocfs2_lock_res p_iter_res; 2792 struct ocfs2_lock_res p_iter_res;
2797 struct ocfs2_lock_res p_tmp_res; 2793 struct ocfs2_lock_res p_tmp_res;
2798 }; 2794 };
2799 2795
2800 static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2796 static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2801 struct ocfs2_dlm_seq_priv *priv) 2797 struct ocfs2_dlm_seq_priv *priv)
2802 { 2798 {
2803 struct ocfs2_lock_res *iter, *ret = NULL; 2799 struct ocfs2_lock_res *iter, *ret = NULL;
2804 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2800 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2805 2801
2806 assert_spin_locked(&ocfs2_dlm_tracking_lock); 2802 assert_spin_locked(&ocfs2_dlm_tracking_lock);
2807 2803
2808 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2804 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2809 /* discover the head of the list */ 2805 /* discover the head of the list */
2810 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2806 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2811 mlog(0, "End of list found, %p\n", ret); 2807 mlog(0, "End of list found, %p\n", ret);
2812 break; 2808 break;
2813 } 2809 }
2814 2810
2815 /* We track our "dummy" iteration lockres' by a NULL 2811 /* We track our "dummy" iteration lockres' by a NULL
2816 * l_ops field. */ 2812 * l_ops field. */
2817 if (iter->l_ops != NULL) { 2813 if (iter->l_ops != NULL) {
2818 ret = iter; 2814 ret = iter;
2819 break; 2815 break;
2820 } 2816 }
2821 } 2817 }
2822 2818
2823 return ret; 2819 return ret;
2824 } 2820 }
2825 2821
2826 static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2822 static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2827 { 2823 {
2828 struct ocfs2_dlm_seq_priv *priv = m->private; 2824 struct ocfs2_dlm_seq_priv *priv = m->private;
2829 struct ocfs2_lock_res *iter; 2825 struct ocfs2_lock_res *iter;
2830 2826
2831 spin_lock(&ocfs2_dlm_tracking_lock); 2827 spin_lock(&ocfs2_dlm_tracking_lock);
2832 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2828 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2833 if (iter) { 2829 if (iter) {
2834 /* Since lockres' have the lifetime of their container 2830 /* Since lockres' have the lifetime of their container
2835 * (which can be inodes, ocfs2_supers, etc) we want to 2831 * (which can be inodes, ocfs2_supers, etc) we want to
2836 * copy this out to a temporary lockres while still 2832 * copy this out to a temporary lockres while still
2837 * under the spinlock. Obviously after this we can't 2833 * under the spinlock. Obviously after this we can't
2838 * trust any pointers on the copy returned, but that's 2834 * trust any pointers on the copy returned, but that's
2839 * ok as the information we want isn't typically held 2835 * ok as the information we want isn't typically held
2840 * in them. */ 2836 * in them. */
2841 priv->p_tmp_res = *iter; 2837 priv->p_tmp_res = *iter;
2842 iter = &priv->p_tmp_res; 2838 iter = &priv->p_tmp_res;
2843 } 2839 }
2844 spin_unlock(&ocfs2_dlm_tracking_lock); 2840 spin_unlock(&ocfs2_dlm_tracking_lock);
2845 2841
2846 return iter; 2842 return iter;
2847 } 2843 }
2848 2844
2849 static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2845 static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
2850 { 2846 {
2851 } 2847 }
2852 2848
2853 static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2849 static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2854 { 2850 {
2855 struct ocfs2_dlm_seq_priv *priv = m->private; 2851 struct ocfs2_dlm_seq_priv *priv = m->private;
2856 struct ocfs2_lock_res *iter = v; 2852 struct ocfs2_lock_res *iter = v;
2857 struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2853 struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2858 2854
2859 spin_lock(&ocfs2_dlm_tracking_lock); 2855 spin_lock(&ocfs2_dlm_tracking_lock);
2860 iter = ocfs2_dlm_next_res(iter, priv); 2856 iter = ocfs2_dlm_next_res(iter, priv);
2861 list_del_init(&dummy->l_debug_list); 2857 list_del_init(&dummy->l_debug_list);
2862 if (iter) { 2858 if (iter) {
2863 list_add(&dummy->l_debug_list, &iter->l_debug_list); 2859 list_add(&dummy->l_debug_list, &iter->l_debug_list);
2864 priv->p_tmp_res = *iter; 2860 priv->p_tmp_res = *iter;
2865 iter = &priv->p_tmp_res; 2861 iter = &priv->p_tmp_res;
2866 } 2862 }
2867 spin_unlock(&ocfs2_dlm_tracking_lock); 2863 spin_unlock(&ocfs2_dlm_tracking_lock);
2868 2864
2869 return iter; 2865 return iter;
2870 } 2866 }
2871 2867
2872 /* So that debugfs.ocfs2 can determine which format is being used */ 2868 /*
2873 #define OCFS2_DLM_DEBUG_STR_VERSION 2 2869 * Version is used by debugfs.ocfs2 to determine the format being used
2870 *
2871 * New in version 2
2872 * - Lock stats printed
2873 * New in version 3
2874 * - Max time in lock stats is in usecs (instead of nsecs)
2875 */
2876 #define OCFS2_DLM_DEBUG_STR_VERSION 3
2874 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2877 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2875 { 2878 {
2876 int i; 2879 int i;
2877 char *lvb; 2880 char *lvb;
2878 struct ocfs2_lock_res *lockres = v; 2881 struct ocfs2_lock_res *lockres = v;
2879 2882
2880 if (!lockres) 2883 if (!lockres)
2881 return -EINVAL; 2884 return -EINVAL;
2882 2885
2883 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2886 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2884 2887
2885 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2888 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2886 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2889 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2887 lockres->l_name, 2890 lockres->l_name,
2888 (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2891 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2889 else 2892 else
2890 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2893 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2891 2894
2892 seq_printf(m, "%d\t" 2895 seq_printf(m, "%d\t"
2893 "0x%lx\t" 2896 "0x%lx\t"
2894 "0x%x\t" 2897 "0x%x\t"
2895 "0x%x\t" 2898 "0x%x\t"
2896 "%u\t" 2899 "%u\t"
2897 "%u\t" 2900 "%u\t"
2898 "%d\t" 2901 "%d\t"
2899 "%d\t", 2902 "%d\t",
2900 lockres->l_level, 2903 lockres->l_level,
2901 lockres->l_flags, 2904 lockres->l_flags,
2902 lockres->l_action, 2905 lockres->l_action,
2903 lockres->l_unlock_action, 2906 lockres->l_unlock_action,
2904 lockres->l_ro_holders, 2907 lockres->l_ro_holders,
2905 lockres->l_ex_holders, 2908 lockres->l_ex_holders,
2906 lockres->l_requested, 2909 lockres->l_requested,
2907 lockres->l_blocking); 2910 lockres->l_blocking);
2908 2911
2909 /* Dump the raw LVB */ 2912 /* Dump the raw LVB */
2910 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2913 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2911 for(i = 0; i < DLM_LVB_LEN; i++) 2914 for(i = 0; i < DLM_LVB_LEN; i++)
2912 seq_printf(m, "0x%x\t", lvb[i]); 2915 seq_printf(m, "0x%x\t", lvb[i]);
2913 2916
2914 #ifdef CONFIG_OCFS2_FS_STATS 2917 #ifdef CONFIG_OCFS2_FS_STATS
2915 # define lock_num_prmode(_l) (_l)->l_lock_num_prmode 2918 # define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets)
2916 # define lock_num_exmode(_l) (_l)->l_lock_num_exmode 2919 # define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets)
2917 # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed 2920 # define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail)
2918 # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed 2921 # define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail)
2919 # define lock_total_prmode(_l) (_l)->l_lock_total_prmode 2922 # define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total)
2920 # define lock_total_exmode(_l) (_l)->l_lock_total_exmode 2923 # define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total)
2921 # define lock_max_prmode(_l) (_l)->l_lock_max_prmode 2924 # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max)
2922 # define lock_max_exmode(_l) (_l)->l_lock_max_exmode 2925 # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max)
2923 # define lock_refresh(_l) (_l)->l_lock_refresh 2926 # define lock_refresh(_l) ((_l)->l_lock_refresh)
2924 #else 2927 #else
2925 # define lock_num_prmode(_l) (0ULL) 2928 # define lock_num_prmode(_l) (0)
2926 # define lock_num_exmode(_l) (0ULL) 2929 # define lock_num_exmode(_l) (0)
2927 # define lock_num_prmode_failed(_l) (0) 2930 # define lock_num_prmode_failed(_l) (0)
2928 # define lock_num_exmode_failed(_l) (0) 2931 # define lock_num_exmode_failed(_l) (0)
2929 # define lock_total_prmode(_l) (0ULL) 2932 # define lock_total_prmode(_l) (0ULL)
2930 # define lock_total_exmode(_l) (0ULL) 2933 # define lock_total_exmode(_l) (0ULL)
2931 # define lock_max_prmode(_l) (0) 2934 # define lock_max_prmode(_l) (0)
2932 # define lock_max_exmode(_l) (0) 2935 # define lock_max_exmode(_l) (0)
2933 # define lock_refresh(_l) (0) 2936 # define lock_refresh(_l) (0)
2934 #endif 2937 #endif
2935 /* The following seq_print was added in version 2 of this output */ 2938 /* The following seq_print was added in version 2 of this output */
2936 seq_printf(m, "%llu\t" 2939 seq_printf(m, "%u\t"
2937 "%llu\t" 2940 "%u\t"
2938 "%u\t" 2941 "%u\t"
2939 "%u\t" 2942 "%u\t"
2940 "%llu\t" 2943 "%llu\t"
2941 "%llu\t" 2944 "%llu\t"
2942 "%u\t" 2945 "%u\t"
2943 "%u\t" 2946 "%u\t"
2944 "%u\t", 2947 "%u\t",
2945 lock_num_prmode(lockres), 2948 lock_num_prmode(lockres),
2946 lock_num_exmode(lockres), 2949 lock_num_exmode(lockres),
2947 lock_num_prmode_failed(lockres), 2950 lock_num_prmode_failed(lockres),
2948 lock_num_exmode_failed(lockres), 2951 lock_num_exmode_failed(lockres),
2949 lock_total_prmode(lockres), 2952 lock_total_prmode(lockres),
2950 lock_total_exmode(lockres), 2953 lock_total_exmode(lockres),
2951 lock_max_prmode(lockres), 2954 lock_max_prmode(lockres),
2952 lock_max_exmode(lockres), 2955 lock_max_exmode(lockres),
2953 lock_refresh(lockres)); 2956 lock_refresh(lockres));
2954 2957
2955 /* End the line */ 2958 /* End the line */
2956 seq_printf(m, "\n"); 2959 seq_printf(m, "\n");
2957 return 0; 2960 return 0;
2958 } 2961 }
2959 2962
2960 static const struct seq_operations ocfs2_dlm_seq_ops = { 2963 static const struct seq_operations ocfs2_dlm_seq_ops = {
2961 .start = ocfs2_dlm_seq_start, 2964 .start = ocfs2_dlm_seq_start,
2962 .stop = ocfs2_dlm_seq_stop, 2965 .stop = ocfs2_dlm_seq_stop,
2963 .next = ocfs2_dlm_seq_next, 2966 .next = ocfs2_dlm_seq_next,
2964 .show = ocfs2_dlm_seq_show, 2967 .show = ocfs2_dlm_seq_show,
2965 }; 2968 };
2966 2969
2967 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2970 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2968 { 2971 {
2969 struct seq_file *seq = file->private_data; 2972 struct seq_file *seq = file->private_data;
2970 struct ocfs2_dlm_seq_priv *priv = seq->private; 2973 struct ocfs2_dlm_seq_priv *priv = seq->private;
2971 struct ocfs2_lock_res *res = &priv->p_iter_res; 2974 struct ocfs2_lock_res *res = &priv->p_iter_res;
2972 2975
2973 ocfs2_remove_lockres_tracking(res); 2976 ocfs2_remove_lockres_tracking(res);
2974 ocfs2_put_dlm_debug(priv->p_dlm_debug); 2977 ocfs2_put_dlm_debug(priv->p_dlm_debug);
2975 return seq_release_private(inode, file); 2978 return seq_release_private(inode, file);
2976 } 2979 }
2977 2980
2978 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2981 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2979 { 2982 {
2980 int ret; 2983 int ret;
2981 struct ocfs2_dlm_seq_priv *priv; 2984 struct ocfs2_dlm_seq_priv *priv;
2982 struct seq_file *seq; 2985 struct seq_file *seq;
2983 struct ocfs2_super *osb; 2986 struct ocfs2_super *osb;
2984 2987
2985 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2988 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL);
2986 if (!priv) { 2989 if (!priv) {
2987 ret = -ENOMEM; 2990 ret = -ENOMEM;
2988 mlog_errno(ret); 2991 mlog_errno(ret);
2989 goto out; 2992 goto out;
2990 } 2993 }
2991 osb = inode->i_private; 2994 osb = inode->i_private;
2992 ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2995 ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2993 priv->p_dlm_debug = osb->osb_dlm_debug; 2996 priv->p_dlm_debug = osb->osb_dlm_debug;
2994 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2997 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2995 2998
2996 ret = seq_open(file, &ocfs2_dlm_seq_ops); 2999 ret = seq_open(file, &ocfs2_dlm_seq_ops);
2997 if (ret) { 3000 if (ret) {
2998 kfree(priv); 3001 kfree(priv);
2999 mlog_errno(ret); 3002 mlog_errno(ret);
3000 goto out; 3003 goto out;
3001 } 3004 }
3002 3005
3003 seq = file->private_data; 3006 seq = file->private_data;
3004 seq->private = priv; 3007 seq->private = priv;
3005 3008
3006 ocfs2_add_lockres_tracking(&priv->p_iter_res, 3009 ocfs2_add_lockres_tracking(&priv->p_iter_res,
3007 priv->p_dlm_debug); 3010 priv->p_dlm_debug);
3008 3011
3009 out: 3012 out:
3010 return ret; 3013 return ret;
3011 } 3014 }
3012 3015
3013 static const struct file_operations ocfs2_dlm_debug_fops = { 3016 static const struct file_operations ocfs2_dlm_debug_fops = {
3014 .open = ocfs2_dlm_debug_open, 3017 .open = ocfs2_dlm_debug_open,
3015 .release = ocfs2_dlm_debug_release, 3018 .release = ocfs2_dlm_debug_release,
3016 .read = seq_read, 3019 .read = seq_read,
3017 .llseek = seq_lseek, 3020 .llseek = seq_lseek,
3018 }; 3021 };
3019 3022
3020 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 3023 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
3021 { 3024 {
3022 int ret = 0; 3025 int ret = 0;
3023 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 3026 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3024 3027
3025 dlm_debug->d_locking_state = debugfs_create_file("locking_state", 3028 dlm_debug->d_locking_state = debugfs_create_file("locking_state",
3026 S_IFREG|S_IRUSR, 3029 S_IFREG|S_IRUSR,
3027 osb->osb_debug_root, 3030 osb->osb_debug_root,
3028 osb, 3031 osb,
3029 &ocfs2_dlm_debug_fops); 3032 &ocfs2_dlm_debug_fops);
3030 if (!dlm_debug->d_locking_state) { 3033 if (!dlm_debug->d_locking_state) {
3031 ret = -EINVAL; 3034 ret = -EINVAL;
3032 mlog(ML_ERROR, 3035 mlog(ML_ERROR,
3033 "Unable to create locking state debugfs file.\n"); 3036 "Unable to create locking state debugfs file.\n");
3034 goto out; 3037 goto out;
3035 } 3038 }
3036 3039
3037 ocfs2_get_dlm_debug(dlm_debug); 3040 ocfs2_get_dlm_debug(dlm_debug);
3038 out: 3041 out:
3039 return ret; 3042 return ret;
3040 } 3043 }
3041 3044
3042 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 3045 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
3043 { 3046 {
3044 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 3047 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3045 3048
3046 if (dlm_debug) { 3049 if (dlm_debug) {
3047 debugfs_remove(dlm_debug->d_locking_state); 3050 debugfs_remove(dlm_debug->d_locking_state);
3048 ocfs2_put_dlm_debug(dlm_debug); 3051 ocfs2_put_dlm_debug(dlm_debug);
3049 } 3052 }
3050 } 3053 }
3051 3054
/*
 * Bring up cluster locking for this superblock.
 *
 * For a local (single node) mount we skip the cluster connection
 * entirely and only initialize the static osb lock resources.  For a
 * cluster mount we set up the debugfs locking_state file, start the
 * downconvert thread, connect to the cluster stack and learn our node
 * number.
 *
 * Returns 0 on success or a negative error code; on failure the debug
 * state and downconvert thread are torn down again before returning.
 */
int ocfs2_dlm_init(struct ocfs2_super *osb)
{
	int status = 0;
	struct ocfs2_cluster_connection *conn = NULL;

	mlog_entry_void();

	if (ocfs2_mount_local(osb)) {
		/* Local mounts need no DLM; node 0 by convention. */
		osb->node_num = 0;
		goto local;
	}

	status = ocfs2_dlm_init_debug(osb);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* launch downconvert thread */
	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
	if (IS_ERR(osb->dc_task)) {
		status = PTR_ERR(osb->dc_task);
		/* NULL the task so the bail path doesn't stop an ERR_PTR. */
		osb->dc_task = NULL;
		mlog_errno(status);
		goto bail;
	}

	/* for now, uuid == domain */
	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
				       osb->uuid_str,
				       strlen(osb->uuid_str),
				       &lproto, ocfs2_do_node_down, osb,
				       &conn);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_cluster_this_node(&osb->node_num);
	if (status < 0) {
		mlog_errno(status);
		mlog(ML_ERROR,
		     "could not find this host's node number\n");
		/* Connection succeeded above, so undo it explicitly. */
		ocfs2_cluster_disconnect(conn, 0);
		goto bail;
	}

local:
	/* Static per-super lock resources, used by both mount types. */
	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);

	osb->cconn = conn;

	status = 0;
bail:
	if (status < 0) {
		/* Unwind partial setup; both helpers tolerate no-op state. */
		ocfs2_dlm_shutdown_debug(osb);
		if (osb->dc_task)
			kthread_stop(osb->dc_task);
	}

	mlog_exit(status);
	return status;
}
3118 3121
/*
 * Tear down cluster locking, the inverse of ocfs2_dlm_init().
 *
 * Order matters: drop the osb-global locks first, stop the downconvert
 * thread, free the static lock resources, disconnect from the cluster
 * stack, and only then remove the debugfs state.
 *
 * @hangup_pending is forwarded to ocfs2_cluster_disconnect() so the
 * stack knows whether a control-device hangup is still outstanding.
 */
void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
			int hangup_pending)
{
	mlog_entry_void();

	ocfs2_drop_osb_locks(osb);

	/*
	 * Now that we have dropped all locks and ocfs2_dismount_volume()
	 * has disabled recovery, the DLM won't be talking to us. It's
	 * safe to tear things down before disconnecting the cluster.
	 */

	if (osb->dc_task) {
		kthread_stop(osb->dc_task);
		osb->dc_task = NULL;
	}

	ocfs2_lock_res_free(&osb->osb_super_lockres);
	ocfs2_lock_res_free(&osb->osb_rename_lockres);
	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);

	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
	osb->cconn = NULL;

	ocfs2_dlm_shutdown_debug(osb);

	mlog_exit_void();
}
3149 3152
/*
 * Release @lockres back to the DLM.
 *
 * Waits out any in-flight operation (OCFS2_LOCK_BUSY), pushes a final
 * LVB for EX-held lock types that carry one, then issues the unlock
 * and waits for the unlock ast to clear BUSY again.  The lockres must
 * already be marked OCFS2_LOCK_FREEING (enforced by mlog_bug_on_msg
 * below); an unexpected DLM unlock failure is fatal (BUG).
 *
 * Always returns 0.
 */
static int ocfs2_drop_lock(struct ocfs2_super *osb,
			   struct ocfs2_lock_res *lockres)
{
	int ret;
	unsigned long flags;
	u32 lkm_flags = 0;

	/* We didn't get anywhere near actually using this lockres. */
	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
		goto out;

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

	spin_lock_irqsave(&lockres->l_lock, flags);

	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
			"lockres %s, flags 0x%lx\n",
			lockres->l_name, lockres->l_flags);

	/* BUSY may be re-set after each wait, hence the loop. */
	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
		     "%u, unlock_action = %u\n",
		     lockres->l_name, lockres->l_flags, lockres->l_action,
		     lockres->l_unlock_action);

		spin_unlock_irqrestore(&lockres->l_lock, flags);

		/* XXX: Today we just wait on any busy
		 * locks... Perhaps we need to cancel converts in the
		 * future? */
		ocfs2_wait_on_busy_lock(lockres);

		spin_lock_irqsave(&lockres->l_lock, flags);
	}

	/* Write the LVB one last time, but only if it is still valid
	 * (attached, held EX and not awaiting a refresh). */
	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
		    lockres->l_level == DLM_LOCK_EX &&
		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
			lockres->l_ops->set_lvb(lockres);
	}

	if (lockres->l_flags & OCFS2_LOCK_BUSY)
		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
		     lockres->l_name);
	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);

	/* Never attached to the DLM, so there is nothing to unlock. */
	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto out;
	}

	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);

	/* make sure we never get here while waiting for an ast to
	 * fire. */
	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);

	/* is this necessary? */
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog(0, "lock %s\n", lockres->l_name);

	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
	if (ret) {
		/* A failed final unlock leaves the lockres in an
		 * unknown state; give up loudly. */
		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
		BUG();
	}
	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
	     lockres->l_name);

	/* Wait for the unlock ast to clear BUSY. */
	ocfs2_wait_on_busy_lock(lockres);
out:
	mlog_exit(0);
	return 0;
}
3232 3235
/* Mark the lockres as being dropped. It will no longer be
 * queued if blocking, but we still may have to wait on it
 * being dequeued from the downconvert thread before we can consider
 * it safe to drop.
 *
 * You can *not* attempt to call cluster_lock on this lockres anymore. */
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
{
	int status;
	struct ocfs2_mask_waiter mw;
	unsigned long flags;

	ocfs2_init_mask_waiter(&mw);

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres->l_flags |= OCFS2_LOCK_FREEING;
	/* Wait until the downconvert thread has dequeued this lockres.
	 * QUEUED is re-checked each pass because it may be set again
	 * while the spinlock is dropped. */
	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		mlog(0, "Waiting on lockres %s\n", lockres->l_name);

		/* A failed wait is only logged; we re-check the flag. */
		status = ocfs2_wait_for_mask(&mw);
		if (status)
			mlog_errno(status);

		spin_lock_irqsave(&lockres->l_lock, flags);
	}
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}
3263 3266
/*
 * Convenience wrapper: flag @lockres as going away, then drop it.
 * A failure from ocfs2_drop_lock() is only logged; callers have no
 * recovery path at this point.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int status;

	ocfs2_mark_lockres_freeing(lockres);

	status = ocfs2_drop_lock(osb, lockres);
	if (status)
		mlog_errno(status);
}
3274 3277
3275 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3278 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3276 { 3279 {
3277 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3280 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3278 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 3281 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
3279 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 3282 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
3280 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3283 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3281 } 3284 }
3282 3285
3283 int ocfs2_drop_inode_locks(struct inode *inode) 3286 int ocfs2_drop_inode_locks(struct inode *inode)
3284 { 3287 {
3285 int status, err; 3288 int status, err;
3286 3289
3287 mlog_entry_void(); 3290 mlog_entry_void();
3288 3291
3289 /* No need to call ocfs2_mark_lockres_freeing here - 3292 /* No need to call ocfs2_mark_lockres_freeing here -
3290 * ocfs2_clear_inode has done it for us. */ 3293 * ocfs2_clear_inode has done it for us. */
3291 3294
3292 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3295 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3293 &OCFS2_I(inode)->ip_open_lockres); 3296 &OCFS2_I(inode)->ip_open_lockres);
3294 if (err < 0) 3297 if (err < 0)
3295 mlog_errno(err); 3298 mlog_errno(err);
3296 3299
3297 status = err; 3300 status = err;
3298 3301
3299 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3302 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3300 &OCFS2_I(inode)->ip_inode_lockres); 3303 &OCFS2_I(inode)->ip_inode_lockres);
3301 if (err < 0) 3304 if (err < 0)
3302 mlog_errno(err); 3305 mlog_errno(err);
3303 if (err < 0 && !status) 3306 if (err < 0 && !status)
3304 status = err; 3307 status = err;
3305 3308
3306 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3309 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3307 &OCFS2_I(inode)->ip_rw_lockres); 3310 &OCFS2_I(inode)->ip_rw_lockres);
3308 if (err < 0) 3311 if (err < 0)
3309 mlog_errno(err); 3312 mlog_errno(err);
3310 if (err < 0 && !status) 3313 if (err < 0 && !status)
3311 status = err; 3314 status = err;
3312 3315
3313 mlog_exit(status); 3316 mlog_exit(status);
3314 return status; 3317 return status;
3315 } 3318 }
3316 3319
/*
 * Stage @lockres for a downconvert to @new_level.
 *
 * Caller must hold lockres->l_lock.  Sets the DOWNCONVERT action,
 * records the requested level, marks the lock BUSY and returns the
 * pending generation from lockres_set_pending(), which the caller
 * hands to lockres_clear_pending() once ocfs2_dlm_lock() returns.
 */
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level)
{
	assert_spin_locked(&lockres->l_lock);

	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);

	if (lockres->l_level <= new_level) {
		/* A downconvert must strictly lower the level; dump the
		 * full lockres state before dying. */
		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
		     new_level, list_empty(&lockres->l_blocked_list),
		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
		     lockres->l_flags, lockres->l_ro_holders,
		     lockres->l_ex_holders, lockres->l_action,
		     lockres->l_unlock_action, lockres->l_requested,
		     lockres->l_blocking, lockres->l_pending_gen);
		BUG();
	}

	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);

	lockres->l_action = OCFS2_AST_DOWNCONVERT;
	lockres->l_requested = new_level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	return lockres_set_pending(lockres);
}
3345 3348
3346 static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3349 static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3347 struct ocfs2_lock_res *lockres, 3350 struct ocfs2_lock_res *lockres,
3348 int new_level, 3351 int new_level,
3349 int lvb, 3352 int lvb,
3350 unsigned int generation) 3353 unsigned int generation)
3351 { 3354 {
3352 int ret; 3355 int ret;
3353 u32 dlm_flags = DLM_LKF_CONVERT; 3356 u32 dlm_flags = DLM_LKF_CONVERT;
3354 3357
3355 mlog_entry_void(); 3358 mlog_entry_void();
3356 3359
3357 mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, 3360 mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
3358 lockres->l_level, new_level); 3361 lockres->l_level, new_level);
3359 3362
3360 if (lvb) 3363 if (lvb)
3361 dlm_flags |= DLM_LKF_VALBLK; 3364 dlm_flags |= DLM_LKF_VALBLK;
3362 3365
3363 ret = ocfs2_dlm_lock(osb->cconn, 3366 ret = ocfs2_dlm_lock(osb->cconn,
3364 new_level, 3367 new_level,
3365 &lockres->l_lksb, 3368 &lockres->l_lksb,
3366 dlm_flags, 3369 dlm_flags,
3367 lockres->l_name, 3370 lockres->l_name,
3368 OCFS2_LOCK_ID_MAX_LEN - 1); 3371 OCFS2_LOCK_ID_MAX_LEN - 1);
3369 lockres_clear_pending(lockres, generation, osb); 3372 lockres_clear_pending(lockres, generation, osb);
3370 if (ret) { 3373 if (ret) {
3371 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3374 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3372 ocfs2_recover_from_dlm_error(lockres, 1); 3375 ocfs2_recover_from_dlm_error(lockres, 1);
3373 goto bail; 3376 goto bail;
3374 } 3377 }
3375 3378
3376 ret = 0; 3379 ret = 0;
3377 bail: 3380 bail:
3378 mlog_exit(ret); 3381 mlog_exit(ret);
3379 return ret; 3382 return ret;
3380 } 3383 }
3381 3384
/* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres)
{
	assert_spin_locked(&lockres->l_lock);

	mlog_entry_void();

	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
		/* If we're already trying to cancel a lock conversion
		 * then just drop the spinlock and allow the caller to
		 * requeue this lock. */
		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
		return 0;
	}

	/* were we in a convert when we got the bast fire? */
	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
	/* set things up for the unlockast to know to just
	 * clear out the ast_action and unset busy, etc. */
	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;

	/* A convert in flight implies BUSY is set. */
	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
			"lock %s, invalid flags: 0x%lx\n",
			lockres->l_name, lockres->l_flags);

	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);

	return 1;
}
3413 3416
3414 static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3417 static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3415 struct ocfs2_lock_res *lockres) 3418 struct ocfs2_lock_res *lockres)
3416 { 3419 {
3417 int ret; 3420 int ret;
3418 3421
3419 mlog_entry_void(); 3422 mlog_entry_void();
3420 3423
3421 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3424 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3422 DLM_LKF_CANCEL); 3425 DLM_LKF_CANCEL);
3423 if (ret) { 3426 if (ret) {
3424 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3427 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3425 ocfs2_recover_from_dlm_error(lockres, 0); 3428 ocfs2_recover_from_dlm_error(lockres, 0);
3426 } 3429 }
3427 3430
3428 mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 3431 mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3429 3432
3430 mlog_exit(ret); 3433 mlog_exit(ret);
3431 return ret; 3434 return ret;
3432 } 3435 }
3433 3436
3434 static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3437 static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3435 struct ocfs2_lock_res *lockres, 3438 struct ocfs2_lock_res *lockres,
3436 struct ocfs2_unblock_ctl *ctl) 3439 struct ocfs2_unblock_ctl *ctl)
3437 { 3440 {
3438 unsigned long flags; 3441 unsigned long flags;
3439 int blocking; 3442 int blocking;
3440 int new_level; 3443 int new_level;
3441 int level; 3444 int level;
3442 int ret = 0; 3445 int ret = 0;
3443 int set_lvb = 0; 3446 int set_lvb = 0;
3444 unsigned int gen; 3447 unsigned int gen;
3445 3448
3446 mlog_entry_void(); 3449 mlog_entry_void();
3447 3450
3448 spin_lock_irqsave(&lockres->l_lock, flags); 3451 spin_lock_irqsave(&lockres->l_lock, flags);
3449 3452
3450 recheck: 3453 recheck:
3451 /* 3454 /*
3452 * Is it still blocking? If not, we have no more work to do. 3455 * Is it still blocking? If not, we have no more work to do.
3453 */ 3456 */
3454 if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { 3457 if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3455 BUG_ON(lockres->l_blocking != DLM_LOCK_NL); 3458 BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3456 spin_unlock_irqrestore(&lockres->l_lock, flags); 3459 spin_unlock_irqrestore(&lockres->l_lock, flags);
3457 ret = 0; 3460 ret = 0;
3458 goto leave; 3461 goto leave;
3459 } 3462 }
3460 3463
3461 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3464 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3462 /* XXX 3465 /* XXX
3463 * This is a *big* race. The OCFS2_LOCK_PENDING flag 3466 * This is a *big* race. The OCFS2_LOCK_PENDING flag
3464 * exists entirely for one reason - another thread has set 3467 * exists entirely for one reason - another thread has set
3465 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3468 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3466 * 3469 *
3467 * If we do ocfs2_cancel_convert() before the other thread 3470 * If we do ocfs2_cancel_convert() before the other thread
3468 * calls dlm_lock(), our cancel will do nothing. We will 3471 * calls dlm_lock(), our cancel will do nothing. We will
3469 * get no ast, and we will have no way of knowing the 3472 * get no ast, and we will have no way of knowing the
3470 * cancel failed. Meanwhile, the other thread will call 3473 * cancel failed. Meanwhile, the other thread will call
3471 * into dlm_lock() and wait...forever. 3474 * into dlm_lock() and wait...forever.
3472 * 3475 *
3473 * Why forever? Because another node has asked for the 3476 * Why forever? Because another node has asked for the
3474 * lock first; that's why we're here in unblock_lock(). 3477 * lock first; that's why we're here in unblock_lock().
3475 * 3478 *
3476 * The solution is OCFS2_LOCK_PENDING. When PENDING is 3479 * The solution is OCFS2_LOCK_PENDING. When PENDING is
3477 * set, we just requeue the unblock. Only when the other 3480 * set, we just requeue the unblock. Only when the other
3478 * thread has called dlm_lock() and cleared PENDING will 3481 * thread has called dlm_lock() and cleared PENDING will
3479 * we then cancel their request. 3482 * we then cancel their request.
3480 * 3483 *
3481 * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3484 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3482 * at the same time they set OCFS2_DLM_BUSY. They must 3485 * at the same time they set OCFS2_DLM_BUSY. They must
3483 * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3486 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3484 */ 3487 */
3485 if (lockres->l_flags & OCFS2_LOCK_PENDING) { 3488 if (lockres->l_flags & OCFS2_LOCK_PENDING) {
3486 mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", 3489 mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
3487 lockres->l_name); 3490 lockres->l_name);
3488 goto leave_requeue; 3491 goto leave_requeue;
3489 } 3492 }
3490 3493
3491 ctl->requeue = 1; 3494 ctl->requeue = 1;
3492 ret = ocfs2_prepare_cancel_convert(osb, lockres); 3495 ret = ocfs2_prepare_cancel_convert(osb, lockres);
3493 spin_unlock_irqrestore(&lockres->l_lock, flags); 3496 spin_unlock_irqrestore(&lockres->l_lock, flags);
3494 if (ret) { 3497 if (ret) {
3495 ret = ocfs2_cancel_convert(osb, lockres); 3498 ret = ocfs2_cancel_convert(osb, lockres);
3496 if (ret < 0) 3499 if (ret < 0)
3497 mlog_errno(ret); 3500 mlog_errno(ret);
3498 } 3501 }
3499 goto leave; 3502 goto leave;
3500 } 3503 }
3501 3504
3502 /* 3505 /*
3503 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is 3506 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3504 * set when the ast is received for an upconvert just before the 3507 * set when the ast is received for an upconvert just before the
3505 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast 3508 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3506 * on the heels of the ast, we want to delay the downconvert just 3509 * on the heels of the ast, we want to delay the downconvert just
3507 * enough to allow the up requestor to do its task. Because this 3510 * enough to allow the up requestor to do its task. Because this
3508 * lock is in the blocked queue, the lock will be downconverted 3511 * lock is in the blocked queue, the lock will be downconverted
3509 * as soon as the requestor is done with the lock. 3512 * as soon as the requestor is done with the lock.
3510 */ 3513 */
3511 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) 3514 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3512 goto leave_requeue; 3515 goto leave_requeue;
3513 3516
3514 /* 3517 /*
3515 * How can we block and yet be at NL? We were trying to upconvert 3518 * How can we block and yet be at NL? We were trying to upconvert
3516 * from NL and got canceled. The code comes back here, and now 3519 * from NL and got canceled. The code comes back here, and now
3517 * we notice and clear BLOCKING. 3520 * we notice and clear BLOCKING.
3518 */ 3521 */
3519 if (lockres->l_level == DLM_LOCK_NL) { 3522 if (lockres->l_level == DLM_LOCK_NL) {
3520 BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); 3523 BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
3521 mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); 3524 mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
3522 lockres->l_blocking = DLM_LOCK_NL; 3525 lockres->l_blocking = DLM_LOCK_NL;
3523 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 3526 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
3524 spin_unlock_irqrestore(&lockres->l_lock, flags); 3527 spin_unlock_irqrestore(&lockres->l_lock, flags);
3525 goto leave; 3528 goto leave;
3526 } 3529 }
3527 3530
3528 /* if we're blocking an exclusive and we have *any* holders, 3531 /* if we're blocking an exclusive and we have *any* holders,
3529 * then requeue. */ 3532 * then requeue. */
3530 if ((lockres->l_blocking == DLM_LOCK_EX) 3533 if ((lockres->l_blocking == DLM_LOCK_EX)
3531 && (lockres->l_ex_holders || lockres->l_ro_holders)) { 3534 && (lockres->l_ex_holders || lockres->l_ro_holders)) {
3532 mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", 3535 mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
3533 lockres->l_name, lockres->l_ex_holders, 3536 lockres->l_name, lockres->l_ex_holders,
3534 lockres->l_ro_holders); 3537 lockres->l_ro_holders);
3535 goto leave_requeue; 3538 goto leave_requeue;
3536 } 3539 }
3537 3540
3538 /* If it's a PR we're blocking, then only 3541 /* If it's a PR we're blocking, then only
3539 * requeue if we've got any EX holders */ 3542 * requeue if we've got any EX holders */
3540 if (lockres->l_blocking == DLM_LOCK_PR && 3543 if (lockres->l_blocking == DLM_LOCK_PR &&
3541 lockres->l_ex_holders) { 3544 lockres->l_ex_holders) {
3542 mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", 3545 mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
3543 lockres->l_name, lockres->l_ex_holders); 3546 lockres->l_name, lockres->l_ex_holders);
3544 goto leave_requeue; 3547 goto leave_requeue;
3545 } 3548 }
3546 3549
3547 /* 3550 /*
3548 * Can we get a lock in this state if the holder counts are 3551 * Can we get a lock in this state if the holder counts are
3549 * zero? The meta data unblock code used to check this. 3552 * zero? The meta data unblock code used to check this.
3550 */ 3553 */
3551 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3554 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
3552 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { 3555 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
3553 mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", 3556 mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
3554 lockres->l_name); 3557 lockres->l_name);
3555 goto leave_requeue; 3558 goto leave_requeue;
3556 } 3559 }
3557 3560
3558 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 3561 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
3559 3562
3560 if (lockres->l_ops->check_downconvert 3563 if (lockres->l_ops->check_downconvert
3561 && !lockres->l_ops->check_downconvert(lockres, new_level)) { 3564 && !lockres->l_ops->check_downconvert(lockres, new_level)) {
3562 mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", 3565 mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
3563 lockres->l_name); 3566 lockres->l_name);
3564 goto leave_requeue; 3567 goto leave_requeue;
3565 } 3568 }
3566 3569
3567 /* If we get here, then we know that there are no more 3570 /* If we get here, then we know that there are no more
3568 * incompatible holders (and anyone asking for an incompatible 3571 * incompatible holders (and anyone asking for an incompatible
3569 * lock is blocked). We can now downconvert the lock */ 3572 * lock is blocked). We can now downconvert the lock */
3570 if (!lockres->l_ops->downconvert_worker) 3573 if (!lockres->l_ops->downconvert_worker)
3571 goto downconvert; 3574 goto downconvert;
3572 3575
3573 /* Some lockres types want to do a bit of work before 3576 /* Some lockres types want to do a bit of work before
3574 * downconverting a lock. Allow that here. The worker function 3577 * downconverting a lock. Allow that here. The worker function
3575 * may sleep, so we save off a copy of what we're blocking as 3578 * may sleep, so we save off a copy of what we're blocking as
3576 * it may change while we're not holding the spin lock. */ 3579 * it may change while we're not holding the spin lock. */
3577 blocking = lockres->l_blocking; 3580 blocking = lockres->l_blocking;
3578 level = lockres->l_level; 3581 level = lockres->l_level;
3579 spin_unlock_irqrestore(&lockres->l_lock, flags); 3582 spin_unlock_irqrestore(&lockres->l_lock, flags);
3580 3583
3581 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3584 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3582 3585
3583 if (ctl->unblock_action == UNBLOCK_STOP_POST) { 3586 if (ctl->unblock_action == UNBLOCK_STOP_POST) {
3584 mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", 3587 mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
3585 lockres->l_name); 3588 lockres->l_name);
3586 goto leave; 3589 goto leave;
3587 } 3590 }
3588 3591
3589 spin_lock_irqsave(&lockres->l_lock, flags); 3592 spin_lock_irqsave(&lockres->l_lock, flags);
3590 if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { 3593 if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3591 /* If this changed underneath us, then we can't drop 3594 /* If this changed underneath us, then we can't drop
3592 * it just yet. */ 3595 * it just yet. */
3593 mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " 3596 mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
3594 "Recheck\n", lockres->l_name, blocking, 3597 "Recheck\n", lockres->l_name, blocking,
3595 lockres->l_blocking, level, lockres->l_level); 3598 lockres->l_blocking, level, lockres->l_level);
3596 goto recheck; 3599 goto recheck;
3597 } 3600 }
3598 3601
3599 downconvert: 3602 downconvert:
3600 ctl->requeue = 0; 3603 ctl->requeue = 0;
3601 3604
3602 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3605 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3603 if (lockres->l_level == DLM_LOCK_EX) 3606 if (lockres->l_level == DLM_LOCK_EX)
3604 set_lvb = 1; 3607 set_lvb = 1;
3605 3608
3606 /* 3609 /*
3607 * We only set the lvb if the lock has been fully 3610 * We only set the lvb if the lock has been fully
3608 * refreshed - otherwise we risk setting stale 3611 * refreshed - otherwise we risk setting stale
3609 * data. Otherwise, there's no need to actually clear 3612 * data. Otherwise, there's no need to actually clear
3610 * out the lvb here as it's value is still valid. 3613 * out the lvb here as it's value is still valid.
3611 */ 3614 */
3612 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 3615 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3613 lockres->l_ops->set_lvb(lockres); 3616 lockres->l_ops->set_lvb(lockres);
3614 } 3617 }
3615 3618
3616 gen = ocfs2_prepare_downconvert(lockres, new_level); 3619 gen = ocfs2_prepare_downconvert(lockres, new_level);
3617 spin_unlock_irqrestore(&lockres->l_lock, flags); 3620 spin_unlock_irqrestore(&lockres->l_lock, flags);
3618 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3621 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3619 gen); 3622 gen);
3620 3623
3621 leave: 3624 leave:
3622 mlog_exit(ret); 3625 mlog_exit(ret);
3623 return ret; 3626 return ret;
3624 3627
3625 leave_requeue: 3628 leave_requeue:
3626 spin_unlock_irqrestore(&lockres->l_lock, flags); 3629 spin_unlock_irqrestore(&lockres->l_lock, flags);
3627 ctl->requeue = 1; 3630 ctl->requeue = 1;
3628 3631
3629 mlog_exit(0); 3632 mlog_exit(0);
3630 return 0; 3633 return 0;
3631 } 3634 }
3632 3635
/*
 * Downconvert worker for inode data locks.  Before another node may take
 * a conflicting lock we push our cached state out: directories just bump
 * a generation counter, regular files get their mappings unmapped and
 * dirty pages written back (and truncated when blocking EX).
 * Always returns UNBLOCK_CONTINUE - the downconvert itself proceeds.
 */
static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking)
{
	struct inode *inode;
	struct address_space *mapping;
	struct ocfs2_inode_info *oi;

	inode = ocfs2_lock_res_inode(lockres);
	mapping = inode->i_mapping;

	if (S_ISDIR(inode->i_mode)) {
		/* Directories carry no data pages; just invalidate the
		 * cached directory state via the generation counter. */
		oi = OCFS2_I(inode);
		oi->ip_dir_lock_gen++;
		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
		goto out;
	}

	/* Only regular files have page cache data to flush. */
	if (!S_ISREG(inode->i_mode))
		goto out;

	/*
	 * We need this before the filemap_fdatawrite() so that it can
	 * transfer the dirty bit from the PTE to the
	 * page. Unfortunately this means that even for EX->PR
	 * downconverts, we'll lose our mappings and have to build
	 * them up again.
	 */
	unmap_mapping_range(mapping, 0, 0, 0);

	if (filemap_fdatawrite(mapping)) {
		/* Log and keep going - the downconvert still happens. */
		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
	}
	sync_mapping_buffers(mapping);
	if (blocking == DLM_LOCK_EX) {
		truncate_inode_pages(mapping, 0);
	} else {
		/* We only need to wait on the I/O if we're not also
		 * truncating pages because truncate_inode_pages waits
		 * for us above. We don't truncate pages if we're
		 * blocking anything < EXMODE because we want to keep
		 * them around in that case. */
		filemap_fdatawait(mapping);
	}

out:
	return UNBLOCK_CONTINUE;
}
3681 3684
3682 static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, 3685 static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3683 struct ocfs2_lock_res *lockres, 3686 struct ocfs2_lock_res *lockres,
3684 int new_level) 3687 int new_level)
3685 { 3688 {
3686 int checkpointed = ocfs2_ci_fully_checkpointed(ci); 3689 int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3687 3690
3688 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3691 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3689 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3692 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3690 3693
3691 if (checkpointed) 3694 if (checkpointed)
3692 return 1; 3695 return 1;
3693 3696
3694 ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); 3697 ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3695 return 0; 3698 return 0;
3696 } 3699 }
3697 3700
/* Meta lock downconvert check: allow it only once the inode's cache is
 * fully checkpointed (see ocfs2_ci_checkpointed()). */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct inode *inode = ocfs2_lock_res_inode(lockres);
	struct ocfs2_caching_info *ci = INODE_CACHE(inode);

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
3705 3708
/* Fill the meta lock's LVB from the inode before releasing the lock. */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3712 3715
/*
 * Drop the final reference on our dentry lock.  Today this runs in the
 * downconvert thread; it could instead be pushed off to the ocfs2_wq if
 * the dlmglue API were simplified.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3724 3727
/*
 * d_delete() matching dentries before the lock downconvert.
 *
 * At this point, any process waiting to destroy the
 * dentry_lock due to last ref count is stopped by the
 * OCFS2_LOCK_QUEUED flag.
 *
 * We have two potential problems
 *
 * 1) If we do the last reference drop on our dentry_lock (via dput)
 *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
 *    the downconvert to finish. Instead we take an elevated
 *    reference and push the drop until after we've completed our
 *    unblock processing.
 *
 * 2) There might be another process with a final reference,
 *    waiting on us to finish processing. If this is the case, we
 *    detect it and exit out - there's no more dentries anyway.
 */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking)
{
	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
	struct dentry *dentry;
	unsigned long flags;
	int extra_ref = 0;

	/*
	 * This node is blocking another node from getting a read
	 * lock. This happens when we've renamed within a
	 * directory. We've forced the other nodes to d_delete(), but
	 * we never actually dropped our lock because it's still
	 * valid. The downconvert code will retain a PR for this node,
	 * so there's no further work to do.
	 */
	if (blocking == DLM_LOCK_PR)
		return UNBLOCK_CONTINUE;

	/*
	 * Mark this inode as potentially orphaned. The code in
	 * ocfs2_delete_inode() will figure out whether it actually
	 * needs to be freed or not.
	 */
	spin_lock(&oi->ip_lock);
	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
	spin_unlock(&oi->ip_lock);

	/*
	 * Yuck. We need to make sure however that the check of
	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
	 * respect to a reference decrement or the setting of that
	 * flag.
	 */
	spin_lock_irqsave(&lockres->l_lock, flags);
	spin_lock(&dentry_attach_lock);
	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
	    && dl->dl_count) {
		/* Elevated ref keeps the dentry_lock alive past this
		 * worker (problem 1 above). */
		dl->dl_count++;
		extra_ref = 1;
	}
	spin_unlock(&dentry_attach_lock);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog(0, "extra_ref = %d\n", extra_ref);

	/*
	 * We have a process waiting on us in ocfs2_dentry_iput(),
	 * which means we can't have any more outstanding
	 * aliases. There's no need to do any more work.
	 */
	if (!extra_ref)
		return UNBLOCK_CONTINUE;

	/* Delete every local alias of the inode under this parent.
	 * dentry_attach_lock is dropped around the dcache calls and
	 * retaken for the next lookup. */
	spin_lock(&dentry_attach_lock);
	while (1) {
		dentry = ocfs2_find_local_alias(dl->dl_inode,
						dl->dl_parent_blkno, 1);
		if (!dentry)
			break;
		spin_unlock(&dentry_attach_lock);

		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
		     dentry->d_name.name);

		/*
		 * The following dcache calls may do an
		 * iput(). Normally we don't want that from the
		 * downconverting thread, but in this case it's ok
		 * because the requesting node already has an
		 * exclusive lock on the inode, so it can't be queued
		 * for a downconvert.
		 */
		d_delete(dentry);
		dput(dentry);

		spin_lock(&dentry_attach_lock);
	}
	spin_unlock(&dentry_attach_lock);

	/*
	 * If we are the last holder of this dentry lock, there is no
	 * reason to downconvert so skip straight to the unlock.
	 */
	if (dl->dl_count == 1)
		return UNBLOCK_STOP_POST;

	return UNBLOCK_CONTINUE_POST;
}
3834 3837
3835 static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 3838 static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
3836 int new_level) 3839 int new_level)
3837 { 3840 {
3838 struct ocfs2_refcount_tree *tree = 3841 struct ocfs2_refcount_tree *tree =
3839 ocfs2_lock_res_refcount_tree(lockres); 3842 ocfs2_lock_res_refcount_tree(lockres);
3840 3843
3841 return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); 3844 return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
3842 } 3845 }
3843 3846
3844 static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 3847 static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
3845 int blocking) 3848 int blocking)
3846 { 3849 {
3847 struct ocfs2_refcount_tree *tree = 3850 struct ocfs2_refcount_tree *tree =
3848 ocfs2_lock_res_refcount_tree(lockres); 3851 ocfs2_lock_res_refcount_tree(lockres);
3849 3852
3850 ocfs2_metadata_cache_purge(&tree->rf_ci); 3853 ocfs2_metadata_cache_purge(&tree->rf_ci);
3851 3854
3852 return UNBLOCK_CONTINUE; 3855 return UNBLOCK_CONTINUE;
3853 } 3856 }
3854 3857
/*
 * Pack the in-memory global quota info into the lock value block so the
 * next holder can refresh from the LVB instead of reading disk (see
 * ocfs2_refresh_qinfo()).  LVB fields are stored big-endian.
 */
static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
{
	struct ocfs2_qinfo_lvb *lvb;
	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
					    oinfo->dqi_gi.dqi_type);

	mlog_entry_void();

	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);

	mlog_exit_void();
}
3875 3878
3876 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 3879 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3877 { 3880 {
3878 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3881 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3879 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 3882 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3880 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3883 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3881 3884
3882 mlog_entry_void(); 3885 mlog_entry_void();
3883 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 3886 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3884 ocfs2_cluster_unlock(osb, lockres, level); 3887 ocfs2_cluster_unlock(osb, lockres, level);
3885 mlog_exit_void(); 3888 mlog_exit_void();
3886 } 3889 }
3887 3890
/*
 * Refresh the cached global quota info for this lockres.  Prefer the
 * lock value block (big-endian, written by ocfs2_set_qinfo_lvb()); if
 * the LVB is invalid or carries the wrong version, fall back to reading
 * the global quota file block from disk (little-endian on-disk format).
 * Returns 0 on success or the negative error from the disk read.
 */
static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
{
	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
					    oinfo->dqi_gi.dqi_type);
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	struct buffer_head *bh = NULL;
	struct ocfs2_global_disk_dqinfo *gdinfo;
	int status = 0;

	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
		/* Fast path: take everything from the LVB. */
		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					be32_to_cpu(lvb->lvb_free_entry);
	} else {
		/* Slow path: read the global info block from disk. */
		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
						     oinfo->dqi_giblk, &bh);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
		gdinfo = (struct ocfs2_global_disk_dqinfo *)
					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					le32_to_cpu(gdinfo->dqi_free_entry);
		brelse(bh);
		/* Count the disk refresh in the lock's statistics. */
		ocfs2_track_lock_refresh(lockres);
	}

bail:
	return status;
}
3930 3933
/* Lock quota info, this function expects at least shared lock on the quota file
 * so that we can safely refresh quota info from disk. */
int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	int status = 0;

	mlog_entry_void();

	/* On RO devices, locking really isn't needed... */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}
	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;
	/* OK, we have the lock but we need to refresh the quota info */
	status = ocfs2_refresh_qinfo(oinfo);
	if (status)
		/* Refresh failed: drop the cluster lock before returning
		 * the error so the caller doesn't hold a stale lock. */
		ocfs2_qinfo_unlock(oinfo, ex);
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}
3967 3970
3968 int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) 3971 int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
3969 { 3972 {
3970 int status; 3973 int status;
3971 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3974 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3972 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 3975 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3973 struct ocfs2_super *osb = lockres->l_priv; 3976 struct ocfs2_super *osb = lockres->l_priv;
3974 3977
3975 3978
3976 if (ocfs2_is_hard_readonly(osb)) 3979 if (ocfs2_is_hard_readonly(osb))
3977 return -EROFS; 3980 return -EROFS;
3978 3981
3979 if (ocfs2_mount_local(osb)) 3982 if (ocfs2_mount_local(osb))
3980 return 0; 3983 return 0;
3981 3984
3982 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 3985 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3983 if (status < 0) 3986 if (status < 0)
3984 mlog_errno(status); 3987 mlog_errno(status);
3985 3988
3986 return status; 3989 return status;
3987 } 3990 }
3988 3991
3989 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) 3992 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
3990 { 3993 {
3991 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3994 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3992 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 3995 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3993 struct ocfs2_super *osb = lockres->l_priv; 3996 struct ocfs2_super *osb = lockres->l_priv;
3994 3997
3995 if (!ocfs2_mount_local(osb)) 3998 if (!ocfs2_mount_local(osb))
3996 ocfs2_cluster_unlock(osb, lockres, level); 3999 ocfs2_cluster_unlock(osb, lockres, level);
3997 } 4000 }
3998 4001
/*
 * Handle one lockres pulled off the blocked list: attempt the unblock/
 * downconvert, then either clear OCFS2_LOCK_QUEUED or requeue it for
 * another pass, and finally run any post_unlock callback.
 */
static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
				       struct ocfs2_lock_res *lockres)
{
	int status;
	struct ocfs2_unblock_ctl ctl = {0, 0,};
	unsigned long flags;

	/* Our reference to the lockres in this function can be
	 * considered valid until we remove the OCFS2_LOCK_QUEUED
	 * flag. */

	mlog_entry_void();

	BUG_ON(!lockres);
	BUG_ON(!lockres->l_ops);

	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);

	/* Detect whether a lock has been marked as going away while
	 * the downconvert thread was processing other things. A lock can
	 * still be marked with OCFS2_LOCK_FREEING after this check,
	 * but short circuiting here will still save us some
	 * performance. */
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (lockres->l_flags & OCFS2_LOCK_FREEING)
		goto unqueue;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = ocfs2_unblock_lock(osb, lockres, &ctl);
	if (status < 0)
		mlog_errno(status);

	spin_lock_irqsave(&lockres->l_lock, flags);
unqueue:
	/* l_lock is held here on both paths into this label. */
	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
	} else
		ocfs2_schedule_blocked_lock(osb, lockres);

	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
	     ctl.requeue ? "yes" : "no");
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	if (ctl.unblock_action != UNBLOCK_CONTINUE
	    && lockres->l_ops->post_unlock)
		lockres->l_ops->post_unlock(osb, lockres);

	mlog_exit_void();
}
4048 4051
/*
 * Queue a lockres for the downconvert thread.  Caller must hold
 * lockres->l_lock.  A lock marked OCFS2_LOCK_FREEING is never queued.
 */
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres)
{
	mlog_entry_void();

	assert_spin_locked(&lockres->l_lock);

	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
		/* Do not schedule a lock for downconvert when it's on
		 * the way to destruction - any nodes wanting access
		 * to the resource will get it soon. */
		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
		     lockres->l_name, lockres->l_flags);
		return;
	}

	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);

	spin_lock(&osb->dc_task_lock);
	if (list_empty(&lockres->l_blocked_list)) {
		/* Only append if not already on the blocked list. */
		list_add_tail(&lockres->l_blocked_list,
			      &osb->blocked_lock_list);
		osb->blocked_lock_count++;
	}
	spin_unlock(&osb->dc_task_lock);

	mlog_exit_void();
}
4077 4080
/*
 * One pass of the downconvert thread: process the locks that were on
 * the blocked list when we started.  Locks requeued during this pass
 * are left for the next wakeup, bounding the work per iteration.
 */
static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
{
	unsigned long processed;
	struct ocfs2_lock_res *lockres;

	mlog_entry_void();

	spin_lock(&osb->dc_task_lock);
	/* grab this early so we know to try again if a state change and
	 * wake happens part-way through our work */
	osb->dc_work_sequence = osb->dc_wake_sequence;

	processed = osb->blocked_lock_count;
	while (processed) {
		BUG_ON(list_empty(&osb->blocked_lock_list));

		lockres = list_entry(osb->blocked_lock_list.next,
				     struct ocfs2_lock_res, l_blocked_list);
		list_del_init(&lockres->l_blocked_list);
		osb->blocked_lock_count--;
		/* Drop the list lock while processing; downconvert
		 * workers may sleep. */
		spin_unlock(&osb->dc_task_lock);

		BUG_ON(!processed);
		processed--;

		ocfs2_process_blocked_lock(osb, lockres);

		spin_lock(&osb->dc_task_lock);
	}
	spin_unlock(&osb->dc_task_lock);

	mlog_exit_void();
}
4111 4114
4112 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 4115 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
4113 { 4116 {
4114 int empty = 0; 4117 int empty = 0;
4115 4118
4116 spin_lock(&osb->dc_task_lock); 4119 spin_lock(&osb->dc_task_lock);
4117 if (list_empty(&osb->blocked_lock_list)) 4120 if (list_empty(&osb->blocked_lock_list))
4118 empty = 1; 4121 empty = 1;
4119 4122
4120 spin_unlock(&osb->dc_task_lock); 4123 spin_unlock(&osb->dc_task_lock);
4121 return empty; 4124 return empty;
4122 } 4125 }
4123 4126
4124 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 4127 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
4125 { 4128 {
4126 int should_wake = 0; 4129 int should_wake = 0;
4127 4130
4128 spin_lock(&osb->dc_task_lock); 4131 spin_lock(&osb->dc_task_lock);
4129 if (osb->dc_work_sequence != osb->dc_wake_sequence) 4132 if (osb->dc_work_sequence != osb->dc_wake_sequence)
4130 should_wake = 1; 4133 should_wake = 1;
4131 spin_unlock(&osb->dc_task_lock); 4134 spin_unlock(&osb->dc_task_lock);
4132 4135
4133 return should_wake; 4136 return should_wake;
4134 } 4137 }
4135 4138
4136 static int ocfs2_downconvert_thread(void *arg) 4139 static int ocfs2_downconvert_thread(void *arg)
4137 { 4140 {
4138 int status = 0; 4141 int status = 0;
4139 struct ocfs2_super *osb = arg; 4142 struct ocfs2_super *osb = arg;
4140 4143
4141 /* only quit once we've been asked to stop and there is no more 4144 /* only quit once we've been asked to stop and there is no more
4142 * work available */ 4145 * work available */
4143 while (!(kthread_should_stop() && 4146 while (!(kthread_should_stop() &&
4144 ocfs2_downconvert_thread_lists_empty(osb))) { 4147 ocfs2_downconvert_thread_lists_empty(osb))) {
4145 4148
4146 wait_event_interruptible(osb->dc_event, 4149 wait_event_interruptible(osb->dc_event,
4147 ocfs2_downconvert_thread_should_wake(osb) || 4150 ocfs2_downconvert_thread_should_wake(osb) ||
4148 kthread_should_stop()); 4151 kthread_should_stop());
4149 4152
4150 mlog(0, "downconvert_thread: awoken\n"); 4153 mlog(0, "downconvert_thread: awoken\n");
4151 4154
4152 ocfs2_downconvert_thread_do_work(osb); 4155 ocfs2_downconvert_thread_do_work(osb);
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * ocfs2.h 4 * ocfs2.h
5 * 5 *
6 * Defines macros and structures used in OCFS2 6 * Defines macros and structures used in OCFS2
7 * 7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #ifndef OCFS2_H 26 #ifndef OCFS2_H
27 #define OCFS2_H 27 #define OCFS2_H
28 28
29 #include <linux/spinlock.h> 29 #include <linux/spinlock.h>
30 #include <linux/sched.h> 30 #include <linux/sched.h>
31 #include <linux/wait.h> 31 #include <linux/wait.h>
32 #include <linux/list.h> 32 #include <linux/list.h>
33 #include <linux/rbtree.h> 33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h> 34 #include <linux/workqueue.h>
35 #include <linux/kref.h> 35 #include <linux/kref.h>
36 #include <linux/mutex.h> 36 #include <linux/mutex.h>
37 #include <linux/lockdep.h> 37 #include <linux/lockdep.h>
38 #include <linux/jbd2.h> 38 #include <linux/jbd2.h>
39 39
40 /* For union ocfs2_dlm_lksb */ 40 /* For union ocfs2_dlm_lksb */
41 #include "stackglue.h" 41 #include "stackglue.h"
42 42
43 #include "ocfs2_fs.h" 43 #include "ocfs2_fs.h"
44 #include "ocfs2_lockid.h" 44 #include "ocfs2_lockid.h"
45 #include "ocfs2_ioctl.h" 45 #include "ocfs2_ioctl.h"
46 46
47 /* For struct ocfs2_blockcheck_stats */ 47 /* For struct ocfs2_blockcheck_stats */
48 #include "blockcheck.h" 48 #include "blockcheck.h"
49 49
50 #include "reservations.h" 50 #include "reservations.h"
51 51
52 /* Caching of metadata buffers */ 52 /* Caching of metadata buffers */
53 53
54 /* Most user visible OCFS2 inodes will have very few pieces of 54 /* Most user visible OCFS2 inodes will have very few pieces of
55 * metadata, but larger files (including bitmaps, etc) must be taken 55 * metadata, but larger files (including bitmaps, etc) must be taken
56 * into account when designing an access scheme. We allow a small 56 * into account when designing an access scheme. We allow a small
57 * amount of inlined blocks to be stored on an array and grow the 57 * amount of inlined blocks to be stored on an array and grow the
58 * structure into a rb tree when necessary. */ 58 * structure into a rb tree when necessary. */
59 #define OCFS2_CACHE_INFO_MAX_ARRAY 2 59 #define OCFS2_CACHE_INFO_MAX_ARRAY 2
60 60
61 /* Flags for ocfs2_caching_info */ 61 /* Flags for ocfs2_caching_info */
62 62
enum ocfs2_caching_info_flags {
	/* Indicates that the metadata cache is using the inline array
	 * (ci_cache.ci_array) rather than the rb tree (ci_cache.ci_tree). */
	OCFS2_CACHE_FL_INLINE	= 1<<1,
};
67 67
68 struct ocfs2_caching_operations; 68 struct ocfs2_caching_operations;
/*
 * Per-object metadata buffer cache.  Small objects keep their cached
 * block numbers in an inline array; larger ones grow into an rb tree
 * (selected via OCFS2_CACHE_FL_INLINE in ci_flags).
 */
struct ocfs2_caching_info {
	/*
	 * The parent structure provides the locks, but because the
	 * parent structure can differ, it provides locking operations
	 * to struct ocfs2_caching_info.
	 */
	const struct ocfs2_caching_operations *ci_ops;

	/* next two are protected by trans_inc_lock */
	/* which transaction were we created on? Zero if none. */
	unsigned long		ci_created_trans;
	/* last transaction we were a part of. */
	unsigned long		ci_last_trans;

	/* Cache structures */
	unsigned int		ci_flags;
	/* Number of cached entries in whichever representation is active */
	unsigned int		ci_num_cached;
	union {
		sector_t	ci_array[OCFS2_CACHE_INFO_MAX_ARRAY];
		struct rb_root	ci_tree;
	} ci_cache;
};
91 /* 91 /*
92 * Need this prototype here instead of in uptodate.h because journal.h 92 * Need this prototype here instead of in uptodate.h because journal.h
93 * uses it. 93 * uses it.
94 */ 94 */
95 struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci); 95 struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
96 96
97 /* this limits us to 256 nodes 97 /* this limits us to 256 nodes
98 * if we need more, we can do a kmalloc for the map */ 98 * if we need more, we can do a kmalloc for the map */
99 #define OCFS2_NODE_MAP_MAX_NODES 256 99 #define OCFS2_NODE_MAP_MAX_NODES 256
/* Bitmap of cluster nodes, sized for OCFS2_NODE_MAP_MAX_NODES. */
struct ocfs2_node_map {
	u16 num_nodes;
	unsigned long map[BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES)];
};
104 104
/* Action recorded in ocfs2_lock_res.l_action for the AST to complete. */
enum ocfs2_ast_action {
	OCFS2_AST_INVALID = 0,	/* zero, presumably to catch unset state */
	OCFS2_AST_ATTACH,
	OCFS2_AST_CONVERT,
	OCFS2_AST_DOWNCONVERT,
};
111 111
112 /* actions for an unlockast function to take. */ 112 /* actions for an unlockast function to take. */
/* actions for an unlockast function to take. */
enum ocfs2_unlock_action {
	OCFS2_UNLOCK_INVALID = 0,	/* zero, presumably to catch unset state */
	OCFS2_UNLOCK_CANCEL_CONVERT,
	OCFS2_UNLOCK_DROP_LOCK,
};
118 118
119 /* ocfs2_lock_res->l_flags flags. */ 119 /* ocfs2_lock_res->l_flags flags. */
120 #define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized 120 #define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized
121 * the lvb */ 121 * the lvb */
122 #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in 122 #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
123 * dlm_lock */ 123 * dlm_lock */
124 #define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to 124 #define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to
125 * downconvert*/ 125 * downconvert*/
126 #define OCFS2_LOCK_LOCAL (0x00000008) /* newly created inode */ 126 #define OCFS2_LOCK_LOCAL (0x00000008) /* newly created inode */
127 #define OCFS2_LOCK_NEEDS_REFRESH (0x00000010) 127 #define OCFS2_LOCK_NEEDS_REFRESH (0x00000010)
128 #define OCFS2_LOCK_REFRESHING (0x00000020) 128 #define OCFS2_LOCK_REFRESHING (0x00000020)
129 #define OCFS2_LOCK_INITIALIZED (0x00000040) /* track initialization 129 #define OCFS2_LOCK_INITIALIZED (0x00000040) /* track initialization
130 * for shutdown paths */ 130 * for shutdown paths */
131 #define OCFS2_LOCK_FREEING (0x00000080) /* help dlmglue track 131 #define OCFS2_LOCK_FREEING (0x00000080) /* help dlmglue track
132 * when to skip queueing 132 * when to skip queueing
133 * a lock because it's 133 * a lock because it's
134 * about to be 134 * about to be
135 * dropped. */ 135 * dropped. */
136 #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ 136 #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */
137 #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ 137 #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */
138 #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a 138 #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
139 call to dlm_lock. Only 139 call to dlm_lock. Only
140 exists with BUSY set. */ 140 exists with BUSY set. */
141 #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread 141 #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
142 * from downconverting 142 * from downconverting
143 * before the upconvert 143 * before the upconvert
144 * has completed */ 144 * has completed */
145 145
146 struct ocfs2_lock_res_ops; 146 struct ocfs2_lock_res_ops;
147 147
148 typedef void (*ocfs2_lock_callback)(int status, unsigned long data); 148 typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
149 149
#ifdef CONFIG_OCFS2_FS_STATS
/*
 * Per-mode (PR/EX) lock wait statistics, embedded twice in each
 * ocfs2_lock_res.  Kept deliberately small so the stats do not grow
 * the inode cache footprint.
 */
struct ocfs2_lock_stats {
	u64		ls_total;	/* Total wait in NSEC */
	u32		ls_gets;	/* Num acquires */
	u32		ls_fail;	/* Num failed acquires */

	/* Storing max wait in usecs saves 24 bytes per inode */
	u32		ls_max;		/* Max wait in USEC */
};
#endif
160
/*
 * A single cluster lock resource.  NOTE(review): field order appears
 * chosen for packing (the signed char / unsigned char run) - do not
 * reorder without checking the resulting structure size.
 */
struct ocfs2_lock_res {
	void                    *l_priv;
	struct ocfs2_lock_res_ops *l_ops;


	struct list_head         l_blocked_list;	/* on osb->blocked_lock_list */
	struct list_head         l_mask_waiters;

	unsigned long		 l_flags;	/* OCFS2_LOCK_* bits */
	char                     l_name[OCFS2_LOCK_ID_MAX_LEN];
	unsigned int             l_ro_holders;
	unsigned int             l_ex_holders;
	signed char		 l_level;
	signed char		 l_requested;
	signed char		 l_blocking;

	/* Data packed - type enum ocfs2_lock_type */
	unsigned char            l_type;

	/* used from AST/BAST funcs. */
	/* Data packed - enum type ocfs2_ast_action */
	unsigned char            l_action;
	/* Data packed - enum type ocfs2_unlock_action */
	unsigned char            l_unlock_action;
	unsigned int             l_pending_gen;

	spinlock_t               l_lock;	/* protects the fields above */

	struct ocfs2_dlm_lksb    l_lksb;

	wait_queue_head_t        l_event;

	struct list_head         l_debug_list;

#ifdef CONFIG_OCFS2_FS_STATS
	struct ocfs2_lock_stats  l_lock_prmode;		/* PR mode stats */
	/* l_lock_refresh sits between the two stats structs - presumably
	 * to fill padding; verify sizeof before moving it. */
	u32                      l_lock_refresh;	/* Disk refreshes */
	struct ocfs2_lock_stats  l_lock_exmode;		/* EX mode stats */
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	 l_lockdep_map;
#endif
};
199 204
/* State stored in ocfs2_orphan_scan.os_state. */
enum ocfs2_orphan_scan_state {
	ORPHAN_SCAN_ACTIVE,
	ORPHAN_SCAN_INACTIVE
};
204 209
/* Per-mount orphan directory scan state, driven by a delayed work item. */
struct ocfs2_orphan_scan {
	struct mutex		os_lock;
	struct ocfs2_super	*os_osb;
	struct ocfs2_lock_res	os_lockres;	/* lock to synchronize scans */
	struct delayed_work	os_orphan_scan_work;
	struct timespec		os_scantime;	/* time this node ran the scan */
	u32			os_count;	/* tracks node specific scans */
	u32			os_seqno;	/* tracks cluster wide scans */
	atomic_t		os_state;	/* ACTIVE or INACTIVE */
};
215 220
/* Refcounted debugfs state backing the locking_state file. */
struct ocfs2_dlm_debug {
	struct kref d_refcnt;
	struct dentry *d_locking_state;
	struct list_head d_lockres_tracking;	/* all tracked lockres, via l_debug_list */
};
221 226
/* Volume lifecycle states, stored in ocfs2_super.vol_state. */
enum ocfs2_vol_state
{
	VOLUME_INIT = 0,
	VOLUME_MOUNTED,
	VOLUME_MOUNTED_QUOTAS,
	VOLUME_DISMOUNTED,
	VOLUME_DISABLED
};
230 235
/* Allocation counters; atomics, so updated without an external lock. */
struct ocfs2_alloc_stats
{
	atomic_t moves;
	atomic_t local_data;
	atomic_t bitmap_data;
	atomic_t bg_allocs;
	atomic_t bg_extends;
};
239 244
enum ocfs2_local_alloc_state
{
	OCFS2_LA_UNUSED = 0,	/* Local alloc will never be used for
				 * this mountpoint. */
	OCFS2_LA_ENABLED,	/* Local alloc is in use. */
	OCFS2_LA_THROTTLED,	/* Local alloc is in use, but number
				 * of bits has been reduced. */
	OCFS2_LA_DISABLED	/* Local alloc has temporarily been
				 * disabled. */
};
250 255
/* Bit flags stored in ocfs2_super.s_mount_opt. */
enum ocfs2_mount_options
{
	OCFS2_MOUNT_HB_LOCAL = 1 << 0,	/* Local heartbeat */
	OCFS2_MOUNT_BARRIER = 1 << 1,	/* Use block barriers */
	OCFS2_MOUNT_NOINTR  = 1 << 2,	/* Don't catch signals */
	OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
	OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
	OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
	OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
	OCFS2_MOUNT_INODE64 = 1 << 7,	/* Allow inode numbers > 2^32 */
	OCFS2_MOUNT_POSIX_ACL = 1 << 8,	/* Force POSIX access control lists */
	OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9,	/* Disable POSIX access
						   control lists */
	OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
	OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
	OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT
						     writes */
	OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
	OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
};
271 276
272 #define OCFS2_OSB_SOFT_RO 0x0001 277 #define OCFS2_OSB_SOFT_RO 0x0001
273 #define OCFS2_OSB_HARD_RO 0x0002 278 #define OCFS2_OSB_HARD_RO 0x0002
274 #define OCFS2_OSB_ERROR_FS 0x0004 279 #define OCFS2_OSB_ERROR_FS 0x0004
275 #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 280 #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
276 281
277 #define OCFS2_DEFAULT_ATIME_QUANTUM 60 282 #define OCFS2_DEFAULT_ATIME_QUANTUM 60
278 283
279 struct ocfs2_journal; 284 struct ocfs2_journal;
280 struct ocfs2_slot_info; 285 struct ocfs2_slot_info;
281 struct ocfs2_recovery_map; 286 struct ocfs2_recovery_map;
282 struct ocfs2_replay_map; 287 struct ocfs2_replay_map;
283 struct ocfs2_quota_recovery; 288 struct ocfs2_quota_recovery;
284 struct ocfs2_dentry_lock; 289 struct ocfs2_dentry_lock;
/*
 * Per-mount OCFS2 state, reachable from the VFS super_block via
 * OCFS2_SB().  Grouped roughly by subsystem: system inodes, on-disk
 * geometry, feature flags, journaling/recovery, local alloc,
 * cluster stack/DLM, the downconvert thread, truncate log, and
 * orphan handling.
 */
struct ocfs2_super
{
	struct task_struct *commit_task;
	struct super_block *sb;
	struct inode *root_inode;
	struct inode *sys_root_inode;
	struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES];
	struct inode **local_system_inodes;

	struct ocfs2_slot_info *slot_info;

	u32 *slot_recovery_generations;

	spinlock_t node_map_lock;

	/* On-disk geometry and volume identity */
	u64 root_blkno;
	u64 system_dir_blkno;
	u64 bitmap_blkno;
	u32 bitmap_cpg;
	u8 *uuid;
	char *uuid_str;
	u32 uuid_hash;
	u8 *vol_label;
	u64 first_cluster_group_blkno;
	u32 fs_generation;

	/* Feature flags, mirrored from the on-disk superblock */
	u32 s_feature_compat;
	u32 s_feature_incompat;
	u32 s_feature_ro_compat;

	/* Protects s_next_generation, osb_flags and s_inode_steal_slot.
	 * Could protect more on osb as it's very short lived.
	 */
	spinlock_t osb_lock;
	u32 s_next_generation;
	unsigned long osb_flags;
	s16 s_inode_steal_slot;
	s16 s_meta_steal_slot;
	atomic_t s_num_inodes_stolen;
	atomic_t s_num_meta_stolen;

	unsigned long s_mount_opt;	/* enum ocfs2_mount_options bits */
	unsigned int s_atime_quantum;

	unsigned int max_slots;
	unsigned int node_num;
	int slot_num;
	int preferred_slot;
	int s_sectsize_bits;
	int s_clustersize;
	int s_clustersize_bits;
	unsigned int s_xattr_inline_size;

	/* Journal and recovery state */
	atomic_t vol_state;
	struct mutex recovery_lock;
	struct ocfs2_recovery_map *recovery_map;
	struct ocfs2_replay_map *replay_map;
	struct task_struct *recovery_thread_task;
	int disable_recovery;
	wait_queue_head_t checkpoint_event;
	atomic_t needs_checkpoint;
	struct ocfs2_journal *journal;
	unsigned long osb_commit_interval;

	struct delayed_work		la_enable_wq;

	/*
	 * Must hold local alloc i_mutex and osb->osb_lock to change
	 * local_alloc_bits. Reads can be done under either lock.
	 */
	unsigned int local_alloc_bits;
	unsigned int local_alloc_default_bits;
	/* osb_clusters_at_boot can become stale! Do not trust it to
	 * be up to date. */
	unsigned int osb_clusters_at_boot;

	enum ocfs2_local_alloc_state local_alloc_state; /* protected
							 * by osb_lock */

	struct buffer_head *local_alloc_bh;

	u64 la_last_gd;

	struct ocfs2_reservation_map	osb_la_resmap;

	unsigned int	osb_resv_level;
	unsigned int	osb_dir_resv_level;

	/* Next three fields are for local node slot recovery during
	 * mount. */
	int dirty;
	struct ocfs2_dinode *local_alloc_copy;
	struct ocfs2_quota_recovery *quota_rec;

	struct ocfs2_blockcheck_stats osb_ecc_stats;
	struct ocfs2_alloc_stats alloc_stats;
	char dev_str[20];		/* "major,minor" of the device */

	/* Cluster stack / DLM glue */
	u8 osb_stackflags;

	char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
	struct ocfs2_cluster_connection *cconn;
	struct ocfs2_lock_res osb_super_lockres;
	struct ocfs2_lock_res osb_rename_lockres;
	struct ocfs2_lock_res osb_nfs_sync_lockres;
	struct ocfs2_dlm_debug *osb_dlm_debug;

	struct dentry *osb_debug_root;
	struct dentry *osb_ctxt;

	wait_queue_head_t recovery_event;

	/* Downconvert thread state; dc_task_lock protects the list and
	 * sequence counters below. */
	spinlock_t dc_task_lock;
	struct task_struct *dc_task;
	wait_queue_head_t dc_event;
	unsigned long dc_wake_sequence;
	unsigned long dc_work_sequence;

	/*
	 * Any thread can add locks to the list, but the downconvert
	 * thread is the only one allowed to remove locks. Any change
	 * to this rule requires updating
	 * ocfs2_downconvert_thread_do_work().
	 */
	struct list_head blocked_lock_list;
	unsigned long blocked_lock_count;

	/* List of dentry locks to release. Anyone can add locks to
	 * the list, ocfs2_wq processes the list */
	struct ocfs2_dentry_lock *dentry_lock_list;
	struct work_struct dentry_lock_work;

	wait_queue_head_t		osb_mount_event;

	/* Truncate log info */
	struct inode			*osb_tl_inode;
	struct buffer_head		*osb_tl_bh;
	struct delayed_work		osb_truncate_log_wq;
	/*
	 * How many clusters in our truncate log.
	 * It must be protected by osb_tl_inode->i_mutex.
	 */
	unsigned int truncated_clusters;

	struct ocfs2_node_map		osb_recovering_orphan_dirs;
	unsigned int			*osb_orphan_wipes;
	wait_queue_head_t		osb_wipe_event;

	struct ocfs2_orphan_scan	osb_orphan_scan;

	/* used to protect metaecc calculation check of xattr. */
	spinlock_t osb_xattr_lock;

	unsigned int			osb_dx_mask;
	u32				osb_dx_seed[4];

	/* the group we used to allocate inodes. */
	u64				osb_inode_alloc_group;

	/* rb tree root for refcount lock. */
	struct rb_root	osb_rf_lock_tree;
	struct ocfs2_refcount_tree *osb_ref_tree_lru;
};
448 453
449 #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 454 #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
450 455
451 /* Useful typedef for passing around journal access functions */ 456 /* Useful typedef for passing around journal access functions */
452 typedef int (*ocfs2_journal_access_func)(handle_t *handle, 457 typedef int (*ocfs2_journal_access_func)(handle_t *handle,
453 struct ocfs2_caching_info *ci, 458 struct ocfs2_caching_info *ci,
454 struct buffer_head *bh, int type); 459 struct buffer_head *bh, int type);
455 460
456 static inline int ocfs2_should_order_data(struct inode *inode) 461 static inline int ocfs2_should_order_data(struct inode *inode)
457 { 462 {
458 if (!S_ISREG(inode->i_mode)) 463 if (!S_ISREG(inode->i_mode))
459 return 0; 464 return 0;
460 if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) 465 if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)
461 return 0; 466 return 0;
462 return 1; 467 return 1;
463 } 468 }
464 469
465 static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb) 470 static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb)
466 { 471 {
467 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) 472 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
468 return 1; 473 return 1;
469 return 0; 474 return 0;
470 } 475 }
471 476
472 static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) 477 static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
473 { 478 {
474 /* 479 /*
475 * Support for sparse files is a pre-requisite 480 * Support for sparse files is a pre-requisite
476 */ 481 */
477 if (!ocfs2_sparse_alloc(osb)) 482 if (!ocfs2_sparse_alloc(osb))
478 return 0; 483 return 0;
479 484
480 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) 485 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
481 return 1; 486 return 1;
482 return 0; 487 return 0;
483 } 488 }
484 489
485 static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) 490 static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
486 { 491 {
487 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) 492 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
488 return 1; 493 return 1;
489 return 0; 494 return 0;
490 } 495 }
491 496
492 static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) 497 static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
493 { 498 {
494 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) 499 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)
495 return 1; 500 return 1;
496 return 0; 501 return 0;
497 } 502 }
498 503
499 static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) 504 static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
500 { 505 {
501 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC) 506 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC)
502 return 1; 507 return 1;
503 return 0; 508 return 0;
504 } 509 }
505 510
506 static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb) 511 static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
507 { 512 {
508 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) 513 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
509 return 1; 514 return 1;
510 return 0; 515 return 0;
511 } 516 }
512 517
513 static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb) 518 static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb)
514 { 519 {
515 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) 520 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
516 return 1; 521 return 1;
517 return 0; 522 return 0;
518 } 523 }
519 524
520 static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) 525 static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
521 { 526 {
522 if (ocfs2_supports_indexed_dirs(osb)) 527 if (ocfs2_supports_indexed_dirs(osb))
523 return OCFS2_DX_LINK_MAX; 528 return OCFS2_DX_LINK_MAX;
524 return OCFS2_LINK_MAX; 529 return OCFS2_LINK_MAX;
525 } 530 }
526 531
527 static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di) 532 static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
528 { 533 {
529 u32 nlink = le16_to_cpu(di->i_links_count); 534 u32 nlink = le16_to_cpu(di->i_links_count);
530 u32 hi = le16_to_cpu(di->i_links_count_hi); 535 u32 hi = le16_to_cpu(di->i_links_count_hi);
531 536
532 if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL)) 537 if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL))
533 nlink |= (hi << OCFS2_LINKS_HI_SHIFT); 538 nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
534 539
535 return nlink; 540 return nlink;
536 } 541 }
537 542
538 static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink) 543 static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink)
539 { 544 {
540 u16 lo, hi; 545 u16 lo, hi;
541 546
542 lo = nlink; 547 lo = nlink;
543 hi = nlink >> OCFS2_LINKS_HI_SHIFT; 548 hi = nlink >> OCFS2_LINKS_HI_SHIFT;
544 549
545 di->i_links_count = cpu_to_le16(lo); 550 di->i_links_count = cpu_to_le16(lo);
546 di->i_links_count_hi = cpu_to_le16(hi); 551 di->i_links_count_hi = cpu_to_le16(hi);
547 } 552 }
548 553
/* Add @n (may be negative) to the combined 32-bit link count of @di. */
static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
{
	ocfs2_set_links_count(di, ocfs2_read_links_count(di) + n);
}
557 562
558 static inline int ocfs2_refcount_tree(struct ocfs2_super *osb) 563 static inline int ocfs2_refcount_tree(struct ocfs2_super *osb)
559 { 564 {
560 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) 565 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
561 return 1; 566 return 1;
562 return 0; 567 return 0;
563 } 568 }
564 569
/* set / clear functions because cluster events can make these happen
 * in parallel so we want the transitions to be atomic. this also
 * means that any future flags osb_flags must be protected by spinlock
 * too! */
static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
				      unsigned long flag)
{
	/* osb_lock serializes every reader and writer of osb_flags. */
	spin_lock(&osb->osb_lock);
	osb->osb_flags |= flag;
	spin_unlock(&osb->osb_lock);
}
576 581
577 582
578 static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, 583 static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
579 unsigned long flag) 584 unsigned long flag)
580 { 585 {
581 unsigned long ret; 586 unsigned long ret;
582 587
583 spin_lock(&osb->osb_lock); 588 spin_lock(&osb->osb_lock);
584 ret = osb->osb_flags & flag; 589 ret = osb->osb_flags & flag;
585 spin_unlock(&osb->osb_lock); 590 spin_unlock(&osb->osb_lock);
586 return ret; 591 return ret;
587 } 592 }
588 593
589 static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 594 static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
590 int hard) 595 int hard)
591 { 596 {
592 spin_lock(&osb->osb_lock); 597 spin_lock(&osb->osb_lock);
593 osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO); 598 osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO);
594 if (hard) 599 if (hard)
595 osb->osb_flags |= OCFS2_OSB_HARD_RO; 600 osb->osb_flags |= OCFS2_OSB_HARD_RO;
596 else 601 else
597 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 602 osb->osb_flags |= OCFS2_OSB_SOFT_RO;
598 spin_unlock(&osb->osb_lock); 603 spin_unlock(&osb->osb_lock);
599 } 604 }
600 605
601 static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb) 606 static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb)
602 { 607 {
603 int ret; 608 int ret;
604 609
605 spin_lock(&osb->osb_lock); 610 spin_lock(&osb->osb_lock);
606 ret = osb->osb_flags & OCFS2_OSB_HARD_RO; 611 ret = osb->osb_flags & OCFS2_OSB_HARD_RO;
607 spin_unlock(&osb->osb_lock); 612 spin_unlock(&osb->osb_lock);
608 613
609 return ret; 614 return ret;
610 } 615 }
611 616
612 static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) 617 static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
613 { 618 {
614 int ret; 619 int ret;
615 620
616 spin_lock(&osb->osb_lock); 621 spin_lock(&osb->osb_lock);
617 ret = osb->osb_flags & OCFS2_OSB_SOFT_RO; 622 ret = osb->osb_flags & OCFS2_OSB_SOFT_RO;
618 spin_unlock(&osb->osb_lock); 623 spin_unlock(&osb->osb_lock);
619 624
620 return ret; 625 return ret;
621 } 626 }
622 627
623 static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) 628 static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb)
624 { 629 {
625 return (osb->s_feature_incompat & 630 return (osb->s_feature_incompat &
626 (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | 631 (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK |
627 OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); 632 OCFS2_FEATURE_INCOMPAT_CLUSTERINFO));
628 } 633 }
629 634
630 static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) 635 static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
631 { 636 {
632 if (ocfs2_clusterinfo_valid(osb) && 637 if (ocfs2_clusterinfo_valid(osb) &&
633 memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, 638 memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
634 OCFS2_STACK_LABEL_LEN)) 639 OCFS2_STACK_LABEL_LEN))
635 return 1; 640 return 1;
636 return 0; 641 return 0;
637 } 642 }
638 643
639 static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) 644 static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb)
640 { 645 {
641 if (ocfs2_clusterinfo_valid(osb) && 646 if (ocfs2_clusterinfo_valid(osb) &&
642 !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, 647 !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
643 OCFS2_STACK_LABEL_LEN)) 648 OCFS2_STACK_LABEL_LEN))
644 return 1; 649 return 1;
645 return 0; 650 return 0;
646 } 651 }
647 652
648 static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) 653 static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
649 { 654 {
650 return ocfs2_o2cb_stack(osb) && 655 return ocfs2_o2cb_stack(osb) &&
651 (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); 656 (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT);
652 } 657 }
653 658
654 static inline int ocfs2_mount_local(struct ocfs2_super *osb) 659 static inline int ocfs2_mount_local(struct ocfs2_super *osb)
655 { 660 {
656 return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); 661 return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
657 } 662 }
658 663
659 static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) 664 static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
660 { 665 {
661 return (osb->s_feature_incompat & 666 return (osb->s_feature_incompat &
662 OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); 667 OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP);
663 } 668 }
664 669
665 670
/*
 * On-disk metadata blocks carry an ASCII signature in their first bytes;
 * each macro below checks one block type's signature.
 */
#define OCFS2_IS_VALID_DINODE(ptr)					\
	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))

#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\
	(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))

#define OCFS2_IS_VALID_GROUP_DESC(ptr)					\
	(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))


#define OCFS2_IS_VALID_XATTR_BLOCK(ptr)					\
	(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))

#define OCFS2_IS_VALID_DIR_TRAILER(ptr)					\
	(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))

#define OCFS2_IS_VALID_DX_ROOT(ptr)					\
	(!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE))

#define OCFS2_IS_VALID_DX_LEAF(ptr)					\
	(!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))

#define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr)				\
	(!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE))
690 695
691 static inline unsigned long ino_from_blkno(struct super_block *sb, 696 static inline unsigned long ino_from_blkno(struct super_block *sb,
692 u64 blkno) 697 u64 blkno)
693 { 698 {
694 return (unsigned long)(blkno & (u64)ULONG_MAX); 699 return (unsigned long)(blkno & (u64)ULONG_MAX);
695 } 700 }
696 701
697 static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb, 702 static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb,
698 u32 clusters) 703 u32 clusters)
699 { 704 {
700 int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits - 705 int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits -
701 sb->s_blocksize_bits; 706 sb->s_blocksize_bits;
702 707
703 return (u64)clusters << c_to_b_bits; 708 return (u64)clusters << c_to_b_bits;
704 } 709 }
705 710
706 static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb, 711 static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb,
707 u64 blocks) 712 u64 blocks)
708 { 713 {
709 int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits - 714 int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits -
710 sb->s_blocksize_bits; 715 sb->s_blocksize_bits;
711 716
712 return (u32)(blocks >> b_to_c_bits); 717 return (u32)(blocks >> b_to_c_bits);
713 } 718 }
714 719
715 static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, 720 static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
716 u64 bytes) 721 u64 bytes)
717 { 722 {
718 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; 723 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
719 unsigned int clusters; 724 unsigned int clusters;
720 725
721 bytes += OCFS2_SB(sb)->s_clustersize - 1; 726 bytes += OCFS2_SB(sb)->s_clustersize - 1;
722 /* OCFS2 just cannot have enough clusters to overflow this */ 727 /* OCFS2 just cannot have enough clusters to overflow this */
723 clusters = (unsigned int)(bytes >> cl_bits); 728 clusters = (unsigned int)(bytes >> cl_bits);
724 729
725 return clusters; 730 return clusters;
726 } 731 }
727 732
728 static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, 733 static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb,
729 u64 bytes) 734 u64 bytes)
730 { 735 {
731 bytes += sb->s_blocksize - 1; 736 bytes += sb->s_blocksize - 1;
732 return bytes >> sb->s_blocksize_bits; 737 return bytes >> sb->s_blocksize_bits;
733 } 738 }
734 739
735 static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, 740 static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb,
736 u32 clusters) 741 u32 clusters)
737 { 742 {
738 return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; 743 return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
739 } 744 }
740 745
741 static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb, 746 static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb,
742 u64 blocks) 747 u64 blocks)
743 { 748 {
744 int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; 749 int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits;
745 unsigned int clusters; 750 unsigned int clusters;
746 751
747 clusters = ocfs2_blocks_to_clusters(sb, blocks); 752 clusters = ocfs2_blocks_to_clusters(sb, blocks);
748 return (u64)clusters << bits; 753 return (u64)clusters << bits;
749 } 754 }
750 755
751 static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, 756 static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
752 u64 bytes) 757 u64 bytes)
753 { 758 {
754 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; 759 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
755 unsigned int clusters; 760 unsigned int clusters;
756 761
757 clusters = ocfs2_clusters_for_bytes(sb, bytes); 762 clusters = ocfs2_clusters_for_bytes(sb, bytes);
758 return (u64)clusters << cl_bits; 763 return (u64)clusters << cl_bits;
759 } 764 }
760 765
761 static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb, 766 static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb,
762 u64 bytes) 767 u64 bytes)
763 { 768 {
764 u64 blocks; 769 u64 blocks;
765 770
766 blocks = ocfs2_blocks_for_bytes(sb, bytes); 771 blocks = ocfs2_blocks_for_bytes(sb, bytes);
767 return blocks << sb->s_blocksize_bits; 772 return blocks << sb->s_blocksize_bits;
768 } 773 }
769 774
/* Round @bytes up to a whole number of 512-byte sectors. */
static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes)
{
	return (unsigned long)((bytes + 511) >> 9);
}
774 779
775 static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb, 780 static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb,
776 unsigned long pg_index) 781 unsigned long pg_index)
777 { 782 {
778 u32 clusters = pg_index; 783 u32 clusters = pg_index;
779 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; 784 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
780 785
781 if (unlikely(PAGE_CACHE_SHIFT > cbits)) 786 if (unlikely(PAGE_CACHE_SHIFT > cbits))
782 clusters = pg_index << (PAGE_CACHE_SHIFT - cbits); 787 clusters = pg_index << (PAGE_CACHE_SHIFT - cbits);
783 else if (PAGE_CACHE_SHIFT < cbits) 788 else if (PAGE_CACHE_SHIFT < cbits)
784 clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT); 789 clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT);
785 790
786 return clusters; 791 return clusters;
787 } 792 }
788 793
789 /* 794 /*
790 * Find the 1st page index which covers the given clusters. 795 * Find the 1st page index which covers the given clusters.
791 */ 796 */
792 static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb, 797 static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb,
793 u32 clusters) 798 u32 clusters)
794 { 799 {
795 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; 800 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
796 pgoff_t index = clusters; 801 pgoff_t index = clusters;
797 802
798 if (PAGE_CACHE_SHIFT > cbits) { 803 if (PAGE_CACHE_SHIFT > cbits) {
799 index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits); 804 index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits);
800 } else if (PAGE_CACHE_SHIFT < cbits) { 805 } else if (PAGE_CACHE_SHIFT < cbits) {
801 index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT); 806 index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT);
802 } 807 }
803 808
804 return index; 809 return index;
805 } 810 }
806 811
807 static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) 812 static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
808 { 813 {
809 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; 814 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
810 unsigned int pages_per_cluster = 1; 815 unsigned int pages_per_cluster = 1;
811 816
812 if (PAGE_CACHE_SHIFT < cbits) 817 if (PAGE_CACHE_SHIFT < cbits)
813 pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT); 818 pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT);
814 819
815 return pages_per_cluster; 820 return pages_per_cluster;
816 } 821 }
817 822
818 static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, 823 static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
819 unsigned int megs) 824 unsigned int megs)
820 { 825 {
821 BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); 826 BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576);
822 827
823 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); 828 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
824 } 829 }
825 830
826 static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, 831 static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
827 unsigned int clusters) 832 unsigned int clusters)
828 { 833 {
829 return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits); 834 return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits);
830 } 835 }
831 836
/* ocfs2 bitmaps use the little-endian (ext2-style) bit numbering. */
static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
{
	ext2_set_bit(bit, bitmap);
}
/* Cast wrapper so callers may pass any pointer type as the bitmap. */
#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr))
837 842
/* Clear a bit using the little-endian (ext2-style) bit numbering. */
static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap)
{
	ext2_clear_bit(bit, bitmap);
}
/* Cast wrapper so callers may pass any pointer type as the bitmap. */
#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr))
843 848