Commit a19128260107f951d1b4c421cf98b92f8092b069

Authored by Sunil Mushran
Committed by Joel Becker
1 parent 0b94a909eb

ocfs2: Prevent a livelock in dlmglue

There is a possibility of a livelock in __ocfs2_cluster_lock(). If a node were
to get an ast for an upconvert request, followed immediately by a bast,
there is a small window where the fs may downconvert the lock before the
process requesting the upconvert is able to take the lock.

This patch adds a new flag to indicate that the upconvert is still in
progress and that the dc thread should not downconvert it right now.

Wengang Wang <wen.gang.wang@oracle.com> and Joel Becker
<joel.becker@oracle.com> contributed heavily to this patch.

Reported-by: David Teigland <teigland@redhat.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

Showing 2 changed files with 50 additions and 3 deletions Inline Diff

1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dlmglue.c 4 * dlmglue.c
5 * 5 *
6 * Code which implements an OCFS2 specific interface to our DLM. 6 * Code which implements an OCFS2 specific interface to our DLM.
7 * 7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/types.h> 26 #include <linux/types.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/highmem.h> 28 #include <linux/highmem.h>
29 #include <linux/mm.h> 29 #include <linux/mm.h>
30 #include <linux/kthread.h> 30 #include <linux/kthread.h>
31 #include <linux/pagemap.h> 31 #include <linux/pagemap.h>
32 #include <linux/debugfs.h> 32 #include <linux/debugfs.h>
33 #include <linux/seq_file.h> 33 #include <linux/seq_file.h>
34 #include <linux/time.h> 34 #include <linux/time.h>
35 #include <linux/quotaops.h> 35 #include <linux/quotaops.h>
36 36
37 #define MLOG_MASK_PREFIX ML_DLM_GLUE 37 #define MLOG_MASK_PREFIX ML_DLM_GLUE
38 #include <cluster/masklog.h> 38 #include <cluster/masklog.h>
39 39
40 #include "ocfs2.h" 40 #include "ocfs2.h"
41 #include "ocfs2_lockingver.h" 41 #include "ocfs2_lockingver.h"
42 42
43 #include "alloc.h" 43 #include "alloc.h"
44 #include "dcache.h" 44 #include "dcache.h"
45 #include "dlmglue.h" 45 #include "dlmglue.h"
46 #include "extent_map.h" 46 #include "extent_map.h"
47 #include "file.h" 47 #include "file.h"
48 #include "heartbeat.h" 48 #include "heartbeat.h"
49 #include "inode.h" 49 #include "inode.h"
50 #include "journal.h" 50 #include "journal.h"
51 #include "stackglue.h" 51 #include "stackglue.h"
52 #include "slot_map.h" 52 #include "slot_map.h"
53 #include "super.h" 53 #include "super.h"
54 #include "uptodate.h" 54 #include "uptodate.h"
55 #include "quota.h" 55 #include "quota.h"
56 #include "refcounttree.h" 56 #include "refcounttree.h"
57 57
58 #include "buffer_head_io.h" 58 #include "buffer_head_io.h"
59 59
/*
 * A mask waiter lets a task sleep until a lockres's l_flags reach a
 * desired state: the waiter is woken (via mw_complete) when the bits in
 * mw_mask take the values given by mw_goal.
 */
struct ocfs2_mask_waiter {
	struct list_head	mw_item;	/* presumably linkage on the lockres's waiter list — confirm against users */
	int			mw_status;	/* status handed back to the woken waiter */
	struct completion	mw_complete;	/* completed when the mask condition is met */
	unsigned long		mw_mask;	/* l_flags bits the waiter cares about */
	unsigned long		mw_goal;	/* required value of those bits */
#ifdef CONFIG_OCFS2_FS_STATS
	unsigned long long	mw_lock_start;	/* ns timestamp when the wait began (lock stats) */
#endif
};
70 70
/* Per-lock-type helpers mapping a lockres back to its ocfs2_super. */
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
75 75
/*
 * Return value from ->downconvert_worker functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock()
 */
enum ocfs2_unblock_action {
	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
				      * ->post_unlock callback */
	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
				      * ->post_unlock() callback. */
};
90 90
/* Result of an unblock attempt, filled in by the downconvert path. */
struct ocfs2_unblock_ctl {
	int requeue;		/* nonzero: retry this lockres later */
	enum ocfs2_unblock_action unblock_action;
};
95 95
/* Lockdep class keys */
struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];

/* Forward declarations for the per-type lockres callbacks wired into
 * the ocfs2_lock_res_ops tables below. */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);

static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);
/* Convenience wrapper that records the calling function and line. */
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)

/* This aids in debugging situations where a bad LVB might be involved. */
/* Dump every field of the meta-data LVB attached to @lockres at the
 * given mlog @level, converting the big-endian on-wire values for
 * display. */
static void ocfs2_dump_meta_lvb_info(u64 level,
				     const char *function,
				     unsigned int line,
				     struct ocfs2_lock_res *lockres)
{
	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);

	mlog(level, "LVB information for %s (called from %s:%u):\n",
	     lockres->l_name, function, line);
	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
	     be32_to_cpu(lvb->lvb_igeneration));
	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
	     be16_to_cpu(lvb->lvb_imode));
	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
	     be32_to_cpu(lvb->lvb_iattr));
}
145 145
146 146
/*
 * OCFS2 Lock Resource Operations
 *
 * These fine tune the behavior of the generic dlmglue locking infrastructure.
 *
 * The most basic of lock types can point ->l_priv to their respective
 * struct ocfs2_super and allow the default actions to manage things.
 *
 * Right now, each lock type also needs to implement an init function,
 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
 * should be called when the lock is no longer needed (i.e., object
 * destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
	 * this callback if ->l_priv is not an ocfs2_super pointer
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optionally called in the downconvert thread after a
	 * successful downconvert. The lockres will not be referenced
	 * after this callback is called, so it is safe to free
	 * memory, etc.
	 *
	 * The exact semantics of when this is called are controlled
	 * by ->downconvert_worker()
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Allow a lock type to add checks to determine whether it is
	 * safe to downconvert a lock. Return 0 to re-queue the
	 * downconvert at a later time, nonzero to continue.
	 *
	 * For most locks, the default checks that there are no
	 * incompatible holders are sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Allows a lock type to populate the lock value block. This
	 * is called on downconvert, and when we drop a lock.
	 *
	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
	 * in the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread when it is determined
	 * that a lock will be downconverted. This is called without
	 * any locks held so the function can do work that might
	 * schedule (syncing out data, etc).
	 *
	 * This should return any one of the ocfs2_unblock_action
	 * values, depending on what it wants the thread to do.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags which describe the specific requirements
	 * of a lock type. Descriptions of each individual flag follow.
	 */
	int flags;
};
218 218
/*
 * Some locks want to "refresh" potentially stale data when a
 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
 * individual lockres l_flags member from the ast function. It is
 * expected that the locking wrapper will clear the
 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * Indicate that a lock type makes use of the lock value block. The
 * ->set_lvb lock type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB 0x2
234 234
/*
 * Per-lock-type operation tables. A lock type with a zero flags field
 * and no callbacks simply gets the generic dlmglue behavior.
 */
static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

/* Inode meta-data lock: refreshes the inode from the LVB and flushes
 * dirty data in its downconvert worker. */
static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.check_downconvert = ocfs2_check_meta_downconvert,
	.set_lvb	= ocfs2_set_meta_lvb,
	.downconvert_worker = ocfs2_data_convert_worker,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_super_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
};

static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

/* Dentry locks need a post_unlock hook so the dentry can be released
 * after the downconvert completes. */
static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
	.get_osb	= ocfs2_get_dentry_osb,
	.post_unlock	= ocfs2_dentry_post_unlock,
	.downconvert_worker = ocfs2_dentry_convert_worker,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
	.get_osb	= ocfs2_get_file_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
	.set_lvb	= ocfs2_set_qinfo_lvb,
	.get_osb	= ocfs2_get_qinfo_osb,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
	.check_downconvert = ocfs2_check_refcount_downconvert,
	.downconvert_worker = ocfs2_refcount_convert_worker,
	.flags		= 0,
};
292 292
293 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 293 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
294 { 294 {
295 return lockres->l_type == OCFS2_LOCK_TYPE_META || 295 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
296 lockres->l_type == OCFS2_LOCK_TYPE_RW || 296 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
297 lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 297 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
298 } 298 }
299 299
/* Recover the inode stashed in ->l_priv; only valid for inode locks. */
static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
{
	BUG_ON(!ocfs2_is_inode_lock(lockres));

	return (struct inode *) lockres->l_priv;
}
306 306
/* Recover the dentry lock stashed in ->l_priv of a DENTRY lockres. */
static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);

	return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
313 313
/* Recover the quota info stashed in ->l_priv of a QINFO lockres. */
static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);

	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
}
320 320
/* A refcount-tree lockres is embedded in its tree; map back to it. */
static inline struct ocfs2_refcount_tree *
ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
{
	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
}
326 326
/*
 * Map any lockres to its superblock: use the type's ->get_osb callback
 * when present, otherwise ->l_priv is the ocfs2_super itself.
 */
static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
	if (lockres->l_ops->get_osb)
		return lockres->l_ops->get_osb(lockres);

	return (struct ocfs2_super *)lockres->l_priv;
}
334 334
/* Forward declarations for the core lock/unlock machinery. */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags);
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted);
static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
				   struct ocfs2_lock_res *lockres,
				   int level, unsigned long caller_ip);
/* Unlock wrapper that records the caller's return address for tracing. */
static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres,
					int level)
{
	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
}
350 350
static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert);
/* Report a DLM error; dentry lock names are not printable strings, so
 * print their prefix and decoded inode number instead. */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     _err, _func, _lockres->l_name);					\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name,	\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level,
				  int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);
386 386
387 387
/*
 * Build the canonical lock-resource name into @name:
 * a type character, the pad string, then the block number (16 hex
 * digits) and generation (8 hex digits). The result must exactly fill
 * OCFS2_LOCK_ID_MAX_LEN - 1 characters, which the BUG_ON enforces.
 */
static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
				  u64 blkno,
				  u32 generation,
				  char *name)
{
	int len;

	mlog_entry_void();

	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);

	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
		       (long long)blkno, generation);

	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));

	mlog(0, "built lock resource with name: %s\n", name);

	mlog_exit_void();
}
409 409
/* Protects the debugfs lockres tracking list below. */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);

/* Add @res to the per-osb debug tracking list (for debugfs dumps). */
static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
				       struct ocfs2_dlm_debug *dlm_debug)
{
	mlog(0, "Add tracking for lockres %s\n", res->l_name);

	spin_lock(&ocfs2_dlm_tracking_lock);
	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
	spin_unlock(&ocfs2_dlm_tracking_lock);
}
421 421
422 static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 422 static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
423 { 423 {
424 spin_lock(&ocfs2_dlm_tracking_lock); 424 spin_lock(&ocfs2_dlm_tracking_lock);
425 if (!list_empty(&res->l_debug_list)) 425 if (!list_empty(&res->l_debug_list))
426 list_del_init(&res->l_debug_list); 426 list_del_init(&res->l_debug_list);
427 spin_unlock(&ocfs2_dlm_tracking_lock); 427 spin_unlock(&ocfs2_dlm_tracking_lock);
428 } 428 }
429 429
#ifdef CONFIG_OCFS2_FS_STATS
/* Zero all per-lockres acquisition statistics counters. */
static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
	res->l_lock_num_prmode = 0;
	res->l_lock_num_prmode_failed = 0;
	res->l_lock_total_prmode = 0;
	res->l_lock_max_prmode = 0;
	res->l_lock_num_exmode = 0;
	res->l_lock_num_exmode_failed = 0;
	res->l_lock_total_exmode = 0;
	res->l_lock_max_exmode = 0;
	res->l_lock_refresh = 0;
}
443 443
/*
 * Fold one completed lock attempt into the lockres statistics.
 * @level selects the PR or EX counter set (any other level is
 * silently ignored); @mw supplies the wait start time; a nonzero
 * @ret bumps the failure counter. The elapsed wait is accumulated
 * into the total and tracked as a maximum.
 */
static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
				    struct ocfs2_mask_waiter *mw, int ret)
{
	unsigned long long *num, *sum;
	unsigned int *max, *failed;
	struct timespec ts = current_kernel_time();
	unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start;

	/* Point at the counter set matching the requested level. */
	if (level == LKM_PRMODE) {
		num = &res->l_lock_num_prmode;
		sum = &res->l_lock_total_prmode;
		max = &res->l_lock_max_prmode;
		failed = &res->l_lock_num_prmode_failed;
	} else if (level == LKM_EXMODE) {
		num = &res->l_lock_num_exmode;
		sum = &res->l_lock_total_exmode;
		max = &res->l_lock_max_exmode;
		failed = &res->l_lock_num_exmode_failed;
	} else
		return;

	(*num)++;
	(*sum) += time;
	if (time > *max)
		*max = time;
	if (ret)
		(*failed)++;
}
472 472
473 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 473 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
474 { 474 {
475 lockres->l_lock_refresh++; 475 lockres->l_lock_refresh++;
476 } 476 }
477 477
478 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 478 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
479 { 479 {
480 struct timespec ts = current_kernel_time(); 480 struct timespec ts = current_kernel_time();
481 mw->mw_lock_start = timespec_to_ns(&ts); 481 mw->mw_lock_start = timespec_to_ns(&ts);
482 } 482 }
483 #else 483 #else
484 static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 484 static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
485 { 485 {
486 } 486 }
487 static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 487 static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
488 int level, struct ocfs2_mask_waiter *mw, int ret) 488 int level, struct ocfs2_mask_waiter *mw, int ret)
489 { 489 {
490 } 490 }
491 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 491 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
492 { 492 {
493 } 493 }
494 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 494 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
495 { 495 {
496 } 496 }
497 #endif 497 #endif
498 498
499 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 499 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
500 struct ocfs2_lock_res *res, 500 struct ocfs2_lock_res *res,
501 enum ocfs2_lock_type type, 501 enum ocfs2_lock_type type,
502 struct ocfs2_lock_res_ops *ops, 502 struct ocfs2_lock_res_ops *ops,
503 void *priv) 503 void *priv)
504 { 504 {
505 res->l_type = type; 505 res->l_type = type;
506 res->l_ops = ops; 506 res->l_ops = ops;
507 res->l_priv = priv; 507 res->l_priv = priv;
508 508
509 res->l_level = DLM_LOCK_IV; 509 res->l_level = DLM_LOCK_IV;
510 res->l_requested = DLM_LOCK_IV; 510 res->l_requested = DLM_LOCK_IV;
511 res->l_blocking = DLM_LOCK_IV; 511 res->l_blocking = DLM_LOCK_IV;
512 res->l_action = OCFS2_AST_INVALID; 512 res->l_action = OCFS2_AST_INVALID;
513 res->l_unlock_action = OCFS2_UNLOCK_INVALID; 513 res->l_unlock_action = OCFS2_UNLOCK_INVALID;
514 514
515 res->l_flags = OCFS2_LOCK_INITIALIZED; 515 res->l_flags = OCFS2_LOCK_INITIALIZED;
516 516
517 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 517 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
518 518
519 ocfs2_init_lock_stats(res); 519 ocfs2_init_lock_stats(res);
520 #ifdef CONFIG_DEBUG_LOCK_ALLOC 520 #ifdef CONFIG_DEBUG_LOCK_ALLOC
521 if (type != OCFS2_LOCK_TYPE_OPEN) 521 if (type != OCFS2_LOCK_TYPE_OPEN)
522 lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], 522 lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
523 &lockdep_keys[type], 0); 523 &lockdep_keys[type], 0);
524 else 524 else
525 res->l_lockdep_map.key = NULL; 525 res->l_lockdep_map.key = NULL;
526 #endif 526 #endif
527 } 527 }
528 528
529 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 529 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
530 { 530 {
531 /* This also clears out the lock status block */ 531 /* This also clears out the lock status block */
532 memset(res, 0, sizeof(struct ocfs2_lock_res)); 532 memset(res, 0, sizeof(struct ocfs2_lock_res));
533 spin_lock_init(&res->l_lock); 533 spin_lock_init(&res->l_lock);
534 init_waitqueue_head(&res->l_event); 534 init_waitqueue_head(&res->l_event);
535 INIT_LIST_HEAD(&res->l_blocked_list); 535 INIT_LIST_HEAD(&res->l_blocked_list);
536 INIT_LIST_HEAD(&res->l_mask_waiters); 536 INIT_LIST_HEAD(&res->l_mask_waiters);
537 } 537 }
538 538
539 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 539 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
540 enum ocfs2_lock_type type, 540 enum ocfs2_lock_type type,
541 unsigned int generation, 541 unsigned int generation,
542 struct inode *inode) 542 struct inode *inode)
543 { 543 {
544 struct ocfs2_lock_res_ops *ops; 544 struct ocfs2_lock_res_ops *ops;
545 545
546 switch(type) { 546 switch(type) {
547 case OCFS2_LOCK_TYPE_RW: 547 case OCFS2_LOCK_TYPE_RW:
548 ops = &ocfs2_inode_rw_lops; 548 ops = &ocfs2_inode_rw_lops;
549 break; 549 break;
550 case OCFS2_LOCK_TYPE_META: 550 case OCFS2_LOCK_TYPE_META:
551 ops = &ocfs2_inode_inode_lops; 551 ops = &ocfs2_inode_inode_lops;
552 break; 552 break;
553 case OCFS2_LOCK_TYPE_OPEN: 553 case OCFS2_LOCK_TYPE_OPEN:
554 ops = &ocfs2_inode_open_lops; 554 ops = &ocfs2_inode_open_lops;
555 break; 555 break;
556 default: 556 default:
557 mlog_bug_on_msg(1, "type: %d\n", type); 557 mlog_bug_on_msg(1, "type: %d\n", type);
558 ops = NULL; /* thanks, gcc */ 558 ops = NULL; /* thanks, gcc */
559 break; 559 break;
560 }; 560 };
561 561
562 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 562 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
563 generation, res->l_name); 563 generation, res->l_name);
564 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 564 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
565 } 565 }
566 566
567 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 567 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
568 { 568 {
569 struct inode *inode = ocfs2_lock_res_inode(lockres); 569 struct inode *inode = ocfs2_lock_res_inode(lockres);
570 570
571 return OCFS2_SB(inode->i_sb); 571 return OCFS2_SB(inode->i_sb);
572 } 572 }
573 573
574 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 574 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
575 { 575 {
576 struct ocfs2_mem_dqinfo *info = lockres->l_priv; 576 struct ocfs2_mem_dqinfo *info = lockres->l_priv;
577 577
578 return OCFS2_SB(info->dqi_gi.dqi_sb); 578 return OCFS2_SB(info->dqi_gi.dqi_sb);
579 } 579 }
580 580
581 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 581 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
582 { 582 {
583 struct ocfs2_file_private *fp = lockres->l_priv; 583 struct ocfs2_file_private *fp = lockres->l_priv;
584 584
585 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 585 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
586 } 586 }
587 587
588 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 588 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
589 { 589 {
590 __be64 inode_blkno_be; 590 __be64 inode_blkno_be;
591 591
592 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 592 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
593 sizeof(__be64)); 593 sizeof(__be64));
594 594
595 return be64_to_cpu(inode_blkno_be); 595 return be64_to_cpu(inode_blkno_be);
596 } 596 }
597 597
598 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 598 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
599 { 599 {
600 struct ocfs2_dentry_lock *dl = lockres->l_priv; 600 struct ocfs2_dentry_lock *dl = lockres->l_priv;
601 601
602 return OCFS2_SB(dl->dl_inode->i_sb); 602 return OCFS2_SB(dl->dl_inode->i_sb);
603 } 603 }
604 604
605 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 605 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
606 u64 parent, struct inode *inode) 606 u64 parent, struct inode *inode)
607 { 607 {
608 int len; 608 int len;
609 u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 609 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
610 __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 610 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
611 struct ocfs2_lock_res *lockres = &dl->dl_lockres; 611 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
612 612
613 ocfs2_lock_res_init_once(lockres); 613 ocfs2_lock_res_init_once(lockres);
614 614
615 /* 615 /*
616 * Unfortunately, the standard lock naming scheme won't work 616 * Unfortunately, the standard lock naming scheme won't work
617 * here because we have two 16 byte values to use. Instead, 617 * here because we have two 16 byte values to use. Instead,
618 * we'll stuff the inode number as a binary value. We still 618 * we'll stuff the inode number as a binary value. We still
619 * want error prints to show something without garbling the 619 * want error prints to show something without garbling the
620 * display, so drop a null byte in there before the inode 620 * display, so drop a null byte in there before the inode
621 * number. A future version of OCFS2 will likely use all 621 * number. A future version of OCFS2 will likely use all
622 * binary lock names. The stringified names have been a 622 * binary lock names. The stringified names have been a
623 * tremendous aid in debugging, but now that the debugfs 623 * tremendous aid in debugging, but now that the debugfs
624 * interface exists, we can mangle things there if need be. 624 * interface exists, we can mangle things there if need be.
625 * 625 *
626 * NOTE: We also drop the standard "pad" value (the total lock 626 * NOTE: We also drop the standard "pad" value (the total lock
627 * name size stays the same though - the last part is all 627 * name size stays the same though - the last part is all
628 * zeros due to the memset in ocfs2_lock_res_init_once() 628 * zeros due to the memset in ocfs2_lock_res_init_once()
629 */ 629 */
630 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 630 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
631 "%c%016llx", 631 "%c%016llx",
632 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 632 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
633 (long long)parent); 633 (long long)parent);
634 634
635 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 635 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
636 636
637 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 637 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
638 sizeof(__be64)); 638 sizeof(__be64));
639 639
640 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 640 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
641 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 641 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
642 dl); 642 dl);
643 } 643 }
644 644
645 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 645 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
646 struct ocfs2_super *osb) 646 struct ocfs2_super *osb)
647 { 647 {
648 /* Superblock lockres doesn't come from a slab so we call init 648 /* Superblock lockres doesn't come from a slab so we call init
649 * once on it manually. */ 649 * once on it manually. */
650 ocfs2_lock_res_init_once(res); 650 ocfs2_lock_res_init_once(res);
651 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 651 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
652 0, res->l_name); 652 0, res->l_name);
653 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 653 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
654 &ocfs2_super_lops, osb); 654 &ocfs2_super_lops, osb);
655 } 655 }
656 656
657 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 657 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
658 struct ocfs2_super *osb) 658 struct ocfs2_super *osb)
659 { 659 {
660 /* Rename lockres doesn't come from a slab so we call init 660 /* Rename lockres doesn't come from a slab so we call init
661 * once on it manually. */ 661 * once on it manually. */
662 ocfs2_lock_res_init_once(res); 662 ocfs2_lock_res_init_once(res);
663 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 663 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
664 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 664 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
665 &ocfs2_rename_lops, osb); 665 &ocfs2_rename_lops, osb);
666 } 666 }
667 667
668 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 668 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
669 struct ocfs2_super *osb) 669 struct ocfs2_super *osb)
670 { 670 {
671 /* nfs_sync lockres doesn't come from a slab so we call init 671 /* nfs_sync lockres doesn't come from a slab so we call init
672 * once on it manually. */ 672 * once on it manually. */
673 ocfs2_lock_res_init_once(res); 673 ocfs2_lock_res_init_once(res);
674 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 674 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
675 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 675 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
676 &ocfs2_nfs_sync_lops, osb); 676 &ocfs2_nfs_sync_lops, osb);
677 } 677 }
678 678
679 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 679 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
680 struct ocfs2_super *osb) 680 struct ocfs2_super *osb)
681 { 681 {
682 ocfs2_lock_res_init_once(res); 682 ocfs2_lock_res_init_once(res);
683 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 683 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
684 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 684 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
685 &ocfs2_orphan_scan_lops, osb); 685 &ocfs2_orphan_scan_lops, osb);
686 } 686 }
687 687
688 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 688 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
689 struct ocfs2_file_private *fp) 689 struct ocfs2_file_private *fp)
690 { 690 {
691 struct inode *inode = fp->fp_file->f_mapping->host; 691 struct inode *inode = fp->fp_file->f_mapping->host;
692 struct ocfs2_inode_info *oi = OCFS2_I(inode); 692 struct ocfs2_inode_info *oi = OCFS2_I(inode);
693 693
694 ocfs2_lock_res_init_once(lockres); 694 ocfs2_lock_res_init_once(lockres);
695 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 695 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
696 inode->i_generation, lockres->l_name); 696 inode->i_generation, lockres->l_name);
697 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 697 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
698 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 698 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
699 fp); 699 fp);
700 lockres->l_flags |= OCFS2_LOCK_NOCACHE; 700 lockres->l_flags |= OCFS2_LOCK_NOCACHE;
701 } 701 }
702 702
703 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 703 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
704 struct ocfs2_mem_dqinfo *info) 704 struct ocfs2_mem_dqinfo *info)
705 { 705 {
706 ocfs2_lock_res_init_once(lockres); 706 ocfs2_lock_res_init_once(lockres);
707 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, 707 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
708 0, lockres->l_name); 708 0, lockres->l_name);
709 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 709 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
710 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 710 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
711 info); 711 info);
712 } 712 }
713 713
714 void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, 714 void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
715 struct ocfs2_super *osb, u64 ref_blkno, 715 struct ocfs2_super *osb, u64 ref_blkno,
716 unsigned int generation) 716 unsigned int generation)
717 { 717 {
718 ocfs2_lock_res_init_once(lockres); 718 ocfs2_lock_res_init_once(lockres);
719 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, 719 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
720 generation, lockres->l_name); 720 generation, lockres->l_name);
721 ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, 721 ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
722 &ocfs2_refcount_block_lops, osb); 722 &ocfs2_refcount_block_lops, osb);
723 } 723 }
724 724
725 void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 725 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
726 { 726 {
727 mlog_entry_void(); 727 mlog_entry_void();
728 728
729 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 729 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
730 return; 730 return;
731 731
732 ocfs2_remove_lockres_tracking(res); 732 ocfs2_remove_lockres_tracking(res);
733 733
734 mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 734 mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
735 "Lockres %s is on the blocked list\n", 735 "Lockres %s is on the blocked list\n",
736 res->l_name); 736 res->l_name);
737 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 737 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
738 "Lockres %s has mask waiters pending\n", 738 "Lockres %s has mask waiters pending\n",
739 res->l_name); 739 res->l_name);
740 mlog_bug_on_msg(spin_is_locked(&res->l_lock), 740 mlog_bug_on_msg(spin_is_locked(&res->l_lock),
741 "Lockres %s is locked\n", 741 "Lockres %s is locked\n",
742 res->l_name); 742 res->l_name);
743 mlog_bug_on_msg(res->l_ro_holders, 743 mlog_bug_on_msg(res->l_ro_holders,
744 "Lockres %s has %u ro holders\n", 744 "Lockres %s has %u ro holders\n",
745 res->l_name, res->l_ro_holders); 745 res->l_name, res->l_ro_holders);
746 mlog_bug_on_msg(res->l_ex_holders, 746 mlog_bug_on_msg(res->l_ex_holders,
747 "Lockres %s has %u ex holders\n", 747 "Lockres %s has %u ex holders\n",
748 res->l_name, res->l_ex_holders); 748 res->l_name, res->l_ex_holders);
749 749
750 /* Need to clear out the lock status block for the dlm */ 750 /* Need to clear out the lock status block for the dlm */
751 memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 751 memset(&res->l_lksb, 0, sizeof(res->l_lksb));
752 752
753 res->l_flags = 0UL; 753 res->l_flags = 0UL;
754 mlog_exit_void(); 754 mlog_exit_void();
755 } 755 }
756 756
757 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 757 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
758 int level) 758 int level)
759 { 759 {
760 mlog_entry_void(); 760 mlog_entry_void();
761 761
762 BUG_ON(!lockres); 762 BUG_ON(!lockres);
763 763
764 switch(level) { 764 switch(level) {
765 case DLM_LOCK_EX: 765 case DLM_LOCK_EX:
766 lockres->l_ex_holders++; 766 lockres->l_ex_holders++;
767 break; 767 break;
768 case DLM_LOCK_PR: 768 case DLM_LOCK_PR:
769 lockres->l_ro_holders++; 769 lockres->l_ro_holders++;
770 break; 770 break;
771 default: 771 default:
772 BUG(); 772 BUG();
773 } 773 }
774 774
775 mlog_exit_void(); 775 mlog_exit_void();
776 } 776 }
777 777
778 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 778 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
779 int level) 779 int level)
780 { 780 {
781 mlog_entry_void(); 781 mlog_entry_void();
782 782
783 BUG_ON(!lockres); 783 BUG_ON(!lockres);
784 784
785 switch(level) { 785 switch(level) {
786 case DLM_LOCK_EX: 786 case DLM_LOCK_EX:
787 BUG_ON(!lockres->l_ex_holders); 787 BUG_ON(!lockres->l_ex_holders);
788 lockres->l_ex_holders--; 788 lockres->l_ex_holders--;
789 break; 789 break;
790 case DLM_LOCK_PR: 790 case DLM_LOCK_PR:
791 BUG_ON(!lockres->l_ro_holders); 791 BUG_ON(!lockres->l_ro_holders);
792 lockres->l_ro_holders--; 792 lockres->l_ro_holders--;
793 break; 793 break;
794 default: 794 default:
795 BUG(); 795 BUG();
796 } 796 }
797 mlog_exit_void(); 797 mlog_exit_void();
798 } 798 }
799 799
800 /* WARNING: This function lives in a world where the only three lock 800 /* WARNING: This function lives in a world where the only three lock
801 * levels are EX, PR, and NL. It *will* have to be adjusted when more 801 * levels are EX, PR, and NL. It *will* have to be adjusted when more
802 * lock types are added. */ 802 * lock types are added. */
803 static inline int ocfs2_highest_compat_lock_level(int level) 803 static inline int ocfs2_highest_compat_lock_level(int level)
804 { 804 {
805 int new_level = DLM_LOCK_EX; 805 int new_level = DLM_LOCK_EX;
806 806
807 if (level == DLM_LOCK_EX) 807 if (level == DLM_LOCK_EX)
808 new_level = DLM_LOCK_NL; 808 new_level = DLM_LOCK_NL;
809 else if (level == DLM_LOCK_PR) 809 else if (level == DLM_LOCK_PR)
810 new_level = DLM_LOCK_PR; 810 new_level = DLM_LOCK_PR;
811 return new_level; 811 return new_level;
812 } 812 }
813 813
814 static void lockres_set_flags(struct ocfs2_lock_res *lockres, 814 static void lockres_set_flags(struct ocfs2_lock_res *lockres,
815 unsigned long newflags) 815 unsigned long newflags)
816 { 816 {
817 struct ocfs2_mask_waiter *mw, *tmp; 817 struct ocfs2_mask_waiter *mw, *tmp;
818 818
819 assert_spin_locked(&lockres->l_lock); 819 assert_spin_locked(&lockres->l_lock);
820 820
821 lockres->l_flags = newflags; 821 lockres->l_flags = newflags;
822 822
823 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 823 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
824 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 824 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
825 continue; 825 continue;
826 826
827 list_del_init(&mw->mw_item); 827 list_del_init(&mw->mw_item);
828 mw->mw_status = 0; 828 mw->mw_status = 0;
829 complete(&mw->mw_complete); 829 complete(&mw->mw_complete);
830 } 830 }
831 } 831 }
832 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 832 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
833 { 833 {
834 lockres_set_flags(lockres, lockres->l_flags | or); 834 lockres_set_flags(lockres, lockres->l_flags | or);
835 } 835 }
836 static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 836 static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
837 unsigned long clear) 837 unsigned long clear)
838 { 838 {
839 lockres_set_flags(lockres, lockres->l_flags & ~clear); 839 lockres_set_flags(lockres, lockres->l_flags & ~clear);
840 } 840 }
841 841
842 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 842 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
843 { 843 {
844 mlog_entry_void(); 844 mlog_entry_void();
845 845
846 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 846 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
847 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 847 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
848 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 848 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
849 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 849 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
850 850
851 lockres->l_level = lockres->l_requested; 851 lockres->l_level = lockres->l_requested;
852 if (lockres->l_level <= 852 if (lockres->l_level <=
853 ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 853 ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
854 lockres->l_blocking = DLM_LOCK_NL; 854 lockres->l_blocking = DLM_LOCK_NL;
855 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 855 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
856 } 856 }
857 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 857 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
858 858
859 mlog_exit_void(); 859 mlog_exit_void();
860 } 860 }
861 861
862 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 862 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
863 { 863 {
864 mlog_entry_void(); 864 mlog_entry_void();
865 865
866 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 866 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
867 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 867 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
868 868
869 /* Convert from RO to EX doesn't really need anything as our 869 /* Convert from RO to EX doesn't really need anything as our
870 * information is already up to data. Convert from NL to 870 * information is already up to data. Convert from NL to
871 * *anything* however should mark ourselves as needing an 871 * *anything* however should mark ourselves as needing an
872 * update */ 872 * update */
873 if (lockres->l_level == DLM_LOCK_NL && 873 if (lockres->l_level == DLM_LOCK_NL &&
874 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 874 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 875 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
876 876
877 lockres->l_level = lockres->l_requested; 877 lockres->l_level = lockres->l_requested;
878
879 /*
880 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
881 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
882 * downconverting the lock before the upconvert has fully completed.
883 */
884 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
885
878 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 886 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
879 887
880 mlog_exit_void(); 888 mlog_exit_void();
881 } 889 }
882 890
883 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 891 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
884 { 892 {
885 mlog_entry_void(); 893 mlog_entry_void();
886 894
887 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 895 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
888 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 896 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
889 897
890 if (lockres->l_requested > DLM_LOCK_NL && 898 if (lockres->l_requested > DLM_LOCK_NL &&
891 !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 899 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
892 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 900 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
893 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 901 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
894 902
895 lockres->l_level = lockres->l_requested; 903 lockres->l_level = lockres->l_requested;
896 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 904 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
897 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 905 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
898 906
899 mlog_exit_void(); 907 mlog_exit_void();
900 } 908 }
901 909
902 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 910 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
903 int level) 911 int level)
904 { 912 {
905 int needs_downconvert = 0; 913 int needs_downconvert = 0;
906 mlog_entry_void(); 914 mlog_entry_void();
907 915
908 assert_spin_locked(&lockres->l_lock); 916 assert_spin_locked(&lockres->l_lock);
909 917
910 if (level > lockres->l_blocking) { 918 if (level > lockres->l_blocking) {
911 /* only schedule a downconvert if we haven't already scheduled 919 /* only schedule a downconvert if we haven't already scheduled
912 * one that goes low enough to satisfy the level we're 920 * one that goes low enough to satisfy the level we're
913 * blocking. this also catches the case where we get 921 * blocking. this also catches the case where we get
914 * duplicate BASTs */ 922 * duplicate BASTs */
915 if (ocfs2_highest_compat_lock_level(level) < 923 if (ocfs2_highest_compat_lock_level(level) <
916 ocfs2_highest_compat_lock_level(lockres->l_blocking)) 924 ocfs2_highest_compat_lock_level(lockres->l_blocking))
917 needs_downconvert = 1; 925 needs_downconvert = 1;
918 926
919 lockres->l_blocking = level; 927 lockres->l_blocking = level;
920 } 928 }
921 929
922 if (needs_downconvert) 930 if (needs_downconvert)
923 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 931 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
924 932
925 mlog_exit(needs_downconvert); 933 mlog_exit(needs_downconvert);
926 return needs_downconvert; 934 return needs_downconvert;
927 } 935 }
928 936
/*
 * OCFS2_LOCK_PENDING and l_pending_gen.
 *
 * Why does OCFS2_LOCK_PENDING exist? To close a race between setting
 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock()
 * for more details on the race.
 *
 * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces
 * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock()
 * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear
 * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns,
 * the caller is going to try to clear PENDING again. If nothing else is
 * happening, __lockres_clear_pending() sees PENDING is unset and does
 * nothing.
 *
 * But what if another path (eg downconvert thread) has just started a
 * new locking action? The other path has re-set PENDING. Our path
 * cannot clear PENDING, because that will re-open the original race
 * window.
 *
 * [Example]
 *
 * ocfs2_meta_lock()
 *  ocfs2_cluster_lock()
 *   set BUSY
 *   set PENDING
 *   drop l_lock
 *   ocfs2_dlm_lock()
 *    ocfs2_locking_ast()               ocfs2_downconvert_thread()
 *     clear PENDING                     ocfs2_unblock_lock()
 *                                        take_l_lock
 *                                        !BUSY
 *                                        ocfs2_prepare_downconvert()
 *                                         set BUSY
 *                                         set PENDING
 *                                        drop l_lock
 *   take l_lock
 *   clear PENDING
 *   drop l_lock
 *			<window>
 *                                        ocfs2_dlm_lock()
 *
 * So as you can see, we now have a window where l_lock is not held,
 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
 *
 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
 * set by ocfs2_prepare_downconvert(). That wasn't nice.
 *
 * To solve this we introduce l_pending_gen. A call to
 * lockres_clear_pending() will only do so when it is passed a generation
 * number that matches the lockres. lockres_set_pending() will return the
 * current generation number. When ocfs2_cluster_lock() goes to clear
 * PENDING, it passes the generation it got from set_pending(). In our
 * example above, the generation numbers will *not* match. Thus,
 * ocfs2_cluster_lock() will not clear the PENDING set by
 * ocfs2_prepare_downconvert().
 */
986 994
987 /* Unlocked version for ocfs2_locking_ast() */ 995 /* Unlocked version for ocfs2_locking_ast() */
988 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 996 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
989 unsigned int generation, 997 unsigned int generation,
990 struct ocfs2_super *osb) 998 struct ocfs2_super *osb)
991 { 999 {
992 assert_spin_locked(&lockres->l_lock); 1000 assert_spin_locked(&lockres->l_lock);
993 1001
994 /* 1002 /*
995 * The ast and locking functions can race us here. The winner 1003 * The ast and locking functions can race us here. The winner
996 * will clear pending, the loser will not. 1004 * will clear pending, the loser will not.
997 */ 1005 */
998 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 1006 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
999 (lockres->l_pending_gen != generation)) 1007 (lockres->l_pending_gen != generation))
1000 return; 1008 return;
1001 1009
1002 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 1010 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
1003 lockres->l_pending_gen++; 1011 lockres->l_pending_gen++;
1004 1012
1005 /* 1013 /*
1006 * The downconvert thread may have skipped us because we 1014 * The downconvert thread may have skipped us because we
1007 * were PENDING. Wake it up. 1015 * were PENDING. Wake it up.
1008 */ 1016 */
1009 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1017 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1010 ocfs2_wake_downconvert_thread(osb); 1018 ocfs2_wake_downconvert_thread(osb);
1011 } 1019 }
1012 1020
1013 /* Locked version for callers of ocfs2_dlm_lock() */ 1021 /* Locked version for callers of ocfs2_dlm_lock() */
1014 static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 1022 static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1015 unsigned int generation, 1023 unsigned int generation,
1016 struct ocfs2_super *osb) 1024 struct ocfs2_super *osb)
1017 { 1025 {
1018 unsigned long flags; 1026 unsigned long flags;
1019 1027
1020 spin_lock_irqsave(&lockres->l_lock, flags); 1028 spin_lock_irqsave(&lockres->l_lock, flags);
1021 __lockres_clear_pending(lockres, generation, osb); 1029 __lockres_clear_pending(lockres, generation, osb);
1022 spin_unlock_irqrestore(&lockres->l_lock, flags); 1030 spin_unlock_irqrestore(&lockres->l_lock, flags);
1023 } 1031 }
1024 1032
1025 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 1033 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1026 { 1034 {
1027 assert_spin_locked(&lockres->l_lock); 1035 assert_spin_locked(&lockres->l_lock);
1028 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 1036 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1029 1037
1030 lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 1038 lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1031 1039
1032 return lockres->l_pending_gen; 1040 return lockres->l_pending_gen;
1033 } 1041 }
1034 1042
1035 1043
1036 static void ocfs2_blocking_ast(void *opaque, int level) 1044 static void ocfs2_blocking_ast(void *opaque, int level)
1037 { 1045 {
1038 struct ocfs2_lock_res *lockres = opaque; 1046 struct ocfs2_lock_res *lockres = opaque;
1039 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1047 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1040 int needs_downconvert; 1048 int needs_downconvert;
1041 unsigned long flags; 1049 unsigned long flags;
1042 1050
1043 BUG_ON(level <= DLM_LOCK_NL); 1051 BUG_ON(level <= DLM_LOCK_NL);
1044 1052
1045 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 1053 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
1046 lockres->l_name, level, lockres->l_level, 1054 lockres->l_name, level, lockres->l_level,
1047 ocfs2_lock_type_string(lockres->l_type)); 1055 ocfs2_lock_type_string(lockres->l_type));
1048 1056
1049 /* 1057 /*
1050 * We can skip the bast for locks which don't enable caching - 1058 * We can skip the bast for locks which don't enable caching -
1051 * they'll be dropped at the earliest possible time anyway. 1059 * they'll be dropped at the earliest possible time anyway.
1052 */ 1060 */
1053 if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1061 if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1054 return; 1062 return;
1055 1063
1056 spin_lock_irqsave(&lockres->l_lock, flags); 1064 spin_lock_irqsave(&lockres->l_lock, flags);
1057 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1065 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1058 if (needs_downconvert) 1066 if (needs_downconvert)
1059 ocfs2_schedule_blocked_lock(osb, lockres); 1067 ocfs2_schedule_blocked_lock(osb, lockres);
1060 spin_unlock_irqrestore(&lockres->l_lock, flags); 1068 spin_unlock_irqrestore(&lockres->l_lock, flags);
1061 1069
1062 wake_up(&lockres->l_event); 1070 wake_up(&lockres->l_event);
1063 1071
1064 ocfs2_wake_downconvert_thread(osb); 1072 ocfs2_wake_downconvert_thread(osb);
1065 } 1073 }
1066 1074
1067 static void ocfs2_locking_ast(void *opaque) 1075 static void ocfs2_locking_ast(void *opaque)
1068 { 1076 {
1069 struct ocfs2_lock_res *lockres = opaque; 1077 struct ocfs2_lock_res *lockres = opaque;
1070 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1078 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1071 unsigned long flags; 1079 unsigned long flags;
1072 int status; 1080 int status;
1073 1081
1074 spin_lock_irqsave(&lockres->l_lock, flags); 1082 spin_lock_irqsave(&lockres->l_lock, flags);
1075 1083
1076 status = ocfs2_dlm_lock_status(&lockres->l_lksb); 1084 status = ocfs2_dlm_lock_status(&lockres->l_lksb);
1077 1085
1078 if (status == -EAGAIN) { 1086 if (status == -EAGAIN) {
1079 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1087 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1080 goto out; 1088 goto out;
1081 } 1089 }
1082 1090
1083 if (status) { 1091 if (status) {
1084 mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 1092 mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
1085 lockres->l_name, status); 1093 lockres->l_name, status);
1086 spin_unlock_irqrestore(&lockres->l_lock, flags); 1094 spin_unlock_irqrestore(&lockres->l_lock, flags);
1087 return; 1095 return;
1088 } 1096 }
1089 1097
1090 switch(lockres->l_action) { 1098 switch(lockres->l_action) {
1091 case OCFS2_AST_ATTACH: 1099 case OCFS2_AST_ATTACH:
1092 ocfs2_generic_handle_attach_action(lockres); 1100 ocfs2_generic_handle_attach_action(lockres);
1093 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 1101 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1094 break; 1102 break;
1095 case OCFS2_AST_CONVERT: 1103 case OCFS2_AST_CONVERT:
1096 ocfs2_generic_handle_convert_action(lockres); 1104 ocfs2_generic_handle_convert_action(lockres);
1097 break; 1105 break;
1098 case OCFS2_AST_DOWNCONVERT: 1106 case OCFS2_AST_DOWNCONVERT:
1099 ocfs2_generic_handle_downconvert_action(lockres); 1107 ocfs2_generic_handle_downconvert_action(lockres);
1100 break; 1108 break;
1101 default: 1109 default:
1102 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " 1110 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
1103 "lockres flags = 0x%lx, unlock action: %u\n", 1111 "lockres flags = 0x%lx, unlock action: %u\n",
1104 lockres->l_name, lockres->l_action, lockres->l_flags, 1112 lockres->l_name, lockres->l_action, lockres->l_flags,
1105 lockres->l_unlock_action); 1113 lockres->l_unlock_action);
1106 BUG(); 1114 BUG();
1107 } 1115 }
1108 out: 1116 out:
1109 /* set it to something invalid so if we get called again we 1117 /* set it to something invalid so if we get called again we
1110 * can catch it. */ 1118 * can catch it. */
1111 lockres->l_action = OCFS2_AST_INVALID; 1119 lockres->l_action = OCFS2_AST_INVALID;
1112 1120
1113 /* Did we try to cancel this lock? Clear that state */ 1121 /* Did we try to cancel this lock? Clear that state */
1114 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1122 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1115 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1123 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1116 1124
1117 /* 1125 /*
1118 * We may have beaten the locking functions here. We certainly 1126 * We may have beaten the locking functions here. We certainly
1119 * know that dlm_lock() has been called :-) 1127 * know that dlm_lock() has been called :-)
1120 * Because we can't have two lock calls in flight at once, we 1128 * Because we can't have two lock calls in flight at once, we
1121 * can use lockres->l_pending_gen. 1129 * can use lockres->l_pending_gen.
1122 */ 1130 */
1123 __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1131 __lockres_clear_pending(lockres, lockres->l_pending_gen, osb);
1124 1132
1125 wake_up(&lockres->l_event); 1133 wake_up(&lockres->l_event);
1126 spin_unlock_irqrestore(&lockres->l_lock, flags); 1134 spin_unlock_irqrestore(&lockres->l_lock, flags);
1127 } 1135 }
1128 1136
1129 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1137 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1130 int convert) 1138 int convert)
1131 { 1139 {
1132 unsigned long flags; 1140 unsigned long flags;
1133 1141
1134 mlog_entry_void(); 1142 mlog_entry_void();
1135 spin_lock_irqsave(&lockres->l_lock, flags); 1143 spin_lock_irqsave(&lockres->l_lock, flags);
1136 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1144 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1145 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1137 if (convert) 1146 if (convert)
1138 lockres->l_action = OCFS2_AST_INVALID; 1147 lockres->l_action = OCFS2_AST_INVALID;
1139 else 1148 else
1140 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1149 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1141 spin_unlock_irqrestore(&lockres->l_lock, flags); 1150 spin_unlock_irqrestore(&lockres->l_lock, flags);
1142 1151
1143 wake_up(&lockres->l_event); 1152 wake_up(&lockres->l_event);
1144 mlog_exit_void(); 1153 mlog_exit_void();
1145 } 1154 }
1146 1155
1147 /* Note: If we detect another process working on the lock (i.e., 1156 /* Note: If we detect another process working on the lock (i.e.,
1148 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1157 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1149 * to do the right thing in that case. 1158 * to do the right thing in that case.
1150 */ 1159 */
1151 static int ocfs2_lock_create(struct ocfs2_super *osb, 1160 static int ocfs2_lock_create(struct ocfs2_super *osb,
1152 struct ocfs2_lock_res *lockres, 1161 struct ocfs2_lock_res *lockres,
1153 int level, 1162 int level,
1154 u32 dlm_flags) 1163 u32 dlm_flags)
1155 { 1164 {
1156 int ret = 0; 1165 int ret = 0;
1157 unsigned long flags; 1166 unsigned long flags;
1158 unsigned int gen; 1167 unsigned int gen;
1159 1168
1160 mlog_entry_void(); 1169 mlog_entry_void();
1161 1170
1162 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1171 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1163 dlm_flags); 1172 dlm_flags);
1164 1173
1165 spin_lock_irqsave(&lockres->l_lock, flags); 1174 spin_lock_irqsave(&lockres->l_lock, flags);
1166 if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1175 if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1167 (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1176 (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1168 spin_unlock_irqrestore(&lockres->l_lock, flags); 1177 spin_unlock_irqrestore(&lockres->l_lock, flags);
1169 goto bail; 1178 goto bail;
1170 } 1179 }
1171 1180
1172 lockres->l_action = OCFS2_AST_ATTACH; 1181 lockres->l_action = OCFS2_AST_ATTACH;
1173 lockres->l_requested = level; 1182 lockres->l_requested = level;
1174 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1183 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1175 gen = lockres_set_pending(lockres); 1184 gen = lockres_set_pending(lockres);
1176 spin_unlock_irqrestore(&lockres->l_lock, flags); 1185 spin_unlock_irqrestore(&lockres->l_lock, flags);
1177 1186
1178 ret = ocfs2_dlm_lock(osb->cconn, 1187 ret = ocfs2_dlm_lock(osb->cconn,
1179 level, 1188 level,
1180 &lockres->l_lksb, 1189 &lockres->l_lksb,
1181 dlm_flags, 1190 dlm_flags,
1182 lockres->l_name, 1191 lockres->l_name,
1183 OCFS2_LOCK_ID_MAX_LEN - 1, 1192 OCFS2_LOCK_ID_MAX_LEN - 1,
1184 lockres); 1193 lockres);
1185 lockres_clear_pending(lockres, gen, osb); 1194 lockres_clear_pending(lockres, gen, osb);
1186 if (ret) { 1195 if (ret) {
1187 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1196 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1188 ocfs2_recover_from_dlm_error(lockres, 1); 1197 ocfs2_recover_from_dlm_error(lockres, 1);
1189 } 1198 }
1190 1199
1191 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1200 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1192 1201
1193 bail: 1202 bail:
1194 mlog_exit(ret); 1203 mlog_exit(ret);
1195 return ret; 1204 return ret;
1196 } 1205 }
1197 1206
1198 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1207 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1199 int flag) 1208 int flag)
1200 { 1209 {
1201 unsigned long flags; 1210 unsigned long flags;
1202 int ret; 1211 int ret;
1203 1212
1204 spin_lock_irqsave(&lockres->l_lock, flags); 1213 spin_lock_irqsave(&lockres->l_lock, flags);
1205 ret = lockres->l_flags & flag; 1214 ret = lockres->l_flags & flag;
1206 spin_unlock_irqrestore(&lockres->l_lock, flags); 1215 spin_unlock_irqrestore(&lockres->l_lock, flags);
1207 1216
1208 return ret; 1217 return ret;
1209 } 1218 }
1210 1219
1211 static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1220 static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1212 1221
1213 { 1222 {
1214 wait_event(lockres->l_event, 1223 wait_event(lockres->l_event,
1215 !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1224 !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1216 } 1225 }
1217 1226
1218 static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1227 static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1219 1228
1220 { 1229 {
1221 wait_event(lockres->l_event, 1230 wait_event(lockres->l_event,
1222 !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1231 !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1223 } 1232 }
1224 1233
1225 /* predict what lock level we'll be dropping down to on behalf 1234 /* predict what lock level we'll be dropping down to on behalf
1226 * of another node, and return true if the currently wanted 1235 * of another node, and return true if the currently wanted
1227 * level will be compatible with it. */ 1236 * level will be compatible with it. */
1228 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1237 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1229 int wanted) 1238 int wanted)
1230 { 1239 {
1231 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1240 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1232 1241
1233 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1242 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1234 } 1243 }
1235 1244
1236 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1245 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1237 { 1246 {
1238 INIT_LIST_HEAD(&mw->mw_item); 1247 INIT_LIST_HEAD(&mw->mw_item);
1239 init_completion(&mw->mw_complete); 1248 init_completion(&mw->mw_complete);
1240 ocfs2_init_start_time(mw); 1249 ocfs2_init_start_time(mw);
1241 } 1250 }
1242 1251
1243 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1252 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1244 { 1253 {
1245 wait_for_completion(&mw->mw_complete); 1254 wait_for_completion(&mw->mw_complete);
1246 /* Re-arm the completion in case we want to wait on it again */ 1255 /* Re-arm the completion in case we want to wait on it again */
1247 INIT_COMPLETION(mw->mw_complete); 1256 INIT_COMPLETION(mw->mw_complete);
1248 return mw->mw_status; 1257 return mw->mw_status;
1249 } 1258 }
1250 1259
1251 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1260 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1252 struct ocfs2_mask_waiter *mw, 1261 struct ocfs2_mask_waiter *mw,
1253 unsigned long mask, 1262 unsigned long mask,
1254 unsigned long goal) 1263 unsigned long goal)
1255 { 1264 {
1256 BUG_ON(!list_empty(&mw->mw_item)); 1265 BUG_ON(!list_empty(&mw->mw_item));
1257 1266
1258 assert_spin_locked(&lockres->l_lock); 1267 assert_spin_locked(&lockres->l_lock);
1259 1268
1260 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1269 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1261 mw->mw_mask = mask; 1270 mw->mw_mask = mask;
1262 mw->mw_goal = goal; 1271 mw->mw_goal = goal;
1263 } 1272 }
1264 1273
1265 /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1274 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1266 * if the mask still hadn't reached its goal */ 1275 * if the mask still hadn't reached its goal */
1267 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1276 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1268 struct ocfs2_mask_waiter *mw) 1277 struct ocfs2_mask_waiter *mw)
1269 { 1278 {
1270 unsigned long flags; 1279 unsigned long flags;
1271 int ret = 0; 1280 int ret = 0;
1272 1281
1273 spin_lock_irqsave(&lockres->l_lock, flags); 1282 spin_lock_irqsave(&lockres->l_lock, flags);
1274 if (!list_empty(&mw->mw_item)) { 1283 if (!list_empty(&mw->mw_item)) {
1275 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1284 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1276 ret = -EBUSY; 1285 ret = -EBUSY;
1277 1286
1278 list_del_init(&mw->mw_item); 1287 list_del_init(&mw->mw_item);
1279 init_completion(&mw->mw_complete); 1288 init_completion(&mw->mw_complete);
1280 } 1289 }
1281 spin_unlock_irqrestore(&lockres->l_lock, flags); 1290 spin_unlock_irqrestore(&lockres->l_lock, flags);
1282 1291
1283 return ret; 1292 return ret;
1284 1293
1285 } 1294 }
1286 1295
1287 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1296 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1288 struct ocfs2_lock_res *lockres) 1297 struct ocfs2_lock_res *lockres)
1289 { 1298 {
1290 int ret; 1299 int ret;
1291 1300
1292 ret = wait_for_completion_interruptible(&mw->mw_complete); 1301 ret = wait_for_completion_interruptible(&mw->mw_complete);
1293 if (ret) 1302 if (ret)
1294 lockres_remove_mask_waiter(lockres, mw); 1303 lockres_remove_mask_waiter(lockres, mw);
1295 else 1304 else
1296 ret = mw->mw_status; 1305 ret = mw->mw_status;
1297 /* Re-arm the completion in case we want to wait on it again */ 1306 /* Re-arm the completion in case we want to wait on it again */
1298 INIT_COMPLETION(mw->mw_complete); 1307 INIT_COMPLETION(mw->mw_complete);
1299 return ret; 1308 return ret;
1300 } 1309 }
1301 1310
1302 static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1311 static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1303 struct ocfs2_lock_res *lockres, 1312 struct ocfs2_lock_res *lockres,
1304 int level, 1313 int level,
1305 u32 lkm_flags, 1314 u32 lkm_flags,
1306 int arg_flags, 1315 int arg_flags,
1307 int l_subclass, 1316 int l_subclass,
1308 unsigned long caller_ip) 1317 unsigned long caller_ip)
1309 { 1318 {
1310 struct ocfs2_mask_waiter mw; 1319 struct ocfs2_mask_waiter mw;
1311 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1320 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1312 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1321 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1313 unsigned long flags; 1322 unsigned long flags;
1314 unsigned int gen; 1323 unsigned int gen;
1315 int noqueue_attempted = 0; 1324 int noqueue_attempted = 0;
1316 1325
1317 mlog_entry_void(); 1326 mlog_entry_void();
1318 1327
1319 ocfs2_init_mask_waiter(&mw); 1328 ocfs2_init_mask_waiter(&mw);
1320 1329
1321 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1330 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1322 lkm_flags |= DLM_LKF_VALBLK; 1331 lkm_flags |= DLM_LKF_VALBLK;
1323 1332
1324 again: 1333 again:
1325 wait = 0; 1334 wait = 0;
1326 1335
1336 spin_lock_irqsave(&lockres->l_lock, flags);
1337
1327 if (catch_signals && signal_pending(current)) { 1338 if (catch_signals && signal_pending(current)) {
1328 ret = -ERESTARTSYS; 1339 ret = -ERESTARTSYS;
1329 goto out; 1340 goto unlock;
1330 } 1341 }
1331 1342
1332 spin_lock_irqsave(&lockres->l_lock, flags);
1333
1334 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1343 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1335 "Cluster lock called on freeing lockres %s! flags " 1344 "Cluster lock called on freeing lockres %s! flags "
1336 "0x%lx\n", lockres->l_name, lockres->l_flags); 1345 "0x%lx\n", lockres->l_name, lockres->l_flags);
1337 1346
1338 /* We only compare against the currently granted level 1347 /* We only compare against the currently granted level
1339 * here. If the lock is blocked waiting on a downconvert, 1348 * here. If the lock is blocked waiting on a downconvert,
1340 * we'll get caught below. */ 1349 * we'll get caught below. */
1341 if (lockres->l_flags & OCFS2_LOCK_BUSY && 1350 if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1342 level > lockres->l_level) { 1351 level > lockres->l_level) {
1343 /* is someone sitting in dlm_lock? If so, wait on 1352 /* is someone sitting in dlm_lock? If so, wait on
1344 * them. */ 1353 * them. */
1345 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1354 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1346 wait = 1; 1355 wait = 1;
1347 goto unlock; 1356 goto unlock;
1348 } 1357 }
1349 1358
1359 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1360 /*
1361 * We've upconverted. If the lock now has a level we can
1362 * work with, we take it. If, however, the lock is not at the
1363 * required level, we go thru the full cycle. One way this could
1364 * happen is if a process requesting an upconvert to PR is
1365 * closely followed by another requesting upconvert to an EX.
1366 * If the process requesting EX lands here, we want it to
1367 * continue attempting to upconvert and let the process
1368 * requesting PR take the lock.
1369 * If multiple processes request upconvert to PR, the first one
1370 * here will take the lock. The others will have to go thru the
1371 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1372 * downconvert request.
1373 */
1374 if (level <= lockres->l_level)
1375 goto update_holders;
1376 }
1377
1350 if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1378 if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1351 !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1379 !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1352 /* is the lock is currently blocked on behalf of 1380 /* is the lock is currently blocked on behalf of
1353 * another node */ 1381 * another node */
1354 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1382 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1355 wait = 1; 1383 wait = 1;
1356 goto unlock; 1384 goto unlock;
1357 } 1385 }
1358 1386
1359 if (level > lockres->l_level) { 1387 if (level > lockres->l_level) {
1360 if (noqueue_attempted > 0) { 1388 if (noqueue_attempted > 0) {
1361 ret = -EAGAIN; 1389 ret = -EAGAIN;
1362 goto unlock; 1390 goto unlock;
1363 } 1391 }
1364 if (lkm_flags & DLM_LKF_NOQUEUE) 1392 if (lkm_flags & DLM_LKF_NOQUEUE)
1365 noqueue_attempted = 1; 1393 noqueue_attempted = 1;
1366 1394
1367 if (lockres->l_action != OCFS2_AST_INVALID) 1395 if (lockres->l_action != OCFS2_AST_INVALID)
1368 mlog(ML_ERROR, "lockres %s has action %u pending\n", 1396 mlog(ML_ERROR, "lockres %s has action %u pending\n",
1369 lockres->l_name, lockres->l_action); 1397 lockres->l_name, lockres->l_action);
1370 1398
1371 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1399 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1372 lockres->l_action = OCFS2_AST_ATTACH; 1400 lockres->l_action = OCFS2_AST_ATTACH;
1373 lkm_flags &= ~DLM_LKF_CONVERT; 1401 lkm_flags &= ~DLM_LKF_CONVERT;
1374 } else { 1402 } else {
1375 lockres->l_action = OCFS2_AST_CONVERT; 1403 lockres->l_action = OCFS2_AST_CONVERT;
1376 lkm_flags |= DLM_LKF_CONVERT; 1404 lkm_flags |= DLM_LKF_CONVERT;
1377 } 1405 }
1378 1406
1379 lockres->l_requested = level; 1407 lockres->l_requested = level;
1380 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1408 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1381 gen = lockres_set_pending(lockres); 1409 gen = lockres_set_pending(lockres);
1382 spin_unlock_irqrestore(&lockres->l_lock, flags); 1410 spin_unlock_irqrestore(&lockres->l_lock, flags);
1383 1411
1384 BUG_ON(level == DLM_LOCK_IV); 1412 BUG_ON(level == DLM_LOCK_IV);
1385 BUG_ON(level == DLM_LOCK_NL); 1413 BUG_ON(level == DLM_LOCK_NL);
1386 1414
1387 mlog(0, "lock %s, convert from %d to level = %d\n", 1415 mlog(0, "lock %s, convert from %d to level = %d\n",
1388 lockres->l_name, lockres->l_level, level); 1416 lockres->l_name, lockres->l_level, level);
1389 1417
1390 /* call dlm_lock to upgrade lock now */ 1418 /* call dlm_lock to upgrade lock now */
1391 ret = ocfs2_dlm_lock(osb->cconn, 1419 ret = ocfs2_dlm_lock(osb->cconn,
1392 level, 1420 level,
1393 &lockres->l_lksb, 1421 &lockres->l_lksb,
1394 lkm_flags, 1422 lkm_flags,
1395 lockres->l_name, 1423 lockres->l_name,
1396 OCFS2_LOCK_ID_MAX_LEN - 1, 1424 OCFS2_LOCK_ID_MAX_LEN - 1,
1397 lockres); 1425 lockres);
1398 lockres_clear_pending(lockres, gen, osb); 1426 lockres_clear_pending(lockres, gen, osb);
1399 if (ret) { 1427 if (ret) {
1400 if (!(lkm_flags & DLM_LKF_NOQUEUE) || 1428 if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
1401 (ret != -EAGAIN)) { 1429 (ret != -EAGAIN)) {
1402 ocfs2_log_dlm_error("ocfs2_dlm_lock", 1430 ocfs2_log_dlm_error("ocfs2_dlm_lock",
1403 ret, lockres); 1431 ret, lockres);
1404 } 1432 }
1405 ocfs2_recover_from_dlm_error(lockres, 1); 1433 ocfs2_recover_from_dlm_error(lockres, 1);
1406 goto out; 1434 goto out;
1407 } 1435 }
1408 1436
1409 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1437 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1410 lockres->l_name); 1438 lockres->l_name);
1411 1439
1412 /* At this point we've gone inside the dlm and need to 1440 /* At this point we've gone inside the dlm and need to
1413 * complete our work regardless. */ 1441 * complete our work regardless. */
1414 catch_signals = 0; 1442 catch_signals = 0;
1415 1443
1416 /* wait for busy to clear and carry on */ 1444 /* wait for busy to clear and carry on */
1417 goto again; 1445 goto again;
1418 } 1446 }
1419 1447
1448 update_holders:
1420 /* Ok, if we get here then we're good to go. */ 1449 /* Ok, if we get here then we're good to go. */
1421 ocfs2_inc_holders(lockres, level); 1450 ocfs2_inc_holders(lockres, level);
1422 1451
1423 ret = 0; 1452 ret = 0;
1424 unlock: 1453 unlock:
1454 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1455
1425 spin_unlock_irqrestore(&lockres->l_lock, flags); 1456 spin_unlock_irqrestore(&lockres->l_lock, flags);
1426 out: 1457 out:
1427 /* 1458 /*
1428 * This is helping work around a lock inversion between the page lock 1459 * This is helping work around a lock inversion between the page lock
1429 * and dlm locks. One path holds the page lock while calling aops 1460 * and dlm locks. One path holds the page lock while calling aops
1430 * which block acquiring dlm locks. The voting thread holds dlm 1461 * which block acquiring dlm locks. The voting thread holds dlm
1431 * locks while acquiring page locks while down converting data locks. 1462 * locks while acquiring page locks while down converting data locks.
1432 * This block is helping an aop path notice the inversion and back 1463 * This block is helping an aop path notice the inversion and back
1433 * off to unlock its page lock before trying the dlm lock again. 1464 * off to unlock its page lock before trying the dlm lock again.
1434 */ 1465 */
1435 if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1466 if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1436 mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1467 mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1437 wait = 0; 1468 wait = 0;
1438 if (lockres_remove_mask_waiter(lockres, &mw)) 1469 if (lockres_remove_mask_waiter(lockres, &mw))
1439 ret = -EAGAIN; 1470 ret = -EAGAIN;
1440 else 1471 else
1441 goto again; 1472 goto again;
1442 } 1473 }
1443 if (wait) { 1474 if (wait) {
1444 ret = ocfs2_wait_for_mask(&mw); 1475 ret = ocfs2_wait_for_mask(&mw);
1445 if (ret == 0) 1476 if (ret == 0)
1446 goto again; 1477 goto again;
1447 mlog_errno(ret); 1478 mlog_errno(ret);
1448 } 1479 }
1449 ocfs2_update_lock_stats(lockres, level, &mw, ret); 1480 ocfs2_update_lock_stats(lockres, level, &mw, ret);
1450 1481
1451 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1482 #ifdef CONFIG_DEBUG_LOCK_ALLOC
1452 if (!ret && lockres->l_lockdep_map.key != NULL) { 1483 if (!ret && lockres->l_lockdep_map.key != NULL) {
1453 if (level == DLM_LOCK_PR) 1484 if (level == DLM_LOCK_PR)
1454 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, 1485 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1455 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1486 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1456 caller_ip); 1487 caller_ip);
1457 else 1488 else
1458 rwsem_acquire(&lockres->l_lockdep_map, l_subclass, 1489 rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1459 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1490 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1460 caller_ip); 1491 caller_ip);
1461 } 1492 }
1462 #endif 1493 #endif
1463 mlog_exit(ret); 1494 mlog_exit(ret);
1464 return ret; 1495 return ret;
1465 } 1496 }
1466 1497
1467 static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, 1498 static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1468 struct ocfs2_lock_res *lockres, 1499 struct ocfs2_lock_res *lockres,
1469 int level, 1500 int level,
1470 u32 lkm_flags, 1501 u32 lkm_flags,
1471 int arg_flags) 1502 int arg_flags)
1472 { 1503 {
1473 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, 1504 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1474 0, _RET_IP_); 1505 0, _RET_IP_);
1475 } 1506 }
1476 1507
1477 1508
1478 static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 1509 static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1479 struct ocfs2_lock_res *lockres, 1510 struct ocfs2_lock_res *lockres,
1480 int level, 1511 int level,
1481 unsigned long caller_ip) 1512 unsigned long caller_ip)
1482 { 1513 {
1483 unsigned long flags; 1514 unsigned long flags;
1484 1515
1485 mlog_entry_void(); 1516 mlog_entry_void();
1486 spin_lock_irqsave(&lockres->l_lock, flags); 1517 spin_lock_irqsave(&lockres->l_lock, flags);
1487 ocfs2_dec_holders(lockres, level); 1518 ocfs2_dec_holders(lockres, level);
1488 ocfs2_downconvert_on_unlock(osb, lockres); 1519 ocfs2_downconvert_on_unlock(osb, lockres);
1489 spin_unlock_irqrestore(&lockres->l_lock, flags); 1520 spin_unlock_irqrestore(&lockres->l_lock, flags);
1490 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1521 #ifdef CONFIG_DEBUG_LOCK_ALLOC
1491 if (lockres->l_lockdep_map.key != NULL) 1522 if (lockres->l_lockdep_map.key != NULL)
1492 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1523 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1493 #endif 1524 #endif
1494 mlog_exit_void(); 1525 mlog_exit_void();
1495 } 1526 }
1496 1527
1497 static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1528 static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1498 struct ocfs2_lock_res *lockres, 1529 struct ocfs2_lock_res *lockres,
1499 int ex, 1530 int ex,
1500 int local) 1531 int local)
1501 { 1532 {
1502 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1533 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1503 unsigned long flags; 1534 unsigned long flags;
1504 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1535 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1505 1536
1506 spin_lock_irqsave(&lockres->l_lock, flags); 1537 spin_lock_irqsave(&lockres->l_lock, flags);
1507 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1538 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1508 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1539 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1509 spin_unlock_irqrestore(&lockres->l_lock, flags); 1540 spin_unlock_irqrestore(&lockres->l_lock, flags);
1510 1541
1511 return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1542 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1512 } 1543 }
1513 1544
1514 /* Grants us an EX lock on the data and metadata resources, skipping 1545 /* Grants us an EX lock on the data and metadata resources, skipping
1515 * the normal cluster directory lookup. Use this ONLY on newly created 1546 * the normal cluster directory lookup. Use this ONLY on newly created
1516 * inodes which other nodes can't possibly see, and which haven't been 1547 * inodes which other nodes can't possibly see, and which haven't been
1517 * hashed in the inode hash yet. This can give us a good performance 1548 * hashed in the inode hash yet. This can give us a good performance
1518 * increase as it'll skip the network broadcast normally associated 1549 * increase as it'll skip the network broadcast normally associated
1519 * with creating a new lock resource. */ 1550 * with creating a new lock resource. */
1520 int ocfs2_create_new_inode_locks(struct inode *inode) 1551 int ocfs2_create_new_inode_locks(struct inode *inode)
1521 { 1552 {
1522 int ret; 1553 int ret;
1523 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1554 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1524 1555
1525 BUG_ON(!inode); 1556 BUG_ON(!inode);
1526 BUG_ON(!ocfs2_inode_is_new(inode)); 1557 BUG_ON(!ocfs2_inode_is_new(inode));
1527 1558
1528 mlog_entry_void(); 1559 mlog_entry_void();
1529 1560
1530 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1561 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1531 1562
1532 /* NOTE: That we don't increment any of the holder counts, nor 1563 /* NOTE: That we don't increment any of the holder counts, nor
1533 * do we add anything to a journal handle. Since this is 1564 * do we add anything to a journal handle. Since this is
1534 * supposed to be a new inode which the cluster doesn't know 1565 * supposed to be a new inode which the cluster doesn't know
1535 * about yet, there is no need to. As far as the LVB handling 1566 * about yet, there is no need to. As far as the LVB handling
1536 * is concerned, this is basically like acquiring an EX lock 1567 * is concerned, this is basically like acquiring an EX lock
1537 * on a resource which has an invalid one -- we'll set it 1568 * on a resource which has an invalid one -- we'll set it
1538 * valid when we release the EX. */ 1569 * valid when we release the EX. */
1539 1570
1540 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1571 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1541 if (ret) { 1572 if (ret) {
1542 mlog_errno(ret); 1573 mlog_errno(ret);
1543 goto bail; 1574 goto bail;
1544 } 1575 }
1545 1576
1546 /* 1577 /*
1547 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 1578 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
1548 * don't use a generation in their lock names. 1579 * don't use a generation in their lock names.
1549 */ 1580 */
1550 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1581 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1551 if (ret) { 1582 if (ret) {
1552 mlog_errno(ret); 1583 mlog_errno(ret);
1553 goto bail; 1584 goto bail;
1554 } 1585 }
1555 1586
1556 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 1587 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
1557 if (ret) { 1588 if (ret) {
1558 mlog_errno(ret); 1589 mlog_errno(ret);
1559 goto bail; 1590 goto bail;
1560 } 1591 }
1561 1592
1562 bail: 1593 bail:
1563 mlog_exit(ret); 1594 mlog_exit(ret);
1564 return ret; 1595 return ret;
1565 } 1596 }
1566 1597
1567 int ocfs2_rw_lock(struct inode *inode, int write) 1598 int ocfs2_rw_lock(struct inode *inode, int write)
1568 { 1599 {
1569 int status, level; 1600 int status, level;
1570 struct ocfs2_lock_res *lockres; 1601 struct ocfs2_lock_res *lockres;
1571 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1602 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1572 1603
1573 BUG_ON(!inode); 1604 BUG_ON(!inode);
1574 1605
1575 mlog_entry_void(); 1606 mlog_entry_void();
1576 1607
1577 mlog(0, "inode %llu take %s RW lock\n", 1608 mlog(0, "inode %llu take %s RW lock\n",
1578 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1609 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1579 write ? "EXMODE" : "PRMODE"); 1610 write ? "EXMODE" : "PRMODE");
1580 1611
1581 if (ocfs2_mount_local(osb)) { 1612 if (ocfs2_mount_local(osb)) {
1582 mlog_exit(0); 1613 mlog_exit(0);
1583 return 0; 1614 return 0;
1584 } 1615 }
1585 1616
1586 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1617 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1587 1618
1588 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1619 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1589 1620
1590 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1621 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1591 0); 1622 0);
1592 if (status < 0) 1623 if (status < 0)
1593 mlog_errno(status); 1624 mlog_errno(status);
1594 1625
1595 mlog_exit(status); 1626 mlog_exit(status);
1596 return status; 1627 return status;
1597 } 1628 }
1598 1629
1599 void ocfs2_rw_unlock(struct inode *inode, int write) 1630 void ocfs2_rw_unlock(struct inode *inode, int write)
1600 { 1631 {
1601 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1632 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1602 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1633 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1603 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1634 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1604 1635
1605 mlog_entry_void(); 1636 mlog_entry_void();
1606 1637
1607 mlog(0, "inode %llu drop %s RW lock\n", 1638 mlog(0, "inode %llu drop %s RW lock\n",
1608 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1639 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1609 write ? "EXMODE" : "PRMODE"); 1640 write ? "EXMODE" : "PRMODE");
1610 1641
1611 if (!ocfs2_mount_local(osb)) 1642 if (!ocfs2_mount_local(osb))
1612 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1643 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1613 1644
1614 mlog_exit_void(); 1645 mlog_exit_void();
1615 } 1646 }
1616 1647
1617 /* 1648 /*
1618 * ocfs2_open_lock always get PR mode lock. 1649 * ocfs2_open_lock always get PR mode lock.
1619 */ 1650 */
1620 int ocfs2_open_lock(struct inode *inode) 1651 int ocfs2_open_lock(struct inode *inode)
1621 { 1652 {
1622 int status = 0; 1653 int status = 0;
1623 struct ocfs2_lock_res *lockres; 1654 struct ocfs2_lock_res *lockres;
1624 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1655 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1625 1656
1626 BUG_ON(!inode); 1657 BUG_ON(!inode);
1627 1658
1628 mlog_entry_void(); 1659 mlog_entry_void();
1629 1660
1630 mlog(0, "inode %llu take PRMODE open lock\n", 1661 mlog(0, "inode %llu take PRMODE open lock\n",
1631 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1662 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1632 1663
1633 if (ocfs2_mount_local(osb)) 1664 if (ocfs2_mount_local(osb))
1634 goto out; 1665 goto out;
1635 1666
1636 lockres = &OCFS2_I(inode)->ip_open_lockres; 1667 lockres = &OCFS2_I(inode)->ip_open_lockres;
1637 1668
1638 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1669 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1639 DLM_LOCK_PR, 0, 0); 1670 DLM_LOCK_PR, 0, 0);
1640 if (status < 0) 1671 if (status < 0)
1641 mlog_errno(status); 1672 mlog_errno(status);
1642 1673
1643 out: 1674 out:
1644 mlog_exit(status); 1675 mlog_exit(status);
1645 return status; 1676 return status;
1646 } 1677 }
1647 1678
1648 int ocfs2_try_open_lock(struct inode *inode, int write) 1679 int ocfs2_try_open_lock(struct inode *inode, int write)
1649 { 1680 {
1650 int status = 0, level; 1681 int status = 0, level;
1651 struct ocfs2_lock_res *lockres; 1682 struct ocfs2_lock_res *lockres;
1652 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1683 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1653 1684
1654 BUG_ON(!inode); 1685 BUG_ON(!inode);
1655 1686
1656 mlog_entry_void(); 1687 mlog_entry_void();
1657 1688
1658 mlog(0, "inode %llu try to take %s open lock\n", 1689 mlog(0, "inode %llu try to take %s open lock\n",
1659 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1690 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1660 write ? "EXMODE" : "PRMODE"); 1691 write ? "EXMODE" : "PRMODE");
1661 1692
1662 if (ocfs2_mount_local(osb)) 1693 if (ocfs2_mount_local(osb))
1663 goto out; 1694 goto out;
1664 1695
1665 lockres = &OCFS2_I(inode)->ip_open_lockres; 1696 lockres = &OCFS2_I(inode)->ip_open_lockres;
1666 1697
1667 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1698 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1668 1699
1669 /* 1700 /*
1670 * The file system may already holding a PRMODE/EXMODE open lock. 1701 * The file system may already holding a PRMODE/EXMODE open lock.
1671 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 1702 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
1672 * other nodes and the -EAGAIN will indicate to the caller that 1703 * other nodes and the -EAGAIN will indicate to the caller that
1673 * this inode is still in use. 1704 * this inode is still in use.
1674 */ 1705 */
1675 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1706 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1676 level, DLM_LKF_NOQUEUE, 0); 1707 level, DLM_LKF_NOQUEUE, 0);
1677 1708
1678 out: 1709 out:
1679 mlog_exit(status); 1710 mlog_exit(status);
1680 return status; 1711 return status;
1681 } 1712 }
1682 1713
1683 /* 1714 /*
1684 * ocfs2_open_unlock unlock PR and EX mode open locks. 1715 * ocfs2_open_unlock unlock PR and EX mode open locks.
1685 */ 1716 */
1686 void ocfs2_open_unlock(struct inode *inode) 1717 void ocfs2_open_unlock(struct inode *inode)
1687 { 1718 {
1688 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 1719 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1689 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1720 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1690 1721
1691 mlog_entry_void(); 1722 mlog_entry_void();
1692 1723
1693 mlog(0, "inode %llu drop open lock\n", 1724 mlog(0, "inode %llu drop open lock\n",
1694 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1725 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1695 1726
1696 if (ocfs2_mount_local(osb)) 1727 if (ocfs2_mount_local(osb))
1697 goto out; 1728 goto out;
1698 1729
1699 if(lockres->l_ro_holders) 1730 if(lockres->l_ro_holders)
1700 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1731 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1701 DLM_LOCK_PR); 1732 DLM_LOCK_PR);
1702 if(lockres->l_ex_holders) 1733 if(lockres->l_ex_holders)
1703 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1734 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1704 DLM_LOCK_EX); 1735 DLM_LOCK_EX);
1705 1736
1706 out: 1737 out:
1707 mlog_exit_void(); 1738 mlog_exit_void();
1708 } 1739 }
1709 1740
1710 static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1741 static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1711 int level) 1742 int level)
1712 { 1743 {
1713 int ret; 1744 int ret;
1714 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1745 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1715 unsigned long flags; 1746 unsigned long flags;
1716 struct ocfs2_mask_waiter mw; 1747 struct ocfs2_mask_waiter mw;
1717 1748
1718 ocfs2_init_mask_waiter(&mw); 1749 ocfs2_init_mask_waiter(&mw);
1719 1750
1720 retry_cancel: 1751 retry_cancel:
1721 spin_lock_irqsave(&lockres->l_lock, flags); 1752 spin_lock_irqsave(&lockres->l_lock, flags);
1722 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1753 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1723 ret = ocfs2_prepare_cancel_convert(osb, lockres); 1754 ret = ocfs2_prepare_cancel_convert(osb, lockres);
1724 if (ret) { 1755 if (ret) {
1725 spin_unlock_irqrestore(&lockres->l_lock, flags); 1756 spin_unlock_irqrestore(&lockres->l_lock, flags);
1726 ret = ocfs2_cancel_convert(osb, lockres); 1757 ret = ocfs2_cancel_convert(osb, lockres);
1727 if (ret < 0) { 1758 if (ret < 0) {
1728 mlog_errno(ret); 1759 mlog_errno(ret);
1729 goto out; 1760 goto out;
1730 } 1761 }
1731 goto retry_cancel; 1762 goto retry_cancel;
1732 } 1763 }
1733 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1764 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1734 spin_unlock_irqrestore(&lockres->l_lock, flags); 1765 spin_unlock_irqrestore(&lockres->l_lock, flags);
1735 1766
1736 ocfs2_wait_for_mask(&mw); 1767 ocfs2_wait_for_mask(&mw);
1737 goto retry_cancel; 1768 goto retry_cancel;
1738 } 1769 }
1739 1770
1740 ret = -ERESTARTSYS; 1771 ret = -ERESTARTSYS;
1741 /* 1772 /*
1742 * We may still have gotten the lock, in which case there's no 1773 * We may still have gotten the lock, in which case there's no
1743 * point to restarting the syscall. 1774 * point to restarting the syscall.
1744 */ 1775 */
1745 if (lockres->l_level == level) 1776 if (lockres->l_level == level)
1746 ret = 0; 1777 ret = 0;
1747 1778
1748 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1779 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1749 lockres->l_flags, lockres->l_level, lockres->l_action); 1780 lockres->l_flags, lockres->l_level, lockres->l_action);
1750 1781
1751 spin_unlock_irqrestore(&lockres->l_lock, flags); 1782 spin_unlock_irqrestore(&lockres->l_lock, flags);
1752 1783
1753 out: 1784 out:
1754 return ret; 1785 return ret;
1755 } 1786 }
1756 1787
1757 /* 1788 /*
1758 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1789 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1759 * flock() calls. The locking approach this requires is sufficiently 1790 * flock() calls. The locking approach this requires is sufficiently
1760 * different from all other cluster lock types that we implement a 1791 * different from all other cluster lock types that we implement a
1761 * seperate path to the "low-level" dlm calls. In particular: 1792 * seperate path to the "low-level" dlm calls. In particular:
1762 * 1793 *
1763 * - No optimization of lock levels is done - we take at exactly 1794 * - No optimization of lock levels is done - we take at exactly
1764 * what's been requested. 1795 * what's been requested.
1765 * 1796 *
1766 * - No lock caching is employed. We immediately downconvert to 1797 * - No lock caching is employed. We immediately downconvert to
1767 * no-lock at unlock time. This also means flock locks never go on 1798 * no-lock at unlock time. This also means flock locks never go on
1768 * the blocking list). 1799 * the blocking list).
1769 * 1800 *
1770 * - Since userspace can trivially deadlock itself with flock, we make 1801 * - Since userspace can trivially deadlock itself with flock, we make
1771 * sure to allow cancellation of a misbehaving applications flock() 1802 * sure to allow cancellation of a misbehaving applications flock()
1772 * request. 1803 * request.
1773 * 1804 *
1774 * - Access to any flock lockres doesn't require concurrency, so we 1805 * - Access to any flock lockres doesn't require concurrency, so we
1775 * can simplify the code by requiring the caller to guarantee 1806 * can simplify the code by requiring the caller to guarantee
1776 * serialization of dlmglue flock calls. 1807 * serialization of dlmglue flock calls.
1777 */ 1808 */
1778 int ocfs2_file_lock(struct file *file, int ex, int trylock) 1809 int ocfs2_file_lock(struct file *file, int ex, int trylock)
1779 { 1810 {
1780 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1811 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1781 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; 1812 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1782 unsigned long flags; 1813 unsigned long flags;
1783 struct ocfs2_file_private *fp = file->private_data; 1814 struct ocfs2_file_private *fp = file->private_data;
1784 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1815 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1785 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1816 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1786 struct ocfs2_mask_waiter mw; 1817 struct ocfs2_mask_waiter mw;
1787 1818
1788 ocfs2_init_mask_waiter(&mw); 1819 ocfs2_init_mask_waiter(&mw);
1789 1820
1790 if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1821 if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1791 (lockres->l_level > DLM_LOCK_NL)) { 1822 (lockres->l_level > DLM_LOCK_NL)) {
1792 mlog(ML_ERROR, 1823 mlog(ML_ERROR,
1793 "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1824 "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1794 "level: %u\n", lockres->l_name, lockres->l_flags, 1825 "level: %u\n", lockres->l_name, lockres->l_flags,
1795 lockres->l_level); 1826 lockres->l_level);
1796 return -EINVAL; 1827 return -EINVAL;
1797 } 1828 }
1798 1829
1799 spin_lock_irqsave(&lockres->l_lock, flags); 1830 spin_lock_irqsave(&lockres->l_lock, flags);
1800 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1831 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1801 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1832 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1802 spin_unlock_irqrestore(&lockres->l_lock, flags); 1833 spin_unlock_irqrestore(&lockres->l_lock, flags);
1803 1834
1804 /* 1835 /*
1805 * Get the lock at NLMODE to start - that way we 1836 * Get the lock at NLMODE to start - that way we
1806 * can cancel the upconvert request if need be. 1837 * can cancel the upconvert request if need be.
1807 */ 1838 */
1808 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1839 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1809 if (ret < 0) { 1840 if (ret < 0) {
1810 mlog_errno(ret); 1841 mlog_errno(ret);
1811 goto out; 1842 goto out;
1812 } 1843 }
1813 1844
1814 ret = ocfs2_wait_for_mask(&mw); 1845 ret = ocfs2_wait_for_mask(&mw);
1815 if (ret) { 1846 if (ret) {
1816 mlog_errno(ret); 1847 mlog_errno(ret);
1817 goto out; 1848 goto out;
1818 } 1849 }
1819 spin_lock_irqsave(&lockres->l_lock, flags); 1850 spin_lock_irqsave(&lockres->l_lock, flags);
1820 } 1851 }
1821 1852
1822 lockres->l_action = OCFS2_AST_CONVERT; 1853 lockres->l_action = OCFS2_AST_CONVERT;
1823 lkm_flags |= DLM_LKF_CONVERT; 1854 lkm_flags |= DLM_LKF_CONVERT;
1824 lockres->l_requested = level; 1855 lockres->l_requested = level;
1825 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1856 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1826 1857
1827 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1858 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1828 spin_unlock_irqrestore(&lockres->l_lock, flags); 1859 spin_unlock_irqrestore(&lockres->l_lock, flags);
1829 1860
1830 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1861 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1831 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 1862 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
1832 lockres); 1863 lockres);
1833 if (ret) { 1864 if (ret) {
1834 if (!trylock || (ret != -EAGAIN)) { 1865 if (!trylock || (ret != -EAGAIN)) {
1835 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1866 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1836 ret = -EINVAL; 1867 ret = -EINVAL;
1837 } 1868 }
1838 1869
1839 ocfs2_recover_from_dlm_error(lockres, 1); 1870 ocfs2_recover_from_dlm_error(lockres, 1);
1840 lockres_remove_mask_waiter(lockres, &mw); 1871 lockres_remove_mask_waiter(lockres, &mw);
1841 goto out; 1872 goto out;
1842 } 1873 }
1843 1874
1844 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1875 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1845 if (ret == -ERESTARTSYS) { 1876 if (ret == -ERESTARTSYS) {
1846 /* 1877 /*
1847 * Userspace can cause deadlock itself with 1878 * Userspace can cause deadlock itself with
1848 * flock(). Current behavior locally is to allow the 1879 * flock(). Current behavior locally is to allow the
1849 * deadlock, but abort the system call if a signal is 1880 * deadlock, but abort the system call if a signal is
1850 * received. We follow this example, otherwise a 1881 * received. We follow this example, otherwise a
1851 * poorly written program could sit in kernel until 1882 * poorly written program could sit in kernel until
1852 * reboot. 1883 * reboot.
1853 * 1884 *
1854 * Handling this is a bit more complicated for Ocfs2 1885 * Handling this is a bit more complicated for Ocfs2
1855 * though. We can't exit this function with an 1886 * though. We can't exit this function with an
1856 * outstanding lock request, so a cancel convert is 1887 * outstanding lock request, so a cancel convert is
1857 * required. We intentionally overwrite 'ret' - if the 1888 * required. We intentionally overwrite 'ret' - if the
1858 * cancel fails and the lock was granted, it's easier 1889 * cancel fails and the lock was granted, it's easier
1859 * to just bubble success back up to the user. 1890 * to just bubble success back up to the user.
1860 */ 1891 */
1861 ret = ocfs2_flock_handle_signal(lockres, level); 1892 ret = ocfs2_flock_handle_signal(lockres, level);
1862 } else if (!ret && (level > lockres->l_level)) { 1893 } else if (!ret && (level > lockres->l_level)) {
1863 /* Trylock failed asynchronously */ 1894 /* Trylock failed asynchronously */
1864 BUG_ON(!trylock); 1895 BUG_ON(!trylock);
1865 ret = -EAGAIN; 1896 ret = -EAGAIN;
1866 } 1897 }
1867 1898
1868 out: 1899 out:
1869 1900
1870 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1901 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
1871 lockres->l_name, ex, trylock, ret); 1902 lockres->l_name, ex, trylock, ret);
1872 return ret; 1903 return ret;
1873 } 1904 }
1874 1905
1875 void ocfs2_file_unlock(struct file *file) 1906 void ocfs2_file_unlock(struct file *file)
1876 { 1907 {
1877 int ret; 1908 int ret;
1878 unsigned int gen; 1909 unsigned int gen;
1879 unsigned long flags; 1910 unsigned long flags;
1880 struct ocfs2_file_private *fp = file->private_data; 1911 struct ocfs2_file_private *fp = file->private_data;
1881 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1912 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1882 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1913 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1883 struct ocfs2_mask_waiter mw; 1914 struct ocfs2_mask_waiter mw;
1884 1915
1885 ocfs2_init_mask_waiter(&mw); 1916 ocfs2_init_mask_waiter(&mw);
1886 1917
1887 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1918 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1888 return; 1919 return;
1889 1920
1890 if (lockres->l_level == DLM_LOCK_NL) 1921 if (lockres->l_level == DLM_LOCK_NL)
1891 return; 1922 return;
1892 1923
1893 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1924 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1894 lockres->l_name, lockres->l_flags, lockres->l_level, 1925 lockres->l_name, lockres->l_flags, lockres->l_level,
1895 lockres->l_action); 1926 lockres->l_action);
1896 1927
1897 spin_lock_irqsave(&lockres->l_lock, flags); 1928 spin_lock_irqsave(&lockres->l_lock, flags);
1898 /* 1929 /*
1899 * Fake a blocking ast for the downconvert code. 1930 * Fake a blocking ast for the downconvert code.
1900 */ 1931 */
1901 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1932 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1902 lockres->l_blocking = DLM_LOCK_EX; 1933 lockres->l_blocking = DLM_LOCK_EX;
1903 1934
1904 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 1935 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
1905 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1936 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1906 spin_unlock_irqrestore(&lockres->l_lock, flags); 1937 spin_unlock_irqrestore(&lockres->l_lock, flags);
1907 1938
1908 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 1939 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
1909 if (ret) { 1940 if (ret) {
1910 mlog_errno(ret); 1941 mlog_errno(ret);
1911 return; 1942 return;
1912 } 1943 }
1913 1944
1914 ret = ocfs2_wait_for_mask(&mw); 1945 ret = ocfs2_wait_for_mask(&mw);
1915 if (ret) 1946 if (ret)
1916 mlog_errno(ret); 1947 mlog_errno(ret);
1917 } 1948 }
1918 1949
1919 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1950 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1920 struct ocfs2_lock_res *lockres) 1951 struct ocfs2_lock_res *lockres)
1921 { 1952 {
1922 int kick = 0; 1953 int kick = 0;
1923 1954
1924 mlog_entry_void(); 1955 mlog_entry_void();
1925 1956
1926 /* If we know that another node is waiting on our lock, kick 1957 /* If we know that another node is waiting on our lock, kick
1927 * the downconvert thread * pre-emptively when we reach a release 1958 * the downconvert thread * pre-emptively when we reach a release
1928 * condition. */ 1959 * condition. */
1929 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1960 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1930 switch(lockres->l_blocking) { 1961 switch(lockres->l_blocking) {
1931 case DLM_LOCK_EX: 1962 case DLM_LOCK_EX:
1932 if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1963 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1933 kick = 1; 1964 kick = 1;
1934 break; 1965 break;
1935 case DLM_LOCK_PR: 1966 case DLM_LOCK_PR:
1936 if (!lockres->l_ex_holders) 1967 if (!lockres->l_ex_holders)
1937 kick = 1; 1968 kick = 1;
1938 break; 1969 break;
1939 default: 1970 default:
1940 BUG(); 1971 BUG();
1941 } 1972 }
1942 } 1973 }
1943 1974
1944 if (kick) 1975 if (kick)
1945 ocfs2_wake_downconvert_thread(osb); 1976 ocfs2_wake_downconvert_thread(osb);
1946 1977
1947 mlog_exit_void(); 1978 mlog_exit_void();
1948 } 1979 }
1949 1980
1950 #define OCFS2_SEC_BITS 34 1981 #define OCFS2_SEC_BITS 34
1951 #define OCFS2_SEC_SHIFT (64 - 34) 1982 #define OCFS2_SEC_SHIFT (64 - 34)
1952 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1983 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)
1953 1984
1954 /* LVB only has room for 64 bits of time here so we pack it for 1985 /* LVB only has room for 64 bits of time here so we pack it for
1955 * now. */ 1986 * now. */
1956 static u64 ocfs2_pack_timespec(struct timespec *spec) 1987 static u64 ocfs2_pack_timespec(struct timespec *spec)
1957 { 1988 {
1958 u64 res; 1989 u64 res;
1959 u64 sec = spec->tv_sec; 1990 u64 sec = spec->tv_sec;
1960 u32 nsec = spec->tv_nsec; 1991 u32 nsec = spec->tv_nsec;
1961 1992
1962 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1993 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
1963 1994
1964 return res; 1995 return res;
1965 } 1996 }
1966 1997
1967 /* Call this with the lockres locked. I am reasonably sure we don't 1998 /* Call this with the lockres locked. I am reasonably sure we don't
1968 * need ip_lock in this function as anyone who would be changing those 1999 * need ip_lock in this function as anyone who would be changing those
1969 * values is supposed to be blocked in ocfs2_inode_lock right now. */ 2000 * values is supposed to be blocked in ocfs2_inode_lock right now. */
1970 static void __ocfs2_stuff_meta_lvb(struct inode *inode) 2001 static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1971 { 2002 {
1972 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2003 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1973 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2004 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
1974 struct ocfs2_meta_lvb *lvb; 2005 struct ocfs2_meta_lvb *lvb;
1975 2006
1976 mlog_entry_void(); 2007 mlog_entry_void();
1977 2008
1978 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2009 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1979 2010
1980 /* 2011 /*
1981 * Invalidate the LVB of a deleted inode - this way other 2012 * Invalidate the LVB of a deleted inode - this way other
1982 * nodes are forced to go to disk and discover the new inode 2013 * nodes are forced to go to disk and discover the new inode
1983 * status. 2014 * status.
1984 */ 2015 */
1985 if (oi->ip_flags & OCFS2_INODE_DELETED) { 2016 if (oi->ip_flags & OCFS2_INODE_DELETED) {
1986 lvb->lvb_version = 0; 2017 lvb->lvb_version = 0;
1987 goto out; 2018 goto out;
1988 } 2019 }
1989 2020
1990 lvb->lvb_version = OCFS2_LVB_VERSION; 2021 lvb->lvb_version = OCFS2_LVB_VERSION;
1991 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2022 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1992 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 2023 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
1993 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 2024 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
1994 lvb->lvb_igid = cpu_to_be32(inode->i_gid); 2025 lvb->lvb_igid = cpu_to_be32(inode->i_gid);
1995 lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2026 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
1996 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2027 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
1997 lvb->lvb_iatime_packed = 2028 lvb->lvb_iatime_packed =
1998 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 2029 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
1999 lvb->lvb_ictime_packed = 2030 lvb->lvb_ictime_packed =
2000 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 2031 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
2001 lvb->lvb_imtime_packed = 2032 lvb->lvb_imtime_packed =
2002 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 2033 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
2003 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 2034 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
2004 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 2035 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2005 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 2036 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2006 2037
2007 out: 2038 out:
2008 mlog_meta_lvb(0, lockres); 2039 mlog_meta_lvb(0, lockres);
2009 2040
2010 mlog_exit_void(); 2041 mlog_exit_void();
2011 } 2042 }
2012 2043
2013 static void ocfs2_unpack_timespec(struct timespec *spec, 2044 static void ocfs2_unpack_timespec(struct timespec *spec,
2014 u64 packed_time) 2045 u64 packed_time)
2015 { 2046 {
2016 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 2047 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2017 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 2048 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2018 } 2049 }
2019 2050
2020 static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 2051 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2021 { 2052 {
2022 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2053 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2023 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2054 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2024 struct ocfs2_meta_lvb *lvb; 2055 struct ocfs2_meta_lvb *lvb;
2025 2056
2026 mlog_entry_void(); 2057 mlog_entry_void();
2027 2058
2028 mlog_meta_lvb(0, lockres); 2059 mlog_meta_lvb(0, lockres);
2029 2060
2030 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2061 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2031 2062
2032 /* We're safe here without the lockres lock... */ 2063 /* We're safe here without the lockres lock... */
2033 spin_lock(&oi->ip_lock); 2064 spin_lock(&oi->ip_lock);
2034 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 2065 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2035 i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 2066 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2036 2067
2037 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 2068 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
2038 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 2069 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2039 ocfs2_set_inode_flags(inode); 2070 ocfs2_set_inode_flags(inode);
2040 2071
2041 /* fast-symlinks are a special case */ 2072 /* fast-symlinks are a special case */
2042 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 2073 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2043 inode->i_blocks = 0; 2074 inode->i_blocks = 0;
2044 else 2075 else
2045 inode->i_blocks = ocfs2_inode_sector_count(inode); 2076 inode->i_blocks = ocfs2_inode_sector_count(inode);
2046 2077
2047 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2078 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
2048 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2079 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
2049 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2080 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
2050 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 2081 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
2051 ocfs2_unpack_timespec(&inode->i_atime, 2082 ocfs2_unpack_timespec(&inode->i_atime,
2052 be64_to_cpu(lvb->lvb_iatime_packed)); 2083 be64_to_cpu(lvb->lvb_iatime_packed));
2053 ocfs2_unpack_timespec(&inode->i_mtime, 2084 ocfs2_unpack_timespec(&inode->i_mtime,
2054 be64_to_cpu(lvb->lvb_imtime_packed)); 2085 be64_to_cpu(lvb->lvb_imtime_packed));
2055 ocfs2_unpack_timespec(&inode->i_ctime, 2086 ocfs2_unpack_timespec(&inode->i_ctime,
2056 be64_to_cpu(lvb->lvb_ictime_packed)); 2087 be64_to_cpu(lvb->lvb_ictime_packed));
2057 spin_unlock(&oi->ip_lock); 2088 spin_unlock(&oi->ip_lock);
2058 2089
2059 mlog_exit_void(); 2090 mlog_exit_void();
2060 } 2091 }
2061 2092
2062 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2093 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2063 struct ocfs2_lock_res *lockres) 2094 struct ocfs2_lock_res *lockres)
2064 { 2095 {
2065 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2096 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2066 2097
2067 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 2098 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
2068 && lvb->lvb_version == OCFS2_LVB_VERSION 2099 && lvb->lvb_version == OCFS2_LVB_VERSION
2069 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2100 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2070 return 1; 2101 return 1;
2071 return 0; 2102 return 0;
2072 } 2103 }
2073 2104
2074 /* Determine whether a lock resource needs to be refreshed, and 2105 /* Determine whether a lock resource needs to be refreshed, and
2075 * arbitrate who gets to refresh it. 2106 * arbitrate who gets to refresh it.
2076 * 2107 *
2077 * 0 means no refresh needed. 2108 * 0 means no refresh needed.
2078 * 2109 *
2079 * > 0 means you need to refresh this and you MUST call 2110 * > 0 means you need to refresh this and you MUST call
2080 * ocfs2_complete_lock_res_refresh afterwards. */ 2111 * ocfs2_complete_lock_res_refresh afterwards. */
2081 static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2112 static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2082 { 2113 {
2083 unsigned long flags; 2114 unsigned long flags;
2084 int status = 0; 2115 int status = 0;
2085 2116
2086 mlog_entry_void(); 2117 mlog_entry_void();
2087 2118
2088 refresh_check: 2119 refresh_check:
2089 spin_lock_irqsave(&lockres->l_lock, flags); 2120 spin_lock_irqsave(&lockres->l_lock, flags);
2090 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2121 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2091 spin_unlock_irqrestore(&lockres->l_lock, flags); 2122 spin_unlock_irqrestore(&lockres->l_lock, flags);
2092 goto bail; 2123 goto bail;
2093 } 2124 }
2094 2125
2095 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2126 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2096 spin_unlock_irqrestore(&lockres->l_lock, flags); 2127 spin_unlock_irqrestore(&lockres->l_lock, flags);
2097 2128
2098 ocfs2_wait_on_refreshing_lock(lockres); 2129 ocfs2_wait_on_refreshing_lock(lockres);
2099 goto refresh_check; 2130 goto refresh_check;
2100 } 2131 }
2101 2132
2102 /* Ok, I'll be the one to refresh this lock. */ 2133 /* Ok, I'll be the one to refresh this lock. */
2103 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2134 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2104 spin_unlock_irqrestore(&lockres->l_lock, flags); 2135 spin_unlock_irqrestore(&lockres->l_lock, flags);
2105 2136
2106 status = 1; 2137 status = 1;
2107 bail: 2138 bail:
2108 mlog_exit(status); 2139 mlog_exit(status);
2109 return status; 2140 return status;
2110 } 2141 }
2111 2142
2112 /* If status is non zero, I'll mark it as not being in refresh 2143 /* If status is non zero, I'll mark it as not being in refresh
2113 * anymroe, but i won't clear the needs refresh flag. */ 2144 * anymroe, but i won't clear the needs refresh flag. */
2114 static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2145 static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2115 int status) 2146 int status)
2116 { 2147 {
2117 unsigned long flags; 2148 unsigned long flags;
2118 mlog_entry_void(); 2149 mlog_entry_void();
2119 2150
2120 spin_lock_irqsave(&lockres->l_lock, flags); 2151 spin_lock_irqsave(&lockres->l_lock, flags);
2121 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2152 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2122 if (!status) 2153 if (!status)
2123 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2154 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2124 spin_unlock_irqrestore(&lockres->l_lock, flags); 2155 spin_unlock_irqrestore(&lockres->l_lock, flags);
2125 2156
2126 wake_up(&lockres->l_event); 2157 wake_up(&lockres->l_event);
2127 2158
2128 mlog_exit_void(); 2159 mlog_exit_void();
2129 } 2160 }
2130 2161
2131 /* may or may not return a bh if it went to disk. */ 2162 /* may or may not return a bh if it went to disk. */
2132 static int ocfs2_inode_lock_update(struct inode *inode, 2163 static int ocfs2_inode_lock_update(struct inode *inode,
2133 struct buffer_head **bh) 2164 struct buffer_head **bh)
2134 { 2165 {
2135 int status = 0; 2166 int status = 0;
2136 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2167 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2137 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2168 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2138 struct ocfs2_dinode *fe; 2169 struct ocfs2_dinode *fe;
2139 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2170 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2140 2171
2141 mlog_entry_void(); 2172 mlog_entry_void();
2142 2173
2143 if (ocfs2_mount_local(osb)) 2174 if (ocfs2_mount_local(osb))
2144 goto bail; 2175 goto bail;
2145 2176
2146 spin_lock(&oi->ip_lock); 2177 spin_lock(&oi->ip_lock);
2147 if (oi->ip_flags & OCFS2_INODE_DELETED) { 2178 if (oi->ip_flags & OCFS2_INODE_DELETED) {
2148 mlog(0, "Orphaned inode %llu was deleted while we " 2179 mlog(0, "Orphaned inode %llu was deleted while we "
2149 "were waiting on a lock. ip_flags = 0x%x\n", 2180 "were waiting on a lock. ip_flags = 0x%x\n",
2150 (unsigned long long)oi->ip_blkno, oi->ip_flags); 2181 (unsigned long long)oi->ip_blkno, oi->ip_flags);
2151 spin_unlock(&oi->ip_lock); 2182 spin_unlock(&oi->ip_lock);
2152 status = -ENOENT; 2183 status = -ENOENT;
2153 goto bail; 2184 goto bail;
2154 } 2185 }
2155 spin_unlock(&oi->ip_lock); 2186 spin_unlock(&oi->ip_lock);
2156 2187
2157 if (!ocfs2_should_refresh_lock_res(lockres)) 2188 if (!ocfs2_should_refresh_lock_res(lockres))
2158 goto bail; 2189 goto bail;
2159 2190
2160 /* This will discard any caching information we might have had 2191 /* This will discard any caching information we might have had
2161 * for the inode metadata. */ 2192 * for the inode metadata. */
2162 ocfs2_metadata_cache_purge(INODE_CACHE(inode)); 2193 ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2163 2194
2164 ocfs2_extent_map_trunc(inode, 0); 2195 ocfs2_extent_map_trunc(inode, 0);
2165 2196
2166 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2197 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2167 mlog(0, "Trusting LVB on inode %llu\n", 2198 mlog(0, "Trusting LVB on inode %llu\n",
2168 (unsigned long long)oi->ip_blkno); 2199 (unsigned long long)oi->ip_blkno);
2169 ocfs2_refresh_inode_from_lvb(inode); 2200 ocfs2_refresh_inode_from_lvb(inode);
2170 } else { 2201 } else {
2171 /* Boo, we have to go to disk. */ 2202 /* Boo, we have to go to disk. */
2172 /* read bh, cast, ocfs2_refresh_inode */ 2203 /* read bh, cast, ocfs2_refresh_inode */
2173 status = ocfs2_read_inode_block(inode, bh); 2204 status = ocfs2_read_inode_block(inode, bh);
2174 if (status < 0) { 2205 if (status < 0) {
2175 mlog_errno(status); 2206 mlog_errno(status);
2176 goto bail_refresh; 2207 goto bail_refresh;
2177 } 2208 }
2178 fe = (struct ocfs2_dinode *) (*bh)->b_data; 2209 fe = (struct ocfs2_dinode *) (*bh)->b_data;
2179 2210
2180 /* This is a good chance to make sure we're not 2211 /* This is a good chance to make sure we're not
2181 * locking an invalid object. ocfs2_read_inode_block() 2212 * locking an invalid object. ocfs2_read_inode_block()
2182 * already checked that the inode block is sane. 2213 * already checked that the inode block is sane.
2183 * 2214 *
2184 * We bug on a stale inode here because we checked 2215 * We bug on a stale inode here because we checked
2185 * above whether it was wiped from disk. The wiping 2216 * above whether it was wiped from disk. The wiping
2186 * node provides a guarantee that we receive that 2217 * node provides a guarantee that we receive that
2187 * message and can mark the inode before dropping any 2218 * message and can mark the inode before dropping any
2188 * locks associated with it. */ 2219 * locks associated with it. */
2189 mlog_bug_on_msg(inode->i_generation != 2220 mlog_bug_on_msg(inode->i_generation !=
2190 le32_to_cpu(fe->i_generation), 2221 le32_to_cpu(fe->i_generation),
2191 "Invalid dinode %llu disk generation: %u " 2222 "Invalid dinode %llu disk generation: %u "
2192 "inode->i_generation: %u\n", 2223 "inode->i_generation: %u\n",
2193 (unsigned long long)oi->ip_blkno, 2224 (unsigned long long)oi->ip_blkno,
2194 le32_to_cpu(fe->i_generation), 2225 le32_to_cpu(fe->i_generation),
2195 inode->i_generation); 2226 inode->i_generation);
2196 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2227 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2197 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2228 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2198 "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2229 "Stale dinode %llu dtime: %llu flags: 0x%x\n",
2199 (unsigned long long)oi->ip_blkno, 2230 (unsigned long long)oi->ip_blkno,
2200 (unsigned long long)le64_to_cpu(fe->i_dtime), 2231 (unsigned long long)le64_to_cpu(fe->i_dtime),
2201 le32_to_cpu(fe->i_flags)); 2232 le32_to_cpu(fe->i_flags));
2202 2233
2203 ocfs2_refresh_inode(inode, fe); 2234 ocfs2_refresh_inode(inode, fe);
2204 ocfs2_track_lock_refresh(lockres); 2235 ocfs2_track_lock_refresh(lockres);
2205 } 2236 }
2206 2237
2207 status = 0; 2238 status = 0;
2208 bail_refresh: 2239 bail_refresh:
2209 ocfs2_complete_lock_res_refresh(lockres, status); 2240 ocfs2_complete_lock_res_refresh(lockres, status);
2210 bail: 2241 bail:
2211 mlog_exit(status); 2242 mlog_exit(status);
2212 return status; 2243 return status;
2213 } 2244 }
2214 2245
/*
 * Hand the caller a referenced buffer_head for the inode block: reuse
 * the one the lock update already read if there is one, otherwise read
 * it from disk now.
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		/* The update never hit disk; read the block ourselves. */
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);
		return status;
	}

	/* Ok, the update went to disk for us, use the returned bh
	 * and take our own reference on it. */
	*ret_bh = passed_bh;
	get_bh(*ret_bh);

	return 0;
}
2236 2267
2237 /* 2268 /*
2238 * returns < 0 error if the callback will never be called, otherwise 2269 * returns < 0 error if the callback will never be called, otherwise
2239 * the result of the lock will be communicated via the callback. 2270 * the result of the lock will be communicated via the callback.
2240 */ 2271 */
2241 int ocfs2_inode_lock_full_nested(struct inode *inode, 2272 int ocfs2_inode_lock_full_nested(struct inode *inode,
2242 struct buffer_head **ret_bh, 2273 struct buffer_head **ret_bh,
2243 int ex, 2274 int ex,
2244 int arg_flags, 2275 int arg_flags,
2245 int subclass) 2276 int subclass)
2246 { 2277 {
2247 int status, level, acquired; 2278 int status, level, acquired;
2248 u32 dlm_flags; 2279 u32 dlm_flags;
2249 struct ocfs2_lock_res *lockres = NULL; 2280 struct ocfs2_lock_res *lockres = NULL;
2250 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2281 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2251 struct buffer_head *local_bh = NULL; 2282 struct buffer_head *local_bh = NULL;
2252 2283
2253 BUG_ON(!inode); 2284 BUG_ON(!inode);
2254 2285
2255 mlog_entry_void(); 2286 mlog_entry_void();
2256 2287
2257 mlog(0, "inode %llu, take %s META lock\n", 2288 mlog(0, "inode %llu, take %s META lock\n",
2258 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2289 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2259 ex ? "EXMODE" : "PRMODE"); 2290 ex ? "EXMODE" : "PRMODE");
2260 2291
2261 status = 0; 2292 status = 0;
2262 acquired = 0; 2293 acquired = 0;
2263 /* We'll allow faking a readonly metadata lock for 2294 /* We'll allow faking a readonly metadata lock for
2264 * rodevices. */ 2295 * rodevices. */
2265 if (ocfs2_is_hard_readonly(osb)) { 2296 if (ocfs2_is_hard_readonly(osb)) {
2266 if (ex) 2297 if (ex)
2267 status = -EROFS; 2298 status = -EROFS;
2268 goto bail; 2299 goto bail;
2269 } 2300 }
2270 2301
2271 if (ocfs2_mount_local(osb)) 2302 if (ocfs2_mount_local(osb))
2272 goto local; 2303 goto local;
2273 2304
2274 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2305 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2275 ocfs2_wait_for_recovery(osb); 2306 ocfs2_wait_for_recovery(osb);
2276 2307
2277 lockres = &OCFS2_I(inode)->ip_inode_lockres; 2308 lockres = &OCFS2_I(inode)->ip_inode_lockres;
2278 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2309 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2279 dlm_flags = 0; 2310 dlm_flags = 0;
2280 if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2311 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2281 dlm_flags |= DLM_LKF_NOQUEUE; 2312 dlm_flags |= DLM_LKF_NOQUEUE;
2282 2313
2283 status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, 2314 status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2284 arg_flags, subclass, _RET_IP_); 2315 arg_flags, subclass, _RET_IP_);
2285 if (status < 0) { 2316 if (status < 0) {
2286 if (status != -EAGAIN && status != -EIOCBRETRY) 2317 if (status != -EAGAIN && status != -EIOCBRETRY)
2287 mlog_errno(status); 2318 mlog_errno(status);
2288 goto bail; 2319 goto bail;
2289 } 2320 }
2290 2321
2291 /* Notify the error cleanup path to drop the cluster lock. */ 2322 /* Notify the error cleanup path to drop the cluster lock. */
2292 acquired = 1; 2323 acquired = 1;
2293 2324
2294 /* We wait twice because a node may have died while we were in 2325 /* We wait twice because a node may have died while we were in
2295 * the lower dlm layers. The second time though, we've 2326 * the lower dlm layers. The second time though, we've
2296 * committed to owning this lock so we don't allow signals to 2327 * committed to owning this lock so we don't allow signals to
2297 * abort the operation. */ 2328 * abort the operation. */
2298 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2329 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2299 ocfs2_wait_for_recovery(osb); 2330 ocfs2_wait_for_recovery(osb);
2300 2331
2301 local: 2332 local:
2302 /* 2333 /*
2303 * We only see this flag if we're being called from 2334 * We only see this flag if we're being called from
2304 * ocfs2_read_locked_inode(). It means we're locking an inode 2335 * ocfs2_read_locked_inode(). It means we're locking an inode
2305 * which hasn't been populated yet, so clear the refresh flag 2336 * which hasn't been populated yet, so clear the refresh flag
2306 * and let the caller handle it. 2337 * and let the caller handle it.
2307 */ 2338 */
2308 if (inode->i_state & I_NEW) { 2339 if (inode->i_state & I_NEW) {
2309 status = 0; 2340 status = 0;
2310 if (lockres) 2341 if (lockres)
2311 ocfs2_complete_lock_res_refresh(lockres, 0); 2342 ocfs2_complete_lock_res_refresh(lockres, 0);
2312 goto bail; 2343 goto bail;
2313 } 2344 }
2314 2345
2315 /* This is fun. The caller may want a bh back, or it may 2346 /* This is fun. The caller may want a bh back, or it may
2316 * not. ocfs2_inode_lock_update definitely wants one in, but 2347 * not. ocfs2_inode_lock_update definitely wants one in, but
2317 * may or may not read one, depending on what's in the 2348 * may or may not read one, depending on what's in the
2318 * LVB. The result of all of this is that we've *only* gone to 2349 * LVB. The result of all of this is that we've *only* gone to
2319 * disk if we have to, so the complexity is worthwhile. */ 2350 * disk if we have to, so the complexity is worthwhile. */
2320 status = ocfs2_inode_lock_update(inode, &local_bh); 2351 status = ocfs2_inode_lock_update(inode, &local_bh);
2321 if (status < 0) { 2352 if (status < 0) {
2322 if (status != -ENOENT) 2353 if (status != -ENOENT)
2323 mlog_errno(status); 2354 mlog_errno(status);
2324 goto bail; 2355 goto bail;
2325 } 2356 }
2326 2357
2327 if (ret_bh) { 2358 if (ret_bh) {
2328 status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2359 status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2329 if (status < 0) { 2360 if (status < 0) {
2330 mlog_errno(status); 2361 mlog_errno(status);
2331 goto bail; 2362 goto bail;
2332 } 2363 }
2333 } 2364 }
2334 2365
2335 bail: 2366 bail:
2336 if (status < 0) { 2367 if (status < 0) {
2337 if (ret_bh && (*ret_bh)) { 2368 if (ret_bh && (*ret_bh)) {
2338 brelse(*ret_bh); 2369 brelse(*ret_bh);
2339 *ret_bh = NULL; 2370 *ret_bh = NULL;
2340 } 2371 }
2341 if (acquired) 2372 if (acquired)
2342 ocfs2_inode_unlock(inode, ex); 2373 ocfs2_inode_unlock(inode, ex);
2343 } 2374 }
2344 2375
2345 if (local_bh) 2376 if (local_bh)
2346 brelse(local_bh); 2377 brelse(local_bh);
2347 2378
2348 mlog_exit(status); 2379 mlog_exit(status);
2349 return status; 2380 return status;
2350 } 2381 }
2351 2382
2352 /* 2383 /*
2353 * This is working around a lock inversion between tasks acquiring DLM 2384 * This is working around a lock inversion between tasks acquiring DLM
2354 * locks while holding a page lock and the downconvert thread which 2385 * locks while holding a page lock and the downconvert thread which
2355 * blocks dlm lock acquiry while acquiring page locks. 2386 * blocks dlm lock acquiry while acquiring page locks.
2356 * 2387 *
2357 * ** These _with_page variantes are only intended to be called from aop 2388 * ** These _with_page variantes are only intended to be called from aop
2358 * methods that hold page locks and return a very specific *positive* error 2389 * methods that hold page locks and return a very specific *positive* error
2359 * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2390 * code that aop methods pass up to the VFS -- test for errors with != 0. **
2360 * 2391 *
2361 * The DLM is called such that it returns -EAGAIN if it would have 2392 * The DLM is called such that it returns -EAGAIN if it would have
2362 * blocked waiting for the downconvert thread. In that case we unlock 2393 * blocked waiting for the downconvert thread. In that case we unlock
2363 * our page so the downconvert thread can make progress. Once we've 2394 * our page so the downconvert thread can make progress. Once we've
2364 * done this we have to return AOP_TRUNCATED_PAGE so the aop method 2395 * done this we have to return AOP_TRUNCATED_PAGE so the aop method
2365 * that called us can bubble that back up into the VFS who will then 2396 * that called us can bubble that back up into the VFS who will then
2366 * immediately retry the aop call. 2397 * immediately retry the aop call.
2367 * 2398 *
2368 * We do a blocking lock and immediate unlock before returning, though, so that 2399 * We do a blocking lock and immediate unlock before returning, though, so that
2369 * the lock has a great chance of being cached on this node by the time the VFS 2400 * the lock has a great chance of being cached on this node by the time the VFS
2370 * calls back to retry the aop. This has a potential to livelock as nodes 2401 * calls back to retry the aop. This has a potential to livelock as nodes
2371 * ping locks back and forth, but that's a risk we're willing to take to avoid 2402 * ping locks back and forth, but that's a risk we're willing to take to avoid
2372 * the lock inversion simply. 2403 * the lock inversion simply.
2373 */ 2404 */
2374 int ocfs2_inode_lock_with_page(struct inode *inode, 2405 int ocfs2_inode_lock_with_page(struct inode *inode,
2375 struct buffer_head **ret_bh, 2406 struct buffer_head **ret_bh,
2376 int ex, 2407 int ex,
2377 struct page *page) 2408 struct page *page)
2378 { 2409 {
2379 int ret; 2410 int ret;
2380 2411
2381 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2412 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2382 if (ret == -EAGAIN) { 2413 if (ret == -EAGAIN) {
2383 unlock_page(page); 2414 unlock_page(page);
2384 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2415 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2385 ocfs2_inode_unlock(inode, ex); 2416 ocfs2_inode_unlock(inode, ex);
2386 ret = AOP_TRUNCATED_PAGE; 2417 ret = AOP_TRUNCATED_PAGE;
2387 } 2418 }
2388 2419
2389 return ret; 2420 return ret;
2390 } 2421 }
2391 2422
2392 int ocfs2_inode_lock_atime(struct inode *inode, 2423 int ocfs2_inode_lock_atime(struct inode *inode,
2393 struct vfsmount *vfsmnt, 2424 struct vfsmount *vfsmnt,
2394 int *level) 2425 int *level)
2395 { 2426 {
2396 int ret; 2427 int ret;
2397 2428
2398 mlog_entry_void(); 2429 mlog_entry_void();
2399 ret = ocfs2_inode_lock(inode, NULL, 0); 2430 ret = ocfs2_inode_lock(inode, NULL, 0);
2400 if (ret < 0) { 2431 if (ret < 0) {
2401 mlog_errno(ret); 2432 mlog_errno(ret);
2402 return ret; 2433 return ret;
2403 } 2434 }
2404 2435
2405 /* 2436 /*
2406 * If we should update atime, we will get EX lock, 2437 * If we should update atime, we will get EX lock,
2407 * otherwise we just get PR lock. 2438 * otherwise we just get PR lock.
2408 */ 2439 */
2409 if (ocfs2_should_update_atime(inode, vfsmnt)) { 2440 if (ocfs2_should_update_atime(inode, vfsmnt)) {
2410 struct buffer_head *bh = NULL; 2441 struct buffer_head *bh = NULL;
2411 2442
2412 ocfs2_inode_unlock(inode, 0); 2443 ocfs2_inode_unlock(inode, 0);
2413 ret = ocfs2_inode_lock(inode, &bh, 1); 2444 ret = ocfs2_inode_lock(inode, &bh, 1);
2414 if (ret < 0) { 2445 if (ret < 0) {
2415 mlog_errno(ret); 2446 mlog_errno(ret);
2416 return ret; 2447 return ret;
2417 } 2448 }
2418 *level = 1; 2449 *level = 1;
2419 if (ocfs2_should_update_atime(inode, vfsmnt)) 2450 if (ocfs2_should_update_atime(inode, vfsmnt))
2420 ocfs2_update_inode_atime(inode, bh); 2451 ocfs2_update_inode_atime(inode, bh);
2421 if (bh) 2452 if (bh)
2422 brelse(bh); 2453 brelse(bh);
2423 } else 2454 } else
2424 *level = 0; 2455 *level = 0;
2425 2456
2426 mlog_exit(ret); 2457 mlog_exit(ret);
2427 return ret; 2458 return ret;
2428 } 2459 }
2429 2460
2430 void ocfs2_inode_unlock(struct inode *inode, 2461 void ocfs2_inode_unlock(struct inode *inode,
2431 int ex) 2462 int ex)
2432 { 2463 {
2433 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2464 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2434 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2465 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2435 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2466 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2436 2467
2437 mlog_entry_void(); 2468 mlog_entry_void();
2438 2469
2439 mlog(0, "inode %llu drop %s META lock\n", 2470 mlog(0, "inode %llu drop %s META lock\n",
2440 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2471 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2441 ex ? "EXMODE" : "PRMODE"); 2472 ex ? "EXMODE" : "PRMODE");
2442 2473
2443 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2474 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2444 !ocfs2_mount_local(osb)) 2475 !ocfs2_mount_local(osb))
2445 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2476 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2446 2477
2447 mlog_exit_void(); 2478 mlog_exit_void();
2448 } 2479 }
2449 2480
2450 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 2481 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
2451 { 2482 {
2452 struct ocfs2_lock_res *lockres; 2483 struct ocfs2_lock_res *lockres;
2453 struct ocfs2_orphan_scan_lvb *lvb; 2484 struct ocfs2_orphan_scan_lvb *lvb;
2454 int status = 0; 2485 int status = 0;
2455 2486
2456 if (ocfs2_is_hard_readonly(osb)) 2487 if (ocfs2_is_hard_readonly(osb))
2457 return -EROFS; 2488 return -EROFS;
2458 2489
2459 if (ocfs2_mount_local(osb)) 2490 if (ocfs2_mount_local(osb))
2460 return 0; 2491 return 0;
2461 2492
2462 lockres = &osb->osb_orphan_scan.os_lockres; 2493 lockres = &osb->osb_orphan_scan.os_lockres;
2463 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2494 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2464 if (status < 0) 2495 if (status < 0)
2465 return status; 2496 return status;
2466 2497
2467 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2498 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2468 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 2499 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2469 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 2500 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
2470 *seqno = be32_to_cpu(lvb->lvb_os_seqno); 2501 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
2471 else 2502 else
2472 *seqno = osb->osb_orphan_scan.os_seqno + 1; 2503 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2473 2504
2474 return status; 2505 return status;
2475 } 2506 }
2476 2507
2477 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) 2508 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
2478 { 2509 {
2479 struct ocfs2_lock_res *lockres; 2510 struct ocfs2_lock_res *lockres;
2480 struct ocfs2_orphan_scan_lvb *lvb; 2511 struct ocfs2_orphan_scan_lvb *lvb;
2481 2512
2482 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { 2513 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
2483 lockres = &osb->osb_orphan_scan.os_lockres; 2514 lockres = &osb->osb_orphan_scan.os_lockres;
2484 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2515 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2485 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 2516 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2486 lvb->lvb_os_seqno = cpu_to_be32(seqno); 2517 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2487 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2518 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2488 } 2519 }
2489 } 2520 }
2490 2521
2491 int ocfs2_super_lock(struct ocfs2_super *osb, 2522 int ocfs2_super_lock(struct ocfs2_super *osb,
2492 int ex) 2523 int ex)
2493 { 2524 {
2494 int status = 0; 2525 int status = 0;
2495 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2526 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2496 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2527 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2497 2528
2498 mlog_entry_void(); 2529 mlog_entry_void();
2499 2530
2500 if (ocfs2_is_hard_readonly(osb)) 2531 if (ocfs2_is_hard_readonly(osb))
2501 return -EROFS; 2532 return -EROFS;
2502 2533
2503 if (ocfs2_mount_local(osb)) 2534 if (ocfs2_mount_local(osb))
2504 goto bail; 2535 goto bail;
2505 2536
2506 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2537 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2507 if (status < 0) { 2538 if (status < 0) {
2508 mlog_errno(status); 2539 mlog_errno(status);
2509 goto bail; 2540 goto bail;
2510 } 2541 }
2511 2542
2512 /* The super block lock path is really in the best position to 2543 /* The super block lock path is really in the best position to
2513 * know when resources covered by the lock need to be 2544 * know when resources covered by the lock need to be
2514 * refreshed, so we do it here. Of course, making sense of 2545 * refreshed, so we do it here. Of course, making sense of
2515 * everything is up to the caller :) */ 2546 * everything is up to the caller :) */
2516 status = ocfs2_should_refresh_lock_res(lockres); 2547 status = ocfs2_should_refresh_lock_res(lockres);
2517 if (status < 0) { 2548 if (status < 0) {
2518 mlog_errno(status); 2549 mlog_errno(status);
2519 goto bail; 2550 goto bail;
2520 } 2551 }
2521 if (status) { 2552 if (status) {
2522 status = ocfs2_refresh_slot_info(osb); 2553 status = ocfs2_refresh_slot_info(osb);
2523 2554
2524 ocfs2_complete_lock_res_refresh(lockres, status); 2555 ocfs2_complete_lock_res_refresh(lockres, status);
2525 2556
2526 if (status < 0) 2557 if (status < 0)
2527 mlog_errno(status); 2558 mlog_errno(status);
2528 ocfs2_track_lock_refresh(lockres); 2559 ocfs2_track_lock_refresh(lockres);
2529 } 2560 }
2530 bail: 2561 bail:
2531 mlog_exit(status); 2562 mlog_exit(status);
2532 return status; 2563 return status;
2533 } 2564 }
2534 2565
2535 void ocfs2_super_unlock(struct ocfs2_super *osb, 2566 void ocfs2_super_unlock(struct ocfs2_super *osb,
2536 int ex) 2567 int ex)
2537 { 2568 {
2538 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2569 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2539 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2570 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2540 2571
2541 if (!ocfs2_mount_local(osb)) 2572 if (!ocfs2_mount_local(osb))
2542 ocfs2_cluster_unlock(osb, lockres, level); 2573 ocfs2_cluster_unlock(osb, lockres, level);
2543 } 2574 }
2544 2575
2545 int ocfs2_rename_lock(struct ocfs2_super *osb) 2576 int ocfs2_rename_lock(struct ocfs2_super *osb)
2546 { 2577 {
2547 int status; 2578 int status;
2548 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2579 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2549 2580
2550 if (ocfs2_is_hard_readonly(osb)) 2581 if (ocfs2_is_hard_readonly(osb))
2551 return -EROFS; 2582 return -EROFS;
2552 2583
2553 if (ocfs2_mount_local(osb)) 2584 if (ocfs2_mount_local(osb))
2554 return 0; 2585 return 0;
2555 2586
2556 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2587 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2557 if (status < 0) 2588 if (status < 0)
2558 mlog_errno(status); 2589 mlog_errno(status);
2559 2590
2560 return status; 2591 return status;
2561 } 2592 }
2562 2593
2563 void ocfs2_rename_unlock(struct ocfs2_super *osb) 2594 void ocfs2_rename_unlock(struct ocfs2_super *osb)
2564 { 2595 {
2565 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2596 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2566 2597
2567 if (!ocfs2_mount_local(osb)) 2598 if (!ocfs2_mount_local(osb))
2568 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2599 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2569 } 2600 }
2570 2601
2571 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 2602 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
2572 { 2603 {
2573 int status; 2604 int status;
2574 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2605 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2575 2606
2576 if (ocfs2_is_hard_readonly(osb)) 2607 if (ocfs2_is_hard_readonly(osb))
2577 return -EROFS; 2608 return -EROFS;
2578 2609
2579 if (ocfs2_mount_local(osb)) 2610 if (ocfs2_mount_local(osb))
2580 return 0; 2611 return 0;
2581 2612
2582 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 2613 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
2583 0, 0); 2614 0, 0);
2584 if (status < 0) 2615 if (status < 0)
2585 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 2616 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
2586 2617
2587 return status; 2618 return status;
2588 } 2619 }
2589 2620
2590 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 2621 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
2591 { 2622 {
2592 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2623 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2593 2624
2594 if (!ocfs2_mount_local(osb)) 2625 if (!ocfs2_mount_local(osb))
2595 ocfs2_cluster_unlock(osb, lockres, 2626 ocfs2_cluster_unlock(osb, lockres,
2596 ex ? LKM_EXMODE : LKM_PRMODE); 2627 ex ? LKM_EXMODE : LKM_PRMODE);
2597 } 2628 }
2598 2629
2599 int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2630 int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2600 { 2631 {
2601 int ret; 2632 int ret;
2602 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2633 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2603 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2634 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2604 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2635 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2605 2636
2606 BUG_ON(!dl); 2637 BUG_ON(!dl);
2607 2638
2608 if (ocfs2_is_hard_readonly(osb)) 2639 if (ocfs2_is_hard_readonly(osb))
2609 return -EROFS; 2640 return -EROFS;
2610 2641
2611 if (ocfs2_mount_local(osb)) 2642 if (ocfs2_mount_local(osb))
2612 return 0; 2643 return 0;
2613 2644
2614 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2645 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2615 if (ret < 0) 2646 if (ret < 0)
2616 mlog_errno(ret); 2647 mlog_errno(ret);
2617 2648
2618 return ret; 2649 return ret;
2619 } 2650 }
2620 2651
2621 void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2652 void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2622 { 2653 {
2623 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2654 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2624 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2655 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2625 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2656 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2626 2657
2627 if (!ocfs2_mount_local(osb)) 2658 if (!ocfs2_mount_local(osb))
2628 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2659 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2629 } 2660 }
2630 2661
2631 /* Reference counting of the dlm debug structure. We want this because 2662 /* Reference counting of the dlm debug structure. We want this because
2632 * open references on the debug inodes can live on after a mount, so 2663 * open references on the debug inodes can live on after a mount, so
2633 * we can't rely on the ocfs2_super to always exist. */ 2664 * we can't rely on the ocfs2_super to always exist. */
2634 static void ocfs2_dlm_debug_free(struct kref *kref) 2665 static void ocfs2_dlm_debug_free(struct kref *kref)
2635 { 2666 {
2636 struct ocfs2_dlm_debug *dlm_debug; 2667 struct ocfs2_dlm_debug *dlm_debug;
2637 2668
2638 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2669 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2639 2670
2640 kfree(dlm_debug); 2671 kfree(dlm_debug);
2641 } 2672 }
2642 2673
2643 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2674 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2644 { 2675 {
2645 if (dlm_debug) 2676 if (dlm_debug)
2646 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2677 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2647 } 2678 }
2648 2679
/* Take an additional reference on @debug; paired with
 * ocfs2_put_dlm_debug(). */
static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
{
	kref_get(&debug->d_refcnt);
}
2653 2684
2654 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2685 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2655 { 2686 {
2656 struct ocfs2_dlm_debug *dlm_debug; 2687 struct ocfs2_dlm_debug *dlm_debug;
2657 2688
2658 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2689 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2659 if (!dlm_debug) { 2690 if (!dlm_debug) {
2660 mlog_errno(-ENOMEM); 2691 mlog_errno(-ENOMEM);
2661 goto out; 2692 goto out;
2662 } 2693 }
2663 2694
2664 kref_init(&dlm_debug->d_refcnt); 2695 kref_init(&dlm_debug->d_refcnt);
2665 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2696 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2666 dlm_debug->d_locking_state = NULL; 2697 dlm_debug->d_locking_state = NULL;
2667 out: 2698 out:
2668 return dlm_debug; 2699 return dlm_debug;
2669 } 2700 }
2670 2701
/* Access to this is arbitrated for us via seq_file->sem. */
struct ocfs2_dlm_seq_priv {
	struct ocfs2_dlm_debug *p_dlm_debug;	/* pinned (kref held) while the file is open */
	struct ocfs2_lock_res p_iter_res;	/* dummy cursor linked into the tracking list */
	struct ocfs2_lock_res p_tmp_res;	/* snapshot of the lockres being shown */
};
2677 2708
/*
 * Advance from @start to the next real lockres on the global debug
 * tracking list.  Must be called with ocfs2_dlm_tracking_lock held.
 * Iteration dummies (the seq_file cursors) are identified by a NULL
 * l_ops and skipped.  Returns the next live lockres, or NULL once the
 * list head is reached.
 */
static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
						 struct ocfs2_dlm_seq_priv *priv)
{
	struct ocfs2_lock_res *iter, *ret = NULL;
	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;

	assert_spin_locked(&ocfs2_dlm_tracking_lock);

	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
		/* discover the head of the list */
		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
			mlog(0, "End of list found, %p\n", ret);
			break;
		}

		/* We track our "dummy" iteration lockres' by a NULL
		 * l_ops field. */
		if (iter->l_ops != NULL) {
			ret = iter;
			break;
		}
	}

	return ret;
}
2703 2734
/*
 * seq_file ->start: resume iteration from this open file's dummy
 * cursor and hand back a stable snapshot of the next live lockres
 * (or NULL at end of list).
 */
static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
{
	struct ocfs2_dlm_seq_priv *priv = m->private;
	struct ocfs2_lock_res *iter;

	spin_lock(&ocfs2_dlm_tracking_lock);
	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
	if (iter) {
		/* Since lockres' have the lifetime of their container
		 * (which can be inodes, ocfs2_supers, etc) we want to
		 * copy this out to a temporary lockres while still
		 * under the spinlock. Obviously after this we can't
		 * trust any pointers on the copy returned, but that's
		 * ok as the information we want isn't typically held
		 * in them. */
		priv->p_tmp_res = *iter;
		iter = &priv->p_tmp_res;
	}
	spin_unlock(&ocfs2_dlm_tracking_lock);

	return iter;
}
2726 2757
/* seq_file ->stop: nothing to undo per pass — the dummy cursor stays
 * on the list until ocfs2_dlm_debug_release(). */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
2730 2761
/*
 * seq_file ->next: find the lockres following @v, then re-anchor this
 * file's dummy cursor right after it so a later ->start resumes from
 * the correct position.  Returns a snapshot of the next lockres, or
 * NULL at the end of the list.
 */
static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ocfs2_dlm_seq_priv *priv = m->private;
	struct ocfs2_lock_res *iter = v;
	struct ocfs2_lock_res *dummy = &priv->p_iter_res;

	spin_lock(&ocfs2_dlm_tracking_lock);
	iter = ocfs2_dlm_next_res(iter, priv);
	/* Move the dummy: unhook it first, then (if there is a next
	 * entry) splice it in directly after that entry. */
	list_del_init(&dummy->l_debug_list);
	if (iter) {
		list_add(&dummy->l_debug_list, &iter->l_debug_list);
		priv->p_tmp_res = *iter;
		iter = &priv->p_tmp_res;
	}
	spin_unlock(&ocfs2_dlm_tracking_lock);

	return iter;
}
2749 2780
2750 /* So that debugfs.ocfs2 can determine which format is being used */ 2781 /* So that debugfs.ocfs2 can determine which format is being used */
2751 #define OCFS2_DLM_DEBUG_STR_VERSION 2 2782 #define OCFS2_DLM_DEBUG_STR_VERSION 2
2752 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2783 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2753 { 2784 {
2754 int i; 2785 int i;
2755 char *lvb; 2786 char *lvb;
2756 struct ocfs2_lock_res *lockres = v; 2787 struct ocfs2_lock_res *lockres = v;
2757 2788
2758 if (!lockres) 2789 if (!lockres)
2759 return -EINVAL; 2790 return -EINVAL;
2760 2791
2761 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2792 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2762 2793
2763 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2794 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2764 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2795 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2765 lockres->l_name, 2796 lockres->l_name,
2766 (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2797 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2767 else 2798 else
2768 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2799 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2769 2800
2770 seq_printf(m, "%d\t" 2801 seq_printf(m, "%d\t"
2771 "0x%lx\t" 2802 "0x%lx\t"
2772 "0x%x\t" 2803 "0x%x\t"
2773 "0x%x\t" 2804 "0x%x\t"
2774 "%u\t" 2805 "%u\t"
2775 "%u\t" 2806 "%u\t"
2776 "%d\t" 2807 "%d\t"
2777 "%d\t", 2808 "%d\t",
2778 lockres->l_level, 2809 lockres->l_level,
2779 lockres->l_flags, 2810 lockres->l_flags,
2780 lockres->l_action, 2811 lockres->l_action,
2781 lockres->l_unlock_action, 2812 lockres->l_unlock_action,
2782 lockres->l_ro_holders, 2813 lockres->l_ro_holders,
2783 lockres->l_ex_holders, 2814 lockres->l_ex_holders,
2784 lockres->l_requested, 2815 lockres->l_requested,
2785 lockres->l_blocking); 2816 lockres->l_blocking);
2786 2817
2787 /* Dump the raw LVB */ 2818 /* Dump the raw LVB */
2788 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2819 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2789 for(i = 0; i < DLM_LVB_LEN; i++) 2820 for(i = 0; i < DLM_LVB_LEN; i++)
2790 seq_printf(m, "0x%x\t", lvb[i]); 2821 seq_printf(m, "0x%x\t", lvb[i]);
2791 2822
2792 #ifdef CONFIG_OCFS2_FS_STATS 2823 #ifdef CONFIG_OCFS2_FS_STATS
2793 # define lock_num_prmode(_l) (_l)->l_lock_num_prmode 2824 # define lock_num_prmode(_l) (_l)->l_lock_num_prmode
2794 # define lock_num_exmode(_l) (_l)->l_lock_num_exmode 2825 # define lock_num_exmode(_l) (_l)->l_lock_num_exmode
2795 # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed 2826 # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed
2796 # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed 2827 # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed
2797 # define lock_total_prmode(_l) (_l)->l_lock_total_prmode 2828 # define lock_total_prmode(_l) (_l)->l_lock_total_prmode
2798 # define lock_total_exmode(_l) (_l)->l_lock_total_exmode 2829 # define lock_total_exmode(_l) (_l)->l_lock_total_exmode
2799 # define lock_max_prmode(_l) (_l)->l_lock_max_prmode 2830 # define lock_max_prmode(_l) (_l)->l_lock_max_prmode
2800 # define lock_max_exmode(_l) (_l)->l_lock_max_exmode 2831 # define lock_max_exmode(_l) (_l)->l_lock_max_exmode
2801 # define lock_refresh(_l) (_l)->l_lock_refresh 2832 # define lock_refresh(_l) (_l)->l_lock_refresh
2802 #else 2833 #else
2803 # define lock_num_prmode(_l) (0ULL) 2834 # define lock_num_prmode(_l) (0ULL)
2804 # define lock_num_exmode(_l) (0ULL) 2835 # define lock_num_exmode(_l) (0ULL)
2805 # define lock_num_prmode_failed(_l) (0) 2836 # define lock_num_prmode_failed(_l) (0)
2806 # define lock_num_exmode_failed(_l) (0) 2837 # define lock_num_exmode_failed(_l) (0)
2807 # define lock_total_prmode(_l) (0ULL) 2838 # define lock_total_prmode(_l) (0ULL)
2808 # define lock_total_exmode(_l) (0ULL) 2839 # define lock_total_exmode(_l) (0ULL)
2809 # define lock_max_prmode(_l) (0) 2840 # define lock_max_prmode(_l) (0)
2810 # define lock_max_exmode(_l) (0) 2841 # define lock_max_exmode(_l) (0)
2811 # define lock_refresh(_l) (0) 2842 # define lock_refresh(_l) (0)
2812 #endif 2843 #endif
2813 /* The following seq_print was added in version 2 of this output */ 2844 /* The following seq_print was added in version 2 of this output */
2814 seq_printf(m, "%llu\t" 2845 seq_printf(m, "%llu\t"
2815 "%llu\t" 2846 "%llu\t"
2816 "%u\t" 2847 "%u\t"
2817 "%u\t" 2848 "%u\t"
2818 "%llu\t" 2849 "%llu\t"
2819 "%llu\t" 2850 "%llu\t"
2820 "%u\t" 2851 "%u\t"
2821 "%u\t" 2852 "%u\t"
2822 "%u\t", 2853 "%u\t",
2823 lock_num_prmode(lockres), 2854 lock_num_prmode(lockres),
2824 lock_num_exmode(lockres), 2855 lock_num_exmode(lockres),
2825 lock_num_prmode_failed(lockres), 2856 lock_num_prmode_failed(lockres),
2826 lock_num_exmode_failed(lockres), 2857 lock_num_exmode_failed(lockres),
2827 lock_total_prmode(lockres), 2858 lock_total_prmode(lockres),
2828 lock_total_exmode(lockres), 2859 lock_total_exmode(lockres),
2829 lock_max_prmode(lockres), 2860 lock_max_prmode(lockres),
2830 lock_max_exmode(lockres), 2861 lock_max_exmode(lockres),
2831 lock_refresh(lockres)); 2862 lock_refresh(lockres));
2832 2863
2833 /* End the line */ 2864 /* End the line */
2834 seq_printf(m, "\n"); 2865 seq_printf(m, "\n");
2835 return 0; 2866 return 0;
2836 } 2867 }
2837 2868
/* Iteration callbacks for the "locking_state" debugfs file. */
static const struct seq_operations ocfs2_dlm_seq_ops = {
	.start =	ocfs2_dlm_seq_start,
	.stop =		ocfs2_dlm_seq_stop,
	.next =		ocfs2_dlm_seq_next,
	.show =		ocfs2_dlm_seq_show,
};
2844 2875
2845 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2876 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2846 { 2877 {
2847 struct seq_file *seq = (struct seq_file *) file->private_data; 2878 struct seq_file *seq = (struct seq_file *) file->private_data;
2848 struct ocfs2_dlm_seq_priv *priv = seq->private; 2879 struct ocfs2_dlm_seq_priv *priv = seq->private;
2849 struct ocfs2_lock_res *res = &priv->p_iter_res; 2880 struct ocfs2_lock_res *res = &priv->p_iter_res;
2850 2881
2851 ocfs2_remove_lockres_tracking(res); 2882 ocfs2_remove_lockres_tracking(res);
2852 ocfs2_put_dlm_debug(priv->p_dlm_debug); 2883 ocfs2_put_dlm_debug(priv->p_dlm_debug);
2853 return seq_release_private(inode, file); 2884 return seq_release_private(inode, file);
2854 } 2885 }
2855 2886
2856 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2887 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2857 { 2888 {
2858 int ret; 2889 int ret;
2859 struct ocfs2_dlm_seq_priv *priv; 2890 struct ocfs2_dlm_seq_priv *priv;
2860 struct seq_file *seq; 2891 struct seq_file *seq;
2861 struct ocfs2_super *osb; 2892 struct ocfs2_super *osb;
2862 2893
2863 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2894 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL);
2864 if (!priv) { 2895 if (!priv) {
2865 ret = -ENOMEM; 2896 ret = -ENOMEM;
2866 mlog_errno(ret); 2897 mlog_errno(ret);
2867 goto out; 2898 goto out;
2868 } 2899 }
2869 osb = inode->i_private; 2900 osb = inode->i_private;
2870 ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2901 ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2871 priv->p_dlm_debug = osb->osb_dlm_debug; 2902 priv->p_dlm_debug = osb->osb_dlm_debug;
2872 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2903 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2873 2904
2874 ret = seq_open(file, &ocfs2_dlm_seq_ops); 2905 ret = seq_open(file, &ocfs2_dlm_seq_ops);
2875 if (ret) { 2906 if (ret) {
2876 kfree(priv); 2907 kfree(priv);
2877 mlog_errno(ret); 2908 mlog_errno(ret);
2878 goto out; 2909 goto out;
2879 } 2910 }
2880 2911
2881 seq = (struct seq_file *) file->private_data; 2912 seq = (struct seq_file *) file->private_data;
2882 seq->private = priv; 2913 seq->private = priv;
2883 2914
2884 ocfs2_add_lockres_tracking(&priv->p_iter_res, 2915 ocfs2_add_lockres_tracking(&priv->p_iter_res,
2885 priv->p_dlm_debug); 2916 priv->p_dlm_debug);
2886 2917
2887 out: 2918 out:
2888 return ret; 2919 return ret;
2889 } 2920 }
2890 2921
/* File operations for the "locking_state" debugfs file; reads are
 * serviced by the seq_file machinery above. */
static const struct file_operations ocfs2_dlm_debug_fops = {
	.open =		ocfs2_dlm_debug_open,
	.release =	ocfs2_dlm_debug_release,
	.read =		seq_read,
	.llseek =	seq_lseek,
};
2897 2928
2898 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2929 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2899 { 2930 {
2900 int ret = 0; 2931 int ret = 0;
2901 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2932 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2902 2933
2903 dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2934 dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2904 S_IFREG|S_IRUSR, 2935 S_IFREG|S_IRUSR,
2905 osb->osb_debug_root, 2936 osb->osb_debug_root,
2906 osb, 2937 osb,
2907 &ocfs2_dlm_debug_fops); 2938 &ocfs2_dlm_debug_fops);
2908 if (!dlm_debug->d_locking_state) { 2939 if (!dlm_debug->d_locking_state) {
2909 ret = -EINVAL; 2940 ret = -EINVAL;
2910 mlog(ML_ERROR, 2941 mlog(ML_ERROR,
2911 "Unable to create locking state debugfs file.\n"); 2942 "Unable to create locking state debugfs file.\n");
2912 goto out; 2943 goto out;
2913 } 2944 }
2914 2945
2915 ocfs2_get_dlm_debug(dlm_debug); 2946 ocfs2_get_dlm_debug(dlm_debug);
2916 out: 2947 out:
2917 return ret; 2948 return ret;
2918 } 2949 }
2919 2950
2920 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2951 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2921 { 2952 {
2922 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2953 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2923 2954
2924 if (dlm_debug) { 2955 if (dlm_debug) {
2925 debugfs_remove(dlm_debug->d_locking_state); 2956 debugfs_remove(dlm_debug->d_locking_state);
2926 ocfs2_put_dlm_debug(dlm_debug); 2957 ocfs2_put_dlm_debug(dlm_debug);
2927 } 2958 }
2928 } 2959 }
2929 2960
/*
 * Bring up everything dlmglue needs for a mount: the debugfs state,
 * the downconvert thread, the cluster connection, and the per-osb
 * global lock resources.  A local mount skips all the cluster pieces
 * and only initializes the lockres structures.  Returns 0 on success
 * or a negative error code, with partial setup unwound.
 */
int ocfs2_dlm_init(struct ocfs2_super *osb)
{
	int status = 0;
	struct ocfs2_cluster_connection *conn = NULL;

	mlog_entry_void();

	if (ocfs2_mount_local(osb)) {
		osb->node_num = 0;
		goto local;
	}

	status = ocfs2_dlm_init_debug(osb);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* launch downconvert thread */
	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
	if (IS_ERR(osb->dc_task)) {
		status = PTR_ERR(osb->dc_task);
		/* NULL this so the bail path below won't try to
		 * kthread_stop() an ERR_PTR. */
		osb->dc_task = NULL;
		mlog_errno(status);
		goto bail;
	}

	/* for now, uuid == domain */
	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
				       osb->uuid_str,
				       strlen(osb->uuid_str),
				       ocfs2_do_node_down, osb,
				       &conn);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_cluster_this_node(&osb->node_num);
	if (status < 0) {
		mlog_errno(status);
		mlog(ML_ERROR,
		     "could not find this host's node number\n");
		/* Connection succeeded but is unusable without a node
		 * number — disconnect before unwinding the rest. */
		ocfs2_cluster_disconnect(conn, 0);
		goto bail;
	}

local:
	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);

	osb->cconn = conn;

	status = 0;
bail:
	if (status < 0) {
		/* Debug teardown tolerates never having been set up;
		 * dc_task is only non-NULL after kthread_run succeeded. */
		ocfs2_dlm_shutdown_debug(osb);
		if (osb->dc_task)
			kthread_stop(osb->dc_task);
	}

	mlog_exit(status);
	return status;
}
2996 3027
/*
 * Dismount-time counterpart of ocfs2_dlm_init(): drop the osb-global
 * locks, stop the downconvert thread, free the global lock resources,
 * disconnect from the cluster and tear down the debugfs state — in
 * that order.  @hangup_pending is passed through to the cluster
 * disconnect.
 */
void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
			int hangup_pending)
{
	mlog_entry_void();

	ocfs2_drop_osb_locks(osb);

	/*
	 * Now that we have dropped all locks and ocfs2_dismount_volume()
	 * has disabled recovery, the DLM won't be talking to us. It's
	 * safe to tear things down before disconnecting the cluster.
	 */

	if (osb->dc_task) {
		kthread_stop(osb->dc_task);
		osb->dc_task = NULL;
	}

	ocfs2_lock_res_free(&osb->osb_super_lockres);
	ocfs2_lock_res_free(&osb->osb_rename_lockres);
	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);

	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
	osb->cconn = NULL;

	ocfs2_dlm_shutdown_debug(osb);

	mlog_exit_void();
}
3027 3058
/*
 * Completion callback for an unlock or cancel-convert request.  On a
 * DLM-reported error we log and return, leaving l_unlock_action (and
 * OCFS2_LOCK_BUSY) in place.  On success we finish the recorded
 * action, clear BUSY and wake any waiters on l_event.
 */
static void ocfs2_unlock_ast(void *opaque, int error)
{
	struct ocfs2_lock_res *lockres = opaque;
	unsigned long flags;

	mlog_entry_void();

	mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name,
	     lockres->l_unlock_action);

	spin_lock_irqsave(&lockres->l_lock, flags);
	if (error) {
		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
		     "unlock_action %d\n", error, lockres->l_name,
		     lockres->l_unlock_action);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		mlog_exit_void();
		return;
	}

	switch(lockres->l_unlock_action) {
	case OCFS2_UNLOCK_CANCEL_CONVERT:
		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
		/* The pending convert is gone; forget it. */
		lockres->l_action = OCFS2_AST_INVALID;
		/* Downconvert thread may have requeued this lock, we
		 * need to wake it. */
		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
		break;
	case OCFS2_UNLOCK_DROP_LOCK:
		/* Fully released: mark the level invalid. */
		lockres->l_level = DLM_LOCK_IV;
		break;
	default:
		/* An unlock AST with no recorded unlock action means
		 * our state machine is broken. */
		BUG();
	}

	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog_exit_void();
}
3071 3102
3072 static int ocfs2_drop_lock(struct ocfs2_super *osb, 3103 static int ocfs2_drop_lock(struct ocfs2_super *osb,
3073 struct ocfs2_lock_res *lockres) 3104 struct ocfs2_lock_res *lockres)
3074 { 3105 {
3075 int ret; 3106 int ret;
3076 unsigned long flags; 3107 unsigned long flags;
3077 u32 lkm_flags = 0; 3108 u32 lkm_flags = 0;
3078 3109
3079 /* We didn't get anywhere near actually using this lockres. */ 3110 /* We didn't get anywhere near actually using this lockres. */
3080 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 3111 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3081 goto out; 3112 goto out;
3082 3113
3083 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 3114 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3084 lkm_flags |= DLM_LKF_VALBLK; 3115 lkm_flags |= DLM_LKF_VALBLK;
3085 3116
3086 spin_lock_irqsave(&lockres->l_lock, flags); 3117 spin_lock_irqsave(&lockres->l_lock, flags);
3087 3118
3088 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 3119 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3089 "lockres %s, flags 0x%lx\n", 3120 "lockres %s, flags 0x%lx\n",
3090 lockres->l_name, lockres->l_flags); 3121 lockres->l_name, lockres->l_flags);
3091 3122
3092 while (lockres->l_flags & OCFS2_LOCK_BUSY) { 3123 while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3093 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 3124 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3094 "%u, unlock_action = %u\n", 3125 "%u, unlock_action = %u\n",
3095 lockres->l_name, lockres->l_flags, lockres->l_action, 3126 lockres->l_name, lockres->l_flags, lockres->l_action,
3096 lockres->l_unlock_action); 3127 lockres->l_unlock_action);
3097 3128
3098 spin_unlock_irqrestore(&lockres->l_lock, flags); 3129 spin_unlock_irqrestore(&lockres->l_lock, flags);
3099 3130
3100 /* XXX: Today we just wait on any busy 3131 /* XXX: Today we just wait on any busy
3101 * locks... Perhaps we need to cancel converts in the 3132 * locks... Perhaps we need to cancel converts in the
3102 * future? */ 3133 * future? */
3103 ocfs2_wait_on_busy_lock(lockres); 3134 ocfs2_wait_on_busy_lock(lockres);
3104 3135
3105 spin_lock_irqsave(&lockres->l_lock, flags); 3136 spin_lock_irqsave(&lockres->l_lock, flags);
3106 } 3137 }
3107 3138
3108 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3139 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3109 if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 3140 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3110 lockres->l_level == DLM_LOCK_EX && 3141 lockres->l_level == DLM_LOCK_EX &&
3111 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 3142 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3112 lockres->l_ops->set_lvb(lockres); 3143 lockres->l_ops->set_lvb(lockres);
3113 } 3144 }
3114 3145
3115 if (lockres->l_flags & OCFS2_LOCK_BUSY) 3146 if (lockres->l_flags & OCFS2_LOCK_BUSY)
3116 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 3147 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3117 lockres->l_name); 3148 lockres->l_name);
3118 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3149 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3119 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 3150 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3120 3151
3121 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 3152 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3122 spin_unlock_irqrestore(&lockres->l_lock, flags); 3153 spin_unlock_irqrestore(&lockres->l_lock, flags);
3123 goto out; 3154 goto out;
3124 } 3155 }
3125 3156
3126 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 3157 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3127 3158
3128 /* make sure we never get here while waiting for an ast to 3159 /* make sure we never get here while waiting for an ast to
3129 * fire. */ 3160 * fire. */
3130 BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 3161 BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3131 3162
3132 /* is this necessary? */ 3163 /* is this necessary? */
3133 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3164 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3134 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 3165 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3135 spin_unlock_irqrestore(&lockres->l_lock, flags); 3166 spin_unlock_irqrestore(&lockres->l_lock, flags);
3136 3167
3137 mlog(0, "lock %s\n", lockres->l_name); 3168 mlog(0, "lock %s\n", lockres->l_name);
3138 3169
3139 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, 3170 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags,
3140 lockres); 3171 lockres);
3141 if (ret) { 3172 if (ret) {
3142 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3173 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3143 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 3174 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3144 ocfs2_dlm_dump_lksb(&lockres->l_lksb); 3175 ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3145 BUG(); 3176 BUG();
3146 } 3177 }
3147 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", 3178 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3148 lockres->l_name); 3179 lockres->l_name);
3149 3180
3150 ocfs2_wait_on_busy_lock(lockres); 3181 ocfs2_wait_on_busy_lock(lockres);
3151 out: 3182 out:
3152 mlog_exit(0); 3183 mlog_exit(0);
3153 return 0; 3184 return 0;
3154 } 3185 }
3155 3186
3156 /* Mark the lockres as being dropped. It will no longer be 3187 /* Mark the lockres as being dropped. It will no longer be
3157 * queued if blocking, but we still may have to wait on it 3188 * queued if blocking, but we still may have to wait on it
3158 * being dequeued from the downconvert thread before we can consider 3189 * being dequeued from the downconvert thread before we can consider
3159 * it safe to drop. 3190 * it safe to drop.
3160 * 3191 *
3161 * You can *not* attempt to call cluster_lock on this lockres anymore. */ 3192 * You can *not* attempt to call cluster_lock on this lockres anymore. */
3162 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3193 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
3163 { 3194 {
3164 int status; 3195 int status;
3165 struct ocfs2_mask_waiter mw; 3196 struct ocfs2_mask_waiter mw;
3166 unsigned long flags; 3197 unsigned long flags;
3167 3198
3168 ocfs2_init_mask_waiter(&mw); 3199 ocfs2_init_mask_waiter(&mw);
3169 3200
3170 spin_lock_irqsave(&lockres->l_lock, flags); 3201 spin_lock_irqsave(&lockres->l_lock, flags);
3171 lockres->l_flags |= OCFS2_LOCK_FREEING; 3202 lockres->l_flags |= OCFS2_LOCK_FREEING;
3172 while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3203 while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3173 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3204 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3174 spin_unlock_irqrestore(&lockres->l_lock, flags); 3205 spin_unlock_irqrestore(&lockres->l_lock, flags);
3175 3206
3176 mlog(0, "Waiting on lockres %s\n", lockres->l_name); 3207 mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3177 3208
3178 status = ocfs2_wait_for_mask(&mw); 3209 status = ocfs2_wait_for_mask(&mw);
3179 if (status) 3210 if (status)
3180 mlog_errno(status); 3211 mlog_errno(status);
3181 3212
3182 spin_lock_irqsave(&lockres->l_lock, flags); 3213 spin_lock_irqsave(&lockres->l_lock, flags);
3183 } 3214 }
3184 spin_unlock_irqrestore(&lockres->l_lock, flags); 3215 spin_unlock_irqrestore(&lockres->l_lock, flags);
3185 } 3216 }
3186 3217
/* Mark a lockres as freeing and then drop it; errors from the drop are
 * logged but not propagated (callers have no recovery path here). */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int ret;

	ocfs2_mark_lockres_freeing(lockres);
	ret = ocfs2_drop_lock(osb, lockres);
	if (ret)
		mlog_errno(ret);
}
3197 3228
3198 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3229 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3199 { 3230 {
3200 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3231 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3201 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 3232 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
3202 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 3233 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
3203 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3234 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3204 } 3235 }
3205 3236
3206 int ocfs2_drop_inode_locks(struct inode *inode) 3237 int ocfs2_drop_inode_locks(struct inode *inode)
3207 { 3238 {
3208 int status, err; 3239 int status, err;
3209 3240
3210 mlog_entry_void(); 3241 mlog_entry_void();
3211 3242
3212 /* No need to call ocfs2_mark_lockres_freeing here - 3243 /* No need to call ocfs2_mark_lockres_freeing here -
3213 * ocfs2_clear_inode has done it for us. */ 3244 * ocfs2_clear_inode has done it for us. */
3214 3245
3215 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3246 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3216 &OCFS2_I(inode)->ip_open_lockres); 3247 &OCFS2_I(inode)->ip_open_lockres);
3217 if (err < 0) 3248 if (err < 0)
3218 mlog_errno(err); 3249 mlog_errno(err);
3219 3250
3220 status = err; 3251 status = err;
3221 3252
3222 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3253 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3223 &OCFS2_I(inode)->ip_inode_lockres); 3254 &OCFS2_I(inode)->ip_inode_lockres);
3224 if (err < 0) 3255 if (err < 0)
3225 mlog_errno(err); 3256 mlog_errno(err);
3226 if (err < 0 && !status) 3257 if (err < 0 && !status)
3227 status = err; 3258 status = err;
3228 3259
3229 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3260 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3230 &OCFS2_I(inode)->ip_rw_lockres); 3261 &OCFS2_I(inode)->ip_rw_lockres);
3231 if (err < 0) 3262 if (err < 0)
3232 mlog_errno(err); 3263 mlog_errno(err);
3233 if (err < 0 && !status) 3264 if (err < 0 && !status)
3234 status = err; 3265 status = err;
3235 3266
3236 mlog_exit(status); 3267 mlog_exit(status);
3237 return status; 3268 return status;
3238 } 3269 }
3239 3270
3240 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3271 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3241 int new_level) 3272 int new_level)
3242 { 3273 {
3243 assert_spin_locked(&lockres->l_lock); 3274 assert_spin_locked(&lockres->l_lock);
3244 3275
3245 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3276 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3246 3277
3247 if (lockres->l_level <= new_level) { 3278 if (lockres->l_level <= new_level) {
3248 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", 3279 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n",
3249 lockres->l_level, new_level); 3280 lockres->l_level, new_level);
3250 BUG(); 3281 BUG();
3251 } 3282 }
3252 3283
3253 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 3284 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n",
3254 lockres->l_name, new_level, lockres->l_blocking); 3285 lockres->l_name, new_level, lockres->l_blocking);
3255 3286
3256 lockres->l_action = OCFS2_AST_DOWNCONVERT; 3287 lockres->l_action = OCFS2_AST_DOWNCONVERT;
3257 lockres->l_requested = new_level; 3288 lockres->l_requested = new_level;
3258 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3289 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3259 return lockres_set_pending(lockres); 3290 return lockres_set_pending(lockres);
3260 } 3291 }
3261 3292
3262 static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3293 static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3263 struct ocfs2_lock_res *lockres, 3294 struct ocfs2_lock_res *lockres,
3264 int new_level, 3295 int new_level,
3265 int lvb, 3296 int lvb,
3266 unsigned int generation) 3297 unsigned int generation)
3267 { 3298 {
3268 int ret; 3299 int ret;
3269 u32 dlm_flags = DLM_LKF_CONVERT; 3300 u32 dlm_flags = DLM_LKF_CONVERT;
3270 3301
3271 mlog_entry_void(); 3302 mlog_entry_void();
3272 3303
3273 if (lvb) 3304 if (lvb)
3274 dlm_flags |= DLM_LKF_VALBLK; 3305 dlm_flags |= DLM_LKF_VALBLK;
3275 3306
3276 ret = ocfs2_dlm_lock(osb->cconn, 3307 ret = ocfs2_dlm_lock(osb->cconn,
3277 new_level, 3308 new_level,
3278 &lockres->l_lksb, 3309 &lockres->l_lksb,
3279 dlm_flags, 3310 dlm_flags,
3280 lockres->l_name, 3311 lockres->l_name,
3281 OCFS2_LOCK_ID_MAX_LEN - 1, 3312 OCFS2_LOCK_ID_MAX_LEN - 1,
3282 lockres); 3313 lockres);
3283 lockres_clear_pending(lockres, generation, osb); 3314 lockres_clear_pending(lockres, generation, osb);
3284 if (ret) { 3315 if (ret) {
3285 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3316 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3286 ocfs2_recover_from_dlm_error(lockres, 1); 3317 ocfs2_recover_from_dlm_error(lockres, 1);
3287 goto bail; 3318 goto bail;
3288 } 3319 }
3289 3320
3290 ret = 0; 3321 ret = 0;
3291 bail: 3322 bail:
3292 mlog_exit(ret); 3323 mlog_exit(ret);
3293 return ret; 3324 return ret;
3294 } 3325 }
3295 3326
3296 /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3327 /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3297 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3328 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3298 struct ocfs2_lock_res *lockres) 3329 struct ocfs2_lock_res *lockres)
3299 { 3330 {
3300 assert_spin_locked(&lockres->l_lock); 3331 assert_spin_locked(&lockres->l_lock);
3301 3332
3302 mlog_entry_void(); 3333 mlog_entry_void();
3303 mlog(0, "lock %s\n", lockres->l_name); 3334 mlog(0, "lock %s\n", lockres->l_name);
3304 3335
3305 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3336 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3306 /* If we're already trying to cancel a lock conversion 3337 /* If we're already trying to cancel a lock conversion
3307 * then just drop the spinlock and allow the caller to 3338 * then just drop the spinlock and allow the caller to
3308 * requeue this lock. */ 3339 * requeue this lock. */
3309 3340
3310 mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 3341 mlog(0, "Lockres %s, skip convert\n", lockres->l_name);
3311 return 0; 3342 return 0;
3312 } 3343 }
3313 3344
3314 /* were we in a convert when we got the bast fire? */ 3345 /* were we in a convert when we got the bast fire? */
3315 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3346 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3316 lockres->l_action != OCFS2_AST_DOWNCONVERT); 3347 lockres->l_action != OCFS2_AST_DOWNCONVERT);
3317 /* set things up for the unlockast to know to just 3348 /* set things up for the unlockast to know to just
3318 * clear out the ast_action and unset busy, etc. */ 3349 * clear out the ast_action and unset busy, etc. */
3319 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3350 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3320 3351
3321 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3352 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3322 "lock %s, invalid flags: 0x%lx\n", 3353 "lock %s, invalid flags: 0x%lx\n",
3323 lockres->l_name, lockres->l_flags); 3354 lockres->l_name, lockres->l_flags);
3324 3355
3325 return 1; 3356 return 1;
3326 } 3357 }
3327 3358
3328 static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3359 static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3329 struct ocfs2_lock_res *lockres) 3360 struct ocfs2_lock_res *lockres)
3330 { 3361 {
3331 int ret; 3362 int ret;
3332 3363
3333 mlog_entry_void(); 3364 mlog_entry_void();
3334 mlog(0, "lock %s\n", lockres->l_name); 3365 mlog(0, "lock %s\n", lockres->l_name);
3335 3366
3336 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3367 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3337 DLM_LKF_CANCEL, lockres); 3368 DLM_LKF_CANCEL, lockres);
3338 if (ret) { 3369 if (ret) {
3339 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3370 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3340 ocfs2_recover_from_dlm_error(lockres, 0); 3371 ocfs2_recover_from_dlm_error(lockres, 0);
3341 } 3372 }
3342 3373
3343 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3374 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name);
3344 3375
3345 mlog_exit(ret); 3376 mlog_exit(ret);
3346 return ret; 3377 return ret;
3347 } 3378 }
3348 3379
3349 static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3380 static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3350 struct ocfs2_lock_res *lockres, 3381 struct ocfs2_lock_res *lockres,
3351 struct ocfs2_unblock_ctl *ctl) 3382 struct ocfs2_unblock_ctl *ctl)
3352 { 3383 {
3353 unsigned long flags; 3384 unsigned long flags;
3354 int blocking; 3385 int blocking;
3355 int new_level; 3386 int new_level;
3356 int ret = 0; 3387 int ret = 0;
3357 int set_lvb = 0; 3388 int set_lvb = 0;
3358 unsigned int gen; 3389 unsigned int gen;
3359 3390
3360 mlog_entry_void(); 3391 mlog_entry_void();
3361 3392
3362 spin_lock_irqsave(&lockres->l_lock, flags); 3393 spin_lock_irqsave(&lockres->l_lock, flags);
3363 3394
3364 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 3395 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
3365 3396
3366 recheck: 3397 recheck:
3367 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3398 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3368 /* XXX 3399 /* XXX
3369 * This is a *big* race. The OCFS2_LOCK_PENDING flag 3400 * This is a *big* race. The OCFS2_LOCK_PENDING flag
3370 * exists entirely for one reason - another thread has set 3401 * exists entirely for one reason - another thread has set
3371 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3402 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3372 * 3403 *
3373 * If we do ocfs2_cancel_convert() before the other thread 3404 * If we do ocfs2_cancel_convert() before the other thread
3374 * calls dlm_lock(), our cancel will do nothing. We will 3405 * calls dlm_lock(), our cancel will do nothing. We will
3375 * get no ast, and we will have no way of knowing the 3406 * get no ast, and we will have no way of knowing the
3376 * cancel failed. Meanwhile, the other thread will call 3407 * cancel failed. Meanwhile, the other thread will call
3377 * into dlm_lock() and wait...forever. 3408 * into dlm_lock() and wait...forever.
3378 * 3409 *
3379 * Why forever? Because another node has asked for the 3410 * Why forever? Because another node has asked for the
3380 * lock first; that's why we're here in unblock_lock(). 3411 * lock first; that's why we're here in unblock_lock().
3381 * 3412 *
3382 * The solution is OCFS2_LOCK_PENDING. When PENDING is 3413 * The solution is OCFS2_LOCK_PENDING. When PENDING is
3383 * set, we just requeue the unblock. Only when the other 3414 * set, we just requeue the unblock. Only when the other
3384 * thread has called dlm_lock() and cleared PENDING will 3415 * thread has called dlm_lock() and cleared PENDING will
3385 * we then cancel their request. 3416 * we then cancel their request.
3386 * 3417 *
3387 * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3418 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3388 * at the same time they set OCFS2_DLM_BUSY. They must 3419 * at the same time they set OCFS2_DLM_BUSY. They must
3389 * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3420 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3390 */ 3421 */
3391 if (lockres->l_flags & OCFS2_LOCK_PENDING) 3422 if (lockres->l_flags & OCFS2_LOCK_PENDING)
3392 goto leave_requeue; 3423 goto leave_requeue;
3393 3424
3394 ctl->requeue = 1; 3425 ctl->requeue = 1;
3395 ret = ocfs2_prepare_cancel_convert(osb, lockres); 3426 ret = ocfs2_prepare_cancel_convert(osb, lockres);
3396 spin_unlock_irqrestore(&lockres->l_lock, flags); 3427 spin_unlock_irqrestore(&lockres->l_lock, flags);
3397 if (ret) { 3428 if (ret) {
3398 ret = ocfs2_cancel_convert(osb, lockres); 3429 ret = ocfs2_cancel_convert(osb, lockres);
3399 if (ret < 0) 3430 if (ret < 0)
3400 mlog_errno(ret); 3431 mlog_errno(ret);
3401 } 3432 }
3402 goto leave; 3433 goto leave;
3403 } 3434 }
3435
3436 /*
3437 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3438 * set when the ast is received for an upconvert just before the
3439 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3440 * on the heels of the ast, we want to delay the downconvert just
3441 * enough to allow the up requestor to do its task. Because this
3442 * lock is in the blocked queue, the lock will be downconverted
3443 * as soon as the requestor is done with the lock.
3444 */
3445 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3446 goto leave_requeue;
3404 3447
3405 /* if we're blocking an exclusive and we have *any* holders, 3448 /* if we're blocking an exclusive and we have *any* holders,
3406 * then requeue. */ 3449 * then requeue. */
3407 if ((lockres->l_blocking == DLM_LOCK_EX) 3450 if ((lockres->l_blocking == DLM_LOCK_EX)
3408 && (lockres->l_ex_holders || lockres->l_ro_holders)) 3451 && (lockres->l_ex_holders || lockres->l_ro_holders))
3409 goto leave_requeue; 3452 goto leave_requeue;
3410 3453
3411 /* If it's a PR we're blocking, then only 3454 /* If it's a PR we're blocking, then only
3412 * requeue if we've got any EX holders */ 3455 * requeue if we've got any EX holders */
3413 if (lockres->l_blocking == DLM_LOCK_PR && 3456 if (lockres->l_blocking == DLM_LOCK_PR &&
3414 lockres->l_ex_holders) 3457 lockres->l_ex_holders)
3415 goto leave_requeue; 3458 goto leave_requeue;
3416 3459
3417 /* 3460 /*
3418 * Can we get a lock in this state if the holder counts are 3461 * Can we get a lock in this state if the holder counts are
3419 * zero? The meta data unblock code used to check this. 3462 * zero? The meta data unblock code used to check this.
3420 */ 3463 */
3421 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3464 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
3422 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3465 && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
3423 goto leave_requeue; 3466 goto leave_requeue;
3424 3467
3425 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 3468 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
3426 3469
3427 if (lockres->l_ops->check_downconvert 3470 if (lockres->l_ops->check_downconvert
3428 && !lockres->l_ops->check_downconvert(lockres, new_level)) 3471 && !lockres->l_ops->check_downconvert(lockres, new_level))
3429 goto leave_requeue; 3472 goto leave_requeue;
3430 3473
3431 /* If we get here, then we know that there are no more 3474 /* If we get here, then we know that there are no more
3432 * incompatible holders (and anyone asking for an incompatible 3475 * incompatible holders (and anyone asking for an incompatible
3433 * lock is blocked). We can now downconvert the lock */ 3476 * lock is blocked). We can now downconvert the lock */
3434 if (!lockres->l_ops->downconvert_worker) 3477 if (!lockres->l_ops->downconvert_worker)
3435 goto downconvert; 3478 goto downconvert;
3436 3479
3437 /* Some lockres types want to do a bit of work before 3480 /* Some lockres types want to do a bit of work before
3438 * downconverting a lock. Allow that here. The worker function 3481 * downconverting a lock. Allow that here. The worker function
3439 * may sleep, so we save off a copy of what we're blocking as 3482 * may sleep, so we save off a copy of what we're blocking as
3440 * it may change while we're not holding the spin lock. */ 3483 * it may change while we're not holding the spin lock. */
3441 blocking = lockres->l_blocking; 3484 blocking = lockres->l_blocking;
3442 spin_unlock_irqrestore(&lockres->l_lock, flags); 3485 spin_unlock_irqrestore(&lockres->l_lock, flags);
3443 3486
3444 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3487 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3445 3488
3446 if (ctl->unblock_action == UNBLOCK_STOP_POST) 3489 if (ctl->unblock_action == UNBLOCK_STOP_POST)
3447 goto leave; 3490 goto leave;
3448 3491
3449 spin_lock_irqsave(&lockres->l_lock, flags); 3492 spin_lock_irqsave(&lockres->l_lock, flags);
3450 if (blocking != lockres->l_blocking) { 3493 if (blocking != lockres->l_blocking) {
3451 /* If this changed underneath us, then we can't drop 3494 /* If this changed underneath us, then we can't drop
3452 * it just yet. */ 3495 * it just yet. */
3453 goto recheck; 3496 goto recheck;
3454 } 3497 }
3455 3498
3456 downconvert: 3499 downconvert:
3457 ctl->requeue = 0; 3500 ctl->requeue = 0;
3458 3501
3459 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3502 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3460 if (lockres->l_level == DLM_LOCK_EX) 3503 if (lockres->l_level == DLM_LOCK_EX)
3461 set_lvb = 1; 3504 set_lvb = 1;
3462 3505
3463 /* 3506 /*
3464 * We only set the lvb if the lock has been fully 3507 * We only set the lvb if the lock has been fully
3465 * refreshed - otherwise we risk setting stale 3508 * refreshed - otherwise we risk setting stale
3466 * data. Otherwise, there's no need to actually clear 3509 * data. Otherwise, there's no need to actually clear
3467 * out the lvb here as it's value is still valid. 3510 * out the lvb here as it's value is still valid.
3468 */ 3511 */
3469 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 3512 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3470 lockres->l_ops->set_lvb(lockres); 3513 lockres->l_ops->set_lvb(lockres);
3471 } 3514 }
3472 3515
3473 gen = ocfs2_prepare_downconvert(lockres, new_level); 3516 gen = ocfs2_prepare_downconvert(lockres, new_level);
3474 spin_unlock_irqrestore(&lockres->l_lock, flags); 3517 spin_unlock_irqrestore(&lockres->l_lock, flags);
3475 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3518 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3476 gen); 3519 gen);
3477 3520
3478 leave: 3521 leave:
3479 mlog_exit(ret); 3522 mlog_exit(ret);
3480 return ret; 3523 return ret;
3481 3524
3482 leave_requeue: 3525 leave_requeue:
3483 spin_unlock_irqrestore(&lockres->l_lock, flags); 3526 spin_unlock_irqrestore(&lockres->l_lock, flags);
3484 ctl->requeue = 1; 3527 ctl->requeue = 1;
3485 3528
3486 mlog_exit(0); 3529 mlog_exit(0);
3487 return 0; 3530 return 0;
3488 } 3531 }
3489 3532
3490 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3533 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3491 int blocking) 3534 int blocking)
3492 { 3535 {
3493 struct inode *inode; 3536 struct inode *inode;
3494 struct address_space *mapping; 3537 struct address_space *mapping;
3495 3538
3496 inode = ocfs2_lock_res_inode(lockres); 3539 inode = ocfs2_lock_res_inode(lockres);
3497 mapping = inode->i_mapping; 3540 mapping = inode->i_mapping;
3498 3541
3499 if (!S_ISREG(inode->i_mode)) 3542 if (!S_ISREG(inode->i_mode))
3500 goto out; 3543 goto out;
3501 3544
3502 /* 3545 /*
3503 * We need this before the filemap_fdatawrite() so that it can 3546 * We need this before the filemap_fdatawrite() so that it can
3504 * transfer the dirty bit from the PTE to the 3547 * transfer the dirty bit from the PTE to the
3505 * page. Unfortunately this means that even for EX->PR 3548 * page. Unfortunately this means that even for EX->PR
3506 * downconverts, we'll lose our mappings and have to build 3549 * downconverts, we'll lose our mappings and have to build
3507 * them up again. 3550 * them up again.
3508 */ 3551 */
3509 unmap_mapping_range(mapping, 0, 0, 0); 3552 unmap_mapping_range(mapping, 0, 0, 0);
3510 3553
3511 if (filemap_fdatawrite(mapping)) { 3554 if (filemap_fdatawrite(mapping)) {
3512 mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3555 mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3513 (unsigned long long)OCFS2_I(inode)->ip_blkno); 3556 (unsigned long long)OCFS2_I(inode)->ip_blkno);
3514 } 3557 }
3515 sync_mapping_buffers(mapping); 3558 sync_mapping_buffers(mapping);
3516 if (blocking == DLM_LOCK_EX) { 3559 if (blocking == DLM_LOCK_EX) {
3517 truncate_inode_pages(mapping, 0); 3560 truncate_inode_pages(mapping, 0);
3518 } else { 3561 } else {
3519 /* We only need to wait on the I/O if we're not also 3562 /* We only need to wait on the I/O if we're not also
3520 * truncating pages because truncate_inode_pages waits 3563 * truncating pages because truncate_inode_pages waits
3521 * for us above. We don't truncate pages if we're 3564 * for us above. We don't truncate pages if we're
3522 * blocking anything < EXMODE because we want to keep 3565 * blocking anything < EXMODE because we want to keep
3523 * them around in that case. */ 3566 * them around in that case. */
3524 filemap_fdatawait(mapping); 3567 filemap_fdatawait(mapping);
3525 } 3568 }
3526 3569
3527 out: 3570 out:
3528 return UNBLOCK_CONTINUE; 3571 return UNBLOCK_CONTINUE;
3529 } 3572 }
3530 3573
3531 static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, 3574 static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3532 struct ocfs2_lock_res *lockres, 3575 struct ocfs2_lock_res *lockres,
3533 int new_level) 3576 int new_level)
3534 { 3577 {
3535 int checkpointed = ocfs2_ci_fully_checkpointed(ci); 3578 int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3536 3579
3537 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3580 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3538 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3581 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3539 3582
3540 if (checkpointed) 3583 if (checkpointed)
3541 return 1; 3584 return 1;
3542 3585
3543 ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); 3586 ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3544 return 0; 3587 return 0;
3545 } 3588 }
3546 3589
/* check_downconvert hook for inode metadata locks: allow the downconvert
 * only once the inode's metadata cache is fully checkpointed. */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct inode *inode = ocfs2_lock_res_inode(lockres);

	return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level);
}
3554 3597
/* set_lvb hook for inode metadata locks: stuff the inode's metadata
 * into the lock value block before releasing EX. */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	struct inode *inode = ocfs2_lock_res_inode(lockres);

	__ocfs2_stuff_meta_lvb(inode);
}
3561 3604
/*
 * Does the final reference drop on our dentry lock. Right now this
 * happens in the downconvert thread, but we could choose to simplify the
 * dlmglue API and push these off to the ocfs2_wq in the future.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);

	ocfs2_dentry_lock_put(osb, dl);
}
3573 3616
3574 /* 3617 /*
3575 * d_delete() matching dentries before the lock downconvert. 3618 * d_delete() matching dentries before the lock downconvert.
3576 * 3619 *
3577 * At this point, any process waiting to destroy the 3620 * At this point, any process waiting to destroy the
3578 * dentry_lock due to last ref count is stopped by the 3621 * dentry_lock due to last ref count is stopped by the
3579 * OCFS2_LOCK_QUEUED flag. 3622 * OCFS2_LOCK_QUEUED flag.
3580 * 3623 *
3581 * We have two potential problems 3624 * We have two potential problems
3582 * 3625 *
3583 * 1) If we do the last reference drop on our dentry_lock (via dput) 3626 * 1) If we do the last reference drop on our dentry_lock (via dput)
3584 * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3627 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
3585 * the downconvert to finish. Instead we take an elevated 3628 * the downconvert to finish. Instead we take an elevated
3586 * reference and push the drop until after we've completed our 3629 * reference and push the drop until after we've completed our
3587 * unblock processing. 3630 * unblock processing.
3588 * 3631 *
3589 * 2) There might be another process with a final reference, 3632 * 2) There might be another process with a final reference,
3590 * waiting on us to finish processing. If this is the case, we 3633 * waiting on us to finish processing. If this is the case, we
3591 * detect it and exit out - there's no more dentries anyway. 3634 * detect it and exit out - there's no more dentries anyway.
3592 */ 3635 */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking)
{
	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
	struct dentry *dentry;
	unsigned long flags;
	int extra_ref = 0;

	/*
	 * This node is blocking another node from getting a read
	 * lock. This happens when we've renamed within a
	 * directory. We've forced the other nodes to d_delete(), but
	 * we never actually dropped our lock because it's still
	 * valid. The downconvert code will retain a PR for this node,
	 * so there's no further work to do.
	 */
	if (blocking == DLM_LOCK_PR)
		return UNBLOCK_CONTINUE;

	/*
	 * Mark this inode as potentially orphaned. The code in
	 * ocfs2_delete_inode() will figure out whether it actually
	 * needs to be freed or not.
	 */
	spin_lock(&oi->ip_lock);
	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
	spin_unlock(&oi->ip_lock);

	/*
	 * Yuck. We need to make sure however that the check of
	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
	 * respect to a reference decrement or the setting of that
	 * flag.
	 *
	 * Lock ordering here: l_lock is taken outside
	 * dentry_attach_lock.
	 */
	spin_lock_irqsave(&lockres->l_lock, flags);
	spin_lock(&dentry_attach_lock);
	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
	    && dl->dl_count) {
		dl->dl_count++;
		extra_ref = 1;
	}
	spin_unlock(&dentry_attach_lock);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog(0, "extra_ref = %d\n", extra_ref);

	/*
	 * We have a process waiting on us in ocfs2_dentry_iput(),
	 * which means we can't have any more outstanding
	 * aliases. There's no need to do any more work.
	 */
	if (!extra_ref)
		return UNBLOCK_CONTINUE;

	/*
	 * Walk and d_delete() every local alias of the inode.
	 * dentry_attach_lock is dropped around the dcache calls and
	 * re-taken for the next lookup iteration.
	 */
	spin_lock(&dentry_attach_lock);
	while (1) {
		dentry = ocfs2_find_local_alias(dl->dl_inode,
						dl->dl_parent_blkno, 1);
		if (!dentry)
			break;
		spin_unlock(&dentry_attach_lock);

		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
		     dentry->d_name.name);

		/*
		 * The following dcache calls may do an
		 * iput(). Normally we don't want that from the
		 * downconverting thread, but in this case it's ok
		 * because the requesting node already has an
		 * exclusive lock on the inode, so it can't be queued
		 * for a downconvert.
		 */
		d_delete(dentry);
		dput(dentry);

		spin_lock(&dentry_attach_lock);
	}
	spin_unlock(&dentry_attach_lock);

	/*
	 * If we are the last holder of this dentry lock, there is no
	 * reason to downconvert so skip straight to the unlock.
	 */
	if (dl->dl_count == 1)
		return UNBLOCK_STOP_POST;

	return UNBLOCK_CONTINUE_POST;
}
3683 3726
3684 static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 3727 static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
3685 int new_level) 3728 int new_level)
3686 { 3729 {
3687 struct ocfs2_refcount_tree *tree = 3730 struct ocfs2_refcount_tree *tree =
3688 ocfs2_lock_res_refcount_tree(lockres); 3731 ocfs2_lock_res_refcount_tree(lockres);
3689 3732
3690 return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); 3733 return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
3691 } 3734 }
3692 3735
3693 static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 3736 static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
3694 int blocking) 3737 int blocking)
3695 { 3738 {
3696 struct ocfs2_refcount_tree *tree = 3739 struct ocfs2_refcount_tree *tree =
3697 ocfs2_lock_res_refcount_tree(lockres); 3740 ocfs2_lock_res_refcount_tree(lockres);
3698 3741
3699 ocfs2_metadata_cache_purge(&tree->rf_ci); 3742 ocfs2_metadata_cache_purge(&tree->rf_ci);
3700 3743
3701 return UNBLOCK_CONTINUE; 3744 return UNBLOCK_CONTINUE;
3702 } 3745 }
3703 3746
3704 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 3747 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3705 { 3748 {
3706 struct ocfs2_qinfo_lvb *lvb; 3749 struct ocfs2_qinfo_lvb *lvb;
3707 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 3750 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
3708 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 3751 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3709 oinfo->dqi_gi.dqi_type); 3752 oinfo->dqi_gi.dqi_type);
3710 3753
3711 mlog_entry_void(); 3754 mlog_entry_void();
3712 3755
3713 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 3756 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3714 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 3757 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
3715 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 3758 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
3716 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 3759 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
3717 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 3760 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
3718 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 3761 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
3719 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 3762 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
3720 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 3763 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
3721 3764
3722 mlog_exit_void(); 3765 mlog_exit_void();
3723 } 3766 }
3724 3767
3725 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 3768 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3726 { 3769 {
3727 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3770 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3728 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 3771 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3729 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3772 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3730 3773
3731 mlog_entry_void(); 3774 mlog_entry_void();
3732 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 3775 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3733 ocfs2_cluster_unlock(osb, lockres, level); 3776 ocfs2_cluster_unlock(osb, lockres, level);
3734 mlog_exit_void(); 3777 mlog_exit_void();
3735 } 3778 }
3736 3779
/*
 * Refresh the in-memory global quota info, preferring the DLM LVB
 * over a disk read when it carries a version we understand.
 * Returns 0 on success or a negative error code from the block read.
 */
static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
{
	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
					    oinfo->dqi_gi.dqi_type);
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	struct buffer_head *bh = NULL;
	struct ocfs2_global_disk_dqinfo *gdinfo;
	int status = 0;

	/* Fast path: take the values from the LVB (big-endian). */
	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					be32_to_cpu(lvb->lvb_free_entry);
	} else {
		/* Slow path: read block 0 of the global quota file and
		 * take the on-disk values (little-endian). */
		status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
		gdinfo = (struct ocfs2_global_disk_dqinfo *)
					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					le32_to_cpu(gdinfo->dqi_free_entry);
		brelse(bh);
		ocfs2_track_lock_refresh(lockres);
	}

bail:
	return status;
}
3778 3821
/* Lock quota info, this function expects at least shared lock on the quota file
 * so that we can safely refresh quota info from disk.
 *
 * Takes the global quota info cluster lock at PR (ex == 0) or EX
 * (ex != 0) level and refreshes the cached info if needed.
 * Returns 0 on success or a negative error code; -EROFS when an
 * exclusive lock is requested on a hard-readonly device. */
int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	int status = 0;

	mlog_entry_void();

	/* On RO devices, locking really isn't needed... */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}
	/* Local (non-clustered) mounts skip cluster locking entirely. */
	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;
	/* OK, we have the lock but we need to refresh the quota info */
	status = ocfs2_refresh_qinfo(oinfo);
	if (status)
		/* Refresh failed - drop the lock again so the caller
		 * doesn't hold it with stale info. */
		ocfs2_qinfo_unlock(oinfo, ex);
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}
3815 3858
3816 int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) 3859 int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
3817 { 3860 {
3818 int status; 3861 int status;
3819 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3862 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3820 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 3863 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3821 struct ocfs2_super *osb = lockres->l_priv; 3864 struct ocfs2_super *osb = lockres->l_priv;
3822 3865
3823 3866
3824 if (ocfs2_is_hard_readonly(osb)) 3867 if (ocfs2_is_hard_readonly(osb))
3825 return -EROFS; 3868 return -EROFS;
3826 3869
3827 if (ocfs2_mount_local(osb)) 3870 if (ocfs2_mount_local(osb))
3828 return 0; 3871 return 0;
3829 3872
3830 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 3873 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3831 if (status < 0) 3874 if (status < 0)
3832 mlog_errno(status); 3875 mlog_errno(status);
3833 3876
3834 return status; 3877 return status;
3835 } 3878 }
3836 3879
3837 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) 3880 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
3838 { 3881 {
3839 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3882 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3840 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 3883 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3841 struct ocfs2_super *osb = lockres->l_priv; 3884 struct ocfs2_super *osb = lockres->l_priv;
3842 3885
3843 if (!ocfs2_mount_local(osb)) 3886 if (!ocfs2_mount_local(osb))
3844 ocfs2_cluster_unlock(osb, lockres, level); 3887 ocfs2_cluster_unlock(osb, lockres, level);
3845 } 3888 }
3846 3889
3847 /* 3890 /*
3848 * This is the filesystem locking protocol. It provides the lock handling 3891 * This is the filesystem locking protocol. It provides the lock handling
3849 * hooks for the underlying DLM. It has a maximum version number. 3892 * hooks for the underlying DLM. It has a maximum version number.
3850 * The version number allows interoperability with systems running at 3893 * The version number allows interoperability with systems running at
3851 * the same major number and an equal or smaller minor number. 3894 * the same major number and an equal or smaller minor number.
3852 * 3895 *
3853 * Whenever the filesystem does new things with locks (adds or removes a 3896 * Whenever the filesystem does new things with locks (adds or removes a
3854 * lock, orders them differently, does different things underneath a lock), 3897 * lock, orders them differently, does different things underneath a lock),
3855 * the version must be changed. The protocol is negotiated when joining 3898 * the version must be changed. The protocol is negotiated when joining
3856 * the dlm domain. A node may join the domain if its major version is 3899 * the dlm domain. A node may join the domain if its major version is
3857 * identical to all other nodes and its minor version is greater than 3900 * identical to all other nodes and its minor version is greater than
3858 * or equal to all other nodes. When its minor version is greater than 3901 * or equal to all other nodes. When its minor version is greater than
3859 * the other nodes, it will run at the minor version specified by the 3902 * the other nodes, it will run at the minor version specified by the
3860 * other nodes. 3903 * other nodes.
3861 * 3904 *
3862 * If a locking change is made that will not be compatible with older 3905 * If a locking change is made that will not be compatible with older
3863 * versions, the major number must be increased and the minor version set 3906 * versions, the major number must be increased and the minor version set
3864 * to zero. If a change merely adds a behavior that can be disabled when 3907 * to zero. If a change merely adds a behavior that can be disabled when
3865 * speaking to older versions, the minor version must be increased. If a 3908 * speaking to older versions, the minor version must be increased. If a
3866 * change adds a fully backwards compatible change (eg, LVB changes that 3909 * change adds a fully backwards compatible change (eg, LVB changes that
3867 * are just ignored by older versions), the version does not need to be 3910 * are just ignored by older versions), the version does not need to be
3868 * updated. 3911 * updated.
3869 */ 3912 */
/* Callback table handed to the cluster stack; the handlers are
 * defined earlier in this file. */
static struct ocfs2_locking_protocol lproto = {
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	.lp_lock_ast = ocfs2_locking_ast,
	.lp_blocking_ast = ocfs2_blocking_ast,
	.lp_unlock_ast = ocfs2_unlock_ast,
};
3879 3922
3880 void ocfs2_set_locking_protocol(void) 3923 void ocfs2_set_locking_protocol(void)
3881 { 3924 {
3882 ocfs2_stack_glue_set_locking_protocol(&lproto); 3925 ocfs2_stack_glue_set_locking_protocol(&lproto);
3883 } 3926 }
3884 3927
3885 3928
/*
 * Process one lock resource taken off the osb blocked list: run the
 * unblock machinery and either clear OCFS2_LOCK_QUEUED or requeue the
 * lockres, then fire the type-specific post-unlock hook if requested.
 */
static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
				       struct ocfs2_lock_res *lockres)
{
	int status;
	struct ocfs2_unblock_ctl ctl = {0, 0,};
	unsigned long flags;

	/* Our reference to the lockres in this function can be
	 * considered valid until we remove the OCFS2_LOCK_QUEUED
	 * flag. */

	mlog_entry_void();

	BUG_ON(!lockres);
	BUG_ON(!lockres->l_ops);

	mlog(0, "lockres %s blocked.\n", lockres->l_name);

	/* Detect whether a lock has been marked as going away while
	 * the downconvert thread was processing other things. A lock can
	 * still be marked with OCFS2_LOCK_FREEING after this check,
	 * but short circuiting here will still save us some
	 * performance. */
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (lockres->l_flags & OCFS2_LOCK_FREEING)
		goto unqueue;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	/* l_lock is dropped across the call into the unblock code. */
	status = ocfs2_unblock_lock(osb, lockres, &ctl);
	if (status < 0)
		mlog_errno(status);

	spin_lock_irqsave(&lockres->l_lock, flags);
unqueue:
	/* On the FREEING fast path ctl is still {0, 0}, so !ctl.requeue
	 * holds and the lock is simply unqueued. */
	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
	} else
		ocfs2_schedule_blocked_lock(osb, lockres);

	mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
	     ctl.requeue ? "yes" : "no");
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	/* Run the per-type post-unlock hook outside the spinlock, but
	 * only when the unblock action asked for it. */
	if (ctl.unblock_action != UNBLOCK_CONTINUE
	    && lockres->l_ops->post_unlock)
		lockres->l_ops->post_unlock(osb, lockres);

	mlog_exit_void();
}
3935 3978
/*
 * Queue a lock resource for the downconvert thread. Caller must hold
 * lockres->l_lock; dc_task_lock is nested inside it here.
 */
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres)
{
	mlog_entry_void();

	assert_spin_locked(&lockres->l_lock);

	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
		/* Do not schedule a lock for downconvert when it's on
		 * the way to destruction - any nodes wanting access
		 * to the resource will get it soon. */
		mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n",
		     lockres->l_name, lockres->l_flags);
		return;
	}

	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);

	spin_lock(&osb->dc_task_lock);
	/* Only add the lockres once: an empty l_blocked_list means it
	 * is not currently on the osb blocked list. */
	if (list_empty(&lockres->l_blocked_list)) {
		list_add_tail(&lockres->l_blocked_list,
			      &osb->blocked_lock_list);
		osb->blocked_lock_count++;
	}
	spin_unlock(&osb->dc_task_lock);

	mlog_exit_void();
}
3964 4007
/*
 * Drain the osb blocked-lock list: pop each queued lockres and hand
 * it to ocfs2_process_blocked_lock(). Only the number of entries
 * present at entry is processed, so locks requeued during this pass
 * are left for the next wakeup.
 */
static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
{
	unsigned long processed;
	struct ocfs2_lock_res *lockres;

	mlog_entry_void();

	spin_lock(&osb->dc_task_lock);
	/* grab this early so we know to try again if a state change and
	 * wake happens part-way through our work */
	osb->dc_work_sequence = osb->dc_wake_sequence;

	processed = osb->blocked_lock_count;
	while (processed) {
		BUG_ON(list_empty(&osb->blocked_lock_list));

		lockres = list_entry(osb->blocked_lock_list.next,
				     struct ocfs2_lock_res, l_blocked_list);
		list_del_init(&lockres->l_blocked_list);
		osb->blocked_lock_count--;
		/* dc_task_lock is dropped while the lockres is
		 * processed and re-taken for the next iteration. */
		spin_unlock(&osb->dc_task_lock);

		BUG_ON(!processed);
		processed--;

		ocfs2_process_blocked_lock(osb, lockres);

		spin_lock(&osb->dc_task_lock);
	}
	spin_unlock(&osb->dc_task_lock);

	mlog_exit_void();
}
3998 4041
3999 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 4042 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
4000 { 4043 {
4001 int empty = 0; 4044 int empty = 0;
4002 4045
4003 spin_lock(&osb->dc_task_lock); 4046 spin_lock(&osb->dc_task_lock);
4004 if (list_empty(&osb->blocked_lock_list)) 4047 if (list_empty(&osb->blocked_lock_list))
4005 empty = 1; 4048 empty = 1;
4006 4049
4007 spin_unlock(&osb->dc_task_lock); 4050 spin_unlock(&osb->dc_task_lock);
4008 return empty; 4051 return empty;
4009 } 4052 }
4010 4053
4011 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 4054 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
4012 { 4055 {
4013 int should_wake = 0; 4056 int should_wake = 0;
4014 4057
4015 spin_lock(&osb->dc_task_lock); 4058 spin_lock(&osb->dc_task_lock);
4016 if (osb->dc_work_sequence != osb->dc_wake_sequence) 4059 if (osb->dc_work_sequence != osb->dc_wake_sequence)
4017 should_wake = 1; 4060 should_wake = 1;
4018 spin_unlock(&osb->dc_task_lock); 4061 spin_unlock(&osb->dc_task_lock);
4019 4062
4020 return should_wake; 4063 return should_wake;
4021 } 4064 }
4022 4065
/*
 * Main loop of the downconvert kthread. arg is the ocfs2_super.
 * Sleeps until woken or asked to stop, then drains the blocked-lock
 * list; clears osb->dc_task before returning.
 */
static int ocfs2_downconvert_thread(void *arg)
{
	int status = 0;
	struct ocfs2_super *osb = arg;

	/* only quit once we've been asked to stop and there is no more
	 * work available */
	while (!(kthread_should_stop() &&
		ocfs2_downconvert_thread_lists_empty(osb))) {

		wait_event_interruptible(osb->dc_event,
					 ocfs2_downconvert_thread_should_wake(osb) ||
					 kthread_should_stop());

		mlog(0, "downconvert_thread: awoken\n");

		ocfs2_downconvert_thread_do_work(osb);
	}

	osb->dc_task = NULL;
	return status;
}
4045 4088
4046 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 4089 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
4047 { 4090 {
4048 spin_lock(&osb->dc_task_lock); 4091 spin_lock(&osb->dc_task_lock);
4049 /* make sure the voting thread gets a swipe at whatever changes 4092 /* make sure the voting thread gets a swipe at whatever changes
4050 * the caller may have made to the voting state */ 4093 * the caller may have made to the voting state */
4051 osb->dc_wake_sequence++; 4094 osb->dc_wake_sequence++;
4052 spin_unlock(&osb->dc_task_lock); 4095 spin_unlock(&osb->dc_task_lock);
4053 wake_up(&osb->dc_event); 4096 wake_up(&osb->dc_event);
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * ocfs2.h 4 * ocfs2.h
5 * 5 *
6 * Defines macros and structures used in OCFS2 6 * Defines macros and structures used in OCFS2
7 * 7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #ifndef OCFS2_H 26 #ifndef OCFS2_H
27 #define OCFS2_H 27 #define OCFS2_H
28 28
29 #include <linux/spinlock.h> 29 #include <linux/spinlock.h>
30 #include <linux/sched.h> 30 #include <linux/sched.h>
31 #include <linux/wait.h> 31 #include <linux/wait.h>
32 #include <linux/list.h> 32 #include <linux/list.h>
33 #include <linux/rbtree.h> 33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h> 34 #include <linux/workqueue.h>
35 #include <linux/kref.h> 35 #include <linux/kref.h>
36 #include <linux/mutex.h> 36 #include <linux/mutex.h>
37 #include <linux/lockdep.h> 37 #include <linux/lockdep.h>
38 #include <linux/jbd2.h> 38 #include <linux/jbd2.h>
39 39
40 /* For union ocfs2_dlm_lksb */ 40 /* For union ocfs2_dlm_lksb */
41 #include "stackglue.h" 41 #include "stackglue.h"
42 42
43 #include "ocfs2_fs.h" 43 #include "ocfs2_fs.h"
44 #include "ocfs2_lockid.h" 44 #include "ocfs2_lockid.h"
45 45
46 /* For struct ocfs2_blockcheck_stats */ 46 /* For struct ocfs2_blockcheck_stats */
47 #include "blockcheck.h" 47 #include "blockcheck.h"
48 48
49 49
/* Caching of metadata buffers */

/* Most user visible OCFS2 inodes will have very few pieces of
 * metadata, but larger files (including bitmaps, etc) must be taken
 * into account when designing an access scheme. We allow a small
 * amount of inlined blocks to be stored on an array and grow the
 * structure into a rb tree when necessary. */
#define OCFS2_CACHE_INFO_MAX_ARRAY 2

/* Flags for ocfs2_caching_info */

enum ocfs2_caching_info_flags {
	/* Indicates that the metadata cache is using the inline array */
	OCFS2_CACHE_FL_INLINE	= 1<<1,
};
65 65
struct ocfs2_caching_operations;

/*
 * Tracks which metadata blocks of an object are currently known to be
 * up to date.  Small sets live in an inline array (ci_array, while
 * OCFS2_CACHE_FL_INLINE is set in ci_flags); larger sets are moved
 * into an rb tree (ci_tree).
 */
struct ocfs2_caching_info {
	/*
	 * The parent structure provides the locks, but because the
	 * parent structure can differ, it provides locking operations
	 * to struct ocfs2_caching_info.
	 */
	const struct ocfs2_caching_operations *ci_ops;

	/* next two are protected by trans_inc_lock */
	/* which transaction were we created on? Zero if none. */
	unsigned long		ci_created_trans;
	/* last transaction we were a part of. */
	unsigned long		ci_last_trans;

	/* Cache structures */
	unsigned int		ci_flags;	/* OCFS2_CACHE_FL_* */
	unsigned int		ci_num_cached;	/* live entries in ci_cache */
	union {
		sector_t	ci_array[OCFS2_CACHE_INFO_MAX_ARRAY];
		struct rb_root	ci_tree;
	} ci_cache;
};
/*
 * Need this prototype here instead of in uptodate.h because journal.h
 * uses it.
 */
struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
94 94
/* this limits us to 256 nodes
 * if we need more, we can do a kmalloc for the map */
#define OCFS2_NODE_MAP_MAX_NODES    256
/* Fixed-size bitmap of cluster node numbers. */
struct ocfs2_node_map {
	u16 num_nodes;
	unsigned long map[BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES)];
};
102 102
/* What an AST (lock grant callback) should do when it fires. */
enum ocfs2_ast_action {
	OCFS2_AST_INVALID = 0,
	OCFS2_AST_ATTACH,
	OCFS2_AST_CONVERT,
	OCFS2_AST_DOWNCONVERT,
};
109 109
/* actions for an unlockast function to take. */
enum ocfs2_unlock_action {
	OCFS2_UNLOCK_INVALID = 0,
	OCFS2_UNLOCK_CANCEL_CONVERT,
	OCFS2_UNLOCK_DROP_LOCK,
};
116 116
/* ocfs2_lock_res->l_flags flags. */
#define OCFS2_LOCK_ATTACHED      (0x00000001) /* we have initialized
					       * the lvb */
#define OCFS2_LOCK_BUSY          (0x00000002) /* we are currently in
					       * dlm_lock */
#define OCFS2_LOCK_BLOCKED       (0x00000004) /* blocked waiting to
					       * downconvert*/
#define OCFS2_LOCK_LOCAL         (0x00000008) /* newly created inode */
#define OCFS2_LOCK_NEEDS_REFRESH (0x00000010)
#define OCFS2_LOCK_REFRESHING    (0x00000020)
#define OCFS2_LOCK_INITIALIZED   (0x00000040) /* track initialization
					       * for shutdown paths */
#define OCFS2_LOCK_FREEING       (0x00000080) /* help dlmglue track
					       * when to skip queueing
					       * a lock because it's
					       * about to be
					       * dropped. */
#define OCFS2_LOCK_QUEUED        (0x00000100) /* queued for downconvert */
#define OCFS2_LOCK_NOCACHE       (0x00000200) /* don't use a holder count */
#define OCFS2_LOCK_PENDING       (0x00000400) /* This lockres is pending a
						 call to dlm_lock.  Only
						 exists with BUSY set. */
#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
						     * from downconverting
						     * before the upconvert
						     * has completed */
139 143
struct ocfs2_lock_res_ops;

/* Completion callback for asynchronous lock requests. */
typedef void (*ocfs2_lock_callback)(int status, unsigned long data);

/*
 * One cluster lock as tracked by dlmglue.  l_lock serializes access to
 * the mutable state; threads waiting for flag transitions sleep on
 * l_event (see l_mask_waiters).
 */
struct ocfs2_lock_res {
	void                    *l_priv;
	struct ocfs2_lock_res_ops *l_ops;
	spinlock_t               l_lock;

	struct list_head         l_blocked_list;
	struct list_head         l_mask_waiters;

	enum ocfs2_lock_type     l_type;
	unsigned long            l_flags;	/* OCFS2_LOCK_* bits above */
	char                     l_name[OCFS2_LOCK_ID_MAX_LEN];
	int                      l_level;	/* currently granted DLM level */
	unsigned int             l_ro_holders;
	unsigned int             l_ex_holders;
	union ocfs2_dlm_lksb     l_lksb;

	/* used from AST/BAST funcs. */
	enum ocfs2_ast_action    l_action;
	enum ocfs2_unlock_action l_unlock_action;
	int                      l_requested;	/* level requested of the DLM */
	int                      l_blocking;	/* level a remote node wants */
	unsigned int             l_pending_gen;

	wait_queue_head_t        l_event;

	struct list_head         l_debug_list;

#ifdef CONFIG_OCFS2_FS_STATS
	unsigned long long       l_lock_num_prmode;	/* PR acquires */
	unsigned long long       l_lock_num_exmode;	/* EX acquires */
	unsigned int             l_lock_num_prmode_failed; /* Failed PR gets */
	unsigned int             l_lock_num_exmode_failed; /* Failed EX gets */
	unsigned long long       l_lock_total_prmode;	/* Tot wait for PR */
	unsigned long long       l_lock_total_exmode;	/* Tot wait for EX */
	unsigned int             l_lock_max_prmode;	/* Max wait for PR */
	unsigned int             l_lock_max_exmode;	/* Max wait for EX */
	unsigned int             l_lock_refresh;	/* Disk refreshes */
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map       l_lockdep_map;
#endif
};
186 190
/* Whether the periodic orphan-dir scan is currently running. */
enum ocfs2_orphan_scan_state {
	ORPHAN_SCAN_ACTIVE,
	ORPHAN_SCAN_INACTIVE
};
191 195
/* State for the periodic, cluster-coordinated orphan directory scan. */
struct ocfs2_orphan_scan {
	struct mutex		os_lock;
	struct ocfs2_super	*os_osb;
	struct ocfs2_lock_res	os_lockres;	/* lock to synchronize scans */
	struct delayed_work	os_orphan_scan_work;
	struct timespec		os_scantime;	/* time this node ran the scan */
	u32			os_count;	/* tracks node specific scans */
	u32			os_seqno;	/* tracks cluster wide scans */
	atomic_t		os_state;	/* ACTIVE or INACTIVE */
};
202 206
/* Refcounted debugfs state for the DLM locking_state file. */
struct ocfs2_dlm_debug {
	struct kref d_refcnt;
	struct dentry *d_locking_state;
	struct list_head d_lockres_tracking;
};
208 212
/* Lifecycle of a mounted volume (stored in osb->vol_state). */
enum ocfs2_vol_state
{
	VOLUME_INIT = 0,
	VOLUME_MOUNTED,
	VOLUME_MOUNTED_QUOTAS,
	VOLUME_DISMOUNTED,
	VOLUME_DISABLED
};
217 221
/* Per-mount allocation counters (exposed for debugging/statistics). */
struct ocfs2_alloc_stats
{
	atomic_t moves;
	atomic_t local_data;
	atomic_t bitmap_data;
	atomic_t bg_allocs;
	atomic_t bg_extends;
};
226 230
enum ocfs2_local_alloc_state
{
	OCFS2_LA_UNUSED = 0,	/* Local alloc will never be used for
				 * this mountpoint. */
	OCFS2_LA_ENABLED,	/* Local alloc is in use. */
	OCFS2_LA_THROTTLED,	/* Local alloc is in use, but number
				 * of bits has been reduced. */
	OCFS2_LA_DISABLED	/* Local alloc has temporarily been
				 * disabled. */
};
237 241
/* Bits stored in osb->s_mount_opt. */
enum ocfs2_mount_options
{
	OCFS2_MOUNT_HB_LOCAL   = 1 << 0,	/* Heartbeat started in local mode */
	OCFS2_MOUNT_BARRIER = 1 << 1,	/* Use block barriers */
	OCFS2_MOUNT_NOINTR  = 1 << 2,	/* Don't catch signals */
	OCFS2_MOUNT_ERRORS_PANIC = 1 << 3,	/* Panic on errors */
	OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4,	/* No data ordering */
	OCFS2_MOUNT_LOCALFLOCKS = 1 << 5,	/* No cluster aware user file locks */
	OCFS2_MOUNT_NOUSERXATTR = 1 << 6,	/* No user xattr */
	OCFS2_MOUNT_INODE64 = 1 << 7,	/* Allow inode numbers > 2^32 */
	OCFS2_MOUNT_POSIX_ACL = 1 << 8,	/* Force POSIX access control lists */
	OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9,	/* Disable POSIX access
						   control lists */
	OCFS2_MOUNT_USRQUOTA = 1 << 10,	/* We support user quotas */
	OCFS2_MOUNT_GRPQUOTA = 1 << 11,	/* We support group quotas */
};
254 258
/* Bits in osb->osb_flags; protected by osb_lock (see helpers below). */
#define OCFS2_OSB_SOFT_RO			0x0001
#define OCFS2_OSB_HARD_RO			0x0002
#define OCFS2_OSB_ERROR_FS			0x0004
#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED	0x0008

/* Default atime update granularity, in seconds. */
#define OCFS2_DEFAULT_ATIME_QUANTUM		60
261 265
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_replay_map;
struct ocfs2_quota_recovery;
struct ocfs2_dentry_lock;

/*
 * In-memory state for one mounted ocfs2 volume.  Reached from the VFS
 * super_block via sb->s_fs_info (see the OCFS2_SB() macro below).
 */
struct ocfs2_super
{
	struct task_struct *commit_task;
	struct super_block *sb;
	struct inode *root_inode;
	struct inode *sys_root_inode;
	struct inode *system_inodes[NUM_SYSTEM_INODES];

	struct ocfs2_slot_info *slot_info;

	u32 *slot_recovery_generations;

	spinlock_t node_map_lock;

	u64 root_blkno;
	u64 system_dir_blkno;
	u64 bitmap_blkno;
	u32 bitmap_cpg;
	u8 *uuid;
	char *uuid_str;
	u32 uuid_hash;
	u8 *vol_label;
	u64 first_cluster_group_blkno;
	u32 fs_generation;

	u32 s_feature_compat;
	u32 s_feature_incompat;
	u32 s_feature_ro_compat;

	/* Protects s_next_generation, osb_flags and s_inode_steal_slot.
	 * Could protect more on osb as it's very short lived.
	 */
	spinlock_t osb_lock;
	u32 s_next_generation;
	unsigned long osb_flags;
	s16 s_inode_steal_slot;
	atomic_t s_num_inodes_stolen;

	unsigned long s_mount_opt;	/* enum ocfs2_mount_options bits */
	unsigned int s_atime_quantum;

	unsigned int max_slots;
	unsigned int node_num;
	int slot_num;
	int preferred_slot;
	int s_sectsize_bits;
	int s_clustersize;
	int s_clustersize_bits;
	unsigned int s_xattr_inline_size;

	atomic_t vol_state;		/* enum ocfs2_vol_state */
	struct mutex recovery_lock;
	struct ocfs2_recovery_map *recovery_map;
	struct ocfs2_replay_map *replay_map;
	struct task_struct *recovery_thread_task;
	int disable_recovery;
	wait_queue_head_t checkpoint_event;
	atomic_t needs_checkpoint;
	struct ocfs2_journal *journal;
	unsigned long osb_commit_interval;

	struct delayed_work la_enable_wq;

	/*
	 * Must hold local alloc i_mutex and osb->osb_lock to change
	 * local_alloc_bits. Reads can be done under either lock.
	 */
	unsigned int local_alloc_bits;
	unsigned int local_alloc_default_bits;

	enum ocfs2_local_alloc_state local_alloc_state; /* protected
							 * by osb_lock */

	struct buffer_head *local_alloc_bh;

	u64 la_last_gd;

	/* Next three fields are for local node slot recovery during
	 * mount. */
	int dirty;
	struct ocfs2_dinode *local_alloc_copy;
	struct ocfs2_quota_recovery *quota_rec;

	struct ocfs2_blockcheck_stats osb_ecc_stats;
	struct ocfs2_alloc_stats alloc_stats;
	char dev_str[20];		/* "major,minor" of the device */

	char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
	struct ocfs2_cluster_connection *cconn;
	struct ocfs2_lock_res osb_super_lockres;
	struct ocfs2_lock_res osb_rename_lockres;
	struct ocfs2_lock_res osb_nfs_sync_lockres;
	struct ocfs2_dlm_debug *osb_dlm_debug;

	struct dentry *osb_debug_root;
	struct dentry *osb_ctxt;

	wait_queue_head_t recovery_event;

	/* Downconvert thread state; dc_task_lock protects the fields. */
	spinlock_t dc_task_lock;
	struct task_struct *dc_task;
	wait_queue_head_t dc_event;
	unsigned long dc_wake_sequence;
	unsigned long dc_work_sequence;

	/*
	 * Any thread can add locks to the list, but the downconvert
	 * thread is the only one allowed to remove locks. Any change
	 * to this rule requires updating
	 * ocfs2_downconvert_thread_do_work().
	 */
	struct list_head blocked_lock_list;
	unsigned long blocked_lock_count;

	/* List of dentry locks to release. Anyone can add locks to
	 * the list, ocfs2_wq processes the list  */
	struct ocfs2_dentry_lock *dentry_lock_list;
	struct work_struct dentry_lock_work;

	wait_queue_head_t osb_mount_event;

	/* Truncate log info */
	struct inode *osb_tl_inode;
	struct buffer_head *osb_tl_bh;
	struct delayed_work osb_truncate_log_wq;

	struct ocfs2_node_map osb_recovering_orphan_dirs;
	unsigned int *osb_orphan_wipes;
	wait_queue_head_t osb_wipe_event;

	struct ocfs2_orphan_scan osb_orphan_scan;

	/* used to protect metaecc calculation check of xattr. */
	spinlock_t osb_xattr_lock;

	unsigned int osb_dx_mask;
	u32 osb_dx_seed[4];

	/* the group we used to allocate inodes. */
	u64 osb_inode_alloc_group;

	/* rb tree root for refcount lock. */
	struct rb_root osb_rf_lock_tree;
	struct ocfs2_refcount_tree *osb_ref_tree_lru;
};
413 417
/* Map a VFS super_block to its ocfs2_super. */
#define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)

/* Useful typedef for passing around journal access functions */
typedef int (*ocfs2_journal_access_func)(handle_t *handle,
					 struct ocfs2_caching_info *ci,
					 struct buffer_head *bh, int type);
420 424
421 static inline int ocfs2_should_order_data(struct inode *inode) 425 static inline int ocfs2_should_order_data(struct inode *inode)
422 { 426 {
423 if (!S_ISREG(inode->i_mode)) 427 if (!S_ISREG(inode->i_mode))
424 return 0; 428 return 0;
425 if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) 429 if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)
426 return 0; 430 return 0;
427 return 1; 431 return 1;
428 } 432 }
429 433
430 static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb) 434 static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb)
431 { 435 {
432 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) 436 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
433 return 1; 437 return 1;
434 return 0; 438 return 0;
435 } 439 }
436 440
437 static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) 441 static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
438 { 442 {
439 /* 443 /*
440 * Support for sparse files is a pre-requisite 444 * Support for sparse files is a pre-requisite
441 */ 445 */
442 if (!ocfs2_sparse_alloc(osb)) 446 if (!ocfs2_sparse_alloc(osb))
443 return 0; 447 return 0;
444 448
445 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) 449 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
446 return 1; 450 return 1;
447 return 0; 451 return 0;
448 } 452 }
449 453
450 static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) 454 static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
451 { 455 {
452 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) 456 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
453 return 1; 457 return 1;
454 return 0; 458 return 0;
455 } 459 }
456 460
457 static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) 461 static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
458 { 462 {
459 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) 463 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)
460 return 1; 464 return 1;
461 return 0; 465 return 0;
462 } 466 }
463 467
464 static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) 468 static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
465 { 469 {
466 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC) 470 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC)
467 return 1; 471 return 1;
468 return 0; 472 return 0;
469 } 473 }
470 474
471 static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb) 475 static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
472 { 476 {
473 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) 477 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
474 return 1; 478 return 1;
475 return 0; 479 return 0;
476 } 480 }
477 481
478 static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) 482 static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
479 { 483 {
480 if (ocfs2_supports_indexed_dirs(osb)) 484 if (ocfs2_supports_indexed_dirs(osb))
481 return OCFS2_DX_LINK_MAX; 485 return OCFS2_DX_LINK_MAX;
482 return OCFS2_LINK_MAX; 486 return OCFS2_LINK_MAX;
483 } 487 }
484 488
485 static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di) 489 static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
486 { 490 {
487 u32 nlink = le16_to_cpu(di->i_links_count); 491 u32 nlink = le16_to_cpu(di->i_links_count);
488 u32 hi = le16_to_cpu(di->i_links_count_hi); 492 u32 hi = le16_to_cpu(di->i_links_count_hi);
489 493
490 if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL)) 494 if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL))
491 nlink |= (hi << OCFS2_LINKS_HI_SHIFT); 495 nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
492 496
493 return nlink; 497 return nlink;
494 } 498 }
495 499
496 static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink) 500 static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink)
497 { 501 {
498 u16 lo, hi; 502 u16 lo, hi;
499 503
500 lo = nlink; 504 lo = nlink;
501 hi = nlink >> OCFS2_LINKS_HI_SHIFT; 505 hi = nlink >> OCFS2_LINKS_HI_SHIFT;
502 506
503 di->i_links_count = cpu_to_le16(lo); 507 di->i_links_count = cpu_to_le16(lo);
504 di->i_links_count_hi = cpu_to_le16(hi); 508 di->i_links_count_hi = cpu_to_le16(hi);
505 } 509 }
506 510
/* Adjust the on-disk link count by n (n may be negative). */
static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
{
	ocfs2_set_links_count(di, ocfs2_read_links_count(di) + n);
}
515 519
516 static inline int ocfs2_refcount_tree(struct ocfs2_super *osb) 520 static inline int ocfs2_refcount_tree(struct ocfs2_super *osb)
517 { 521 {
518 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) 522 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
519 return 1; 523 return 1;
520 return 0; 524 return 0;
521 } 525 }
522 526
/* set / clear functions because cluster events can make these happen
 * in parallel so we want the transitions to be atomic. this also
 * means that any future flags osb_flags must be protected by spinlock
 * too! */
/* Atomically set bits in osb->osb_flags under osb_lock. */
static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
				      unsigned long flag)
{
	spin_lock(&osb->osb_lock);
	osb->osb_flags |= flag;
	spin_unlock(&osb->osb_lock);
}
534 538
535 539
536 static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, 540 static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
537 unsigned long flag) 541 unsigned long flag)
538 { 542 {
539 unsigned long ret; 543 unsigned long ret;
540 544
541 spin_lock(&osb->osb_lock); 545 spin_lock(&osb->osb_lock);
542 ret = osb->osb_flags & flag; 546 ret = osb->osb_flags & flag;
543 spin_unlock(&osb->osb_lock); 547 spin_unlock(&osb->osb_lock);
544 return ret; 548 return ret;
545 } 549 }
546 550
547 static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 551 static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
548 int hard) 552 int hard)
549 { 553 {
550 spin_lock(&osb->osb_lock); 554 spin_lock(&osb->osb_lock);
551 osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO); 555 osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO);
552 if (hard) 556 if (hard)
553 osb->osb_flags |= OCFS2_OSB_HARD_RO; 557 osb->osb_flags |= OCFS2_OSB_HARD_RO;
554 else 558 else
555 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 559 osb->osb_flags |= OCFS2_OSB_SOFT_RO;
556 spin_unlock(&osb->osb_lock); 560 spin_unlock(&osb->osb_lock);
557 } 561 }
558 562
559 static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb) 563 static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb)
560 { 564 {
561 int ret; 565 int ret;
562 566
563 spin_lock(&osb->osb_lock); 567 spin_lock(&osb->osb_lock);
564 ret = osb->osb_flags & OCFS2_OSB_HARD_RO; 568 ret = osb->osb_flags & OCFS2_OSB_HARD_RO;
565 spin_unlock(&osb->osb_lock); 569 spin_unlock(&osb->osb_lock);
566 570
567 return ret; 571 return ret;
568 } 572 }
569 573
570 static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) 574 static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
571 { 575 {
572 int ret; 576 int ret;
573 577
574 spin_lock(&osb->osb_lock); 578 spin_lock(&osb->osb_lock);
575 ret = osb->osb_flags & OCFS2_OSB_SOFT_RO; 579 ret = osb->osb_flags & OCFS2_OSB_SOFT_RO;
576 spin_unlock(&osb->osb_lock); 580 spin_unlock(&osb->osb_lock);
577 581
578 return ret; 582 return ret;
579 } 583 }
580 584
/* Nonzero iff the volume uses a userspace cluster stack. */
static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
{
	return (osb->s_feature_incompat &
		OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK);
}
586 590
/* Nonzero iff the volume is mounted in (non-clustered) local mode. */
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
{
	return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
}
591 595
592 static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) 596 static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
593 { 597 {
594 return (osb->s_feature_incompat & 598 return (osb->s_feature_incompat &
595 OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); 599 OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP);
596 } 600 }
597 601
598 602
/*
 * On-disk metadata blocks begin with a fixed signature string; each
 * macro below checks that a structure read from disk carries the
 * signature expected for its type.
 */
#define OCFS2_IS_VALID_DINODE(ptr)					\
	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))

#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\
	(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))

#define OCFS2_IS_VALID_GROUP_DESC(ptr)					\
	(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))


#define OCFS2_IS_VALID_XATTR_BLOCK(ptr)					\
	(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))

#define OCFS2_IS_VALID_DIR_TRAILER(ptr)					\
	(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))

#define OCFS2_IS_VALID_DX_ROOT(ptr)					\
	(!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE))

#define OCFS2_IS_VALID_DX_LEAF(ptr)					\
	(!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))

#define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr)				\
	(!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE))
623 627
624 static inline unsigned long ino_from_blkno(struct super_block *sb, 628 static inline unsigned long ino_from_blkno(struct super_block *sb,
625 u64 blkno) 629 u64 blkno)
626 { 630 {
627 return (unsigned long)(blkno & (u64)ULONG_MAX); 631 return (unsigned long)(blkno & (u64)ULONG_MAX);
628 } 632 }
629 633
630 static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb, 634 static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb,
631 u32 clusters) 635 u32 clusters)
632 { 636 {
633 int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits - 637 int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits -
634 sb->s_blocksize_bits; 638 sb->s_blocksize_bits;
635 639
636 return (u64)clusters << c_to_b_bits; 640 return (u64)clusters << c_to_b_bits;
637 } 641 }
638 642
639 static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb, 643 static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb,
640 u64 blocks) 644 u64 blocks)
641 { 645 {
642 int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits - 646 int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits -
643 sb->s_blocksize_bits; 647 sb->s_blocksize_bits;
644 648
645 return (u32)(blocks >> b_to_c_bits); 649 return (u32)(blocks >> b_to_c_bits);
646 } 650 }
647 651
648 static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, 652 static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
649 u64 bytes) 653 u64 bytes)
650 { 654 {
651 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; 655 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
652 unsigned int clusters; 656 unsigned int clusters;
653 657
654 bytes += OCFS2_SB(sb)->s_clustersize - 1; 658 bytes += OCFS2_SB(sb)->s_clustersize - 1;
655 /* OCFS2 just cannot have enough clusters to overflow this */ 659 /* OCFS2 just cannot have enough clusters to overflow this */
656 clusters = (unsigned int)(bytes >> cl_bits); 660 clusters = (unsigned int)(bytes >> cl_bits);
657 661
658 return clusters; 662 return clusters;
659 } 663 }
660 664
661 static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, 665 static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb,
662 u64 bytes) 666 u64 bytes)
663 { 667 {
664 bytes += sb->s_blocksize - 1; 668 bytes += sb->s_blocksize - 1;
665 return bytes >> sb->s_blocksize_bits; 669 return bytes >> sb->s_blocksize_bits;
666 } 670 }
667 671
668 static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, 672 static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb,
669 u32 clusters) 673 u32 clusters)
670 { 674 {
671 return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; 675 return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits;
672 } 676 }
673 677
674 static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb, 678 static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb,
675 u64 blocks) 679 u64 blocks)
676 { 680 {
677 int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; 681 int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits;
678 unsigned int clusters; 682 unsigned int clusters;
679 683
680 clusters = ocfs2_blocks_to_clusters(sb, blocks); 684 clusters = ocfs2_blocks_to_clusters(sb, blocks);
681 return (u64)clusters << bits; 685 return (u64)clusters << bits;
682 } 686 }
683 687
684 static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, 688 static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb,
685 u64 bytes) 689 u64 bytes)
686 { 690 {
687 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; 691 int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
688 unsigned int clusters; 692 unsigned int clusters;
689 693
690 clusters = ocfs2_clusters_for_bytes(sb, bytes); 694 clusters = ocfs2_clusters_for_bytes(sb, bytes);
691 return (u64)clusters << cl_bits; 695 return (u64)clusters << cl_bits;
692 } 696 }
693 697
694 static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb, 698 static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb,
695 u64 bytes) 699 u64 bytes)
696 { 700 {
697 u64 blocks; 701 u64 blocks;
698 702
699 blocks = ocfs2_blocks_for_bytes(sb, bytes); 703 blocks = ocfs2_blocks_for_bytes(sb, bytes);
700 return blocks << sb->s_blocksize_bits; 704 return blocks << sb->s_blocksize_bits;
701 } 705 }
702 706
/* Round 'bytes' up to a whole number of 512-byte sectors. */
static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes)
{
	return (unsigned long)((bytes + 511) >> 9);
}
707 711
708 static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb, 712 static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb,
709 unsigned long pg_index) 713 unsigned long pg_index)
710 { 714 {
711 u32 clusters = pg_index; 715 u32 clusters = pg_index;
712 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; 716 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
713 717
714 if (unlikely(PAGE_CACHE_SHIFT > cbits)) 718 if (unlikely(PAGE_CACHE_SHIFT > cbits))
715 clusters = pg_index << (PAGE_CACHE_SHIFT - cbits); 719 clusters = pg_index << (PAGE_CACHE_SHIFT - cbits);
716 else if (PAGE_CACHE_SHIFT < cbits) 720 else if (PAGE_CACHE_SHIFT < cbits)
717 clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT); 721 clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT);
718 722
719 return clusters; 723 return clusters;
720 } 724 }
721 725
722 /* 726 /*
723 * Find the 1st page index which covers the given clusters. 727 * Find the 1st page index which covers the given clusters.
724 */ 728 */
725 static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb, 729 static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb,
726 u32 clusters) 730 u32 clusters)
727 { 731 {
728 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; 732 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
729 pgoff_t index = clusters; 733 pgoff_t index = clusters;
730 734
731 if (PAGE_CACHE_SHIFT > cbits) { 735 if (PAGE_CACHE_SHIFT > cbits) {
732 index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits); 736 index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits);
733 } else if (PAGE_CACHE_SHIFT < cbits) { 737 } else if (PAGE_CACHE_SHIFT < cbits) {
734 index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT); 738 index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT);
735 } 739 }
736 740
737 return index; 741 return index;
738 } 742 }
739 743
740 static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) 744 static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
741 { 745 {
742 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; 746 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
743 unsigned int pages_per_cluster = 1; 747 unsigned int pages_per_cluster = 1;
744 748
745 if (PAGE_CACHE_SHIFT < cbits) 749 if (PAGE_CACHE_SHIFT < cbits)
746 pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT); 750 pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT);
747 751
748 return pages_per_cluster; 752 return pages_per_cluster;
749 } 753 }
750 754
751 static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, 755 static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
752 unsigned int megs) 756 unsigned int megs)
753 { 757 {
754 BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); 758 BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576);
755 759
756 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); 760 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
757 } 761 }
758 762
759 static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) 763 static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
760 { 764 {
761 spin_lock(&osb->osb_lock); 765 spin_lock(&osb->osb_lock);
762 osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; 766 osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
763 spin_unlock(&osb->osb_lock); 767 spin_unlock(&osb->osb_lock);
764 atomic_set(&osb->s_num_inodes_stolen, 0); 768 atomic_set(&osb->s_num_inodes_stolen, 0);
765 } 769 }
766 770
767 static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb, 771 static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
768 s16 slot) 772 s16 slot)
769 { 773 {
770 spin_lock(&osb->osb_lock); 774 spin_lock(&osb->osb_lock);
771 osb->s_inode_steal_slot = slot; 775 osb->s_inode_steal_slot = slot;
772 spin_unlock(&osb->osb_lock); 776 spin_unlock(&osb->osb_lock);
773 } 777 }
774 778
775 static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) 779 static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
776 { 780 {
777 s16 slot; 781 s16 slot;
778 782
779 spin_lock(&osb->osb_lock); 783 spin_lock(&osb->osb_lock);
780 slot = osb->s_inode_steal_slot; 784 slot = osb->s_inode_steal_slot;
781 spin_unlock(&osb->osb_lock); 785 spin_unlock(&osb->osb_lock);
782 786
783 return slot; 787 return slot;
784 } 788 }
785 789
786 #define ocfs2_set_bit ext2_set_bit 790 #define ocfs2_set_bit ext2_set_bit
787 #define ocfs2_clear_bit ext2_clear_bit 791 #define ocfs2_clear_bit ext2_clear_bit
788 #define ocfs2_test_bit ext2_test_bit 792 #define ocfs2_test_bit ext2_test_bit
789 #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit 793 #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
790 #define ocfs2_find_next_bit ext2_find_next_bit 794 #define ocfs2_find_next_bit ext2_find_next_bit
791 #endif /* OCFS2_H */ 795 #endif /* OCFS2_H */
792 796
793 797