Commit 5bc970e803ad2b1f26771f39376a79dbf0f5bf64
Committed by
Joel Becker
1 parent
0cc9d52578
Exists in
master
and in
7 other branches
ocfs2: Use hrtimer to track ocfs2 fs lock stats
This patch makes use of hrtimer to track times in ocfs2 lock stats. The patch is a bit involved in order to ensure no additional impact on the memory footprint: the size of ocfs2_inode_cache remains 1280 bytes on 32-bit systems. A related change was to modify the unit of the max wait time from nanoseconds to microseconds, allowing us to track maximum wait times larger than 4 seconds. This change necessitated bumping the output version of the debugfs file, locking_state, from 2 to 3. Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com> Signed-off-by: Joel Becker <jlbec@evilplan.org>
Showing 2 changed files with 64 additions and 56 deletions (inline diff)
fs/ocfs2/dlmglue.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * dlmglue.c | 4 | * dlmglue.c |
5 | * | 5 | * |
6 | * Code which implements an OCFS2 specific interface to our DLM. | 6 | * Code which implements an OCFS2 specific interface to our DLM. |
7 | * | 7 | * |
8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
12 | * License as published by the Free Software Foundation; either | 12 | * License as published by the Free Software Foundation; either |
13 | * version 2 of the License, or (at your option) any later version. | 13 | * version 2 of the License, or (at your option) any later version. |
14 | * | 14 | * |
15 | * This program is distributed in the hope that it will be useful, | 15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | * General Public License for more details. | 18 | * General Public License for more details. |
19 | * | 19 | * |
20 | * You should have received a copy of the GNU General Public | 20 | * You should have received a copy of the GNU General Public |
21 | * License along with this program; if not, write to the | 21 | * License along with this program; if not, write to the |
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
23 | * Boston, MA 021110-1307, USA. | 23 | * Boston, MA 021110-1307, USA. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/types.h> | 26 | #include <linux/types.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
31 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
32 | #include <linux/debugfs.h> | 32 | #include <linux/debugfs.h> |
33 | #include <linux/seq_file.h> | 33 | #include <linux/seq_file.h> |
34 | #include <linux/time.h> | 34 | #include <linux/time.h> |
35 | #include <linux/quotaops.h> | 35 | #include <linux/quotaops.h> |
36 | 36 | ||
37 | #define MLOG_MASK_PREFIX ML_DLM_GLUE | 37 | #define MLOG_MASK_PREFIX ML_DLM_GLUE |
38 | #include <cluster/masklog.h> | 38 | #include <cluster/masklog.h> |
39 | 39 | ||
40 | #include "ocfs2.h" | 40 | #include "ocfs2.h" |
41 | #include "ocfs2_lockingver.h" | 41 | #include "ocfs2_lockingver.h" |
42 | 42 | ||
43 | #include "alloc.h" | 43 | #include "alloc.h" |
44 | #include "dcache.h" | 44 | #include "dcache.h" |
45 | #include "dlmglue.h" | 45 | #include "dlmglue.h" |
46 | #include "extent_map.h" | 46 | #include "extent_map.h" |
47 | #include "file.h" | 47 | #include "file.h" |
48 | #include "heartbeat.h" | 48 | #include "heartbeat.h" |
49 | #include "inode.h" | 49 | #include "inode.h" |
50 | #include "journal.h" | 50 | #include "journal.h" |
51 | #include "stackglue.h" | 51 | #include "stackglue.h" |
52 | #include "slot_map.h" | 52 | #include "slot_map.h" |
53 | #include "super.h" | 53 | #include "super.h" |
54 | #include "uptodate.h" | 54 | #include "uptodate.h" |
55 | #include "quota.h" | 55 | #include "quota.h" |
56 | #include "refcounttree.h" | 56 | #include "refcounttree.h" |
57 | 57 | ||
58 | #include "buffer_head_io.h" | 58 | #include "buffer_head_io.h" |
59 | 59 | ||
60 | struct ocfs2_mask_waiter { | 60 | struct ocfs2_mask_waiter { |
61 | struct list_head mw_item; | 61 | struct list_head mw_item; |
62 | int mw_status; | 62 | int mw_status; |
63 | struct completion mw_complete; | 63 | struct completion mw_complete; |
64 | unsigned long mw_mask; | 64 | unsigned long mw_mask; |
65 | unsigned long mw_goal; | 65 | unsigned long mw_goal; |
66 | #ifdef CONFIG_OCFS2_FS_STATS | 66 | #ifdef CONFIG_OCFS2_FS_STATS |
67 | unsigned long long mw_lock_start; | 67 | ktime_t mw_lock_start; |
68 | #endif | 68 | #endif |
69 | }; | 69 | }; |
70 | 70 | ||
71 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 71 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
72 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 72 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
73 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); | 73 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); |
74 | static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); | 74 | static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * Return value from ->downconvert_worker functions. | 77 | * Return value from ->downconvert_worker functions. |
78 | * | 78 | * |
79 | * These control the precise actions of ocfs2_unblock_lock() | 79 | * These control the precise actions of ocfs2_unblock_lock() |
80 | * and ocfs2_process_blocked_lock() | 80 | * and ocfs2_process_blocked_lock() |
81 | * | 81 | * |
82 | */ | 82 | */ |
83 | enum ocfs2_unblock_action { | 83 | enum ocfs2_unblock_action { |
84 | UNBLOCK_CONTINUE = 0, /* Continue downconvert */ | 84 | UNBLOCK_CONTINUE = 0, /* Continue downconvert */ |
85 | UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire | 85 | UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire |
86 | * ->post_unlock callback */ | 86 | * ->post_unlock callback */ |
87 | UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire | 87 | UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire |
88 | * ->post_unlock() callback. */ | 88 | * ->post_unlock() callback. */ |
89 | }; | 89 | }; |
90 | 90 | ||
91 | struct ocfs2_unblock_ctl { | 91 | struct ocfs2_unblock_ctl { |
92 | int requeue; | 92 | int requeue; |
93 | enum ocfs2_unblock_action unblock_action; | 93 | enum ocfs2_unblock_action unblock_action; |
94 | }; | 94 | }; |
95 | 95 | ||
96 | /* Lockdep class keys */ | 96 | /* Lockdep class keys */ |
97 | struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; | 97 | struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; |
98 | 98 | ||
99 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | 99 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, |
100 | int new_level); | 100 | int new_level); |
101 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); | 101 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); |
102 | 102 | ||
103 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | 103 | static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, |
104 | int blocking); | 104 | int blocking); |
105 | 105 | ||
106 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | 106 | static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, |
107 | int blocking); | 107 | int blocking); |
108 | 108 | ||
109 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 109 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, |
110 | struct ocfs2_lock_res *lockres); | 110 | struct ocfs2_lock_res *lockres); |
111 | 111 | ||
112 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); | 112 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); |
113 | 113 | ||
114 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, | 114 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, |
115 | int new_level); | 115 | int new_level); |
116 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, | 116 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, |
117 | int blocking); | 117 | int blocking); |
118 | 118 | ||
119 | #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) | 119 | #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) |
120 | 120 | ||
121 | /* This aids in debugging situations where a bad LVB might be involved. */ | 121 | /* This aids in debugging situations where a bad LVB might be involved. */ |
122 | static void ocfs2_dump_meta_lvb_info(u64 level, | 122 | static void ocfs2_dump_meta_lvb_info(u64 level, |
123 | const char *function, | 123 | const char *function, |
124 | unsigned int line, | 124 | unsigned int line, |
125 | struct ocfs2_lock_res *lockres) | 125 | struct ocfs2_lock_res *lockres) |
126 | { | 126 | { |
127 | struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 127 | struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
128 | 128 | ||
129 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 129 | mlog(level, "LVB information for %s (called from %s:%u):\n", |
130 | lockres->l_name, function, line); | 130 | lockres->l_name, function, line); |
131 | mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", | 131 | mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", |
132 | lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), | 132 | lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), |
133 | be32_to_cpu(lvb->lvb_igeneration)); | 133 | be32_to_cpu(lvb->lvb_igeneration)); |
134 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", | 134 | mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", |
135 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), | 135 | (unsigned long long)be64_to_cpu(lvb->lvb_isize), |
136 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), | 136 | be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), |
137 | be16_to_cpu(lvb->lvb_imode)); | 137 | be16_to_cpu(lvb->lvb_imode)); |
138 | mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " | 138 | mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " |
139 | "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), | 139 | "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), |
140 | (long long)be64_to_cpu(lvb->lvb_iatime_packed), | 140 | (long long)be64_to_cpu(lvb->lvb_iatime_packed), |
141 | (long long)be64_to_cpu(lvb->lvb_ictime_packed), | 141 | (long long)be64_to_cpu(lvb->lvb_ictime_packed), |
142 | (long long)be64_to_cpu(lvb->lvb_imtime_packed), | 142 | (long long)be64_to_cpu(lvb->lvb_imtime_packed), |
143 | be32_to_cpu(lvb->lvb_iattr)); | 143 | be32_to_cpu(lvb->lvb_iattr)); |
144 | } | 144 | } |
145 | 145 | ||
146 | 146 | ||
147 | /* | 147 | /* |
148 | * OCFS2 Lock Resource Operations | 148 | * OCFS2 Lock Resource Operations |
149 | * | 149 | * |
150 | * These fine tune the behavior of the generic dlmglue locking infrastructure. | 150 | * These fine tune the behavior of the generic dlmglue locking infrastructure. |
151 | * | 151 | * |
152 | * The most basic of lock types can point ->l_priv to their respective | 152 | * The most basic of lock types can point ->l_priv to their respective |
153 | * struct ocfs2_super and allow the default actions to manage things. | 153 | * struct ocfs2_super and allow the default actions to manage things. |
154 | * | 154 | * |
155 | * Right now, each lock type also needs to implement an init function, | 155 | * Right now, each lock type also needs to implement an init function, |
156 | * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() | 156 | * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() |
157 | * should be called when the lock is no longer needed (i.e., object | 157 | * should be called when the lock is no longer needed (i.e., object |
158 | * destruction time). | 158 | * destruction time). |
159 | */ | 159 | */ |
160 | struct ocfs2_lock_res_ops { | 160 | struct ocfs2_lock_res_ops { |
161 | /* | 161 | /* |
162 | * Translate an ocfs2_lock_res * into an ocfs2_super *. Define | 162 | * Translate an ocfs2_lock_res * into an ocfs2_super *. Define |
163 | * this callback if ->l_priv is not an ocfs2_super pointer | 163 | * this callback if ->l_priv is not an ocfs2_super pointer |
164 | */ | 164 | */ |
165 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 165 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); |
166 | 166 | ||
167 | /* | 167 | /* |
168 | * Optionally called in the downconvert thread after a | 168 | * Optionally called in the downconvert thread after a |
169 | * successful downconvert. The lockres will not be referenced | 169 | * successful downconvert. The lockres will not be referenced |
170 | * after this callback is called, so it is safe to free | 170 | * after this callback is called, so it is safe to free |
171 | * memory, etc. | 171 | * memory, etc. |
172 | * | 172 | * |
173 | * The exact semantics of when this is called are controlled | 173 | * The exact semantics of when this is called are controlled |
174 | * by ->downconvert_worker() | 174 | * by ->downconvert_worker() |
175 | */ | 175 | */ |
176 | void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); | 176 | void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); |
177 | 177 | ||
178 | /* | 178 | /* |
179 | * Allow a lock type to add checks to determine whether it is | 179 | * Allow a lock type to add checks to determine whether it is |
180 | * safe to downconvert a lock. Return 0 to re-queue the | 180 | * safe to downconvert a lock. Return 0 to re-queue the |
181 | * downconvert at a later time, nonzero to continue. | 181 | * downconvert at a later time, nonzero to continue. |
182 | * | 182 | * |
183 | * For most locks, the default checks that there are no | 183 | * For most locks, the default checks that there are no |
184 | * incompatible holders are sufficient. | 184 | * incompatible holders are sufficient. |
185 | * | 185 | * |
186 | * Called with the lockres spinlock held. | 186 | * Called with the lockres spinlock held. |
187 | */ | 187 | */ |
188 | int (*check_downconvert)(struct ocfs2_lock_res *, int); | 188 | int (*check_downconvert)(struct ocfs2_lock_res *, int); |
189 | 189 | ||
190 | /* | 190 | /* |
191 | * Allows a lock type to populate the lock value block. This | 191 | * Allows a lock type to populate the lock value block. This |
192 | * is called on downconvert, and when we drop a lock. | 192 | * is called on downconvert, and when we drop a lock. |
193 | * | 193 | * |
194 | * Locks that want to use this should set LOCK_TYPE_USES_LVB | 194 | * Locks that want to use this should set LOCK_TYPE_USES_LVB |
195 | * in the flags field. | 195 | * in the flags field. |
196 | * | 196 | * |
197 | * Called with the lockres spinlock held. | 197 | * Called with the lockres spinlock held. |
198 | */ | 198 | */ |
199 | void (*set_lvb)(struct ocfs2_lock_res *); | 199 | void (*set_lvb)(struct ocfs2_lock_res *); |
200 | 200 | ||
201 | /* | 201 | /* |
202 | * Called from the downconvert thread when it is determined | 202 | * Called from the downconvert thread when it is determined |
203 | * that a lock will be downconverted. This is called without | 203 | * that a lock will be downconverted. This is called without |
204 | * any locks held so the function can do work that might | 204 | * any locks held so the function can do work that might |
205 | * schedule (syncing out data, etc). | 205 | * schedule (syncing out data, etc). |
206 | * | 206 | * |
207 | * This should return any one of the ocfs2_unblock_action | 207 | * This should return any one of the ocfs2_unblock_action |
208 | * values, depending on what it wants the thread to do. | 208 | * values, depending on what it wants the thread to do. |
209 | */ | 209 | */ |
210 | int (*downconvert_worker)(struct ocfs2_lock_res *, int); | 210 | int (*downconvert_worker)(struct ocfs2_lock_res *, int); |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * LOCK_TYPE_* flags which describe the specific requirements | 213 | * LOCK_TYPE_* flags which describe the specific requirements |
214 | * of a lock type. Descriptions of each individual flag follow. | 214 | * of a lock type. Descriptions of each individual flag follow. |
215 | */ | 215 | */ |
216 | int flags; | 216 | int flags; |
217 | }; | 217 | }; |
218 | 218 | ||
219 | /* | 219 | /* |
220 | * Some locks want to "refresh" potentially stale data when a | 220 | * Some locks want to "refresh" potentially stale data when a |
221 | * meaningful (PRMODE or EXMODE) lock level is first obtained. If this | 221 | * meaningful (PRMODE or EXMODE) lock level is first obtained. If this |
222 | * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the | 222 | * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the |
223 | * individual lockres l_flags member from the ast function. It is | 223 | * individual lockres l_flags member from the ast function. It is |
224 | * expected that the locking wrapper will clear the | 224 | * expected that the locking wrapper will clear the |
225 | * OCFS2_LOCK_NEEDS_REFRESH flag when done. | 225 | * OCFS2_LOCK_NEEDS_REFRESH flag when done. |
226 | */ | 226 | */ |
227 | #define LOCK_TYPE_REQUIRES_REFRESH 0x1 | 227 | #define LOCK_TYPE_REQUIRES_REFRESH 0x1 |
228 | 228 | ||
229 | /* | 229 | /* |
230 | * Indicate that a lock type makes use of the lock value block. The | 230 | * Indicate that a lock type makes use of the lock value block. The |
231 | * ->set_lvb lock type callback must be defined. | 231 | * ->set_lvb lock type callback must be defined. |
232 | */ | 232 | */ |
233 | #define LOCK_TYPE_USES_LVB 0x2 | 233 | #define LOCK_TYPE_USES_LVB 0x2 |
234 | 234 | ||
235 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | 235 | static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { |
236 | .get_osb = ocfs2_get_inode_osb, | 236 | .get_osb = ocfs2_get_inode_osb, |
237 | .flags = 0, | 237 | .flags = 0, |
238 | }; | 238 | }; |
239 | 239 | ||
240 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { | 240 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { |
241 | .get_osb = ocfs2_get_inode_osb, | 241 | .get_osb = ocfs2_get_inode_osb, |
242 | .check_downconvert = ocfs2_check_meta_downconvert, | 242 | .check_downconvert = ocfs2_check_meta_downconvert, |
243 | .set_lvb = ocfs2_set_meta_lvb, | 243 | .set_lvb = ocfs2_set_meta_lvb, |
244 | .downconvert_worker = ocfs2_data_convert_worker, | 244 | .downconvert_worker = ocfs2_data_convert_worker, |
245 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 245 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
246 | }; | 246 | }; |
247 | 247 | ||
248 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 248 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { |
249 | .flags = LOCK_TYPE_REQUIRES_REFRESH, | 249 | .flags = LOCK_TYPE_REQUIRES_REFRESH, |
250 | }; | 250 | }; |
251 | 251 | ||
252 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { | 252 | static struct ocfs2_lock_res_ops ocfs2_rename_lops = { |
253 | .flags = 0, | 253 | .flags = 0, |
254 | }; | 254 | }; |
255 | 255 | ||
256 | static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { | 256 | static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { |
257 | .flags = 0, | 257 | .flags = 0, |
258 | }; | 258 | }; |
259 | 259 | ||
260 | static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { | 260 | static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { |
261 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | 261 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
262 | }; | 262 | }; |
263 | 263 | ||
264 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { | 264 | static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { |
265 | .get_osb = ocfs2_get_dentry_osb, | 265 | .get_osb = ocfs2_get_dentry_osb, |
266 | .post_unlock = ocfs2_dentry_post_unlock, | 266 | .post_unlock = ocfs2_dentry_post_unlock, |
267 | .downconvert_worker = ocfs2_dentry_convert_worker, | 267 | .downconvert_worker = ocfs2_dentry_convert_worker, |
268 | .flags = 0, | 268 | .flags = 0, |
269 | }; | 269 | }; |
270 | 270 | ||
271 | static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | 271 | static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { |
272 | .get_osb = ocfs2_get_inode_osb, | 272 | .get_osb = ocfs2_get_inode_osb, |
273 | .flags = 0, | 273 | .flags = 0, |
274 | }; | 274 | }; |
275 | 275 | ||
276 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | 276 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { |
277 | .get_osb = ocfs2_get_file_osb, | 277 | .get_osb = ocfs2_get_file_osb, |
278 | .flags = 0, | 278 | .flags = 0, |
279 | }; | 279 | }; |
280 | 280 | ||
281 | static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { | 281 | static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { |
282 | .set_lvb = ocfs2_set_qinfo_lvb, | 282 | .set_lvb = ocfs2_set_qinfo_lvb, |
283 | .get_osb = ocfs2_get_qinfo_osb, | 283 | .get_osb = ocfs2_get_qinfo_osb, |
284 | .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, | 284 | .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, |
285 | }; | 285 | }; |
286 | 286 | ||
287 | static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { | 287 | static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { |
288 | .check_downconvert = ocfs2_check_refcount_downconvert, | 288 | .check_downconvert = ocfs2_check_refcount_downconvert, |
289 | .downconvert_worker = ocfs2_refcount_convert_worker, | 289 | .downconvert_worker = ocfs2_refcount_convert_worker, |
290 | .flags = 0, | 290 | .flags = 0, |
291 | }; | 291 | }; |
292 | 292 | ||
293 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 293 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
294 | { | 294 | { |
295 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 295 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
296 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 296 | lockres->l_type == OCFS2_LOCK_TYPE_RW || |
297 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 297 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; |
298 | } | 298 | } |
299 | 299 | ||
300 | static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) | 300 | static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) |
301 | { | 301 | { |
302 | return container_of(lksb, struct ocfs2_lock_res, l_lksb); | 302 | return container_of(lksb, struct ocfs2_lock_res, l_lksb); |
303 | } | 303 | } |
304 | 304 | ||
305 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) | 305 | static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) |
306 | { | 306 | { |
307 | BUG_ON(!ocfs2_is_inode_lock(lockres)); | 307 | BUG_ON(!ocfs2_is_inode_lock(lockres)); |
308 | 308 | ||
309 | return (struct inode *) lockres->l_priv; | 309 | return (struct inode *) lockres->l_priv; |
310 | } | 310 | } |
311 | 311 | ||
312 | static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) | 312 | static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) |
313 | { | 313 | { |
314 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); | 314 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); |
315 | 315 | ||
316 | return (struct ocfs2_dentry_lock *)lockres->l_priv; | 316 | return (struct ocfs2_dentry_lock *)lockres->l_priv; |
317 | } | 317 | } |
318 | 318 | ||
319 | static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) | 319 | static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) |
320 | { | 320 | { |
321 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); | 321 | BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); |
322 | 322 | ||
323 | return (struct ocfs2_mem_dqinfo *)lockres->l_priv; | 323 | return (struct ocfs2_mem_dqinfo *)lockres->l_priv; |
324 | } | 324 | } |
325 | 325 | ||
326 | static inline struct ocfs2_refcount_tree * | 326 | static inline struct ocfs2_refcount_tree * |
327 | ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) | 327 | ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) |
328 | { | 328 | { |
329 | return container_of(res, struct ocfs2_refcount_tree, rf_lockres); | 329 | return container_of(res, struct ocfs2_refcount_tree, rf_lockres); |
330 | } | 330 | } |
331 | 331 | ||
332 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) | 332 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) |
333 | { | 333 | { |
334 | if (lockres->l_ops->get_osb) | 334 | if (lockres->l_ops->get_osb) |
335 | return lockres->l_ops->get_osb(lockres); | 335 | return lockres->l_ops->get_osb(lockres); |
336 | 336 | ||
337 | return (struct ocfs2_super *)lockres->l_priv; | 337 | return (struct ocfs2_super *)lockres->l_priv; |
338 | } | 338 | } |
339 | 339 | ||
340 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 340 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
341 | struct ocfs2_lock_res *lockres, | 341 | struct ocfs2_lock_res *lockres, |
342 | int level, | 342 | int level, |
343 | u32 dlm_flags); | 343 | u32 dlm_flags); |
344 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 344 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, |
345 | int wanted); | 345 | int wanted); |
346 | static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, | 346 | static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, |
347 | struct ocfs2_lock_res *lockres, | 347 | struct ocfs2_lock_res *lockres, |
348 | int level, unsigned long caller_ip); | 348 | int level, unsigned long caller_ip); |
349 | static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, | 349 | static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, |
350 | struct ocfs2_lock_res *lockres, | 350 | struct ocfs2_lock_res *lockres, |
351 | int level) | 351 | int level) |
352 | { | 352 | { |
353 | __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); | 353 | __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); |
354 | } | 354 | } |
355 | 355 | ||
356 | static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); | 356 | static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); |
357 | static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); | 357 | static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); |
358 | static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); | 358 | static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); |
359 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); | 359 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); |
360 | static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | 360 | static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, |
361 | struct ocfs2_lock_res *lockres); | 361 | struct ocfs2_lock_res *lockres); |
362 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 362 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, |
363 | int convert); | 363 | int convert); |
364 | #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ | 364 | #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ |
365 | if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ | 365 | if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ |
366 | mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ | 366 | mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ |
367 | _err, _func, _lockres->l_name); \ | 367 | _err, _func, _lockres->l_name); \ |
368 | else \ | 368 | else \ |
369 | mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \ | 369 | mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \ |
370 | _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ | 370 | _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ |
371 | (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ | 371 | (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ |
372 | } while (0) | 372 | } while (0) |
373 | static int ocfs2_downconvert_thread(void *arg); | 373 | static int ocfs2_downconvert_thread(void *arg); |
374 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 374 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
375 | struct ocfs2_lock_res *lockres); | 375 | struct ocfs2_lock_res *lockres); |
376 | static int ocfs2_inode_lock_update(struct inode *inode, | 376 | static int ocfs2_inode_lock_update(struct inode *inode, |
377 | struct buffer_head **bh); | 377 | struct buffer_head **bh); |
378 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 378 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
379 | static inline int ocfs2_highest_compat_lock_level(int level); | 379 | static inline int ocfs2_highest_compat_lock_level(int level); |
380 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 380 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, |
381 | int new_level); | 381 | int new_level); |
382 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 382 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
383 | struct ocfs2_lock_res *lockres, | 383 | struct ocfs2_lock_res *lockres, |
384 | int new_level, | 384 | int new_level, |
385 | int lvb, | 385 | int lvb, |
386 | unsigned int generation); | 386 | unsigned int generation); |
387 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 387 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, |
388 | struct ocfs2_lock_res *lockres); | 388 | struct ocfs2_lock_res *lockres); |
389 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 389 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, |
390 | struct ocfs2_lock_res *lockres); | 390 | struct ocfs2_lock_res *lockres); |
391 | 391 | ||
392 | 392 | ||
393 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 393 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
394 | u64 blkno, | 394 | u64 blkno, |
395 | u32 generation, | 395 | u32 generation, |
396 | char *name) | 396 | char *name) |
397 | { | 397 | { |
398 | int len; | 398 | int len; |
399 | 399 | ||
400 | mlog_entry_void(); | 400 | mlog_entry_void(); |
401 | 401 | ||
402 | BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); | 402 | BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); |
403 | 403 | ||
404 | len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", | 404 | len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", |
405 | ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, | 405 | ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, |
406 | (long long)blkno, generation); | 406 | (long long)blkno, generation); |
407 | 407 | ||
408 | BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); | 408 | BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); |
409 | 409 | ||
410 | mlog(0, "built lock resource with name: %s\n", name); | 410 | mlog(0, "built lock resource with name: %s\n", name); |
411 | 411 | ||
412 | mlog_exit_void(); | 412 | mlog_exit_void(); |
413 | } | 413 | } |
414 | 414 | ||
415 | static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); | 415 | static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); |
416 | 416 | ||
417 | static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, | 417 | static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, |
418 | struct ocfs2_dlm_debug *dlm_debug) | 418 | struct ocfs2_dlm_debug *dlm_debug) |
419 | { | 419 | { |
420 | mlog(0, "Add tracking for lockres %s\n", res->l_name); | 420 | mlog(0, "Add tracking for lockres %s\n", res->l_name); |
421 | 421 | ||
422 | spin_lock(&ocfs2_dlm_tracking_lock); | 422 | spin_lock(&ocfs2_dlm_tracking_lock); |
423 | list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); | 423 | list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); |
424 | spin_unlock(&ocfs2_dlm_tracking_lock); | 424 | spin_unlock(&ocfs2_dlm_tracking_lock); |
425 | } | 425 | } |
426 | 426 | ||
427 | static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) | 427 | static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) |
428 | { | 428 | { |
429 | spin_lock(&ocfs2_dlm_tracking_lock); | 429 | spin_lock(&ocfs2_dlm_tracking_lock); |
430 | if (!list_empty(&res->l_debug_list)) | 430 | if (!list_empty(&res->l_debug_list)) |
431 | list_del_init(&res->l_debug_list); | 431 | list_del_init(&res->l_debug_list); |
432 | spin_unlock(&ocfs2_dlm_tracking_lock); | 432 | spin_unlock(&ocfs2_dlm_tracking_lock); |
433 | } | 433 | } |
434 | 434 | ||
435 | #ifdef CONFIG_OCFS2_FS_STATS | 435 | #ifdef CONFIG_OCFS2_FS_STATS |
436 | static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) | 436 | static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) |
437 | { | 437 | { |
438 | res->l_lock_num_prmode = 0; | ||
439 | res->l_lock_num_prmode_failed = 0; | ||
440 | res->l_lock_total_prmode = 0; | ||
441 | res->l_lock_max_prmode = 0; | ||
442 | res->l_lock_num_exmode = 0; | ||
443 | res->l_lock_num_exmode_failed = 0; | ||
444 | res->l_lock_total_exmode = 0; | ||
445 | res->l_lock_max_exmode = 0; | ||
446 | res->l_lock_refresh = 0; | 438 | res->l_lock_refresh = 0; |
439 | memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); | ||
440 | memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); | ||
447 | } | 441 | } |
448 | 442 | ||
449 | static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, | 443 | static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, |
450 | struct ocfs2_mask_waiter *mw, int ret) | 444 | struct ocfs2_mask_waiter *mw, int ret) |
451 | { | 445 | { |
452 | unsigned long long *num, *sum; | 446 | u32 usec; |
453 | unsigned int *max, *failed; | 447 | ktime_t kt; |
454 | struct timespec ts = current_kernel_time(); | 448 | struct ocfs2_lock_stats *stats; |
455 | unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; | ||
456 | 449 | ||
457 | if (level == LKM_PRMODE) { | 450 | if (level == LKM_PRMODE) |
458 | num = &res->l_lock_num_prmode; | 451 | stats = &res->l_lock_prmode; |
459 | sum = &res->l_lock_total_prmode; | 452 | else if (level == LKM_EXMODE) |
460 | max = &res->l_lock_max_prmode; | 453 | stats = &res->l_lock_exmode; |
461 | failed = &res->l_lock_num_prmode_failed; | 454 | else |
462 | } else if (level == LKM_EXMODE) { | ||
463 | num = &res->l_lock_num_exmode; | ||
464 | sum = &res->l_lock_total_exmode; | ||
465 | max = &res->l_lock_max_exmode; | ||
466 | failed = &res->l_lock_num_exmode_failed; | ||
467 | } else | ||
468 | return; | 455 | return; |
469 | 456 | ||
470 | (*num)++; | 457 | kt = ktime_sub(ktime_get(), mw->mw_lock_start); |
471 | (*sum) += time; | 458 | usec = ktime_to_us(kt); |
472 | if (time > *max) | 459 | |
473 | *max = time; | 460 | stats->ls_gets++; |
461 | stats->ls_total += ktime_to_ns(kt); | ||
462 | /* overflow */ | ||
463 | if (unlikely(stats->ls_gets) == 0) { | ||
464 | stats->ls_gets++; | ||
465 | stats->ls_total = ktime_to_ns(kt); | ||
466 | } | ||
467 | |||
468 | if (stats->ls_max < usec) | ||
469 | stats->ls_max = usec; | ||
470 | |||
474 | if (ret) | 471 | if (ret) |
475 | (*failed)++; | 472 | stats->ls_fail++; |
476 | } | 473 | } |
477 | 474 | ||
478 | static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) | 475 | static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) |
479 | { | 476 | { |
480 | lockres->l_lock_refresh++; | 477 | lockres->l_lock_refresh++; |
481 | } | 478 | } |
482 | 479 | ||
483 | static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) | 480 | static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) |
484 | { | 481 | { |
485 | struct timespec ts = current_kernel_time(); | 482 | mw->mw_lock_start = ktime_get(); |
486 | mw->mw_lock_start = timespec_to_ns(&ts); | ||
487 | } | 483 | } |
488 | #else | 484 | #else |
489 | static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) | 485 | static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) |
490 | { | 486 | { |
491 | } | 487 | } |
492 | static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, | 488 | static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, |
493 | int level, struct ocfs2_mask_waiter *mw, int ret) | 489 | int level, struct ocfs2_mask_waiter *mw, int ret) |
494 | { | 490 | { |
495 | } | 491 | } |
496 | static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) | 492 | static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) |
497 | { | 493 | { |
498 | } | 494 | } |
499 | static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) | 495 | static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) |
500 | { | 496 | { |
501 | } | 497 | } |
502 | #endif | 498 | #endif |
503 | 499 | ||
504 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | 500 | static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, |
505 | struct ocfs2_lock_res *res, | 501 | struct ocfs2_lock_res *res, |
506 | enum ocfs2_lock_type type, | 502 | enum ocfs2_lock_type type, |
507 | struct ocfs2_lock_res_ops *ops, | 503 | struct ocfs2_lock_res_ops *ops, |
508 | void *priv) | 504 | void *priv) |
509 | { | 505 | { |
510 | res->l_type = type; | 506 | res->l_type = type; |
511 | res->l_ops = ops; | 507 | res->l_ops = ops; |
512 | res->l_priv = priv; | 508 | res->l_priv = priv; |
513 | 509 | ||
514 | res->l_level = DLM_LOCK_IV; | 510 | res->l_level = DLM_LOCK_IV; |
515 | res->l_requested = DLM_LOCK_IV; | 511 | res->l_requested = DLM_LOCK_IV; |
516 | res->l_blocking = DLM_LOCK_IV; | 512 | res->l_blocking = DLM_LOCK_IV; |
517 | res->l_action = OCFS2_AST_INVALID; | 513 | res->l_action = OCFS2_AST_INVALID; |
518 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; | 514 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; |
519 | 515 | ||
520 | res->l_flags = OCFS2_LOCK_INITIALIZED; | 516 | res->l_flags = OCFS2_LOCK_INITIALIZED; |
521 | 517 | ||
522 | ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); | 518 | ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); |
523 | 519 | ||
524 | ocfs2_init_lock_stats(res); | 520 | ocfs2_init_lock_stats(res); |
525 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 521 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
526 | if (type != OCFS2_LOCK_TYPE_OPEN) | 522 | if (type != OCFS2_LOCK_TYPE_OPEN) |
527 | lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], | 523 | lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], |
528 | &lockdep_keys[type], 0); | 524 | &lockdep_keys[type], 0); |
529 | else | 525 | else |
530 | res->l_lockdep_map.key = NULL; | 526 | res->l_lockdep_map.key = NULL; |
531 | #endif | 527 | #endif |
532 | } | 528 | } |
533 | 529 | ||
534 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) | 530 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) |
535 | { | 531 | { |
536 | /* This also clears out the lock status block */ | 532 | /* This also clears out the lock status block */ |
537 | memset(res, 0, sizeof(struct ocfs2_lock_res)); | 533 | memset(res, 0, sizeof(struct ocfs2_lock_res)); |
538 | spin_lock_init(&res->l_lock); | 534 | spin_lock_init(&res->l_lock); |
539 | init_waitqueue_head(&res->l_event); | 535 | init_waitqueue_head(&res->l_event); |
540 | INIT_LIST_HEAD(&res->l_blocked_list); | 536 | INIT_LIST_HEAD(&res->l_blocked_list); |
541 | INIT_LIST_HEAD(&res->l_mask_waiters); | 537 | INIT_LIST_HEAD(&res->l_mask_waiters); |
542 | } | 538 | } |
543 | 539 | ||
544 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 540 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
545 | enum ocfs2_lock_type type, | 541 | enum ocfs2_lock_type type, |
546 | unsigned int generation, | 542 | unsigned int generation, |
547 | struct inode *inode) | 543 | struct inode *inode) |
548 | { | 544 | { |
549 | struct ocfs2_lock_res_ops *ops; | 545 | struct ocfs2_lock_res_ops *ops; |
550 | 546 | ||
551 | switch(type) { | 547 | switch(type) { |
552 | case OCFS2_LOCK_TYPE_RW: | 548 | case OCFS2_LOCK_TYPE_RW: |
553 | ops = &ocfs2_inode_rw_lops; | 549 | ops = &ocfs2_inode_rw_lops; |
554 | break; | 550 | break; |
555 | case OCFS2_LOCK_TYPE_META: | 551 | case OCFS2_LOCK_TYPE_META: |
556 | ops = &ocfs2_inode_inode_lops; | 552 | ops = &ocfs2_inode_inode_lops; |
557 | break; | 553 | break; |
558 | case OCFS2_LOCK_TYPE_OPEN: | 554 | case OCFS2_LOCK_TYPE_OPEN: |
559 | ops = &ocfs2_inode_open_lops; | 555 | ops = &ocfs2_inode_open_lops; |
560 | break; | 556 | break; |
561 | default: | 557 | default: |
562 | mlog_bug_on_msg(1, "type: %d\n", type); | 558 | mlog_bug_on_msg(1, "type: %d\n", type); |
563 | ops = NULL; /* thanks, gcc */ | 559 | ops = NULL; /* thanks, gcc */ |
564 | break; | 560 | break; |
565 | }; | 561 | }; |
566 | 562 | ||
567 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, | 563 | ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, |
568 | generation, res->l_name); | 564 | generation, res->l_name); |
569 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); | 565 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); |
570 | } | 566 | } |
571 | 567 | ||
572 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | 568 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) |
573 | { | 569 | { |
574 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 570 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
575 | 571 | ||
576 | return OCFS2_SB(inode->i_sb); | 572 | return OCFS2_SB(inode->i_sb); |
577 | } | 573 | } |
578 | 574 | ||
579 | static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) | 575 | static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) |
580 | { | 576 | { |
581 | struct ocfs2_mem_dqinfo *info = lockres->l_priv; | 577 | struct ocfs2_mem_dqinfo *info = lockres->l_priv; |
582 | 578 | ||
583 | return OCFS2_SB(info->dqi_gi.dqi_sb); | 579 | return OCFS2_SB(info->dqi_gi.dqi_sb); |
584 | } | 580 | } |
585 | 581 | ||
586 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | 582 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) |
587 | { | 583 | { |
588 | struct ocfs2_file_private *fp = lockres->l_priv; | 584 | struct ocfs2_file_private *fp = lockres->l_priv; |
589 | 585 | ||
590 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | 586 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); |
591 | } | 587 | } |
592 | 588 | ||
593 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 589 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) |
594 | { | 590 | { |
595 | __be64 inode_blkno_be; | 591 | __be64 inode_blkno_be; |
596 | 592 | ||
597 | memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], | 593 | memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], |
598 | sizeof(__be64)); | 594 | sizeof(__be64)); |
599 | 595 | ||
600 | return be64_to_cpu(inode_blkno_be); | 596 | return be64_to_cpu(inode_blkno_be); |
601 | } | 597 | } |
602 | 598 | ||
603 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) | 599 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) |
604 | { | 600 | { |
605 | struct ocfs2_dentry_lock *dl = lockres->l_priv; | 601 | struct ocfs2_dentry_lock *dl = lockres->l_priv; |
606 | 602 | ||
607 | return OCFS2_SB(dl->dl_inode->i_sb); | 603 | return OCFS2_SB(dl->dl_inode->i_sb); |
608 | } | 604 | } |
609 | 605 | ||
610 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 606 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, |
611 | u64 parent, struct inode *inode) | 607 | u64 parent, struct inode *inode) |
612 | { | 608 | { |
613 | int len; | 609 | int len; |
614 | u64 inode_blkno = OCFS2_I(inode)->ip_blkno; | 610 | u64 inode_blkno = OCFS2_I(inode)->ip_blkno; |
615 | __be64 inode_blkno_be = cpu_to_be64(inode_blkno); | 611 | __be64 inode_blkno_be = cpu_to_be64(inode_blkno); |
616 | struct ocfs2_lock_res *lockres = &dl->dl_lockres; | 612 | struct ocfs2_lock_res *lockres = &dl->dl_lockres; |
617 | 613 | ||
618 | ocfs2_lock_res_init_once(lockres); | 614 | ocfs2_lock_res_init_once(lockres); |
619 | 615 | ||
620 | /* | 616 | /* |
621 | * Unfortunately, the standard lock naming scheme won't work | 617 | * Unfortunately, the standard lock naming scheme won't work |
622 | * here because we have two 16 byte values to use. Instead, | 618 | * here because we have two 16 byte values to use. Instead, |
623 | * we'll stuff the inode number as a binary value. We still | 619 | * we'll stuff the inode number as a binary value. We still |
624 | * want error prints to show something without garbling the | 620 | * want error prints to show something without garbling the |
625 | * display, so drop a null byte in there before the inode | 621 | * display, so drop a null byte in there before the inode |
626 | * number. A future version of OCFS2 will likely use all | 622 | * number. A future version of OCFS2 will likely use all |
627 | * binary lock names. The stringified names have been a | 623 | * binary lock names. The stringified names have been a |
628 | * tremendous aid in debugging, but now that the debugfs | 624 | * tremendous aid in debugging, but now that the debugfs |
629 | * interface exists, we can mangle things there if need be. | 625 | * interface exists, we can mangle things there if need be. |
630 | * | 626 | * |
631 | * NOTE: We also drop the standard "pad" value (the total lock | 627 | * NOTE: We also drop the standard "pad" value (the total lock |
632 | * name size stays the same though - the last part is all | 628 | * name size stays the same though - the last part is all |
633 | * zeros due to the memset in ocfs2_lock_res_init_once() | 629 | * zeros due to the memset in ocfs2_lock_res_init_once() |
634 | */ | 630 | */ |
635 | len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, | 631 | len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, |
636 | "%c%016llx", | 632 | "%c%016llx", |
637 | ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), | 633 | ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), |
638 | (long long)parent); | 634 | (long long)parent); |
639 | 635 | ||
640 | BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); | 636 | BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); |
641 | 637 | ||
642 | memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, | 638 | memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, |
643 | sizeof(__be64)); | 639 | sizeof(__be64)); |
644 | 640 | ||
645 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | 641 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, |
646 | OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, | 642 | OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, |
647 | dl); | 643 | dl); |
648 | } | 644 | } |
649 | 645 | ||
650 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, | 646 | static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, |
651 | struct ocfs2_super *osb) | 647 | struct ocfs2_super *osb) |
652 | { | 648 | { |
653 | /* Superblock lockres doesn't come from a slab so we call init | 649 | /* Superblock lockres doesn't come from a slab so we call init |
654 | * once on it manually. */ | 650 | * once on it manually. */ |
655 | ocfs2_lock_res_init_once(res); | 651 | ocfs2_lock_res_init_once(res); |
656 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, | 652 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, |
657 | 0, res->l_name); | 653 | 0, res->l_name); |
658 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, | 654 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, |
659 | &ocfs2_super_lops, osb); | 655 | &ocfs2_super_lops, osb); |
660 | } | 656 | } |
661 | 657 | ||
662 | static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | 658 | static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, |
663 | struct ocfs2_super *osb) | 659 | struct ocfs2_super *osb) |
664 | { | 660 | { |
665 | /* Rename lockres doesn't come from a slab so we call init | 661 | /* Rename lockres doesn't come from a slab so we call init |
666 | * once on it manually. */ | 662 | * once on it manually. */ |
667 | ocfs2_lock_res_init_once(res); | 663 | ocfs2_lock_res_init_once(res); |
668 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); | 664 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); |
669 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, | 665 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, |
670 | &ocfs2_rename_lops, osb); | 666 | &ocfs2_rename_lops, osb); |
671 | } | 667 | } |
672 | 668 | ||
673 | static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, | 669 | static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, |
674 | struct ocfs2_super *osb) | 670 | struct ocfs2_super *osb) |
675 | { | 671 | { |
676 | /* nfs_sync lockres doesn't come from a slab so we call init | 672 | /* nfs_sync lockres doesn't come from a slab so we call init |
677 | * once on it manually. */ | 673 | * once on it manually. */ |
678 | ocfs2_lock_res_init_once(res); | 674 | ocfs2_lock_res_init_once(res); |
679 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); | 675 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); |
680 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, | 676 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, |
681 | &ocfs2_nfs_sync_lops, osb); | 677 | &ocfs2_nfs_sync_lops, osb); |
682 | } | 678 | } |
683 | 679 | ||
684 | static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, | 680 | static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, |
685 | struct ocfs2_super *osb) | 681 | struct ocfs2_super *osb) |
686 | { | 682 | { |
687 | ocfs2_lock_res_init_once(res); | 683 | ocfs2_lock_res_init_once(res); |
688 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); | 684 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); |
689 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, | 685 | ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, |
690 | &ocfs2_orphan_scan_lops, osb); | 686 | &ocfs2_orphan_scan_lops, osb); |
691 | } | 687 | } |
692 | 688 | ||
693 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | 689 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, |
694 | struct ocfs2_file_private *fp) | 690 | struct ocfs2_file_private *fp) |
695 | { | 691 | { |
696 | struct inode *inode = fp->fp_file->f_mapping->host; | 692 | struct inode *inode = fp->fp_file->f_mapping->host; |
697 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 693 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
698 | 694 | ||
699 | ocfs2_lock_res_init_once(lockres); | 695 | ocfs2_lock_res_init_once(lockres); |
700 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | 696 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, |
701 | inode->i_generation, lockres->l_name); | 697 | inode->i_generation, lockres->l_name); |
702 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | 698 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, |
703 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | 699 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, |
704 | fp); | 700 | fp); |
705 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | 701 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; |
706 | } | 702 | } |
707 | 703 | ||
708 | void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, | 704 | void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, |
709 | struct ocfs2_mem_dqinfo *info) | 705 | struct ocfs2_mem_dqinfo *info) |
710 | { | 706 | { |
711 | ocfs2_lock_res_init_once(lockres); | 707 | ocfs2_lock_res_init_once(lockres); |
712 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, | 708 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, |
713 | 0, lockres->l_name); | 709 | 0, lockres->l_name); |
714 | ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, | 710 | ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, |
715 | OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, | 711 | OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, |
716 | info); | 712 | info); |
717 | } | 713 | } |
718 | 714 | ||
719 | void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, | 715 | void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, |
720 | struct ocfs2_super *osb, u64 ref_blkno, | 716 | struct ocfs2_super *osb, u64 ref_blkno, |
721 | unsigned int generation) | 717 | unsigned int generation) |
722 | { | 718 | { |
723 | ocfs2_lock_res_init_once(lockres); | 719 | ocfs2_lock_res_init_once(lockres); |
724 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, | 720 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, |
725 | generation, lockres->l_name); | 721 | generation, lockres->l_name); |
726 | ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, | 722 | ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, |
727 | &ocfs2_refcount_block_lops, osb); | 723 | &ocfs2_refcount_block_lops, osb); |
728 | } | 724 | } |
729 | 725 | ||
730 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 726 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) |
731 | { | 727 | { |
732 | mlog_entry_void(); | 728 | mlog_entry_void(); |
733 | 729 | ||
734 | if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) | 730 | if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) |
735 | return; | 731 | return; |
736 | 732 | ||
737 | ocfs2_remove_lockres_tracking(res); | 733 | ocfs2_remove_lockres_tracking(res); |
738 | 734 | ||
739 | mlog_bug_on_msg(!list_empty(&res->l_blocked_list), | 735 | mlog_bug_on_msg(!list_empty(&res->l_blocked_list), |
740 | "Lockres %s is on the blocked list\n", | 736 | "Lockres %s is on the blocked list\n", |
741 | res->l_name); | 737 | res->l_name); |
742 | mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), | 738 | mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), |
743 | "Lockres %s has mask waiters pending\n", | 739 | "Lockres %s has mask waiters pending\n", |
744 | res->l_name); | 740 | res->l_name); |
745 | mlog_bug_on_msg(spin_is_locked(&res->l_lock), | 741 | mlog_bug_on_msg(spin_is_locked(&res->l_lock), |
746 | "Lockres %s is locked\n", | 742 | "Lockres %s is locked\n", |
747 | res->l_name); | 743 | res->l_name); |
748 | mlog_bug_on_msg(res->l_ro_holders, | 744 | mlog_bug_on_msg(res->l_ro_holders, |
749 | "Lockres %s has %u ro holders\n", | 745 | "Lockres %s has %u ro holders\n", |
750 | res->l_name, res->l_ro_holders); | 746 | res->l_name, res->l_ro_holders); |
751 | mlog_bug_on_msg(res->l_ex_holders, | 747 | mlog_bug_on_msg(res->l_ex_holders, |
752 | "Lockres %s has %u ex holders\n", | 748 | "Lockres %s has %u ex holders\n", |
753 | res->l_name, res->l_ex_holders); | 749 | res->l_name, res->l_ex_holders); |
754 | 750 | ||
755 | /* Need to clear out the lock status block for the dlm */ | 751 | /* Need to clear out the lock status block for the dlm */ |
756 | memset(&res->l_lksb, 0, sizeof(res->l_lksb)); | 752 | memset(&res->l_lksb, 0, sizeof(res->l_lksb)); |
757 | 753 | ||
758 | res->l_flags = 0UL; | 754 | res->l_flags = 0UL; |
759 | mlog_exit_void(); | 755 | mlog_exit_void(); |
760 | } | 756 | } |
761 | 757 | ||
762 | static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | 758 | static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, |
763 | int level) | 759 | int level) |
764 | { | 760 | { |
765 | mlog_entry_void(); | 761 | mlog_entry_void(); |
766 | 762 | ||
767 | BUG_ON(!lockres); | 763 | BUG_ON(!lockres); |
768 | 764 | ||
769 | switch(level) { | 765 | switch(level) { |
770 | case DLM_LOCK_EX: | 766 | case DLM_LOCK_EX: |
771 | lockres->l_ex_holders++; | 767 | lockres->l_ex_holders++; |
772 | break; | 768 | break; |
773 | case DLM_LOCK_PR: | 769 | case DLM_LOCK_PR: |
774 | lockres->l_ro_holders++; | 770 | lockres->l_ro_holders++; |
775 | break; | 771 | break; |
776 | default: | 772 | default: |
777 | BUG(); | 773 | BUG(); |
778 | } | 774 | } |
779 | 775 | ||
780 | mlog_exit_void(); | 776 | mlog_exit_void(); |
781 | } | 777 | } |
782 | 778 | ||
783 | static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | 779 | static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, |
784 | int level) | 780 | int level) |
785 | { | 781 | { |
786 | mlog_entry_void(); | 782 | mlog_entry_void(); |
787 | 783 | ||
788 | BUG_ON(!lockres); | 784 | BUG_ON(!lockres); |
789 | 785 | ||
790 | switch(level) { | 786 | switch(level) { |
791 | case DLM_LOCK_EX: | 787 | case DLM_LOCK_EX: |
792 | BUG_ON(!lockres->l_ex_holders); | 788 | BUG_ON(!lockres->l_ex_holders); |
793 | lockres->l_ex_holders--; | 789 | lockres->l_ex_holders--; |
794 | break; | 790 | break; |
795 | case DLM_LOCK_PR: | 791 | case DLM_LOCK_PR: |
796 | BUG_ON(!lockres->l_ro_holders); | 792 | BUG_ON(!lockres->l_ro_holders); |
797 | lockres->l_ro_holders--; | 793 | lockres->l_ro_holders--; |
798 | break; | 794 | break; |
799 | default: | 795 | default: |
800 | BUG(); | 796 | BUG(); |
801 | } | 797 | } |
802 | mlog_exit_void(); | 798 | mlog_exit_void(); |
803 | } | 799 | } |
804 | 800 | ||
805 | /* WARNING: This function lives in a world where the only three lock | 801 | /* WARNING: This function lives in a world where the only three lock |
806 | * levels are EX, PR, and NL. It *will* have to be adjusted when more | 802 | * levels are EX, PR, and NL. It *will* have to be adjusted when more |
807 | * lock types are added. */ | 803 | * lock types are added. */ |
808 | static inline int ocfs2_highest_compat_lock_level(int level) | 804 | static inline int ocfs2_highest_compat_lock_level(int level) |
809 | { | 805 | { |
810 | int new_level = DLM_LOCK_EX; | 806 | int new_level = DLM_LOCK_EX; |
811 | 807 | ||
812 | if (level == DLM_LOCK_EX) | 808 | if (level == DLM_LOCK_EX) |
813 | new_level = DLM_LOCK_NL; | 809 | new_level = DLM_LOCK_NL; |
814 | else if (level == DLM_LOCK_PR) | 810 | else if (level == DLM_LOCK_PR) |
815 | new_level = DLM_LOCK_PR; | 811 | new_level = DLM_LOCK_PR; |
816 | return new_level; | 812 | return new_level; |
817 | } | 813 | } |
818 | 814 | ||
819 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, | 815 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, |
820 | unsigned long newflags) | 816 | unsigned long newflags) |
821 | { | 817 | { |
822 | struct ocfs2_mask_waiter *mw, *tmp; | 818 | struct ocfs2_mask_waiter *mw, *tmp; |
823 | 819 | ||
824 | assert_spin_locked(&lockres->l_lock); | 820 | assert_spin_locked(&lockres->l_lock); |
825 | 821 | ||
826 | lockres->l_flags = newflags; | 822 | lockres->l_flags = newflags; |
827 | 823 | ||
828 | list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { | 824 | list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { |
829 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 825 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) |
830 | continue; | 826 | continue; |
831 | 827 | ||
832 | list_del_init(&mw->mw_item); | 828 | list_del_init(&mw->mw_item); |
833 | mw->mw_status = 0; | 829 | mw->mw_status = 0; |
834 | complete(&mw->mw_complete); | 830 | complete(&mw->mw_complete); |
835 | } | 831 | } |
836 | } | 832 | } |
837 | static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) | 833 | static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) |
838 | { | 834 | { |
839 | lockres_set_flags(lockres, lockres->l_flags | or); | 835 | lockres_set_flags(lockres, lockres->l_flags | or); |
840 | } | 836 | } |
841 | static void lockres_clear_flags(struct ocfs2_lock_res *lockres, | 837 | static void lockres_clear_flags(struct ocfs2_lock_res *lockres, |
842 | unsigned long clear) | 838 | unsigned long clear) |
843 | { | 839 | { |
844 | lockres_set_flags(lockres, lockres->l_flags & ~clear); | 840 | lockres_set_flags(lockres, lockres->l_flags & ~clear); |
845 | } | 841 | } |
846 | 842 | ||
847 | static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) | 843 | static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) |
848 | { | 844 | { |
849 | mlog_entry_void(); | 845 | mlog_entry_void(); |
850 | 846 | ||
851 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 847 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); |
852 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 848 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); |
853 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 849 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); |
854 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); | 850 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
855 | 851 | ||
856 | lockres->l_level = lockres->l_requested; | 852 | lockres->l_level = lockres->l_requested; |
857 | if (lockres->l_level <= | 853 | if (lockres->l_level <= |
858 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { | 854 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { |
859 | lockres->l_blocking = DLM_LOCK_NL; | 855 | lockres->l_blocking = DLM_LOCK_NL; |
860 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 856 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); |
861 | } | 857 | } |
862 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 858 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
863 | 859 | ||
864 | mlog_exit_void(); | 860 | mlog_exit_void(); |
865 | } | 861 | } |
866 | 862 | ||
/*
 * Complete a convert whose AST has fired.  Called under
 * lockres->l_lock (from ocfs2_locking_ast()); the lockres must be
 * BUSY and ATTACHED.  Records the granted level and clears BUSY.
 */
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
{
	mlog_entry_void();

	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));

	/* Convert from RO to EX doesn't really need anything as our
	 * information is already up to date. Convert from NL to
	 * *anything* however should mark ourselves as needing an
	 * update */
	if (lockres->l_level == DLM_LOCK_NL &&
	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);

	lockres->l_level = lockres->l_requested;

	/*
	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
	 * downconverting the lock before the upconvert has fully completed.
	 */
	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);

	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);

	mlog_exit_void();
}
895 | 891 | ||
/*
 * Complete the initial attach of a lockres whose AST has fired.
 * Called under lockres->l_lock (from ocfs2_locking_ast()); the lockres
 * must be BUSY and not yet ATTACHED.
 */
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
{
	mlog_entry_void();

	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);

	/*
	 * A grant above NL on a lock type that requires refresh needs
	 * one, unless this node created the resource locally
	 * (OCFS2_LOCK_LOCAL).
	 */
	if (lockres->l_requested > DLM_LOCK_NL &&
	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);

	lockres->l_level = lockres->l_requested;
	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);

	mlog_exit_void();
}
914 | 910 | ||
915 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | 911 | static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, |
916 | int level) | 912 | int level) |
917 | { | 913 | { |
918 | int needs_downconvert = 0; | 914 | int needs_downconvert = 0; |
919 | mlog_entry_void(); | 915 | mlog_entry_void(); |
920 | 916 | ||
921 | assert_spin_locked(&lockres->l_lock); | 917 | assert_spin_locked(&lockres->l_lock); |
922 | 918 | ||
923 | if (level > lockres->l_blocking) { | 919 | if (level > lockres->l_blocking) { |
924 | /* only schedule a downconvert if we haven't already scheduled | 920 | /* only schedule a downconvert if we haven't already scheduled |
925 | * one that goes low enough to satisfy the level we're | 921 | * one that goes low enough to satisfy the level we're |
926 | * blocking. this also catches the case where we get | 922 | * blocking. this also catches the case where we get |
927 | * duplicate BASTs */ | 923 | * duplicate BASTs */ |
928 | if (ocfs2_highest_compat_lock_level(level) < | 924 | if (ocfs2_highest_compat_lock_level(level) < |
929 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) | 925 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) |
930 | needs_downconvert = 1; | 926 | needs_downconvert = 1; |
931 | 927 | ||
932 | lockres->l_blocking = level; | 928 | lockres->l_blocking = level; |
933 | } | 929 | } |
934 | 930 | ||
935 | mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", | 931 | mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", |
936 | lockres->l_name, level, lockres->l_level, lockres->l_blocking, | 932 | lockres->l_name, level, lockres->l_level, lockres->l_blocking, |
937 | needs_downconvert); | 933 | needs_downconvert); |
938 | 934 | ||
939 | if (needs_downconvert) | 935 | if (needs_downconvert) |
940 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 936 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
941 | 937 | ||
942 | mlog_exit(needs_downconvert); | 938 | mlog_exit(needs_downconvert); |
943 | return needs_downconvert; | 939 | return needs_downconvert; |
944 | } | 940 | } |
945 | 941 | ||
946 | /* | 942 | /* |
947 | * OCFS2_LOCK_PENDING and l_pending_gen. | 943 | * OCFS2_LOCK_PENDING and l_pending_gen. |
948 | * | 944 | * |
949 | * Why does OCFS2_LOCK_PENDING exist? To close a race between setting | 945 | * Why does OCFS2_LOCK_PENDING exist? To close a race between setting |
950 | * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() | 946 | * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() |
951 | * for more details on the race. | 947 | * for more details on the race. |
952 | * | 948 | * |
953 | * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces | 949 | * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces |
954 | * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() | 950 | * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() |
955 | * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear | 951 | * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear |
956 | * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, | 952 | * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, |
957 | * the caller is going to try to clear PENDING again. If nothing else is | 953 | * the caller is going to try to clear PENDING again. If nothing else is |
958 | * happening, __lockres_clear_pending() sees PENDING is unset and does | 954 | * happening, __lockres_clear_pending() sees PENDING is unset and does |
959 | * nothing. | 955 | * nothing. |
960 | * | 956 | * |
961 | * But what if another path (eg downconvert thread) has just started a | 957 | * But what if another path (eg downconvert thread) has just started a |
962 | * new locking action? The other path has re-set PENDING. Our path | 958 | * new locking action? The other path has re-set PENDING. Our path |
963 | * cannot clear PENDING, because that will re-open the original race | 959 | * cannot clear PENDING, because that will re-open the original race |
964 | * window. | 960 | * window. |
965 | * | 961 | * |
966 | * [Example] | 962 | * [Example] |
967 | * | 963 | * |
968 | * ocfs2_meta_lock() | 964 | * ocfs2_meta_lock() |
969 | * ocfs2_cluster_lock() | 965 | * ocfs2_cluster_lock() |
970 | * set BUSY | 966 | * set BUSY |
971 | * set PENDING | 967 | * set PENDING |
972 | * drop l_lock | 968 | * drop l_lock |
973 | * ocfs2_dlm_lock() | 969 | * ocfs2_dlm_lock() |
974 | * ocfs2_locking_ast() ocfs2_downconvert_thread() | 970 | * ocfs2_locking_ast() ocfs2_downconvert_thread() |
975 | * clear PENDING ocfs2_unblock_lock() | 971 | * clear PENDING ocfs2_unblock_lock() |
976 | * take_l_lock | 972 | * take_l_lock |
977 | * !BUSY | 973 | * !BUSY |
978 | * ocfs2_prepare_downconvert() | 974 | * ocfs2_prepare_downconvert() |
979 | * set BUSY | 975 | * set BUSY |
980 | * set PENDING | 976 | * set PENDING |
981 | * drop l_lock | 977 | * drop l_lock |
982 | * take l_lock | 978 | * take l_lock |
983 | * clear PENDING | 979 | * clear PENDING |
984 | * drop l_lock | 980 | * drop l_lock |
985 | * <window> | 981 | * <window> |
986 | * ocfs2_dlm_lock() | 982 | * ocfs2_dlm_lock() |
987 | * | 983 | * |
988 | * So as you can see, we now have a window where l_lock is not held, | 984 | * So as you can see, we now have a window where l_lock is not held, |
989 | * PENDING is not set, and ocfs2_dlm_lock() has not been called. | 985 | * PENDING is not set, and ocfs2_dlm_lock() has not been called. |
990 | * | 986 | * |
991 | * The core problem is that ocfs2_cluster_lock() has cleared the PENDING | 987 | * The core problem is that ocfs2_cluster_lock() has cleared the PENDING |
992 | * set by ocfs2_prepare_downconvert(). That wasn't nice. | 988 | * set by ocfs2_prepare_downconvert(). That wasn't nice. |
993 | * | 989 | * |
994 | * To solve this we introduce l_pending_gen. A call to | 990 | * To solve this we introduce l_pending_gen. A call to |
995 | * lockres_clear_pending() will only do so when it is passed a generation | 991 | * lockres_clear_pending() will only do so when it is passed a generation |
996 | * number that matches the lockres. lockres_set_pending() will return the | 992 | * number that matches the lockres. lockres_set_pending() will return the |
997 | * current generation number. When ocfs2_cluster_lock() goes to clear | 993 | * current generation number. When ocfs2_cluster_lock() goes to clear |
998 | * PENDING, it passes the generation it got from set_pending(). In our | 994 | * PENDING, it passes the generation it got from set_pending(). In our |
999 | * example above, the generation numbers will *not* match. Thus, | 995 | * example above, the generation numbers will *not* match. Thus, |
1000 | * ocfs2_cluster_lock() will not clear the PENDING set by | 996 | * ocfs2_cluster_lock() will not clear the PENDING set by |
1001 | * ocfs2_prepare_downconvert(). | 997 | * ocfs2_prepare_downconvert(). |
1002 | */ | 998 | */ |
1003 | 999 | ||
/* Unlocked version for ocfs2_locking_ast() */
/*
 * Clear OCFS2_LOCK_PENDING on @lockres, but only if @generation still
 * matches l_pending_gen (see the OCFS2_LOCK_PENDING commentary above).
 * Caller must hold lockres->l_lock.
 */
static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
				    unsigned int generation,
				    struct ocfs2_super *osb)
{
	assert_spin_locked(&lockres->l_lock);

	/*
	 * The ast and locking functions can race us here. The winner
	 * will clear pending, the loser will not.
	 */
	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
	    (lockres->l_pending_gen != generation))
		return;

	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
	/* Bump the generation so any stale clear attempt is a no-op. */
	lockres->l_pending_gen++;

	/*
	 * The downconvert thread may have skipped us because we
	 * were PENDING.  Wake it up.
	 */
	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
		ocfs2_wake_downconvert_thread(osb);
}
1029 | 1025 | ||
/* Locked version for callers of ocfs2_dlm_lock() */
/*
 * Take lockres->l_lock and conditionally clear PENDING; @generation is
 * the value previously returned by lockres_set_pending().
 */
static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
				  unsigned int generation,
				  struct ocfs2_super *osb)
{
	unsigned long flags;

	spin_lock_irqsave(&lockres->l_lock, flags);
	__lockres_clear_pending(lockres, generation, osb);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}
1041 | 1037 | ||
/*
 * Mark @lockres PENDING ahead of an ocfs2_dlm_lock() call.  Caller must
 * hold lockres->l_lock and the lockres must already be BUSY.  Returns
 * the current generation, to be handed back to lockres_clear_pending().
 */
static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
{
	assert_spin_locked(&lockres->l_lock);
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));

	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);

	return lockres->l_pending_gen;
}
1051 | 1047 | ||
1052 | static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) | 1048 | static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) |
1053 | { | 1049 | { |
1054 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); | 1050 | struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); |
1055 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 1051 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1056 | int needs_downconvert; | 1052 | int needs_downconvert; |
1057 | unsigned long flags; | 1053 | unsigned long flags; |
1058 | 1054 | ||
1059 | BUG_ON(level <= DLM_LOCK_NL); | 1055 | BUG_ON(level <= DLM_LOCK_NL); |
1060 | 1056 | ||
1061 | mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " | 1057 | mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " |
1062 | "type %s\n", lockres->l_name, level, lockres->l_level, | 1058 | "type %s\n", lockres->l_name, level, lockres->l_level, |
1063 | ocfs2_lock_type_string(lockres->l_type)); | 1059 | ocfs2_lock_type_string(lockres->l_type)); |
1064 | 1060 | ||
1065 | /* | 1061 | /* |
1066 | * We can skip the bast for locks which don't enable caching - | 1062 | * We can skip the bast for locks which don't enable caching - |
1067 | * they'll be dropped at the earliest possible time anyway. | 1063 | * they'll be dropped at the earliest possible time anyway. |
1068 | */ | 1064 | */ |
1069 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | 1065 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) |
1070 | return; | 1066 | return; |
1071 | 1067 | ||
1072 | spin_lock_irqsave(&lockres->l_lock, flags); | 1068 | spin_lock_irqsave(&lockres->l_lock, flags); |
1073 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 1069 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
1074 | if (needs_downconvert) | 1070 | if (needs_downconvert) |
1075 | ocfs2_schedule_blocked_lock(osb, lockres); | 1071 | ocfs2_schedule_blocked_lock(osb, lockres); |
1076 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1072 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1077 | 1073 | ||
1078 | wake_up(&lockres->l_event); | 1074 | wake_up(&lockres->l_event); |
1079 | 1075 | ||
1080 | ocfs2_wake_downconvert_thread(osb); | 1076 | ocfs2_wake_downconvert_thread(osb); |
1081 | } | 1077 | } |
1082 | 1078 | ||
/*
 * Lock AST handler: fired when an ocfs2_dlm_lock() request (attach,
 * convert or downconvert) completes.  Finishes the action recorded in
 * l_action, clears the PENDING state and wakes waiters.
 */
static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	int status;

	spin_lock_irqsave(&lockres->l_lock, flags);

	status = ocfs2_dlm_lock_status(&lockres->l_lksb);

	if (status == -EAGAIN) {
		/* Request was not granted; drop BUSY and still reset the
		 * action/pending state below. */
		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
		goto out;
	}

	if (status) {
		/* Unexpected lksb status: log and bail without touching
		 * the action state. */
		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
		     lockres->l_name, status);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		return;
	}

	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
	     "level %d => %d\n", lockres->l_name, lockres->l_action,
	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);

	switch(lockres->l_action) {
	case OCFS2_AST_ATTACH:
		ocfs2_generic_handle_attach_action(lockres);
		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
		break;
	case OCFS2_AST_CONVERT:
		ocfs2_generic_handle_convert_action(lockres);
		break;
	case OCFS2_AST_DOWNCONVERT:
		ocfs2_generic_handle_downconvert_action(lockres);
		break;
	default:
		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
		     "flags 0x%lx, unlock: %u\n",
		     lockres->l_name, lockres->l_action, lockres->l_flags,
		     lockres->l_unlock_action);
		BUG();
	}
out:
	/* set it to something invalid so if we get called again we
	 * can catch it. */
	lockres->l_action = OCFS2_AST_INVALID;

	/* Did we try to cancel this lock? Clear that state */
	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;

	/*
	 * We may have beaten the locking functions here. We certainly
	 * know that dlm_lock() has been called :-)
	 * Because we can't have two lock calls in flight at once, we
	 * can use lockres->l_pending_gen.
	 */
	__lockres_clear_pending(lockres, lockres->l_pending_gen, osb);

	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}
1148 | 1144 | ||
/*
 * Unlock AST handler: fired when an ocfs2_dlm_unlock() request
 * completes, either for a cancelled convert or a full lock drop.
 * @error, when nonzero, is the DLM's failure code.
 */
static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	unsigned long flags;

	mlog_entry_void();

	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
	     lockres->l_name, lockres->l_unlock_action);

	spin_lock_irqsave(&lockres->l_lock, flags);
	if (error) {
		/* On error, leave the unlock state untouched so it can
		 * be inspected; just log and return. */
		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
		     "unlock_action %d\n", error, lockres->l_name,
		     lockres->l_unlock_action);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		mlog_exit_void();
		return;
	}

	switch(lockres->l_unlock_action) {
	case OCFS2_UNLOCK_CANCEL_CONVERT:
		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
		lockres->l_action = OCFS2_AST_INVALID;
		/* Downconvert thread may have requeued this lock, we
		 * need to wake it. */
		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
		break;
	case OCFS2_UNLOCK_DROP_LOCK:
		/* Fully dropped: level becomes invalid. */
		lockres->l_level = DLM_LOCK_IV;
		break;
	default:
		BUG();
	}

	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog_exit_void();
}
1192 | 1188 | ||
/*
 * This is the filesystem locking protocol.  It provides the lock handling
 * hooks for the underlying DLM.  It has a maximum version number.
 * The version number allows interoperability with systems running at
 * the same major number and an equal or smaller minor number.
 *
 * Whenever the filesystem does new things with locks (adds or removes a
 * lock, orders them differently, does different things underneath a lock),
 * the version must be changed.  The protocol is negotiated when joining
 * the dlm domain.  A node may join the domain if its major version is
 * identical to all other nodes and its minor version is greater than
 * or equal to all other nodes.  When its minor version is greater than
 * the other nodes, it will run at the minor version specified by the
 * other nodes.
 *
 * If a locking change is made that will not be compatible with older
 * versions, the major number must be increased and the minor version set
 * to zero.  If a change merely adds a behavior that can be disabled when
 * speaking to older versions, the minor version must be increased.  If a
 * change adds a fully backwards compatible change (eg, LVB changes that
 * are just ignored by older versions), the version does not need to be
 * updated.
 */
static struct ocfs2_locking_protocol lproto = {
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	.lp_lock_ast = ocfs2_locking_ast,	/* lock request completed */
	.lp_blocking_ast = ocfs2_blocking_ast,	/* conflicting request arrived */
	.lp_unlock_ast = ocfs2_unlock_ast,	/* unlock/cancel completed */
};
1225 | 1221 | ||
/* Register our maximum supported locking protocol version with the
 * stack glue. */
void ocfs2_set_locking_protocol(void)
{
	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
}
1230 | 1226 | ||
/*
 * Reset a lockres after a failed DLM call.  @convert is nonzero when
 * the failure came from ocfs2_dlm_lock() (invalidate l_action), zero
 * when it came from an unlock path (invalidate l_unlock_action).
 * Clears BUSY/UPCONVERT_FINISHING and wakes any waiters.
 */
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert)
{
	unsigned long flags;

	mlog_entry_void();
	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
	if (convert)
		lockres->l_action = OCFS2_AST_INVALID;
	else
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	wake_up(&lockres->l_event);
	mlog_exit_void();
}
1249 | 1245 | ||
/* Note: If we detect another process working on the lock (i.e.,
 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
 * to do the right thing in that case.
 */
/*
 * Attach a lockres to the DLM at @level.  @dlm_flags is passed through
 * to ocfs2_dlm_lock().  The PENDING generation returned by
 * lockres_set_pending() brackets the ocfs2_dlm_lock() call (see the
 * OCFS2_LOCK_PENDING commentary earlier in this file).
 */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags)
{
	int ret = 0;
	unsigned long flags;
	unsigned int gen;

	mlog_entry_void();

	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
	     dlm_flags);

	spin_lock_irqsave(&lockres->l_lock, flags);
	/* Already attached, or someone else is mid-operation: bail with 0. */
	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	lockres->l_action = OCFS2_AST_ATTACH;
	lockres->l_requested = level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	gen = lockres_set_pending(lockres);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_dlm_lock(osb->cconn,
			     level,
			     &lockres->l_lksb,
			     dlm_flags,
			     lockres->l_name,
			     OCFS2_LOCK_ID_MAX_LEN - 1);
	lockres_clear_pending(lockres, gen, osb);
	if (ret) {
		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
		ocfs2_recover_from_dlm_error(lockres, 1);
	}

	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);

bail:
	mlog_exit(ret);
	return ret;
}
1299 | 1295 | ||
1300 | static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, | 1296 | static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, |
1301 | int flag) | 1297 | int flag) |
1302 | { | 1298 | { |
1303 | unsigned long flags; | 1299 | unsigned long flags; |
1304 | int ret; | 1300 | int ret; |
1305 | 1301 | ||
1306 | spin_lock_irqsave(&lockres->l_lock, flags); | 1302 | spin_lock_irqsave(&lockres->l_lock, flags); |
1307 | ret = lockres->l_flags & flag; | 1303 | ret = lockres->l_flags & flag; |
1308 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1304 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1309 | 1305 | ||
1310 | return ret; | 1306 | return ret; |
1311 | } | 1307 | } |
1312 | 1308 | ||
1313 | static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) | 1309 | static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) |
1314 | 1310 | ||
1315 | { | 1311 | { |
1316 | wait_event(lockres->l_event, | 1312 | wait_event(lockres->l_event, |
1317 | !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); | 1313 | !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); |
1318 | } | 1314 | } |
1319 | 1315 | ||
1320 | static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) | 1316 | static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) |
1321 | 1317 | ||
1322 | { | 1318 | { |
1323 | wait_event(lockres->l_event, | 1319 | wait_event(lockres->l_event, |
1324 | !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); | 1320 | !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); |
1325 | } | 1321 | } |
1326 | 1322 | ||
1327 | /* predict what lock level we'll be dropping down to on behalf | 1323 | /* predict what lock level we'll be dropping down to on behalf |
1328 | * of another node, and return true if the currently wanted | 1324 | * of another node, and return true if the currently wanted |
1329 | * level will be compatible with it. */ | 1325 | * level will be compatible with it. */ |
1330 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 1326 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, |
1331 | int wanted) | 1327 | int wanted) |
1332 | { | 1328 | { |
1333 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 1329 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); |
1334 | 1330 | ||
1335 | return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); | 1331 | return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); |
1336 | } | 1332 | } |
1337 | 1333 | ||
1338 | static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) | 1334 | static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) |
1339 | { | 1335 | { |
1340 | INIT_LIST_HEAD(&mw->mw_item); | 1336 | INIT_LIST_HEAD(&mw->mw_item); |
1341 | init_completion(&mw->mw_complete); | 1337 | init_completion(&mw->mw_complete); |
1342 | ocfs2_init_start_time(mw); | 1338 | ocfs2_init_start_time(mw); |
1343 | } | 1339 | } |
1344 | 1340 | ||
1345 | static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) | 1341 | static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) |
1346 | { | 1342 | { |
1347 | wait_for_completion(&mw->mw_complete); | 1343 | wait_for_completion(&mw->mw_complete); |
1348 | /* Re-arm the completion in case we want to wait on it again */ | 1344 | /* Re-arm the completion in case we want to wait on it again */ |
1349 | INIT_COMPLETION(mw->mw_complete); | 1345 | INIT_COMPLETION(mw->mw_complete); |
1350 | return mw->mw_status; | 1346 | return mw->mw_status; |
1351 | } | 1347 | } |
1352 | 1348 | ||
1353 | static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, | 1349 | static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, |
1354 | struct ocfs2_mask_waiter *mw, | 1350 | struct ocfs2_mask_waiter *mw, |
1355 | unsigned long mask, | 1351 | unsigned long mask, |
1356 | unsigned long goal) | 1352 | unsigned long goal) |
1357 | { | 1353 | { |
1358 | BUG_ON(!list_empty(&mw->mw_item)); | 1354 | BUG_ON(!list_empty(&mw->mw_item)); |
1359 | 1355 | ||
1360 | assert_spin_locked(&lockres->l_lock); | 1356 | assert_spin_locked(&lockres->l_lock); |
1361 | 1357 | ||
1362 | list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); | 1358 | list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); |
1363 | mw->mw_mask = mask; | 1359 | mw->mw_mask = mask; |
1364 | mw->mw_goal = goal; | 1360 | mw->mw_goal = goal; |
1365 | } | 1361 | } |
1366 | 1362 | ||
1367 | /* returns 0 if the mw that was removed was already satisfied, -EBUSY | 1363 | /* returns 0 if the mw that was removed was already satisfied, -EBUSY |
1368 | * if the mask still hadn't reached its goal */ | 1364 | * if the mask still hadn't reached its goal */ |
1369 | static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | 1365 | static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, |
1370 | struct ocfs2_mask_waiter *mw) | 1366 | struct ocfs2_mask_waiter *mw) |
1371 | { | 1367 | { |
1372 | unsigned long flags; | 1368 | unsigned long flags; |
1373 | int ret = 0; | 1369 | int ret = 0; |
1374 | 1370 | ||
1375 | spin_lock_irqsave(&lockres->l_lock, flags); | 1371 | spin_lock_irqsave(&lockres->l_lock, flags); |
1376 | if (!list_empty(&mw->mw_item)) { | 1372 | if (!list_empty(&mw->mw_item)) { |
1377 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 1373 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) |
1378 | ret = -EBUSY; | 1374 | ret = -EBUSY; |
1379 | 1375 | ||
1380 | list_del_init(&mw->mw_item); | 1376 | list_del_init(&mw->mw_item); |
1381 | init_completion(&mw->mw_complete); | 1377 | init_completion(&mw->mw_complete); |
1382 | } | 1378 | } |
1383 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1379 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1384 | 1380 | ||
1385 | return ret; | 1381 | return ret; |
1386 | 1382 | ||
1387 | } | 1383 | } |
1388 | 1384 | ||
1389 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | 1385 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, |
1390 | struct ocfs2_lock_res *lockres) | 1386 | struct ocfs2_lock_res *lockres) |
1391 | { | 1387 | { |
1392 | int ret; | 1388 | int ret; |
1393 | 1389 | ||
1394 | ret = wait_for_completion_interruptible(&mw->mw_complete); | 1390 | ret = wait_for_completion_interruptible(&mw->mw_complete); |
1395 | if (ret) | 1391 | if (ret) |
1396 | lockres_remove_mask_waiter(lockres, mw); | 1392 | lockres_remove_mask_waiter(lockres, mw); |
1397 | else | 1393 | else |
1398 | ret = mw->mw_status; | 1394 | ret = mw->mw_status; |
1399 | /* Re-arm the completion in case we want to wait on it again */ | 1395 | /* Re-arm the completion in case we want to wait on it again */ |
1400 | INIT_COMPLETION(mw->mw_complete); | 1396 | INIT_COMPLETION(mw->mw_complete); |
1401 | return ret; | 1397 | return ret; |
1402 | } | 1398 | } |
1403 | 1399 | ||
1404 | static int __ocfs2_cluster_lock(struct ocfs2_super *osb, | 1400 | static int __ocfs2_cluster_lock(struct ocfs2_super *osb, |
1405 | struct ocfs2_lock_res *lockres, | 1401 | struct ocfs2_lock_res *lockres, |
1406 | int level, | 1402 | int level, |
1407 | u32 lkm_flags, | 1403 | u32 lkm_flags, |
1408 | int arg_flags, | 1404 | int arg_flags, |
1409 | int l_subclass, | 1405 | int l_subclass, |
1410 | unsigned long caller_ip) | 1406 | unsigned long caller_ip) |
1411 | { | 1407 | { |
1412 | struct ocfs2_mask_waiter mw; | 1408 | struct ocfs2_mask_waiter mw; |
1413 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); | 1409 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); |
1414 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ | 1410 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ |
1415 | unsigned long flags; | 1411 | unsigned long flags; |
1416 | unsigned int gen; | 1412 | unsigned int gen; |
1417 | int noqueue_attempted = 0; | 1413 | int noqueue_attempted = 0; |
1418 | 1414 | ||
1419 | mlog_entry_void(); | 1415 | mlog_entry_void(); |
1420 | 1416 | ||
1421 | ocfs2_init_mask_waiter(&mw); | 1417 | ocfs2_init_mask_waiter(&mw); |
1422 | 1418 | ||
1423 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 1419 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
1424 | lkm_flags |= DLM_LKF_VALBLK; | 1420 | lkm_flags |= DLM_LKF_VALBLK; |
1425 | 1421 | ||
1426 | again: | 1422 | again: |
1427 | wait = 0; | 1423 | wait = 0; |
1428 | 1424 | ||
1429 | spin_lock_irqsave(&lockres->l_lock, flags); | 1425 | spin_lock_irqsave(&lockres->l_lock, flags); |
1430 | 1426 | ||
1431 | if (catch_signals && signal_pending(current)) { | 1427 | if (catch_signals && signal_pending(current)) { |
1432 | ret = -ERESTARTSYS; | 1428 | ret = -ERESTARTSYS; |
1433 | goto unlock; | 1429 | goto unlock; |
1434 | } | 1430 | } |
1435 | 1431 | ||
1436 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, | 1432 | mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, |
1437 | "Cluster lock called on freeing lockres %s! flags " | 1433 | "Cluster lock called on freeing lockres %s! flags " |
1438 | "0x%lx\n", lockres->l_name, lockres->l_flags); | 1434 | "0x%lx\n", lockres->l_name, lockres->l_flags); |
1439 | 1435 | ||
1440 | /* We only compare against the currently granted level | 1436 | /* We only compare against the currently granted level |
1441 | * here. If the lock is blocked waiting on a downconvert, | 1437 | * here. If the lock is blocked waiting on a downconvert, |
1442 | * we'll get caught below. */ | 1438 | * we'll get caught below. */ |
1443 | if (lockres->l_flags & OCFS2_LOCK_BUSY && | 1439 | if (lockres->l_flags & OCFS2_LOCK_BUSY && |
1444 | level > lockres->l_level) { | 1440 | level > lockres->l_level) { |
1445 | /* is someone sitting in dlm_lock? If so, wait on | 1441 | /* is someone sitting in dlm_lock? If so, wait on |
1446 | * them. */ | 1442 | * them. */ |
1447 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1443 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1448 | wait = 1; | 1444 | wait = 1; |
1449 | goto unlock; | 1445 | goto unlock; |
1450 | } | 1446 | } |
1451 | 1447 | ||
1452 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { | 1448 | if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { |
1453 | /* | 1449 | /* |
1454 | * We've upconverted. If the lock now has a level we can | 1450 | * We've upconverted. If the lock now has a level we can |
1455 | * work with, we take it. If, however, the lock is not at the | 1451 | * work with, we take it. If, however, the lock is not at the |
1456 | * required level, we go thru the full cycle. One way this could | 1452 | * required level, we go thru the full cycle. One way this could |
1457 | * happen is if a process requesting an upconvert to PR is | 1453 | * happen is if a process requesting an upconvert to PR is |
1458 | * closely followed by another requesting upconvert to an EX. | 1454 | * closely followed by another requesting upconvert to an EX. |
1459 | * If the process requesting EX lands here, we want it to | 1455 | * If the process requesting EX lands here, we want it to |
1460 | * continue attempting to upconvert and let the process | 1456 | * continue attempting to upconvert and let the process |
1461 | * requesting PR take the lock. | 1457 | * requesting PR take the lock. |
1462 | * If multiple processes request upconvert to PR, the first one | 1458 | * If multiple processes request upconvert to PR, the first one |
1463 | * here will take the lock. The others will have to go thru the | 1459 | * here will take the lock. The others will have to go thru the |
1464 | * OCFS2_LOCK_BLOCKED check to ensure that there is no pending | 1460 | * OCFS2_LOCK_BLOCKED check to ensure that there is no pending |
1465 | * downconvert request. | 1461 | * downconvert request. |
1466 | */ | 1462 | */ |
1467 | if (level <= lockres->l_level) | 1463 | if (level <= lockres->l_level) |
1468 | goto update_holders; | 1464 | goto update_holders; |
1469 | } | 1465 | } |
1470 | 1466 | ||
1471 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && | 1467 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && |
1472 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { | 1468 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { |
1473 | /* is the lock is currently blocked on behalf of | 1469 | /* is the lock is currently blocked on behalf of |
1474 | * another node */ | 1470 | * another node */ |
1475 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); | 1471 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); |
1476 | wait = 1; | 1472 | wait = 1; |
1477 | goto unlock; | 1473 | goto unlock; |
1478 | } | 1474 | } |
1479 | 1475 | ||
1480 | if (level > lockres->l_level) { | 1476 | if (level > lockres->l_level) { |
1481 | if (noqueue_attempted > 0) { | 1477 | if (noqueue_attempted > 0) { |
1482 | ret = -EAGAIN; | 1478 | ret = -EAGAIN; |
1483 | goto unlock; | 1479 | goto unlock; |
1484 | } | 1480 | } |
1485 | if (lkm_flags & DLM_LKF_NOQUEUE) | 1481 | if (lkm_flags & DLM_LKF_NOQUEUE) |
1486 | noqueue_attempted = 1; | 1482 | noqueue_attempted = 1; |
1487 | 1483 | ||
1488 | if (lockres->l_action != OCFS2_AST_INVALID) | 1484 | if (lockres->l_action != OCFS2_AST_INVALID) |
1489 | mlog(ML_ERROR, "lockres %s has action %u pending\n", | 1485 | mlog(ML_ERROR, "lockres %s has action %u pending\n", |
1490 | lockres->l_name, lockres->l_action); | 1486 | lockres->l_name, lockres->l_action); |
1491 | 1487 | ||
1492 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 1488 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { |
1493 | lockres->l_action = OCFS2_AST_ATTACH; | 1489 | lockres->l_action = OCFS2_AST_ATTACH; |
1494 | lkm_flags &= ~DLM_LKF_CONVERT; | 1490 | lkm_flags &= ~DLM_LKF_CONVERT; |
1495 | } else { | 1491 | } else { |
1496 | lockres->l_action = OCFS2_AST_CONVERT; | 1492 | lockres->l_action = OCFS2_AST_CONVERT; |
1497 | lkm_flags |= DLM_LKF_CONVERT; | 1493 | lkm_flags |= DLM_LKF_CONVERT; |
1498 | } | 1494 | } |
1499 | 1495 | ||
1500 | lockres->l_requested = level; | 1496 | lockres->l_requested = level; |
1501 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1497 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
1502 | gen = lockres_set_pending(lockres); | 1498 | gen = lockres_set_pending(lockres); |
1503 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1499 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1504 | 1500 | ||
1505 | BUG_ON(level == DLM_LOCK_IV); | 1501 | BUG_ON(level == DLM_LOCK_IV); |
1506 | BUG_ON(level == DLM_LOCK_NL); | 1502 | BUG_ON(level == DLM_LOCK_NL); |
1507 | 1503 | ||
1508 | mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", | 1504 | mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", |
1509 | lockres->l_name, lockres->l_level, level); | 1505 | lockres->l_name, lockres->l_level, level); |
1510 | 1506 | ||
1511 | /* call dlm_lock to upgrade lock now */ | 1507 | /* call dlm_lock to upgrade lock now */ |
1512 | ret = ocfs2_dlm_lock(osb->cconn, | 1508 | ret = ocfs2_dlm_lock(osb->cconn, |
1513 | level, | 1509 | level, |
1514 | &lockres->l_lksb, | 1510 | &lockres->l_lksb, |
1515 | lkm_flags, | 1511 | lkm_flags, |
1516 | lockres->l_name, | 1512 | lockres->l_name, |
1517 | OCFS2_LOCK_ID_MAX_LEN - 1); | 1513 | OCFS2_LOCK_ID_MAX_LEN - 1); |
1518 | lockres_clear_pending(lockres, gen, osb); | 1514 | lockres_clear_pending(lockres, gen, osb); |
1519 | if (ret) { | 1515 | if (ret) { |
1520 | if (!(lkm_flags & DLM_LKF_NOQUEUE) || | 1516 | if (!(lkm_flags & DLM_LKF_NOQUEUE) || |
1521 | (ret != -EAGAIN)) { | 1517 | (ret != -EAGAIN)) { |
1522 | ocfs2_log_dlm_error("ocfs2_dlm_lock", | 1518 | ocfs2_log_dlm_error("ocfs2_dlm_lock", |
1523 | ret, lockres); | 1519 | ret, lockres); |
1524 | } | 1520 | } |
1525 | ocfs2_recover_from_dlm_error(lockres, 1); | 1521 | ocfs2_recover_from_dlm_error(lockres, 1); |
1526 | goto out; | 1522 | goto out; |
1527 | } | 1523 | } |
1528 | 1524 | ||
1529 | mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", | 1525 | mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", |
1530 | lockres->l_name); | 1526 | lockres->l_name); |
1531 | 1527 | ||
1532 | /* At this point we've gone inside the dlm and need to | 1528 | /* At this point we've gone inside the dlm and need to |
1533 | * complete our work regardless. */ | 1529 | * complete our work regardless. */ |
1534 | catch_signals = 0; | 1530 | catch_signals = 0; |
1535 | 1531 | ||
1536 | /* wait for busy to clear and carry on */ | 1532 | /* wait for busy to clear and carry on */ |
1537 | goto again; | 1533 | goto again; |
1538 | } | 1534 | } |
1539 | 1535 | ||
1540 | update_holders: | 1536 | update_holders: |
1541 | /* Ok, if we get here then we're good to go. */ | 1537 | /* Ok, if we get here then we're good to go. */ |
1542 | ocfs2_inc_holders(lockres, level); | 1538 | ocfs2_inc_holders(lockres, level); |
1543 | 1539 | ||
1544 | ret = 0; | 1540 | ret = 0; |
1545 | unlock: | 1541 | unlock: |
1546 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); | 1542 | lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); |
1547 | 1543 | ||
1548 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1544 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1549 | out: | 1545 | out: |
1550 | /* | 1546 | /* |
1551 | * This is helping work around a lock inversion between the page lock | 1547 | * This is helping work around a lock inversion between the page lock |
1552 | * and dlm locks. One path holds the page lock while calling aops | 1548 | * and dlm locks. One path holds the page lock while calling aops |
1553 | * which block acquiring dlm locks. The voting thread holds dlm | 1549 | * which block acquiring dlm locks. The voting thread holds dlm |
1554 | * locks while acquiring page locks while down converting data locks. | 1550 | * locks while acquiring page locks while down converting data locks. |
1555 | * This block is helping an aop path notice the inversion and back | 1551 | * This block is helping an aop path notice the inversion and back |
1556 | * off to unlock its page lock before trying the dlm lock again. | 1552 | * off to unlock its page lock before trying the dlm lock again. |
1557 | */ | 1553 | */ |
1558 | if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && | 1554 | if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && |
1559 | mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { | 1555 | mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { |
1560 | wait = 0; | 1556 | wait = 0; |
1561 | if (lockres_remove_mask_waiter(lockres, &mw)) | 1557 | if (lockres_remove_mask_waiter(lockres, &mw)) |
1562 | ret = -EAGAIN; | 1558 | ret = -EAGAIN; |
1563 | else | 1559 | else |
1564 | goto again; | 1560 | goto again; |
1565 | } | 1561 | } |
1566 | if (wait) { | 1562 | if (wait) { |
1567 | ret = ocfs2_wait_for_mask(&mw); | 1563 | ret = ocfs2_wait_for_mask(&mw); |
1568 | if (ret == 0) | 1564 | if (ret == 0) |
1569 | goto again; | 1565 | goto again; |
1570 | mlog_errno(ret); | 1566 | mlog_errno(ret); |
1571 | } | 1567 | } |
1572 | ocfs2_update_lock_stats(lockres, level, &mw, ret); | 1568 | ocfs2_update_lock_stats(lockres, level, &mw, ret); |
1573 | 1569 | ||
1574 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 1570 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
1575 | if (!ret && lockres->l_lockdep_map.key != NULL) { | 1571 | if (!ret && lockres->l_lockdep_map.key != NULL) { |
1576 | if (level == DLM_LOCK_PR) | 1572 | if (level == DLM_LOCK_PR) |
1577 | rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, | 1573 | rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, |
1578 | !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), | 1574 | !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), |
1579 | caller_ip); | 1575 | caller_ip); |
1580 | else | 1576 | else |
1581 | rwsem_acquire(&lockres->l_lockdep_map, l_subclass, | 1577 | rwsem_acquire(&lockres->l_lockdep_map, l_subclass, |
1582 | !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), | 1578 | !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), |
1583 | caller_ip); | 1579 | caller_ip); |
1584 | } | 1580 | } |
1585 | #endif | 1581 | #endif |
1586 | mlog_exit(ret); | 1582 | mlog_exit(ret); |
1587 | return ret; | 1583 | return ret; |
1588 | } | 1584 | } |
1589 | 1585 | ||
1590 | static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, | 1586 | static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, |
1591 | struct ocfs2_lock_res *lockres, | 1587 | struct ocfs2_lock_res *lockres, |
1592 | int level, | 1588 | int level, |
1593 | u32 lkm_flags, | 1589 | u32 lkm_flags, |
1594 | int arg_flags) | 1590 | int arg_flags) |
1595 | { | 1591 | { |
1596 | return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, | 1592 | return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, |
1597 | 0, _RET_IP_); | 1593 | 0, _RET_IP_); |
1598 | } | 1594 | } |
1599 | 1595 | ||
1600 | 1596 | ||
1601 | static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, | 1597 | static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, |
1602 | struct ocfs2_lock_res *lockres, | 1598 | struct ocfs2_lock_res *lockres, |
1603 | int level, | 1599 | int level, |
1604 | unsigned long caller_ip) | 1600 | unsigned long caller_ip) |
1605 | { | 1601 | { |
1606 | unsigned long flags; | 1602 | unsigned long flags; |
1607 | 1603 | ||
1608 | mlog_entry_void(); | 1604 | mlog_entry_void(); |
1609 | spin_lock_irqsave(&lockres->l_lock, flags); | 1605 | spin_lock_irqsave(&lockres->l_lock, flags); |
1610 | ocfs2_dec_holders(lockres, level); | 1606 | ocfs2_dec_holders(lockres, level); |
1611 | ocfs2_downconvert_on_unlock(osb, lockres); | 1607 | ocfs2_downconvert_on_unlock(osb, lockres); |
1612 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1608 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1613 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 1609 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
1614 | if (lockres->l_lockdep_map.key != NULL) | 1610 | if (lockres->l_lockdep_map.key != NULL) |
1615 | rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); | 1611 | rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); |
1616 | #endif | 1612 | #endif |
1617 | mlog_exit_void(); | 1613 | mlog_exit_void(); |
1618 | } | 1614 | } |
1619 | 1615 | ||
1620 | static int ocfs2_create_new_lock(struct ocfs2_super *osb, | 1616 | static int ocfs2_create_new_lock(struct ocfs2_super *osb, |
1621 | struct ocfs2_lock_res *lockres, | 1617 | struct ocfs2_lock_res *lockres, |
1622 | int ex, | 1618 | int ex, |
1623 | int local) | 1619 | int local) |
1624 | { | 1620 | { |
1625 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 1621 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
1626 | unsigned long flags; | 1622 | unsigned long flags; |
1627 | u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; | 1623 | u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; |
1628 | 1624 | ||
1629 | spin_lock_irqsave(&lockres->l_lock, flags); | 1625 | spin_lock_irqsave(&lockres->l_lock, flags); |
1630 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 1626 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
1631 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); | 1627 | lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); |
1632 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1628 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1633 | 1629 | ||
1634 | return ocfs2_lock_create(osb, lockres, level, lkm_flags); | 1630 | return ocfs2_lock_create(osb, lockres, level, lkm_flags); |
1635 | } | 1631 | } |
1636 | 1632 | ||
1637 | /* Grants us an EX lock on the data and metadata resources, skipping | 1633 | /* Grants us an EX lock on the data and metadata resources, skipping |
1638 | * the normal cluster directory lookup. Use this ONLY on newly created | 1634 | * the normal cluster directory lookup. Use this ONLY on newly created |
1639 | * inodes which other nodes can't possibly see, and which haven't been | 1635 | * inodes which other nodes can't possibly see, and which haven't been |
1640 | * hashed in the inode hash yet. This can give us a good performance | 1636 | * hashed in the inode hash yet. This can give us a good performance |
1641 | * increase as it'll skip the network broadcast normally associated | 1637 | * increase as it'll skip the network broadcast normally associated |
1642 | * with creating a new lock resource. */ | 1638 | * with creating a new lock resource. */ |
1643 | int ocfs2_create_new_inode_locks(struct inode *inode) | 1639 | int ocfs2_create_new_inode_locks(struct inode *inode) |
1644 | { | 1640 | { |
1645 | int ret; | 1641 | int ret; |
1646 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1642 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1647 | 1643 | ||
1648 | BUG_ON(!inode); | 1644 | BUG_ON(!inode); |
1649 | BUG_ON(!ocfs2_inode_is_new(inode)); | 1645 | BUG_ON(!ocfs2_inode_is_new(inode)); |
1650 | 1646 | ||
1651 | mlog_entry_void(); | 1647 | mlog_entry_void(); |
1652 | 1648 | ||
1653 | mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); | 1649 | mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); |
1654 | 1650 | ||
1655 | /* NOTE: That we don't increment any of the holder counts, nor | 1651 | /* NOTE: That we don't increment any of the holder counts, nor |
1656 | * do we add anything to a journal handle. Since this is | 1652 | * do we add anything to a journal handle. Since this is |
1657 | * supposed to be a new inode which the cluster doesn't know | 1653 | * supposed to be a new inode which the cluster doesn't know |
1658 | * about yet, there is no need to. As far as the LVB handling | 1654 | * about yet, there is no need to. As far as the LVB handling |
1659 | * is concerned, this is basically like acquiring an EX lock | 1655 | * is concerned, this is basically like acquiring an EX lock |
1660 | * on a resource which has an invalid one -- we'll set it | 1656 | * on a resource which has an invalid one -- we'll set it |
1661 | * valid when we release the EX. */ | 1657 | * valid when we release the EX. */ |
1662 | 1658 | ||
1663 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); | 1659 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); |
1664 | if (ret) { | 1660 | if (ret) { |
1665 | mlog_errno(ret); | 1661 | mlog_errno(ret); |
1666 | goto bail; | 1662 | goto bail; |
1667 | } | 1663 | } |
1668 | 1664 | ||
1669 | /* | 1665 | /* |
1670 | * We don't want to use DLM_LKF_LOCAL on a meta data lock as they | 1666 | * We don't want to use DLM_LKF_LOCAL on a meta data lock as they |
1671 | * don't use a generation in their lock names. | 1667 | * don't use a generation in their lock names. |
1672 | */ | 1668 | */ |
1673 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); | 1669 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
1674 | if (ret) { | 1670 | if (ret) { |
1675 | mlog_errno(ret); | 1671 | mlog_errno(ret); |
1676 | goto bail; | 1672 | goto bail; |
1677 | } | 1673 | } |
1678 | 1674 | ||
1679 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); | 1675 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); |
1680 | if (ret) { | 1676 | if (ret) { |
1681 | mlog_errno(ret); | 1677 | mlog_errno(ret); |
1682 | goto bail; | 1678 | goto bail; |
1683 | } | 1679 | } |
1684 | 1680 | ||
1685 | bail: | 1681 | bail: |
1686 | mlog_exit(ret); | 1682 | mlog_exit(ret); |
1687 | return ret; | 1683 | return ret; |
1688 | } | 1684 | } |
1689 | 1685 | ||
1690 | int ocfs2_rw_lock(struct inode *inode, int write) | 1686 | int ocfs2_rw_lock(struct inode *inode, int write) |
1691 | { | 1687 | { |
1692 | int status, level; | 1688 | int status, level; |
1693 | struct ocfs2_lock_res *lockres; | 1689 | struct ocfs2_lock_res *lockres; |
1694 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1690 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1695 | 1691 | ||
1696 | BUG_ON(!inode); | 1692 | BUG_ON(!inode); |
1697 | 1693 | ||
1698 | mlog_entry_void(); | 1694 | mlog_entry_void(); |
1699 | 1695 | ||
1700 | mlog(0, "inode %llu take %s RW lock\n", | 1696 | mlog(0, "inode %llu take %s RW lock\n", |
1701 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1697 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1702 | write ? "EXMODE" : "PRMODE"); | 1698 | write ? "EXMODE" : "PRMODE"); |
1703 | 1699 | ||
1704 | if (ocfs2_mount_local(osb)) { | 1700 | if (ocfs2_mount_local(osb)) { |
1705 | mlog_exit(0); | 1701 | mlog_exit(0); |
1706 | return 0; | 1702 | return 0; |
1707 | } | 1703 | } |
1708 | 1704 | ||
1709 | lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1705 | lockres = &OCFS2_I(inode)->ip_rw_lockres; |
1710 | 1706 | ||
1711 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 1707 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
1712 | 1708 | ||
1713 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 1709 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, |
1714 | 0); | 1710 | 0); |
1715 | if (status < 0) | 1711 | if (status < 0) |
1716 | mlog_errno(status); | 1712 | mlog_errno(status); |
1717 | 1713 | ||
1718 | mlog_exit(status); | 1714 | mlog_exit(status); |
1719 | return status; | 1715 | return status; |
1720 | } | 1716 | } |
1721 | 1717 | ||
1722 | void ocfs2_rw_unlock(struct inode *inode, int write) | 1718 | void ocfs2_rw_unlock(struct inode *inode, int write) |
1723 | { | 1719 | { |
1724 | int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 1720 | int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
1725 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1721 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; |
1726 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1722 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1727 | 1723 | ||
1728 | mlog_entry_void(); | 1724 | mlog_entry_void(); |
1729 | 1725 | ||
1730 | mlog(0, "inode %llu drop %s RW lock\n", | 1726 | mlog(0, "inode %llu drop %s RW lock\n", |
1731 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1727 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1732 | write ? "EXMODE" : "PRMODE"); | 1728 | write ? "EXMODE" : "PRMODE"); |
1733 | 1729 | ||
1734 | if (!ocfs2_mount_local(osb)) | 1730 | if (!ocfs2_mount_local(osb)) |
1735 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 1731 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); |
1736 | 1732 | ||
1737 | mlog_exit_void(); | 1733 | mlog_exit_void(); |
1738 | } | 1734 | } |
1739 | 1735 | ||
1740 | /* | 1736 | /* |
1741 | * ocfs2_open_lock always get PR mode lock. | 1737 | * ocfs2_open_lock always get PR mode lock. |
1742 | */ | 1738 | */ |
1743 | int ocfs2_open_lock(struct inode *inode) | 1739 | int ocfs2_open_lock(struct inode *inode) |
1744 | { | 1740 | { |
1745 | int status = 0; | 1741 | int status = 0; |
1746 | struct ocfs2_lock_res *lockres; | 1742 | struct ocfs2_lock_res *lockres; |
1747 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1743 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1748 | 1744 | ||
1749 | BUG_ON(!inode); | 1745 | BUG_ON(!inode); |
1750 | 1746 | ||
1751 | mlog_entry_void(); | 1747 | mlog_entry_void(); |
1752 | 1748 | ||
1753 | mlog(0, "inode %llu take PRMODE open lock\n", | 1749 | mlog(0, "inode %llu take PRMODE open lock\n", |
1754 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 1750 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
1755 | 1751 | ||
1756 | if (ocfs2_mount_local(osb)) | 1752 | if (ocfs2_mount_local(osb)) |
1757 | goto out; | 1753 | goto out; |
1758 | 1754 | ||
1759 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1755 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
1760 | 1756 | ||
1761 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1757 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
1762 | DLM_LOCK_PR, 0, 0); | 1758 | DLM_LOCK_PR, 0, 0); |
1763 | if (status < 0) | 1759 | if (status < 0) |
1764 | mlog_errno(status); | 1760 | mlog_errno(status); |
1765 | 1761 | ||
1766 | out: | 1762 | out: |
1767 | mlog_exit(status); | 1763 | mlog_exit(status); |
1768 | return status; | 1764 | return status; |
1769 | } | 1765 | } |
1770 | 1766 | ||
1771 | int ocfs2_try_open_lock(struct inode *inode, int write) | 1767 | int ocfs2_try_open_lock(struct inode *inode, int write) |
1772 | { | 1768 | { |
1773 | int status = 0, level; | 1769 | int status = 0, level; |
1774 | struct ocfs2_lock_res *lockres; | 1770 | struct ocfs2_lock_res *lockres; |
1775 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1771 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1776 | 1772 | ||
1777 | BUG_ON(!inode); | 1773 | BUG_ON(!inode); |
1778 | 1774 | ||
1779 | mlog_entry_void(); | 1775 | mlog_entry_void(); |
1780 | 1776 | ||
1781 | mlog(0, "inode %llu try to take %s open lock\n", | 1777 | mlog(0, "inode %llu try to take %s open lock\n", |
1782 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1778 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1783 | write ? "EXMODE" : "PRMODE"); | 1779 | write ? "EXMODE" : "PRMODE"); |
1784 | 1780 | ||
1785 | if (ocfs2_mount_local(osb)) | 1781 | if (ocfs2_mount_local(osb)) |
1786 | goto out; | 1782 | goto out; |
1787 | 1783 | ||
1788 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1784 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
1789 | 1785 | ||
1790 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 1786 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
1791 | 1787 | ||
1792 | /* | 1788 | /* |
1793 | * The file system may already holding a PRMODE/EXMODE open lock. | 1789 | * The file system may already holding a PRMODE/EXMODE open lock. |
1794 | * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on | 1790 | * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on |
1795 | * other nodes and the -EAGAIN will indicate to the caller that | 1791 | * other nodes and the -EAGAIN will indicate to the caller that |
1796 | * this inode is still in use. | 1792 | * this inode is still in use. |
1797 | */ | 1793 | */ |
1798 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1794 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
1799 | level, DLM_LKF_NOQUEUE, 0); | 1795 | level, DLM_LKF_NOQUEUE, 0); |
1800 | 1796 | ||
1801 | out: | 1797 | out: |
1802 | mlog_exit(status); | 1798 | mlog_exit(status); |
1803 | return status; | 1799 | return status; |
1804 | } | 1800 | } |
1805 | 1801 | ||
1806 | /* | 1802 | /* |
1807 | * ocfs2_open_unlock unlock PR and EX mode open locks. | 1803 | * ocfs2_open_unlock unlock PR and EX mode open locks. |
1808 | */ | 1804 | */ |
1809 | void ocfs2_open_unlock(struct inode *inode) | 1805 | void ocfs2_open_unlock(struct inode *inode) |
1810 | { | 1806 | { |
1811 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; | 1807 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; |
1812 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1808 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1813 | 1809 | ||
1814 | mlog_entry_void(); | 1810 | mlog_entry_void(); |
1815 | 1811 | ||
1816 | mlog(0, "inode %llu drop open lock\n", | 1812 | mlog(0, "inode %llu drop open lock\n", |
1817 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 1813 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
1818 | 1814 | ||
1819 | if (ocfs2_mount_local(osb)) | 1815 | if (ocfs2_mount_local(osb)) |
1820 | goto out; | 1816 | goto out; |
1821 | 1817 | ||
1822 | if(lockres->l_ro_holders) | 1818 | if(lockres->l_ro_holders) |
1823 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1819 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
1824 | DLM_LOCK_PR); | 1820 | DLM_LOCK_PR); |
1825 | if(lockres->l_ex_holders) | 1821 | if(lockres->l_ex_holders) |
1826 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1822 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
1827 | DLM_LOCK_EX); | 1823 | DLM_LOCK_EX); |
1828 | 1824 | ||
1829 | out: | 1825 | out: |
1830 | mlog_exit_void(); | 1826 | mlog_exit_void(); |
1831 | } | 1827 | } |
1832 | 1828 | ||
1833 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, | 1829 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, |
1834 | int level) | 1830 | int level) |
1835 | { | 1831 | { |
1836 | int ret; | 1832 | int ret; |
1837 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | 1833 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1838 | unsigned long flags; | 1834 | unsigned long flags; |
1839 | struct ocfs2_mask_waiter mw; | 1835 | struct ocfs2_mask_waiter mw; |
1840 | 1836 | ||
1841 | ocfs2_init_mask_waiter(&mw); | 1837 | ocfs2_init_mask_waiter(&mw); |
1842 | 1838 | ||
1843 | retry_cancel: | 1839 | retry_cancel: |
1844 | spin_lock_irqsave(&lockres->l_lock, flags); | 1840 | spin_lock_irqsave(&lockres->l_lock, flags); |
1845 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 1841 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { |
1846 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 1842 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
1847 | if (ret) { | 1843 | if (ret) { |
1848 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1844 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1849 | ret = ocfs2_cancel_convert(osb, lockres); | 1845 | ret = ocfs2_cancel_convert(osb, lockres); |
1850 | if (ret < 0) { | 1846 | if (ret < 0) { |
1851 | mlog_errno(ret); | 1847 | mlog_errno(ret); |
1852 | goto out; | 1848 | goto out; |
1853 | } | 1849 | } |
1854 | goto retry_cancel; | 1850 | goto retry_cancel; |
1855 | } | 1851 | } |
1856 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1852 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1857 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1853 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1858 | 1854 | ||
1859 | ocfs2_wait_for_mask(&mw); | 1855 | ocfs2_wait_for_mask(&mw); |
1860 | goto retry_cancel; | 1856 | goto retry_cancel; |
1861 | } | 1857 | } |
1862 | 1858 | ||
1863 | ret = -ERESTARTSYS; | 1859 | ret = -ERESTARTSYS; |
1864 | /* | 1860 | /* |
1865 | * We may still have gotten the lock, in which case there's no | 1861 | * We may still have gotten the lock, in which case there's no |
1866 | * point to restarting the syscall. | 1862 | * point to restarting the syscall. |
1867 | */ | 1863 | */ |
1868 | if (lockres->l_level == level) | 1864 | if (lockres->l_level == level) |
1869 | ret = 0; | 1865 | ret = 0; |
1870 | 1866 | ||
1871 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | 1867 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, |
1872 | lockres->l_flags, lockres->l_level, lockres->l_action); | 1868 | lockres->l_flags, lockres->l_level, lockres->l_action); |
1873 | 1869 | ||
1874 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1870 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1875 | 1871 | ||
1876 | out: | 1872 | out: |
1877 | return ret; | 1873 | return ret; |
1878 | } | 1874 | } |
1879 | 1875 | ||
1880 | /* | 1876 | /* |
1881 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | 1877 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of |
1882 | * flock() calls. The locking approach this requires is sufficiently | 1878 | * flock() calls. The locking approach this requires is sufficiently |
1883 | * different from all other cluster lock types that we implement a | 1879 | * different from all other cluster lock types that we implement a |
1884 | * separate path to the "low-level" dlm calls. In particular: | 1880 | * separate path to the "low-level" dlm calls. In particular: |
1885 | * | 1881 | * |
1886 | * - No optimization of lock levels is done - we take at exactly | 1882 | * - No optimization of lock levels is done - we take at exactly |
1887 | * what's been requested. | 1883 | * what's been requested. |
1888 | * | 1884 | * |
1889 | * - No lock caching is employed. We immediately downconvert to | 1885 | * - No lock caching is employed. We immediately downconvert to |
1890 | * no-lock at unlock time. This also means flock locks never go on | 1886 | * no-lock at unlock time. This also means flock locks never go on |
1891 | * the blocking list). | 1887 | * the blocking list). |
1892 | * | 1888 | * |
1893 | * - Since userspace can trivially deadlock itself with flock, we make | 1889 | * - Since userspace can trivially deadlock itself with flock, we make |
1894 | * sure to allow cancellation of a misbehaving applications flock() | 1890 | * sure to allow cancellation of a misbehaving applications flock() |
1895 | * request. | 1891 | * request. |
1896 | * | 1892 | * |
1897 | * - Access to any flock lockres doesn't require concurrency, so we | 1893 | * - Access to any flock lockres doesn't require concurrency, so we |
1898 | * can simplify the code by requiring the caller to guarantee | 1894 | * can simplify the code by requiring the caller to guarantee |
1899 | * serialization of dlmglue flock calls. | 1895 | * serialization of dlmglue flock calls. |
1900 | */ | 1896 | */ |
1901 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | 1897 | int ocfs2_file_lock(struct file *file, int ex, int trylock) |
1902 | { | 1898 | { |
1903 | int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 1899 | int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
1904 | unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; | 1900 | unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; |
1905 | unsigned long flags; | 1901 | unsigned long flags; |
1906 | struct ocfs2_file_private *fp = file->private_data; | 1902 | struct ocfs2_file_private *fp = file->private_data; |
1907 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1903 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
1908 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | 1904 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); |
1909 | struct ocfs2_mask_waiter mw; | 1905 | struct ocfs2_mask_waiter mw; |
1910 | 1906 | ||
1911 | ocfs2_init_mask_waiter(&mw); | 1907 | ocfs2_init_mask_waiter(&mw); |
1912 | 1908 | ||
1913 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | 1909 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || |
1914 | (lockres->l_level > DLM_LOCK_NL)) { | 1910 | (lockres->l_level > DLM_LOCK_NL)) { |
1915 | mlog(ML_ERROR, | 1911 | mlog(ML_ERROR, |
1916 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | 1912 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " |
1917 | "level: %u\n", lockres->l_name, lockres->l_flags, | 1913 | "level: %u\n", lockres->l_name, lockres->l_flags, |
1918 | lockres->l_level); | 1914 | lockres->l_level); |
1919 | return -EINVAL; | 1915 | return -EINVAL; |
1920 | } | 1916 | } |
1921 | 1917 | ||
1922 | spin_lock_irqsave(&lockres->l_lock, flags); | 1918 | spin_lock_irqsave(&lockres->l_lock, flags); |
1923 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 1919 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { |
1924 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1920 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1925 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1921 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1926 | 1922 | ||
1927 | /* | 1923 | /* |
1928 | * Get the lock at NLMODE to start - that way we | 1924 | * Get the lock at NLMODE to start - that way we |
1929 | * can cancel the upconvert request if need be. | 1925 | * can cancel the upconvert request if need be. |
1930 | */ | 1926 | */ |
1931 | ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); | 1927 | ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); |
1932 | if (ret < 0) { | 1928 | if (ret < 0) { |
1933 | mlog_errno(ret); | 1929 | mlog_errno(ret); |
1934 | goto out; | 1930 | goto out; |
1935 | } | 1931 | } |
1936 | 1932 | ||
1937 | ret = ocfs2_wait_for_mask(&mw); | 1933 | ret = ocfs2_wait_for_mask(&mw); |
1938 | if (ret) { | 1934 | if (ret) { |
1939 | mlog_errno(ret); | 1935 | mlog_errno(ret); |
1940 | goto out; | 1936 | goto out; |
1941 | } | 1937 | } |
1942 | spin_lock_irqsave(&lockres->l_lock, flags); | 1938 | spin_lock_irqsave(&lockres->l_lock, flags); |
1943 | } | 1939 | } |
1944 | 1940 | ||
1945 | lockres->l_action = OCFS2_AST_CONVERT; | 1941 | lockres->l_action = OCFS2_AST_CONVERT; |
1946 | lkm_flags |= DLM_LKF_CONVERT; | 1942 | lkm_flags |= DLM_LKF_CONVERT; |
1947 | lockres->l_requested = level; | 1943 | lockres->l_requested = level; |
1948 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1944 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
1949 | 1945 | ||
1950 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1946 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1951 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1947 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1952 | 1948 | ||
1953 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, | 1949 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, |
1954 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); | 1950 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); |
1955 | if (ret) { | 1951 | if (ret) { |
1956 | if (!trylock || (ret != -EAGAIN)) { | 1952 | if (!trylock || (ret != -EAGAIN)) { |
1957 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 1953 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
1958 | ret = -EINVAL; | 1954 | ret = -EINVAL; |
1959 | } | 1955 | } |
1960 | 1956 | ||
1961 | ocfs2_recover_from_dlm_error(lockres, 1); | 1957 | ocfs2_recover_from_dlm_error(lockres, 1); |
1962 | lockres_remove_mask_waiter(lockres, &mw); | 1958 | lockres_remove_mask_waiter(lockres, &mw); |
1963 | goto out; | 1959 | goto out; |
1964 | } | 1960 | } |
1965 | 1961 | ||
1966 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | 1962 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); |
1967 | if (ret == -ERESTARTSYS) { | 1963 | if (ret == -ERESTARTSYS) { |
1968 | /* | 1964 | /* |
1969 | * Userspace can cause deadlock itself with | 1965 | * Userspace can cause deadlock itself with |
1970 | * flock(). Current behavior locally is to allow the | 1966 | * flock(). Current behavior locally is to allow the |
1971 | * deadlock, but abort the system call if a signal is | 1967 | * deadlock, but abort the system call if a signal is |
1972 | * received. We follow this example, otherwise a | 1968 | * received. We follow this example, otherwise a |
1973 | * poorly written program could sit in kernel until | 1969 | * poorly written program could sit in kernel until |
1974 | * reboot. | 1970 | * reboot. |
1975 | * | 1971 | * |
1976 | * Handling this is a bit more complicated for Ocfs2 | 1972 | * Handling this is a bit more complicated for Ocfs2 |
1977 | * though. We can't exit this function with an | 1973 | * though. We can't exit this function with an |
1978 | * outstanding lock request, so a cancel convert is | 1974 | * outstanding lock request, so a cancel convert is |
1979 | * required. We intentionally overwrite 'ret' - if the | 1975 | * required. We intentionally overwrite 'ret' - if the |
1980 | * cancel fails and the lock was granted, it's easier | 1976 | * cancel fails and the lock was granted, it's easier |
1981 | * to just bubble success back up to the user. | 1977 | * to just bubble success back up to the user. |
1982 | */ | 1978 | */ |
1983 | ret = ocfs2_flock_handle_signal(lockres, level); | 1979 | ret = ocfs2_flock_handle_signal(lockres, level); |
1984 | } else if (!ret && (level > lockres->l_level)) { | 1980 | } else if (!ret && (level > lockres->l_level)) { |
1985 | /* Trylock failed asynchronously */ | 1981 | /* Trylock failed asynchronously */ |
1986 | BUG_ON(!trylock); | 1982 | BUG_ON(!trylock); |
1987 | ret = -EAGAIN; | 1983 | ret = -EAGAIN; |
1988 | } | 1984 | } |
1989 | 1985 | ||
1990 | out: | 1986 | out: |
1991 | 1987 | ||
1992 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", | 1988 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", |
1993 | lockres->l_name, ex, trylock, ret); | 1989 | lockres->l_name, ex, trylock, ret); |
1994 | return ret; | 1990 | return ret; |
1995 | } | 1991 | } |
1996 | 1992 | ||
1997 | void ocfs2_file_unlock(struct file *file) | 1993 | void ocfs2_file_unlock(struct file *file) |
1998 | { | 1994 | { |
1999 | int ret; | 1995 | int ret; |
2000 | unsigned int gen; | 1996 | unsigned int gen; |
2001 | unsigned long flags; | 1997 | unsigned long flags; |
2002 | struct ocfs2_file_private *fp = file->private_data; | 1998 | struct ocfs2_file_private *fp = file->private_data; |
2003 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1999 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
2004 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | 2000 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); |
2005 | struct ocfs2_mask_waiter mw; | 2001 | struct ocfs2_mask_waiter mw; |
2006 | 2002 | ||
2007 | ocfs2_init_mask_waiter(&mw); | 2003 | ocfs2_init_mask_waiter(&mw); |
2008 | 2004 | ||
2009 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | 2005 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
2010 | return; | 2006 | return; |
2011 | 2007 | ||
2012 | if (lockres->l_level == DLM_LOCK_NL) | 2008 | if (lockres->l_level == DLM_LOCK_NL) |
2013 | return; | 2009 | return; |
2014 | 2010 | ||
2015 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | 2011 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", |
2016 | lockres->l_name, lockres->l_flags, lockres->l_level, | 2012 | lockres->l_name, lockres->l_flags, lockres->l_level, |
2017 | lockres->l_action); | 2013 | lockres->l_action); |
2018 | 2014 | ||
2019 | spin_lock_irqsave(&lockres->l_lock, flags); | 2015 | spin_lock_irqsave(&lockres->l_lock, flags); |
2020 | /* | 2016 | /* |
2021 | * Fake a blocking ast for the downconvert code. | 2017 | * Fake a blocking ast for the downconvert code. |
2022 | */ | 2018 | */ |
2023 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 2019 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
2024 | lockres->l_blocking = DLM_LOCK_EX; | 2020 | lockres->l_blocking = DLM_LOCK_EX; |
2025 | 2021 | ||
2026 | gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); | 2022 | gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); |
2027 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 2023 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
2028 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2024 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2029 | 2025 | ||
2030 | ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); | 2026 | ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); |
2031 | if (ret) { | 2027 | if (ret) { |
2032 | mlog_errno(ret); | 2028 | mlog_errno(ret); |
2033 | return; | 2029 | return; |
2034 | } | 2030 | } |
2035 | 2031 | ||
2036 | ret = ocfs2_wait_for_mask(&mw); | 2032 | ret = ocfs2_wait_for_mask(&mw); |
2037 | if (ret) | 2033 | if (ret) |
2038 | mlog_errno(ret); | 2034 | mlog_errno(ret); |
2039 | } | 2035 | } |
2040 | 2036 | ||
2041 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 2037 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
2042 | struct ocfs2_lock_res *lockres) | 2038 | struct ocfs2_lock_res *lockres) |
2043 | { | 2039 | { |
2044 | int kick = 0; | 2040 | int kick = 0; |
2045 | 2041 | ||
2046 | mlog_entry_void(); | 2042 | mlog_entry_void(); |
2047 | 2043 | ||
2048 | /* If we know that another node is waiting on our lock, kick | 2044 | /* If we know that another node is waiting on our lock, kick |
2049 | * the downconvert thread * pre-emptively when we reach a release | 2045 | * the downconvert thread * pre-emptively when we reach a release |
2050 | * condition. */ | 2046 | * condition. */ |
2051 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 2047 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
2052 | switch(lockres->l_blocking) { | 2048 | switch(lockres->l_blocking) { |
2053 | case DLM_LOCK_EX: | 2049 | case DLM_LOCK_EX: |
2054 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 2050 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) |
2055 | kick = 1; | 2051 | kick = 1; |
2056 | break; | 2052 | break; |
2057 | case DLM_LOCK_PR: | 2053 | case DLM_LOCK_PR: |
2058 | if (!lockres->l_ex_holders) | 2054 | if (!lockres->l_ex_holders) |
2059 | kick = 1; | 2055 | kick = 1; |
2060 | break; | 2056 | break; |
2061 | default: | 2057 | default: |
2062 | BUG(); | 2058 | BUG(); |
2063 | } | 2059 | } |
2064 | } | 2060 | } |
2065 | 2061 | ||
2066 | if (kick) | 2062 | if (kick) |
2067 | ocfs2_wake_downconvert_thread(osb); | 2063 | ocfs2_wake_downconvert_thread(osb); |
2068 | 2064 | ||
2069 | mlog_exit_void(); | 2065 | mlog_exit_void(); |
2070 | } | 2066 | } |
2071 | 2067 | ||
2072 | #define OCFS2_SEC_BITS 34 | 2068 | #define OCFS2_SEC_BITS 34 |
2073 | #define OCFS2_SEC_SHIFT (64 - 34) | 2069 | #define OCFS2_SEC_SHIFT (64 - 34) |
2074 | #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) | 2070 | #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) |
2075 | 2071 | ||
2076 | /* LVB only has room for 64 bits of time here so we pack it for | 2072 | /* LVB only has room for 64 bits of time here so we pack it for |
2077 | * now. */ | 2073 | * now. */ |
2078 | static u64 ocfs2_pack_timespec(struct timespec *spec) | 2074 | static u64 ocfs2_pack_timespec(struct timespec *spec) |
2079 | { | 2075 | { |
2080 | u64 res; | 2076 | u64 res; |
2081 | u64 sec = spec->tv_sec; | 2077 | u64 sec = spec->tv_sec; |
2082 | u32 nsec = spec->tv_nsec; | 2078 | u32 nsec = spec->tv_nsec; |
2083 | 2079 | ||
2084 | res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); | 2080 | res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); |
2085 | 2081 | ||
2086 | return res; | 2082 | return res; |
2087 | } | 2083 | } |
2088 | 2084 | ||
2089 | /* Call this with the lockres locked. I am reasonably sure we don't | 2085 | /* Call this with the lockres locked. I am reasonably sure we don't |
2090 | * need ip_lock in this function as anyone who would be changing those | 2086 | * need ip_lock in this function as anyone who would be changing those |
2091 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ | 2087 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ |
2092 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 2088 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) |
2093 | { | 2089 | { |
2094 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2090 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
2095 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 2091 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
2096 | struct ocfs2_meta_lvb *lvb; | 2092 | struct ocfs2_meta_lvb *lvb; |
2097 | 2093 | ||
2098 | mlog_entry_void(); | 2094 | mlog_entry_void(); |
2099 | 2095 | ||
2100 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 2096 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2101 | 2097 | ||
2102 | /* | 2098 | /* |
2103 | * Invalidate the LVB of a deleted inode - this way other | 2099 | * Invalidate the LVB of a deleted inode - this way other |
2104 | * nodes are forced to go to disk and discover the new inode | 2100 | * nodes are forced to go to disk and discover the new inode |
2105 | * status. | 2101 | * status. |
2106 | */ | 2102 | */ |
2107 | if (oi->ip_flags & OCFS2_INODE_DELETED) { | 2103 | if (oi->ip_flags & OCFS2_INODE_DELETED) { |
2108 | lvb->lvb_version = 0; | 2104 | lvb->lvb_version = 0; |
2109 | goto out; | 2105 | goto out; |
2110 | } | 2106 | } |
2111 | 2107 | ||
2112 | lvb->lvb_version = OCFS2_LVB_VERSION; | 2108 | lvb->lvb_version = OCFS2_LVB_VERSION; |
2113 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); | 2109 | lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); |
2114 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); | 2110 | lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); |
2115 | lvb->lvb_iuid = cpu_to_be32(inode->i_uid); | 2111 | lvb->lvb_iuid = cpu_to_be32(inode->i_uid); |
2116 | lvb->lvb_igid = cpu_to_be32(inode->i_gid); | 2112 | lvb->lvb_igid = cpu_to_be32(inode->i_gid); |
2117 | lvb->lvb_imode = cpu_to_be16(inode->i_mode); | 2113 | lvb->lvb_imode = cpu_to_be16(inode->i_mode); |
2118 | lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); | 2114 | lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); |
2119 | lvb->lvb_iatime_packed = | 2115 | lvb->lvb_iatime_packed = |
2120 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); | 2116 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); |
2121 | lvb->lvb_ictime_packed = | 2117 | lvb->lvb_ictime_packed = |
2122 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); | 2118 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); |
2123 | lvb->lvb_imtime_packed = | 2119 | lvb->lvb_imtime_packed = |
2124 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); | 2120 | cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); |
2125 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); | 2121 | lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); |
2126 | lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); | 2122 | lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); |
2127 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); | 2123 | lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); |
2128 | 2124 | ||
2129 | out: | 2125 | out: |
2130 | mlog_meta_lvb(0, lockres); | 2126 | mlog_meta_lvb(0, lockres); |
2131 | 2127 | ||
2132 | mlog_exit_void(); | 2128 | mlog_exit_void(); |
2133 | } | 2129 | } |
2134 | 2130 | ||
2135 | static void ocfs2_unpack_timespec(struct timespec *spec, | 2131 | static void ocfs2_unpack_timespec(struct timespec *spec, |
2136 | u64 packed_time) | 2132 | u64 packed_time) |
2137 | { | 2133 | { |
2138 | spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; | 2134 | spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; |
2139 | spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; | 2135 | spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; |
2140 | } | 2136 | } |
2141 | 2137 | ||
2142 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 2138 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) |
2143 | { | 2139 | { |
2144 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2140 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
2145 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; | 2141 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
2146 | struct ocfs2_meta_lvb *lvb; | 2142 | struct ocfs2_meta_lvb *lvb; |
2147 | 2143 | ||
2148 | mlog_entry_void(); | 2144 | mlog_entry_void(); |
2149 | 2145 | ||
2150 | mlog_meta_lvb(0, lockres); | 2146 | mlog_meta_lvb(0, lockres); |
2151 | 2147 | ||
2152 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 2148 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2153 | 2149 | ||
2154 | /* We're safe here without the lockres lock... */ | 2150 | /* We're safe here without the lockres lock... */ |
2155 | spin_lock(&oi->ip_lock); | 2151 | spin_lock(&oi->ip_lock); |
2156 | oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); | 2152 | oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); |
2157 | i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); | 2153 | i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); |
2158 | 2154 | ||
2159 | oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); | 2155 | oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); |
2160 | oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); | 2156 | oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); |
2161 | ocfs2_set_inode_flags(inode); | 2157 | ocfs2_set_inode_flags(inode); |
2162 | 2158 | ||
2163 | /* fast-symlinks are a special case */ | 2159 | /* fast-symlinks are a special case */ |
2164 | if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) | 2160 | if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) |
2165 | inode->i_blocks = 0; | 2161 | inode->i_blocks = 0; |
2166 | else | 2162 | else |
2167 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 2163 | inode->i_blocks = ocfs2_inode_sector_count(inode); |
2168 | 2164 | ||
2169 | inode->i_uid = be32_to_cpu(lvb->lvb_iuid); | 2165 | inode->i_uid = be32_to_cpu(lvb->lvb_iuid); |
2170 | inode->i_gid = be32_to_cpu(lvb->lvb_igid); | 2166 | inode->i_gid = be32_to_cpu(lvb->lvb_igid); |
2171 | inode->i_mode = be16_to_cpu(lvb->lvb_imode); | 2167 | inode->i_mode = be16_to_cpu(lvb->lvb_imode); |
2172 | inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); | 2168 | inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); |
2173 | ocfs2_unpack_timespec(&inode->i_atime, | 2169 | ocfs2_unpack_timespec(&inode->i_atime, |
2174 | be64_to_cpu(lvb->lvb_iatime_packed)); | 2170 | be64_to_cpu(lvb->lvb_iatime_packed)); |
2175 | ocfs2_unpack_timespec(&inode->i_mtime, | 2171 | ocfs2_unpack_timespec(&inode->i_mtime, |
2176 | be64_to_cpu(lvb->lvb_imtime_packed)); | 2172 | be64_to_cpu(lvb->lvb_imtime_packed)); |
2177 | ocfs2_unpack_timespec(&inode->i_ctime, | 2173 | ocfs2_unpack_timespec(&inode->i_ctime, |
2178 | be64_to_cpu(lvb->lvb_ictime_packed)); | 2174 | be64_to_cpu(lvb->lvb_ictime_packed)); |
2179 | spin_unlock(&oi->ip_lock); | 2175 | spin_unlock(&oi->ip_lock); |
2180 | 2176 | ||
2181 | mlog_exit_void(); | 2177 | mlog_exit_void(); |
2182 | } | 2178 | } |
2183 | 2179 | ||
2184 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, | 2180 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, |
2185 | struct ocfs2_lock_res *lockres) | 2181 | struct ocfs2_lock_res *lockres) |
2186 | { | 2182 | { |
2187 | struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 2183 | struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2188 | 2184 | ||
2189 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) | 2185 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) |
2190 | && lvb->lvb_version == OCFS2_LVB_VERSION | 2186 | && lvb->lvb_version == OCFS2_LVB_VERSION |
2191 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | 2187 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) |
2192 | return 1; | 2188 | return 1; |
2193 | return 0; | 2189 | return 0; |
2194 | } | 2190 | } |
2195 | 2191 | ||
/* Determine whether a lock resource needs to be refreshed, and
 * arbitrate who gets to refresh it.
 *
 * 0 means no refresh needed.
 *
 * > 0 means you need to refresh this and you MUST call
 * ocfs2_complete_lock_res_refresh afterwards. */
static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
{
	unsigned long flags;
	int status = 0;

	mlog_entry_void();

refresh_check:
	spin_lock_irqsave(&lockres->l_lock, flags);
	/* Nobody has flagged the resource as stale: nothing to do. */
	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	/* Someone else is already refreshing.  Wait for them to finish,
	 * then re-check from the top: a failed refresh leaves
	 * OCFS2_LOCK_NEEDS_REFRESH set, in which case we may have to
	 * take over. */
	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		ocfs2_wait_on_refreshing_lock(lockres);
		goto refresh_check;
	}

	/* Ok, I'll be the one to refresh this lock. */
	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = 1;
bail:
	mlog_exit(status);
	return status;
}
2233 | 2229 | ||
2234 | /* If status is non zero, I'll mark it as not being in refresh | 2230 | /* If status is non zero, I'll mark it as not being in refresh |
2235 | * anymroe, but i won't clear the needs refresh flag. */ | 2231 | * anymroe, but i won't clear the needs refresh flag. */ |
2236 | static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, | 2232 | static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, |
2237 | int status) | 2233 | int status) |
2238 | { | 2234 | { |
2239 | unsigned long flags; | 2235 | unsigned long flags; |
2240 | mlog_entry_void(); | 2236 | mlog_entry_void(); |
2241 | 2237 | ||
2242 | spin_lock_irqsave(&lockres->l_lock, flags); | 2238 | spin_lock_irqsave(&lockres->l_lock, flags); |
2243 | lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); | 2239 | lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); |
2244 | if (!status) | 2240 | if (!status) |
2245 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 2241 | lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
2246 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2242 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
2247 | 2243 | ||
2248 | wake_up(&lockres->l_event); | 2244 | wake_up(&lockres->l_event); |
2249 | 2245 | ||
2250 | mlog_exit_void(); | 2246 | mlog_exit_void(); |
2251 | } | 2247 | } |
2252 | 2248 | ||
/* Refresh the in-memory inode from the LVB or, failing that, from
 * disk, once the cluster lock has been taken.
 *
 * may or may not return a bh if it went to disk. */
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh)
{
	int status = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_dinode *fe;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	/* A local (non-clustered) mount can never go stale. */
	if (ocfs2_mount_local(osb))
		goto bail;

	spin_lock(&oi->ip_lock);
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		mlog(0, "Orphaned inode %llu was deleted while we "
		     "were waiting on a lock. ip_flags = 0x%x\n",
		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
		spin_unlock(&oi->ip_lock);
		status = -ENOENT;
		goto bail;
	}
	spin_unlock(&oi->ip_lock);

	/* Non-zero means we won the arbitration and must pair this
	 * with ocfs2_complete_lock_res_refresh() below. */
	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;

	/* This will discard any caching information we might have had
	 * for the inode metadata. */
	ocfs2_metadata_cache_purge(INODE_CACHE(inode));

	ocfs2_extent_map_trunc(inode, 0);

	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
		mlog(0, "Trusting LVB on inode %llu\n",
		     (unsigned long long)oi->ip_blkno);
		/* Cheap path: the lock value block carries the fields
		 * we need, so no disk I/O is required. */
		ocfs2_refresh_inode_from_lvb(inode);
	} else {
		/* Boo, we have to go to disk. */
		/* read bh, cast, ocfs2_refresh_inode */
		status = ocfs2_read_inode_block(inode, bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail_refresh;
		}
		fe = (struct ocfs2_dinode *) (*bh)->b_data;

		/* This is a good chance to make sure we're not
		 * locking an invalid object. ocfs2_read_inode_block()
		 * already checked that the inode block is sane.
		 *
		 * We bug on a stale inode here because we checked
		 * above whether it was wiped from disk. The wiping
		 * node provides a guarantee that we receive that
		 * message and can mark the inode before dropping any
		 * locks associated with it. */
		mlog_bug_on_msg(inode->i_generation !=
				le32_to_cpu(fe->i_generation),
				"Invalid dinode %llu disk generation: %u "
				"inode->i_generation: %u\n",
				(unsigned long long)oi->ip_blkno,
				le32_to_cpu(fe->i_generation),
				inode->i_generation);
		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
				(unsigned long long)oi->ip_blkno,
				(unsigned long long)le64_to_cpu(fe->i_dtime),
				le32_to_cpu(fe->i_flags));

		ocfs2_refresh_inode(inode, fe);
		ocfs2_track_lock_refresh(lockres);
	}

	status = 0;
bail_refresh:
	/* Pairs with the successful ocfs2_should_refresh_lock_res()
	 * above; a non-zero status leaves the resource marked stale. */
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}
2336 | 2332 | ||
/* Hand the caller a referenced buffer_head for the inode block,
 * reusing passed_bh when the refresh already read one from disk. */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (passed_bh) {
		/* The update went to disk for us; take our own
		 * reference on the bh it already read. */
		get_bh(passed_bh);
		*ret_bh = passed_bh;
		return 0;
	}

	/* Nothing was passed in, so read the inode block ourselves. */
	status = ocfs2_read_inode_block(inode, ret_bh);
	if (status < 0)
		mlog_errno(status);

	return status;
}
2358 | 2354 | ||
2359 | /* | 2355 | /* |
2360 | * returns < 0 error if the callback will never be called, otherwise | 2356 | * returns < 0 error if the callback will never be called, otherwise |
2361 | * the result of the lock will be communicated via the callback. | 2357 | * the result of the lock will be communicated via the callback. |
2362 | */ | 2358 | */ |
2363 | int ocfs2_inode_lock_full_nested(struct inode *inode, | 2359 | int ocfs2_inode_lock_full_nested(struct inode *inode, |
2364 | struct buffer_head **ret_bh, | 2360 | struct buffer_head **ret_bh, |
2365 | int ex, | 2361 | int ex, |
2366 | int arg_flags, | 2362 | int arg_flags, |
2367 | int subclass) | 2363 | int subclass) |
2368 | { | 2364 | { |
2369 | int status, level, acquired; | 2365 | int status, level, acquired; |
2370 | u32 dlm_flags; | 2366 | u32 dlm_flags; |
2371 | struct ocfs2_lock_res *lockres = NULL; | 2367 | struct ocfs2_lock_res *lockres = NULL; |
2372 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2368 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2373 | struct buffer_head *local_bh = NULL; | 2369 | struct buffer_head *local_bh = NULL; |
2374 | 2370 | ||
2375 | BUG_ON(!inode); | 2371 | BUG_ON(!inode); |
2376 | 2372 | ||
2377 | mlog_entry_void(); | 2373 | mlog_entry_void(); |
2378 | 2374 | ||
2379 | mlog(0, "inode %llu, take %s META lock\n", | 2375 | mlog(0, "inode %llu, take %s META lock\n", |
2380 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2376 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2381 | ex ? "EXMODE" : "PRMODE"); | 2377 | ex ? "EXMODE" : "PRMODE"); |
2382 | 2378 | ||
2383 | status = 0; | 2379 | status = 0; |
2384 | acquired = 0; | 2380 | acquired = 0; |
2385 | /* We'll allow faking a readonly metadata lock for | 2381 | /* We'll allow faking a readonly metadata lock for |
2386 | * rodevices. */ | 2382 | * rodevices. */ |
2387 | if (ocfs2_is_hard_readonly(osb)) { | 2383 | if (ocfs2_is_hard_readonly(osb)) { |
2388 | if (ex) | 2384 | if (ex) |
2389 | status = -EROFS; | 2385 | status = -EROFS; |
2390 | goto bail; | 2386 | goto bail; |
2391 | } | 2387 | } |
2392 | 2388 | ||
2393 | if (ocfs2_mount_local(osb)) | 2389 | if (ocfs2_mount_local(osb)) |
2394 | goto local; | 2390 | goto local; |
2395 | 2391 | ||
2396 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2392 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
2397 | ocfs2_wait_for_recovery(osb); | 2393 | ocfs2_wait_for_recovery(osb); |
2398 | 2394 | ||
2399 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2395 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
2400 | level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 2396 | level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2401 | dlm_flags = 0; | 2397 | dlm_flags = 0; |
2402 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 2398 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
2403 | dlm_flags |= DLM_LKF_NOQUEUE; | 2399 | dlm_flags |= DLM_LKF_NOQUEUE; |
2404 | 2400 | ||
2405 | status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, | 2401 | status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, |
2406 | arg_flags, subclass, _RET_IP_); | 2402 | arg_flags, subclass, _RET_IP_); |
2407 | if (status < 0) { | 2403 | if (status < 0) { |
2408 | if (status != -EAGAIN && status != -EIOCBRETRY) | 2404 | if (status != -EAGAIN && status != -EIOCBRETRY) |
2409 | mlog_errno(status); | 2405 | mlog_errno(status); |
2410 | goto bail; | 2406 | goto bail; |
2411 | } | 2407 | } |
2412 | 2408 | ||
2413 | /* Notify the error cleanup path to drop the cluster lock. */ | 2409 | /* Notify the error cleanup path to drop the cluster lock. */ |
2414 | acquired = 1; | 2410 | acquired = 1; |
2415 | 2411 | ||
2416 | /* We wait twice because a node may have died while we were in | 2412 | /* We wait twice because a node may have died while we were in |
2417 | * the lower dlm layers. The second time though, we've | 2413 | * the lower dlm layers. The second time though, we've |
2418 | * committed to owning this lock so we don't allow signals to | 2414 | * committed to owning this lock so we don't allow signals to |
2419 | * abort the operation. */ | 2415 | * abort the operation. */ |
2420 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2416 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
2421 | ocfs2_wait_for_recovery(osb); | 2417 | ocfs2_wait_for_recovery(osb); |
2422 | 2418 | ||
2423 | local: | 2419 | local: |
2424 | /* | 2420 | /* |
2425 | * We only see this flag if we're being called from | 2421 | * We only see this flag if we're being called from |
2426 | * ocfs2_read_locked_inode(). It means we're locking an inode | 2422 | * ocfs2_read_locked_inode(). It means we're locking an inode |
2427 | * which hasn't been populated yet, so clear the refresh flag | 2423 | * which hasn't been populated yet, so clear the refresh flag |
2428 | * and let the caller handle it. | 2424 | * and let the caller handle it. |
2429 | */ | 2425 | */ |
2430 | if (inode->i_state & I_NEW) { | 2426 | if (inode->i_state & I_NEW) { |
2431 | status = 0; | 2427 | status = 0; |
2432 | if (lockres) | 2428 | if (lockres) |
2433 | ocfs2_complete_lock_res_refresh(lockres, 0); | 2429 | ocfs2_complete_lock_res_refresh(lockres, 0); |
2434 | goto bail; | 2430 | goto bail; |
2435 | } | 2431 | } |
2436 | 2432 | ||
2437 | /* This is fun. The caller may want a bh back, or it may | 2433 | /* This is fun. The caller may want a bh back, or it may |
2438 | * not. ocfs2_inode_lock_update definitely wants one in, but | 2434 | * not. ocfs2_inode_lock_update definitely wants one in, but |
2439 | * may or may not read one, depending on what's in the | 2435 | * may or may not read one, depending on what's in the |
2440 | * LVB. The result of all of this is that we've *only* gone to | 2436 | * LVB. The result of all of this is that we've *only* gone to |
2441 | * disk if we have to, so the complexity is worthwhile. */ | 2437 | * disk if we have to, so the complexity is worthwhile. */ |
2442 | status = ocfs2_inode_lock_update(inode, &local_bh); | 2438 | status = ocfs2_inode_lock_update(inode, &local_bh); |
2443 | if (status < 0) { | 2439 | if (status < 0) { |
2444 | if (status != -ENOENT) | 2440 | if (status != -ENOENT) |
2445 | mlog_errno(status); | 2441 | mlog_errno(status); |
2446 | goto bail; | 2442 | goto bail; |
2447 | } | 2443 | } |
2448 | 2444 | ||
2449 | if (ret_bh) { | 2445 | if (ret_bh) { |
2450 | status = ocfs2_assign_bh(inode, ret_bh, local_bh); | 2446 | status = ocfs2_assign_bh(inode, ret_bh, local_bh); |
2451 | if (status < 0) { | 2447 | if (status < 0) { |
2452 | mlog_errno(status); | 2448 | mlog_errno(status); |
2453 | goto bail; | 2449 | goto bail; |
2454 | } | 2450 | } |
2455 | } | 2451 | } |
2456 | 2452 | ||
2457 | bail: | 2453 | bail: |
2458 | if (status < 0) { | 2454 | if (status < 0) { |
2459 | if (ret_bh && (*ret_bh)) { | 2455 | if (ret_bh && (*ret_bh)) { |
2460 | brelse(*ret_bh); | 2456 | brelse(*ret_bh); |
2461 | *ret_bh = NULL; | 2457 | *ret_bh = NULL; |
2462 | } | 2458 | } |
2463 | if (acquired) | 2459 | if (acquired) |
2464 | ocfs2_inode_unlock(inode, ex); | 2460 | ocfs2_inode_unlock(inode, ex); |
2465 | } | 2461 | } |
2466 | 2462 | ||
2467 | if (local_bh) | 2463 | if (local_bh) |
2468 | brelse(local_bh); | 2464 | brelse(local_bh); |
2469 | 2465 | ||
2470 | mlog_exit(status); | 2466 | mlog_exit(status); |
2471 | return status; | 2467 | return status; |
2472 | } | 2468 | } |
2473 | 2469 | ||
2474 | /* | 2470 | /* |
2475 | * This is working around a lock inversion between tasks acquiring DLM | 2471 | * This is working around a lock inversion between tasks acquiring DLM |
2476 | * locks while holding a page lock and the downconvert thread which | 2472 | * locks while holding a page lock and the downconvert thread which |
2477 | * blocks dlm lock acquiry while acquiring page locks. | 2473 | * blocks dlm lock acquiry while acquiring page locks. |
2478 | * | 2474 | * |
2479 | * ** These _with_page variantes are only intended to be called from aop | 2475 | * ** These _with_page variantes are only intended to be called from aop |
2480 | * methods that hold page locks and return a very specific *positive* error | 2476 | * methods that hold page locks and return a very specific *positive* error |
2481 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 2477 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** |
2482 | * | 2478 | * |
2483 | * The DLM is called such that it returns -EAGAIN if it would have | 2479 | * The DLM is called such that it returns -EAGAIN if it would have |
2484 | * blocked waiting for the downconvert thread. In that case we unlock | 2480 | * blocked waiting for the downconvert thread. In that case we unlock |
2485 | * our page so the downconvert thread can make progress. Once we've | 2481 | * our page so the downconvert thread can make progress. Once we've |
2486 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method | 2482 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method |
2487 | * that called us can bubble that back up into the VFS who will then | 2483 | * that called us can bubble that back up into the VFS who will then |
2488 | * immediately retry the aop call. | 2484 | * immediately retry the aop call. |
2489 | * | 2485 | * |
2490 | * We do a blocking lock and immediate unlock before returning, though, so that | 2486 | * We do a blocking lock and immediate unlock before returning, though, so that |
2491 | * the lock has a great chance of being cached on this node by the time the VFS | 2487 | * the lock has a great chance of being cached on this node by the time the VFS |
2492 | * calls back to retry the aop. This has a potential to livelock as nodes | 2488 | * calls back to retry the aop. This has a potential to livelock as nodes |
2493 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 2489 | * ping locks back and forth, but that's a risk we're willing to take to avoid |
2494 | * the lock inversion simply. | 2490 | * the lock inversion simply. |
2495 | */ | 2491 | */ |
2496 | int ocfs2_inode_lock_with_page(struct inode *inode, | 2492 | int ocfs2_inode_lock_with_page(struct inode *inode, |
2497 | struct buffer_head **ret_bh, | 2493 | struct buffer_head **ret_bh, |
2498 | int ex, | 2494 | int ex, |
2499 | struct page *page) | 2495 | struct page *page) |
2500 | { | 2496 | { |
2501 | int ret; | 2497 | int ret; |
2502 | 2498 | ||
2503 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 2499 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); |
2504 | if (ret == -EAGAIN) { | 2500 | if (ret == -EAGAIN) { |
2505 | unlock_page(page); | 2501 | unlock_page(page); |
2506 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) | 2502 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) |
2507 | ocfs2_inode_unlock(inode, ex); | 2503 | ocfs2_inode_unlock(inode, ex); |
2508 | ret = AOP_TRUNCATED_PAGE; | 2504 | ret = AOP_TRUNCATED_PAGE; |
2509 | } | 2505 | } |
2510 | 2506 | ||
2511 | return ret; | 2507 | return ret; |
2512 | } | 2508 | } |
2513 | 2509 | ||
2514 | int ocfs2_inode_lock_atime(struct inode *inode, | 2510 | int ocfs2_inode_lock_atime(struct inode *inode, |
2515 | struct vfsmount *vfsmnt, | 2511 | struct vfsmount *vfsmnt, |
2516 | int *level) | 2512 | int *level) |
2517 | { | 2513 | { |
2518 | int ret; | 2514 | int ret; |
2519 | 2515 | ||
2520 | mlog_entry_void(); | 2516 | mlog_entry_void(); |
2521 | ret = ocfs2_inode_lock(inode, NULL, 0); | 2517 | ret = ocfs2_inode_lock(inode, NULL, 0); |
2522 | if (ret < 0) { | 2518 | if (ret < 0) { |
2523 | mlog_errno(ret); | 2519 | mlog_errno(ret); |
2524 | return ret; | 2520 | return ret; |
2525 | } | 2521 | } |
2526 | 2522 | ||
2527 | /* | 2523 | /* |
2528 | * If we should update atime, we will get EX lock, | 2524 | * If we should update atime, we will get EX lock, |
2529 | * otherwise we just get PR lock. | 2525 | * otherwise we just get PR lock. |
2530 | */ | 2526 | */ |
2531 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 2527 | if (ocfs2_should_update_atime(inode, vfsmnt)) { |
2532 | struct buffer_head *bh = NULL; | 2528 | struct buffer_head *bh = NULL; |
2533 | 2529 | ||
2534 | ocfs2_inode_unlock(inode, 0); | 2530 | ocfs2_inode_unlock(inode, 0); |
2535 | ret = ocfs2_inode_lock(inode, &bh, 1); | 2531 | ret = ocfs2_inode_lock(inode, &bh, 1); |
2536 | if (ret < 0) { | 2532 | if (ret < 0) { |
2537 | mlog_errno(ret); | 2533 | mlog_errno(ret); |
2538 | return ret; | 2534 | return ret; |
2539 | } | 2535 | } |
2540 | *level = 1; | 2536 | *level = 1; |
2541 | if (ocfs2_should_update_atime(inode, vfsmnt)) | 2537 | if (ocfs2_should_update_atime(inode, vfsmnt)) |
2542 | ocfs2_update_inode_atime(inode, bh); | 2538 | ocfs2_update_inode_atime(inode, bh); |
2543 | if (bh) | 2539 | if (bh) |
2544 | brelse(bh); | 2540 | brelse(bh); |
2545 | } else | 2541 | } else |
2546 | *level = 0; | 2542 | *level = 0; |
2547 | 2543 | ||
2548 | mlog_exit(ret); | 2544 | mlog_exit(ret); |
2549 | return ret; | 2545 | return ret; |
2550 | } | 2546 | } |
2551 | 2547 | ||
2552 | void ocfs2_inode_unlock(struct inode *inode, | 2548 | void ocfs2_inode_unlock(struct inode *inode, |
2553 | int ex) | 2549 | int ex) |
2554 | { | 2550 | { |
2555 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 2551 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2556 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2552 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
2557 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2553 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2558 | 2554 | ||
2559 | mlog_entry_void(); | 2555 | mlog_entry_void(); |
2560 | 2556 | ||
2561 | mlog(0, "inode %llu drop %s META lock\n", | 2557 | mlog(0, "inode %llu drop %s META lock\n", |
2562 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2558 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2563 | ex ? "EXMODE" : "PRMODE"); | 2559 | ex ? "EXMODE" : "PRMODE"); |
2564 | 2560 | ||
2565 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | 2561 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && |
2566 | !ocfs2_mount_local(osb)) | 2562 | !ocfs2_mount_local(osb)) |
2567 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 2563 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); |
2568 | 2564 | ||
2569 | mlog_exit_void(); | 2565 | mlog_exit_void(); |
2570 | } | 2566 | } |
2571 | 2567 | ||
2572 | int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) | 2568 | int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) |
2573 | { | 2569 | { |
2574 | struct ocfs2_lock_res *lockres; | 2570 | struct ocfs2_lock_res *lockres; |
2575 | struct ocfs2_orphan_scan_lvb *lvb; | 2571 | struct ocfs2_orphan_scan_lvb *lvb; |
2576 | int status = 0; | 2572 | int status = 0; |
2577 | 2573 | ||
2578 | if (ocfs2_is_hard_readonly(osb)) | 2574 | if (ocfs2_is_hard_readonly(osb)) |
2579 | return -EROFS; | 2575 | return -EROFS; |
2580 | 2576 | ||
2581 | if (ocfs2_mount_local(osb)) | 2577 | if (ocfs2_mount_local(osb)) |
2582 | return 0; | 2578 | return 0; |
2583 | 2579 | ||
2584 | lockres = &osb->osb_orphan_scan.os_lockres; | 2580 | lockres = &osb->osb_orphan_scan.os_lockres; |
2585 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); | 2581 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); |
2586 | if (status < 0) | 2582 | if (status < 0) |
2587 | return status; | 2583 | return status; |
2588 | 2584 | ||
2589 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 2585 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2590 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && | 2586 | if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && |
2591 | lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) | 2587 | lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) |
2592 | *seqno = be32_to_cpu(lvb->lvb_os_seqno); | 2588 | *seqno = be32_to_cpu(lvb->lvb_os_seqno); |
2593 | else | 2589 | else |
2594 | *seqno = osb->osb_orphan_scan.os_seqno + 1; | 2590 | *seqno = osb->osb_orphan_scan.os_seqno + 1; |
2595 | 2591 | ||
2596 | return status; | 2592 | return status; |
2597 | } | 2593 | } |
2598 | 2594 | ||
2599 | void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) | 2595 | void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) |
2600 | { | 2596 | { |
2601 | struct ocfs2_lock_res *lockres; | 2597 | struct ocfs2_lock_res *lockres; |
2602 | struct ocfs2_orphan_scan_lvb *lvb; | 2598 | struct ocfs2_orphan_scan_lvb *lvb; |
2603 | 2599 | ||
2604 | if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { | 2600 | if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { |
2605 | lockres = &osb->osb_orphan_scan.os_lockres; | 2601 | lockres = &osb->osb_orphan_scan.os_lockres; |
2606 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 2602 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2607 | lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; | 2603 | lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; |
2608 | lvb->lvb_os_seqno = cpu_to_be32(seqno); | 2604 | lvb->lvb_os_seqno = cpu_to_be32(seqno); |
2609 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); | 2605 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); |
2610 | } | 2606 | } |
2611 | } | 2607 | } |
2612 | 2608 | ||
/* Take the global superblock cluster lock (EX or PR) and refresh the
 * slot map if the lock resource was marked stale. */
int ocfs2_super_lock(struct ocfs2_super *osb,
		     int ex)
{
	int status = 0;
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;

	mlog_entry_void();

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* The super block lock path is really in the best position to
	 * know when resources covered by the lock need to be
	 * refreshed, so we do it here. Of course, making sense of
	 * everything is up to the caller :) */
	status = ocfs2_should_refresh_lock_res(lockres);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (status) {
		/* We won the refresh arbitration (status > 0), so we
		 * must complete it, success or not. */
		status = ocfs2_refresh_slot_info(osb);

		ocfs2_complete_lock_res_refresh(lockres, status);

		if (status < 0)
			mlog_errno(status);
		ocfs2_track_lock_refresh(lockres);
	}
bail:
	mlog_exit(status);
	return status;
}
2656 | 2652 | ||
2657 | void ocfs2_super_unlock(struct ocfs2_super *osb, | 2653 | void ocfs2_super_unlock(struct ocfs2_super *osb, |
2658 | int ex) | 2654 | int ex) |
2659 | { | 2655 | { |
2660 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 2656 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2661 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 2657 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; |
2662 | 2658 | ||
2663 | if (!ocfs2_mount_local(osb)) | 2659 | if (!ocfs2_mount_local(osb)) |
2664 | ocfs2_cluster_unlock(osb, lockres, level); | 2660 | ocfs2_cluster_unlock(osb, lockres, level); |
2665 | } | 2661 | } |
2666 | 2662 | ||
2667 | int ocfs2_rename_lock(struct ocfs2_super *osb) | 2663 | int ocfs2_rename_lock(struct ocfs2_super *osb) |
2668 | { | 2664 | { |
2669 | int status; | 2665 | int status; |
2670 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 2666 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; |
2671 | 2667 | ||
2672 | if (ocfs2_is_hard_readonly(osb)) | 2668 | if (ocfs2_is_hard_readonly(osb)) |
2673 | return -EROFS; | 2669 | return -EROFS; |
2674 | 2670 | ||
2675 | if (ocfs2_mount_local(osb)) | 2671 | if (ocfs2_mount_local(osb)) |
2676 | return 0; | 2672 | return 0; |
2677 | 2673 | ||
2678 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); | 2674 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); |
2679 | if (status < 0) | 2675 | if (status < 0) |
2680 | mlog_errno(status); | 2676 | mlog_errno(status); |
2681 | 2677 | ||
2682 | return status; | 2678 | return status; |
2683 | } | 2679 | } |
2684 | 2680 | ||
2685 | void ocfs2_rename_unlock(struct ocfs2_super *osb) | 2681 | void ocfs2_rename_unlock(struct ocfs2_super *osb) |
2686 | { | 2682 | { |
2687 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 2683 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; |
2688 | 2684 | ||
2689 | if (!ocfs2_mount_local(osb)) | 2685 | if (!ocfs2_mount_local(osb)) |
2690 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); | 2686 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); |
2691 | } | 2687 | } |
2692 | 2688 | ||
2693 | int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) | 2689 | int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) |
2694 | { | 2690 | { |
2695 | int status; | 2691 | int status; |
2696 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | 2692 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; |
2697 | 2693 | ||
2698 | if (ocfs2_is_hard_readonly(osb)) | 2694 | if (ocfs2_is_hard_readonly(osb)) |
2699 | return -EROFS; | 2695 | return -EROFS; |
2700 | 2696 | ||
2701 | if (ocfs2_mount_local(osb)) | 2697 | if (ocfs2_mount_local(osb)) |
2702 | return 0; | 2698 | return 0; |
2703 | 2699 | ||
2704 | status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, | 2700 | status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, |
2705 | 0, 0); | 2701 | 0, 0); |
2706 | if (status < 0) | 2702 | if (status < 0) |
2707 | mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); | 2703 | mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); |
2708 | 2704 | ||
2709 | return status; | 2705 | return status; |
2710 | } | 2706 | } |
2711 | 2707 | ||
2712 | void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) | 2708 | void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) |
2713 | { | 2709 | { |
2714 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; | 2710 | struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; |
2715 | 2711 | ||
2716 | if (!ocfs2_mount_local(osb)) | 2712 | if (!ocfs2_mount_local(osb)) |
2717 | ocfs2_cluster_unlock(osb, lockres, | 2713 | ocfs2_cluster_unlock(osb, lockres, |
2718 | ex ? LKM_EXMODE : LKM_PRMODE); | 2714 | ex ? LKM_EXMODE : LKM_PRMODE); |
2719 | } | 2715 | } |
2720 | 2716 | ||
2721 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 2717 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) |
2722 | { | 2718 | { |
2723 | int ret; | 2719 | int ret; |
2724 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 2720 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2725 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2721 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
2726 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2722 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
2727 | 2723 | ||
2728 | BUG_ON(!dl); | 2724 | BUG_ON(!dl); |
2729 | 2725 | ||
2730 | if (ocfs2_is_hard_readonly(osb)) | 2726 | if (ocfs2_is_hard_readonly(osb)) |
2731 | return -EROFS; | 2727 | return -EROFS; |
2732 | 2728 | ||
2733 | if (ocfs2_mount_local(osb)) | 2729 | if (ocfs2_mount_local(osb)) |
2734 | return 0; | 2730 | return 0; |
2735 | 2731 | ||
2736 | ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); | 2732 | ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); |
2737 | if (ret < 0) | 2733 | if (ret < 0) |
2738 | mlog_errno(ret); | 2734 | mlog_errno(ret); |
2739 | 2735 | ||
2740 | return ret; | 2736 | return ret; |
2741 | } | 2737 | } |
2742 | 2738 | ||
2743 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | 2739 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) |
2744 | { | 2740 | { |
2745 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 2741 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
2746 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2742 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
2747 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2743 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
2748 | 2744 | ||
2749 | if (!ocfs2_mount_local(osb)) | 2745 | if (!ocfs2_mount_local(osb)) |
2750 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); | 2746 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); |
2751 | } | 2747 | } |
2752 | 2748 | ||
2753 | /* Reference counting of the dlm debug structure. We want this because | 2749 | /* Reference counting of the dlm debug structure. We want this because |
2754 | * open references on the debug inodes can live on after a mount, so | 2750 | * open references on the debug inodes can live on after a mount, so |
2755 | * we can't rely on the ocfs2_super to always exist. */ | 2751 | * we can't rely on the ocfs2_super to always exist. */ |
2756 | static void ocfs2_dlm_debug_free(struct kref *kref) | 2752 | static void ocfs2_dlm_debug_free(struct kref *kref) |
2757 | { | 2753 | { |
2758 | struct ocfs2_dlm_debug *dlm_debug; | 2754 | struct ocfs2_dlm_debug *dlm_debug; |
2759 | 2755 | ||
2760 | dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); | 2756 | dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); |
2761 | 2757 | ||
2762 | kfree(dlm_debug); | 2758 | kfree(dlm_debug); |
2763 | } | 2759 | } |
2764 | 2760 | ||
2765 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) | 2761 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) |
2766 | { | 2762 | { |
2767 | if (dlm_debug) | 2763 | if (dlm_debug) |
2768 | kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); | 2764 | kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); |
2769 | } | 2765 | } |
2770 | 2766 | ||
2771 | static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) | 2767 | static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) |
2772 | { | 2768 | { |
2773 | kref_get(&debug->d_refcnt); | 2769 | kref_get(&debug->d_refcnt); |
2774 | } | 2770 | } |
2775 | 2771 | ||
2776 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) | 2772 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) |
2777 | { | 2773 | { |
2778 | struct ocfs2_dlm_debug *dlm_debug; | 2774 | struct ocfs2_dlm_debug *dlm_debug; |
2779 | 2775 | ||
2780 | dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); | 2776 | dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); |
2781 | if (!dlm_debug) { | 2777 | if (!dlm_debug) { |
2782 | mlog_errno(-ENOMEM); | 2778 | mlog_errno(-ENOMEM); |
2783 | goto out; | 2779 | goto out; |
2784 | } | 2780 | } |
2785 | 2781 | ||
2786 | kref_init(&dlm_debug->d_refcnt); | 2782 | kref_init(&dlm_debug->d_refcnt); |
2787 | INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); | 2783 | INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); |
2788 | dlm_debug->d_locking_state = NULL; | 2784 | dlm_debug->d_locking_state = NULL; |
2789 | out: | 2785 | out: |
2790 | return dlm_debug; | 2786 | return dlm_debug; |
2791 | } | 2787 | } |
2792 | 2788 | ||
2793 | /* Access to this is arbitrated for us via seq_file->sem. */ | 2789 | /* Access to this is arbitrated for us via seq_file->sem. */ |
2794 | struct ocfs2_dlm_seq_priv { | 2790 | struct ocfs2_dlm_seq_priv { |
2795 | struct ocfs2_dlm_debug *p_dlm_debug; | 2791 | struct ocfs2_dlm_debug *p_dlm_debug; |
2796 | struct ocfs2_lock_res p_iter_res; | 2792 | struct ocfs2_lock_res p_iter_res; |
2797 | struct ocfs2_lock_res p_tmp_res; | 2793 | struct ocfs2_lock_res p_tmp_res; |
2798 | }; | 2794 | }; |
2799 | 2795 | ||
2800 | static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, | 2796 | static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, |
2801 | struct ocfs2_dlm_seq_priv *priv) | 2797 | struct ocfs2_dlm_seq_priv *priv) |
2802 | { | 2798 | { |
2803 | struct ocfs2_lock_res *iter, *ret = NULL; | 2799 | struct ocfs2_lock_res *iter, *ret = NULL; |
2804 | struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; | 2800 | struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; |
2805 | 2801 | ||
2806 | assert_spin_locked(&ocfs2_dlm_tracking_lock); | 2802 | assert_spin_locked(&ocfs2_dlm_tracking_lock); |
2807 | 2803 | ||
2808 | list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { | 2804 | list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { |
2809 | /* discover the head of the list */ | 2805 | /* discover the head of the list */ |
2810 | if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { | 2806 | if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { |
2811 | mlog(0, "End of list found, %p\n", ret); | 2807 | mlog(0, "End of list found, %p\n", ret); |
2812 | break; | 2808 | break; |
2813 | } | 2809 | } |
2814 | 2810 | ||
2815 | /* We track our "dummy" iteration lockres' by a NULL | 2811 | /* We track our "dummy" iteration lockres' by a NULL |
2816 | * l_ops field. */ | 2812 | * l_ops field. */ |
2817 | if (iter->l_ops != NULL) { | 2813 | if (iter->l_ops != NULL) { |
2818 | ret = iter; | 2814 | ret = iter; |
2819 | break; | 2815 | break; |
2820 | } | 2816 | } |
2821 | } | 2817 | } |
2822 | 2818 | ||
2823 | return ret; | 2819 | return ret; |
2824 | } | 2820 | } |
2825 | 2821 | ||
2826 | static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) | 2822 | static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) |
2827 | { | 2823 | { |
2828 | struct ocfs2_dlm_seq_priv *priv = m->private; | 2824 | struct ocfs2_dlm_seq_priv *priv = m->private; |
2829 | struct ocfs2_lock_res *iter; | 2825 | struct ocfs2_lock_res *iter; |
2830 | 2826 | ||
2831 | spin_lock(&ocfs2_dlm_tracking_lock); | 2827 | spin_lock(&ocfs2_dlm_tracking_lock); |
2832 | iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); | 2828 | iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); |
2833 | if (iter) { | 2829 | if (iter) { |
2834 | /* Since lockres' have the lifetime of their container | 2830 | /* Since lockres' have the lifetime of their container |
2835 | * (which can be inodes, ocfs2_supers, etc) we want to | 2831 | * (which can be inodes, ocfs2_supers, etc) we want to |
2836 | * copy this out to a temporary lockres while still | 2832 | * copy this out to a temporary lockres while still |
2837 | * under the spinlock. Obviously after this we can't | 2833 | * under the spinlock. Obviously after this we can't |
2838 | * trust any pointers on the copy returned, but that's | 2834 | * trust any pointers on the copy returned, but that's |
2839 | * ok as the information we want isn't typically held | 2835 | * ok as the information we want isn't typically held |
2840 | * in them. */ | 2836 | * in them. */ |
2841 | priv->p_tmp_res = *iter; | 2837 | priv->p_tmp_res = *iter; |
2842 | iter = &priv->p_tmp_res; | 2838 | iter = &priv->p_tmp_res; |
2843 | } | 2839 | } |
2844 | spin_unlock(&ocfs2_dlm_tracking_lock); | 2840 | spin_unlock(&ocfs2_dlm_tracking_lock); |
2845 | 2841 | ||
2846 | return iter; | 2842 | return iter; |
2847 | } | 2843 | } |
2848 | 2844 | ||
2849 | static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) | 2845 | static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) |
2850 | { | 2846 | { |
2851 | } | 2847 | } |
2852 | 2848 | ||
2853 | static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) | 2849 | static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) |
2854 | { | 2850 | { |
2855 | struct ocfs2_dlm_seq_priv *priv = m->private; | 2851 | struct ocfs2_dlm_seq_priv *priv = m->private; |
2856 | struct ocfs2_lock_res *iter = v; | 2852 | struct ocfs2_lock_res *iter = v; |
2857 | struct ocfs2_lock_res *dummy = &priv->p_iter_res; | 2853 | struct ocfs2_lock_res *dummy = &priv->p_iter_res; |
2858 | 2854 | ||
2859 | spin_lock(&ocfs2_dlm_tracking_lock); | 2855 | spin_lock(&ocfs2_dlm_tracking_lock); |
2860 | iter = ocfs2_dlm_next_res(iter, priv); | 2856 | iter = ocfs2_dlm_next_res(iter, priv); |
2861 | list_del_init(&dummy->l_debug_list); | 2857 | list_del_init(&dummy->l_debug_list); |
2862 | if (iter) { | 2858 | if (iter) { |
2863 | list_add(&dummy->l_debug_list, &iter->l_debug_list); | 2859 | list_add(&dummy->l_debug_list, &iter->l_debug_list); |
2864 | priv->p_tmp_res = *iter; | 2860 | priv->p_tmp_res = *iter; |
2865 | iter = &priv->p_tmp_res; | 2861 | iter = &priv->p_tmp_res; |
2866 | } | 2862 | } |
2867 | spin_unlock(&ocfs2_dlm_tracking_lock); | 2863 | spin_unlock(&ocfs2_dlm_tracking_lock); |
2868 | 2864 | ||
2869 | return iter; | 2865 | return iter; |
2870 | } | 2866 | } |
2871 | 2867 | ||
2872 | /* So that debugfs.ocfs2 can determine which format is being used */ | 2868 | /* |
2873 | #define OCFS2_DLM_DEBUG_STR_VERSION 2 | 2869 | * Version is used by debugfs.ocfs2 to determine the format being used |
2870 | * | ||
2871 | * New in version 2 | ||
2872 | * - Lock stats printed | ||
2873 | * New in version 3 | ||
2874 | * - Max time in lock stats is in usecs (instead of nsecs) | ||
2875 | */ | ||
2876 | #define OCFS2_DLM_DEBUG_STR_VERSION 3 | ||
2874 | static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | 2877 | static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) |
2875 | { | 2878 | { |
2876 | int i; | 2879 | int i; |
2877 | char *lvb; | 2880 | char *lvb; |
2878 | struct ocfs2_lock_res *lockres = v; | 2881 | struct ocfs2_lock_res *lockres = v; |
2879 | 2882 | ||
2880 | if (!lockres) | 2883 | if (!lockres) |
2881 | return -EINVAL; | 2884 | return -EINVAL; |
2882 | 2885 | ||
2883 | seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); | 2886 | seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); |
2884 | 2887 | ||
2885 | if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) | 2888 | if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) |
2886 | seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, | 2889 | seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, |
2887 | lockres->l_name, | 2890 | lockres->l_name, |
2888 | (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); | 2891 | (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); |
2889 | else | 2892 | else |
2890 | seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); | 2893 | seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); |
2891 | 2894 | ||
2892 | seq_printf(m, "%d\t" | 2895 | seq_printf(m, "%d\t" |
2893 | "0x%lx\t" | 2896 | "0x%lx\t" |
2894 | "0x%x\t" | 2897 | "0x%x\t" |
2895 | "0x%x\t" | 2898 | "0x%x\t" |
2896 | "%u\t" | 2899 | "%u\t" |
2897 | "%u\t" | 2900 | "%u\t" |
2898 | "%d\t" | 2901 | "%d\t" |
2899 | "%d\t", | 2902 | "%d\t", |
2900 | lockres->l_level, | 2903 | lockres->l_level, |
2901 | lockres->l_flags, | 2904 | lockres->l_flags, |
2902 | lockres->l_action, | 2905 | lockres->l_action, |
2903 | lockres->l_unlock_action, | 2906 | lockres->l_unlock_action, |
2904 | lockres->l_ro_holders, | 2907 | lockres->l_ro_holders, |
2905 | lockres->l_ex_holders, | 2908 | lockres->l_ex_holders, |
2906 | lockres->l_requested, | 2909 | lockres->l_requested, |
2907 | lockres->l_blocking); | 2910 | lockres->l_blocking); |
2908 | 2911 | ||
2909 | /* Dump the raw LVB */ | 2912 | /* Dump the raw LVB */ |
2910 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 2913 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
2911 | for(i = 0; i < DLM_LVB_LEN; i++) | 2914 | for(i = 0; i < DLM_LVB_LEN; i++) |
2912 | seq_printf(m, "0x%x\t", lvb[i]); | 2915 | seq_printf(m, "0x%x\t", lvb[i]); |
2913 | 2916 | ||
2914 | #ifdef CONFIG_OCFS2_FS_STATS | 2917 | #ifdef CONFIG_OCFS2_FS_STATS |
2915 | # define lock_num_prmode(_l) (_l)->l_lock_num_prmode | 2918 | # define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets) |
2916 | # define lock_num_exmode(_l) (_l)->l_lock_num_exmode | 2919 | # define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets) |
2917 | # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed | 2920 | # define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail) |
2918 | # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed | 2921 | # define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail) |
2919 | # define lock_total_prmode(_l) (_l)->l_lock_total_prmode | 2922 | # define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total) |
2920 | # define lock_total_exmode(_l) (_l)->l_lock_total_exmode | 2923 | # define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total) |
2921 | # define lock_max_prmode(_l) (_l)->l_lock_max_prmode | 2924 | # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max) |
2922 | # define lock_max_exmode(_l) (_l)->l_lock_max_exmode | 2925 | # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max) |
2923 | # define lock_refresh(_l) (_l)->l_lock_refresh | 2926 | # define lock_refresh(_l) ((_l)->l_lock_refresh) |
2924 | #else | 2927 | #else |
2925 | # define lock_num_prmode(_l) (0ULL) | 2928 | # define lock_num_prmode(_l) (0) |
2926 | # define lock_num_exmode(_l) (0ULL) | 2929 | # define lock_num_exmode(_l) (0) |
2927 | # define lock_num_prmode_failed(_l) (0) | 2930 | # define lock_num_prmode_failed(_l) (0) |
2928 | # define lock_num_exmode_failed(_l) (0) | 2931 | # define lock_num_exmode_failed(_l) (0) |
2929 | # define lock_total_prmode(_l) (0ULL) | 2932 | # define lock_total_prmode(_l) (0ULL) |
2930 | # define lock_total_exmode(_l) (0ULL) | 2933 | # define lock_total_exmode(_l) (0ULL) |
2931 | # define lock_max_prmode(_l) (0) | 2934 | # define lock_max_prmode(_l) (0) |
2932 | # define lock_max_exmode(_l) (0) | 2935 | # define lock_max_exmode(_l) (0) |
2933 | # define lock_refresh(_l) (0) | 2936 | # define lock_refresh(_l) (0) |
2934 | #endif | 2937 | #endif |
2935 | /* The following seq_print was added in version 2 of this output */ | 2938 | /* The following seq_print was added in version 2 of this output */ |
2936 | seq_printf(m, "%llu\t" | 2939 | seq_printf(m, "%u\t" |
2937 | "%llu\t" | 2940 | "%u\t" |
2938 | "%u\t" | 2941 | "%u\t" |
2939 | "%u\t" | 2942 | "%u\t" |
2940 | "%llu\t" | 2943 | "%llu\t" |
2941 | "%llu\t" | 2944 | "%llu\t" |
2942 | "%u\t" | 2945 | "%u\t" |
2943 | "%u\t" | 2946 | "%u\t" |
2944 | "%u\t", | 2947 | "%u\t", |
2945 | lock_num_prmode(lockres), | 2948 | lock_num_prmode(lockres), |
2946 | lock_num_exmode(lockres), | 2949 | lock_num_exmode(lockres), |
2947 | lock_num_prmode_failed(lockres), | 2950 | lock_num_prmode_failed(lockres), |
2948 | lock_num_exmode_failed(lockres), | 2951 | lock_num_exmode_failed(lockres), |
2949 | lock_total_prmode(lockres), | 2952 | lock_total_prmode(lockres), |
2950 | lock_total_exmode(lockres), | 2953 | lock_total_exmode(lockres), |
2951 | lock_max_prmode(lockres), | 2954 | lock_max_prmode(lockres), |
2952 | lock_max_exmode(lockres), | 2955 | lock_max_exmode(lockres), |
2953 | lock_refresh(lockres)); | 2956 | lock_refresh(lockres)); |
2954 | 2957 | ||
2955 | /* End the line */ | 2958 | /* End the line */ |
2956 | seq_printf(m, "\n"); | 2959 | seq_printf(m, "\n"); |
2957 | return 0; | 2960 | return 0; |
2958 | } | 2961 | } |
2959 | 2962 | ||
2960 | static const struct seq_operations ocfs2_dlm_seq_ops = { | 2963 | static const struct seq_operations ocfs2_dlm_seq_ops = { |
2961 | .start = ocfs2_dlm_seq_start, | 2964 | .start = ocfs2_dlm_seq_start, |
2962 | .stop = ocfs2_dlm_seq_stop, | 2965 | .stop = ocfs2_dlm_seq_stop, |
2963 | .next = ocfs2_dlm_seq_next, | 2966 | .next = ocfs2_dlm_seq_next, |
2964 | .show = ocfs2_dlm_seq_show, | 2967 | .show = ocfs2_dlm_seq_show, |
2965 | }; | 2968 | }; |
2966 | 2969 | ||
2967 | static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) | 2970 | static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) |
2968 | { | 2971 | { |
2969 | struct seq_file *seq = file->private_data; | 2972 | struct seq_file *seq = file->private_data; |
2970 | struct ocfs2_dlm_seq_priv *priv = seq->private; | 2973 | struct ocfs2_dlm_seq_priv *priv = seq->private; |
2971 | struct ocfs2_lock_res *res = &priv->p_iter_res; | 2974 | struct ocfs2_lock_res *res = &priv->p_iter_res; |
2972 | 2975 | ||
2973 | ocfs2_remove_lockres_tracking(res); | 2976 | ocfs2_remove_lockres_tracking(res); |
2974 | ocfs2_put_dlm_debug(priv->p_dlm_debug); | 2977 | ocfs2_put_dlm_debug(priv->p_dlm_debug); |
2975 | return seq_release_private(inode, file); | 2978 | return seq_release_private(inode, file); |
2976 | } | 2979 | } |
2977 | 2980 | ||
2978 | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | 2981 | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) |
2979 | { | 2982 | { |
2980 | int ret; | 2983 | int ret; |
2981 | struct ocfs2_dlm_seq_priv *priv; | 2984 | struct ocfs2_dlm_seq_priv *priv; |
2982 | struct seq_file *seq; | 2985 | struct seq_file *seq; |
2983 | struct ocfs2_super *osb; | 2986 | struct ocfs2_super *osb; |
2984 | 2987 | ||
2985 | priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); | 2988 | priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); |
2986 | if (!priv) { | 2989 | if (!priv) { |
2987 | ret = -ENOMEM; | 2990 | ret = -ENOMEM; |
2988 | mlog_errno(ret); | 2991 | mlog_errno(ret); |
2989 | goto out; | 2992 | goto out; |
2990 | } | 2993 | } |
2991 | osb = inode->i_private; | 2994 | osb = inode->i_private; |
2992 | ocfs2_get_dlm_debug(osb->osb_dlm_debug); | 2995 | ocfs2_get_dlm_debug(osb->osb_dlm_debug); |
2993 | priv->p_dlm_debug = osb->osb_dlm_debug; | 2996 | priv->p_dlm_debug = osb->osb_dlm_debug; |
2994 | INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); | 2997 | INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); |
2995 | 2998 | ||
2996 | ret = seq_open(file, &ocfs2_dlm_seq_ops); | 2999 | ret = seq_open(file, &ocfs2_dlm_seq_ops); |
2997 | if (ret) { | 3000 | if (ret) { |
2998 | kfree(priv); | 3001 | kfree(priv); |
2999 | mlog_errno(ret); | 3002 | mlog_errno(ret); |
3000 | goto out; | 3003 | goto out; |
3001 | } | 3004 | } |
3002 | 3005 | ||
3003 | seq = file->private_data; | 3006 | seq = file->private_data; |
3004 | seq->private = priv; | 3007 | seq->private = priv; |
3005 | 3008 | ||
3006 | ocfs2_add_lockres_tracking(&priv->p_iter_res, | 3009 | ocfs2_add_lockres_tracking(&priv->p_iter_res, |
3007 | priv->p_dlm_debug); | 3010 | priv->p_dlm_debug); |
3008 | 3011 | ||
3009 | out: | 3012 | out: |
3010 | return ret; | 3013 | return ret; |
3011 | } | 3014 | } |
3012 | 3015 | ||
3013 | static const struct file_operations ocfs2_dlm_debug_fops = { | 3016 | static const struct file_operations ocfs2_dlm_debug_fops = { |
3014 | .open = ocfs2_dlm_debug_open, | 3017 | .open = ocfs2_dlm_debug_open, |
3015 | .release = ocfs2_dlm_debug_release, | 3018 | .release = ocfs2_dlm_debug_release, |
3016 | .read = seq_read, | 3019 | .read = seq_read, |
3017 | .llseek = seq_lseek, | 3020 | .llseek = seq_lseek, |
3018 | }; | 3021 | }; |
3019 | 3022 | ||
3020 | static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | 3023 | static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) |
3021 | { | 3024 | { |
3022 | int ret = 0; | 3025 | int ret = 0; |
3023 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 3026 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; |
3024 | 3027 | ||
3025 | dlm_debug->d_locking_state = debugfs_create_file("locking_state", | 3028 | dlm_debug->d_locking_state = debugfs_create_file("locking_state", |
3026 | S_IFREG|S_IRUSR, | 3029 | S_IFREG|S_IRUSR, |
3027 | osb->osb_debug_root, | 3030 | osb->osb_debug_root, |
3028 | osb, | 3031 | osb, |
3029 | &ocfs2_dlm_debug_fops); | 3032 | &ocfs2_dlm_debug_fops); |
3030 | if (!dlm_debug->d_locking_state) { | 3033 | if (!dlm_debug->d_locking_state) { |
3031 | ret = -EINVAL; | 3034 | ret = -EINVAL; |
3032 | mlog(ML_ERROR, | 3035 | mlog(ML_ERROR, |
3033 | "Unable to create locking state debugfs file.\n"); | 3036 | "Unable to create locking state debugfs file.\n"); |
3034 | goto out; | 3037 | goto out; |
3035 | } | 3038 | } |
3036 | 3039 | ||
3037 | ocfs2_get_dlm_debug(dlm_debug); | 3040 | ocfs2_get_dlm_debug(dlm_debug); |
3038 | out: | 3041 | out: |
3039 | return ret; | 3042 | return ret; |
3040 | } | 3043 | } |
3041 | 3044 | ||
3042 | static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) | 3045 | static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) |
3043 | { | 3046 | { |
3044 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; | 3047 | struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; |
3045 | 3048 | ||
3046 | if (dlm_debug) { | 3049 | if (dlm_debug) { |
3047 | debugfs_remove(dlm_debug->d_locking_state); | 3050 | debugfs_remove(dlm_debug->d_locking_state); |
3048 | ocfs2_put_dlm_debug(dlm_debug); | 3051 | ocfs2_put_dlm_debug(dlm_debug); |
3049 | } | 3052 | } |
3050 | } | 3053 | } |
3051 | 3054 | ||
3052 | int ocfs2_dlm_init(struct ocfs2_super *osb) | 3055 | int ocfs2_dlm_init(struct ocfs2_super *osb) |
3053 | { | 3056 | { |
3054 | int status = 0; | 3057 | int status = 0; |
3055 | struct ocfs2_cluster_connection *conn = NULL; | 3058 | struct ocfs2_cluster_connection *conn = NULL; |
3056 | 3059 | ||
3057 | mlog_entry_void(); | 3060 | mlog_entry_void(); |
3058 | 3061 | ||
3059 | if (ocfs2_mount_local(osb)) { | 3062 | if (ocfs2_mount_local(osb)) { |
3060 | osb->node_num = 0; | 3063 | osb->node_num = 0; |
3061 | goto local; | 3064 | goto local; |
3062 | } | 3065 | } |
3063 | 3066 | ||
3064 | status = ocfs2_dlm_init_debug(osb); | 3067 | status = ocfs2_dlm_init_debug(osb); |
3065 | if (status < 0) { | 3068 | if (status < 0) { |
3066 | mlog_errno(status); | 3069 | mlog_errno(status); |
3067 | goto bail; | 3070 | goto bail; |
3068 | } | 3071 | } |
3069 | 3072 | ||
3070 | /* launch downconvert thread */ | 3073 | /* launch downconvert thread */ |
3071 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); | 3074 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); |
3072 | if (IS_ERR(osb->dc_task)) { | 3075 | if (IS_ERR(osb->dc_task)) { |
3073 | status = PTR_ERR(osb->dc_task); | 3076 | status = PTR_ERR(osb->dc_task); |
3074 | osb->dc_task = NULL; | 3077 | osb->dc_task = NULL; |
3075 | mlog_errno(status); | 3078 | mlog_errno(status); |
3076 | goto bail; | 3079 | goto bail; |
3077 | } | 3080 | } |
3078 | 3081 | ||
3079 | /* for now, uuid == domain */ | 3082 | /* for now, uuid == domain */ |
3080 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, | 3083 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, |
3081 | osb->uuid_str, | 3084 | osb->uuid_str, |
3082 | strlen(osb->uuid_str), | 3085 | strlen(osb->uuid_str), |
3083 | &lproto, ocfs2_do_node_down, osb, | 3086 | &lproto, ocfs2_do_node_down, osb, |
3084 | &conn); | 3087 | &conn); |
3085 | if (status) { | 3088 | if (status) { |
3086 | mlog_errno(status); | 3089 | mlog_errno(status); |
3087 | goto bail; | 3090 | goto bail; |
3088 | } | 3091 | } |
3089 | 3092 | ||
3090 | status = ocfs2_cluster_this_node(&osb->node_num); | 3093 | status = ocfs2_cluster_this_node(&osb->node_num); |
3091 | if (status < 0) { | 3094 | if (status < 0) { |
3092 | mlog_errno(status); | 3095 | mlog_errno(status); |
3093 | mlog(ML_ERROR, | 3096 | mlog(ML_ERROR, |
3094 | "could not find this host's node number\n"); | 3097 | "could not find this host's node number\n"); |
3095 | ocfs2_cluster_disconnect(conn, 0); | 3098 | ocfs2_cluster_disconnect(conn, 0); |
3096 | goto bail; | 3099 | goto bail; |
3097 | } | 3100 | } |
3098 | 3101 | ||
3099 | local: | 3102 | local: |
3100 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 3103 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); |
3101 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 3104 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); |
3102 | ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); | 3105 | ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); |
3103 | ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); | 3106 | ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); |
3104 | 3107 | ||
3105 | osb->cconn = conn; | 3108 | osb->cconn = conn; |
3106 | 3109 | ||
3107 | status = 0; | 3110 | status = 0; |
3108 | bail: | 3111 | bail: |
3109 | if (status < 0) { | 3112 | if (status < 0) { |
3110 | ocfs2_dlm_shutdown_debug(osb); | 3113 | ocfs2_dlm_shutdown_debug(osb); |
3111 | if (osb->dc_task) | 3114 | if (osb->dc_task) |
3112 | kthread_stop(osb->dc_task); | 3115 | kthread_stop(osb->dc_task); |
3113 | } | 3116 | } |
3114 | 3117 | ||
3115 | mlog_exit(status); | 3118 | mlog_exit(status); |
3116 | return status; | 3119 | return status; |
3117 | } | 3120 | } |
3118 | 3121 | ||
3119 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, | 3122 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, |
3120 | int hangup_pending) | 3123 | int hangup_pending) |
3121 | { | 3124 | { |
3122 | mlog_entry_void(); | 3125 | mlog_entry_void(); |
3123 | 3126 | ||
3124 | ocfs2_drop_osb_locks(osb); | 3127 | ocfs2_drop_osb_locks(osb); |
3125 | 3128 | ||
3126 | /* | 3129 | /* |
3127 | * Now that we have dropped all locks and ocfs2_dismount_volume() | 3130 | * Now that we have dropped all locks and ocfs2_dismount_volume() |
3128 | * has disabled recovery, the DLM won't be talking to us. It's | 3131 | * has disabled recovery, the DLM won't be talking to us. It's |
3129 | * safe to tear things down before disconnecting the cluster. | 3132 | * safe to tear things down before disconnecting the cluster. |
3130 | */ | 3133 | */ |
3131 | 3134 | ||
3132 | if (osb->dc_task) { | 3135 | if (osb->dc_task) { |
3133 | kthread_stop(osb->dc_task); | 3136 | kthread_stop(osb->dc_task); |
3134 | osb->dc_task = NULL; | 3137 | osb->dc_task = NULL; |
3135 | } | 3138 | } |
3136 | 3139 | ||
3137 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 3140 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
3138 | ocfs2_lock_res_free(&osb->osb_rename_lockres); | 3141 | ocfs2_lock_res_free(&osb->osb_rename_lockres); |
3139 | ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); | 3142 | ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); |
3140 | ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); | 3143 | ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); |
3141 | 3144 | ||
3142 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); | 3145 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); |
3143 | osb->cconn = NULL; | 3146 | osb->cconn = NULL; |
3144 | 3147 | ||
3145 | ocfs2_dlm_shutdown_debug(osb); | 3148 | ocfs2_dlm_shutdown_debug(osb); |
3146 | 3149 | ||
3147 | mlog_exit_void(); | 3150 | mlog_exit_void(); |
3148 | } | 3151 | } |
3149 | 3152 | ||
3150 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 3153 | static int ocfs2_drop_lock(struct ocfs2_super *osb, |
3151 | struct ocfs2_lock_res *lockres) | 3154 | struct ocfs2_lock_res *lockres) |
3152 | { | 3155 | { |
3153 | int ret; | 3156 | int ret; |
3154 | unsigned long flags; | 3157 | unsigned long flags; |
3155 | u32 lkm_flags = 0; | 3158 | u32 lkm_flags = 0; |
3156 | 3159 | ||
3157 | /* We didn't get anywhere near actually using this lockres. */ | 3160 | /* We didn't get anywhere near actually using this lockres. */ |
3158 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 3161 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) |
3159 | goto out; | 3162 | goto out; |
3160 | 3163 | ||
3161 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 3164 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
3162 | lkm_flags |= DLM_LKF_VALBLK; | 3165 | lkm_flags |= DLM_LKF_VALBLK; |
3163 | 3166 | ||
3164 | spin_lock_irqsave(&lockres->l_lock, flags); | 3167 | spin_lock_irqsave(&lockres->l_lock, flags); |
3165 | 3168 | ||
3166 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), | 3169 | mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), |
3167 | "lockres %s, flags 0x%lx\n", | 3170 | "lockres %s, flags 0x%lx\n", |
3168 | lockres->l_name, lockres->l_flags); | 3171 | lockres->l_name, lockres->l_flags); |
3169 | 3172 | ||
3170 | while (lockres->l_flags & OCFS2_LOCK_BUSY) { | 3173 | while (lockres->l_flags & OCFS2_LOCK_BUSY) { |
3171 | mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " | 3174 | mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " |
3172 | "%u, unlock_action = %u\n", | 3175 | "%u, unlock_action = %u\n", |
3173 | lockres->l_name, lockres->l_flags, lockres->l_action, | 3176 | lockres->l_name, lockres->l_flags, lockres->l_action, |
3174 | lockres->l_unlock_action); | 3177 | lockres->l_unlock_action); |
3175 | 3178 | ||
3176 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3179 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3177 | 3180 | ||
3178 | /* XXX: Today we just wait on any busy | 3181 | /* XXX: Today we just wait on any busy |
3179 | * locks... Perhaps we need to cancel converts in the | 3182 | * locks... Perhaps we need to cancel converts in the |
3180 | * future? */ | 3183 | * future? */ |
3181 | ocfs2_wait_on_busy_lock(lockres); | 3184 | ocfs2_wait_on_busy_lock(lockres); |
3182 | 3185 | ||
3183 | spin_lock_irqsave(&lockres->l_lock, flags); | 3186 | spin_lock_irqsave(&lockres->l_lock, flags); |
3184 | } | 3187 | } |
3185 | 3188 | ||
3186 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 3189 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
3187 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | 3190 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && |
3188 | lockres->l_level == DLM_LOCK_EX && | 3191 | lockres->l_level == DLM_LOCK_EX && |
3189 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 3192 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) |
3190 | lockres->l_ops->set_lvb(lockres); | 3193 | lockres->l_ops->set_lvb(lockres); |
3191 | } | 3194 | } |
3192 | 3195 | ||
3193 | if (lockres->l_flags & OCFS2_LOCK_BUSY) | 3196 | if (lockres->l_flags & OCFS2_LOCK_BUSY) |
3194 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", | 3197 | mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", |
3195 | lockres->l_name); | 3198 | lockres->l_name); |
3196 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | 3199 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) |
3197 | mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); | 3200 | mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); |
3198 | 3201 | ||
3199 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 3202 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { |
3200 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3203 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3201 | goto out; | 3204 | goto out; |
3202 | } | 3205 | } |
3203 | 3206 | ||
3204 | lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); | 3207 | lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); |
3205 | 3208 | ||
3206 | /* make sure we never get here while waiting for an ast to | 3209 | /* make sure we never get here while waiting for an ast to |
3207 | * fire. */ | 3210 | * fire. */ |
3208 | BUG_ON(lockres->l_action != OCFS2_AST_INVALID); | 3211 | BUG_ON(lockres->l_action != OCFS2_AST_INVALID); |
3209 | 3212 | ||
3210 | /* is this necessary? */ | 3213 | /* is this necessary? */ |
3211 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 3214 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
3212 | lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; | 3215 | lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; |
3213 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3216 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3214 | 3217 | ||
3215 | mlog(0, "lock %s\n", lockres->l_name); | 3218 | mlog(0, "lock %s\n", lockres->l_name); |
3216 | 3219 | ||
3217 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); | 3220 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); |
3218 | if (ret) { | 3221 | if (ret) { |
3219 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 3222 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
3220 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 3223 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); |
3221 | ocfs2_dlm_dump_lksb(&lockres->l_lksb); | 3224 | ocfs2_dlm_dump_lksb(&lockres->l_lksb); |
3222 | BUG(); | 3225 | BUG(); |
3223 | } | 3226 | } |
3224 | mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", | 3227 | mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", |
3225 | lockres->l_name); | 3228 | lockres->l_name); |
3226 | 3229 | ||
3227 | ocfs2_wait_on_busy_lock(lockres); | 3230 | ocfs2_wait_on_busy_lock(lockres); |
3228 | out: | 3231 | out: |
3229 | mlog_exit(0); | 3232 | mlog_exit(0); |
3230 | return 0; | 3233 | return 0; |
3231 | } | 3234 | } |
3232 | 3235 | ||
3233 | /* Mark the lockres as being dropped. It will no longer be | 3236 | /* Mark the lockres as being dropped. It will no longer be |
3234 | * queued if blocking, but we still may have to wait on it | 3237 | * queued if blocking, but we still may have to wait on it |
3235 | * being dequeued from the downconvert thread before we can consider | 3238 | * being dequeued from the downconvert thread before we can consider |
3236 | * it safe to drop. | 3239 | * it safe to drop. |
3237 | * | 3240 | * |
3238 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 3241 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
3239 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | 3242 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) |
3240 | { | 3243 | { |
3241 | int status; | 3244 | int status; |
3242 | struct ocfs2_mask_waiter mw; | 3245 | struct ocfs2_mask_waiter mw; |
3243 | unsigned long flags; | 3246 | unsigned long flags; |
3244 | 3247 | ||
3245 | ocfs2_init_mask_waiter(&mw); | 3248 | ocfs2_init_mask_waiter(&mw); |
3246 | 3249 | ||
3247 | spin_lock_irqsave(&lockres->l_lock, flags); | 3250 | spin_lock_irqsave(&lockres->l_lock, flags); |
3248 | lockres->l_flags |= OCFS2_LOCK_FREEING; | 3251 | lockres->l_flags |= OCFS2_LOCK_FREEING; |
3249 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { | 3252 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { |
3250 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); | 3253 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); |
3251 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3254 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3252 | 3255 | ||
3253 | mlog(0, "Waiting on lockres %s\n", lockres->l_name); | 3256 | mlog(0, "Waiting on lockres %s\n", lockres->l_name); |
3254 | 3257 | ||
3255 | status = ocfs2_wait_for_mask(&mw); | 3258 | status = ocfs2_wait_for_mask(&mw); |
3256 | if (status) | 3259 | if (status) |
3257 | mlog_errno(status); | 3260 | mlog_errno(status); |
3258 | 3261 | ||
3259 | spin_lock_irqsave(&lockres->l_lock, flags); | 3262 | spin_lock_irqsave(&lockres->l_lock, flags); |
3260 | } | 3263 | } |
3261 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3264 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3262 | } | 3265 | } |
3263 | 3266 | ||
/*
 * Convenience wrapper: mark a lockres as freeing (waiting out the
 * downconvert thread if necessary) and then drop it, logging any error
 * from the drop. Errors are not propagated to the caller.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int status;

	ocfs2_mark_lockres_freeing(lockres);

	status = ocfs2_drop_lock(osb, lockres);
	if (status)
		mlog_errno(status);
}
3274 | 3277 | ||
/*
 * Drop the global, per-superblock cluster locks (superblock, rename,
 * NFS sync and orphan scan). Called on unmount/teardown; each drop is
 * best-effort - ocfs2_simple_drop_lockres() logs failures internally.
 */
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
{
	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
}
3282 | 3285 | ||
3283 | int ocfs2_drop_inode_locks(struct inode *inode) | 3286 | int ocfs2_drop_inode_locks(struct inode *inode) |
3284 | { | 3287 | { |
3285 | int status, err; | 3288 | int status, err; |
3286 | 3289 | ||
3287 | mlog_entry_void(); | 3290 | mlog_entry_void(); |
3288 | 3291 | ||
3289 | /* No need to call ocfs2_mark_lockres_freeing here - | 3292 | /* No need to call ocfs2_mark_lockres_freeing here - |
3290 | * ocfs2_clear_inode has done it for us. */ | 3293 | * ocfs2_clear_inode has done it for us. */ |
3291 | 3294 | ||
3292 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 3295 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
3293 | &OCFS2_I(inode)->ip_open_lockres); | 3296 | &OCFS2_I(inode)->ip_open_lockres); |
3294 | if (err < 0) | 3297 | if (err < 0) |
3295 | mlog_errno(err); | 3298 | mlog_errno(err); |
3296 | 3299 | ||
3297 | status = err; | 3300 | status = err; |
3298 | 3301 | ||
3299 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 3302 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
3300 | &OCFS2_I(inode)->ip_inode_lockres); | 3303 | &OCFS2_I(inode)->ip_inode_lockres); |
3301 | if (err < 0) | 3304 | if (err < 0) |
3302 | mlog_errno(err); | 3305 | mlog_errno(err); |
3303 | if (err < 0 && !status) | 3306 | if (err < 0 && !status) |
3304 | status = err; | 3307 | status = err; |
3305 | 3308 | ||
3306 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 3309 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
3307 | &OCFS2_I(inode)->ip_rw_lockres); | 3310 | &OCFS2_I(inode)->ip_rw_lockres); |
3308 | if (err < 0) | 3311 | if (err < 0) |
3309 | mlog_errno(err); | 3312 | mlog_errno(err); |
3310 | if (err < 0 && !status) | 3313 | if (err < 0 && !status) |
3311 | status = err; | 3314 | status = err; |
3312 | 3315 | ||
3313 | mlog_exit(status); | 3316 | mlog_exit(status); |
3314 | return status; | 3317 | return status; |
3315 | } | 3318 | } |
3316 | 3319 | ||
/*
 * Transition a lockres into the DOWNCONVERT state: record the requested
 * (lower) level, mark the lock busy, and set the pending flag.
 *
 * Must be called with lockres->l_lock held. new_level must be strictly
 * lower than the current level - anything else indicates corrupted lock
 * state and is a BUG.
 *
 * Returns the pending generation from lockres_set_pending(); the caller
 * passes it to lockres_clear_pending() after dlm_lock() returns.
 */
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level)
{
	assert_spin_locked(&lockres->l_lock);

	/* A downconvert only makes sense if another node is blocked on
	 * something above NL. */
	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);

	if (lockres->l_level <= new_level) {
		/* Dump the full lock state before dying - this should be
		 * impossible and the detail helps post-mortem debugging. */
		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
		     new_level, list_empty(&lockres->l_blocked_list),
		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
		     lockres->l_flags, lockres->l_ro_holders,
		     lockres->l_ex_holders, lockres->l_action,
		     lockres->l_unlock_action, lockres->l_requested,
		     lockres->l_blocking, lockres->l_pending_gen);
		BUG();
	}

	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);

	lockres->l_action = OCFS2_AST_DOWNCONVERT;
	lockres->l_requested = new_level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	return lockres_set_pending(lockres);
}
3345 | 3348 | ||
3346 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 3349 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
3347 | struct ocfs2_lock_res *lockres, | 3350 | struct ocfs2_lock_res *lockres, |
3348 | int new_level, | 3351 | int new_level, |
3349 | int lvb, | 3352 | int lvb, |
3350 | unsigned int generation) | 3353 | unsigned int generation) |
3351 | { | 3354 | { |
3352 | int ret; | 3355 | int ret; |
3353 | u32 dlm_flags = DLM_LKF_CONVERT; | 3356 | u32 dlm_flags = DLM_LKF_CONVERT; |
3354 | 3357 | ||
3355 | mlog_entry_void(); | 3358 | mlog_entry_void(); |
3356 | 3359 | ||
3357 | mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, | 3360 | mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, |
3358 | lockres->l_level, new_level); | 3361 | lockres->l_level, new_level); |
3359 | 3362 | ||
3360 | if (lvb) | 3363 | if (lvb) |
3361 | dlm_flags |= DLM_LKF_VALBLK; | 3364 | dlm_flags |= DLM_LKF_VALBLK; |
3362 | 3365 | ||
3363 | ret = ocfs2_dlm_lock(osb->cconn, | 3366 | ret = ocfs2_dlm_lock(osb->cconn, |
3364 | new_level, | 3367 | new_level, |
3365 | &lockres->l_lksb, | 3368 | &lockres->l_lksb, |
3366 | dlm_flags, | 3369 | dlm_flags, |
3367 | lockres->l_name, | 3370 | lockres->l_name, |
3368 | OCFS2_LOCK_ID_MAX_LEN - 1); | 3371 | OCFS2_LOCK_ID_MAX_LEN - 1); |
3369 | lockres_clear_pending(lockres, generation, osb); | 3372 | lockres_clear_pending(lockres, generation, osb); |
3370 | if (ret) { | 3373 | if (ret) { |
3371 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); | 3374 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
3372 | ocfs2_recover_from_dlm_error(lockres, 1); | 3375 | ocfs2_recover_from_dlm_error(lockres, 1); |
3373 | goto bail; | 3376 | goto bail; |
3374 | } | 3377 | } |
3375 | 3378 | ||
3376 | ret = 0; | 3379 | ret = 0; |
3377 | bail: | 3380 | bail: |
3378 | mlog_exit(ret); | 3381 | mlog_exit(ret); |
3379 | return ret; | 3382 | return ret; |
3380 | } | 3383 | } |
3381 | 3384 | ||
/*
 * Decide whether an in-flight lock conversion should be cancelled and,
 * if so, move the lockres into the CANCEL_CONVERT unlock state.
 *
 * Must be called with lockres->l_lock held.
 *
 * Returns 1 when the caller should unlock and call ocfs2_dlm_unlock
 * (i.e. actually issue the cancel); returns 0 when a cancel is already
 * in progress and the caller should just requeue the lock.
 */
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres)
{
	assert_spin_locked(&lockres->l_lock);

	mlog_entry_void();

	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
		/* If we're already trying to cancel a lock conversion
		 * then just drop the spinlock and allow the caller to
		 * requeue this lock. */
		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
		return 0;
	}

	/* were we in a convert when we got the bast fire? */
	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
	/* set things up for the unlockast to know to just
	 * clear out the ast_action and unset busy, etc. */
	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;

	/* A cancel only makes sense while the convert is still busy. */
	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
			"lock %s, invalid flags: 0x%lx\n",
			lockres->l_name, lockres->l_flags);

	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);

	return 1;
}
3413 | 3416 | ||
3414 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 3417 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, |
3415 | struct ocfs2_lock_res *lockres) | 3418 | struct ocfs2_lock_res *lockres) |
3416 | { | 3419 | { |
3417 | int ret; | 3420 | int ret; |
3418 | 3421 | ||
3419 | mlog_entry_void(); | 3422 | mlog_entry_void(); |
3420 | 3423 | ||
3421 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, | 3424 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, |
3422 | DLM_LKF_CANCEL); | 3425 | DLM_LKF_CANCEL); |
3423 | if (ret) { | 3426 | if (ret) { |
3424 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); | 3427 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
3425 | ocfs2_recover_from_dlm_error(lockres, 0); | 3428 | ocfs2_recover_from_dlm_error(lockres, 0); |
3426 | } | 3429 | } |
3427 | 3430 | ||
3428 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); | 3431 | mlog(ML_BASTS, "lockres %s\n", lockres->l_name); |
3429 | 3432 | ||
3430 | mlog_exit(ret); | 3433 | mlog_exit(ret); |
3431 | return ret; | 3434 | return ret; |
3432 | } | 3435 | } |
3433 | 3436 | ||
/*
 * Core of the downconvert thread's work on a single blocked lockres:
 * decide whether the lock can be downconverted now, must be requeued
 * for a later attempt, or needs an in-flight conversion cancelled.
 *
 * On return, ctl->requeue tells the caller whether to queue the lock
 * again, and ctl->unblock_action carries any downconvert_worker verdict.
 * Returns 0 or a negative error from the downconvert request.
 *
 * Note the "recheck" label: whenever we drop l_lock to sleep (e.g. in
 * the downconvert worker) we must re-validate the blocking state, as it
 * may have changed underneath us.
 */
static int ocfs2_unblock_lock(struct ocfs2_super *osb,
			      struct ocfs2_lock_res *lockres,
			      struct ocfs2_unblock_ctl *ctl)
{
	unsigned long flags;
	int blocking;
	int new_level;
	int level;
	int ret = 0;
	int set_lvb = 0;
	unsigned int gen;

	mlog_entry_void();

	spin_lock_irqsave(&lockres->l_lock, flags);

recheck:
	/*
	 * Is it still blocking? If not, we have no more work to do.
	 */
	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		ret = 0;
		goto leave;
	}

	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
		/* XXX
		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
		 * exists entirely for one reason - another thread has set
		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
		 *
		 * If we do ocfs2_cancel_convert() before the other thread
		 * calls dlm_lock(), our cancel will do nothing.  We will
		 * get no ast, and we will have no way of knowing the
		 * cancel failed.  Meanwhile, the other thread will call
		 * into dlm_lock() and wait...forever.
		 *
		 * Why forever?  Because another node has asked for the
		 * lock first; that's why we're here in unblock_lock().
		 *
		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
		 * set, we just requeue the unblock.  Only when the other
		 * thread has called dlm_lock() and cleared PENDING will
		 * we then cancel their request.
		 *
		 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
		 * at the same time they set OCFS2_DLM_BUSY.  They must
		 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
		 */
		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
			     lockres->l_name);
			goto leave_requeue;
		}

		/* Busy but not pending: try to cancel the conversion so
		 * the blocked remote node can make progress. */
		ctl->requeue = 1;
		ret = ocfs2_prepare_cancel_convert(osb, lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		if (ret) {
			ret = ocfs2_cancel_convert(osb, lockres);
			if (ret < 0)
				mlog_errno(ret);
		}
		goto leave;
	}

	/*
	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
	 * set when the ast is received for an upconvert just before the
	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
	 * on the heels of the ast, we want to delay the downconvert just
	 * enough to allow the up requestor to do its task. Because this
	 * lock is in the blocked queue, the lock will be downconverted
	 * as soon as the requestor is done with the lock.
	 */
	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
		goto leave_requeue;

	/*
	 * How can we block and yet be at NL?  We were trying to upconvert
	 * from NL and got canceled.  The code comes back here, and now
	 * we notice and clear BLOCKING.
	 */
	if (lockres->l_level == DLM_LOCK_NL) {
		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
		lockres->l_blocking = DLM_LOCK_NL;
		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto leave;
	}

	/* if we're blocking an exclusive and we have *any* holders,
	 * then requeue. */
	if ((lockres->l_blocking == DLM_LOCK_EX)
	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
		     lockres->l_name, lockres->l_ex_holders,
		     lockres->l_ro_holders);
		goto leave_requeue;
	}

	/* If it's a PR we're blocking, then only
	 * requeue if we've got any EX holders */
	if (lockres->l_blocking == DLM_LOCK_PR &&
	    lockres->l_ex_holders) {
		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
		     lockres->l_name, lockres->l_ex_holders);
		goto leave_requeue;
	}

	/*
	 * Can we get a lock in this state if the holder counts are
	 * zero? The meta data unblock code used to check this.
	 */
	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
		     lockres->l_name);
		goto leave_requeue;
	}

	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);

	/* Per-type veto, e.g. metadata locks wait for checkpointing. */
	if (lockres->l_ops->check_downconvert
	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
		     lockres->l_name);
		goto leave_requeue;
	}

	/* If we get here, then we know that there are no more
	 * incompatible holders (and anyone asking for an incompatible
	 * lock is blocked). We can now downconvert the lock */
	if (!lockres->l_ops->downconvert_worker)
		goto downconvert;

	/* Some lockres types want to do a bit of work before
	 * downconverting a lock. Allow that here. The worker function
	 * may sleep, so we save off a copy of what we're blocking as
	 * it may change while we're not holding the spin lock. */
	blocking = lockres->l_blocking;
	level = lockres->l_level;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);

	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
		     lockres->l_name);
		goto leave;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);
	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
		/* If this changed underneath us, then we can't drop
		 * it just yet. */
		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
		     "Recheck\n", lockres->l_name, blocking,
		     lockres->l_blocking, level, lockres->l_level);
		goto recheck;
	}

downconvert:
	ctl->requeue = 0;

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
		if (lockres->l_level == DLM_LOCK_EX)
			set_lvb = 1;

		/*
		 * We only set the lvb if the lock has been fully
		 * refreshed - otherwise we risk setting stale
		 * data. Otherwise, there's no need to actually clear
		 * out the lvb here as it's value is still valid.
		 */
		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
			lockres->l_ops->set_lvb(lockres);
	}

	gen = ocfs2_prepare_downconvert(lockres, new_level);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
				     gen);

leave:
	mlog_exit(ret);
	return ret;

leave_requeue:
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ctl->requeue = 1;

	mlog_exit(0);
	return 0;
}
3632 | 3635 | ||
/*
 * Downconvert worker for an inode's data lock.  Flushes and/or
 * invalidates this node's cached pages so another node may access the
 * file data at the weaker lock level.
 *
 * Always returns UNBLOCK_CONTINUE - the downconvert itself proceeds.
 */
static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking)
{
	struct inode *inode;
	struct address_space *mapping;
	struct ocfs2_inode_info *oi;

	inode = ocfs2_lock_res_inode(lockres);
	mapping = inode->i_mapping;

	if (S_ISDIR(inode->i_mode)) {
		/* Directories carry no file data here; bumping the
		 * generation invalidates cached directory state. */
		oi = OCFS2_I(inode);
		oi->ip_dir_lock_gen++;
		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
		goto out;
	}

	/* Only regular files have page cache contents to flush. */
	if (!S_ISREG(inode->i_mode))
		goto out;

	/*
	 * We need this before the filemap_fdatawrite() so that it can
	 * transfer the dirty bit from the PTE to the
	 * page. Unfortunately this means that even for EX->PR
	 * downconverts, we'll lose our mappings and have to build
	 * them up again.
	 */
	unmap_mapping_range(mapping, 0, 0, 0);

	if (filemap_fdatawrite(mapping)) {
		/* Log and keep going - the downconvert still happens. */
		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
	}
	sync_mapping_buffers(mapping);
	if (blocking == DLM_LOCK_EX) {
		truncate_inode_pages(mapping, 0);
	} else {
		/* We only need to wait on the I/O if we're not also
		 * truncating pages because truncate_inode_pages waits
		 * for us above. We don't truncate pages if we're
		 * blocking anything < EXMODE because we want to keep
		 * them around in that case. */
		filemap_fdatawait(mapping);
	}

out:
	return UNBLOCK_CONTINUE;
}
3681 | 3684 | ||
3682 | static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, | 3685 | static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, |
3683 | struct ocfs2_lock_res *lockres, | 3686 | struct ocfs2_lock_res *lockres, |
3684 | int new_level) | 3687 | int new_level) |
3685 | { | 3688 | { |
3686 | int checkpointed = ocfs2_ci_fully_checkpointed(ci); | 3689 | int checkpointed = ocfs2_ci_fully_checkpointed(ci); |
3687 | 3690 | ||
3688 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); | 3691 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); |
3689 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); | 3692 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); |
3690 | 3693 | ||
3691 | if (checkpointed) | 3694 | if (checkpointed) |
3692 | return 1; | 3695 | return 1; |
3693 | 3696 | ||
3694 | ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); | 3697 | ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); |
3695 | return 0; | 3698 | return 0; |
3696 | } | 3699 | } |
3697 | 3700 | ||
/*
 * An inode meta lock may only drop to @new_level once all journaled
 * changes to the inode have been checkpointed.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	return ocfs2_ci_checkpointed(INODE_CACHE(ocfs2_lock_res_inode(lockres)),
				     lockres, new_level);
}
3705 | 3708 | ||
/* Publish this node's cached inode state into the lock value block. */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3712 | 3715 | ||
/*
 * Perform the final reference drop on our dentry lock.  This currently
 * runs in the downconvert thread; a future simplification of the
 * dlmglue API could push it off to the ocfs2_wq instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3724 | 3727 | ||
/*
 * d_delete() matching dentries before the lock downconvert.
 *
 * At this point, any process waiting to destroy the
 * dentry_lock due to last ref count is stopped by the
 * OCFS2_LOCK_QUEUED flag.
 *
 * We have two potential problems
 *
 * 1) If we do the last reference drop on our dentry_lock (via dput)
 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
 * the downconvert to finish. Instead we take an elevated
 * reference and push the drop until after we've completed our
 * unblock processing.
 *
 * 2) There might be another process with a final reference,
 * waiting on us to finish processing. If this is the case, we
 * detect it and exit out - there's no more dentries anyway.
 */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking)
{
	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
	struct dentry *dentry;
	unsigned long flags;
	int extra_ref = 0;

	/*
	 * This node is blocking another node from getting a read
	 * lock. This happens when we've renamed within a
	 * directory. We've forced the other nodes to d_delete(), but
	 * we never actually dropped our lock because it's still
	 * valid. The downconvert code will retain a PR for this node,
	 * so there's no further work to do.
	 */
	if (blocking == DLM_LOCK_PR)
		return UNBLOCK_CONTINUE;

	/*
	 * Mark this inode as potentially orphaned. The code in
	 * ocfs2_delete_inode() will figure out whether it actually
	 * needs to be freed or not.
	 */
	spin_lock(&oi->ip_lock);
	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
	spin_unlock(&oi->ip_lock);

	/*
	 * Yuck. We need to make sure however that the check of
	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
	 * respect to a reference decrement or the setting of that
	 * flag.
	 */
	spin_lock_irqsave(&lockres->l_lock, flags);
	spin_lock(&dentry_attach_lock);
	/* Elevated reference for problem (1) above, taken only while
	 * someone else still holds a reference and the lock isn't
	 * already being torn down. */
	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
	    && dl->dl_count) {
		dl->dl_count++;
		extra_ref = 1;
	}
	spin_unlock(&dentry_attach_lock);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog(0, "extra_ref = %d\n", extra_ref);

	/*
	 * We have a process waiting on us in ocfs2_dentry_iput(),
	 * which means we can't have any more outstanding
	 * aliases. There's no need to do any more work.
	 */
	if (!extra_ref)
		return UNBLOCK_CONTINUE;

	spin_lock(&dentry_attach_lock);
	while (1) {
		dentry = ocfs2_find_local_alias(dl->dl_inode,
						dl->dl_parent_blkno, 1);
		if (!dentry)
			break;
		/* Drop the spinlock - the dcache calls below can sleep. */
		spin_unlock(&dentry_attach_lock);

		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
		     dentry->d_name.name);

		/*
		 * The following dcache calls may do an
		 * iput(). Normally we don't want that from the
		 * downconverting thread, but in this case it's ok
		 * because the requesting node already has an
		 * exclusive lock on the inode, so it can't be queued
		 * for a downconvert.
		 */
		d_delete(dentry);
		dput(dentry);

		spin_lock(&dentry_attach_lock);
	}
	spin_unlock(&dentry_attach_lock);

	/*
	 * If we are the last holder of this dentry lock, there is no
	 * reason to downconvert so skip straight to the unlock.
	 */
	if (dl->dl_count == 1)
		return UNBLOCK_STOP_POST;

	return UNBLOCK_CONTINUE_POST;
}
3834 | 3837 | ||
3835 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, | 3838 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, |
3836 | int new_level) | 3839 | int new_level) |
3837 | { | 3840 | { |
3838 | struct ocfs2_refcount_tree *tree = | 3841 | struct ocfs2_refcount_tree *tree = |
3839 | ocfs2_lock_res_refcount_tree(lockres); | 3842 | ocfs2_lock_res_refcount_tree(lockres); |
3840 | 3843 | ||
3841 | return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); | 3844 | return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); |
3842 | } | 3845 | } |
3843 | 3846 | ||
3844 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, | 3847 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, |
3845 | int blocking) | 3848 | int blocking) |
3846 | { | 3849 | { |
3847 | struct ocfs2_refcount_tree *tree = | 3850 | struct ocfs2_refcount_tree *tree = |
3848 | ocfs2_lock_res_refcount_tree(lockres); | 3851 | ocfs2_lock_res_refcount_tree(lockres); |
3849 | 3852 | ||
3850 | ocfs2_metadata_cache_purge(&tree->rf_ci); | 3853 | ocfs2_metadata_cache_purge(&tree->rf_ci); |
3851 | 3854 | ||
3852 | return UNBLOCK_CONTINUE; | 3855 | return UNBLOCK_CONTINUE; |
3853 | } | 3856 | } |
3854 | 3857 | ||
3855 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) | 3858 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) |
3856 | { | 3859 | { |
3857 | struct ocfs2_qinfo_lvb *lvb; | 3860 | struct ocfs2_qinfo_lvb *lvb; |
3858 | struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); | 3861 | struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); |
3859 | struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, | 3862 | struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, |
3860 | oinfo->dqi_gi.dqi_type); | 3863 | oinfo->dqi_gi.dqi_type); |
3861 | 3864 | ||
3862 | mlog_entry_void(); | 3865 | mlog_entry_void(); |
3863 | 3866 | ||
3864 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); | 3867 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
3865 | lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; | 3868 | lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; |
3866 | lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); | 3869 | lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); |
3867 | lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); | 3870 | lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); |
3868 | lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); | 3871 | lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); |
3869 | lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); | 3872 | lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); |
3870 | lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); | 3873 | lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); |
3871 | lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); | 3874 | lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); |
3872 | 3875 | ||
3873 | mlog_exit_void(); | 3876 | mlog_exit_void(); |
3874 | } | 3877 | } |
3875 | 3878 | ||
3876 | void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) | 3879 | void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) |
3877 | { | 3880 | { |
3878 | struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; | 3881 | struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; |
3879 | struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); | 3882 | struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); |
3880 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 3883 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
3881 | 3884 | ||
3882 | mlog_entry_void(); | 3885 | mlog_entry_void(); |
3883 | if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) | 3886 | if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) |
3884 | ocfs2_cluster_unlock(osb, lockres, level); | 3887 | ocfs2_cluster_unlock(osb, lockres, level); |
3885 | mlog_exit_void(); | 3888 | mlog_exit_void(); |
3886 | } | 3889 | } |
3887 | 3890 | ||
/*
 * Refresh the in-memory global quota info, preferring the DLM lock
 * value block over a disk read.  The LVB is trusted only when the DLM
 * reports it valid and it carries the version we understand; otherwise
 * fall back to reading the global quota file block.
 *
 * Returns 0 on success or a negative error from the disk read.
 */
static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
{
	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
					    oinfo->dqi_gi.dqi_type);
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	struct buffer_head *bh = NULL;
	struct ocfs2_global_disk_dqinfo *gdinfo;
	int status = 0;

	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
		/* Fast path: take everything from the LVB (big-endian). */
		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					be32_to_cpu(lvb->lvb_free_entry);
	} else {
		/* LVB stale or incompatible - read the on-disk info
		 * block (little-endian) instead. */
		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
						     oinfo->dqi_giblk, &bh);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
		gdinfo = (struct ocfs2_global_disk_dqinfo *)
					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					le32_to_cpu(gdinfo->dqi_free_entry);
		brelse(bh);
		/* Record the disk refresh in this lockres' statistics. */
		ocfs2_track_lock_refresh(lockres);
	}

bail:
	return status;
}
3930 | 3933 | ||
/* Lock quota info, this function expects at least shared lock on the quota file
 * so that we can safely refresh quota info from disk.
 *
 * Takes the global quota info cluster lock at PR (ex == 0) or EX
 * (ex != 0) and, when this node's copy needs it, refreshes the info.
 * If the refresh fails, the cluster lock is dropped again before the
 * error is returned.
 *
 * Returns 0 on success, -EROFS for an EX request on a hard-readonly
 * device, or a negative error code. */
int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	int status = 0;

	mlog_entry_void();

	/* On RO devices, locking really isn't needed... */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}
	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;
	/* OK, we have the lock but we need to refresh the quota info */
	status = ocfs2_refresh_qinfo(oinfo);
	if (status)
		ocfs2_qinfo_unlock(oinfo, ex);
	/* Wake anyone waiting on the refresh, passing along our result. */
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}
3967 | 3970 | ||
3968 | int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) | 3971 | int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) |
3969 | { | 3972 | { |
3970 | int status; | 3973 | int status; |
3971 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 3974 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
3972 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | 3975 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; |
3973 | struct ocfs2_super *osb = lockres->l_priv; | 3976 | struct ocfs2_super *osb = lockres->l_priv; |
3974 | 3977 | ||
3975 | 3978 | ||
3976 | if (ocfs2_is_hard_readonly(osb)) | 3979 | if (ocfs2_is_hard_readonly(osb)) |
3977 | return -EROFS; | 3980 | return -EROFS; |
3978 | 3981 | ||
3979 | if (ocfs2_mount_local(osb)) | 3982 | if (ocfs2_mount_local(osb)) |
3980 | return 0; | 3983 | return 0; |
3981 | 3984 | ||
3982 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | 3985 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); |
3983 | if (status < 0) | 3986 | if (status < 0) |
3984 | mlog_errno(status); | 3987 | mlog_errno(status); |
3985 | 3988 | ||
3986 | return status; | 3989 | return status; |
3987 | } | 3990 | } |
3988 | 3991 | ||
3989 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) | 3992 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) |
3990 | { | 3993 | { |
3991 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | 3994 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
3992 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | 3995 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; |
3993 | struct ocfs2_super *osb = lockres->l_priv; | 3996 | struct ocfs2_super *osb = lockres->l_priv; |
3994 | 3997 | ||
3995 | if (!ocfs2_mount_local(osb)) | 3998 | if (!ocfs2_mount_local(osb)) |
3996 | ocfs2_cluster_unlock(osb, lockres, level); | 3999 | ocfs2_cluster_unlock(osb, lockres, level); |
3997 | } | 4000 | } |
3998 | 4001 | ||
/*
 * Process one queued lockres from the downconvert thread: run the
 * unblock logic and either clear OCFS2_LOCK_QUEUED or requeue the
 * lockres for another pass.
 */
static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
				       struct ocfs2_lock_res *lockres)
{
	int status;
	struct ocfs2_unblock_ctl ctl = {0, 0,};
	unsigned long flags;

	/* Our reference to the lockres in this function can be
	 * considered valid until we remove the OCFS2_LOCK_QUEUED
	 * flag. */

	mlog_entry_void();

	BUG_ON(!lockres);
	BUG_ON(!lockres->l_ops);

	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);

	/* Detect whether a lock has been marked as going away while
	 * the downconvert thread was processing other things. A lock can
	 * still be marked with OCFS2_LOCK_FREEING after this check,
	 * but short circuiting here will still save us some
	 * performance. */
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (lockres->l_flags & OCFS2_LOCK_FREEING)
		goto unqueue;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = ocfs2_unblock_lock(osb, lockres, &ctl);
	if (status < 0)
		mlog_errno(status);

	spin_lock_irqsave(&lockres->l_lock, flags);
unqueue:
	/* A freeing lock is never requeued, whatever ctl says. */
	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
	} else
		ocfs2_schedule_blocked_lock(osb, lockres);

	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
	     ctl.requeue ? "yes" : "no");
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	/* Give the lock type a chance to run its post-unlock hook
	 * (e.g. the final dentry lock reference drop). */
	if (ctl.unblock_action != UNBLOCK_CONTINUE
	    && lockres->l_ops->post_unlock)
		lockres->l_ops->post_unlock(osb, lockres);

	mlog_exit_void();
}
4048 | 4051 | ||
/*
 * Queue a lockres for the downconvert thread.  The caller must hold
 * lockres->l_lock (asserted below); the list itself is protected by
 * osb->dc_task_lock.
 */
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres)
{
	mlog_entry_void();

	assert_spin_locked(&lockres->l_lock);

	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
		/* Do not schedule a lock for downconvert when it's on
		 * the way to destruction - any nodes wanting access
		 * to the resource will get it soon. */
		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
		     lockres->l_name, lockres->l_flags);
		return;
	}

	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);

	spin_lock(&osb->dc_task_lock);
	/* Only add it if it isn't already queued. */
	if (list_empty(&lockres->l_blocked_list)) {
		list_add_tail(&lockres->l_blocked_list,
			      &osb->blocked_lock_list);
		osb->blocked_lock_count++;
	}
	spin_unlock(&osb->dc_task_lock);

	mlog_exit_void();
}
4077 | 4080 | ||
/*
 * Drain the blocked-lock list for one pass of the downconvert thread.
 * At most the number of entries present when we start are processed;
 * locks queued while we work wait for the next wakeup.
 */
static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
{
	unsigned long processed;
	struct ocfs2_lock_res *lockres;

	mlog_entry_void();

	spin_lock(&osb->dc_task_lock);
	/* grab this early so we know to try again if a state change and
	 * wake happens part-way through our work */
	osb->dc_work_sequence = osb->dc_wake_sequence;

	processed = osb->blocked_lock_count;
	while (processed) {
		BUG_ON(list_empty(&osb->blocked_lock_list));

		lockres = list_entry(osb->blocked_lock_list.next,
				     struct ocfs2_lock_res, l_blocked_list);
		list_del_init(&lockres->l_blocked_list);
		osb->blocked_lock_count--;
		/* Drop dc_task_lock while processing - the unblock
		 * work can block, and others may queue meanwhile. */
		spin_unlock(&osb->dc_task_lock);

		BUG_ON(!processed);
		processed--;

		ocfs2_process_blocked_lock(osb, lockres);

		spin_lock(&osb->dc_task_lock);
	}
	spin_unlock(&osb->dc_task_lock);

	mlog_exit_void();
}
4111 | 4114 | ||
4112 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | 4115 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) |
4113 | { | 4116 | { |
4114 | int empty = 0; | 4117 | int empty = 0; |
4115 | 4118 | ||
4116 | spin_lock(&osb->dc_task_lock); | 4119 | spin_lock(&osb->dc_task_lock); |
4117 | if (list_empty(&osb->blocked_lock_list)) | 4120 | if (list_empty(&osb->blocked_lock_list)) |
4118 | empty = 1; | 4121 | empty = 1; |
4119 | 4122 | ||
4120 | spin_unlock(&osb->dc_task_lock); | 4123 | spin_unlock(&osb->dc_task_lock); |
4121 | return empty; | 4124 | return empty; |
4122 | } | 4125 | } |
4123 | 4126 | ||
4124 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | 4127 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) |
4125 | { | 4128 | { |
4126 | int should_wake = 0; | 4129 | int should_wake = 0; |
4127 | 4130 | ||
4128 | spin_lock(&osb->dc_task_lock); | 4131 | spin_lock(&osb->dc_task_lock); |
4129 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | 4132 | if (osb->dc_work_sequence != osb->dc_wake_sequence) |
4130 | should_wake = 1; | 4133 | should_wake = 1; |
4131 | spin_unlock(&osb->dc_task_lock); | 4134 | spin_unlock(&osb->dc_task_lock); |
4132 | 4135 | ||
4133 | return should_wake; | 4136 | return should_wake; |
4134 | } | 4137 | } |
4135 | 4138 | ||
4136 | static int ocfs2_downconvert_thread(void *arg) | 4139 | static int ocfs2_downconvert_thread(void *arg) |
4137 | { | 4140 | { |
4138 | int status = 0; | 4141 | int status = 0; |
4139 | struct ocfs2_super *osb = arg; | 4142 | struct ocfs2_super *osb = arg; |
4140 | 4143 | ||
4141 | /* only quit once we've been asked to stop and there is no more | 4144 | /* only quit once we've been asked to stop and there is no more |
4142 | * work available */ | 4145 | * work available */ |
4143 | while (!(kthread_should_stop() && | 4146 | while (!(kthread_should_stop() && |
4144 | ocfs2_downconvert_thread_lists_empty(osb))) { | 4147 | ocfs2_downconvert_thread_lists_empty(osb))) { |
4145 | 4148 | ||
4146 | wait_event_interruptible(osb->dc_event, | 4149 | wait_event_interruptible(osb->dc_event, |
4147 | ocfs2_downconvert_thread_should_wake(osb) || | 4150 | ocfs2_downconvert_thread_should_wake(osb) || |
4148 | kthread_should_stop()); | 4151 | kthread_should_stop()); |
4149 | 4152 | ||
4150 | mlog(0, "downconvert_thread: awoken\n"); | 4153 | mlog(0, "downconvert_thread: awoken\n"); |
4151 | 4154 | ||
4152 | ocfs2_downconvert_thread_do_work(osb); | 4155 | ocfs2_downconvert_thread_do_work(osb); |
fs/ocfs2/ocfs2.h
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * ocfs2.h | 4 | * ocfs2.h |
5 | * | 5 | * |
6 | * Defines macros and structures used in OCFS2 | 6 | * Defines macros and structures used in OCFS2 |
7 | * | 7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
12 | * License as published by the Free Software Foundation; either | 12 | * License as published by the Free Software Foundation; either |
13 | * version 2 of the License, or (at your option) any later version. | 13 | * version 2 of the License, or (at your option) any later version. |
14 | * | 14 | * |
15 | * This program is distributed in the hope that it will be useful, | 15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | * General Public License for more details. | 18 | * General Public License for more details. |
19 | * | 19 | * |
20 | * You should have received a copy of the GNU General Public | 20 | * You should have received a copy of the GNU General Public |
21 | * License along with this program; if not, write to the | 21 | * License along with this program; if not, write to the |
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
23 | * Boston, MA 021110-1307, USA. | 23 | * Boston, MA 021110-1307, USA. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #ifndef OCFS2_H | 26 | #ifndef OCFS2_H |
27 | #define OCFS2_H | 27 | #define OCFS2_H |
28 | 28 | ||
29 | #include <linux/spinlock.h> | 29 | #include <linux/spinlock.h> |
30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
31 | #include <linux/wait.h> | 31 | #include <linux/wait.h> |
32 | #include <linux/list.h> | 32 | #include <linux/list.h> |
33 | #include <linux/rbtree.h> | 33 | #include <linux/rbtree.h> |
34 | #include <linux/workqueue.h> | 34 | #include <linux/workqueue.h> |
35 | #include <linux/kref.h> | 35 | #include <linux/kref.h> |
36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
37 | #include <linux/lockdep.h> | 37 | #include <linux/lockdep.h> |
38 | #include <linux/jbd2.h> | 38 | #include <linux/jbd2.h> |
39 | 39 | ||
40 | /* For union ocfs2_dlm_lksb */ | 40 | /* For union ocfs2_dlm_lksb */ |
41 | #include "stackglue.h" | 41 | #include "stackglue.h" |
42 | 42 | ||
43 | #include "ocfs2_fs.h" | 43 | #include "ocfs2_fs.h" |
44 | #include "ocfs2_lockid.h" | 44 | #include "ocfs2_lockid.h" |
45 | #include "ocfs2_ioctl.h" | 45 | #include "ocfs2_ioctl.h" |
46 | 46 | ||
47 | /* For struct ocfs2_blockcheck_stats */ | 47 | /* For struct ocfs2_blockcheck_stats */ |
48 | #include "blockcheck.h" | 48 | #include "blockcheck.h" |
49 | 49 | ||
50 | #include "reservations.h" | 50 | #include "reservations.h" |
51 | 51 | ||
52 | /* Caching of metadata buffers */ | 52 | /* Caching of metadata buffers */ |
53 | 53 | ||
54 | /* Most user visible OCFS2 inodes will have very few pieces of | 54 | /* Most user visible OCFS2 inodes will have very few pieces of |
55 | * metadata, but larger files (including bitmaps, etc) must be taken | 55 | * metadata, but larger files (including bitmaps, etc) must be taken |
56 | * into account when designing an access scheme. We allow a small | 56 | * into account when designing an access scheme. We allow a small |
57 | * amount of inlined blocks to be stored on an array and grow the | 57 | * amount of inlined blocks to be stored on an array and grow the |
58 | * structure into a rb tree when necessary. */ | 58 | * structure into a rb tree when necessary. */ |
59 | #define OCFS2_CACHE_INFO_MAX_ARRAY 2 | 59 | #define OCFS2_CACHE_INFO_MAX_ARRAY 2 |
60 | 60 | ||
61 | /* Flags for ocfs2_caching_info */ | 61 | /* Flags for ocfs2_caching_info */ |
62 | 62 | ||
63 | enum ocfs2_caching_info_flags { | 63 | enum ocfs2_caching_info_flags { |
64 | /* Indicates that the metadata cache is using the inline array */ | 64 | /* Indicates that the metadata cache is using the inline array */ |
65 | OCFS2_CACHE_FL_INLINE = 1<<1, | 65 | OCFS2_CACHE_FL_INLINE = 1<<1, |
66 | }; | 66 | }; |
67 | 67 | ||
68 | struct ocfs2_caching_operations; | 68 | struct ocfs2_caching_operations; |
69 | struct ocfs2_caching_info { | 69 | struct ocfs2_caching_info { |
70 | /* | 70 | /* |
71 | * The parent structure provides the locks, but because the | 71 | * The parent structure provides the locks, but because the |
72 | * parent structure can differ, it provides locking operations | 72 | * parent structure can differ, it provides locking operations |
73 | * to struct ocfs2_caching_info. | 73 | * to struct ocfs2_caching_info. |
74 | */ | 74 | */ |
75 | const struct ocfs2_caching_operations *ci_ops; | 75 | const struct ocfs2_caching_operations *ci_ops; |
76 | 76 | ||
77 | /* next two are protected by trans_inc_lock */ | 77 | /* next two are protected by trans_inc_lock */ |
78 | /* which transaction were we created on? Zero if none. */ | 78 | /* which transaction were we created on? Zero if none. */ |
79 | unsigned long ci_created_trans; | 79 | unsigned long ci_created_trans; |
80 | /* last transaction we were a part of. */ | 80 | /* last transaction we were a part of. */ |
81 | unsigned long ci_last_trans; | 81 | unsigned long ci_last_trans; |
82 | 82 | ||
83 | /* Cache structures */ | 83 | /* Cache structures */ |
84 | unsigned int ci_flags; | 84 | unsigned int ci_flags; |
85 | unsigned int ci_num_cached; | 85 | unsigned int ci_num_cached; |
86 | union { | 86 | union { |
87 | sector_t ci_array[OCFS2_CACHE_INFO_MAX_ARRAY]; | 87 | sector_t ci_array[OCFS2_CACHE_INFO_MAX_ARRAY]; |
88 | struct rb_root ci_tree; | 88 | struct rb_root ci_tree; |
89 | } ci_cache; | 89 | } ci_cache; |
90 | }; | 90 | }; |
91 | /* | 91 | /* |
92 | * Need this prototype here instead of in uptodate.h because journal.h | 92 | * Need this prototype here instead of in uptodate.h because journal.h |
93 | * uses it. | 93 | * uses it. |
94 | */ | 94 | */ |
95 | struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci); | 95 | struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci); |
96 | 96 | ||
97 | /* this limits us to 256 nodes | 97 | /* this limits us to 256 nodes |
98 | * if we need more, we can do a kmalloc for the map */ | 98 | * if we need more, we can do a kmalloc for the map */ |
99 | #define OCFS2_NODE_MAP_MAX_NODES 256 | 99 | #define OCFS2_NODE_MAP_MAX_NODES 256 |
100 | struct ocfs2_node_map { | 100 | struct ocfs2_node_map { |
101 | u16 num_nodes; | 101 | u16 num_nodes; |
102 | unsigned long map[BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES)]; | 102 | unsigned long map[BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES)]; |
103 | }; | 103 | }; |
104 | 104 | ||
105 | enum ocfs2_ast_action { | 105 | enum ocfs2_ast_action { |
106 | OCFS2_AST_INVALID = 0, | 106 | OCFS2_AST_INVALID = 0, |
107 | OCFS2_AST_ATTACH, | 107 | OCFS2_AST_ATTACH, |
108 | OCFS2_AST_CONVERT, | 108 | OCFS2_AST_CONVERT, |
109 | OCFS2_AST_DOWNCONVERT, | 109 | OCFS2_AST_DOWNCONVERT, |
110 | }; | 110 | }; |
111 | 111 | ||
112 | /* actions for an unlockast function to take. */ | 112 | /* actions for an unlockast function to take. */ |
113 | enum ocfs2_unlock_action { | 113 | enum ocfs2_unlock_action { |
114 | OCFS2_UNLOCK_INVALID = 0, | 114 | OCFS2_UNLOCK_INVALID = 0, |
115 | OCFS2_UNLOCK_CANCEL_CONVERT, | 115 | OCFS2_UNLOCK_CANCEL_CONVERT, |
116 | OCFS2_UNLOCK_DROP_LOCK, | 116 | OCFS2_UNLOCK_DROP_LOCK, |
117 | }; | 117 | }; |
118 | 118 | ||
119 | /* ocfs2_lock_res->l_flags flags. */ | 119 | /* ocfs2_lock_res->l_flags flags. */ |
120 | #define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized | 120 | #define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized |
121 | * the lvb */ | 121 | * the lvb */ |
122 | #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in | 122 | #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in |
123 | * dlm_lock */ | 123 | * dlm_lock */ |
124 | #define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to | 124 | #define OCFS2_LOCK_BLOCKED (0x00000004) /* blocked waiting to |
125 | * downconvert*/ | 125 | * downconvert*/ |
126 | #define OCFS2_LOCK_LOCAL (0x00000008) /* newly created inode */ | 126 | #define OCFS2_LOCK_LOCAL (0x00000008) /* newly created inode */ |
127 | #define OCFS2_LOCK_NEEDS_REFRESH (0x00000010) | 127 | #define OCFS2_LOCK_NEEDS_REFRESH (0x00000010) |
128 | #define OCFS2_LOCK_REFRESHING (0x00000020) | 128 | #define OCFS2_LOCK_REFRESHING (0x00000020) |
129 | #define OCFS2_LOCK_INITIALIZED (0x00000040) /* track initialization | 129 | #define OCFS2_LOCK_INITIALIZED (0x00000040) /* track initialization |
130 | * for shutdown paths */ | 130 | * for shutdown paths */ |
131 | #define OCFS2_LOCK_FREEING (0x00000080) /* help dlmglue track | 131 | #define OCFS2_LOCK_FREEING (0x00000080) /* help dlmglue track |
132 | * when to skip queueing | 132 | * when to skip queueing |
133 | * a lock because it's | 133 | * a lock because it's |
134 | * about to be | 134 | * about to be |
135 | * dropped. */ | 135 | * dropped. */ |
136 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 136 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ |
137 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ | 137 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ |
138 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a | 138 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a |
139 | call to dlm_lock. Only | 139 | call to dlm_lock. Only |
140 | exists with BUSY set. */ | 140 | exists with BUSY set. */ |
141 | #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread | 141 | #define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread |
142 | * from downconverting | 142 | * from downconverting |
143 | * before the upconvert | 143 | * before the upconvert |
144 | * has completed */ | 144 | * has completed */ |
145 | 145 | ||
146 | struct ocfs2_lock_res_ops; | 146 | struct ocfs2_lock_res_ops; |
147 | 147 | ||
148 | typedef void (*ocfs2_lock_callback)(int status, unsigned long data); | 148 | typedef void (*ocfs2_lock_callback)(int status, unsigned long data); |
149 | 149 | ||
150 | #ifdef CONFIG_OCFS2_FS_STATS | ||
151 | struct ocfs2_lock_stats { | ||
152 | u64 ls_total; /* Total wait in NSEC */ | ||
153 | u32 ls_gets; /* Num acquires */ | ||
154 | u32 ls_fail; /* Num failed acquires */ | ||
155 | |||
156 | /* Storing max wait in usecs saves 24 bytes per inode */ | ||
157 | u32 ls_max; /* Max wait in USEC */ | ||
158 | }; | ||
159 | #endif | ||
160 | |||
150 | struct ocfs2_lock_res { | 161 | struct ocfs2_lock_res { |
151 | void *l_priv; | 162 | void *l_priv; |
152 | struct ocfs2_lock_res_ops *l_ops; | 163 | struct ocfs2_lock_res_ops *l_ops; |
153 | 164 | ||
154 | 165 | ||
155 | struct list_head l_blocked_list; | 166 | struct list_head l_blocked_list; |
156 | struct list_head l_mask_waiters; | 167 | struct list_head l_mask_waiters; |
157 | 168 | ||
158 | unsigned long l_flags; | 169 | unsigned long l_flags; |
159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | 170 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; |
160 | unsigned int l_ro_holders; | 171 | unsigned int l_ro_holders; |
161 | unsigned int l_ex_holders; | 172 | unsigned int l_ex_holders; |
162 | signed char l_level; | 173 | signed char l_level; |
163 | signed char l_requested; | 174 | signed char l_requested; |
164 | signed char l_blocking; | 175 | signed char l_blocking; |
165 | 176 | ||
166 | /* Data packed - type enum ocfs2_lock_type */ | 177 | /* Data packed - type enum ocfs2_lock_type */ |
167 | unsigned char l_type; | 178 | unsigned char l_type; |
168 | 179 | ||
169 | /* used from AST/BAST funcs. */ | 180 | /* used from AST/BAST funcs. */ |
170 | /* Data packed - enum type ocfs2_ast_action */ | 181 | /* Data packed - enum type ocfs2_ast_action */ |
171 | unsigned char l_action; | 182 | unsigned char l_action; |
172 | /* Data packed - enum type ocfs2_unlock_action */ | 183 | /* Data packed - enum type ocfs2_unlock_action */ |
173 | unsigned char l_unlock_action; | 184 | unsigned char l_unlock_action; |
174 | unsigned int l_pending_gen; | 185 | unsigned int l_pending_gen; |
175 | 186 | ||
176 | spinlock_t l_lock; | 187 | spinlock_t l_lock; |
177 | 188 | ||
178 | struct ocfs2_dlm_lksb l_lksb; | 189 | struct ocfs2_dlm_lksb l_lksb; |
179 | 190 | ||
180 | wait_queue_head_t l_event; | 191 | wait_queue_head_t l_event; |
181 | 192 | ||
182 | struct list_head l_debug_list; | 193 | struct list_head l_debug_list; |
183 | 194 | ||
184 | #ifdef CONFIG_OCFS2_FS_STATS | 195 | #ifdef CONFIG_OCFS2_FS_STATS |
185 | unsigned long long l_lock_num_prmode; /* PR acquires */ | 196 | struct ocfs2_lock_stats l_lock_prmode; /* PR mode stats */ |
186 | unsigned long long l_lock_num_exmode; /* EX acquires */ | 197 | u32 l_lock_refresh; /* Disk refreshes */ |
187 | unsigned int l_lock_num_prmode_failed; /* Failed PR gets */ | 198 | struct ocfs2_lock_stats l_lock_exmode; /* EX mode stats */ |
188 | unsigned int l_lock_num_exmode_failed; /* Failed EX gets */ | ||
189 | unsigned long long l_lock_total_prmode; /* Tot wait for PR */ | ||
190 | unsigned long long l_lock_total_exmode; /* Tot wait for EX */ | ||
191 | unsigned int l_lock_max_prmode; /* Max wait for PR */ | ||
192 | unsigned int l_lock_max_exmode; /* Max wait for EX */ | ||
193 | unsigned int l_lock_refresh; /* Disk refreshes */ | ||
194 | #endif | 199 | #endif |
195 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 200 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
196 | struct lockdep_map l_lockdep_map; | 201 | struct lockdep_map l_lockdep_map; |
197 | #endif | 202 | #endif |
198 | }; | 203 | }; |
199 | 204 | ||
200 | enum ocfs2_orphan_scan_state { | 205 | enum ocfs2_orphan_scan_state { |
201 | ORPHAN_SCAN_ACTIVE, | 206 | ORPHAN_SCAN_ACTIVE, |
202 | ORPHAN_SCAN_INACTIVE | 207 | ORPHAN_SCAN_INACTIVE |
203 | }; | 208 | }; |
204 | 209 | ||
205 | struct ocfs2_orphan_scan { | 210 | struct ocfs2_orphan_scan { |
206 | struct mutex os_lock; | 211 | struct mutex os_lock; |
207 | struct ocfs2_super *os_osb; | 212 | struct ocfs2_super *os_osb; |
208 | struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ | 213 | struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ |
209 | struct delayed_work os_orphan_scan_work; | 214 | struct delayed_work os_orphan_scan_work; |
210 | struct timespec os_scantime; /* time this node ran the scan */ | 215 | struct timespec os_scantime; /* time this node ran the scan */ |
211 | u32 os_count; /* tracks node specific scans */ | 216 | u32 os_count; /* tracks node specific scans */ |
212 | u32 os_seqno; /* tracks cluster wide scans */ | 217 | u32 os_seqno; /* tracks cluster wide scans */ |
213 | atomic_t os_state; /* ACTIVE or INACTIVE */ | 218 | atomic_t os_state; /* ACTIVE or INACTIVE */ |
214 | }; | 219 | }; |
215 | 220 | ||
216 | struct ocfs2_dlm_debug { | 221 | struct ocfs2_dlm_debug { |
217 | struct kref d_refcnt; | 222 | struct kref d_refcnt; |
218 | struct dentry *d_locking_state; | 223 | struct dentry *d_locking_state; |
219 | struct list_head d_lockres_tracking; | 224 | struct list_head d_lockres_tracking; |
220 | }; | 225 | }; |
221 | 226 | ||
222 | enum ocfs2_vol_state | 227 | enum ocfs2_vol_state |
223 | { | 228 | { |
224 | VOLUME_INIT = 0, | 229 | VOLUME_INIT = 0, |
225 | VOLUME_MOUNTED, | 230 | VOLUME_MOUNTED, |
226 | VOLUME_MOUNTED_QUOTAS, | 231 | VOLUME_MOUNTED_QUOTAS, |
227 | VOLUME_DISMOUNTED, | 232 | VOLUME_DISMOUNTED, |
228 | VOLUME_DISABLED | 233 | VOLUME_DISABLED |
229 | }; | 234 | }; |
230 | 235 | ||
231 | struct ocfs2_alloc_stats | 236 | struct ocfs2_alloc_stats |
232 | { | 237 | { |
233 | atomic_t moves; | 238 | atomic_t moves; |
234 | atomic_t local_data; | 239 | atomic_t local_data; |
235 | atomic_t bitmap_data; | 240 | atomic_t bitmap_data; |
236 | atomic_t bg_allocs; | 241 | atomic_t bg_allocs; |
237 | atomic_t bg_extends; | 242 | atomic_t bg_extends; |
238 | }; | 243 | }; |
239 | 244 | ||
240 | enum ocfs2_local_alloc_state | 245 | enum ocfs2_local_alloc_state |
241 | { | 246 | { |
242 | OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for | 247 | OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for |
243 | * this mountpoint. */ | 248 | * this mountpoint. */ |
244 | OCFS2_LA_ENABLED, /* Local alloc is in use. */ | 249 | OCFS2_LA_ENABLED, /* Local alloc is in use. */ |
245 | OCFS2_LA_THROTTLED, /* Local alloc is in use, but number | 250 | OCFS2_LA_THROTTLED, /* Local alloc is in use, but number |
246 | * of bits has been reduced. */ | 251 | * of bits has been reduced. */ |
247 | OCFS2_LA_DISABLED /* Local alloc has temporarily been | 252 | OCFS2_LA_DISABLED /* Local alloc has temporarily been |
248 | * disabled. */ | 253 | * disabled. */ |
249 | }; | 254 | }; |
250 | 255 | ||
251 | enum ocfs2_mount_options | 256 | enum ocfs2_mount_options |
252 | { | 257 | { |
253 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ | 258 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ |
254 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ | 259 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ |
255 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 260 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
256 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 261 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
257 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 262 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ |
258 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | 263 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ |
259 | OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ | 264 | OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ |
260 | OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ | 265 | OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ |
261 | OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */ | 266 | OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */ |
262 | OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access | 267 | OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access |
263 | control lists */ | 268 | control lists */ |
264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ | 269 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ |
265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ | 270 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ |
266 | OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT | 271 | OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT |
267 | writes */ | 272 | writes */ |
268 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ | 273 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ |
269 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | 274 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ |
270 | }; | 275 | }; |
271 | 276 | ||
272 | #define OCFS2_OSB_SOFT_RO 0x0001 | 277 | #define OCFS2_OSB_SOFT_RO 0x0001 |
273 | #define OCFS2_OSB_HARD_RO 0x0002 | 278 | #define OCFS2_OSB_HARD_RO 0x0002 |
274 | #define OCFS2_OSB_ERROR_FS 0x0004 | 279 | #define OCFS2_OSB_ERROR_FS 0x0004 |
275 | #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 | 280 | #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 |
276 | 281 | ||
277 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | 282 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 |
278 | 283 | ||
279 | struct ocfs2_journal; | 284 | struct ocfs2_journal; |
280 | struct ocfs2_slot_info; | 285 | struct ocfs2_slot_info; |
281 | struct ocfs2_recovery_map; | 286 | struct ocfs2_recovery_map; |
282 | struct ocfs2_replay_map; | 287 | struct ocfs2_replay_map; |
283 | struct ocfs2_quota_recovery; | 288 | struct ocfs2_quota_recovery; |
284 | struct ocfs2_dentry_lock; | 289 | struct ocfs2_dentry_lock; |
285 | struct ocfs2_super | 290 | struct ocfs2_super |
286 | { | 291 | { |
287 | struct task_struct *commit_task; | 292 | struct task_struct *commit_task; |
288 | struct super_block *sb; | 293 | struct super_block *sb; |
289 | struct inode *root_inode; | 294 | struct inode *root_inode; |
290 | struct inode *sys_root_inode; | 295 | struct inode *sys_root_inode; |
291 | struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; | 296 | struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; |
292 | struct inode **local_system_inodes; | 297 | struct inode **local_system_inodes; |
293 | 298 | ||
294 | struct ocfs2_slot_info *slot_info; | 299 | struct ocfs2_slot_info *slot_info; |
295 | 300 | ||
296 | u32 *slot_recovery_generations; | 301 | u32 *slot_recovery_generations; |
297 | 302 | ||
298 | spinlock_t node_map_lock; | 303 | spinlock_t node_map_lock; |
299 | 304 | ||
300 | u64 root_blkno; | 305 | u64 root_blkno; |
301 | u64 system_dir_blkno; | 306 | u64 system_dir_blkno; |
302 | u64 bitmap_blkno; | 307 | u64 bitmap_blkno; |
303 | u32 bitmap_cpg; | 308 | u32 bitmap_cpg; |
304 | u8 *uuid; | 309 | u8 *uuid; |
305 | char *uuid_str; | 310 | char *uuid_str; |
306 | u32 uuid_hash; | 311 | u32 uuid_hash; |
307 | u8 *vol_label; | 312 | u8 *vol_label; |
308 | u64 first_cluster_group_blkno; | 313 | u64 first_cluster_group_blkno; |
309 | u32 fs_generation; | 314 | u32 fs_generation; |
310 | 315 | ||
311 | u32 s_feature_compat; | 316 | u32 s_feature_compat; |
312 | u32 s_feature_incompat; | 317 | u32 s_feature_incompat; |
313 | u32 s_feature_ro_compat; | 318 | u32 s_feature_ro_compat; |
314 | 319 | ||
315 | /* Protects s_next_generation, osb_flags and s_inode_steal_slot. | 320 | /* Protects s_next_generation, osb_flags and s_inode_steal_slot. |
316 | * Could protect more on osb as it's very short lived. | 321 | * Could protect more on osb as it's very short lived. |
317 | */ | 322 | */ |
318 | spinlock_t osb_lock; | 323 | spinlock_t osb_lock; |
319 | u32 s_next_generation; | 324 | u32 s_next_generation; |
320 | unsigned long osb_flags; | 325 | unsigned long osb_flags; |
321 | s16 s_inode_steal_slot; | 326 | s16 s_inode_steal_slot; |
322 | s16 s_meta_steal_slot; | 327 | s16 s_meta_steal_slot; |
323 | atomic_t s_num_inodes_stolen; | 328 | atomic_t s_num_inodes_stolen; |
324 | atomic_t s_num_meta_stolen; | 329 | atomic_t s_num_meta_stolen; |
325 | 330 | ||
326 | unsigned long s_mount_opt; | 331 | unsigned long s_mount_opt; |
327 | unsigned int s_atime_quantum; | 332 | unsigned int s_atime_quantum; |
328 | 333 | ||
329 | unsigned int max_slots; | 334 | unsigned int max_slots; |
330 | unsigned int node_num; | 335 | unsigned int node_num; |
331 | int slot_num; | 336 | int slot_num; |
332 | int preferred_slot; | 337 | int preferred_slot; |
333 | int s_sectsize_bits; | 338 | int s_sectsize_bits; |
334 | int s_clustersize; | 339 | int s_clustersize; |
335 | int s_clustersize_bits; | 340 | int s_clustersize_bits; |
336 | unsigned int s_xattr_inline_size; | 341 | unsigned int s_xattr_inline_size; |
337 | 342 | ||
338 | atomic_t vol_state; | 343 | atomic_t vol_state; |
339 | struct mutex recovery_lock; | 344 | struct mutex recovery_lock; |
340 | struct ocfs2_recovery_map *recovery_map; | 345 | struct ocfs2_recovery_map *recovery_map; |
341 | struct ocfs2_replay_map *replay_map; | 346 | struct ocfs2_replay_map *replay_map; |
342 | struct task_struct *recovery_thread_task; | 347 | struct task_struct *recovery_thread_task; |
343 | int disable_recovery; | 348 | int disable_recovery; |
344 | wait_queue_head_t checkpoint_event; | 349 | wait_queue_head_t checkpoint_event; |
345 | atomic_t needs_checkpoint; | 350 | atomic_t needs_checkpoint; |
346 | struct ocfs2_journal *journal; | 351 | struct ocfs2_journal *journal; |
347 | unsigned long osb_commit_interval; | 352 | unsigned long osb_commit_interval; |
348 | 353 | ||
349 | struct delayed_work la_enable_wq; | 354 | struct delayed_work la_enable_wq; |
350 | 355 | ||
351 | /* | 356 | /* |
352 | * Must hold local alloc i_mutex and osb->osb_lock to change | 357 | * Must hold local alloc i_mutex and osb->osb_lock to change |
353 | * local_alloc_bits. Reads can be done under either lock. | 358 | * local_alloc_bits. Reads can be done under either lock. |
354 | */ | 359 | */ |
355 | unsigned int local_alloc_bits; | 360 | unsigned int local_alloc_bits; |
356 | unsigned int local_alloc_default_bits; | 361 | unsigned int local_alloc_default_bits; |
357 | /* osb_clusters_at_boot can become stale! Do not trust it to | 362 | /* osb_clusters_at_boot can become stale! Do not trust it to |
358 | * be up to date. */ | 363 | * be up to date. */ |
359 | unsigned int osb_clusters_at_boot; | 364 | unsigned int osb_clusters_at_boot; |
360 | 365 | ||
361 | enum ocfs2_local_alloc_state local_alloc_state; /* protected | 366 | enum ocfs2_local_alloc_state local_alloc_state; /* protected |
362 | * by osb_lock */ | 367 | * by osb_lock */ |
363 | 368 | ||
364 | struct buffer_head *local_alloc_bh; | 369 | struct buffer_head *local_alloc_bh; |
365 | 370 | ||
366 | u64 la_last_gd; | 371 | u64 la_last_gd; |
367 | 372 | ||
368 | struct ocfs2_reservation_map osb_la_resmap; | 373 | struct ocfs2_reservation_map osb_la_resmap; |
369 | 374 | ||
370 | unsigned int osb_resv_level; | 375 | unsigned int osb_resv_level; |
371 | unsigned int osb_dir_resv_level; | 376 | unsigned int osb_dir_resv_level; |
372 | 377 | ||
373 | /* Next three fields are for local node slot recovery during | 378 | /* Next three fields are for local node slot recovery during |
374 | * mount. */ | 379 | * mount. */ |
375 | int dirty; | 380 | int dirty; |
376 | struct ocfs2_dinode *local_alloc_copy; | 381 | struct ocfs2_dinode *local_alloc_copy; |
377 | struct ocfs2_quota_recovery *quota_rec; | 382 | struct ocfs2_quota_recovery *quota_rec; |
378 | 383 | ||
379 | struct ocfs2_blockcheck_stats osb_ecc_stats; | 384 | struct ocfs2_blockcheck_stats osb_ecc_stats; |
380 | struct ocfs2_alloc_stats alloc_stats; | 385 | struct ocfs2_alloc_stats alloc_stats; |
381 | char dev_str[20]; /* "major,minor" of the device */ | 386 | char dev_str[20]; /* "major,minor" of the device */ |
382 | 387 | ||
383 | u8 osb_stackflags; | 388 | u8 osb_stackflags; |
384 | 389 | ||
385 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | 390 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
386 | struct ocfs2_cluster_connection *cconn; | 391 | struct ocfs2_cluster_connection *cconn; |
387 | struct ocfs2_lock_res osb_super_lockres; | 392 | struct ocfs2_lock_res osb_super_lockres; |
388 | struct ocfs2_lock_res osb_rename_lockres; | 393 | struct ocfs2_lock_res osb_rename_lockres; |
389 | struct ocfs2_lock_res osb_nfs_sync_lockres; | 394 | struct ocfs2_lock_res osb_nfs_sync_lockres; |
390 | struct ocfs2_dlm_debug *osb_dlm_debug; | 395 | struct ocfs2_dlm_debug *osb_dlm_debug; |
391 | 396 | ||
392 | struct dentry *osb_debug_root; | 397 | struct dentry *osb_debug_root; |
393 | struct dentry *osb_ctxt; | 398 | struct dentry *osb_ctxt; |
394 | 399 | ||
395 | wait_queue_head_t recovery_event; | 400 | wait_queue_head_t recovery_event; |
396 | 401 | ||
397 | spinlock_t dc_task_lock; | 402 | spinlock_t dc_task_lock; |
398 | struct task_struct *dc_task; | 403 | struct task_struct *dc_task; |
399 | wait_queue_head_t dc_event; | 404 | wait_queue_head_t dc_event; |
400 | unsigned long dc_wake_sequence; | 405 | unsigned long dc_wake_sequence; |
401 | unsigned long dc_work_sequence; | 406 | unsigned long dc_work_sequence; |
402 | 407 | ||
403 | /* | 408 | /* |
404 | * Any thread can add locks to the list, but the downconvert | 409 | * Any thread can add locks to the list, but the downconvert |
405 | * thread is the only one allowed to remove locks. Any change | 410 | * thread is the only one allowed to remove locks. Any change |
406 | * to this rule requires updating | 411 | * to this rule requires updating |
407 | * ocfs2_downconvert_thread_do_work(). | 412 | * ocfs2_downconvert_thread_do_work(). |
408 | */ | 413 | */ |
409 | struct list_head blocked_lock_list; | 414 | struct list_head blocked_lock_list; |
410 | unsigned long blocked_lock_count; | 415 | unsigned long blocked_lock_count; |
411 | 416 | ||
412 | /* List of dentry locks to release. Anyone can add locks to | 417 | /* List of dentry locks to release. Anyone can add locks to |
413 | * the list, ocfs2_wq processes the list */ | 418 | * the list, ocfs2_wq processes the list */ |
414 | struct ocfs2_dentry_lock *dentry_lock_list; | 419 | struct ocfs2_dentry_lock *dentry_lock_list; |
415 | struct work_struct dentry_lock_work; | 420 | struct work_struct dentry_lock_work; |
416 | 421 | ||
417 | wait_queue_head_t osb_mount_event; | 422 | wait_queue_head_t osb_mount_event; |
418 | 423 | ||
419 | /* Truncate log info */ | 424 | /* Truncate log info */ |
420 | struct inode *osb_tl_inode; | 425 | struct inode *osb_tl_inode; |
421 | struct buffer_head *osb_tl_bh; | 426 | struct buffer_head *osb_tl_bh; |
422 | struct delayed_work osb_truncate_log_wq; | 427 | struct delayed_work osb_truncate_log_wq; |
423 | /* | 428 | /* |
424 | * How many clusters in our truncate log. | 429 | * How many clusters in our truncate log. |
425 | * It must be protected by osb_tl_inode->i_mutex. | 430 | * It must be protected by osb_tl_inode->i_mutex. |
426 | */ | 431 | */ |
427 | unsigned int truncated_clusters; | 432 | unsigned int truncated_clusters; |
428 | 433 | ||
429 | struct ocfs2_node_map osb_recovering_orphan_dirs; | 434 | struct ocfs2_node_map osb_recovering_orphan_dirs; |
430 | unsigned int *osb_orphan_wipes; | 435 | unsigned int *osb_orphan_wipes; |
431 | wait_queue_head_t osb_wipe_event; | 436 | wait_queue_head_t osb_wipe_event; |
432 | 437 | ||
433 | struct ocfs2_orphan_scan osb_orphan_scan; | 438 | struct ocfs2_orphan_scan osb_orphan_scan; |
434 | 439 | ||
435 | /* used to protect metaecc calculation check of xattr. */ | 440 | /* used to protect metaecc calculation check of xattr. */ |
436 | spinlock_t osb_xattr_lock; | 441 | spinlock_t osb_xattr_lock; |
437 | 442 | ||
438 | unsigned int osb_dx_mask; | 443 | unsigned int osb_dx_mask; |
439 | u32 osb_dx_seed[4]; | 444 | u32 osb_dx_seed[4]; |
440 | 445 | ||
441 | /* the group we used to allocate inodes. */ | 446 | /* the group we used to allocate inodes. */ |
442 | u64 osb_inode_alloc_group; | 447 | u64 osb_inode_alloc_group; |
443 | 448 | ||
444 | /* rb tree root for refcount lock. */ | 449 | /* rb tree root for refcount lock. */ |
445 | struct rb_root osb_rf_lock_tree; | 450 | struct rb_root osb_rf_lock_tree; |
446 | struct ocfs2_refcount_tree *osb_ref_tree_lru; | 451 | struct ocfs2_refcount_tree *osb_ref_tree_lru; |
447 | }; | 452 | }; |
448 | 453 | ||
449 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 454 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
450 | 455 | ||
451 | /* Useful typedef for passing around journal access functions */ | 456 | /* Useful typedef for passing around journal access functions */ |
452 | typedef int (*ocfs2_journal_access_func)(handle_t *handle, | 457 | typedef int (*ocfs2_journal_access_func)(handle_t *handle, |
453 | struct ocfs2_caching_info *ci, | 458 | struct ocfs2_caching_info *ci, |
454 | struct buffer_head *bh, int type); | 459 | struct buffer_head *bh, int type); |
455 | 460 | ||
456 | static inline int ocfs2_should_order_data(struct inode *inode) | 461 | static inline int ocfs2_should_order_data(struct inode *inode) |
457 | { | 462 | { |
458 | if (!S_ISREG(inode->i_mode)) | 463 | if (!S_ISREG(inode->i_mode)) |
459 | return 0; | 464 | return 0; |
460 | if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) | 465 | if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) |
461 | return 0; | 466 | return 0; |
462 | return 1; | 467 | return 1; |
463 | } | 468 | } |
464 | 469 | ||
465 | static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb) | 470 | static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb) |
466 | { | 471 | { |
467 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) | 472 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) |
468 | return 1; | 473 | return 1; |
469 | return 0; | 474 | return 0; |
470 | } | 475 | } |
471 | 476 | ||
472 | static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) | 477 | static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) |
473 | { | 478 | { |
474 | /* | 479 | /* |
475 | * Support for sparse files is a pre-requisite | 480 | * Support for sparse files is a pre-requisite |
476 | */ | 481 | */ |
477 | if (!ocfs2_sparse_alloc(osb)) | 482 | if (!ocfs2_sparse_alloc(osb)) |
478 | return 0; | 483 | return 0; |
479 | 484 | ||
480 | if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) | 485 | if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) |
481 | return 1; | 486 | return 1; |
482 | return 0; | 487 | return 0; |
483 | } | 488 | } |
484 | 489 | ||
485 | static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) | 490 | static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) |
486 | { | 491 | { |
487 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) | 492 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) |
488 | return 1; | 493 | return 1; |
489 | return 0; | 494 | return 0; |
490 | } | 495 | } |
491 | 496 | ||
492 | static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) | 497 | static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) |
493 | { | 498 | { |
494 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) | 499 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) |
495 | return 1; | 500 | return 1; |
496 | return 0; | 501 | return 0; |
497 | } | 502 | } |
498 | 503 | ||
499 | static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) | 504 | static inline int ocfs2_meta_ecc(struct ocfs2_super *osb) |
500 | { | 505 | { |
501 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC) | 506 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC) |
502 | return 1; | 507 | return 1; |
503 | return 0; | 508 | return 0; |
504 | } | 509 | } |
505 | 510 | ||
506 | static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb) | 511 | static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb) |
507 | { | 512 | { |
508 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) | 513 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) |
509 | return 1; | 514 | return 1; |
510 | return 0; | 515 | return 0; |
511 | } | 516 | } |
512 | 517 | ||
513 | static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb) | 518 | static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb) |
514 | { | 519 | { |
515 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) | 520 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) |
516 | return 1; | 521 | return 1; |
517 | return 0; | 522 | return 0; |
518 | } | 523 | } |
519 | 524 | ||
520 | static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) | 525 | static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) |
521 | { | 526 | { |
522 | if (ocfs2_supports_indexed_dirs(osb)) | 527 | if (ocfs2_supports_indexed_dirs(osb)) |
523 | return OCFS2_DX_LINK_MAX; | 528 | return OCFS2_DX_LINK_MAX; |
524 | return OCFS2_LINK_MAX; | 529 | return OCFS2_LINK_MAX; |
525 | } | 530 | } |
526 | 531 | ||
527 | static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di) | 532 | static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di) |
528 | { | 533 | { |
529 | u32 nlink = le16_to_cpu(di->i_links_count); | 534 | u32 nlink = le16_to_cpu(di->i_links_count); |
530 | u32 hi = le16_to_cpu(di->i_links_count_hi); | 535 | u32 hi = le16_to_cpu(di->i_links_count_hi); |
531 | 536 | ||
532 | if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL)) | 537 | if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL)) |
533 | nlink |= (hi << OCFS2_LINKS_HI_SHIFT); | 538 | nlink |= (hi << OCFS2_LINKS_HI_SHIFT); |
534 | 539 | ||
535 | return nlink; | 540 | return nlink; |
536 | } | 541 | } |
537 | 542 | ||
538 | static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink) | 543 | static inline void ocfs2_set_links_count(struct ocfs2_dinode *di, u32 nlink) |
539 | { | 544 | { |
540 | u16 lo, hi; | 545 | u16 lo, hi; |
541 | 546 | ||
542 | lo = nlink; | 547 | lo = nlink; |
543 | hi = nlink >> OCFS2_LINKS_HI_SHIFT; | 548 | hi = nlink >> OCFS2_LINKS_HI_SHIFT; |
544 | 549 | ||
545 | di->i_links_count = cpu_to_le16(lo); | 550 | di->i_links_count = cpu_to_le16(lo); |
546 | di->i_links_count_hi = cpu_to_le16(hi); | 551 | di->i_links_count_hi = cpu_to_le16(hi); |
547 | } | 552 | } |
548 | 553 | ||
549 | static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n) | 554 | static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n) |
550 | { | 555 | { |
551 | u32 links = ocfs2_read_links_count(di); | 556 | u32 links = ocfs2_read_links_count(di); |
552 | 557 | ||
553 | links += n; | 558 | links += n; |
554 | 559 | ||
555 | ocfs2_set_links_count(di, links); | 560 | ocfs2_set_links_count(di, links); |
556 | } | 561 | } |
557 | 562 | ||
558 | static inline int ocfs2_refcount_tree(struct ocfs2_super *osb) | 563 | static inline int ocfs2_refcount_tree(struct ocfs2_super *osb) |
559 | { | 564 | { |
560 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) | 565 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) |
561 | return 1; | 566 | return 1; |
562 | return 0; | 567 | return 0; |
563 | } | 568 | } |
564 | 569 | ||
565 | /* set / clear functions because cluster events can make these happen | 570 | /* set / clear functions because cluster events can make these happen |
566 | * in parallel so we want the transitions to be atomic. this also | 571 | * in parallel so we want the transitions to be atomic. this also |
567 | * means that any future flags osb_flags must be protected by spinlock | 572 | * means that any future flags osb_flags must be protected by spinlock |
568 | * too! */ | 573 | * too! */ |
569 | static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, | 574 | static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, |
570 | unsigned long flag) | 575 | unsigned long flag) |
571 | { | 576 | { |
572 | spin_lock(&osb->osb_lock); | 577 | spin_lock(&osb->osb_lock); |
573 | osb->osb_flags |= flag; | 578 | osb->osb_flags |= flag; |
574 | spin_unlock(&osb->osb_lock); | 579 | spin_unlock(&osb->osb_lock); |
575 | } | 580 | } |
576 | 581 | ||
577 | 582 | ||
578 | static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, | 583 | static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, |
579 | unsigned long flag) | 584 | unsigned long flag) |
580 | { | 585 | { |
581 | unsigned long ret; | 586 | unsigned long ret; |
582 | 587 | ||
583 | spin_lock(&osb->osb_lock); | 588 | spin_lock(&osb->osb_lock); |
584 | ret = osb->osb_flags & flag; | 589 | ret = osb->osb_flags & flag; |
585 | spin_unlock(&osb->osb_lock); | 590 | spin_unlock(&osb->osb_lock); |
586 | return ret; | 591 | return ret; |
587 | } | 592 | } |
588 | 593 | ||
589 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, | 594 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, |
590 | int hard) | 595 | int hard) |
591 | { | 596 | { |
592 | spin_lock(&osb->osb_lock); | 597 | spin_lock(&osb->osb_lock); |
593 | osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO); | 598 | osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO); |
594 | if (hard) | 599 | if (hard) |
595 | osb->osb_flags |= OCFS2_OSB_HARD_RO; | 600 | osb->osb_flags |= OCFS2_OSB_HARD_RO; |
596 | else | 601 | else |
597 | osb->osb_flags |= OCFS2_OSB_SOFT_RO; | 602 | osb->osb_flags |= OCFS2_OSB_SOFT_RO; |
598 | spin_unlock(&osb->osb_lock); | 603 | spin_unlock(&osb->osb_lock); |
599 | } | 604 | } |
600 | 605 | ||
601 | static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb) | 606 | static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb) |
602 | { | 607 | { |
603 | int ret; | 608 | int ret; |
604 | 609 | ||
605 | spin_lock(&osb->osb_lock); | 610 | spin_lock(&osb->osb_lock); |
606 | ret = osb->osb_flags & OCFS2_OSB_HARD_RO; | 611 | ret = osb->osb_flags & OCFS2_OSB_HARD_RO; |
607 | spin_unlock(&osb->osb_lock); | 612 | spin_unlock(&osb->osb_lock); |
608 | 613 | ||
609 | return ret; | 614 | return ret; |
610 | } | 615 | } |
611 | 616 | ||
612 | static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | 617 | static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) |
613 | { | 618 | { |
614 | int ret; | 619 | int ret; |
615 | 620 | ||
616 | spin_lock(&osb->osb_lock); | 621 | spin_lock(&osb->osb_lock); |
617 | ret = osb->osb_flags & OCFS2_OSB_SOFT_RO; | 622 | ret = osb->osb_flags & OCFS2_OSB_SOFT_RO; |
618 | spin_unlock(&osb->osb_lock); | 623 | spin_unlock(&osb->osb_lock); |
619 | 624 | ||
620 | return ret; | 625 | return ret; |
621 | } | 626 | } |
622 | 627 | ||
623 | static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) | 628 | static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) |
624 | { | 629 | { |
625 | return (osb->s_feature_incompat & | 630 | return (osb->s_feature_incompat & |
626 | (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | | 631 | (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | |
627 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); | 632 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); |
628 | } | 633 | } |
629 | 634 | ||
630 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | 635 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) |
631 | { | 636 | { |
632 | if (ocfs2_clusterinfo_valid(osb) && | 637 | if (ocfs2_clusterinfo_valid(osb) && |
633 | memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | 638 | memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, |
634 | OCFS2_STACK_LABEL_LEN)) | 639 | OCFS2_STACK_LABEL_LEN)) |
635 | return 1; | 640 | return 1; |
636 | return 0; | 641 | return 0; |
637 | } | 642 | } |
638 | 643 | ||
639 | static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) | 644 | static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) |
640 | { | 645 | { |
641 | if (ocfs2_clusterinfo_valid(osb) && | 646 | if (ocfs2_clusterinfo_valid(osb) && |
642 | !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | 647 | !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, |
643 | OCFS2_STACK_LABEL_LEN)) | 648 | OCFS2_STACK_LABEL_LEN)) |
644 | return 1; | 649 | return 1; |
645 | return 0; | 650 | return 0; |
646 | } | 651 | } |
647 | 652 | ||
648 | static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) | 653 | static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) |
649 | { | 654 | { |
650 | return ocfs2_o2cb_stack(osb) && | 655 | return ocfs2_o2cb_stack(osb) && |
651 | (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); | 656 | (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); |
652 | } | 657 | } |
653 | 658 | ||
654 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 659 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
655 | { | 660 | { |
656 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); | 661 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); |
657 | } | 662 | } |
658 | 663 | ||
659 | static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) | 664 | static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) |
660 | { | 665 | { |
661 | return (osb->s_feature_incompat & | 666 | return (osb->s_feature_incompat & |
662 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); | 667 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); |
663 | } | 668 | } |
664 | 669 | ||
665 | 670 | ||
666 | #define OCFS2_IS_VALID_DINODE(ptr) \ | 671 | #define OCFS2_IS_VALID_DINODE(ptr) \ |
667 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) | 672 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) |
668 | 673 | ||
669 | #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ | 674 | #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ |
670 | (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) | 675 | (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) |
671 | 676 | ||
672 | #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ | 677 | #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ |
673 | (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) | 678 | (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) |
674 | 679 | ||
675 | 680 | ||
676 | #define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ | 681 | #define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ |
677 | (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) | 682 | (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) |
678 | 683 | ||
679 | #define OCFS2_IS_VALID_DIR_TRAILER(ptr) \ | 684 | #define OCFS2_IS_VALID_DIR_TRAILER(ptr) \ |
680 | (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE)) | 685 | (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE)) |
681 | 686 | ||
682 | #define OCFS2_IS_VALID_DX_ROOT(ptr) \ | 687 | #define OCFS2_IS_VALID_DX_ROOT(ptr) \ |
683 | (!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE)) | 688 | (!strcmp((ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE)) |
684 | 689 | ||
685 | #define OCFS2_IS_VALID_DX_LEAF(ptr) \ | 690 | #define OCFS2_IS_VALID_DX_LEAF(ptr) \ |
686 | (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) | 691 | (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) |
687 | 692 | ||
688 | #define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr) \ | 693 | #define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr) \ |
689 | (!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE)) | 694 | (!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE)) |
690 | 695 | ||
691 | static inline unsigned long ino_from_blkno(struct super_block *sb, | 696 | static inline unsigned long ino_from_blkno(struct super_block *sb, |
692 | u64 blkno) | 697 | u64 blkno) |
693 | { | 698 | { |
694 | return (unsigned long)(blkno & (u64)ULONG_MAX); | 699 | return (unsigned long)(blkno & (u64)ULONG_MAX); |
695 | } | 700 | } |
696 | 701 | ||
697 | static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb, | 702 | static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb, |
698 | u32 clusters) | 703 | u32 clusters) |
699 | { | 704 | { |
700 | int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits - | 705 | int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits - |
701 | sb->s_blocksize_bits; | 706 | sb->s_blocksize_bits; |
702 | 707 | ||
703 | return (u64)clusters << c_to_b_bits; | 708 | return (u64)clusters << c_to_b_bits; |
704 | } | 709 | } |
705 | 710 | ||
706 | static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb, | 711 | static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb, |
707 | u64 blocks) | 712 | u64 blocks) |
708 | { | 713 | { |
709 | int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits - | 714 | int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits - |
710 | sb->s_blocksize_bits; | 715 | sb->s_blocksize_bits; |
711 | 716 | ||
712 | return (u32)(blocks >> b_to_c_bits); | 717 | return (u32)(blocks >> b_to_c_bits); |
713 | } | 718 | } |
714 | 719 | ||
715 | static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, | 720 | static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, |
716 | u64 bytes) | 721 | u64 bytes) |
717 | { | 722 | { |
718 | int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; | 723 | int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; |
719 | unsigned int clusters; | 724 | unsigned int clusters; |
720 | 725 | ||
721 | bytes += OCFS2_SB(sb)->s_clustersize - 1; | 726 | bytes += OCFS2_SB(sb)->s_clustersize - 1; |
722 | /* OCFS2 just cannot have enough clusters to overflow this */ | 727 | /* OCFS2 just cannot have enough clusters to overflow this */ |
723 | clusters = (unsigned int)(bytes >> cl_bits); | 728 | clusters = (unsigned int)(bytes >> cl_bits); |
724 | 729 | ||
725 | return clusters; | 730 | return clusters; |
726 | } | 731 | } |
727 | 732 | ||
728 | static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, | 733 | static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, |
729 | u64 bytes) | 734 | u64 bytes) |
730 | { | 735 | { |
731 | bytes += sb->s_blocksize - 1; | 736 | bytes += sb->s_blocksize - 1; |
732 | return bytes >> sb->s_blocksize_bits; | 737 | return bytes >> sb->s_blocksize_bits; |
733 | } | 738 | } |
734 | 739 | ||
735 | static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, | 740 | static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, |
736 | u32 clusters) | 741 | u32 clusters) |
737 | { | 742 | { |
738 | return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; | 743 | return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; |
739 | } | 744 | } |
740 | 745 | ||
741 | static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb, | 746 | static inline u64 ocfs2_block_to_cluster_start(struct super_block *sb, |
742 | u64 blocks) | 747 | u64 blocks) |
743 | { | 748 | { |
744 | int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; | 749 | int bits = OCFS2_SB(sb)->s_clustersize_bits - sb->s_blocksize_bits; |
745 | unsigned int clusters; | 750 | unsigned int clusters; |
746 | 751 | ||
747 | clusters = ocfs2_blocks_to_clusters(sb, blocks); | 752 | clusters = ocfs2_blocks_to_clusters(sb, blocks); |
748 | return (u64)clusters << bits; | 753 | return (u64)clusters << bits; |
749 | } | 754 | } |
750 | 755 | ||
751 | static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, | 756 | static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, |
752 | u64 bytes) | 757 | u64 bytes) |
753 | { | 758 | { |
754 | int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; | 759 | int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; |
755 | unsigned int clusters; | 760 | unsigned int clusters; |
756 | 761 | ||
757 | clusters = ocfs2_clusters_for_bytes(sb, bytes); | 762 | clusters = ocfs2_clusters_for_bytes(sb, bytes); |
758 | return (u64)clusters << cl_bits; | 763 | return (u64)clusters << cl_bits; |
759 | } | 764 | } |
760 | 765 | ||
761 | static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb, | 766 | static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb, |
762 | u64 bytes) | 767 | u64 bytes) |
763 | { | 768 | { |
764 | u64 blocks; | 769 | u64 blocks; |
765 | 770 | ||
766 | blocks = ocfs2_blocks_for_bytes(sb, bytes); | 771 | blocks = ocfs2_blocks_for_bytes(sb, bytes); |
767 | return blocks << sb->s_blocksize_bits; | 772 | return blocks << sb->s_blocksize_bits; |
768 | } | 773 | } |
769 | 774 | ||
770 | static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes) | 775 | static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes) |
771 | { | 776 | { |
772 | return (unsigned long)((bytes + 511) >> 9); | 777 | return (unsigned long)((bytes + 511) >> 9); |
773 | } | 778 | } |
774 | 779 | ||
775 | static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb, | 780 | static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb, |
776 | unsigned long pg_index) | 781 | unsigned long pg_index) |
777 | { | 782 | { |
778 | u32 clusters = pg_index; | 783 | u32 clusters = pg_index; |
779 | unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; | 784 | unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; |
780 | 785 | ||
781 | if (unlikely(PAGE_CACHE_SHIFT > cbits)) | 786 | if (unlikely(PAGE_CACHE_SHIFT > cbits)) |
782 | clusters = pg_index << (PAGE_CACHE_SHIFT - cbits); | 787 | clusters = pg_index << (PAGE_CACHE_SHIFT - cbits); |
783 | else if (PAGE_CACHE_SHIFT < cbits) | 788 | else if (PAGE_CACHE_SHIFT < cbits) |
784 | clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT); | 789 | clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT); |
785 | 790 | ||
786 | return clusters; | 791 | return clusters; |
787 | } | 792 | } |
788 | 793 | ||
789 | /* | 794 | /* |
790 | * Find the 1st page index which covers the given clusters. | 795 | * Find the 1st page index which covers the given clusters. |
791 | */ | 796 | */ |
792 | static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb, | 797 | static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb, |
793 | u32 clusters) | 798 | u32 clusters) |
794 | { | 799 | { |
795 | unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; | 800 | unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; |
796 | pgoff_t index = clusters; | 801 | pgoff_t index = clusters; |
797 | 802 | ||
798 | if (PAGE_CACHE_SHIFT > cbits) { | 803 | if (PAGE_CACHE_SHIFT > cbits) { |
799 | index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits); | 804 | index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits); |
800 | } else if (PAGE_CACHE_SHIFT < cbits) { | 805 | } else if (PAGE_CACHE_SHIFT < cbits) { |
801 | index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT); | 806 | index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT); |
802 | } | 807 | } |
803 | 808 | ||
804 | return index; | 809 | return index; |
805 | } | 810 | } |
806 | 811 | ||
807 | static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | 812 | static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) |
808 | { | 813 | { |
809 | unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; | 814 | unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; |
810 | unsigned int pages_per_cluster = 1; | 815 | unsigned int pages_per_cluster = 1; |
811 | 816 | ||
812 | if (PAGE_CACHE_SHIFT < cbits) | 817 | if (PAGE_CACHE_SHIFT < cbits) |
813 | pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT); | 818 | pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT); |
814 | 819 | ||
815 | return pages_per_cluster; | 820 | return pages_per_cluster; |
816 | } | 821 | } |
817 | 822 | ||
818 | static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, | 823 | static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, |
819 | unsigned int megs) | 824 | unsigned int megs) |
820 | { | 825 | { |
821 | BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); | 826 | BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); |
822 | 827 | ||
823 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); | 828 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); |
824 | } | 829 | } |
825 | 830 | ||
826 | static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, | 831 | static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, |
827 | unsigned int clusters) | 832 | unsigned int clusters) |
828 | { | 833 | { |
829 | return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits); | 834 | return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits); |
830 | } | 835 | } |
831 | 836 | ||
832 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) | 837 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) |
833 | { | 838 | { |
834 | ext2_set_bit(bit, bitmap); | 839 | ext2_set_bit(bit, bitmap); |
835 | } | 840 | } |
836 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) | 841 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) |
837 | 842 | ||
838 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) | 843 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) |
839 | { | 844 | { |
840 | ext2_clear_bit(bit, bitmap); | 845 | ext2_clear_bit(bit, bitmap); |
841 | } | 846 | } |
842 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) | 847 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) |
843 | 848 |