Commit 3211949f8998dde71d9fe2e063de045ece5e0473

Authored by Sunil Mushran
Committed by Joel Becker
1 parent 692684e19e

ocfs2: Do not initialize lvb in ocfs2_orphan_scan_lock_res_init()

We don't access the LVB in our ocfs2_*_lock_res_init() functions.

Since the LVB can become invalid during some cluster recovery
operations, the dlmglue must be able to handle an uninitialized
LVB.

For the orphan scan lock, we initialize an uninitialized LVB with our
scan sequence number plus one.  This starts a normal orphan scan
cycle.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

Showing 2 changed files with 4 additions and 4 deletions Inline Diff

1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dlmglue.c 4 * dlmglue.c
5 * 5 *
6 * Code which implements an OCFS2 specific interface to our DLM. 6 * Code which implements an OCFS2 specific interface to our DLM.
7 * 7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/types.h> 26 #include <linux/types.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/highmem.h> 28 #include <linux/highmem.h>
29 #include <linux/mm.h> 29 #include <linux/mm.h>
30 #include <linux/kthread.h> 30 #include <linux/kthread.h>
31 #include <linux/pagemap.h> 31 #include <linux/pagemap.h>
32 #include <linux/debugfs.h> 32 #include <linux/debugfs.h>
33 #include <linux/seq_file.h> 33 #include <linux/seq_file.h>
34 #include <linux/time.h> 34 #include <linux/time.h>
35 #include <linux/quotaops.h> 35 #include <linux/quotaops.h>
36 36
37 #define MLOG_MASK_PREFIX ML_DLM_GLUE 37 #define MLOG_MASK_PREFIX ML_DLM_GLUE
38 #include <cluster/masklog.h> 38 #include <cluster/masklog.h>
39 39
40 #include "ocfs2.h" 40 #include "ocfs2.h"
41 #include "ocfs2_lockingver.h" 41 #include "ocfs2_lockingver.h"
42 42
43 #include "alloc.h" 43 #include "alloc.h"
44 #include "dcache.h" 44 #include "dcache.h"
45 #include "dlmglue.h" 45 #include "dlmglue.h"
46 #include "extent_map.h" 46 #include "extent_map.h"
47 #include "file.h" 47 #include "file.h"
48 #include "heartbeat.h" 48 #include "heartbeat.h"
49 #include "inode.h" 49 #include "inode.h"
50 #include "journal.h" 50 #include "journal.h"
51 #include "stackglue.h" 51 #include "stackglue.h"
52 #include "slot_map.h" 52 #include "slot_map.h"
53 #include "super.h" 53 #include "super.h"
54 #include "uptodate.h" 54 #include "uptodate.h"
55 #include "quota.h" 55 #include "quota.h"
56 56
57 #include "buffer_head_io.h" 57 #include "buffer_head_io.h"
58 58
/*
 * A task waiting for a lock resource's state to change.  mw_mask and
 * mw_goal presumably describe the l_flags condition being waited for
 * (mask of interesting bits, wanted value) -- confirm against the
 * mask-waiter helpers later in this file.
 */
struct ocfs2_mask_waiter {
	struct list_head	mw_item;	/* linkage on the lockres' waiter list */
	int			mw_status;	/* result handed back to the waiter */
	struct completion	mw_complete;	/* signalled when the wait is satisfied */
	unsigned long		mw_mask;
	unsigned long		mw_goal;
#ifdef CONFIG_OCFS2_FS_STATS
	unsigned long long	mw_lock_start;	/* ns timestamp when the wait began;
						 * set by ocfs2_init_start_time() */
#endif
};
69 69
/* Per-lock-type ways of recovering the ocfs2_super from ->l_priv. */
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);

/*
 * Return value from ->downconvert_worker functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock()
 *
 */
enum ocfs2_unblock_action {
	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
				      * ->post_unlock callback */
	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
				      * ->post_unlock() callback. */
};

struct ocfs2_unblock_ctl {
	int requeue;				/* nonzero: retry the downconvert later */
	enum ocfs2_unblock_action unblock_action;
};

static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);

static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Convenience wrapper so LVB dumps record where they were requested. */
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
111 111
/* This aids in debugging situations where a bad LVB might be involved. */
static void ocfs2_dump_meta_lvb_info(u64 level,
				     const char *function,
				     unsigned int line,
				     struct ocfs2_lock_res *lockres)
{
	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);

	/* @function/@line identify the caller (see mlog_meta_lvb()).
	 * All lvb_* fields are stored big-endian on disk/wire, hence
	 * the be*_to_cpu() conversions below. */
	mlog(level, "LVB information for %s (called from %s:%u):\n",
	     lockres->l_name, function, line);
	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
	     be32_to_cpu(lvb->lvb_igeneration));
	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
	     be16_to_cpu(lvb->lvb_imode));
	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
	     be32_to_cpu(lvb->lvb_iattr));
}
136 136
137 137
/*
 * OCFS2 Lock Resource Operations
 *
 * These fine tune the behavior of the generic dlmglue locking infrastructure.
 *
 * The most basic of lock types can point ->l_priv to their respective
 * struct ocfs2_super and allow the default actions to manage things.
 *
 * Right now, each lock type also needs to implement an init function,
 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
 * should be called when the lock is no longer needed (i.e., object
 * destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
	 * this callback if ->l_priv is not an ocfs2_super pointer
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optionally called in the downconvert thread after a
	 * successful downconvert. The lockres will not be referenced
	 * after this callback is called, so it is safe to free
	 * memory, etc.
	 *
	 * The exact semantics of when this is called are controlled
	 * by ->downconvert_worker()
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Allow a lock type to add checks to determine whether it is
	 * safe to downconvert a lock. Return 0 to re-queue the
	 * downconvert at a later time, nonzero to continue.
	 *
	 * For most locks, the default checks that there are no
	 * incompatible holders are sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Allows a lock type to populate the lock value block. This
	 * is called on downconvert, and when we drop a lock.
	 *
	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
	 * in the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread when it is determined
	 * that a lock will be downconverted. This is called without
	 * any locks held so the function can do work that might
	 * schedule (syncing out data, etc).
	 *
	 * This should return any one of the ocfs2_unblock_action
	 * values, depending on what it wants the thread to do.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags which describe the specific requirements
	 * of a lock type. Descriptions of each individual flag follow.
	 */
	int flags;
};

/*
 * Some locks want to "refresh" potentially stale data when a
 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
 * individual lockres l_flags member from the ast function. It is
 * expected that the locking wrapper will clear the
 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * Indicate that a lock type makes use of the lock value block. The
 * ->set_lvb lock type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB 0x2
225 225
/*
 * Per-lock-type operation tables.  Types with an all-default behavior
 * leave every callback NULL and set .flags = 0.
 */
static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

/* Inode metadata lock: carries inode fields in its LVB and must
 * refresh the inode from disk on first meaningful acquisition. */
static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.check_downconvert = ocfs2_check_meta_downconvert,
	.set_lvb	= ocfs2_set_meta_lvb,
	.downconvert_worker = ocfs2_data_convert_worker,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_super_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
};

static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
	.flags		= 0,
};

/* Orphan scan lock: uses its LVB but defines no ->set_lvb, so dlmglue
 * must tolerate an uninitialized LVB for this type. */
static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
	.get_osb	= ocfs2_get_dentry_osb,
	.post_unlock	= ocfs2_dentry_post_unlock,
	.downconvert_worker = ocfs2_dentry_convert_worker,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
	.get_osb	= ocfs2_get_file_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
	.set_lvb	= ocfs2_set_qinfo_lvb,
	.get_osb	= ocfs2_get_qinfo_osb,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};
277 277
278 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 278 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
279 { 279 {
280 return lockres->l_type == OCFS2_LOCK_TYPE_META || 280 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
281 lockres->l_type == OCFS2_LOCK_TYPE_RW || 281 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
282 lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 282 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
283 } 283 }
284 284
/* Fetch the inode backing an inode-type lockres; BUG on misuse. */
static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
{
	/* Only the meta/rw/open lock types stash an inode in l_priv. */
	BUG_ON(!ocfs2_is_inode_lock(lockres));

	return (struct inode *) lockres->l_priv;
}
291 291
/* Fetch the dentry-lock object behind a dentry-type lockres; BUG on misuse. */
static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);

	return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
298 298
/* Fetch the quota-info object behind a qinfo-type lockres; BUG on misuse. */
static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);

	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
}
305 305
/*
 * Resolve the ocfs2_super for any lockres: use the type's ->get_osb
 * callback if one is defined, otherwise l_priv holds the osb directly
 * (the default documented in struct ocfs2_lock_res_ops).
 */
static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
	if (lockres->l_ops->get_osb)
		return lockres->l_ops->get_osb(lockres);

	return (struct ocfs2_super *)lockres->l_priv;
}
313 313
/* Forward declarations for the generic lock machinery defined below. */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags);
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted);
static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
				 struct ocfs2_lock_res *lockres,
				 int level);
static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert);
/* Dentry lock names embed a binary inode number past
 * OCFS2_DENTRY_LOCK_INO_START, so they get a different (length-bounded)
 * format than the plain-string names of every other lock type. */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     _err, _func, _lockres->l_name);					\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name,	\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level,
				  int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);
358 358
359 359
/*
 * Build the canonical lock resource name into @name: one lock-type
 * character, pad, 16 hex digits of block number, 8 hex digits of
 * generation.  The result is always exactly OCFS2_LOCK_ID_MAX_LEN - 1
 * characters (enforced by the BUG_ON below).
 */
static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
				  u64 blkno,
				  u32 generation,
				  char *name)
{
	int len;

	mlog_entry_void();

	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);

	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
		       (long long)blkno, generation);

	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));

	mlog(0, "built lock resource with name: %s\n", name);

	mlog_exit_void();
}
381 381
/* Protects every d_lockres_tracking list across all mounts. */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);

/* Publish @res on the per-mount debug list for the dlm debug interface. */
static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
				       struct ocfs2_dlm_debug *dlm_debug)
{
	mlog(0, "Add tracking for lockres %s\n", res->l_name);

	spin_lock(&ocfs2_dlm_tracking_lock);
	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
	spin_unlock(&ocfs2_dlm_tracking_lock);
}

static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
{
	spin_lock(&ocfs2_dlm_tracking_lock);
	/* The list_empty() check makes removal a no-op if the lockres
	 * was never added or was already removed. */
	if (!list_empty(&res->l_debug_list))
		list_del_init(&res->l_debug_list);
	spin_unlock(&ocfs2_dlm_tracking_lock);
}
401 401
#ifdef CONFIG_OCFS2_FS_STATS
/* Reset all acquisition counters/timers for a (re)initialized lockres. */
static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
	res->l_lock_num_prmode = 0;
	res->l_lock_num_prmode_failed = 0;
	res->l_lock_total_prmode = 0;
	res->l_lock_max_prmode = 0;
	res->l_lock_num_exmode = 0;
	res->l_lock_num_exmode_failed = 0;
	res->l_lock_total_exmode = 0;
	res->l_lock_max_exmode = 0;
	res->l_lock_refresh = 0;
}

/*
 * Record one completed lock attempt: bump the count for @level, add
 * the elapsed wait time (ns since mw->mw_lock_start), track the
 * maximum, and count failures when @ret is nonzero.  Levels other
 * than PR/EX are ignored.
 */
static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
				    struct ocfs2_mask_waiter *mw, int ret)
{
	unsigned long long *num, *sum;
	unsigned int *max, *failed;
	struct timespec ts = current_kernel_time();
	unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start;

	if (level == LKM_PRMODE) {
		num = &res->l_lock_num_prmode;
		sum = &res->l_lock_total_prmode;
		max = &res->l_lock_max_prmode;
		failed = &res->l_lock_num_prmode_failed;
	} else if (level == LKM_EXMODE) {
		num = &res->l_lock_num_exmode;
		sum = &res->l_lock_total_exmode;
		max = &res->l_lock_max_exmode;
		failed = &res->l_lock_num_exmode_failed;
	} else
		return;

	(*num)++;
	(*sum) += time;
	/* NOTE(review): *max is unsigned int while time is unsigned
	 * long long ns, so the assignment truncates waits longer than
	 * ~4.29s (2^32 ns).  Presumably acceptable for debug stats --
	 * confirm. */
	if (time > *max)
		*max = time;
	if (ret)
		(*failed)++;
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
	lockres->l_lock_refresh++;
}

/* Stamp the start of a wait, consumed later by ocfs2_update_lock_stats(). */
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
	struct timespec ts = current_kernel_time();
	mw->mw_lock_start = timespec_to_ns(&ts);
}
#else
/* !CONFIG_OCFS2_FS_STATS: stats collection compiles away to nothing. */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
}
static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level, struct ocfs2_mask_waiter *mw, int ret)
{
}
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
#endif
470 470
471 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 471 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
472 struct ocfs2_lock_res *res, 472 struct ocfs2_lock_res *res,
473 enum ocfs2_lock_type type, 473 enum ocfs2_lock_type type,
474 struct ocfs2_lock_res_ops *ops, 474 struct ocfs2_lock_res_ops *ops,
475 void *priv) 475 void *priv)
476 { 476 {
477 res->l_type = type; 477 res->l_type = type;
478 res->l_ops = ops; 478 res->l_ops = ops;
479 res->l_priv = priv; 479 res->l_priv = priv;
480 480
481 res->l_level = DLM_LOCK_IV; 481 res->l_level = DLM_LOCK_IV;
482 res->l_requested = DLM_LOCK_IV; 482 res->l_requested = DLM_LOCK_IV;
483 res->l_blocking = DLM_LOCK_IV; 483 res->l_blocking = DLM_LOCK_IV;
484 res->l_action = OCFS2_AST_INVALID; 484 res->l_action = OCFS2_AST_INVALID;
485 res->l_unlock_action = OCFS2_UNLOCK_INVALID; 485 res->l_unlock_action = OCFS2_UNLOCK_INVALID;
486 486
487 res->l_flags = OCFS2_LOCK_INITIALIZED; 487 res->l_flags = OCFS2_LOCK_INITIALIZED;
488 488
489 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 489 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
490 490
491 ocfs2_init_lock_stats(res); 491 ocfs2_init_lock_stats(res);
492 } 492 }
493 493
494 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 494 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
495 { 495 {
496 /* This also clears out the lock status block */ 496 /* This also clears out the lock status block */
497 memset(res, 0, sizeof(struct ocfs2_lock_res)); 497 memset(res, 0, sizeof(struct ocfs2_lock_res));
498 spin_lock_init(&res->l_lock); 498 spin_lock_init(&res->l_lock);
499 init_waitqueue_head(&res->l_event); 499 init_waitqueue_head(&res->l_event);
500 INIT_LIST_HEAD(&res->l_blocked_list); 500 INIT_LIST_HEAD(&res->l_blocked_list);
501 INIT_LIST_HEAD(&res->l_mask_waiters); 501 INIT_LIST_HEAD(&res->l_mask_waiters);
502 } 502 }
503 503
504 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 504 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
505 enum ocfs2_lock_type type, 505 enum ocfs2_lock_type type,
506 unsigned int generation, 506 unsigned int generation,
507 struct inode *inode) 507 struct inode *inode)
508 { 508 {
509 struct ocfs2_lock_res_ops *ops; 509 struct ocfs2_lock_res_ops *ops;
510 510
511 switch(type) { 511 switch(type) {
512 case OCFS2_LOCK_TYPE_RW: 512 case OCFS2_LOCK_TYPE_RW:
513 ops = &ocfs2_inode_rw_lops; 513 ops = &ocfs2_inode_rw_lops;
514 break; 514 break;
515 case OCFS2_LOCK_TYPE_META: 515 case OCFS2_LOCK_TYPE_META:
516 ops = &ocfs2_inode_inode_lops; 516 ops = &ocfs2_inode_inode_lops;
517 break; 517 break;
518 case OCFS2_LOCK_TYPE_OPEN: 518 case OCFS2_LOCK_TYPE_OPEN:
519 ops = &ocfs2_inode_open_lops; 519 ops = &ocfs2_inode_open_lops;
520 break; 520 break;
521 default: 521 default:
522 mlog_bug_on_msg(1, "type: %d\n", type); 522 mlog_bug_on_msg(1, "type: %d\n", type);
523 ops = NULL; /* thanks, gcc */ 523 ops = NULL; /* thanks, gcc */
524 break; 524 break;
525 }; 525 };
526 526
527 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 527 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
528 generation, res->l_name); 528 generation, res->l_name);
529 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 529 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
530 } 530 }
531 531
532 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 532 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
533 { 533 {
534 struct inode *inode = ocfs2_lock_res_inode(lockres); 534 struct inode *inode = ocfs2_lock_res_inode(lockres);
535 535
536 return OCFS2_SB(inode->i_sb); 536 return OCFS2_SB(inode->i_sb);
537 } 537 }
538 538
539 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 539 static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
540 { 540 {
541 struct ocfs2_mem_dqinfo *info = lockres->l_priv; 541 struct ocfs2_mem_dqinfo *info = lockres->l_priv;
542 542
543 return OCFS2_SB(info->dqi_gi.dqi_sb); 543 return OCFS2_SB(info->dqi_gi.dqi_sb);
544 } 544 }
545 545
546 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 546 static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
547 { 547 {
548 struct ocfs2_file_private *fp = lockres->l_priv; 548 struct ocfs2_file_private *fp = lockres->l_priv;
549 549
550 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 550 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
551 } 551 }
552 552
553 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 553 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
554 { 554 {
555 __be64 inode_blkno_be; 555 __be64 inode_blkno_be;
556 556
557 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 557 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
558 sizeof(__be64)); 558 sizeof(__be64));
559 559
560 return be64_to_cpu(inode_blkno_be); 560 return be64_to_cpu(inode_blkno_be);
561 } 561 }
562 562
563 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 563 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
564 { 564 {
565 struct ocfs2_dentry_lock *dl = lockres->l_priv; 565 struct ocfs2_dentry_lock *dl = lockres->l_priv;
566 566
567 return OCFS2_SB(dl->dl_inode->i_sb); 567 return OCFS2_SB(dl->dl_inode->i_sb);
568 } 568 }
569 569
570 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 570 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
571 u64 parent, struct inode *inode) 571 u64 parent, struct inode *inode)
572 { 572 {
573 int len; 573 int len;
574 u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 574 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
575 __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 575 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
576 struct ocfs2_lock_res *lockres = &dl->dl_lockres; 576 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
577 577
578 ocfs2_lock_res_init_once(lockres); 578 ocfs2_lock_res_init_once(lockres);
579 579
580 /* 580 /*
581 * Unfortunately, the standard lock naming scheme won't work 581 * Unfortunately, the standard lock naming scheme won't work
582 * here because we have two 16 byte values to use. Instead, 582 * here because we have two 16 byte values to use. Instead,
583 * we'll stuff the inode number as a binary value. We still 583 * we'll stuff the inode number as a binary value. We still
584 * want error prints to show something without garbling the 584 * want error prints to show something without garbling the
585 * display, so drop a null byte in there before the inode 585 * display, so drop a null byte in there before the inode
586 * number. A future version of OCFS2 will likely use all 586 * number. A future version of OCFS2 will likely use all
587 * binary lock names. The stringified names have been a 587 * binary lock names. The stringified names have been a
588 * tremendous aid in debugging, but now that the debugfs 588 * tremendous aid in debugging, but now that the debugfs
589 * interface exists, we can mangle things there if need be. 589 * interface exists, we can mangle things there if need be.
590 * 590 *
591 * NOTE: We also drop the standard "pad" value (the total lock 591 * NOTE: We also drop the standard "pad" value (the total lock
592 * name size stays the same though - the last part is all 592 * name size stays the same though - the last part is all
593 * zeros due to the memset in ocfs2_lock_res_init_once() 593 * zeros due to the memset in ocfs2_lock_res_init_once()
594 */ 594 */
595 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 595 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
596 "%c%016llx", 596 "%c%016llx",
597 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 597 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
598 (long long)parent); 598 (long long)parent);
599 599
600 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 600 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
601 601
602 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 602 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
603 sizeof(__be64)); 603 sizeof(__be64));
604 604
605 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 605 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
606 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 606 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
607 dl); 607 dl);
608 } 608 }
609 609
610 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 610 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
611 struct ocfs2_super *osb) 611 struct ocfs2_super *osb)
612 { 612 {
613 /* Superblock lockres doesn't come from a slab so we call init 613 /* Superblock lockres doesn't come from a slab so we call init
614 * once on it manually. */ 614 * once on it manually. */
615 ocfs2_lock_res_init_once(res); 615 ocfs2_lock_res_init_once(res);
616 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 616 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
617 0, res->l_name); 617 0, res->l_name);
618 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 618 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
619 &ocfs2_super_lops, osb); 619 &ocfs2_super_lops, osb);
620 } 620 }
621 621
622 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 622 static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
623 struct ocfs2_super *osb) 623 struct ocfs2_super *osb)
624 { 624 {
625 /* Rename lockres doesn't come from a slab so we call init 625 /* Rename lockres doesn't come from a slab so we call init
626 * once on it manually. */ 626 * once on it manually. */
627 ocfs2_lock_res_init_once(res); 627 ocfs2_lock_res_init_once(res);
628 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 628 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
629 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 629 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
630 &ocfs2_rename_lops, osb); 630 &ocfs2_rename_lops, osb);
631 } 631 }
632 632
633 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 633 static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
634 struct ocfs2_super *osb) 634 struct ocfs2_super *osb)
635 { 635 {
636 /* nfs_sync lockres doesn't come from a slab so we call init 636 /* nfs_sync lockres doesn't come from a slab so we call init
637 * once on it manually. */ 637 * once on it manually. */
638 ocfs2_lock_res_init_once(res); 638 ocfs2_lock_res_init_once(res);
639 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 639 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
640 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 640 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
641 &ocfs2_nfs_sync_lops, osb); 641 &ocfs2_nfs_sync_lops, osb);
642 } 642 }
643 643
644 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 644 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
645 struct ocfs2_super *osb) 645 struct ocfs2_super *osb)
646 { 646 {
647 struct ocfs2_orphan_scan_lvb *lvb;
648
649 ocfs2_lock_res_init_once(res); 647 ocfs2_lock_res_init_once(res);
650 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 648 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
651 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 649 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
652 &ocfs2_orphan_scan_lops, osb); 650 &ocfs2_orphan_scan_lops, osb);
653 lvb = ocfs2_dlm_lvb(&res->l_lksb);
654 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
655 } 651 }
656 652
657 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 653 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
658 struct ocfs2_file_private *fp) 654 struct ocfs2_file_private *fp)
659 { 655 {
660 struct inode *inode = fp->fp_file->f_mapping->host; 656 struct inode *inode = fp->fp_file->f_mapping->host;
661 struct ocfs2_inode_info *oi = OCFS2_I(inode); 657 struct ocfs2_inode_info *oi = OCFS2_I(inode);
662 658
663 ocfs2_lock_res_init_once(lockres); 659 ocfs2_lock_res_init_once(lockres);
664 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 660 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
665 inode->i_generation, lockres->l_name); 661 inode->i_generation, lockres->l_name);
666 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 662 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
667 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 663 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
668 fp); 664 fp);
669 lockres->l_flags |= OCFS2_LOCK_NOCACHE; 665 lockres->l_flags |= OCFS2_LOCK_NOCACHE;
670 } 666 }
671 667
672 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 668 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
673 struct ocfs2_mem_dqinfo *info) 669 struct ocfs2_mem_dqinfo *info)
674 { 670 {
675 ocfs2_lock_res_init_once(lockres); 671 ocfs2_lock_res_init_once(lockres);
676 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, 672 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
677 0, lockres->l_name); 673 0, lockres->l_name);
678 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 674 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
679 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 675 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
680 info); 676 info);
681 } 677 }
682 678
683 void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 679 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
684 { 680 {
685 mlog_entry_void(); 681 mlog_entry_void();
686 682
687 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 683 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
688 return; 684 return;
689 685
690 ocfs2_remove_lockres_tracking(res); 686 ocfs2_remove_lockres_tracking(res);
691 687
692 mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 688 mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
693 "Lockres %s is on the blocked list\n", 689 "Lockres %s is on the blocked list\n",
694 res->l_name); 690 res->l_name);
695 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 691 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
696 "Lockres %s has mask waiters pending\n", 692 "Lockres %s has mask waiters pending\n",
697 res->l_name); 693 res->l_name);
698 mlog_bug_on_msg(spin_is_locked(&res->l_lock), 694 mlog_bug_on_msg(spin_is_locked(&res->l_lock),
699 "Lockres %s is locked\n", 695 "Lockres %s is locked\n",
700 res->l_name); 696 res->l_name);
701 mlog_bug_on_msg(res->l_ro_holders, 697 mlog_bug_on_msg(res->l_ro_holders,
702 "Lockres %s has %u ro holders\n", 698 "Lockres %s has %u ro holders\n",
703 res->l_name, res->l_ro_holders); 699 res->l_name, res->l_ro_holders);
704 mlog_bug_on_msg(res->l_ex_holders, 700 mlog_bug_on_msg(res->l_ex_holders,
705 "Lockres %s has %u ex holders\n", 701 "Lockres %s has %u ex holders\n",
706 res->l_name, res->l_ex_holders); 702 res->l_name, res->l_ex_holders);
707 703
708 /* Need to clear out the lock status block for the dlm */ 704 /* Need to clear out the lock status block for the dlm */
709 memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 705 memset(&res->l_lksb, 0, sizeof(res->l_lksb));
710 706
711 res->l_flags = 0UL; 707 res->l_flags = 0UL;
712 mlog_exit_void(); 708 mlog_exit_void();
713 } 709 }
714 710
715 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 711 static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
716 int level) 712 int level)
717 { 713 {
718 mlog_entry_void(); 714 mlog_entry_void();
719 715
720 BUG_ON(!lockres); 716 BUG_ON(!lockres);
721 717
722 switch(level) { 718 switch(level) {
723 case DLM_LOCK_EX: 719 case DLM_LOCK_EX:
724 lockres->l_ex_holders++; 720 lockres->l_ex_holders++;
725 break; 721 break;
726 case DLM_LOCK_PR: 722 case DLM_LOCK_PR:
727 lockres->l_ro_holders++; 723 lockres->l_ro_holders++;
728 break; 724 break;
729 default: 725 default:
730 BUG(); 726 BUG();
731 } 727 }
732 728
733 mlog_exit_void(); 729 mlog_exit_void();
734 } 730 }
735 731
736 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 732 static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
737 int level) 733 int level)
738 { 734 {
739 mlog_entry_void(); 735 mlog_entry_void();
740 736
741 BUG_ON(!lockres); 737 BUG_ON(!lockres);
742 738
743 switch(level) { 739 switch(level) {
744 case DLM_LOCK_EX: 740 case DLM_LOCK_EX:
745 BUG_ON(!lockres->l_ex_holders); 741 BUG_ON(!lockres->l_ex_holders);
746 lockres->l_ex_holders--; 742 lockres->l_ex_holders--;
747 break; 743 break;
748 case DLM_LOCK_PR: 744 case DLM_LOCK_PR:
749 BUG_ON(!lockres->l_ro_holders); 745 BUG_ON(!lockres->l_ro_holders);
750 lockres->l_ro_holders--; 746 lockres->l_ro_holders--;
751 break; 747 break;
752 default: 748 default:
753 BUG(); 749 BUG();
754 } 750 }
755 mlog_exit_void(); 751 mlog_exit_void();
756 } 752 }
757 753
758 /* WARNING: This function lives in a world where the only three lock 754 /* WARNING: This function lives in a world where the only three lock
759 * levels are EX, PR, and NL. It *will* have to be adjusted when more 755 * levels are EX, PR, and NL. It *will* have to be adjusted when more
760 * lock types are added. */ 756 * lock types are added. */
761 static inline int ocfs2_highest_compat_lock_level(int level) 757 static inline int ocfs2_highest_compat_lock_level(int level)
762 { 758 {
763 int new_level = DLM_LOCK_EX; 759 int new_level = DLM_LOCK_EX;
764 760
765 if (level == DLM_LOCK_EX) 761 if (level == DLM_LOCK_EX)
766 new_level = DLM_LOCK_NL; 762 new_level = DLM_LOCK_NL;
767 else if (level == DLM_LOCK_PR) 763 else if (level == DLM_LOCK_PR)
768 new_level = DLM_LOCK_PR; 764 new_level = DLM_LOCK_PR;
769 return new_level; 765 return new_level;
770 } 766 }
771 767
772 static void lockres_set_flags(struct ocfs2_lock_res *lockres, 768 static void lockres_set_flags(struct ocfs2_lock_res *lockres,
773 unsigned long newflags) 769 unsigned long newflags)
774 { 770 {
775 struct ocfs2_mask_waiter *mw, *tmp; 771 struct ocfs2_mask_waiter *mw, *tmp;
776 772
777 assert_spin_locked(&lockres->l_lock); 773 assert_spin_locked(&lockres->l_lock);
778 774
779 lockres->l_flags = newflags; 775 lockres->l_flags = newflags;
780 776
781 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 777 list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
782 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 778 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
783 continue; 779 continue;
784 780
785 list_del_init(&mw->mw_item); 781 list_del_init(&mw->mw_item);
786 mw->mw_status = 0; 782 mw->mw_status = 0;
787 complete(&mw->mw_complete); 783 complete(&mw->mw_complete);
788 } 784 }
789 } 785 }
790 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 786 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
791 { 787 {
792 lockres_set_flags(lockres, lockres->l_flags | or); 788 lockres_set_flags(lockres, lockres->l_flags | or);
793 } 789 }
794 static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 790 static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
795 unsigned long clear) 791 unsigned long clear)
796 { 792 {
797 lockres_set_flags(lockres, lockres->l_flags & ~clear); 793 lockres_set_flags(lockres, lockres->l_flags & ~clear);
798 } 794 }
799 795
800 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 796 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
801 { 797 {
802 mlog_entry_void(); 798 mlog_entry_void();
803 799
804 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 800 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
805 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 801 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
806 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 802 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
807 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 803 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
808 804
809 lockres->l_level = lockres->l_requested; 805 lockres->l_level = lockres->l_requested;
810 if (lockres->l_level <= 806 if (lockres->l_level <=
811 ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 807 ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
812 lockres->l_blocking = DLM_LOCK_NL; 808 lockres->l_blocking = DLM_LOCK_NL;
813 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 809 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
814 } 810 }
815 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 811 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
816 812
817 mlog_exit_void(); 813 mlog_exit_void();
818 } 814 }
819 815
820 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 816 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
821 { 817 {
822 mlog_entry_void(); 818 mlog_entry_void();
823 819
824 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 820 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
825 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 821 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
826 822
827 /* Convert from RO to EX doesn't really need anything as our 823 /* Convert from RO to EX doesn't really need anything as our
828 * information is already up to data. Convert from NL to 824 * information is already up to data. Convert from NL to
829 * *anything* however should mark ourselves as needing an 825 * *anything* however should mark ourselves as needing an
830 * update */ 826 * update */
831 if (lockres->l_level == DLM_LOCK_NL && 827 if (lockres->l_level == DLM_LOCK_NL &&
832 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 828 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
833 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 829 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
834 830
835 lockres->l_level = lockres->l_requested; 831 lockres->l_level = lockres->l_requested;
836 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 832 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
837 833
838 mlog_exit_void(); 834 mlog_exit_void();
839 } 835 }
840 836
841 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 837 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
842 { 838 {
843 mlog_entry_void(); 839 mlog_entry_void();
844 840
845 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 841 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
846 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 842 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
847 843
848 if (lockres->l_requested > DLM_LOCK_NL && 844 if (lockres->l_requested > DLM_LOCK_NL &&
849 !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 845 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
850 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 846 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
851 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 847 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
852 848
853 lockres->l_level = lockres->l_requested; 849 lockres->l_level = lockres->l_requested;
854 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 850 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
855 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 851 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
856 852
857 mlog_exit_void(); 853 mlog_exit_void();
858 } 854 }
859 855
860 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 856 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
861 int level) 857 int level)
862 { 858 {
863 int needs_downconvert = 0; 859 int needs_downconvert = 0;
864 mlog_entry_void(); 860 mlog_entry_void();
865 861
866 assert_spin_locked(&lockres->l_lock); 862 assert_spin_locked(&lockres->l_lock);
867 863
868 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 864 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
869 865
870 if (level > lockres->l_blocking) { 866 if (level > lockres->l_blocking) {
871 /* only schedule a downconvert if we haven't already scheduled 867 /* only schedule a downconvert if we haven't already scheduled
872 * one that goes low enough to satisfy the level we're 868 * one that goes low enough to satisfy the level we're
873 * blocking. this also catches the case where we get 869 * blocking. this also catches the case where we get
874 * duplicate BASTs */ 870 * duplicate BASTs */
875 if (ocfs2_highest_compat_lock_level(level) < 871 if (ocfs2_highest_compat_lock_level(level) <
876 ocfs2_highest_compat_lock_level(lockres->l_blocking)) 872 ocfs2_highest_compat_lock_level(lockres->l_blocking))
877 needs_downconvert = 1; 873 needs_downconvert = 1;
878 874
879 lockres->l_blocking = level; 875 lockres->l_blocking = level;
880 } 876 }
881 877
882 mlog_exit(needs_downconvert); 878 mlog_exit(needs_downconvert);
883 return needs_downconvert; 879 return needs_downconvert;
884 } 880 }
885 881
/*
 * OCFS2_LOCK_PENDING and l_pending_gen.
 *
 * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
 * for more details on the race.
 *
 * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
 * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
 * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
 * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
 * the caller is going to try to clear PENDING again.  If nothing else is
 * happening, __lockres_clear_pending() sees PENDING is unset and does
 * nothing.
 *
 * But what if another path (eg downconvert thread) has just started a
 * new locking action?  The other path has re-set PENDING.  Our path
 * cannot clear PENDING, because that will re-open the original race
 * window.
 *
 * [Example]
 *
 * ocfs2_meta_lock()
 *  ocfs2_cluster_lock()
 *   set BUSY
 *   set PENDING
 *   drop l_lock
 *   ocfs2_dlm_lock()
 *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
 *     clear PENDING			 ocfs2_unblock_lock()
 *					  take_l_lock
 *					  !BUSY
 *					  ocfs2_prepare_downconvert()
 *					   set BUSY
 *					   set PENDING
 *					  drop l_lock
 *   take l_lock
 *   clear PENDING
 *   drop l_lock
 *			<window>
 *					  ocfs2_dlm_lock()
 *
 * So as you can see, we now have a window where l_lock is not held,
 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
 *
 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
 * set by ocfs2_prepare_downconvert().  That wasn't nice.
 *
 * To solve this we introduce l_pending_gen.  A call to
 * lockres_clear_pending() will only do so when it is passed a generation
 * number that matches the lockres.  lockres_set_pending() will return the
 * current generation number.  When ocfs2_cluster_lock() goes to clear
 * PENDING, it passes the generation it got from set_pending().  In our
 * example above, the generation numbers will *not* match.  Thus,
 * ocfs2_cluster_lock() will not clear the PENDING set by
 * ocfs2_prepare_downconvert().
 */

943 939
944 /* Unlocked version for ocfs2_locking_ast() */ 940 /* Unlocked version for ocfs2_locking_ast() */
945 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 941 static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
946 unsigned int generation, 942 unsigned int generation,
947 struct ocfs2_super *osb) 943 struct ocfs2_super *osb)
948 { 944 {
949 assert_spin_locked(&lockres->l_lock); 945 assert_spin_locked(&lockres->l_lock);
950 946
951 /* 947 /*
952 * The ast and locking functions can race us here. The winner 948 * The ast and locking functions can race us here. The winner
953 * will clear pending, the loser will not. 949 * will clear pending, the loser will not.
954 */ 950 */
955 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 951 if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
956 (lockres->l_pending_gen != generation)) 952 (lockres->l_pending_gen != generation))
957 return; 953 return;
958 954
959 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 955 lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
960 lockres->l_pending_gen++; 956 lockres->l_pending_gen++;
961 957
962 /* 958 /*
963 * The downconvert thread may have skipped us because we 959 * The downconvert thread may have skipped us because we
964 * were PENDING. Wake it up. 960 * were PENDING. Wake it up.
965 */ 961 */
966 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 962 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
967 ocfs2_wake_downconvert_thread(osb); 963 ocfs2_wake_downconvert_thread(osb);
968 } 964 }
969 965
970 /* Locked version for callers of ocfs2_dlm_lock() */ 966 /* Locked version for callers of ocfs2_dlm_lock() */
971 static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 967 static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
972 unsigned int generation, 968 unsigned int generation,
973 struct ocfs2_super *osb) 969 struct ocfs2_super *osb)
974 { 970 {
975 unsigned long flags; 971 unsigned long flags;
976 972
977 spin_lock_irqsave(&lockres->l_lock, flags); 973 spin_lock_irqsave(&lockres->l_lock, flags);
978 __lockres_clear_pending(lockres, generation, osb); 974 __lockres_clear_pending(lockres, generation, osb);
979 spin_unlock_irqrestore(&lockres->l_lock, flags); 975 spin_unlock_irqrestore(&lockres->l_lock, flags);
980 } 976 }
981 977
982 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 978 static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
983 { 979 {
984 assert_spin_locked(&lockres->l_lock); 980 assert_spin_locked(&lockres->l_lock);
985 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 981 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
986 982
987 lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 983 lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
988 984
989 return lockres->l_pending_gen; 985 return lockres->l_pending_gen;
990 } 986 }
991 987
992 988
993 static void ocfs2_blocking_ast(void *opaque, int level) 989 static void ocfs2_blocking_ast(void *opaque, int level)
994 { 990 {
995 struct ocfs2_lock_res *lockres = opaque; 991 struct ocfs2_lock_res *lockres = opaque;
996 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 992 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
997 int needs_downconvert; 993 int needs_downconvert;
998 unsigned long flags; 994 unsigned long flags;
999 995
1000 BUG_ON(level <= DLM_LOCK_NL); 996 BUG_ON(level <= DLM_LOCK_NL);
1001 997
1002 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 998 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
1003 lockres->l_name, level, lockres->l_level, 999 lockres->l_name, level, lockres->l_level,
1004 ocfs2_lock_type_string(lockres->l_type)); 1000 ocfs2_lock_type_string(lockres->l_type));
1005 1001
1006 /* 1002 /*
1007 * We can skip the bast for locks which don't enable caching - 1003 * We can skip the bast for locks which don't enable caching -
1008 * they'll be dropped at the earliest possible time anyway. 1004 * they'll be dropped at the earliest possible time anyway.
1009 */ 1005 */
1010 if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1006 if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1011 return; 1007 return;
1012 1008
1013 spin_lock_irqsave(&lockres->l_lock, flags); 1009 spin_lock_irqsave(&lockres->l_lock, flags);
1014 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1010 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1015 if (needs_downconvert) 1011 if (needs_downconvert)
1016 ocfs2_schedule_blocked_lock(osb, lockres); 1012 ocfs2_schedule_blocked_lock(osb, lockres);
1017 spin_unlock_irqrestore(&lockres->l_lock, flags); 1013 spin_unlock_irqrestore(&lockres->l_lock, flags);
1018 1014
1019 wake_up(&lockres->l_event); 1015 wake_up(&lockres->l_event);
1020 1016
1021 ocfs2_wake_downconvert_thread(osb); 1017 ocfs2_wake_downconvert_thread(osb);
1022 } 1018 }
1023 1019
/*
 * AST callback for requests we issued through ocfs2_dlm_lock() (see
 * ocfs2_lock_create()/ocfs2_cluster_lock(), which pass the lockres as
 * the callback argument).  Dispatches on the l_action recorded when
 * the request was made, then clears the PENDING state.
 */
static void ocfs2_locking_ast(void *opaque)
{
	struct ocfs2_lock_res *lockres = opaque;
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	int status;

	spin_lock_irqsave(&lockres->l_lock, flags);

	status = ocfs2_dlm_lock_status(&lockres->l_lksb);

	/* Request wasn't granted (e.g. a NOQUEUE attempt); drop BUSY and
	 * fall through to the common PENDING/wakeup handling below. */
	if (status == -EAGAIN) {
		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
		goto out;
	}

	if (status) {
		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
		     lockres->l_name, status);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		return;
	}

	switch(lockres->l_action) {
	case OCFS2_AST_ATTACH:
		ocfs2_generic_handle_attach_action(lockres);
		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
		break;
	case OCFS2_AST_CONVERT:
		ocfs2_generic_handle_convert_action(lockres);
		break;
	case OCFS2_AST_DOWNCONVERT:
		ocfs2_generic_handle_downconvert_action(lockres);
		break;
	default:
		mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
		     "lockres flags = 0x%lx, unlock action: %u\n",
		     lockres->l_name, lockres->l_action, lockres->l_flags,
		     lockres->l_unlock_action);
		BUG();
	}
out:
	/* set it to something invalid so if we get called again we
	 * can catch it. */
	lockres->l_action = OCFS2_AST_INVALID;

	/* Did we try to cancel this lock?  Clear that state */
	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;

	/*
	 * We may have beaten the locking functions here.  We certainly
	 * know that dlm_lock() has been called :-)
	 * Because we can't have two lock calls in flight at once, we
	 * can use lockres->l_pending_gen.
	 */
	__lockres_clear_pending(lockres, lockres->l_pending_gen, osb);

	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}
1085 1081
1086 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1082 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1087 int convert) 1083 int convert)
1088 { 1084 {
1089 unsigned long flags; 1085 unsigned long flags;
1090 1086
1091 mlog_entry_void(); 1087 mlog_entry_void();
1092 spin_lock_irqsave(&lockres->l_lock, flags); 1088 spin_lock_irqsave(&lockres->l_lock, flags);
1093 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1089 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1094 if (convert) 1090 if (convert)
1095 lockres->l_action = OCFS2_AST_INVALID; 1091 lockres->l_action = OCFS2_AST_INVALID;
1096 else 1092 else
1097 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1093 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1098 spin_unlock_irqrestore(&lockres->l_lock, flags); 1094 spin_unlock_irqrestore(&lockres->l_lock, flags);
1099 1095
1100 wake_up(&lockres->l_event); 1096 wake_up(&lockres->l_event);
1101 mlog_exit_void(); 1097 mlog_exit_void();
1102 } 1098 }
1103 1099
/* Note: If we detect another process working on the lock (i.e.,
 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
 * to do the right thing in that case.
 */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags)
{
	int ret = 0;
	unsigned long flags;
	unsigned int gen;

	mlog_entry_void();

	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
	     dlm_flags);

	spin_lock_irqsave(&lockres->l_lock, flags);
	/* Already attached, or a call is in flight: nothing to create. */
	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	lockres->l_action = OCFS2_AST_ATTACH;
	lockres->l_requested = level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	/* Snapshot the pending generation so the ast can race us safely. */
	gen = lockres_set_pending(lockres);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_dlm_lock(osb->cconn,
			     level,
			     &lockres->l_lksb,
			     dlm_flags,
			     lockres->l_name,
			     OCFS2_LOCK_ID_MAX_LEN - 1,
			     lockres);
	lockres_clear_pending(lockres, gen, osb);
	if (ret) {
		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
		ocfs2_recover_from_dlm_error(lockres, 1);
	}

	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);

bail:
	mlog_exit(ret);
	return ret;
}
1154 1150
1155 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1151 static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1156 int flag) 1152 int flag)
1157 { 1153 {
1158 unsigned long flags; 1154 unsigned long flags;
1159 int ret; 1155 int ret;
1160 1156
1161 spin_lock_irqsave(&lockres->l_lock, flags); 1157 spin_lock_irqsave(&lockres->l_lock, flags);
1162 ret = lockres->l_flags & flag; 1158 ret = lockres->l_flags & flag;
1163 spin_unlock_irqrestore(&lockres->l_lock, flags); 1159 spin_unlock_irqrestore(&lockres->l_lock, flags);
1164 1160
1165 return ret; 1161 return ret;
1166 } 1162 }
1167 1163
/* Sleep on l_event until OCFS2_LOCK_BUSY clears, i.e. until no dlm
 * call is in flight on this lockres. */
static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
}
1174 1170
/* Sleep on l_event until OCFS2_LOCK_REFRESHING clears. */
static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
}
1181 1177
1182 /* predict what lock level we'll be dropping down to on behalf 1178 /* predict what lock level we'll be dropping down to on behalf
1183 * of another node, and return true if the currently wanted 1179 * of another node, and return true if the currently wanted
1184 * level will be compatible with it. */ 1180 * level will be compatible with it. */
1185 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1181 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1186 int wanted) 1182 int wanted)
1187 { 1183 {
1188 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1184 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1189 1185
1190 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1186 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1191 } 1187 }
1192 1188
1193 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1189 static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1194 { 1190 {
1195 INIT_LIST_HEAD(&mw->mw_item); 1191 INIT_LIST_HEAD(&mw->mw_item);
1196 init_completion(&mw->mw_complete); 1192 init_completion(&mw->mw_complete);
1197 ocfs2_init_start_time(mw); 1193 ocfs2_init_start_time(mw);
1198 } 1194 }
1199 1195
1200 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1196 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1201 { 1197 {
1202 wait_for_completion(&mw->mw_complete); 1198 wait_for_completion(&mw->mw_complete);
1203 /* Re-arm the completion in case we want to wait on it again */ 1199 /* Re-arm the completion in case we want to wait on it again */
1204 INIT_COMPLETION(mw->mw_complete); 1200 INIT_COMPLETION(mw->mw_complete);
1205 return mw->mw_status; 1201 return mw->mw_status;
1206 } 1202 }
1207 1203
1208 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1204 static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1209 struct ocfs2_mask_waiter *mw, 1205 struct ocfs2_mask_waiter *mw,
1210 unsigned long mask, 1206 unsigned long mask,
1211 unsigned long goal) 1207 unsigned long goal)
1212 { 1208 {
1213 BUG_ON(!list_empty(&mw->mw_item)); 1209 BUG_ON(!list_empty(&mw->mw_item));
1214 1210
1215 assert_spin_locked(&lockres->l_lock); 1211 assert_spin_locked(&lockres->l_lock);
1216 1212
1217 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1213 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1218 mw->mw_mask = mask; 1214 mw->mw_mask = mask;
1219 mw->mw_goal = goal; 1215 mw->mw_goal = goal;
1220 } 1216 }
1221 1217
1222 /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1218 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1223 * if the mask still hadn't reached its goal */ 1219 * if the mask still hadn't reached its goal */
1224 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1220 static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1225 struct ocfs2_mask_waiter *mw) 1221 struct ocfs2_mask_waiter *mw)
1226 { 1222 {
1227 unsigned long flags; 1223 unsigned long flags;
1228 int ret = 0; 1224 int ret = 0;
1229 1225
1230 spin_lock_irqsave(&lockres->l_lock, flags); 1226 spin_lock_irqsave(&lockres->l_lock, flags);
1231 if (!list_empty(&mw->mw_item)) { 1227 if (!list_empty(&mw->mw_item)) {
1232 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1228 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1233 ret = -EBUSY; 1229 ret = -EBUSY;
1234 1230
1235 list_del_init(&mw->mw_item); 1231 list_del_init(&mw->mw_item);
1236 init_completion(&mw->mw_complete); 1232 init_completion(&mw->mw_complete);
1237 } 1233 }
1238 spin_unlock_irqrestore(&lockres->l_lock, flags); 1234 spin_unlock_irqrestore(&lockres->l_lock, flags);
1239 1235
1240 return ret; 1236 return ret;
1241 1237
1242 } 1238 }
1243 1239
1244 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1240 static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1245 struct ocfs2_lock_res *lockres) 1241 struct ocfs2_lock_res *lockres)
1246 { 1242 {
1247 int ret; 1243 int ret;
1248 1244
1249 ret = wait_for_completion_interruptible(&mw->mw_complete); 1245 ret = wait_for_completion_interruptible(&mw->mw_complete);
1250 if (ret) 1246 if (ret)
1251 lockres_remove_mask_waiter(lockres, mw); 1247 lockres_remove_mask_waiter(lockres, mw);
1252 else 1248 else
1253 ret = mw->mw_status; 1249 ret = mw->mw_status;
1254 /* Re-arm the completion in case we want to wait on it again */ 1250 /* Re-arm the completion in case we want to wait on it again */
1255 INIT_COMPLETION(mw->mw_complete); 1251 INIT_COMPLETION(mw->mw_complete);
1256 return ret; 1252 return ret;
1257 } 1253 }
1258 1254
/*
 * Acquire (or upconvert to) @level on @lockres, retrying until the
 * cluster grants it.  Waits out in-flight dlm calls and blocked
 * downconverts via mask waiters; with OCFS2_LOCK_NONBLOCK in
 * @arg_flags it returns -EAGAIN instead of sleeping.
 */
static int ocfs2_cluster_lock(struct ocfs2_super *osb,
			      struct ocfs2_lock_res *lockres,
			      int level,
			      u32 lkm_flags,
			      int arg_flags)
{
	struct ocfs2_mask_waiter mw;
	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
	unsigned long flags;
	unsigned int gen;
	int noqueue_attempted = 0;

	mlog_entry_void();

	ocfs2_init_mask_waiter(&mw);

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

again:
	wait = 0;

	if (catch_signals && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto out;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);

	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
			"Cluster lock called on freeing lockres %s! flags "
			"0x%lx\n", lockres->l_name, lockres->l_flags);

	/* We only compare against the currently granted level
	 * here. If the lock is blocked waiting on a downconvert,
	 * we'll get caught below. */
	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
	    level > lockres->l_level) {
		/* is someone sitting in dlm_lock? If so, wait on
		 * them. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		wait = 1;
		goto unlock;
	}

	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
		/* is the lock is currently blocked on behalf of
		 * another node */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
		wait = 1;
		goto unlock;
	}

	if (level > lockres->l_level) {
		/* A NOQUEUE request already failed once; don't loop. */
		if (noqueue_attempted > 0) {
			ret = -EAGAIN;
			goto unlock;
		}
		if (lkm_flags & DLM_LKF_NOQUEUE)
			noqueue_attempted = 1;

		if (lockres->l_action != OCFS2_AST_INVALID)
			mlog(ML_ERROR, "lockres %s has action %u pending\n",
			     lockres->l_name, lockres->l_action);

		/* First call attaches the lock; later calls convert it. */
		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
			lockres->l_action = OCFS2_AST_ATTACH;
			lkm_flags &= ~DLM_LKF_CONVERT;
		} else {
			lockres->l_action = OCFS2_AST_CONVERT;
			lkm_flags |= DLM_LKF_CONVERT;
		}

		lockres->l_requested = level;
		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
		/* Snapshot the pending generation before dropping l_lock. */
		gen = lockres_set_pending(lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		mlog(0, "lock %s, convert from %d to level = %d\n",
		     lockres->l_name, lockres->l_level, level);

		/* call dlm_lock to upgrade lock now */
		ret = ocfs2_dlm_lock(osb->cconn,
				     level,
				     &lockres->l_lksb,
				     lkm_flags,
				     lockres->l_name,
				     OCFS2_LOCK_ID_MAX_LEN - 1,
				     lockres);
		lockres_clear_pending(lockres, gen, osb);
		if (ret) {
			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
			    (ret != -EAGAIN)) {
				ocfs2_log_dlm_error("ocfs2_dlm_lock",
						    ret, lockres);
			}
			ocfs2_recover_from_dlm_error(lockres, 1);
			goto out;
		}

		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
		     lockres->l_name);

		/* At this point we've gone inside the dlm and need to
		 * complete our work regardless. */
		catch_signals = 0;

		/* wait for busy to clear and carry on */
		goto again;
	}

	/* Ok, if we get here then we're good to go. */
	ocfs2_inc_holders(lockres, level);

	ret = 0;
unlock:
	spin_unlock_irqrestore(&lockres->l_lock, flags);
out:
	/*
	 * This is helping work around a lock inversion between the page lock
	 * and dlm locks.  One path holds the page lock while calling aops
	 * which block acquiring dlm locks.  The voting thread holds dlm
	 * locks while acquiring page locks while down converting data locks.
	 * This block is helping an aop path notice the inversion and back
	 * off to unlock its page lock before trying the dlm lock again.
	 */
	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
		wait = 0;
		if (lockres_remove_mask_waiter(lockres, &mw))
			ret = -EAGAIN;
		else
			goto again;
	}
	if (wait) {
		ret = ocfs2_wait_for_mask(&mw);
		if (ret == 0)
			goto again;
		mlog_errno(ret);
	}
	ocfs2_update_lock_stats(lockres, level, &mw, ret);

	mlog_exit(ret);
	return ret;
}
1409 1405
1410 static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1406 static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1411 struct ocfs2_lock_res *lockres, 1407 struct ocfs2_lock_res *lockres,
1412 int level) 1408 int level)
1413 { 1409 {
1414 unsigned long flags; 1410 unsigned long flags;
1415 1411
1416 mlog_entry_void(); 1412 mlog_entry_void();
1417 spin_lock_irqsave(&lockres->l_lock, flags); 1413 spin_lock_irqsave(&lockres->l_lock, flags);
1418 ocfs2_dec_holders(lockres, level); 1414 ocfs2_dec_holders(lockres, level);
1419 ocfs2_downconvert_on_unlock(osb, lockres); 1415 ocfs2_downconvert_on_unlock(osb, lockres);
1420 spin_unlock_irqrestore(&lockres->l_lock, flags); 1416 spin_unlock_irqrestore(&lockres->l_lock, flags);
1421 mlog_exit_void(); 1417 mlog_exit_void();
1422 } 1418 }
1423 1419
1424 static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1420 static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1425 struct ocfs2_lock_res *lockres, 1421 struct ocfs2_lock_res *lockres,
1426 int ex, 1422 int ex,
1427 int local) 1423 int local)
1428 { 1424 {
1429 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1425 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1430 unsigned long flags; 1426 unsigned long flags;
1431 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1427 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1432 1428
1433 spin_lock_irqsave(&lockres->l_lock, flags); 1429 spin_lock_irqsave(&lockres->l_lock, flags);
1434 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1430 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1435 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1431 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1436 spin_unlock_irqrestore(&lockres->l_lock, flags); 1432 spin_unlock_irqrestore(&lockres->l_lock, flags);
1437 1433
1438 return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1434 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1439 } 1435 }
1440 1436
1441 /* Grants us an EX lock on the data and metadata resources, skipping 1437 /* Grants us an EX lock on the data and metadata resources, skipping
1442 * the normal cluster directory lookup. Use this ONLY on newly created 1438 * the normal cluster directory lookup. Use this ONLY on newly created
1443 * inodes which other nodes can't possibly see, and which haven't been 1439 * inodes which other nodes can't possibly see, and which haven't been
1444 * hashed in the inode hash yet. This can give us a good performance 1440 * hashed in the inode hash yet. This can give us a good performance
1445 * increase as it'll skip the network broadcast normally associated 1441 * increase as it'll skip the network broadcast normally associated
1446 * with creating a new lock resource. */ 1442 * with creating a new lock resource. */
1447 int ocfs2_create_new_inode_locks(struct inode *inode) 1443 int ocfs2_create_new_inode_locks(struct inode *inode)
1448 { 1444 {
1449 int ret; 1445 int ret;
1450 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1451 1447
1452 BUG_ON(!inode); 1448 BUG_ON(!inode);
1453 BUG_ON(!ocfs2_inode_is_new(inode)); 1449 BUG_ON(!ocfs2_inode_is_new(inode));
1454 1450
1455 mlog_entry_void(); 1451 mlog_entry_void();
1456 1452
1457 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1453 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1458 1454
1459 /* NOTE: That we don't increment any of the holder counts, nor 1455 /* NOTE: That we don't increment any of the holder counts, nor
1460 * do we add anything to a journal handle. Since this is 1456 * do we add anything to a journal handle. Since this is
1461 * supposed to be a new inode which the cluster doesn't know 1457 * supposed to be a new inode which the cluster doesn't know
1462 * about yet, there is no need to. As far as the LVB handling 1458 * about yet, there is no need to. As far as the LVB handling
1463 * is concerned, this is basically like acquiring an EX lock 1459 * is concerned, this is basically like acquiring an EX lock
1464 * on a resource which has an invalid one -- we'll set it 1460 * on a resource which has an invalid one -- we'll set it
1465 * valid when we release the EX. */ 1461 * valid when we release the EX. */
1466 1462
1467 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1463 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1468 if (ret) { 1464 if (ret) {
1469 mlog_errno(ret); 1465 mlog_errno(ret);
1470 goto bail; 1466 goto bail;
1471 } 1467 }
1472 1468
1473 /* 1469 /*
1474 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 1470 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
1475 * don't use a generation in their lock names. 1471 * don't use a generation in their lock names.
1476 */ 1472 */
1477 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1473 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1478 if (ret) { 1474 if (ret) {
1479 mlog_errno(ret); 1475 mlog_errno(ret);
1480 goto bail; 1476 goto bail;
1481 } 1477 }
1482 1478
1483 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 1479 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
1484 if (ret) { 1480 if (ret) {
1485 mlog_errno(ret); 1481 mlog_errno(ret);
1486 goto bail; 1482 goto bail;
1487 } 1483 }
1488 1484
1489 bail: 1485 bail:
1490 mlog_exit(ret); 1486 mlog_exit(ret);
1491 return ret; 1487 return ret;
1492 } 1488 }
1493 1489
1494 int ocfs2_rw_lock(struct inode *inode, int write) 1490 int ocfs2_rw_lock(struct inode *inode, int write)
1495 { 1491 {
1496 int status, level; 1492 int status, level;
1497 struct ocfs2_lock_res *lockres; 1493 struct ocfs2_lock_res *lockres;
1498 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1494 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1499 1495
1500 BUG_ON(!inode); 1496 BUG_ON(!inode);
1501 1497
1502 mlog_entry_void(); 1498 mlog_entry_void();
1503 1499
1504 mlog(0, "inode %llu take %s RW lock\n", 1500 mlog(0, "inode %llu take %s RW lock\n",
1505 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1501 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1506 write ? "EXMODE" : "PRMODE"); 1502 write ? "EXMODE" : "PRMODE");
1507 1503
1508 if (ocfs2_mount_local(osb)) 1504 if (ocfs2_mount_local(osb))
1509 return 0; 1505 return 0;
1510 1506
1511 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1507 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1512 1508
1513 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1509 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1514 1510
1515 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1511 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1516 0); 1512 0);
1517 if (status < 0) 1513 if (status < 0)
1518 mlog_errno(status); 1514 mlog_errno(status);
1519 1515
1520 mlog_exit(status); 1516 mlog_exit(status);
1521 return status; 1517 return status;
1522 } 1518 }
1523 1519
1524 void ocfs2_rw_unlock(struct inode *inode, int write) 1520 void ocfs2_rw_unlock(struct inode *inode, int write)
1525 { 1521 {
1526 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1522 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1527 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1523 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1528 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1524 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1529 1525
1530 mlog_entry_void(); 1526 mlog_entry_void();
1531 1527
1532 mlog(0, "inode %llu drop %s RW lock\n", 1528 mlog(0, "inode %llu drop %s RW lock\n",
1533 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1529 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1534 write ? "EXMODE" : "PRMODE"); 1530 write ? "EXMODE" : "PRMODE");
1535 1531
1536 if (!ocfs2_mount_local(osb)) 1532 if (!ocfs2_mount_local(osb))
1537 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1533 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1538 1534
1539 mlog_exit_void(); 1535 mlog_exit_void();
1540 } 1536 }
1541 1537
1542 /* 1538 /*
1543 * ocfs2_open_lock always get PR mode lock. 1539 * ocfs2_open_lock always get PR mode lock.
1544 */ 1540 */
1545 int ocfs2_open_lock(struct inode *inode) 1541 int ocfs2_open_lock(struct inode *inode)
1546 { 1542 {
1547 int status = 0; 1543 int status = 0;
1548 struct ocfs2_lock_res *lockres; 1544 struct ocfs2_lock_res *lockres;
1549 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1545 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1550 1546
1551 BUG_ON(!inode); 1547 BUG_ON(!inode);
1552 1548
1553 mlog_entry_void(); 1549 mlog_entry_void();
1554 1550
1555 mlog(0, "inode %llu take PRMODE open lock\n", 1551 mlog(0, "inode %llu take PRMODE open lock\n",
1556 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1552 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1557 1553
1558 if (ocfs2_mount_local(osb)) 1554 if (ocfs2_mount_local(osb))
1559 goto out; 1555 goto out;
1560 1556
1561 lockres = &OCFS2_I(inode)->ip_open_lockres; 1557 lockres = &OCFS2_I(inode)->ip_open_lockres;
1562 1558
1563 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1559 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1564 DLM_LOCK_PR, 0, 0); 1560 DLM_LOCK_PR, 0, 0);
1565 if (status < 0) 1561 if (status < 0)
1566 mlog_errno(status); 1562 mlog_errno(status);
1567 1563
1568 out: 1564 out:
1569 mlog_exit(status); 1565 mlog_exit(status);
1570 return status; 1566 return status;
1571 } 1567 }
1572 1568
1573 int ocfs2_try_open_lock(struct inode *inode, int write) 1569 int ocfs2_try_open_lock(struct inode *inode, int write)
1574 { 1570 {
1575 int status = 0, level; 1571 int status = 0, level;
1576 struct ocfs2_lock_res *lockres; 1572 struct ocfs2_lock_res *lockres;
1577 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1578 1574
1579 BUG_ON(!inode); 1575 BUG_ON(!inode);
1580 1576
1581 mlog_entry_void(); 1577 mlog_entry_void();
1582 1578
1583 mlog(0, "inode %llu try to take %s open lock\n", 1579 mlog(0, "inode %llu try to take %s open lock\n",
1584 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1580 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1585 write ? "EXMODE" : "PRMODE"); 1581 write ? "EXMODE" : "PRMODE");
1586 1582
1587 if (ocfs2_mount_local(osb)) 1583 if (ocfs2_mount_local(osb))
1588 goto out; 1584 goto out;
1589 1585
1590 lockres = &OCFS2_I(inode)->ip_open_lockres; 1586 lockres = &OCFS2_I(inode)->ip_open_lockres;
1591 1587
1592 level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1588 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1593 1589
1594 /* 1590 /*
1595 * The file system may already holding a PRMODE/EXMODE open lock. 1591 * The file system may already holding a PRMODE/EXMODE open lock.
1596 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 1592 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
1597 * other nodes and the -EAGAIN will indicate to the caller that 1593 * other nodes and the -EAGAIN will indicate to the caller that
1598 * this inode is still in use. 1594 * this inode is still in use.
1599 */ 1595 */
1600 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1596 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1601 level, DLM_LKF_NOQUEUE, 0); 1597 level, DLM_LKF_NOQUEUE, 0);
1602 1598
1603 out: 1599 out:
1604 mlog_exit(status); 1600 mlog_exit(status);
1605 return status; 1601 return status;
1606 } 1602 }
1607 1603
1608 /* 1604 /*
1609 * ocfs2_open_unlock unlock PR and EX mode open locks. 1605 * ocfs2_open_unlock unlock PR and EX mode open locks.
1610 */ 1606 */
1611 void ocfs2_open_unlock(struct inode *inode) 1607 void ocfs2_open_unlock(struct inode *inode)
1612 { 1608 {
1613 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 1609 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1614 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1610 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1615 1611
1616 mlog_entry_void(); 1612 mlog_entry_void();
1617 1613
1618 mlog(0, "inode %llu drop open lock\n", 1614 mlog(0, "inode %llu drop open lock\n",
1619 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1615 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1620 1616
1621 if (ocfs2_mount_local(osb)) 1617 if (ocfs2_mount_local(osb))
1622 goto out; 1618 goto out;
1623 1619
1624 if(lockres->l_ro_holders) 1620 if(lockres->l_ro_holders)
1625 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1621 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1626 DLM_LOCK_PR); 1622 DLM_LOCK_PR);
1627 if(lockres->l_ex_holders) 1623 if(lockres->l_ex_holders)
1628 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1624 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1629 DLM_LOCK_EX); 1625 DLM_LOCK_EX);
1630 1626
1631 out: 1627 out:
1632 mlog_exit_void(); 1628 mlog_exit_void();
1633 } 1629 }
1634 1630
1635 static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1631 static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1636 int level) 1632 int level)
1637 { 1633 {
1638 int ret; 1634 int ret;
1639 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1635 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1640 unsigned long flags; 1636 unsigned long flags;
1641 struct ocfs2_mask_waiter mw; 1637 struct ocfs2_mask_waiter mw;
1642 1638
1643 ocfs2_init_mask_waiter(&mw); 1639 ocfs2_init_mask_waiter(&mw);
1644 1640
1645 retry_cancel: 1641 retry_cancel:
1646 spin_lock_irqsave(&lockres->l_lock, flags); 1642 spin_lock_irqsave(&lockres->l_lock, flags);
1647 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1643 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1648 ret = ocfs2_prepare_cancel_convert(osb, lockres); 1644 ret = ocfs2_prepare_cancel_convert(osb, lockres);
1649 if (ret) { 1645 if (ret) {
1650 spin_unlock_irqrestore(&lockres->l_lock, flags); 1646 spin_unlock_irqrestore(&lockres->l_lock, flags);
1651 ret = ocfs2_cancel_convert(osb, lockres); 1647 ret = ocfs2_cancel_convert(osb, lockres);
1652 if (ret < 0) { 1648 if (ret < 0) {
1653 mlog_errno(ret); 1649 mlog_errno(ret);
1654 goto out; 1650 goto out;
1655 } 1651 }
1656 goto retry_cancel; 1652 goto retry_cancel;
1657 } 1653 }
1658 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1654 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1659 spin_unlock_irqrestore(&lockres->l_lock, flags); 1655 spin_unlock_irqrestore(&lockres->l_lock, flags);
1660 1656
1661 ocfs2_wait_for_mask(&mw); 1657 ocfs2_wait_for_mask(&mw);
1662 goto retry_cancel; 1658 goto retry_cancel;
1663 } 1659 }
1664 1660
1665 ret = -ERESTARTSYS; 1661 ret = -ERESTARTSYS;
1666 /* 1662 /*
1667 * We may still have gotten the lock, in which case there's no 1663 * We may still have gotten the lock, in which case there's no
1668 * point to restarting the syscall. 1664 * point to restarting the syscall.
1669 */ 1665 */
1670 if (lockres->l_level == level) 1666 if (lockres->l_level == level)
1671 ret = 0; 1667 ret = 0;
1672 1668
1673 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1669 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1674 lockres->l_flags, lockres->l_level, lockres->l_action); 1670 lockres->l_flags, lockres->l_level, lockres->l_action);
1675 1671
1676 spin_unlock_irqrestore(&lockres->l_lock, flags); 1672 spin_unlock_irqrestore(&lockres->l_lock, flags);
1677 1673
1678 out: 1674 out:
1679 return ret; 1675 return ret;
1680 } 1676 }
1681 1677
1682 /* 1678 /*
1683 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1679 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1684 * flock() calls. The locking approach this requires is sufficiently 1680 * flock() calls. The locking approach this requires is sufficiently
1685 * different from all other cluster lock types that we implement a 1681 * different from all other cluster lock types that we implement a
1686 * seperate path to the "low-level" dlm calls. In particular: 1682 * seperate path to the "low-level" dlm calls. In particular:
1687 * 1683 *
1688 * - No optimization of lock levels is done - we take at exactly 1684 * - No optimization of lock levels is done - we take at exactly
1689 * what's been requested. 1685 * what's been requested.
1690 * 1686 *
1691 * - No lock caching is employed. We immediately downconvert to 1687 * - No lock caching is employed. We immediately downconvert to
1692 * no-lock at unlock time. This also means flock locks never go on 1688 * no-lock at unlock time. This also means flock locks never go on
1693 * the blocking list). 1689 * the blocking list).
1694 * 1690 *
1695 * - Since userspace can trivially deadlock itself with flock, we make 1691 * - Since userspace can trivially deadlock itself with flock, we make
1696 * sure to allow cancellation of a misbehaving applications flock() 1692 * sure to allow cancellation of a misbehaving applications flock()
1697 * request. 1693 * request.
1698 * 1694 *
1699 * - Access to any flock lockres doesn't require concurrency, so we 1695 * - Access to any flock lockres doesn't require concurrency, so we
1700 * can simplify the code by requiring the caller to guarantee 1696 * can simplify the code by requiring the caller to guarantee
1701 * serialization of dlmglue flock calls. 1697 * serialization of dlmglue flock calls.
1702 */ 1698 */
1703 int ocfs2_file_lock(struct file *file, int ex, int trylock) 1699 int ocfs2_file_lock(struct file *file, int ex, int trylock)
1704 { 1700 {
1705 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1701 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1706 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; 1702 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1707 unsigned long flags; 1703 unsigned long flags;
1708 struct ocfs2_file_private *fp = file->private_data; 1704 struct ocfs2_file_private *fp = file->private_data;
1709 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1705 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1710 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1706 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1711 struct ocfs2_mask_waiter mw; 1707 struct ocfs2_mask_waiter mw;
1712 1708
1713 ocfs2_init_mask_waiter(&mw); 1709 ocfs2_init_mask_waiter(&mw);
1714 1710
1715 if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1711 if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1716 (lockres->l_level > DLM_LOCK_NL)) { 1712 (lockres->l_level > DLM_LOCK_NL)) {
1717 mlog(ML_ERROR, 1713 mlog(ML_ERROR,
1718 "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1714 "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1719 "level: %u\n", lockres->l_name, lockres->l_flags, 1715 "level: %u\n", lockres->l_name, lockres->l_flags,
1720 lockres->l_level); 1716 lockres->l_level);
1721 return -EINVAL; 1717 return -EINVAL;
1722 } 1718 }
1723 1719
1724 spin_lock_irqsave(&lockres->l_lock, flags); 1720 spin_lock_irqsave(&lockres->l_lock, flags);
1725 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1721 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1726 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1722 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1727 spin_unlock_irqrestore(&lockres->l_lock, flags); 1723 spin_unlock_irqrestore(&lockres->l_lock, flags);
1728 1724
1729 /* 1725 /*
1730 * Get the lock at NLMODE to start - that way we 1726 * Get the lock at NLMODE to start - that way we
1731 * can cancel the upconvert request if need be. 1727 * can cancel the upconvert request if need be.
1732 */ 1728 */
1733 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1729 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1734 if (ret < 0) { 1730 if (ret < 0) {
1735 mlog_errno(ret); 1731 mlog_errno(ret);
1736 goto out; 1732 goto out;
1737 } 1733 }
1738 1734
1739 ret = ocfs2_wait_for_mask(&mw); 1735 ret = ocfs2_wait_for_mask(&mw);
1740 if (ret) { 1736 if (ret) {
1741 mlog_errno(ret); 1737 mlog_errno(ret);
1742 goto out; 1738 goto out;
1743 } 1739 }
1744 spin_lock_irqsave(&lockres->l_lock, flags); 1740 spin_lock_irqsave(&lockres->l_lock, flags);
1745 } 1741 }
1746 1742
1747 lockres->l_action = OCFS2_AST_CONVERT; 1743 lockres->l_action = OCFS2_AST_CONVERT;
1748 lkm_flags |= DLM_LKF_CONVERT; 1744 lkm_flags |= DLM_LKF_CONVERT;
1749 lockres->l_requested = level; 1745 lockres->l_requested = level;
1750 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1746 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1751 1747
1752 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1748 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1753 spin_unlock_irqrestore(&lockres->l_lock, flags); 1749 spin_unlock_irqrestore(&lockres->l_lock, flags);
1754 1750
1755 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1751 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1756 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 1752 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
1757 lockres); 1753 lockres);
1758 if (ret) { 1754 if (ret) {
1759 if (!trylock || (ret != -EAGAIN)) { 1755 if (!trylock || (ret != -EAGAIN)) {
1760 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1756 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1761 ret = -EINVAL; 1757 ret = -EINVAL;
1762 } 1758 }
1763 1759
1764 ocfs2_recover_from_dlm_error(lockres, 1); 1760 ocfs2_recover_from_dlm_error(lockres, 1);
1765 lockres_remove_mask_waiter(lockres, &mw); 1761 lockres_remove_mask_waiter(lockres, &mw);
1766 goto out; 1762 goto out;
1767 } 1763 }
1768 1764
1769 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1765 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1770 if (ret == -ERESTARTSYS) { 1766 if (ret == -ERESTARTSYS) {
1771 /* 1767 /*
1772 * Userspace can cause deadlock itself with 1768 * Userspace can cause deadlock itself with
1773 * flock(). Current behavior locally is to allow the 1769 * flock(). Current behavior locally is to allow the
1774 * deadlock, but abort the system call if a signal is 1770 * deadlock, but abort the system call if a signal is
1775 * received. We follow this example, otherwise a 1771 * received. We follow this example, otherwise a
1776 * poorly written program could sit in kernel until 1772 * poorly written program could sit in kernel until
1777 * reboot. 1773 * reboot.
1778 * 1774 *
1779 * Handling this is a bit more complicated for Ocfs2 1775 * Handling this is a bit more complicated for Ocfs2
1780 * though. We can't exit this function with an 1776 * though. We can't exit this function with an
1781 * outstanding lock request, so a cancel convert is 1777 * outstanding lock request, so a cancel convert is
1782 * required. We intentionally overwrite 'ret' - if the 1778 * required. We intentionally overwrite 'ret' - if the
1783 * cancel fails and the lock was granted, it's easier 1779 * cancel fails and the lock was granted, it's easier
1784 * to just bubble sucess back up to the user. 1780 * to just bubble sucess back up to the user.
1785 */ 1781 */
1786 ret = ocfs2_flock_handle_signal(lockres, level); 1782 ret = ocfs2_flock_handle_signal(lockres, level);
1787 } else if (!ret && (level > lockres->l_level)) { 1783 } else if (!ret && (level > lockres->l_level)) {
1788 /* Trylock failed asynchronously */ 1784 /* Trylock failed asynchronously */
1789 BUG_ON(!trylock); 1785 BUG_ON(!trylock);
1790 ret = -EAGAIN; 1786 ret = -EAGAIN;
1791 } 1787 }
1792 1788
1793 out: 1789 out:
1794 1790
1795 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1791 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
1796 lockres->l_name, ex, trylock, ret); 1792 lockres->l_name, ex, trylock, ret);
1797 return ret; 1793 return ret;
1798 } 1794 }
1799 1795
1800 void ocfs2_file_unlock(struct file *file) 1796 void ocfs2_file_unlock(struct file *file)
1801 { 1797 {
1802 int ret; 1798 int ret;
1803 unsigned int gen; 1799 unsigned int gen;
1804 unsigned long flags; 1800 unsigned long flags;
1805 struct ocfs2_file_private *fp = file->private_data; 1801 struct ocfs2_file_private *fp = file->private_data;
1806 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1802 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1807 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1803 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1808 struct ocfs2_mask_waiter mw; 1804 struct ocfs2_mask_waiter mw;
1809 1805
1810 ocfs2_init_mask_waiter(&mw); 1806 ocfs2_init_mask_waiter(&mw);
1811 1807
1812 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1808 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1813 return; 1809 return;
1814 1810
1815 if (lockres->l_level == DLM_LOCK_NL) 1811 if (lockres->l_level == DLM_LOCK_NL)
1816 return; 1812 return;
1817 1813
1818 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1814 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1819 lockres->l_name, lockres->l_flags, lockres->l_level, 1815 lockres->l_name, lockres->l_flags, lockres->l_level,
1820 lockres->l_action); 1816 lockres->l_action);
1821 1817
1822 spin_lock_irqsave(&lockres->l_lock, flags); 1818 spin_lock_irqsave(&lockres->l_lock, flags);
1823 /* 1819 /*
1824 * Fake a blocking ast for the downconvert code. 1820 * Fake a blocking ast for the downconvert code.
1825 */ 1821 */
1826 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1822 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1827 lockres->l_blocking = DLM_LOCK_EX; 1823 lockres->l_blocking = DLM_LOCK_EX;
1828 1824
1829 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 1825 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
1830 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1826 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1831 spin_unlock_irqrestore(&lockres->l_lock, flags); 1827 spin_unlock_irqrestore(&lockres->l_lock, flags);
1832 1828
1833 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 1829 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
1834 if (ret) { 1830 if (ret) {
1835 mlog_errno(ret); 1831 mlog_errno(ret);
1836 return; 1832 return;
1837 } 1833 }
1838 1834
1839 ret = ocfs2_wait_for_mask(&mw); 1835 ret = ocfs2_wait_for_mask(&mw);
1840 if (ret) 1836 if (ret)
1841 mlog_errno(ret); 1837 mlog_errno(ret);
1842 } 1838 }
1843 1839
1844 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1840 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1845 struct ocfs2_lock_res *lockres) 1841 struct ocfs2_lock_res *lockres)
1846 { 1842 {
1847 int kick = 0; 1843 int kick = 0;
1848 1844
1849 mlog_entry_void(); 1845 mlog_entry_void();
1850 1846
1851 /* If we know that another node is waiting on our lock, kick 1847 /* If we know that another node is waiting on our lock, kick
1852 * the downconvert thread * pre-emptively when we reach a release 1848 * the downconvert thread * pre-emptively when we reach a release
1853 * condition. */ 1849 * condition. */
1854 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1850 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1855 switch(lockres->l_blocking) { 1851 switch(lockres->l_blocking) {
1856 case DLM_LOCK_EX: 1852 case DLM_LOCK_EX:
1857 if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1853 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1858 kick = 1; 1854 kick = 1;
1859 break; 1855 break;
1860 case DLM_LOCK_PR: 1856 case DLM_LOCK_PR:
1861 if (!lockres->l_ex_holders) 1857 if (!lockres->l_ex_holders)
1862 kick = 1; 1858 kick = 1;
1863 break; 1859 break;
1864 default: 1860 default:
1865 BUG(); 1861 BUG();
1866 } 1862 }
1867 } 1863 }
1868 1864
1869 if (kick) 1865 if (kick)
1870 ocfs2_wake_downconvert_thread(osb); 1866 ocfs2_wake_downconvert_thread(osb);
1871 1867
1872 mlog_exit_void(); 1868 mlog_exit_void();
1873 } 1869 }
1874 1870
1875 #define OCFS2_SEC_BITS 34 1871 #define OCFS2_SEC_BITS 34
1876 #define OCFS2_SEC_SHIFT (64 - 34) 1872 #define OCFS2_SEC_SHIFT (64 - 34)
1877 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1873 #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)
1878 1874
1879 /* LVB only has room for 64 bits of time here so we pack it for 1875 /* LVB only has room for 64 bits of time here so we pack it for
1880 * now. */ 1876 * now. */
1881 static u64 ocfs2_pack_timespec(struct timespec *spec) 1877 static u64 ocfs2_pack_timespec(struct timespec *spec)
1882 { 1878 {
1883 u64 res; 1879 u64 res;
1884 u64 sec = spec->tv_sec; 1880 u64 sec = spec->tv_sec;
1885 u32 nsec = spec->tv_nsec; 1881 u32 nsec = spec->tv_nsec;
1886 1882
1887 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1883 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
1888 1884
1889 return res; 1885 return res;
1890 } 1886 }
1891 1887
1892 /* Call this with the lockres locked. I am reasonably sure we don't 1888 /* Call this with the lockres locked. I am reasonably sure we don't
1893 * need ip_lock in this function as anyone who would be changing those 1889 * need ip_lock in this function as anyone who would be changing those
1894 * values is supposed to be blocked in ocfs2_inode_lock right now. */ 1890 * values is supposed to be blocked in ocfs2_inode_lock right now. */
1895 static void __ocfs2_stuff_meta_lvb(struct inode *inode) 1891 static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1896 { 1892 {
1897 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1893 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1898 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1894 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
1899 struct ocfs2_meta_lvb *lvb; 1895 struct ocfs2_meta_lvb *lvb;
1900 1896
1901 mlog_entry_void(); 1897 mlog_entry_void();
1902 1898
1903 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1899 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1904 1900
1905 /* 1901 /*
1906 * Invalidate the LVB of a deleted inode - this way other 1902 * Invalidate the LVB of a deleted inode - this way other
1907 * nodes are forced to go to disk and discover the new inode 1903 * nodes are forced to go to disk and discover the new inode
1908 * status. 1904 * status.
1909 */ 1905 */
1910 if (oi->ip_flags & OCFS2_INODE_DELETED) { 1906 if (oi->ip_flags & OCFS2_INODE_DELETED) {
1911 lvb->lvb_version = 0; 1907 lvb->lvb_version = 0;
1912 goto out; 1908 goto out;
1913 } 1909 }
1914 1910
1915 lvb->lvb_version = OCFS2_LVB_VERSION; 1911 lvb->lvb_version = OCFS2_LVB_VERSION;
1916 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1912 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1917 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1913 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
1918 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1914 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
1919 lvb->lvb_igid = cpu_to_be32(inode->i_gid); 1915 lvb->lvb_igid = cpu_to_be32(inode->i_gid);
1920 lvb->lvb_imode = cpu_to_be16(inode->i_mode); 1916 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
1921 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 1917 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
1922 lvb->lvb_iatime_packed = 1918 lvb->lvb_iatime_packed =
1923 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 1919 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
1924 lvb->lvb_ictime_packed = 1920 lvb->lvb_ictime_packed =
1925 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1921 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
1926 lvb->lvb_imtime_packed = 1922 lvb->lvb_imtime_packed =
1927 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1923 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1928 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 1924 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1929 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 1925 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
1930 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1926 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1931 1927
1932 out: 1928 out:
1933 mlog_meta_lvb(0, lockres); 1929 mlog_meta_lvb(0, lockres);
1934 1930
1935 mlog_exit_void(); 1931 mlog_exit_void();
1936 } 1932 }
1937 1933
1938 static void ocfs2_unpack_timespec(struct timespec *spec, 1934 static void ocfs2_unpack_timespec(struct timespec *spec,
1939 u64 packed_time) 1935 u64 packed_time)
1940 { 1936 {
1941 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 1937 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
1942 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 1938 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
1943 } 1939 }
1944 1940
1945 static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 1941 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1946 { 1942 {
1947 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1943 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1948 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1944 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
1949 struct ocfs2_meta_lvb *lvb; 1945 struct ocfs2_meta_lvb *lvb;
1950 1946
1951 mlog_entry_void(); 1947 mlog_entry_void();
1952 1948
1953 mlog_meta_lvb(0, lockres); 1949 mlog_meta_lvb(0, lockres);
1954 1950
1955 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1951 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1956 1952
1957 /* We're safe here without the lockres lock... */ 1953 /* We're safe here without the lockres lock... */
1958 spin_lock(&oi->ip_lock); 1954 spin_lock(&oi->ip_lock);
1959 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1955 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
1960 i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1956 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
1961 1957
1962 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 1958 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
1963 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 1959 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
1964 ocfs2_set_inode_flags(inode); 1960 ocfs2_set_inode_flags(inode);
1965 1961
1966 /* fast-symlinks are a special case */ 1962 /* fast-symlinks are a special case */
1967 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1963 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
1968 inode->i_blocks = 0; 1964 inode->i_blocks = 0;
1969 else 1965 else
1970 inode->i_blocks = ocfs2_inode_sector_count(inode); 1966 inode->i_blocks = ocfs2_inode_sector_count(inode);
1971 1967
1972 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 1968 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
1973 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 1969 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
1974 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 1970 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
1975 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 1971 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
1976 ocfs2_unpack_timespec(&inode->i_atime, 1972 ocfs2_unpack_timespec(&inode->i_atime,
1977 be64_to_cpu(lvb->lvb_iatime_packed)); 1973 be64_to_cpu(lvb->lvb_iatime_packed));
1978 ocfs2_unpack_timespec(&inode->i_mtime, 1974 ocfs2_unpack_timespec(&inode->i_mtime,
1979 be64_to_cpu(lvb->lvb_imtime_packed)); 1975 be64_to_cpu(lvb->lvb_imtime_packed));
1980 ocfs2_unpack_timespec(&inode->i_ctime, 1976 ocfs2_unpack_timespec(&inode->i_ctime,
1981 be64_to_cpu(lvb->lvb_ictime_packed)); 1977 be64_to_cpu(lvb->lvb_ictime_packed));
1982 spin_unlock(&oi->ip_lock); 1978 spin_unlock(&oi->ip_lock);
1983 1979
1984 mlog_exit_void(); 1980 mlog_exit_void();
1985 } 1981 }
1986 1982
1987 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1983 static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1988 struct ocfs2_lock_res *lockres) 1984 struct ocfs2_lock_res *lockres)
1989 { 1985 {
1990 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1986 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1991 1987
1992 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 1988 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
1993 && lvb->lvb_version == OCFS2_LVB_VERSION 1989 && lvb->lvb_version == OCFS2_LVB_VERSION
1994 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1990 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1995 return 1; 1991 return 1;
1996 return 0; 1992 return 0;
1997 } 1993 }
1998 1994
1999 /* Determine whether a lock resource needs to be refreshed, and 1995 /* Determine whether a lock resource needs to be refreshed, and
2000 * arbitrate who gets to refresh it. 1996 * arbitrate who gets to refresh it.
2001 * 1997 *
2002 * 0 means no refresh needed. 1998 * 0 means no refresh needed.
2003 * 1999 *
2004 * > 0 means you need to refresh this and you MUST call 2000 * > 0 means you need to refresh this and you MUST call
2005 * ocfs2_complete_lock_res_refresh afterwards. */ 2001 * ocfs2_complete_lock_res_refresh afterwards. */
2006 static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2002 static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2007 { 2003 {
2008 unsigned long flags; 2004 unsigned long flags;
2009 int status = 0; 2005 int status = 0;
2010 2006
2011 mlog_entry_void(); 2007 mlog_entry_void();
2012 2008
2013 refresh_check: 2009 refresh_check:
2014 spin_lock_irqsave(&lockres->l_lock, flags); 2010 spin_lock_irqsave(&lockres->l_lock, flags);
2015 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2011 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2016 spin_unlock_irqrestore(&lockres->l_lock, flags); 2012 spin_unlock_irqrestore(&lockres->l_lock, flags);
2017 goto bail; 2013 goto bail;
2018 } 2014 }
2019 2015
2020 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2016 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2021 spin_unlock_irqrestore(&lockres->l_lock, flags); 2017 spin_unlock_irqrestore(&lockres->l_lock, flags);
2022 2018
2023 ocfs2_wait_on_refreshing_lock(lockres); 2019 ocfs2_wait_on_refreshing_lock(lockres);
2024 goto refresh_check; 2020 goto refresh_check;
2025 } 2021 }
2026 2022
2027 /* Ok, I'll be the one to refresh this lock. */ 2023 /* Ok, I'll be the one to refresh this lock. */
2028 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2024 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2029 spin_unlock_irqrestore(&lockres->l_lock, flags); 2025 spin_unlock_irqrestore(&lockres->l_lock, flags);
2030 2026
2031 status = 1; 2027 status = 1;
2032 bail: 2028 bail:
2033 mlog_exit(status); 2029 mlog_exit(status);
2034 return status; 2030 return status;
2035 } 2031 }
2036 2032
2037 /* If status is non zero, I'll mark it as not being in refresh 2033 /* If status is non zero, I'll mark it as not being in refresh
2038 * anymroe, but i won't clear the needs refresh flag. */ 2034 * anymroe, but i won't clear the needs refresh flag. */
2039 static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2035 static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2040 int status) 2036 int status)
2041 { 2037 {
2042 unsigned long flags; 2038 unsigned long flags;
2043 mlog_entry_void(); 2039 mlog_entry_void();
2044 2040
2045 spin_lock_irqsave(&lockres->l_lock, flags); 2041 spin_lock_irqsave(&lockres->l_lock, flags);
2046 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2042 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2047 if (!status) 2043 if (!status)
2048 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2044 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2049 spin_unlock_irqrestore(&lockres->l_lock, flags); 2045 spin_unlock_irqrestore(&lockres->l_lock, flags);
2050 2046
2051 wake_up(&lockres->l_event); 2047 wake_up(&lockres->l_event);
2052 2048
2053 mlog_exit_void(); 2049 mlog_exit_void();
2054 } 2050 }
2055 2051
2056 /* may or may not return a bh if it went to disk. */ 2052 /* may or may not return a bh if it went to disk. */
2057 static int ocfs2_inode_lock_update(struct inode *inode, 2053 static int ocfs2_inode_lock_update(struct inode *inode,
2058 struct buffer_head **bh) 2054 struct buffer_head **bh)
2059 { 2055 {
2060 int status = 0; 2056 int status = 0;
2061 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2057 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2062 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2058 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2063 struct ocfs2_dinode *fe; 2059 struct ocfs2_dinode *fe;
2064 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2060 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2065 2061
2066 mlog_entry_void(); 2062 mlog_entry_void();
2067 2063
2068 if (ocfs2_mount_local(osb)) 2064 if (ocfs2_mount_local(osb))
2069 goto bail; 2065 goto bail;
2070 2066
2071 spin_lock(&oi->ip_lock); 2067 spin_lock(&oi->ip_lock);
2072 if (oi->ip_flags & OCFS2_INODE_DELETED) { 2068 if (oi->ip_flags & OCFS2_INODE_DELETED) {
2073 mlog(0, "Orphaned inode %llu was deleted while we " 2069 mlog(0, "Orphaned inode %llu was deleted while we "
2074 "were waiting on a lock. ip_flags = 0x%x\n", 2070 "were waiting on a lock. ip_flags = 0x%x\n",
2075 (unsigned long long)oi->ip_blkno, oi->ip_flags); 2071 (unsigned long long)oi->ip_blkno, oi->ip_flags);
2076 spin_unlock(&oi->ip_lock); 2072 spin_unlock(&oi->ip_lock);
2077 status = -ENOENT; 2073 status = -ENOENT;
2078 goto bail; 2074 goto bail;
2079 } 2075 }
2080 spin_unlock(&oi->ip_lock); 2076 spin_unlock(&oi->ip_lock);
2081 2077
2082 if (!ocfs2_should_refresh_lock_res(lockres)) 2078 if (!ocfs2_should_refresh_lock_res(lockres))
2083 goto bail; 2079 goto bail;
2084 2080
2085 /* This will discard any caching information we might have had 2081 /* This will discard any caching information we might have had
2086 * for the inode metadata. */ 2082 * for the inode metadata. */
2087 ocfs2_metadata_cache_purge(inode); 2083 ocfs2_metadata_cache_purge(inode);
2088 2084
2089 ocfs2_extent_map_trunc(inode, 0); 2085 ocfs2_extent_map_trunc(inode, 0);
2090 2086
2091 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2087 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2092 mlog(0, "Trusting LVB on inode %llu\n", 2088 mlog(0, "Trusting LVB on inode %llu\n",
2093 (unsigned long long)oi->ip_blkno); 2089 (unsigned long long)oi->ip_blkno);
2094 ocfs2_refresh_inode_from_lvb(inode); 2090 ocfs2_refresh_inode_from_lvb(inode);
2095 } else { 2091 } else {
2096 /* Boo, we have to go to disk. */ 2092 /* Boo, we have to go to disk. */
2097 /* read bh, cast, ocfs2_refresh_inode */ 2093 /* read bh, cast, ocfs2_refresh_inode */
2098 status = ocfs2_read_inode_block(inode, bh); 2094 status = ocfs2_read_inode_block(inode, bh);
2099 if (status < 0) { 2095 if (status < 0) {
2100 mlog_errno(status); 2096 mlog_errno(status);
2101 goto bail_refresh; 2097 goto bail_refresh;
2102 } 2098 }
2103 fe = (struct ocfs2_dinode *) (*bh)->b_data; 2099 fe = (struct ocfs2_dinode *) (*bh)->b_data;
2104 2100
2105 /* This is a good chance to make sure we're not 2101 /* This is a good chance to make sure we're not
2106 * locking an invalid object. ocfs2_read_inode_block() 2102 * locking an invalid object. ocfs2_read_inode_block()
2107 * already checked that the inode block is sane. 2103 * already checked that the inode block is sane.
2108 * 2104 *
2109 * We bug on a stale inode here because we checked 2105 * We bug on a stale inode here because we checked
2110 * above whether it was wiped from disk. The wiping 2106 * above whether it was wiped from disk. The wiping
2111 * node provides a guarantee that we receive that 2107 * node provides a guarantee that we receive that
2112 * message and can mark the inode before dropping any 2108 * message and can mark the inode before dropping any
2113 * locks associated with it. */ 2109 * locks associated with it. */
2114 mlog_bug_on_msg(inode->i_generation != 2110 mlog_bug_on_msg(inode->i_generation !=
2115 le32_to_cpu(fe->i_generation), 2111 le32_to_cpu(fe->i_generation),
2116 "Invalid dinode %llu disk generation: %u " 2112 "Invalid dinode %llu disk generation: %u "
2117 "inode->i_generation: %u\n", 2113 "inode->i_generation: %u\n",
2118 (unsigned long long)oi->ip_blkno, 2114 (unsigned long long)oi->ip_blkno,
2119 le32_to_cpu(fe->i_generation), 2115 le32_to_cpu(fe->i_generation),
2120 inode->i_generation); 2116 inode->i_generation);
2121 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2117 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2122 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2118 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2123 "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2119 "Stale dinode %llu dtime: %llu flags: 0x%x\n",
2124 (unsigned long long)oi->ip_blkno, 2120 (unsigned long long)oi->ip_blkno,
2125 (unsigned long long)le64_to_cpu(fe->i_dtime), 2121 (unsigned long long)le64_to_cpu(fe->i_dtime),
2126 le32_to_cpu(fe->i_flags)); 2122 le32_to_cpu(fe->i_flags));
2127 2123
2128 ocfs2_refresh_inode(inode, fe); 2124 ocfs2_refresh_inode(inode, fe);
2129 ocfs2_track_lock_refresh(lockres); 2125 ocfs2_track_lock_refresh(lockres);
2130 } 2126 }
2131 2127
2132 status = 0; 2128 status = 0;
2133 bail_refresh: 2129 bail_refresh:
2134 ocfs2_complete_lock_res_refresh(lockres, status); 2130 ocfs2_complete_lock_res_refresh(lockres, status);
2135 bail: 2131 bail:
2136 mlog_exit(status); 2132 mlog_exit(status);
2137 return status; 2133 return status;
2138 } 2134 }
2139 2135
/* Hand the caller a buffer_head for @inode's dinode: reuse
 * @passed_bh (taking an extra reference) when the lock update already
 * read it, otherwise read the inode block from disk. */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		/* Nothing was read for us - fetch the block ourselves. */
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);
		return status;
	}

	/* Ok, the update went to disk for us, use the returned bh. */
	*ret_bh = passed_bh;
	get_bh(*ret_bh);

	return 0;
}
2161 2157
2162 /* 2158 /*
2163 * returns < 0 error if the callback will never be called, otherwise 2159 * returns < 0 error if the callback will never be called, otherwise
2164 * the result of the lock will be communicated via the callback. 2160 * the result of the lock will be communicated via the callback.
2165 */ 2161 */
2166 int ocfs2_inode_lock_full(struct inode *inode, 2162 int ocfs2_inode_lock_full(struct inode *inode,
2167 struct buffer_head **ret_bh, 2163 struct buffer_head **ret_bh,
2168 int ex, 2164 int ex,
2169 int arg_flags) 2165 int arg_flags)
2170 { 2166 {
2171 int status, level, acquired; 2167 int status, level, acquired;
2172 u32 dlm_flags; 2168 u32 dlm_flags;
2173 struct ocfs2_lock_res *lockres = NULL; 2169 struct ocfs2_lock_res *lockres = NULL;
2174 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2170 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2175 struct buffer_head *local_bh = NULL; 2171 struct buffer_head *local_bh = NULL;
2176 2172
2177 BUG_ON(!inode); 2173 BUG_ON(!inode);
2178 2174
2179 mlog_entry_void(); 2175 mlog_entry_void();
2180 2176
2181 mlog(0, "inode %llu, take %s META lock\n", 2177 mlog(0, "inode %llu, take %s META lock\n",
2182 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2178 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2183 ex ? "EXMODE" : "PRMODE"); 2179 ex ? "EXMODE" : "PRMODE");
2184 2180
2185 status = 0; 2181 status = 0;
2186 acquired = 0; 2182 acquired = 0;
2187 /* We'll allow faking a readonly metadata lock for 2183 /* We'll allow faking a readonly metadata lock for
2188 * rodevices. */ 2184 * rodevices. */
2189 if (ocfs2_is_hard_readonly(osb)) { 2185 if (ocfs2_is_hard_readonly(osb)) {
2190 if (ex) 2186 if (ex)
2191 status = -EROFS; 2187 status = -EROFS;
2192 goto bail; 2188 goto bail;
2193 } 2189 }
2194 2190
2195 if (ocfs2_mount_local(osb)) 2191 if (ocfs2_mount_local(osb))
2196 goto local; 2192 goto local;
2197 2193
2198 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2194 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2199 ocfs2_wait_for_recovery(osb); 2195 ocfs2_wait_for_recovery(osb);
2200 2196
2201 lockres = &OCFS2_I(inode)->ip_inode_lockres; 2197 lockres = &OCFS2_I(inode)->ip_inode_lockres;
2202 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2198 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2203 dlm_flags = 0; 2199 dlm_flags = 0;
2204 if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2200 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2205 dlm_flags |= DLM_LKF_NOQUEUE; 2201 dlm_flags |= DLM_LKF_NOQUEUE;
2206 2202
2207 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2203 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
2208 if (status < 0) { 2204 if (status < 0) {
2209 if (status != -EAGAIN && status != -EIOCBRETRY) 2205 if (status != -EAGAIN && status != -EIOCBRETRY)
2210 mlog_errno(status); 2206 mlog_errno(status);
2211 goto bail; 2207 goto bail;
2212 } 2208 }
2213 2209
2214 /* Notify the error cleanup path to drop the cluster lock. */ 2210 /* Notify the error cleanup path to drop the cluster lock. */
2215 acquired = 1; 2211 acquired = 1;
2216 2212
2217 /* We wait twice because a node may have died while we were in 2213 /* We wait twice because a node may have died while we were in
2218 * the lower dlm layers. The second time though, we've 2214 * the lower dlm layers. The second time though, we've
2219 * committed to owning this lock so we don't allow signals to 2215 * committed to owning this lock so we don't allow signals to
2220 * abort the operation. */ 2216 * abort the operation. */
2221 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2217 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2222 ocfs2_wait_for_recovery(osb); 2218 ocfs2_wait_for_recovery(osb);
2223 2219
2224 local: 2220 local:
2225 /* 2221 /*
2226 * We only see this flag if we're being called from 2222 * We only see this flag if we're being called from
2227 * ocfs2_read_locked_inode(). It means we're locking an inode 2223 * ocfs2_read_locked_inode(). It means we're locking an inode
2228 * which hasn't been populated yet, so clear the refresh flag 2224 * which hasn't been populated yet, so clear the refresh flag
2229 * and let the caller handle it. 2225 * and let the caller handle it.
2230 */ 2226 */
2231 if (inode->i_state & I_NEW) { 2227 if (inode->i_state & I_NEW) {
2232 status = 0; 2228 status = 0;
2233 if (lockres) 2229 if (lockres)
2234 ocfs2_complete_lock_res_refresh(lockres, 0); 2230 ocfs2_complete_lock_res_refresh(lockres, 0);
2235 goto bail; 2231 goto bail;
2236 } 2232 }
2237 2233
2238 /* This is fun. The caller may want a bh back, or it may 2234 /* This is fun. The caller may want a bh back, or it may
2239 * not. ocfs2_inode_lock_update definitely wants one in, but 2235 * not. ocfs2_inode_lock_update definitely wants one in, but
2240 * may or may not read one, depending on what's in the 2236 * may or may not read one, depending on what's in the
2241 * LVB. The result of all of this is that we've *only* gone to 2237 * LVB. The result of all of this is that we've *only* gone to
2242 * disk if we have to, so the complexity is worthwhile. */ 2238 * disk if we have to, so the complexity is worthwhile. */
2243 status = ocfs2_inode_lock_update(inode, &local_bh); 2239 status = ocfs2_inode_lock_update(inode, &local_bh);
2244 if (status < 0) { 2240 if (status < 0) {
2245 if (status != -ENOENT) 2241 if (status != -ENOENT)
2246 mlog_errno(status); 2242 mlog_errno(status);
2247 goto bail; 2243 goto bail;
2248 } 2244 }
2249 2245
2250 if (ret_bh) { 2246 if (ret_bh) {
2251 status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2247 status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2252 if (status < 0) { 2248 if (status < 0) {
2253 mlog_errno(status); 2249 mlog_errno(status);
2254 goto bail; 2250 goto bail;
2255 } 2251 }
2256 } 2252 }
2257 2253
2258 bail: 2254 bail:
2259 if (status < 0) { 2255 if (status < 0) {
2260 if (ret_bh && (*ret_bh)) { 2256 if (ret_bh && (*ret_bh)) {
2261 brelse(*ret_bh); 2257 brelse(*ret_bh);
2262 *ret_bh = NULL; 2258 *ret_bh = NULL;
2263 } 2259 }
2264 if (acquired) 2260 if (acquired)
2265 ocfs2_inode_unlock(inode, ex); 2261 ocfs2_inode_unlock(inode, ex);
2266 } 2262 }
2267 2263
2268 if (local_bh) 2264 if (local_bh)
2269 brelse(local_bh); 2265 brelse(local_bh);
2270 2266
2271 mlog_exit(status); 2267 mlog_exit(status);
2272 return status; 2268 return status;
2273 } 2269 }
2274 2270
2275 /* 2271 /*
2276 * This is working around a lock inversion between tasks acquiring DLM 2272 * This is working around a lock inversion between tasks acquiring DLM
2277 * locks while holding a page lock and the downconvert thread which 2273 * locks while holding a page lock and the downconvert thread which
2278 * blocks dlm lock acquiry while acquiring page locks. 2274 * blocks dlm lock acquiry while acquiring page locks.
2279 * 2275 *
2280 * ** These _with_page variantes are only intended to be called from aop 2276 * ** These _with_page variantes are only intended to be called from aop
2281 * methods that hold page locks and return a very specific *positive* error 2277 * methods that hold page locks and return a very specific *positive* error
2282 * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2278 * code that aop methods pass up to the VFS -- test for errors with != 0. **
2283 * 2279 *
2284 * The DLM is called such that it returns -EAGAIN if it would have 2280 * The DLM is called such that it returns -EAGAIN if it would have
2285 * blocked waiting for the downconvert thread. In that case we unlock 2281 * blocked waiting for the downconvert thread. In that case we unlock
2286 * our page so the downconvert thread can make progress. Once we've 2282 * our page so the downconvert thread can make progress. Once we've
2287 * done this we have to return AOP_TRUNCATED_PAGE so the aop method 2283 * done this we have to return AOP_TRUNCATED_PAGE so the aop method
2288 * that called us can bubble that back up into the VFS who will then 2284 * that called us can bubble that back up into the VFS who will then
2289 * immediately retry the aop call. 2285 * immediately retry the aop call.
2290 * 2286 *
2291 * We do a blocking lock and immediate unlock before returning, though, so that 2287 * We do a blocking lock and immediate unlock before returning, though, so that
2292 * the lock has a great chance of being cached on this node by the time the VFS 2288 * the lock has a great chance of being cached on this node by the time the VFS
2293 * calls back to retry the aop. This has a potential to livelock as nodes 2289 * calls back to retry the aop. This has a potential to livelock as nodes
2294 * ping locks back and forth, but that's a risk we're willing to take to avoid 2290 * ping locks back and forth, but that's a risk we're willing to take to avoid
2295 * the lock inversion simply. 2291 * the lock inversion simply.
2296 */ 2292 */
2297 int ocfs2_inode_lock_with_page(struct inode *inode, 2293 int ocfs2_inode_lock_with_page(struct inode *inode,
2298 struct buffer_head **ret_bh, 2294 struct buffer_head **ret_bh,
2299 int ex, 2295 int ex,
2300 struct page *page) 2296 struct page *page)
2301 { 2297 {
2302 int ret; 2298 int ret;
2303 2299
2304 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2300 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2305 if (ret == -EAGAIN) { 2301 if (ret == -EAGAIN) {
2306 unlock_page(page); 2302 unlock_page(page);
2307 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2303 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2308 ocfs2_inode_unlock(inode, ex); 2304 ocfs2_inode_unlock(inode, ex);
2309 ret = AOP_TRUNCATED_PAGE; 2305 ret = AOP_TRUNCATED_PAGE;
2310 } 2306 }
2311 2307
2312 return ret; 2308 return ret;
2313 } 2309 }
2314 2310
2315 int ocfs2_inode_lock_atime(struct inode *inode, 2311 int ocfs2_inode_lock_atime(struct inode *inode,
2316 struct vfsmount *vfsmnt, 2312 struct vfsmount *vfsmnt,
2317 int *level) 2313 int *level)
2318 { 2314 {
2319 int ret; 2315 int ret;
2320 2316
2321 mlog_entry_void(); 2317 mlog_entry_void();
2322 ret = ocfs2_inode_lock(inode, NULL, 0); 2318 ret = ocfs2_inode_lock(inode, NULL, 0);
2323 if (ret < 0) { 2319 if (ret < 0) {
2324 mlog_errno(ret); 2320 mlog_errno(ret);
2325 return ret; 2321 return ret;
2326 } 2322 }
2327 2323
2328 /* 2324 /*
2329 * If we should update atime, we will get EX lock, 2325 * If we should update atime, we will get EX lock,
2330 * otherwise we just get PR lock. 2326 * otherwise we just get PR lock.
2331 */ 2327 */
2332 if (ocfs2_should_update_atime(inode, vfsmnt)) { 2328 if (ocfs2_should_update_atime(inode, vfsmnt)) {
2333 struct buffer_head *bh = NULL; 2329 struct buffer_head *bh = NULL;
2334 2330
2335 ocfs2_inode_unlock(inode, 0); 2331 ocfs2_inode_unlock(inode, 0);
2336 ret = ocfs2_inode_lock(inode, &bh, 1); 2332 ret = ocfs2_inode_lock(inode, &bh, 1);
2337 if (ret < 0) { 2333 if (ret < 0) {
2338 mlog_errno(ret); 2334 mlog_errno(ret);
2339 return ret; 2335 return ret;
2340 } 2336 }
2341 *level = 1; 2337 *level = 1;
2342 if (ocfs2_should_update_atime(inode, vfsmnt)) 2338 if (ocfs2_should_update_atime(inode, vfsmnt))
2343 ocfs2_update_inode_atime(inode, bh); 2339 ocfs2_update_inode_atime(inode, bh);
2344 if (bh) 2340 if (bh)
2345 brelse(bh); 2341 brelse(bh);
2346 } else 2342 } else
2347 *level = 0; 2343 *level = 0;
2348 2344
2349 mlog_exit(ret); 2345 mlog_exit(ret);
2350 return ret; 2346 return ret;
2351 } 2347 }
2352 2348
2353 void ocfs2_inode_unlock(struct inode *inode, 2349 void ocfs2_inode_unlock(struct inode *inode,
2354 int ex) 2350 int ex)
2355 { 2351 {
2356 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2352 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2357 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2353 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2358 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2354 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2359 2355
2360 mlog_entry_void(); 2356 mlog_entry_void();
2361 2357
2362 mlog(0, "inode %llu drop %s META lock\n", 2358 mlog(0, "inode %llu drop %s META lock\n",
2363 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2359 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2364 ex ? "EXMODE" : "PRMODE"); 2360 ex ? "EXMODE" : "PRMODE");
2365 2361
2366 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2362 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2367 !ocfs2_mount_local(osb)) 2363 !ocfs2_mount_local(osb))
2368 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2364 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2369 2365
2370 mlog_exit_void(); 2366 mlog_exit_void();
2371 } 2367 }
2372 2368
2373 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) 2369 int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
2374 { 2370 {
2375 struct ocfs2_lock_res *lockres; 2371 struct ocfs2_lock_res *lockres;
2376 struct ocfs2_orphan_scan_lvb *lvb; 2372 struct ocfs2_orphan_scan_lvb *lvb;
2377 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2373 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2378 int status = 0; 2374 int status = 0;
2379 2375
2380 lockres = &osb->osb_orphan_scan.os_lockres; 2376 lockres = &osb->osb_orphan_scan.os_lockres;
2381 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2377 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2382 if (status < 0) 2378 if (status < 0)
2383 return status; 2379 return status;
2384 2380
2385 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2381 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2386 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 2382 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2387 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 2383 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
2388 *seqno = be32_to_cpu(lvb->lvb_os_seqno); 2384 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
2385 else
2386 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2387
2389 return status; 2388 return status;
2390 } 2389 }
2391 2390
2392 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) 2391 void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex)
2393 { 2392 {
2394 struct ocfs2_lock_res *lockres; 2393 struct ocfs2_lock_res *lockres;
2395 struct ocfs2_orphan_scan_lvb *lvb; 2394 struct ocfs2_orphan_scan_lvb *lvb;
2396 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2395 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2397 2396
2398 lockres = &osb->osb_orphan_scan.os_lockres; 2397 lockres = &osb->osb_orphan_scan.os_lockres;
2399 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2398 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2400 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 2399 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2401 lvb->lvb_os_seqno = cpu_to_be32(seqno); 2400 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2402 ocfs2_cluster_unlock(osb, lockres, level); 2401 ocfs2_cluster_unlock(osb, lockres, level);
2403 } 2402 }
2404 2403
2405 int ocfs2_super_lock(struct ocfs2_super *osb, 2404 int ocfs2_super_lock(struct ocfs2_super *osb,
2406 int ex) 2405 int ex)
2407 { 2406 {
2408 int status = 0; 2407 int status = 0;
2409 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2408 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2410 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2409 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2411 2410
2412 mlog_entry_void(); 2411 mlog_entry_void();
2413 2412
2414 if (ocfs2_is_hard_readonly(osb)) 2413 if (ocfs2_is_hard_readonly(osb))
2415 return -EROFS; 2414 return -EROFS;
2416 2415
2417 if (ocfs2_mount_local(osb)) 2416 if (ocfs2_mount_local(osb))
2418 goto bail; 2417 goto bail;
2419 2418
2420 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2419 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2421 if (status < 0) { 2420 if (status < 0) {
2422 mlog_errno(status); 2421 mlog_errno(status);
2423 goto bail; 2422 goto bail;
2424 } 2423 }
2425 2424
2426 /* The super block lock path is really in the best position to 2425 /* The super block lock path is really in the best position to
2427 * know when resources covered by the lock need to be 2426 * know when resources covered by the lock need to be
2428 * refreshed, so we do it here. Of course, making sense of 2427 * refreshed, so we do it here. Of course, making sense of
2429 * everything is up to the caller :) */ 2428 * everything is up to the caller :) */
2430 status = ocfs2_should_refresh_lock_res(lockres); 2429 status = ocfs2_should_refresh_lock_res(lockres);
2431 if (status < 0) { 2430 if (status < 0) {
2432 mlog_errno(status); 2431 mlog_errno(status);
2433 goto bail; 2432 goto bail;
2434 } 2433 }
2435 if (status) { 2434 if (status) {
2436 status = ocfs2_refresh_slot_info(osb); 2435 status = ocfs2_refresh_slot_info(osb);
2437 2436
2438 ocfs2_complete_lock_res_refresh(lockres, status); 2437 ocfs2_complete_lock_res_refresh(lockres, status);
2439 2438
2440 if (status < 0) 2439 if (status < 0)
2441 mlog_errno(status); 2440 mlog_errno(status);
2442 ocfs2_track_lock_refresh(lockres); 2441 ocfs2_track_lock_refresh(lockres);
2443 } 2442 }
2444 bail: 2443 bail:
2445 mlog_exit(status); 2444 mlog_exit(status);
2446 return status; 2445 return status;
2447 } 2446 }
2448 2447
2449 void ocfs2_super_unlock(struct ocfs2_super *osb, 2448 void ocfs2_super_unlock(struct ocfs2_super *osb,
2450 int ex) 2449 int ex)
2451 { 2450 {
2452 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2451 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2453 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2452 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2454 2453
2455 if (!ocfs2_mount_local(osb)) 2454 if (!ocfs2_mount_local(osb))
2456 ocfs2_cluster_unlock(osb, lockres, level); 2455 ocfs2_cluster_unlock(osb, lockres, level);
2457 } 2456 }
2458 2457
2459 int ocfs2_rename_lock(struct ocfs2_super *osb) 2458 int ocfs2_rename_lock(struct ocfs2_super *osb)
2460 { 2459 {
2461 int status; 2460 int status;
2462 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2461 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2463 2462
2464 if (ocfs2_is_hard_readonly(osb)) 2463 if (ocfs2_is_hard_readonly(osb))
2465 return -EROFS; 2464 return -EROFS;
2466 2465
2467 if (ocfs2_mount_local(osb)) 2466 if (ocfs2_mount_local(osb))
2468 return 0; 2467 return 0;
2469 2468
2470 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2469 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2471 if (status < 0) 2470 if (status < 0)
2472 mlog_errno(status); 2471 mlog_errno(status);
2473 2472
2474 return status; 2473 return status;
2475 } 2474 }
2476 2475
2477 void ocfs2_rename_unlock(struct ocfs2_super *osb) 2476 void ocfs2_rename_unlock(struct ocfs2_super *osb)
2478 { 2477 {
2479 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2478 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2480 2479
2481 if (!ocfs2_mount_local(osb)) 2480 if (!ocfs2_mount_local(osb))
2482 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2481 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2483 } 2482 }
2484 2483
2485 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 2484 int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
2486 { 2485 {
2487 int status; 2486 int status;
2488 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2487 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2489 2488
2490 if (ocfs2_is_hard_readonly(osb)) 2489 if (ocfs2_is_hard_readonly(osb))
2491 return -EROFS; 2490 return -EROFS;
2492 2491
2493 if (ocfs2_mount_local(osb)) 2492 if (ocfs2_mount_local(osb))
2494 return 0; 2493 return 0;
2495 2494
2496 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 2495 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
2497 0, 0); 2496 0, 0);
2498 if (status < 0) 2497 if (status < 0)
2499 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 2498 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
2500 2499
2501 return status; 2500 return status;
2502 } 2501 }
2503 2502
2504 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 2503 void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
2505 { 2504 {
2506 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 2505 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2507 2506
2508 if (!ocfs2_mount_local(osb)) 2507 if (!ocfs2_mount_local(osb))
2509 ocfs2_cluster_unlock(osb, lockres, 2508 ocfs2_cluster_unlock(osb, lockres,
2510 ex ? LKM_EXMODE : LKM_PRMODE); 2509 ex ? LKM_EXMODE : LKM_PRMODE);
2511 } 2510 }
2512 2511
2513 int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2512 int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2514 { 2513 {
2515 int ret; 2514 int ret;
2516 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2515 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2517 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2516 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2518 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2517 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2519 2518
2520 BUG_ON(!dl); 2519 BUG_ON(!dl);
2521 2520
2522 if (ocfs2_is_hard_readonly(osb)) 2521 if (ocfs2_is_hard_readonly(osb))
2523 return -EROFS; 2522 return -EROFS;
2524 2523
2525 if (ocfs2_mount_local(osb)) 2524 if (ocfs2_mount_local(osb))
2526 return 0; 2525 return 0;
2527 2526
2528 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2527 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2529 if (ret < 0) 2528 if (ret < 0)
2530 mlog_errno(ret); 2529 mlog_errno(ret);
2531 2530
2532 return ret; 2531 return ret;
2533 } 2532 }
2534 2533
2535 void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2534 void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2536 { 2535 {
2537 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2536 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2538 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2537 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2539 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2538 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2540 2539
2541 if (!ocfs2_mount_local(osb)) 2540 if (!ocfs2_mount_local(osb))
2542 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2541 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2543 } 2542 }
2544 2543
2545 /* Reference counting of the dlm debug structure. We want this because 2544 /* Reference counting of the dlm debug structure. We want this because
2546 * open references on the debug inodes can live on after a mount, so 2545 * open references on the debug inodes can live on after a mount, so
2547 * we can't rely on the ocfs2_super to always exist. */ 2546 * we can't rely on the ocfs2_super to always exist. */
2548 static void ocfs2_dlm_debug_free(struct kref *kref) 2547 static void ocfs2_dlm_debug_free(struct kref *kref)
2549 { 2548 {
2550 struct ocfs2_dlm_debug *dlm_debug; 2549 struct ocfs2_dlm_debug *dlm_debug;
2551 2550
2552 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2551 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2553 2552
2554 kfree(dlm_debug); 2553 kfree(dlm_debug);
2555 } 2554 }
2556 2555
2557 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2556 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2558 { 2557 {
2559 if (dlm_debug) 2558 if (dlm_debug)
2560 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2559 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2561 } 2560 }
2562 2561
2563 static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2562 static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2564 { 2563 {
2565 kref_get(&debug->d_refcnt); 2564 kref_get(&debug->d_refcnt);
2566 } 2565 }
2567 2566
2568 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2567 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2569 { 2568 {
2570 struct ocfs2_dlm_debug *dlm_debug; 2569 struct ocfs2_dlm_debug *dlm_debug;
2571 2570
2572 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2571 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2573 if (!dlm_debug) { 2572 if (!dlm_debug) {
2574 mlog_errno(-ENOMEM); 2573 mlog_errno(-ENOMEM);
2575 goto out; 2574 goto out;
2576 } 2575 }
2577 2576
2578 kref_init(&dlm_debug->d_refcnt); 2577 kref_init(&dlm_debug->d_refcnt);
2579 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2578 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2580 dlm_debug->d_locking_state = NULL; 2579 dlm_debug->d_locking_state = NULL;
2581 out: 2580 out:
2582 return dlm_debug; 2581 return dlm_debug;
2583 } 2582 }
2584 2583
2585 /* Access to this is arbitrated for us via seq_file->sem. */ 2584 /* Access to this is arbitrated for us via seq_file->sem. */
2586 struct ocfs2_dlm_seq_priv { 2585 struct ocfs2_dlm_seq_priv {
2587 struct ocfs2_dlm_debug *p_dlm_debug; 2586 struct ocfs2_dlm_debug *p_dlm_debug;
2588 struct ocfs2_lock_res p_iter_res; 2587 struct ocfs2_lock_res p_iter_res;
2589 struct ocfs2_lock_res p_tmp_res; 2588 struct ocfs2_lock_res p_tmp_res;
2590 }; 2589 };
2591 2590
2592 static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2591 static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2593 struct ocfs2_dlm_seq_priv *priv) 2592 struct ocfs2_dlm_seq_priv *priv)
2594 { 2593 {
2595 struct ocfs2_lock_res *iter, *ret = NULL; 2594 struct ocfs2_lock_res *iter, *ret = NULL;
2596 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2595 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2597 2596
2598 assert_spin_locked(&ocfs2_dlm_tracking_lock); 2597 assert_spin_locked(&ocfs2_dlm_tracking_lock);
2599 2598
2600 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2599 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2601 /* discover the head of the list */ 2600 /* discover the head of the list */
2602 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2601 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2603 mlog(0, "End of list found, %p\n", ret); 2602 mlog(0, "End of list found, %p\n", ret);
2604 break; 2603 break;
2605 } 2604 }
2606 2605
2607 /* We track our "dummy" iteration lockres' by a NULL 2606 /* We track our "dummy" iteration lockres' by a NULL
2608 * l_ops field. */ 2607 * l_ops field. */
2609 if (iter->l_ops != NULL) { 2608 if (iter->l_ops != NULL) {
2610 ret = iter; 2609 ret = iter;
2611 break; 2610 break;
2612 } 2611 }
2613 } 2612 }
2614 2613
2615 return ret; 2614 return ret;
2616 } 2615 }
2617 2616
2618 static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2617 static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2619 { 2618 {
2620 struct ocfs2_dlm_seq_priv *priv = m->private; 2619 struct ocfs2_dlm_seq_priv *priv = m->private;
2621 struct ocfs2_lock_res *iter; 2620 struct ocfs2_lock_res *iter;
2622 2621
2623 spin_lock(&ocfs2_dlm_tracking_lock); 2622 spin_lock(&ocfs2_dlm_tracking_lock);
2624 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2623 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2625 if (iter) { 2624 if (iter) {
2626 /* Since lockres' have the lifetime of their container 2625 /* Since lockres' have the lifetime of their container
2627 * (which can be inodes, ocfs2_supers, etc) we want to 2626 * (which can be inodes, ocfs2_supers, etc) we want to
2628 * copy this out to a temporary lockres while still 2627 * copy this out to a temporary lockres while still
2629 * under the spinlock. Obviously after this we can't 2628 * under the spinlock. Obviously after this we can't
2630 * trust any pointers on the copy returned, but that's 2629 * trust any pointers on the copy returned, but that's
2631 * ok as the information we want isn't typically held 2630 * ok as the information we want isn't typically held
2632 * in them. */ 2631 * in them. */
2633 priv->p_tmp_res = *iter; 2632 priv->p_tmp_res = *iter;
2634 iter = &priv->p_tmp_res; 2633 iter = &priv->p_tmp_res;
2635 } 2634 }
2636 spin_unlock(&ocfs2_dlm_tracking_lock); 2635 spin_unlock(&ocfs2_dlm_tracking_lock);
2637 2636
2638 return iter; 2637 return iter;
2639 } 2638 }
2640 2639
/* seq_file ->stop(): nothing to release — ->start() dropped the
 * tracking lock before returning. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
2644 2643
2645 static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2644 static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2646 { 2645 {
2647 struct ocfs2_dlm_seq_priv *priv = m->private; 2646 struct ocfs2_dlm_seq_priv *priv = m->private;
2648 struct ocfs2_lock_res *iter = v; 2647 struct ocfs2_lock_res *iter = v;
2649 struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2648 struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2650 2649
2651 spin_lock(&ocfs2_dlm_tracking_lock); 2650 spin_lock(&ocfs2_dlm_tracking_lock);
2652 iter = ocfs2_dlm_next_res(iter, priv); 2651 iter = ocfs2_dlm_next_res(iter, priv);
2653 list_del_init(&dummy->l_debug_list); 2652 list_del_init(&dummy->l_debug_list);
2654 if (iter) { 2653 if (iter) {
2655 list_add(&dummy->l_debug_list, &iter->l_debug_list); 2654 list_add(&dummy->l_debug_list, &iter->l_debug_list);
2656 priv->p_tmp_res = *iter; 2655 priv->p_tmp_res = *iter;
2657 iter = &priv->p_tmp_res; 2656 iter = &priv->p_tmp_res;
2658 } 2657 }
2659 spin_unlock(&ocfs2_dlm_tracking_lock); 2658 spin_unlock(&ocfs2_dlm_tracking_lock);
2660 2659
2661 return iter; 2660 return iter;
2662 } 2661 }
2663 2662
2664 /* So that debugfs.ocfs2 can determine which format is being used */ 2663 /* So that debugfs.ocfs2 can determine which format is being used */
2665 #define OCFS2_DLM_DEBUG_STR_VERSION 2 2664 #define OCFS2_DLM_DEBUG_STR_VERSION 2
2666 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2665 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2667 { 2666 {
2668 int i; 2667 int i;
2669 char *lvb; 2668 char *lvb;
2670 struct ocfs2_lock_res *lockres = v; 2669 struct ocfs2_lock_res *lockres = v;
2671 2670
2672 if (!lockres) 2671 if (!lockres)
2673 return -EINVAL; 2672 return -EINVAL;
2674 2673
2675 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2674 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2676 2675
2677 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2676 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2678 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2677 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2679 lockres->l_name, 2678 lockres->l_name,
2680 (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2679 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2681 else 2680 else
2682 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2681 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2683 2682
2684 seq_printf(m, "%d\t" 2683 seq_printf(m, "%d\t"
2685 "0x%lx\t" 2684 "0x%lx\t"
2686 "0x%x\t" 2685 "0x%x\t"
2687 "0x%x\t" 2686 "0x%x\t"
2688 "%u\t" 2687 "%u\t"
2689 "%u\t" 2688 "%u\t"
2690 "%d\t" 2689 "%d\t"
2691 "%d\t", 2690 "%d\t",
2692 lockres->l_level, 2691 lockres->l_level,
2693 lockres->l_flags, 2692 lockres->l_flags,
2694 lockres->l_action, 2693 lockres->l_action,
2695 lockres->l_unlock_action, 2694 lockres->l_unlock_action,
2696 lockres->l_ro_holders, 2695 lockres->l_ro_holders,
2697 lockres->l_ex_holders, 2696 lockres->l_ex_holders,
2698 lockres->l_requested, 2697 lockres->l_requested,
2699 lockres->l_blocking); 2698 lockres->l_blocking);
2700 2699
2701 /* Dump the raw LVB */ 2700 /* Dump the raw LVB */
2702 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2701 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2703 for(i = 0; i < DLM_LVB_LEN; i++) 2702 for(i = 0; i < DLM_LVB_LEN; i++)
2704 seq_printf(m, "0x%x\t", lvb[i]); 2703 seq_printf(m, "0x%x\t", lvb[i]);
2705 2704
2706 #ifdef CONFIG_OCFS2_FS_STATS 2705 #ifdef CONFIG_OCFS2_FS_STATS
2707 # define lock_num_prmode(_l) (_l)->l_lock_num_prmode 2706 # define lock_num_prmode(_l) (_l)->l_lock_num_prmode
2708 # define lock_num_exmode(_l) (_l)->l_lock_num_exmode 2707 # define lock_num_exmode(_l) (_l)->l_lock_num_exmode
2709 # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed 2708 # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed
2710 # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed 2709 # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed
2711 # define lock_total_prmode(_l) (_l)->l_lock_total_prmode 2710 # define lock_total_prmode(_l) (_l)->l_lock_total_prmode
2712 # define lock_total_exmode(_l) (_l)->l_lock_total_exmode 2711 # define lock_total_exmode(_l) (_l)->l_lock_total_exmode
2713 # define lock_max_prmode(_l) (_l)->l_lock_max_prmode 2712 # define lock_max_prmode(_l) (_l)->l_lock_max_prmode
2714 # define lock_max_exmode(_l) (_l)->l_lock_max_exmode 2713 # define lock_max_exmode(_l) (_l)->l_lock_max_exmode
2715 # define lock_refresh(_l) (_l)->l_lock_refresh 2714 # define lock_refresh(_l) (_l)->l_lock_refresh
2716 #else 2715 #else
2717 # define lock_num_prmode(_l) (0ULL) 2716 # define lock_num_prmode(_l) (0ULL)
2718 # define lock_num_exmode(_l) (0ULL) 2717 # define lock_num_exmode(_l) (0ULL)
2719 # define lock_num_prmode_failed(_l) (0) 2718 # define lock_num_prmode_failed(_l) (0)
2720 # define lock_num_exmode_failed(_l) (0) 2719 # define lock_num_exmode_failed(_l) (0)
2721 # define lock_total_prmode(_l) (0ULL) 2720 # define lock_total_prmode(_l) (0ULL)
2722 # define lock_total_exmode(_l) (0ULL) 2721 # define lock_total_exmode(_l) (0ULL)
2723 # define lock_max_prmode(_l) (0) 2722 # define lock_max_prmode(_l) (0)
2724 # define lock_max_exmode(_l) (0) 2723 # define lock_max_exmode(_l) (0)
2725 # define lock_refresh(_l) (0) 2724 # define lock_refresh(_l) (0)
2726 #endif 2725 #endif
2727 /* The following seq_print was added in version 2 of this output */ 2726 /* The following seq_print was added in version 2 of this output */
2728 seq_printf(m, "%llu\t" 2727 seq_printf(m, "%llu\t"
2729 "%llu\t" 2728 "%llu\t"
2730 "%u\t" 2729 "%u\t"
2731 "%u\t" 2730 "%u\t"
2732 "%llu\t" 2731 "%llu\t"
2733 "%llu\t" 2732 "%llu\t"
2734 "%u\t" 2733 "%u\t"
2735 "%u\t" 2734 "%u\t"
2736 "%u\t", 2735 "%u\t",
2737 lock_num_prmode(lockres), 2736 lock_num_prmode(lockres),
2738 lock_num_exmode(lockres), 2737 lock_num_exmode(lockres),
2739 lock_num_prmode_failed(lockres), 2738 lock_num_prmode_failed(lockres),
2740 lock_num_exmode_failed(lockres), 2739 lock_num_exmode_failed(lockres),
2741 lock_total_prmode(lockres), 2740 lock_total_prmode(lockres),
2742 lock_total_exmode(lockres), 2741 lock_total_exmode(lockres),
2743 lock_max_prmode(lockres), 2742 lock_max_prmode(lockres),
2744 lock_max_exmode(lockres), 2743 lock_max_exmode(lockres),
2745 lock_refresh(lockres)); 2744 lock_refresh(lockres));
2746 2745
2747 /* End the line */ 2746 /* End the line */
2748 seq_printf(m, "\n"); 2747 seq_printf(m, "\n");
2749 return 0; 2748 return 0;
2750 } 2749 }
2751 2750
2752 static const struct seq_operations ocfs2_dlm_seq_ops = { 2751 static const struct seq_operations ocfs2_dlm_seq_ops = {
2753 .start = ocfs2_dlm_seq_start, 2752 .start = ocfs2_dlm_seq_start,
2754 .stop = ocfs2_dlm_seq_stop, 2753 .stop = ocfs2_dlm_seq_stop,
2755 .next = ocfs2_dlm_seq_next, 2754 .next = ocfs2_dlm_seq_next,
2756 .show = ocfs2_dlm_seq_show, 2755 .show = ocfs2_dlm_seq_show,
2757 }; 2756 };
2758 2757
2759 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2758 static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2760 { 2759 {
2761 struct seq_file *seq = (struct seq_file *) file->private_data; 2760 struct seq_file *seq = (struct seq_file *) file->private_data;
2762 struct ocfs2_dlm_seq_priv *priv = seq->private; 2761 struct ocfs2_dlm_seq_priv *priv = seq->private;
2763 struct ocfs2_lock_res *res = &priv->p_iter_res; 2762 struct ocfs2_lock_res *res = &priv->p_iter_res;
2764 2763
2765 ocfs2_remove_lockres_tracking(res); 2764 ocfs2_remove_lockres_tracking(res);
2766 ocfs2_put_dlm_debug(priv->p_dlm_debug); 2765 ocfs2_put_dlm_debug(priv->p_dlm_debug);
2767 return seq_release_private(inode, file); 2766 return seq_release_private(inode, file);
2768 } 2767 }
2769 2768
2770 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2769 static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2771 { 2770 {
2772 int ret; 2771 int ret;
2773 struct ocfs2_dlm_seq_priv *priv; 2772 struct ocfs2_dlm_seq_priv *priv;
2774 struct seq_file *seq; 2773 struct seq_file *seq;
2775 struct ocfs2_super *osb; 2774 struct ocfs2_super *osb;
2776 2775
2777 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2776 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL);
2778 if (!priv) { 2777 if (!priv) {
2779 ret = -ENOMEM; 2778 ret = -ENOMEM;
2780 mlog_errno(ret); 2779 mlog_errno(ret);
2781 goto out; 2780 goto out;
2782 } 2781 }
2783 osb = inode->i_private; 2782 osb = inode->i_private;
2784 ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2783 ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2785 priv->p_dlm_debug = osb->osb_dlm_debug; 2784 priv->p_dlm_debug = osb->osb_dlm_debug;
2786 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2785 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2787 2786
2788 ret = seq_open(file, &ocfs2_dlm_seq_ops); 2787 ret = seq_open(file, &ocfs2_dlm_seq_ops);
2789 if (ret) { 2788 if (ret) {
2790 kfree(priv); 2789 kfree(priv);
2791 mlog_errno(ret); 2790 mlog_errno(ret);
2792 goto out; 2791 goto out;
2793 } 2792 }
2794 2793
2795 seq = (struct seq_file *) file->private_data; 2794 seq = (struct seq_file *) file->private_data;
2796 seq->private = priv; 2795 seq->private = priv;
2797 2796
2798 ocfs2_add_lockres_tracking(&priv->p_iter_res, 2797 ocfs2_add_lockres_tracking(&priv->p_iter_res,
2799 priv->p_dlm_debug); 2798 priv->p_dlm_debug);
2800 2799
2801 out: 2800 out:
2802 return ret; 2801 return ret;
2803 } 2802 }
2804 2803
2805 static const struct file_operations ocfs2_dlm_debug_fops = { 2804 static const struct file_operations ocfs2_dlm_debug_fops = {
2806 .open = ocfs2_dlm_debug_open, 2805 .open = ocfs2_dlm_debug_open,
2807 .release = ocfs2_dlm_debug_release, 2806 .release = ocfs2_dlm_debug_release,
2808 .read = seq_read, 2807 .read = seq_read,
2809 .llseek = seq_lseek, 2808 .llseek = seq_lseek,
2810 }; 2809 };
2811 2810
2812 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2811 static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2813 { 2812 {
2814 int ret = 0; 2813 int ret = 0;
2815 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2814 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2816 2815
2817 dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2816 dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2818 S_IFREG|S_IRUSR, 2817 S_IFREG|S_IRUSR,
2819 osb->osb_debug_root, 2818 osb->osb_debug_root,
2820 osb, 2819 osb,
2821 &ocfs2_dlm_debug_fops); 2820 &ocfs2_dlm_debug_fops);
2822 if (!dlm_debug->d_locking_state) { 2821 if (!dlm_debug->d_locking_state) {
2823 ret = -EINVAL; 2822 ret = -EINVAL;
2824 mlog(ML_ERROR, 2823 mlog(ML_ERROR,
2825 "Unable to create locking state debugfs file.\n"); 2824 "Unable to create locking state debugfs file.\n");
2826 goto out; 2825 goto out;
2827 } 2826 }
2828 2827
2829 ocfs2_get_dlm_debug(dlm_debug); 2828 ocfs2_get_dlm_debug(dlm_debug);
2830 out: 2829 out:
2831 return ret; 2830 return ret;
2832 } 2831 }
2833 2832
2834 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2833 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2835 { 2834 {
2836 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2835 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2837 2836
2838 if (dlm_debug) { 2837 if (dlm_debug) {
2839 debugfs_remove(dlm_debug->d_locking_state); 2838 debugfs_remove(dlm_debug->d_locking_state);
2840 ocfs2_put_dlm_debug(dlm_debug); 2839 ocfs2_put_dlm_debug(dlm_debug);
2841 } 2840 }
2842 } 2841 }
2843 2842
2844 int ocfs2_dlm_init(struct ocfs2_super *osb) 2843 int ocfs2_dlm_init(struct ocfs2_super *osb)
2845 { 2844 {
2846 int status = 0; 2845 int status = 0;
2847 struct ocfs2_cluster_connection *conn = NULL; 2846 struct ocfs2_cluster_connection *conn = NULL;
2848 2847
2849 mlog_entry_void(); 2848 mlog_entry_void();
2850 2849
2851 if (ocfs2_mount_local(osb)) { 2850 if (ocfs2_mount_local(osb)) {
2852 osb->node_num = 0; 2851 osb->node_num = 0;
2853 goto local; 2852 goto local;
2854 } 2853 }
2855 2854
2856 status = ocfs2_dlm_init_debug(osb); 2855 status = ocfs2_dlm_init_debug(osb);
2857 if (status < 0) { 2856 if (status < 0) {
2858 mlog_errno(status); 2857 mlog_errno(status);
2859 goto bail; 2858 goto bail;
2860 } 2859 }
2861 2860
2862 /* launch downconvert thread */ 2861 /* launch downconvert thread */
2863 osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 2862 osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
2864 if (IS_ERR(osb->dc_task)) { 2863 if (IS_ERR(osb->dc_task)) {
2865 status = PTR_ERR(osb->dc_task); 2864 status = PTR_ERR(osb->dc_task);
2866 osb->dc_task = NULL; 2865 osb->dc_task = NULL;
2867 mlog_errno(status); 2866 mlog_errno(status);
2868 goto bail; 2867 goto bail;
2869 } 2868 }
2870 2869
2871 /* for now, uuid == domain */ 2870 /* for now, uuid == domain */
2872 status = ocfs2_cluster_connect(osb->osb_cluster_stack, 2871 status = ocfs2_cluster_connect(osb->osb_cluster_stack,
2873 osb->uuid_str, 2872 osb->uuid_str,
2874 strlen(osb->uuid_str), 2873 strlen(osb->uuid_str),
2875 ocfs2_do_node_down, osb, 2874 ocfs2_do_node_down, osb,
2876 &conn); 2875 &conn);
2877 if (status) { 2876 if (status) {
2878 mlog_errno(status); 2877 mlog_errno(status);
2879 goto bail; 2878 goto bail;
2880 } 2879 }
2881 2880
2882 status = ocfs2_cluster_this_node(&osb->node_num); 2881 status = ocfs2_cluster_this_node(&osb->node_num);
2883 if (status < 0) { 2882 if (status < 0) {
2884 mlog_errno(status); 2883 mlog_errno(status);
2885 mlog(ML_ERROR, 2884 mlog(ML_ERROR,
2886 "could not find this host's node number\n"); 2885 "could not find this host's node number\n");
2887 ocfs2_cluster_disconnect(conn, 0); 2886 ocfs2_cluster_disconnect(conn, 0);
2888 goto bail; 2887 goto bail;
2889 } 2888 }
2890 2889
2891 local: 2890 local:
2892 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2891 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
2893 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2892 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
2894 ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 2893 ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
2895 ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); 2894 ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
2896 2895
2897 osb->cconn = conn; 2896 osb->cconn = conn;
2898 2897
2899 status = 0; 2898 status = 0;
2900 bail: 2899 bail:
2901 if (status < 0) { 2900 if (status < 0) {
2902 ocfs2_dlm_shutdown_debug(osb); 2901 ocfs2_dlm_shutdown_debug(osb);
2903 if (osb->dc_task) 2902 if (osb->dc_task)
2904 kthread_stop(osb->dc_task); 2903 kthread_stop(osb->dc_task);
2905 } 2904 }
2906 2905
2907 mlog_exit(status); 2906 mlog_exit(status);
2908 return status; 2907 return status;
2909 } 2908 }
2910 2909
2911 void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 2910 void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
2912 int hangup_pending) 2911 int hangup_pending)
2913 { 2912 {
2914 mlog_entry_void(); 2913 mlog_entry_void();
2915 2914
2916 ocfs2_drop_osb_locks(osb); 2915 ocfs2_drop_osb_locks(osb);
2917 2916
2918 /* 2917 /*
2919 * Now that we have dropped all locks and ocfs2_dismount_volume() 2918 * Now that we have dropped all locks and ocfs2_dismount_volume()
2920 * has disabled recovery, the DLM won't be talking to us. It's 2919 * has disabled recovery, the DLM won't be talking to us. It's
2921 * safe to tear things down before disconnecting the cluster. 2920 * safe to tear things down before disconnecting the cluster.
2922 */ 2921 */
2923 2922
2924 if (osb->dc_task) { 2923 if (osb->dc_task) {
2925 kthread_stop(osb->dc_task); 2924 kthread_stop(osb->dc_task);
2926 osb->dc_task = NULL; 2925 osb->dc_task = NULL;
2927 } 2926 }
2928 2927
2929 ocfs2_lock_res_free(&osb->osb_super_lockres); 2928 ocfs2_lock_res_free(&osb->osb_super_lockres);
2930 ocfs2_lock_res_free(&osb->osb_rename_lockres); 2929 ocfs2_lock_res_free(&osb->osb_rename_lockres);
2931 ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); 2930 ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
2932 ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); 2931 ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
2933 2932
2934 ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 2933 ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
2935 osb->cconn = NULL; 2934 osb->cconn = NULL;
2936 2935
2937 ocfs2_dlm_shutdown_debug(osb); 2936 ocfs2_dlm_shutdown_debug(osb);
2938 2937
2939 mlog_exit_void(); 2938 mlog_exit_void();
2940 } 2939 }
2941 2940
2942 static void ocfs2_unlock_ast(void *opaque, int error) 2941 static void ocfs2_unlock_ast(void *opaque, int error)
2943 { 2942 {
2944 struct ocfs2_lock_res *lockres = opaque; 2943 struct ocfs2_lock_res *lockres = opaque;
2945 unsigned long flags; 2944 unsigned long flags;
2946 2945
2947 mlog_entry_void(); 2946 mlog_entry_void();
2948 2947
2949 mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 2948 mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name,
2950 lockres->l_unlock_action); 2949 lockres->l_unlock_action);
2951 2950
2952 spin_lock_irqsave(&lockres->l_lock, flags); 2951 spin_lock_irqsave(&lockres->l_lock, flags);
2953 if (error) { 2952 if (error) {
2954 mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 2953 mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
2955 "unlock_action %d\n", error, lockres->l_name, 2954 "unlock_action %d\n", error, lockres->l_name,
2956 lockres->l_unlock_action); 2955 lockres->l_unlock_action);
2957 spin_unlock_irqrestore(&lockres->l_lock, flags); 2956 spin_unlock_irqrestore(&lockres->l_lock, flags);
2958 return; 2957 return;
2959 } 2958 }
2960 2959
2961 switch(lockres->l_unlock_action) { 2960 switch(lockres->l_unlock_action) {
2962 case OCFS2_UNLOCK_CANCEL_CONVERT: 2961 case OCFS2_UNLOCK_CANCEL_CONVERT:
2963 mlog(0, "Cancel convert success for %s\n", lockres->l_name); 2962 mlog(0, "Cancel convert success for %s\n", lockres->l_name);
2964 lockres->l_action = OCFS2_AST_INVALID; 2963 lockres->l_action = OCFS2_AST_INVALID;
2965 /* Downconvert thread may have requeued this lock, we 2964 /* Downconvert thread may have requeued this lock, we
2966 * need to wake it. */ 2965 * need to wake it. */
2967 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 2966 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
2968 ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); 2967 ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
2969 break; 2968 break;
2970 case OCFS2_UNLOCK_DROP_LOCK: 2969 case OCFS2_UNLOCK_DROP_LOCK:
2971 lockres->l_level = DLM_LOCK_IV; 2970 lockres->l_level = DLM_LOCK_IV;
2972 break; 2971 break;
2973 default: 2972 default:
2974 BUG(); 2973 BUG();
2975 } 2974 }
2976 2975
2977 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2976 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
2978 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2977 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
2979 wake_up(&lockres->l_event); 2978 wake_up(&lockres->l_event);
2980 spin_unlock_irqrestore(&lockres->l_lock, flags); 2979 spin_unlock_irqrestore(&lockres->l_lock, flags);
2981 2980
2982 mlog_exit_void(); 2981 mlog_exit_void();
2983 } 2982 }
2984 2983
2985 static int ocfs2_drop_lock(struct ocfs2_super *osb, 2984 static int ocfs2_drop_lock(struct ocfs2_super *osb,
2986 struct ocfs2_lock_res *lockres) 2985 struct ocfs2_lock_res *lockres)
2987 { 2986 {
2988 int ret; 2987 int ret;
2989 unsigned long flags; 2988 unsigned long flags;
2990 u32 lkm_flags = 0; 2989 u32 lkm_flags = 0;
2991 2990
2992 /* We didn't get anywhere near actually using this lockres. */ 2991 /* We didn't get anywhere near actually using this lockres. */
2993 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2992 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
2994 goto out; 2993 goto out;
2995 2994
2996 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 2995 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
2997 lkm_flags |= DLM_LKF_VALBLK; 2996 lkm_flags |= DLM_LKF_VALBLK;
2998 2997
2999 spin_lock_irqsave(&lockres->l_lock, flags); 2998 spin_lock_irqsave(&lockres->l_lock, flags);
3000 2999
3001 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 3000 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3002 "lockres %s, flags 0x%lx\n", 3001 "lockres %s, flags 0x%lx\n",
3003 lockres->l_name, lockres->l_flags); 3002 lockres->l_name, lockres->l_flags);
3004 3003
3005 while (lockres->l_flags & OCFS2_LOCK_BUSY) { 3004 while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3006 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 3005 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3007 "%u, unlock_action = %u\n", 3006 "%u, unlock_action = %u\n",
3008 lockres->l_name, lockres->l_flags, lockres->l_action, 3007 lockres->l_name, lockres->l_flags, lockres->l_action,
3009 lockres->l_unlock_action); 3008 lockres->l_unlock_action);
3010 3009
3011 spin_unlock_irqrestore(&lockres->l_lock, flags); 3010 spin_unlock_irqrestore(&lockres->l_lock, flags);
3012 3011
3013 /* XXX: Today we just wait on any busy 3012 /* XXX: Today we just wait on any busy
3014 * locks... Perhaps we need to cancel converts in the 3013 * locks... Perhaps we need to cancel converts in the
3015 * future? */ 3014 * future? */
3016 ocfs2_wait_on_busy_lock(lockres); 3015 ocfs2_wait_on_busy_lock(lockres);
3017 3016
3018 spin_lock_irqsave(&lockres->l_lock, flags); 3017 spin_lock_irqsave(&lockres->l_lock, flags);
3019 } 3018 }
3020 3019
3021 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3020 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3022 if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 3021 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3023 lockres->l_level == DLM_LOCK_EX && 3022 lockres->l_level == DLM_LOCK_EX &&
3024 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 3023 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3025 lockres->l_ops->set_lvb(lockres); 3024 lockres->l_ops->set_lvb(lockres);
3026 } 3025 }
3027 3026
3028 if (lockres->l_flags & OCFS2_LOCK_BUSY) 3027 if (lockres->l_flags & OCFS2_LOCK_BUSY)
3029 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 3028 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3030 lockres->l_name); 3029 lockres->l_name);
3031 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3030 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3032 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 3031 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3033 3032
3034 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 3033 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3035 spin_unlock_irqrestore(&lockres->l_lock, flags); 3034 spin_unlock_irqrestore(&lockres->l_lock, flags);
3036 goto out; 3035 goto out;
3037 } 3036 }
3038 3037
3039 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 3038 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3040 3039
3041 /* make sure we never get here while waiting for an ast to 3040 /* make sure we never get here while waiting for an ast to
3042 * fire. */ 3041 * fire. */
3043 BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 3042 BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3044 3043
3045 /* is this necessary? */ 3044 /* is this necessary? */
3046 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3045 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3047 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 3046 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3048 spin_unlock_irqrestore(&lockres->l_lock, flags); 3047 spin_unlock_irqrestore(&lockres->l_lock, flags);
3049 3048
3050 mlog(0, "lock %s\n", lockres->l_name); 3049 mlog(0, "lock %s\n", lockres->l_name);
3051 3050
3052 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, 3051 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags,
3053 lockres); 3052 lockres);
3054 if (ret) { 3053 if (ret) {
3055 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3054 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3056 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 3055 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3057 ocfs2_dlm_dump_lksb(&lockres->l_lksb); 3056 ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3058 BUG(); 3057 BUG();
3059 } 3058 }
3060 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", 3059 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3061 lockres->l_name); 3060 lockres->l_name);
3062 3061
3063 ocfs2_wait_on_busy_lock(lockres); 3062 ocfs2_wait_on_busy_lock(lockres);
3064 out: 3063 out:
3065 mlog_exit(0); 3064 mlog_exit(0);
3066 return 0; 3065 return 0;
3067 } 3066 }
3068 3067
3069 /* Mark the lockres as being dropped. It will no longer be 3068 /* Mark the lockres as being dropped. It will no longer be
3070 * queued if blocking, but we still may have to wait on it 3069 * queued if blocking, but we still may have to wait on it
3071 * being dequeued from the downconvert thread before we can consider 3070 * being dequeued from the downconvert thread before we can consider
3072 * it safe to drop. 3071 * it safe to drop.
3073 * 3072 *
3074 * You can *not* attempt to call cluster_lock on this lockres anymore. */ 3073 * You can *not* attempt to call cluster_lock on this lockres anymore. */
3075 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3074 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
3076 { 3075 {
3077 int status; 3076 int status;
3078 struct ocfs2_mask_waiter mw; 3077 struct ocfs2_mask_waiter mw;
3079 unsigned long flags; 3078 unsigned long flags;
3080 3079
3081 ocfs2_init_mask_waiter(&mw); 3080 ocfs2_init_mask_waiter(&mw);
3082 3081
3083 spin_lock_irqsave(&lockres->l_lock, flags); 3082 spin_lock_irqsave(&lockres->l_lock, flags);
3084 lockres->l_flags |= OCFS2_LOCK_FREEING; 3083 lockres->l_flags |= OCFS2_LOCK_FREEING;
3085 while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3084 while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3086 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3085 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3087 spin_unlock_irqrestore(&lockres->l_lock, flags); 3086 spin_unlock_irqrestore(&lockres->l_lock, flags);
3088 3087
3089 mlog(0, "Waiting on lockres %s\n", lockres->l_name); 3088 mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3090 3089
3091 status = ocfs2_wait_for_mask(&mw); 3090 status = ocfs2_wait_for_mask(&mw);
3092 if (status) 3091 if (status)
3093 mlog_errno(status); 3092 mlog_errno(status);
3094 3093
3095 spin_lock_irqsave(&lockres->l_lock, flags); 3094 spin_lock_irqsave(&lockres->l_lock, flags);
3096 } 3095 }
3097 spin_unlock_irqrestore(&lockres->l_lock, flags); 3096 spin_unlock_irqrestore(&lockres->l_lock, flags);
3098 } 3097 }
3099 3098
/* Convenience wrapper: mark @lockres FREEING then drop it. */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int ret;

	ocfs2_mark_lockres_freeing(lockres);
	ret = ocfs2_drop_lock(osb, lockres);
	if (ret)
		mlog_errno(ret);
}
3110 3109
3111 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3110 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3112 { 3111 {
3113 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3112 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3114 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 3113 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
3115 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 3114 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
3116 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3115 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3117 } 3116 }
3118 3117
3119 int ocfs2_drop_inode_locks(struct inode *inode) 3118 int ocfs2_drop_inode_locks(struct inode *inode)
3120 { 3119 {
3121 int status, err; 3120 int status, err;
3122 3121
3123 mlog_entry_void(); 3122 mlog_entry_void();
3124 3123
3125 /* No need to call ocfs2_mark_lockres_freeing here - 3124 /* No need to call ocfs2_mark_lockres_freeing here -
3126 * ocfs2_clear_inode has done it for us. */ 3125 * ocfs2_clear_inode has done it for us. */
3127 3126
3128 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3127 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3129 &OCFS2_I(inode)->ip_open_lockres); 3128 &OCFS2_I(inode)->ip_open_lockres);
3130 if (err < 0) 3129 if (err < 0)
3131 mlog_errno(err); 3130 mlog_errno(err);
3132 3131
3133 status = err; 3132 status = err;
3134 3133
3135 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3134 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3136 &OCFS2_I(inode)->ip_inode_lockres); 3135 &OCFS2_I(inode)->ip_inode_lockres);
3137 if (err < 0) 3136 if (err < 0)
3138 mlog_errno(err); 3137 mlog_errno(err);
3139 if (err < 0 && !status) 3138 if (err < 0 && !status)
3140 status = err; 3139 status = err;
3141 3140
3142 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3141 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3143 &OCFS2_I(inode)->ip_rw_lockres); 3142 &OCFS2_I(inode)->ip_rw_lockres);
3144 if (err < 0) 3143 if (err < 0)
3145 mlog_errno(err); 3144 mlog_errno(err);
3146 if (err < 0 && !status) 3145 if (err < 0 && !status)
3147 status = err; 3146 status = err;
3148 3147
3149 mlog_exit(status); 3148 mlog_exit(status);
3150 return status; 3149 return status;
3151 } 3150 }
3152 3151
3153 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3152 static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3154 int new_level) 3153 int new_level)
3155 { 3154 {
3156 assert_spin_locked(&lockres->l_lock); 3155 assert_spin_locked(&lockres->l_lock);
3157 3156
3158 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3157 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3159 3158
3160 if (lockres->l_level <= new_level) { 3159 if (lockres->l_level <= new_level) {
3161 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", 3160 mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n",
3162 lockres->l_level, new_level); 3161 lockres->l_level, new_level);
3163 BUG(); 3162 BUG();
3164 } 3163 }
3165 3164
3166 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 3165 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n",
3167 lockres->l_name, new_level, lockres->l_blocking); 3166 lockres->l_name, new_level, lockres->l_blocking);
3168 3167
3169 lockres->l_action = OCFS2_AST_DOWNCONVERT; 3168 lockres->l_action = OCFS2_AST_DOWNCONVERT;
3170 lockres->l_requested = new_level; 3169 lockres->l_requested = new_level;
3171 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3170 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3172 return lockres_set_pending(lockres); 3171 return lockres_set_pending(lockres);
3173 } 3172 }
3174 3173
3175 static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3174 static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3176 struct ocfs2_lock_res *lockres, 3175 struct ocfs2_lock_res *lockres,
3177 int new_level, 3176 int new_level,
3178 int lvb, 3177 int lvb,
3179 unsigned int generation) 3178 unsigned int generation)
3180 { 3179 {
3181 int ret; 3180 int ret;
3182 u32 dlm_flags = DLM_LKF_CONVERT; 3181 u32 dlm_flags = DLM_LKF_CONVERT;
3183 3182
3184 mlog_entry_void(); 3183 mlog_entry_void();
3185 3184
3186 if (lvb) 3185 if (lvb)
3187 dlm_flags |= DLM_LKF_VALBLK; 3186 dlm_flags |= DLM_LKF_VALBLK;
3188 3187
3189 ret = ocfs2_dlm_lock(osb->cconn, 3188 ret = ocfs2_dlm_lock(osb->cconn,
3190 new_level, 3189 new_level,
3191 &lockres->l_lksb, 3190 &lockres->l_lksb,
3192 dlm_flags, 3191 dlm_flags,
3193 lockres->l_name, 3192 lockres->l_name,
3194 OCFS2_LOCK_ID_MAX_LEN - 1, 3193 OCFS2_LOCK_ID_MAX_LEN - 1,
3195 lockres); 3194 lockres);
3196 lockres_clear_pending(lockres, generation, osb); 3195 lockres_clear_pending(lockres, generation, osb);
3197 if (ret) { 3196 if (ret) {
3198 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3197 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3199 ocfs2_recover_from_dlm_error(lockres, 1); 3198 ocfs2_recover_from_dlm_error(lockres, 1);
3200 goto bail; 3199 goto bail;
3201 } 3200 }
3202 3201
3203 ret = 0; 3202 ret = 0;
3204 bail: 3203 bail:
3205 mlog_exit(ret); 3204 mlog_exit(ret);
3206 return ret; 3205 return ret;
3207 } 3206 }
3208 3207
3209 /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3208 /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3210 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3209 static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3211 struct ocfs2_lock_res *lockres) 3210 struct ocfs2_lock_res *lockres)
3212 { 3211 {
3213 assert_spin_locked(&lockres->l_lock); 3212 assert_spin_locked(&lockres->l_lock);
3214 3213
3215 mlog_entry_void(); 3214 mlog_entry_void();
3216 mlog(0, "lock %s\n", lockres->l_name); 3215 mlog(0, "lock %s\n", lockres->l_name);
3217 3216
3218 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3217 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3219 /* If we're already trying to cancel a lock conversion 3218 /* If we're already trying to cancel a lock conversion
3220 * then just drop the spinlock and allow the caller to 3219 * then just drop the spinlock and allow the caller to
3221 * requeue this lock. */ 3220 * requeue this lock. */
3222 3221
3223 mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 3222 mlog(0, "Lockres %s, skip convert\n", lockres->l_name);
3224 return 0; 3223 return 0;
3225 } 3224 }
3226 3225
3227 /* were we in a convert when we got the bast fire? */ 3226 /* were we in a convert when we got the bast fire? */
3228 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3227 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3229 lockres->l_action != OCFS2_AST_DOWNCONVERT); 3228 lockres->l_action != OCFS2_AST_DOWNCONVERT);
3230 /* set things up for the unlockast to know to just 3229 /* set things up for the unlockast to know to just
3231 * clear out the ast_action and unset busy, etc. */ 3230 * clear out the ast_action and unset busy, etc. */
3232 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3231 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3233 3232
3234 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3233 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3235 "lock %s, invalid flags: 0x%lx\n", 3234 "lock %s, invalid flags: 0x%lx\n",
3236 lockres->l_name, lockres->l_flags); 3235 lockres->l_name, lockres->l_flags);
3237 3236
3238 return 1; 3237 return 1;
3239 } 3238 }
3240 3239
3241 static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3240 static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3242 struct ocfs2_lock_res *lockres) 3241 struct ocfs2_lock_res *lockres)
3243 { 3242 {
3244 int ret; 3243 int ret;
3245 3244
3246 mlog_entry_void(); 3245 mlog_entry_void();
3247 mlog(0, "lock %s\n", lockres->l_name); 3246 mlog(0, "lock %s\n", lockres->l_name);
3248 3247
3249 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3248 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3250 DLM_LKF_CANCEL, lockres); 3249 DLM_LKF_CANCEL, lockres);
3251 if (ret) { 3250 if (ret) {
3252 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3251 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3253 ocfs2_recover_from_dlm_error(lockres, 0); 3252 ocfs2_recover_from_dlm_error(lockres, 0);
3254 } 3253 }
3255 3254
3256 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3255 mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name);
3257 3256
3258 mlog_exit(ret); 3257 mlog_exit(ret);
3259 return ret; 3258 return ret;
3260 } 3259 }
3261 3260
3262 static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3261 static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3263 struct ocfs2_lock_res *lockres, 3262 struct ocfs2_lock_res *lockres,
3264 struct ocfs2_unblock_ctl *ctl) 3263 struct ocfs2_unblock_ctl *ctl)
3265 { 3264 {
3266 unsigned long flags; 3265 unsigned long flags;
3267 int blocking; 3266 int blocking;
3268 int new_level; 3267 int new_level;
3269 int ret = 0; 3268 int ret = 0;
3270 int set_lvb = 0; 3269 int set_lvb = 0;
3271 unsigned int gen; 3270 unsigned int gen;
3272 3271
3273 mlog_entry_void(); 3272 mlog_entry_void();
3274 3273
3275 spin_lock_irqsave(&lockres->l_lock, flags); 3274 spin_lock_irqsave(&lockres->l_lock, flags);
3276 3275
3277 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 3276 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
3278 3277
3279 recheck: 3278 recheck:
3280 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3279 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3281 /* XXX 3280 /* XXX
3282 * This is a *big* race. The OCFS2_LOCK_PENDING flag 3281 * This is a *big* race. The OCFS2_LOCK_PENDING flag
3283 * exists entirely for one reason - another thread has set 3282 * exists entirely for one reason - another thread has set
3284 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3283 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3285 * 3284 *
3286 * If we do ocfs2_cancel_convert() before the other thread 3285 * If we do ocfs2_cancel_convert() before the other thread
3287 * calls dlm_lock(), our cancel will do nothing. We will 3286 * calls dlm_lock(), our cancel will do nothing. We will
3288 * get no ast, and we will have no way of knowing the 3287 * get no ast, and we will have no way of knowing the
3289 * cancel failed. Meanwhile, the other thread will call 3288 * cancel failed. Meanwhile, the other thread will call
3290 * into dlm_lock() and wait...forever. 3289 * into dlm_lock() and wait...forever.
3291 * 3290 *
3292 * Why forever? Because another node has asked for the 3291 * Why forever? Because another node has asked for the
3293 * lock first; that's why we're here in unblock_lock(). 3292 * lock first; that's why we're here in unblock_lock().
3294 * 3293 *
3295 * The solution is OCFS2_LOCK_PENDING. When PENDING is 3294 * The solution is OCFS2_LOCK_PENDING. When PENDING is
3296 * set, we just requeue the unblock. Only when the other 3295 * set, we just requeue the unblock. Only when the other
3297 * thread has called dlm_lock() and cleared PENDING will 3296 * thread has called dlm_lock() and cleared PENDING will
3298 * we then cancel their request. 3297 * we then cancel their request.
3299 * 3298 *
3300 * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3299 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3301 * at the same time they set OCFS2_DLM_BUSY. They must 3300 * at the same time they set OCFS2_DLM_BUSY. They must
3302 * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3301 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3303 */ 3302 */
3304 if (lockres->l_flags & OCFS2_LOCK_PENDING) 3303 if (lockres->l_flags & OCFS2_LOCK_PENDING)
3305 goto leave_requeue; 3304 goto leave_requeue;
3306 3305
3307 ctl->requeue = 1; 3306 ctl->requeue = 1;
3308 ret = ocfs2_prepare_cancel_convert(osb, lockres); 3307 ret = ocfs2_prepare_cancel_convert(osb, lockres);
3309 spin_unlock_irqrestore(&lockres->l_lock, flags); 3308 spin_unlock_irqrestore(&lockres->l_lock, flags);
3310 if (ret) { 3309 if (ret) {
3311 ret = ocfs2_cancel_convert(osb, lockres); 3310 ret = ocfs2_cancel_convert(osb, lockres);
3312 if (ret < 0) 3311 if (ret < 0)
3313 mlog_errno(ret); 3312 mlog_errno(ret);
3314 } 3313 }
3315 goto leave; 3314 goto leave;
3316 } 3315 }
3317 3316
3318 /* if we're blocking an exclusive and we have *any* holders, 3317 /* if we're blocking an exclusive and we have *any* holders,
3319 * then requeue. */ 3318 * then requeue. */
3320 if ((lockres->l_blocking == DLM_LOCK_EX) 3319 if ((lockres->l_blocking == DLM_LOCK_EX)
3321 && (lockres->l_ex_holders || lockres->l_ro_holders)) 3320 && (lockres->l_ex_holders || lockres->l_ro_holders))
3322 goto leave_requeue; 3321 goto leave_requeue;
3323 3322
3324 /* If it's a PR we're blocking, then only 3323 /* If it's a PR we're blocking, then only
3325 * requeue if we've got any EX holders */ 3324 * requeue if we've got any EX holders */
3326 if (lockres->l_blocking == DLM_LOCK_PR && 3325 if (lockres->l_blocking == DLM_LOCK_PR &&
3327 lockres->l_ex_holders) 3326 lockres->l_ex_holders)
3328 goto leave_requeue; 3327 goto leave_requeue;
3329 3328
3330 /* 3329 /*
3331 * Can we get a lock in this state if the holder counts are 3330 * Can we get a lock in this state if the holder counts are
3332 * zero? The meta data unblock code used to check this. 3331 * zero? The meta data unblock code used to check this.
3333 */ 3332 */
3334 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3333 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
3335 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3334 && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
3336 goto leave_requeue; 3335 goto leave_requeue;
3337 3336
3338 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 3337 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
3339 3338
3340 if (lockres->l_ops->check_downconvert 3339 if (lockres->l_ops->check_downconvert
3341 && !lockres->l_ops->check_downconvert(lockres, new_level)) 3340 && !lockres->l_ops->check_downconvert(lockres, new_level))
3342 goto leave_requeue; 3341 goto leave_requeue;
3343 3342
3344 /* If we get here, then we know that there are no more 3343 /* If we get here, then we know that there are no more
3345 * incompatible holders (and anyone asking for an incompatible 3344 * incompatible holders (and anyone asking for an incompatible
3346 * lock is blocked). We can now downconvert the lock */ 3345 * lock is blocked). We can now downconvert the lock */
3347 if (!lockres->l_ops->downconvert_worker) 3346 if (!lockres->l_ops->downconvert_worker)
3348 goto downconvert; 3347 goto downconvert;
3349 3348
3350 /* Some lockres types want to do a bit of work before 3349 /* Some lockres types want to do a bit of work before
3351 * downconverting a lock. Allow that here. The worker function 3350 * downconverting a lock. Allow that here. The worker function
3352 * may sleep, so we save off a copy of what we're blocking as 3351 * may sleep, so we save off a copy of what we're blocking as
3353 * it may change while we're not holding the spin lock. */ 3352 * it may change while we're not holding the spin lock. */
3354 blocking = lockres->l_blocking; 3353 blocking = lockres->l_blocking;
3355 spin_unlock_irqrestore(&lockres->l_lock, flags); 3354 spin_unlock_irqrestore(&lockres->l_lock, flags);
3356 3355
3357 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3356 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3358 3357
3359 if (ctl->unblock_action == UNBLOCK_STOP_POST) 3358 if (ctl->unblock_action == UNBLOCK_STOP_POST)
3360 goto leave; 3359 goto leave;
3361 3360
3362 spin_lock_irqsave(&lockres->l_lock, flags); 3361 spin_lock_irqsave(&lockres->l_lock, flags);
3363 if (blocking != lockres->l_blocking) { 3362 if (blocking != lockres->l_blocking) {
3364 /* If this changed underneath us, then we can't drop 3363 /* If this changed underneath us, then we can't drop
3365 * it just yet. */ 3364 * it just yet. */
3366 goto recheck; 3365 goto recheck;
3367 } 3366 }
3368 3367
3369 downconvert: 3368 downconvert:
3370 ctl->requeue = 0; 3369 ctl->requeue = 0;
3371 3370
3372 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3371 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3373 if (lockres->l_level == DLM_LOCK_EX) 3372 if (lockres->l_level == DLM_LOCK_EX)
3374 set_lvb = 1; 3373 set_lvb = 1;
3375 3374
3376 /* 3375 /*
3377 * We only set the lvb if the lock has been fully 3376 * We only set the lvb if the lock has been fully
3378 * refreshed - otherwise we risk setting stale 3377 * refreshed - otherwise we risk setting stale
3379 * data. Otherwise, there's no need to actually clear 3378 * data. Otherwise, there's no need to actually clear
3380 * out the lvb here as it's value is still valid. 3379 * out the lvb here as it's value is still valid.
3381 */ 3380 */
3382 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 3381 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3383 lockres->l_ops->set_lvb(lockres); 3382 lockres->l_ops->set_lvb(lockres);
3384 } 3383 }
3385 3384
3386 gen = ocfs2_prepare_downconvert(lockres, new_level); 3385 gen = ocfs2_prepare_downconvert(lockres, new_level);
3387 spin_unlock_irqrestore(&lockres->l_lock, flags); 3386 spin_unlock_irqrestore(&lockres->l_lock, flags);
3388 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3387 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3389 gen); 3388 gen);
3390 3389
3391 leave: 3390 leave:
3392 mlog_exit(ret); 3391 mlog_exit(ret);
3393 return ret; 3392 return ret;
3394 3393
3395 leave_requeue: 3394 leave_requeue:
3396 spin_unlock_irqrestore(&lockres->l_lock, flags); 3395 spin_unlock_irqrestore(&lockres->l_lock, flags);
3397 ctl->requeue = 1; 3396 ctl->requeue = 1;
3398 3397
3399 mlog_exit(0); 3398 mlog_exit(0);
3400 return 0; 3399 return 0;
3401 } 3400 }
3402 3401
3403 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3402 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3404 int blocking) 3403 int blocking)
3405 { 3404 {
3406 struct inode *inode; 3405 struct inode *inode;
3407 struct address_space *mapping; 3406 struct address_space *mapping;
3408 3407
3409 inode = ocfs2_lock_res_inode(lockres); 3408 inode = ocfs2_lock_res_inode(lockres);
3410 mapping = inode->i_mapping; 3409 mapping = inode->i_mapping;
3411 3410
3412 if (!S_ISREG(inode->i_mode)) 3411 if (!S_ISREG(inode->i_mode))
3413 goto out; 3412 goto out;
3414 3413
3415 /* 3414 /*
3416 * We need this before the filemap_fdatawrite() so that it can 3415 * We need this before the filemap_fdatawrite() so that it can
3417 * transfer the dirty bit from the PTE to the 3416 * transfer the dirty bit from the PTE to the
3418 * page. Unfortunately this means that even for EX->PR 3417 * page. Unfortunately this means that even for EX->PR
3419 * downconverts, we'll lose our mappings and have to build 3418 * downconverts, we'll lose our mappings and have to build
3420 * them up again. 3419 * them up again.
3421 */ 3420 */
3422 unmap_mapping_range(mapping, 0, 0, 0); 3421 unmap_mapping_range(mapping, 0, 0, 0);
3423 3422
3424 if (filemap_fdatawrite(mapping)) { 3423 if (filemap_fdatawrite(mapping)) {
3425 mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3424 mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3426 (unsigned long long)OCFS2_I(inode)->ip_blkno); 3425 (unsigned long long)OCFS2_I(inode)->ip_blkno);
3427 } 3426 }
3428 sync_mapping_buffers(mapping); 3427 sync_mapping_buffers(mapping);
3429 if (blocking == DLM_LOCK_EX) { 3428 if (blocking == DLM_LOCK_EX) {
3430 truncate_inode_pages(mapping, 0); 3429 truncate_inode_pages(mapping, 0);
3431 } else { 3430 } else {
3432 /* We only need to wait on the I/O if we're not also 3431 /* We only need to wait on the I/O if we're not also
3433 * truncating pages because truncate_inode_pages waits 3432 * truncating pages because truncate_inode_pages waits
3434 * for us above. We don't truncate pages if we're 3433 * for us above. We don't truncate pages if we're
3435 * blocking anything < EXMODE because we want to keep 3434 * blocking anything < EXMODE because we want to keep
3436 * them around in that case. */ 3435 * them around in that case. */
3437 filemap_fdatawait(mapping); 3436 filemap_fdatawait(mapping);
3438 } 3437 }
3439 3438
3440 out: 3439 out:
3441 return UNBLOCK_CONTINUE; 3440 return UNBLOCK_CONTINUE;
3442 } 3441 }
3443 3442
3444 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3443 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
3445 int new_level) 3444 int new_level)
3446 { 3445 {
3447 struct inode *inode = ocfs2_lock_res_inode(lockres); 3446 struct inode *inode = ocfs2_lock_res_inode(lockres);
3448 int checkpointed = ocfs2_inode_fully_checkpointed(inode); 3447 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
3449 3448
3450 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3449 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3451 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3450 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3452 3451
3453 if (checkpointed) 3452 if (checkpointed)
3454 return 1; 3453 return 1;
3455 3454
3456 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 3455 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
3457 return 0; 3456 return 0;
3458 } 3457 }
3459 3458
3460 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3459 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
3461 { 3460 {
3462 struct inode *inode = ocfs2_lock_res_inode(lockres); 3461 struct inode *inode = ocfs2_lock_res_inode(lockres);
3463 3462
3464 __ocfs2_stuff_meta_lvb(inode); 3463 __ocfs2_stuff_meta_lvb(inode);
3465 } 3464 }
3466 3465
3467 /* 3466 /*
3468 * Does the final reference drop on our dentry lock. Right now this 3467 * Does the final reference drop on our dentry lock. Right now this
3469 * happens in the downconvert thread, but we could choose to simplify the 3468 * happens in the downconvert thread, but we could choose to simplify the
3470 * dlmglue API and push these off to the ocfs2_wq in the future. 3469 * dlmglue API and push these off to the ocfs2_wq in the future.
3471 */ 3470 */
3472 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3471 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
3473 struct ocfs2_lock_res *lockres) 3472 struct ocfs2_lock_res *lockres)
3474 { 3473 {
3475 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3474 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3476 ocfs2_dentry_lock_put(osb, dl); 3475 ocfs2_dentry_lock_put(osb, dl);
3477 } 3476 }
3478 3477
3479 /* 3478 /*
3480 * d_delete() matching dentries before the lock downconvert. 3479 * d_delete() matching dentries before the lock downconvert.
3481 * 3480 *
3482 * At this point, any process waiting to destroy the 3481 * At this point, any process waiting to destroy the
3483 * dentry_lock due to last ref count is stopped by the 3482 * dentry_lock due to last ref count is stopped by the
3484 * OCFS2_LOCK_QUEUED flag. 3483 * OCFS2_LOCK_QUEUED flag.
3485 * 3484 *
3486 * We have two potential problems 3485 * We have two potential problems
3487 * 3486 *
3488 * 1) If we do the last reference drop on our dentry_lock (via dput) 3487 * 1) If we do the last reference drop on our dentry_lock (via dput)
3489 * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3488 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
3490 * the downconvert to finish. Instead we take an elevated 3489 * the downconvert to finish. Instead we take an elevated
3491 * reference and push the drop until after we've completed our 3490 * reference and push the drop until after we've completed our
3492 * unblock processing. 3491 * unblock processing.
3493 * 3492 *
3494 * 2) There might be another process with a final reference, 3493 * 2) There might be another process with a final reference,
3495 * waiting on us to finish processing. If this is the case, we 3494 * waiting on us to finish processing. If this is the case, we
3496 * detect it and exit out - there's no more dentries anyway. 3495 * detect it and exit out - there's no more dentries anyway.
3497 */ 3496 */
3498 static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3497 static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3499 int blocking) 3498 int blocking)
3500 { 3499 {
3501 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3500 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3502 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3501 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
3503 struct dentry *dentry; 3502 struct dentry *dentry;
3504 unsigned long flags; 3503 unsigned long flags;
3505 int extra_ref = 0; 3504 int extra_ref = 0;
3506 3505
3507 /* 3506 /*
3508 * This node is blocking another node from getting a read 3507 * This node is blocking another node from getting a read
3509 * lock. This happens when we've renamed within a 3508 * lock. This happens when we've renamed within a
3510 * directory. We've forced the other nodes to d_delete(), but 3509 * directory. We've forced the other nodes to d_delete(), but
3511 * we never actually dropped our lock because it's still 3510 * we never actually dropped our lock because it's still
3512 * valid. The downconvert code will retain a PR for this node, 3511 * valid. The downconvert code will retain a PR for this node,
3513 * so there's no further work to do. 3512 * so there's no further work to do.
3514 */ 3513 */
3515 if (blocking == DLM_LOCK_PR) 3514 if (blocking == DLM_LOCK_PR)
3516 return UNBLOCK_CONTINUE; 3515 return UNBLOCK_CONTINUE;
3517 3516
3518 /* 3517 /*
3519 * Mark this inode as potentially orphaned. The code in 3518 * Mark this inode as potentially orphaned. The code in
3520 * ocfs2_delete_inode() will figure out whether it actually 3519 * ocfs2_delete_inode() will figure out whether it actually
3521 * needs to be freed or not. 3520 * needs to be freed or not.
3522 */ 3521 */
3523 spin_lock(&oi->ip_lock); 3522 spin_lock(&oi->ip_lock);
3524 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3523 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
3525 spin_unlock(&oi->ip_lock); 3524 spin_unlock(&oi->ip_lock);
3526 3525
3527 /* 3526 /*
3528 * Yuck. We need to make sure however that the check of 3527 * Yuck. We need to make sure however that the check of
3529 * OCFS2_LOCK_FREEING and the extra reference are atomic with 3528 * OCFS2_LOCK_FREEING and the extra reference are atomic with
3530 * respect to a reference decrement or the setting of that 3529 * respect to a reference decrement or the setting of that
3531 * flag. 3530 * flag.
3532 */ 3531 */
3533 spin_lock_irqsave(&lockres->l_lock, flags); 3532 spin_lock_irqsave(&lockres->l_lock, flags);
3534 spin_lock(&dentry_attach_lock); 3533 spin_lock(&dentry_attach_lock);
3535 if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3534 if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
3536 && dl->dl_count) { 3535 && dl->dl_count) {
3537 dl->dl_count++; 3536 dl->dl_count++;
3538 extra_ref = 1; 3537 extra_ref = 1;
3539 } 3538 }
3540 spin_unlock(&dentry_attach_lock); 3539 spin_unlock(&dentry_attach_lock);
3541 spin_unlock_irqrestore(&lockres->l_lock, flags); 3540 spin_unlock_irqrestore(&lockres->l_lock, flags);
3542 3541
3543 mlog(0, "extra_ref = %d\n", extra_ref); 3542 mlog(0, "extra_ref = %d\n", extra_ref);
3544 3543
3545 /* 3544 /*
3546 * We have a process waiting on us in ocfs2_dentry_iput(), 3545 * We have a process waiting on us in ocfs2_dentry_iput(),
3547 * which means we can't have any more outstanding 3546 * which means we can't have any more outstanding
3548 * aliases. There's no need to do any more work. 3547 * aliases. There's no need to do any more work.
3549 */ 3548 */
3550 if (!extra_ref) 3549 if (!extra_ref)
3551 return UNBLOCK_CONTINUE; 3550 return UNBLOCK_CONTINUE;
3552 3551
3553 spin_lock(&dentry_attach_lock); 3552 spin_lock(&dentry_attach_lock);
3554 while (1) { 3553 while (1) {
3555 dentry = ocfs2_find_local_alias(dl->dl_inode, 3554 dentry = ocfs2_find_local_alias(dl->dl_inode,
3556 dl->dl_parent_blkno, 1); 3555 dl->dl_parent_blkno, 1);
3557 if (!dentry) 3556 if (!dentry)
3558 break; 3557 break;
3559 spin_unlock(&dentry_attach_lock); 3558 spin_unlock(&dentry_attach_lock);
3560 3559
3561 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 3560 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
3562 dentry->d_name.name); 3561 dentry->d_name.name);
3563 3562
3564 /* 3563 /*
3565 * The following dcache calls may do an 3564 * The following dcache calls may do an
3566 * iput(). Normally we don't want that from the 3565 * iput(). Normally we don't want that from the
3567 * downconverting thread, but in this case it's ok 3566 * downconverting thread, but in this case it's ok
3568 * because the requesting node already has an 3567 * because the requesting node already has an
3569 * exclusive lock on the inode, so it can't be queued 3568 * exclusive lock on the inode, so it can't be queued
3570 * for a downconvert. 3569 * for a downconvert.
3571 */ 3570 */
3572 d_delete(dentry); 3571 d_delete(dentry);
3573 dput(dentry); 3572 dput(dentry);
3574 3573
3575 spin_lock(&dentry_attach_lock); 3574 spin_lock(&dentry_attach_lock);
3576 } 3575 }
3577 spin_unlock(&dentry_attach_lock); 3576 spin_unlock(&dentry_attach_lock);
3578 3577
3579 /* 3578 /*
3580 * If we are the last holder of this dentry lock, there is no 3579 * If we are the last holder of this dentry lock, there is no
3581 * reason to downconvert so skip straight to the unlock. 3580 * reason to downconvert so skip straight to the unlock.
3582 */ 3581 */
3583 if (dl->dl_count == 1) 3582 if (dl->dl_count == 1)
3584 return UNBLOCK_STOP_POST; 3583 return UNBLOCK_STOP_POST;
3585 3584
3586 return UNBLOCK_CONTINUE_POST; 3585 return UNBLOCK_CONTINUE_POST;
3587 } 3586 }
3588 3587
3589 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 3588 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3590 { 3589 {
3591 struct ocfs2_qinfo_lvb *lvb; 3590 struct ocfs2_qinfo_lvb *lvb;
3592 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 3591 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
3593 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 3592 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3594 oinfo->dqi_gi.dqi_type); 3593 oinfo->dqi_gi.dqi_type);
3595 3594
3596 mlog_entry_void(); 3595 mlog_entry_void();
3597 3596
3598 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 3597 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3599 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 3598 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
3600 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 3599 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
3601 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 3600 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
3602 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 3601 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
3603 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 3602 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
3604 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 3603 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
3605 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 3604 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
3606 3605
3607 mlog_exit_void(); 3606 mlog_exit_void();
3608 } 3607 }
3609 3608
3610 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 3609 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3611 { 3610 {
3612 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3611 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3613 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 3612 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3614 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3613 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3615 3614
3616 mlog_entry_void(); 3615 mlog_entry_void();
3617 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 3616 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3618 ocfs2_cluster_unlock(osb, lockres, level); 3617 ocfs2_cluster_unlock(osb, lockres, level);
3619 mlog_exit_void(); 3618 mlog_exit_void();
3620 } 3619 }
3621 3620
3622 static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 3621 static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3623 { 3622 {
3624 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 3623 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3625 oinfo->dqi_gi.dqi_type); 3624 oinfo->dqi_gi.dqi_type);
3626 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3625 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3627 struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 3626 struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3628 struct buffer_head *bh = NULL; 3627 struct buffer_head *bh = NULL;
3629 struct ocfs2_global_disk_dqinfo *gdinfo; 3628 struct ocfs2_global_disk_dqinfo *gdinfo;
3630 int status = 0; 3629 int status = 0;
3631 3630
3632 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 3631 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
3633 lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 3632 lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
3634 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 3633 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
3635 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 3634 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
3636 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 3635 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
3637 oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 3636 oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
3638 oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 3637 oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
3639 oinfo->dqi_gi.dqi_free_entry = 3638 oinfo->dqi_gi.dqi_free_entry =
3640 be32_to_cpu(lvb->lvb_free_entry); 3639 be32_to_cpu(lvb->lvb_free_entry);
3641 } else { 3640 } else {
3642 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); 3641 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
3643 if (status) { 3642 if (status) {
3644 mlog_errno(status); 3643 mlog_errno(status);
3645 goto bail; 3644 goto bail;
3646 } 3645 }
3647 gdinfo = (struct ocfs2_global_disk_dqinfo *) 3646 gdinfo = (struct ocfs2_global_disk_dqinfo *)
3648 (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 3647 (bh->b_data + OCFS2_GLOBAL_INFO_OFF);
3649 info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 3648 info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
3650 info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); 3649 info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
3651 oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 3650 oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
3652 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 3651 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
3653 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 3652 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
3654 oinfo->dqi_gi.dqi_free_entry = 3653 oinfo->dqi_gi.dqi_free_entry =
3655 le32_to_cpu(gdinfo->dqi_free_entry); 3654 le32_to_cpu(gdinfo->dqi_free_entry);
3656 brelse(bh); 3655 brelse(bh);
3657 ocfs2_track_lock_refresh(lockres); 3656 ocfs2_track_lock_refresh(lockres);
3658 } 3657 }
3659 3658
3660 bail: 3659 bail:
3661 return status; 3660 return status;
3662 } 3661 }
3663 3662
3664 /* Lock quota info, this function expects at least shared lock on the quota file 3663 /* Lock quota info, this function expects at least shared lock on the quota file
3665 * so that we can safely refresh quota info from disk. */ 3664 * so that we can safely refresh quota info from disk. */
3666 int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 3665 int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3667 { 3666 {
3668 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 3667 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3669 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 3668 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3670 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3669 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3671 int status = 0; 3670 int status = 0;
3672 3671
3673 mlog_entry_void(); 3672 mlog_entry_void();
3674 3673
3675 /* On RO devices, locking really isn't needed... */ 3674 /* On RO devices, locking really isn't needed... */
3676 if (ocfs2_is_hard_readonly(osb)) { 3675 if (ocfs2_is_hard_readonly(osb)) {
3677 if (ex) 3676 if (ex)
3678 status = -EROFS; 3677 status = -EROFS;
3679 goto bail; 3678 goto bail;
3680 } 3679 }
3681 if (ocfs2_mount_local(osb)) 3680 if (ocfs2_mount_local(osb))
3682 goto bail; 3681 goto bail;
3683 3682
3684 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 3683 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3685 if (status < 0) { 3684 if (status < 0) {
3686 mlog_errno(status); 3685 mlog_errno(status);
3687 goto bail; 3686 goto bail;
3688 } 3687 }
3689 if (!ocfs2_should_refresh_lock_res(lockres)) 3688 if (!ocfs2_should_refresh_lock_res(lockres))
3690 goto bail; 3689 goto bail;
3691 /* OK, we have the lock but we need to refresh the quota info */ 3690 /* OK, we have the lock but we need to refresh the quota info */
3692 status = ocfs2_refresh_qinfo(oinfo); 3691 status = ocfs2_refresh_qinfo(oinfo);
3693 if (status) 3692 if (status)
3694 ocfs2_qinfo_unlock(oinfo, ex); 3693 ocfs2_qinfo_unlock(oinfo, ex);
3695 ocfs2_complete_lock_res_refresh(lockres, status); 3694 ocfs2_complete_lock_res_refresh(lockres, status);
3696 bail: 3695 bail:
3697 mlog_exit(status); 3696 mlog_exit(status);
3698 return status; 3697 return status;
3699 } 3698 }
3700 3699
3701 /* 3700 /*
3702 * This is the filesystem locking protocol. It provides the lock handling 3701 * This is the filesystem locking protocol. It provides the lock handling
3703 * hooks for the underlying DLM. It has a maximum version number. 3702 * hooks for the underlying DLM. It has a maximum version number.
3704 * The version number allows interoperability with systems running at 3703 * The version number allows interoperability with systems running at
3705 * the same major number and an equal or smaller minor number. 3704 * the same major number and an equal or smaller minor number.
3706 * 3705 *
3707 * Whenever the filesystem does new things with locks (adds or removes a 3706 * Whenever the filesystem does new things with locks (adds or removes a
3708 * lock, orders them differently, does different things underneath a lock), 3707 * lock, orders them differently, does different things underneath a lock),
3709 * the version must be changed. The protocol is negotiated when joining 3708 * the version must be changed. The protocol is negotiated when joining
3710 * the dlm domain. A node may join the domain if its major version is 3709 * the dlm domain. A node may join the domain if its major version is
3711 * identical to all other nodes and its minor version is greater than 3710 * identical to all other nodes and its minor version is greater than
3712 * or equal to all other nodes. When its minor version is greater than 3711 * or equal to all other nodes. When its minor version is greater than
3713 * the other nodes, it will run at the minor version specified by the 3712 * the other nodes, it will run at the minor version specified by the
3714 * other nodes. 3713 * other nodes.
3715 * 3714 *
3716 * If a locking change is made that will not be compatible with older 3715 * If a locking change is made that will not be compatible with older
3717 * versions, the major number must be increased and the minor version set 3716 * versions, the major number must be increased and the minor version set
3718 * to zero. If a change merely adds a behavior that can be disabled when 3717 * to zero. If a change merely adds a behavior that can be disabled when
3719 * speaking to older versions, the minor version must be increased. If a 3718 * speaking to older versions, the minor version must be increased. If a
3720 * change adds a fully backwards compatible change (eg, LVB changes that 3719 * change adds a fully backwards compatible change (eg, LVB changes that
3721 * are just ignored by older versions), the version does not need to be 3720 * are just ignored by older versions), the version does not need to be
3722 * updated. 3721 * updated.
3723 */ 3722 */
3724 static struct ocfs2_locking_protocol lproto = { 3723 static struct ocfs2_locking_protocol lproto = {
3725 .lp_max_version = { 3724 .lp_max_version = {
3726 .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 3725 .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
3727 .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 3726 .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
3728 }, 3727 },
3729 .lp_lock_ast = ocfs2_locking_ast, 3728 .lp_lock_ast = ocfs2_locking_ast,
3730 .lp_blocking_ast = ocfs2_blocking_ast, 3729 .lp_blocking_ast = ocfs2_blocking_ast,
3731 .lp_unlock_ast = ocfs2_unlock_ast, 3730 .lp_unlock_ast = ocfs2_unlock_ast,
3732 }; 3731 };
3733 3732
3734 void ocfs2_set_locking_protocol(void) 3733 void ocfs2_set_locking_protocol(void)
3735 { 3734 {
3736 ocfs2_stack_glue_set_locking_protocol(&lproto); 3735 ocfs2_stack_glue_set_locking_protocol(&lproto);
3737 } 3736 }
3738 3737
3739 3738
3740 static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3739 static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3741 struct ocfs2_lock_res *lockres) 3740 struct ocfs2_lock_res *lockres)
3742 { 3741 {
3743 int status; 3742 int status;
3744 struct ocfs2_unblock_ctl ctl = {0, 0,}; 3743 struct ocfs2_unblock_ctl ctl = {0, 0,};
3745 unsigned long flags; 3744 unsigned long flags;
3746 3745
3747 /* Our reference to the lockres in this function can be 3746 /* Our reference to the lockres in this function can be
3748 * considered valid until we remove the OCFS2_LOCK_QUEUED 3747 * considered valid until we remove the OCFS2_LOCK_QUEUED
3749 * flag. */ 3748 * flag. */
3750 3749
3751 mlog_entry_void(); 3750 mlog_entry_void();
3752 3751
3753 BUG_ON(!lockres); 3752 BUG_ON(!lockres);
3754 BUG_ON(!lockres->l_ops); 3753 BUG_ON(!lockres->l_ops);
3755 3754
3756 mlog(0, "lockres %s blocked.\n", lockres->l_name); 3755 mlog(0, "lockres %s blocked.\n", lockres->l_name);
3757 3756
3758 /* Detect whether a lock has been marked as going away while 3757 /* Detect whether a lock has been marked as going away while
3759 * the downconvert thread was processing other things. A lock can 3758 * the downconvert thread was processing other things. A lock can
3760 * still be marked with OCFS2_LOCK_FREEING after this check, 3759 * still be marked with OCFS2_LOCK_FREEING after this check,
3761 * but short circuiting here will still save us some 3760 * but short circuiting here will still save us some
3762 * performance. */ 3761 * performance. */
3763 spin_lock_irqsave(&lockres->l_lock, flags); 3762 spin_lock_irqsave(&lockres->l_lock, flags);
3764 if (lockres->l_flags & OCFS2_LOCK_FREEING) 3763 if (lockres->l_flags & OCFS2_LOCK_FREEING)
3765 goto unqueue; 3764 goto unqueue;
3766 spin_unlock_irqrestore(&lockres->l_lock, flags); 3765 spin_unlock_irqrestore(&lockres->l_lock, flags);
3767 3766
3768 status = ocfs2_unblock_lock(osb, lockres, &ctl); 3767 status = ocfs2_unblock_lock(osb, lockres, &ctl);
3769 if (status < 0) 3768 if (status < 0)
3770 mlog_errno(status); 3769 mlog_errno(status);
3771 3770
3772 spin_lock_irqsave(&lockres->l_lock, flags); 3771 spin_lock_irqsave(&lockres->l_lock, flags);
3773 unqueue: 3772 unqueue:
3774 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3773 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
3775 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3774 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
3776 } else 3775 } else
3777 ocfs2_schedule_blocked_lock(osb, lockres); 3776 ocfs2_schedule_blocked_lock(osb, lockres);
3778 3777
3779 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 3778 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
3780 ctl.requeue ? "yes" : "no"); 3779 ctl.requeue ? "yes" : "no");
3781 spin_unlock_irqrestore(&lockres->l_lock, flags); 3780 spin_unlock_irqrestore(&lockres->l_lock, flags);
3782 3781
3783 if (ctl.unblock_action != UNBLOCK_CONTINUE 3782 if (ctl.unblock_action != UNBLOCK_CONTINUE
3784 && lockres->l_ops->post_unlock) 3783 && lockres->l_ops->post_unlock)
3785 lockres->l_ops->post_unlock(osb, lockres); 3784 lockres->l_ops->post_unlock(osb, lockres);
3786 3785
3787 mlog_exit_void(); 3786 mlog_exit_void();
3788 } 3787 }
3789 3788
3790 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3789 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3791 struct ocfs2_lock_res *lockres) 3790 struct ocfs2_lock_res *lockres)
3792 { 3791 {
3793 mlog_entry_void(); 3792 mlog_entry_void();
3794 3793
3795 assert_spin_locked(&lockres->l_lock); 3794 assert_spin_locked(&lockres->l_lock);
3796 3795
3797 if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3796 if (lockres->l_flags & OCFS2_LOCK_FREEING) {
3798 /* Do not schedule a lock for downconvert when it's on 3797 /* Do not schedule a lock for downconvert when it's on
3799 * the way to destruction - any nodes wanting access 3798 * the way to destruction - any nodes wanting access
3800 * to the resource will get it soon. */ 3799 * to the resource will get it soon. */
3801 mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 3800 mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n",
3802 lockres->l_name, lockres->l_flags); 3801 lockres->l_name, lockres->l_flags);
3803 return; 3802 return;
3804 } 3803 }
3805 3804
3806 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3805 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
3807 3806
3808 spin_lock(&osb->dc_task_lock); 3807 spin_lock(&osb->dc_task_lock);
3809 if (list_empty(&lockres->l_blocked_list)) { 3808 if (list_empty(&lockres->l_blocked_list)) {
3810 list_add_tail(&lockres->l_blocked_list, 3809 list_add_tail(&lockres->l_blocked_list,
3811 &osb->blocked_lock_list); 3810 &osb->blocked_lock_list);
3812 osb->blocked_lock_count++; 3811 osb->blocked_lock_count++;
3813 } 3812 }
3814 spin_unlock(&osb->dc_task_lock); 3813 spin_unlock(&osb->dc_task_lock);
3815 3814
3816 mlog_exit_void(); 3815 mlog_exit_void();
3817 } 3816 }
3818 3817
3819 static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 3818 static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
3820 { 3819 {
3821 unsigned long processed; 3820 unsigned long processed;
3822 struct ocfs2_lock_res *lockres; 3821 struct ocfs2_lock_res *lockres;
3823 3822
3824 mlog_entry_void(); 3823 mlog_entry_void();
3825 3824
3826 spin_lock(&osb->dc_task_lock); 3825 spin_lock(&osb->dc_task_lock);
3827 /* grab this early so we know to try again if a state change and 3826 /* grab this early so we know to try again if a state change and
3828 * wake happens part-way through our work */ 3827 * wake happens part-way through our work */
3829 osb->dc_work_sequence = osb->dc_wake_sequence; 3828 osb->dc_work_sequence = osb->dc_wake_sequence;
3830 3829
3831 processed = osb->blocked_lock_count; 3830 processed = osb->blocked_lock_count;
3832 while (processed) { 3831 while (processed) {
3833 BUG_ON(list_empty(&osb->blocked_lock_list)); 3832 BUG_ON(list_empty(&osb->blocked_lock_list));
3834 3833
3835 lockres = list_entry(osb->blocked_lock_list.next, 3834 lockres = list_entry(osb->blocked_lock_list.next,
3836 struct ocfs2_lock_res, l_blocked_list); 3835 struct ocfs2_lock_res, l_blocked_list);
3837 list_del_init(&lockres->l_blocked_list); 3836 list_del_init(&lockres->l_blocked_list);
3838 osb->blocked_lock_count--; 3837 osb->blocked_lock_count--;
3839 spin_unlock(&osb->dc_task_lock); 3838 spin_unlock(&osb->dc_task_lock);
3840 3839
3841 BUG_ON(!processed); 3840 BUG_ON(!processed);
3842 processed--; 3841 processed--;
3843 3842
3844 ocfs2_process_blocked_lock(osb, lockres); 3843 ocfs2_process_blocked_lock(osb, lockres);
3845 3844
3846 spin_lock(&osb->dc_task_lock); 3845 spin_lock(&osb->dc_task_lock);
3847 } 3846 }
3848 spin_unlock(&osb->dc_task_lock); 3847 spin_unlock(&osb->dc_task_lock);
3849 3848
3850 mlog_exit_void(); 3849 mlog_exit_void();
3851 } 3850 }
3852 3851
3853 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 3852 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
3854 { 3853 {
3855 int empty = 0; 3854 int empty = 0;
3856 3855
3857 spin_lock(&osb->dc_task_lock); 3856 spin_lock(&osb->dc_task_lock);
3858 if (list_empty(&osb->blocked_lock_list)) 3857 if (list_empty(&osb->blocked_lock_list))
3859 empty = 1; 3858 empty = 1;
3860 3859
3861 spin_unlock(&osb->dc_task_lock); 3860 spin_unlock(&osb->dc_task_lock);
3862 return empty; 3861 return empty;
3863 } 3862 }
3864 3863
3865 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 3864 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
3866 { 3865 {
3867 int should_wake = 0; 3866 int should_wake = 0;
3868 3867
3869 spin_lock(&osb->dc_task_lock); 3868 spin_lock(&osb->dc_task_lock);
3870 if (osb->dc_work_sequence != osb->dc_wake_sequence) 3869 if (osb->dc_work_sequence != osb->dc_wake_sequence)
3871 should_wake = 1; 3870 should_wake = 1;
3872 spin_unlock(&osb->dc_task_lock); 3871 spin_unlock(&osb->dc_task_lock);
3873 3872
3874 return should_wake; 3873 return should_wake;
3875 } 3874 }
3876 3875
3877 static int ocfs2_downconvert_thread(void *arg) 3876 static int ocfs2_downconvert_thread(void *arg)
3878 { 3877 {
3879 int status = 0; 3878 int status = 0;
3880 struct ocfs2_super *osb = arg; 3879 struct ocfs2_super *osb = arg;
3881 3880
3882 /* only quit once we've been asked to stop and there is no more 3881 /* only quit once we've been asked to stop and there is no more
3883 * work available */ 3882 * work available */
3884 while (!(kthread_should_stop() && 3883 while (!(kthread_should_stop() &&
3885 ocfs2_downconvert_thread_lists_empty(osb))) { 3884 ocfs2_downconvert_thread_lists_empty(osb))) {
3886 3885
3887 wait_event_interruptible(osb->dc_event, 3886 wait_event_interruptible(osb->dc_event,
3888 ocfs2_downconvert_thread_should_wake(osb) || 3887 ocfs2_downconvert_thread_should_wake(osb) ||
3889 kthread_should_stop()); 3888 kthread_should_stop());
3890 3889
3891 mlog(0, "downconvert_thread: awoken\n"); 3890 mlog(0, "downconvert_thread: awoken\n");
3892 3891
3893 ocfs2_downconvert_thread_do_work(osb); 3892 ocfs2_downconvert_thread_do_work(osb);
3894 } 3893 }
3895 3894
3896 osb->dc_task = NULL; 3895 osb->dc_task = NULL;
3897 return status; 3896 return status;
3898 } 3897 }
3899 3898
3900 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 3899 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
3901 { 3900 {
3902 spin_lock(&osb->dc_task_lock); 3901 spin_lock(&osb->dc_task_lock);
3903 /* make sure the voting thread gets a swipe at whatever changes 3902 /* make sure the voting thread gets a swipe at whatever changes
3904 * the caller may have made to the voting state */ 3903 * the caller may have made to the voting state */
3905 osb->dc_wake_sequence++; 3904 osb->dc_wake_sequence++;
3906 spin_unlock(&osb->dc_task_lock); 3905 spin_unlock(&osb->dc_task_lock);
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * journal.c 4 * journal.c
5 * 5 *
6 * Defines functions of journalling api 6 * Defines functions of journalling api
7 * 7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/types.h> 27 #include <linux/types.h>
28 #include <linux/slab.h> 28 #include <linux/slab.h>
29 #include <linux/highmem.h> 29 #include <linux/highmem.h>
30 #include <linux/kthread.h> 30 #include <linux/kthread.h>
31 #include <linux/time.h> 31 #include <linux/time.h>
32 #include <linux/random.h> 32 #include <linux/random.h>
33 33
34 #define MLOG_MASK_PREFIX ML_JOURNAL 34 #define MLOG_MASK_PREFIX ML_JOURNAL
35 #include <cluster/masklog.h> 35 #include <cluster/masklog.h>
36 36
37 #include "ocfs2.h" 37 #include "ocfs2.h"
38 38
39 #include "alloc.h" 39 #include "alloc.h"
40 #include "blockcheck.h" 40 #include "blockcheck.h"
41 #include "dir.h" 41 #include "dir.h"
42 #include "dlmglue.h" 42 #include "dlmglue.h"
43 #include "extent_map.h" 43 #include "extent_map.h"
44 #include "heartbeat.h" 44 #include "heartbeat.h"
45 #include "inode.h" 45 #include "inode.h"
46 #include "journal.h" 46 #include "journal.h"
47 #include "localalloc.h" 47 #include "localalloc.h"
48 #include "slot_map.h" 48 #include "slot_map.h"
49 #include "super.h" 49 #include "super.h"
50 #include "sysfile.h" 50 #include "sysfile.h"
51 #include "quota.h" 51 #include "quota.h"
52 52
53 #include "buffer_head_io.h" 53 #include "buffer_head_io.h"
54 54
55 DEFINE_SPINLOCK(trans_inc_lock); 55 DEFINE_SPINLOCK(trans_inc_lock);
56 56
57 #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 57 #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
58 58
59 static int ocfs2_force_read_journal(struct inode *inode); 59 static int ocfs2_force_read_journal(struct inode *inode);
60 static int ocfs2_recover_node(struct ocfs2_super *osb, 60 static int ocfs2_recover_node(struct ocfs2_super *osb,
61 int node_num, int slot_num); 61 int node_num, int slot_num);
62 static int __ocfs2_recovery_thread(void *arg); 62 static int __ocfs2_recovery_thread(void *arg);
63 static int ocfs2_commit_cache(struct ocfs2_super *osb); 63 static int ocfs2_commit_cache(struct ocfs2_super *osb);
64 static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota); 64 static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
65 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 65 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
66 int dirty, int replayed); 66 int dirty, int replayed);
67 static int ocfs2_trylock_journal(struct ocfs2_super *osb, 67 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
68 int slot_num); 68 int slot_num);
69 static int ocfs2_recover_orphans(struct ocfs2_super *osb, 69 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
70 int slot); 70 int slot);
71 static int ocfs2_commit_thread(void *arg); 71 static int ocfs2_commit_thread(void *arg);
72 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 72 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
73 int slot_num, 73 int slot_num,
74 struct ocfs2_dinode *la_dinode, 74 struct ocfs2_dinode *la_dinode,
75 struct ocfs2_dinode *tl_dinode, 75 struct ocfs2_dinode *tl_dinode,
76 struct ocfs2_quota_recovery *qrec); 76 struct ocfs2_quota_recovery *qrec);
77 77
/* Wait on mount completion without waiting for quotas (quota arg = 0). */
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
	return __ocfs2_wait_on_mount(osb, 0);
}
82 82
/* Wait on mount completion including quotas (quota arg = 1). */
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
	return __ocfs2_wait_on_mount(osb, 1);
}
87 87
88 /* 88 /*
89 * This replay_map is to track online/offline slots, so we could recover 89 * This replay_map is to track online/offline slots, so we could recover
90 * offline slots during recovery and mount 90 * offline slots during recovery and mount
91 */ 91 */
92 92
enum ocfs2_replay_state {
	REPLAY_UNNEEDED = 0,	/* Replay is not needed, so ignore this map */
	REPLAY_NEEDED,		/* Replay slots marked in rm_replay_slots */
	REPLAY_DONE		/* Replay was already queued */
};

struct ocfs2_replay_map {
	unsigned int rm_slots;			/* entries in rm_replay_slots */
	enum ocfs2_replay_state rm_state;
	/* One byte per slot, nonzero when that slot needs replay.  Use a
	 * C99 flexible array member rather than the old [0] GNU hack. */
	unsigned char rm_replay_slots[];
};
104 104
105 void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state) 105 void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
106 { 106 {
107 if (!osb->replay_map) 107 if (!osb->replay_map)
108 return; 108 return;
109 109
110 /* If we've already queued the replay, we don't have any more to do */ 110 /* If we've already queued the replay, we don't have any more to do */
111 if (osb->replay_map->rm_state == REPLAY_DONE) 111 if (osb->replay_map->rm_state == REPLAY_DONE)
112 return; 112 return;
113 113
114 osb->replay_map->rm_state = state; 114 osb->replay_map->rm_state = state;
115 } 115 }
116 116
117 int ocfs2_compute_replay_slots(struct ocfs2_super *osb) 117 int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
118 { 118 {
119 struct ocfs2_replay_map *replay_map; 119 struct ocfs2_replay_map *replay_map;
120 int i, node_num; 120 int i, node_num;
121 121
122 /* If replay map is already set, we don't do it again */ 122 /* If replay map is already set, we don't do it again */
123 if (osb->replay_map) 123 if (osb->replay_map)
124 return 0; 124 return 0;
125 125
126 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) + 126 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
127 (osb->max_slots * sizeof(char)), GFP_KERNEL); 127 (osb->max_slots * sizeof(char)), GFP_KERNEL);
128 128
129 if (!replay_map) { 129 if (!replay_map) {
130 mlog_errno(-ENOMEM); 130 mlog_errno(-ENOMEM);
131 return -ENOMEM; 131 return -ENOMEM;
132 } 132 }
133 133
134 spin_lock(&osb->osb_lock); 134 spin_lock(&osb->osb_lock);
135 135
136 replay_map->rm_slots = osb->max_slots; 136 replay_map->rm_slots = osb->max_slots;
137 replay_map->rm_state = REPLAY_UNNEEDED; 137 replay_map->rm_state = REPLAY_UNNEEDED;
138 138
139 /* set rm_replay_slots for offline slot(s) */ 139 /* set rm_replay_slots for offline slot(s) */
140 for (i = 0; i < replay_map->rm_slots; i++) { 140 for (i = 0; i < replay_map->rm_slots; i++) {
141 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT) 141 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
142 replay_map->rm_replay_slots[i] = 1; 142 replay_map->rm_replay_slots[i] = 1;
143 } 143 }
144 144
145 osb->replay_map = replay_map; 145 osb->replay_map = replay_map;
146 spin_unlock(&osb->osb_lock); 146 spin_unlock(&osb->osb_lock);
147 return 0; 147 return 0;
148 } 148 }
149 149
150 void ocfs2_queue_replay_slots(struct ocfs2_super *osb) 150 void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
151 { 151 {
152 struct ocfs2_replay_map *replay_map = osb->replay_map; 152 struct ocfs2_replay_map *replay_map = osb->replay_map;
153 int i; 153 int i;
154 154
155 if (!replay_map) 155 if (!replay_map)
156 return; 156 return;
157 157
158 if (replay_map->rm_state != REPLAY_NEEDED) 158 if (replay_map->rm_state != REPLAY_NEEDED)
159 return; 159 return;
160 160
161 for (i = 0; i < replay_map->rm_slots; i++) 161 for (i = 0; i < replay_map->rm_slots; i++)
162 if (replay_map->rm_replay_slots[i]) 162 if (replay_map->rm_replay_slots[i])
163 ocfs2_queue_recovery_completion(osb->journal, i, NULL, 163 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
164 NULL, NULL); 164 NULL, NULL);
165 replay_map->rm_state = REPLAY_DONE; 165 replay_map->rm_state = REPLAY_DONE;
166 } 166 }
167 167
168 void ocfs2_free_replay_slots(struct ocfs2_super *osb) 168 void ocfs2_free_replay_slots(struct ocfs2_super *osb)
169 { 169 {
170 struct ocfs2_replay_map *replay_map = osb->replay_map; 170 struct ocfs2_replay_map *replay_map = osb->replay_map;
171 171
172 if (!osb->replay_map) 172 if (!osb->replay_map)
173 return; 173 return;
174 174
175 kfree(replay_map); 175 kfree(replay_map);
176 osb->replay_map = NULL; 176 osb->replay_map = NULL;
177 } 177 }
178 178
179 int ocfs2_recovery_init(struct ocfs2_super *osb) 179 int ocfs2_recovery_init(struct ocfs2_super *osb)
180 { 180 {
181 struct ocfs2_recovery_map *rm; 181 struct ocfs2_recovery_map *rm;
182 182
183 mutex_init(&osb->recovery_lock); 183 mutex_init(&osb->recovery_lock);
184 osb->disable_recovery = 0; 184 osb->disable_recovery = 0;
185 osb->recovery_thread_task = NULL; 185 osb->recovery_thread_task = NULL;
186 init_waitqueue_head(&osb->recovery_event); 186 init_waitqueue_head(&osb->recovery_event);
187 187
188 rm = kzalloc(sizeof(struct ocfs2_recovery_map) + 188 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
189 osb->max_slots * sizeof(unsigned int), 189 osb->max_slots * sizeof(unsigned int),
190 GFP_KERNEL); 190 GFP_KERNEL);
191 if (!rm) { 191 if (!rm) {
192 mlog_errno(-ENOMEM); 192 mlog_errno(-ENOMEM);
193 return -ENOMEM; 193 return -ENOMEM;
194 } 194 }
195 195
196 rm->rm_entries = (unsigned int *)((char *)rm + 196 rm->rm_entries = (unsigned int *)((char *)rm +
197 sizeof(struct ocfs2_recovery_map)); 197 sizeof(struct ocfs2_recovery_map));
198 osb->recovery_map = rm; 198 osb->recovery_map = rm;
199 199
200 return 0; 200 return 0;
201 } 201 }
202 202
203 /* we can't grab the goofy sem lock from inside wait_event, so we use 203 /* we can't grab the goofy sem lock from inside wait_event, so we use
204 * memory barriers to make sure that we'll see the null task before 204 * memory barriers to make sure that we'll see the null task before
205 * being woken up */ 205 * being woken up */
206 static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) 206 static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
207 { 207 {
208 mb(); 208 mb();
209 return osb->recovery_thread_task != NULL; 209 return osb->recovery_thread_task != NULL;
210 } 210 }
211 211
212 void ocfs2_recovery_exit(struct ocfs2_super *osb) 212 void ocfs2_recovery_exit(struct ocfs2_super *osb)
213 { 213 {
214 struct ocfs2_recovery_map *rm; 214 struct ocfs2_recovery_map *rm;
215 215
216 /* disable any new recovery threads and wait for any currently 216 /* disable any new recovery threads and wait for any currently
217 * running ones to exit. Do this before setting the vol_state. */ 217 * running ones to exit. Do this before setting the vol_state. */
218 mutex_lock(&osb->recovery_lock); 218 mutex_lock(&osb->recovery_lock);
219 osb->disable_recovery = 1; 219 osb->disable_recovery = 1;
220 mutex_unlock(&osb->recovery_lock); 220 mutex_unlock(&osb->recovery_lock);
221 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); 221 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
222 222
223 /* At this point, we know that no more recovery threads can be 223 /* At this point, we know that no more recovery threads can be
224 * launched, so wait for any recovery completion work to 224 * launched, so wait for any recovery completion work to
225 * complete. */ 225 * complete. */
226 flush_workqueue(ocfs2_wq); 226 flush_workqueue(ocfs2_wq);
227 227
228 /* 228 /*
229 * Now that recovery is shut down, and the osb is about to be 229 * Now that recovery is shut down, and the osb is about to be
230 * freed, the osb_lock is not taken here. 230 * freed, the osb_lock is not taken here.
231 */ 231 */
232 rm = osb->recovery_map; 232 rm = osb->recovery_map;
233 /* XXX: Should we bug if there are dirty entries? */ 233 /* XXX: Should we bug if there are dirty entries? */
234 234
235 kfree(rm); 235 kfree(rm);
236 } 236 }
237 237
238 static int __ocfs2_recovery_map_test(struct ocfs2_super *osb, 238 static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
239 unsigned int node_num) 239 unsigned int node_num)
240 { 240 {
241 int i; 241 int i;
242 struct ocfs2_recovery_map *rm = osb->recovery_map; 242 struct ocfs2_recovery_map *rm = osb->recovery_map;
243 243
244 assert_spin_locked(&osb->osb_lock); 244 assert_spin_locked(&osb->osb_lock);
245 245
246 for (i = 0; i < rm->rm_used; i++) { 246 for (i = 0; i < rm->rm_used; i++) {
247 if (rm->rm_entries[i] == node_num) 247 if (rm->rm_entries[i] == node_num)
248 return 1; 248 return 1;
249 } 249 }
250 250
251 return 0; 251 return 0;
252 } 252 }
253 253
/* Behaves like test-and-set. Returns the previous value */
static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
				  unsigned int node_num)
{
	struct ocfs2_recovery_map *rm = osb->recovery_map;

	spin_lock(&osb->osb_lock);
	if (__ocfs2_recovery_map_test(osb, node_num)) {
		/* Already marked - another thread owns this recovery. */
		spin_unlock(&osb->osb_lock);
		return 1;
	}

	/* XXX: Can this be exploited? Not from o2dlm... */
	BUG_ON(rm->rm_used >= osb->max_slots);

	/* Append the node and publish it under the lock. */
	rm->rm_entries[rm->rm_used] = node_num;
	rm->rm_used++;
	spin_unlock(&osb->osb_lock);

	return 0;
}
275 275
276 static void ocfs2_recovery_map_clear(struct ocfs2_super *osb, 276 static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
277 unsigned int node_num) 277 unsigned int node_num)
278 { 278 {
279 int i; 279 int i;
280 struct ocfs2_recovery_map *rm = osb->recovery_map; 280 struct ocfs2_recovery_map *rm = osb->recovery_map;
281 281
282 spin_lock(&osb->osb_lock); 282 spin_lock(&osb->osb_lock);
283 283
284 for (i = 0; i < rm->rm_used; i++) { 284 for (i = 0; i < rm->rm_used; i++) {
285 if (rm->rm_entries[i] == node_num) 285 if (rm->rm_entries[i] == node_num)
286 break; 286 break;
287 } 287 }
288 288
289 if (i < rm->rm_used) { 289 if (i < rm->rm_used) {
290 /* XXX: be careful with the pointer math */ 290 /* XXX: be careful with the pointer math */
291 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), 291 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
292 (rm->rm_used - i - 1) * sizeof(unsigned int)); 292 (rm->rm_used - i - 1) * sizeof(unsigned int));
293 rm->rm_used--; 293 rm->rm_used--;
294 } 294 }
295 295
296 spin_unlock(&osb->osb_lock); 296 spin_unlock(&osb->osb_lock);
297 } 297 }
298 298
/*
 * Flush all pending transactions and checkpoint the journal.
 *
 * Takes j_trans_barrier exclusively so no new transactions can start,
 * flushes the jbd2 journal, bumps the transaction id (which tells
 * dlmglue that previously-journaled cluster locks are now safe to
 * drop), then wakes the downconvert thread and checkpoint waiters.
 * Returns 0 on success or a negative jbd2 error.
 */
static int ocfs2_commit_cache(struct ocfs2_super *osb)
{
	int status = 0;
	unsigned int flushed;
	unsigned long old_id;
	struct ocfs2_journal *journal = NULL;

	mlog_entry_void();

	journal = osb->journal;

	/* Flush all pending commits and checkpoint the journal. */
	down_write(&journal->j_trans_barrier);

	if (atomic_read(&journal->j_num_trans) == 0) {
		up_write(&journal->j_trans_barrier);
		mlog(0, "No transactions for me to flush!\n");
		goto finally;
	}

	jbd2_journal_lock_updates(journal->j_journal);
	status = jbd2_journal_flush(journal->j_journal);
	jbd2_journal_unlock_updates(journal->j_journal);
	if (status < 0) {
		up_write(&journal->j_trans_barrier);
		mlog_errno(status);
		goto finally;
	}

	/* New trans id marks everything up to here as checkpointed. */
	old_id = ocfs2_inc_trans_id(journal);

	flushed = atomic_read(&journal->j_num_trans);
	atomic_set(&journal->j_num_trans, 0);
	up_write(&journal->j_trans_barrier);

	mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n",
	     journal->j_trans_id, flushed);

	ocfs2_wake_downconvert_thread(osb);
	wake_up(&journal->j_checkpointed);
finally:
	mlog_exit(status);
	return status;
}
343 343
344 /* pass it NULL and it will allocate a new handle object for you. If 344 /* pass it NULL and it will allocate a new handle object for you. If
345 * you pass it a handle however, it may still return error, in which 345 * you pass it a handle however, it may still return error, in which
346 * case it has free'd the passed handle for you. */ 346 * case it has free'd the passed handle for you. */
347 handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) 347 handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
348 { 348 {
349 journal_t *journal = osb->journal->j_journal; 349 journal_t *journal = osb->journal->j_journal;
350 handle_t *handle; 350 handle_t *handle;
351 351
352 BUG_ON(!osb || !osb->journal->j_journal); 352 BUG_ON(!osb || !osb->journal->j_journal);
353 353
354 if (ocfs2_is_hard_readonly(osb)) 354 if (ocfs2_is_hard_readonly(osb))
355 return ERR_PTR(-EROFS); 355 return ERR_PTR(-EROFS);
356 356
357 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); 357 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
358 BUG_ON(max_buffs <= 0); 358 BUG_ON(max_buffs <= 0);
359 359
360 /* Nested transaction? Just return the handle... */ 360 /* Nested transaction? Just return the handle... */
361 if (journal_current_handle()) 361 if (journal_current_handle())
362 return jbd2_journal_start(journal, max_buffs); 362 return jbd2_journal_start(journal, max_buffs);
363 363
364 down_read(&osb->journal->j_trans_barrier); 364 down_read(&osb->journal->j_trans_barrier);
365 365
366 handle = jbd2_journal_start(journal, max_buffs); 366 handle = jbd2_journal_start(journal, max_buffs);
367 if (IS_ERR(handle)) { 367 if (IS_ERR(handle)) {
368 up_read(&osb->journal->j_trans_barrier); 368 up_read(&osb->journal->j_trans_barrier);
369 369
370 mlog_errno(PTR_ERR(handle)); 370 mlog_errno(PTR_ERR(handle));
371 371
372 if (is_journal_aborted(journal)) { 372 if (is_journal_aborted(journal)) {
373 ocfs2_abort(osb->sb, "Detected aborted journal"); 373 ocfs2_abort(osb->sb, "Detected aborted journal");
374 handle = ERR_PTR(-EROFS); 374 handle = ERR_PTR(-EROFS);
375 } 375 }
376 } else { 376 } else {
377 if (!ocfs2_mount_local(osb)) 377 if (!ocfs2_mount_local(osb))
378 atomic_inc(&(osb->journal->j_num_trans)); 378 atomic_inc(&(osb->journal->j_num_trans));
379 } 379 }
380 380
381 return handle; 381 return handle;
382 } 382 }
383 383
384 int ocfs2_commit_trans(struct ocfs2_super *osb, 384 int ocfs2_commit_trans(struct ocfs2_super *osb,
385 handle_t *handle) 385 handle_t *handle)
386 { 386 {
387 int ret, nested; 387 int ret, nested;
388 struct ocfs2_journal *journal = osb->journal; 388 struct ocfs2_journal *journal = osb->journal;
389 389
390 BUG_ON(!handle); 390 BUG_ON(!handle);
391 391
392 nested = handle->h_ref > 1; 392 nested = handle->h_ref > 1;
393 ret = jbd2_journal_stop(handle); 393 ret = jbd2_journal_stop(handle);
394 if (ret < 0) 394 if (ret < 0)
395 mlog_errno(ret); 395 mlog_errno(ret);
396 396
397 if (!nested) 397 if (!nested)
398 up_read(&journal->j_trans_barrier); 398 up_read(&journal->j_trans_barrier);
399 399
400 return ret; 400 return ret;
401 } 401 }
402 402
/*
 * 'nblocks' is what you want to add to the current
 * transaction. extend_trans will either extend the current handle by
 * nblocks, or commit it and start a new one with nblocks credits.
 *
 * This might call jbd2_journal_restart() which will commit dirty buffers
 * and then restart the transaction. Before calling
 * ocfs2_extend_trans(), any changed blocks should have been
 * dirtied. After calling it, all blocks which need to be changed must
 * go through another set of journal_access/journal_dirty calls.
 *
 * WARNING: This will not release any semaphores or disk locks taken
 * during the transaction, so make sure they were taken *before*
 * start_trans or we'll have ordering deadlocks.
 *
 * WARNING2: Note that we do *not* drop j_trans_barrier here. This is
 * good because transaction ids haven't yet been recorded on the
 * cluster locks associated with this handle.
 */
int ocfs2_extend_trans(handle_t *handle, int nblocks)
{
	int status;

	BUG_ON(!handle);
	BUG_ON(!nblocks);

	mlog_entry_void();

	mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);

#ifdef CONFIG_OCFS2_DEBUG_FS
	/* Debug builds skip the extend and always take the restart
	 * path below, so it gets exercised. */
	status = 1;
#else
	status = jbd2_journal_extend(handle, nblocks);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
#endif

	/* status > 0: the extend could not be granted - commit the
	 * current transaction and restart with fresh credits. */
	if (status > 0) {
		mlog(0,
		     "jbd2_journal_extend failed, trying "
		     "jbd2_journal_restart\n");
		status = jbd2_journal_restart(handle, nblocks);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	status = 0;
bail:

	mlog_exit(status);
	return status;
}
460 460
/*
 * Per-block-type ecc trigger descriptor.  ot_offset is the byte offset
 * of the struct ocfs2_block_check inside the block, for block types
 * where that offset is fixed.
 */
struct ocfs2_triggers {
	struct jbd2_buffer_trigger_type ot_triggers;
	int ot_offset;
};

/* Recover the ocfs2 wrapper from the embedded jbd2 trigger type. */
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
	return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
470 470
/* Commit-time ecc trigger for block types with a fixed check offset. */
static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
				 struct buffer_head *bh,
				 void *data, size_t size)
{
	struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);

	/*
	 * We aren't guaranteed to have the superblock here, so we
	 * must unconditionally compute the ecc data.
	 * __ocfs2_journal_access() will only set the triggers if
	 * metaecc is enabled.
	 */
	ocfs2_block_check_compute(data, size, data + ot->ot_offset);
}
485 485
/*
 * Quota blocks have their own trigger because the struct ocfs2_block_check
 * offset depends on the blocksize.
 */
static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
				    struct buffer_head *bh,
				    void *data, size_t size)
{
	/* Locate the trailer from the blocksize rather than ot_offset. */
	struct ocfs2_disk_dqtrailer *dqt =
		ocfs2_block_dqtrailer(size, data);

	/*
	 * We aren't guaranteed to have the superblock here, so we
	 * must unconditionally compute the ecc data.
	 * __ocfs2_journal_access() will only set the triggers if
	 * metaecc is enabled.
	 */
	ocfs2_block_check_compute(data, size, &dqt->dq_check);
}
505 505
/*
 * Directory blocks also have their own trigger because the
 * struct ocfs2_block_check offset depends on the blocksize.
 */
static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
				    struct buffer_head *bh,
				    void *data, size_t size)
{
	/* Locate the trailer from the blocksize rather than ot_offset. */
	struct ocfs2_dir_block_trailer *trailer =
		ocfs2_dir_trailer_from_size(size, data);

	/*
	 * We aren't guaranteed to have the superblock here, so we
	 * must unconditionally compute the ecc data.
	 * __ocfs2_journal_access() will only set the triggers if
	 * metaecc is enabled.
	 */
	ocfs2_block_check_compute(data, size, &trailer->db_check);
}
525 525
/* Called by jbd2 when the journal aborts; take the filesystem down. */
static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
				struct buffer_head *bh)
{
	mlog(ML_ERROR,
	     "ocfs2_abort_trigger called by JBD2.  bh = 0x%lx, "
	     "bh->b_blocknr = %llu\n",
	     (unsigned long)bh,
	     (unsigned long long)bh->b_blocknr);

	/* We aren't guaranteed to have the superblock here - but if we
	 * don't, it'll just crash. */
	ocfs2_error(bh->b_assoc_map->host->i_sb,
		    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
540 540
/*
 * One trigger table per metadata block type.  Types with a fixed ecc
 * offset use the generic commit trigger plus ot_offset; directory and
 * quota blocks compute the trailer location from the blocksize in
 * their own commit triggers and leave ot_offset unset.
 */
static struct ocfs2_triggers di_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
};

static struct ocfs2_triggers eb_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
};

static struct ocfs2_triggers gd_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
};

static struct ocfs2_triggers db_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_db_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
};

static struct ocfs2_triggers xb_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
};

static struct ocfs2_triggers dq_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_dq_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
};

static struct ocfs2_triggers dr_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
};

static struct ocfs2_triggers dl_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort = ocfs2_abort_trigger,
	},
	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
};
602 602
603 static int __ocfs2_journal_access(handle_t *handle, 603 static int __ocfs2_journal_access(handle_t *handle,
604 struct inode *inode, 604 struct inode *inode,
605 struct buffer_head *bh, 605 struct buffer_head *bh,
606 struct ocfs2_triggers *triggers, 606 struct ocfs2_triggers *triggers,
607 int type) 607 int type)
608 { 608 {
609 int status; 609 int status;
610 610
611 BUG_ON(!inode); 611 BUG_ON(!inode);
612 BUG_ON(!handle); 612 BUG_ON(!handle);
613 BUG_ON(!bh); 613 BUG_ON(!bh);
614 614
615 mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", 615 mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n",
616 (unsigned long long)bh->b_blocknr, type, 616 (unsigned long long)bh->b_blocknr, type,
617 (type == OCFS2_JOURNAL_ACCESS_CREATE) ? 617 (type == OCFS2_JOURNAL_ACCESS_CREATE) ?
618 "OCFS2_JOURNAL_ACCESS_CREATE" : 618 "OCFS2_JOURNAL_ACCESS_CREATE" :
619 "OCFS2_JOURNAL_ACCESS_WRITE", 619 "OCFS2_JOURNAL_ACCESS_WRITE",
620 bh->b_size); 620 bh->b_size);
621 621
622 /* we can safely remove this assertion after testing. */ 622 /* we can safely remove this assertion after testing. */
623 if (!buffer_uptodate(bh)) { 623 if (!buffer_uptodate(bh)) {
624 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n"); 624 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
625 mlog(ML_ERROR, "b_blocknr=%llu\n", 625 mlog(ML_ERROR, "b_blocknr=%llu\n",
626 (unsigned long long)bh->b_blocknr); 626 (unsigned long long)bh->b_blocknr);
627 BUG(); 627 BUG();
628 } 628 }
629 629
630 /* Set the current transaction information on the inode so 630 /* Set the current transaction information on the inode so
631 * that the locking code knows whether it can drop it's locks 631 * that the locking code knows whether it can drop it's locks
632 * on this inode or not. We're protected from the commit 632 * on this inode or not. We're protected from the commit
633 * thread updating the current transaction id until 633 * thread updating the current transaction id until
634 * ocfs2_commit_trans() because ocfs2_start_trans() took 634 * ocfs2_commit_trans() because ocfs2_start_trans() took
635 * j_trans_barrier for us. */ 635 * j_trans_barrier for us. */
636 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); 636 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode);
637 637
638 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 638 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
639 switch (type) { 639 switch (type) {
640 case OCFS2_JOURNAL_ACCESS_CREATE: 640 case OCFS2_JOURNAL_ACCESS_CREATE:
641 case OCFS2_JOURNAL_ACCESS_WRITE: 641 case OCFS2_JOURNAL_ACCESS_WRITE:
642 status = jbd2_journal_get_write_access(handle, bh); 642 status = jbd2_journal_get_write_access(handle, bh);
643 break; 643 break;
644 644
645 case OCFS2_JOURNAL_ACCESS_UNDO: 645 case OCFS2_JOURNAL_ACCESS_UNDO:
646 status = jbd2_journal_get_undo_access(handle, bh); 646 status = jbd2_journal_get_undo_access(handle, bh);
647 break; 647 break;
648 648
649 default: 649 default:
650 status = -EINVAL; 650 status = -EINVAL;
651 mlog(ML_ERROR, "Uknown access type!\n"); 651 mlog(ML_ERROR, "Uknown access type!\n");
652 } 652 }
653 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers) 653 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
654 jbd2_journal_set_triggers(bh, &triggers->ot_triggers); 654 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
655 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 655 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
656 656
657 if (status < 0) 657 if (status < 0)
658 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 658 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
659 status, type); 659 status, type);
660 660
661 mlog_exit(status); 661 mlog_exit(status);
662 return status; 662 return status;
663 } 663 }
664 664
665 int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, 665 int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
666 struct buffer_head *bh, int type) 666 struct buffer_head *bh, int type)
667 { 667 {
668 return __ocfs2_journal_access(handle, inode, bh, &di_triggers, 668 return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
669 type); 669 type);
670 } 670 }
671 671
672 int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, 672 int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
673 struct buffer_head *bh, int type) 673 struct buffer_head *bh, int type)
674 { 674 {
675 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers, 675 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
676 type); 676 type);
677 } 677 }
678 678
679 int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, 679 int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
680 struct buffer_head *bh, int type) 680 struct buffer_head *bh, int type)
681 { 681 {
682 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers, 682 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
683 type); 683 type);
684 } 684 }
685 685
686 int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, 686 int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
687 struct buffer_head *bh, int type) 687 struct buffer_head *bh, int type)
688 { 688 {
689 return __ocfs2_journal_access(handle, inode, bh, &db_triggers, 689 return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
690 type); 690 type);
691 } 691 }
692 692
693 int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, 693 int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
694 struct buffer_head *bh, int type) 694 struct buffer_head *bh, int type)
695 { 695 {
696 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers, 696 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
697 type); 697 type);
698 } 698 }
699 699
700 int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, 700 int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
701 struct buffer_head *bh, int type) 701 struct buffer_head *bh, int type)
702 { 702 {
703 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers, 703 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
704 type); 704 type);
705 } 705 }
706 706
707 int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, 707 int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
708 struct buffer_head *bh, int type) 708 struct buffer_head *bh, int type)
709 { 709 {
710 return __ocfs2_journal_access(handle, inode, bh, &dr_triggers, 710 return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
711 type); 711 type);
712 } 712 }
713 713
714 int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, 714 int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
715 struct buffer_head *bh, int type) 715 struct buffer_head *bh, int type)
716 { 716 {
717 return __ocfs2_journal_access(handle, inode, bh, &dl_triggers, 717 return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
718 type); 718 type);
719 } 719 }
720 720
721 int ocfs2_journal_access(handle_t *handle, struct inode *inode, 721 int ocfs2_journal_access(handle_t *handle, struct inode *inode,
722 struct buffer_head *bh, int type) 722 struct buffer_head *bh, int type)
723 { 723 {
724 return __ocfs2_journal_access(handle, inode, bh, NULL, type); 724 return __ocfs2_journal_access(handle, inode, bh, NULL, type);
725 } 725 }
726 726
727 int ocfs2_journal_dirty(handle_t *handle, 727 int ocfs2_journal_dirty(handle_t *handle,
728 struct buffer_head *bh) 728 struct buffer_head *bh)
729 { 729 {
730 int status; 730 int status;
731 731
732 mlog_entry("(bh->b_blocknr=%llu)\n", 732 mlog_entry("(bh->b_blocknr=%llu)\n",
733 (unsigned long long)bh->b_blocknr); 733 (unsigned long long)bh->b_blocknr);
734 734
735 status = jbd2_journal_dirty_metadata(handle, bh); 735 status = jbd2_journal_dirty_metadata(handle, bh);
736 if (status < 0) 736 if (status < 0)
737 mlog(ML_ERROR, "Could not dirty metadata buffer. " 737 mlog(ML_ERROR, "Could not dirty metadata buffer. "
738 "(bh->b_blocknr=%llu)\n", 738 "(bh->b_blocknr=%llu)\n",
739 (unsigned long long)bh->b_blocknr); 739 (unsigned long long)bh->b_blocknr);
740 740
741 mlog_exit(status); 741 mlog_exit(status);
742 return status; 742 return status;
743 } 743 }
744 744
/* Default commit interval mirrors the jbd2 default, in jiffies. */
#define OCFS2_DEFAULT_COMMIT_INTERVAL	(HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)

/*
 * Push the mount options (commit interval, barrier flag) into the
 * live jbd2 journal.  The journal fields are protected by
 * j_state_lock.
 */
void ocfs2_set_journal_params(struct ocfs2_super *osb)
{
	journal_t *journal = osb->journal->j_journal;
	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;

	/* A commit interval given at mount time overrides the default. */
	if (osb->osb_commit_interval)
		commit_interval = osb->osb_commit_interval;

	spin_lock(&journal->j_state_lock);
	journal->j_commit_interval = commit_interval;
	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
		journal->j_flags |= JBD2_BARRIER;
	else
		journal->j_flags &= ~JBD2_BARRIER;
	spin_unlock(&journal->j_state_lock);
}
763 763
764 int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) 764 int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
765 { 765 {
766 int status = -1; 766 int status = -1;
767 struct inode *inode = NULL; /* the journal inode */ 767 struct inode *inode = NULL; /* the journal inode */
768 journal_t *j_journal = NULL; 768 journal_t *j_journal = NULL;
769 struct ocfs2_dinode *di = NULL; 769 struct ocfs2_dinode *di = NULL;
770 struct buffer_head *bh = NULL; 770 struct buffer_head *bh = NULL;
771 struct ocfs2_super *osb; 771 struct ocfs2_super *osb;
772 int inode_lock = 0; 772 int inode_lock = 0;
773 773
774 mlog_entry_void(); 774 mlog_entry_void();
775 775
776 BUG_ON(!journal); 776 BUG_ON(!journal);
777 777
778 osb = journal->j_osb; 778 osb = journal->j_osb;
779 779
780 /* already have the inode for our journal */ 780 /* already have the inode for our journal */
781 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 781 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
782 osb->slot_num); 782 osb->slot_num);
783 if (inode == NULL) { 783 if (inode == NULL) {
784 status = -EACCES; 784 status = -EACCES;
785 mlog_errno(status); 785 mlog_errno(status);
786 goto done; 786 goto done;
787 } 787 }
788 if (is_bad_inode(inode)) { 788 if (is_bad_inode(inode)) {
789 mlog(ML_ERROR, "access error (bad inode)\n"); 789 mlog(ML_ERROR, "access error (bad inode)\n");
790 iput(inode); 790 iput(inode);
791 inode = NULL; 791 inode = NULL;
792 status = -EACCES; 792 status = -EACCES;
793 goto done; 793 goto done;
794 } 794 }
795 795
796 SET_INODE_JOURNAL(inode); 796 SET_INODE_JOURNAL(inode);
797 OCFS2_I(inode)->ip_open_count++; 797 OCFS2_I(inode)->ip_open_count++;
798 798
799 /* Skip recovery waits here - journal inode metadata never 799 /* Skip recovery waits here - journal inode metadata never
800 * changes in a live cluster so it can be considered an 800 * changes in a live cluster so it can be considered an
801 * exception to the rule. */ 801 * exception to the rule. */
802 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 802 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
803 if (status < 0) { 803 if (status < 0) {
804 if (status != -ERESTARTSYS) 804 if (status != -ERESTARTSYS)
805 mlog(ML_ERROR, "Could not get lock on journal!\n"); 805 mlog(ML_ERROR, "Could not get lock on journal!\n");
806 goto done; 806 goto done;
807 } 807 }
808 808
809 inode_lock = 1; 809 inode_lock = 1;
810 di = (struct ocfs2_dinode *)bh->b_data; 810 di = (struct ocfs2_dinode *)bh->b_data;
811 811
812 if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { 812 if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) {
813 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", 813 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
814 inode->i_size); 814 inode->i_size);
815 status = -EINVAL; 815 status = -EINVAL;
816 goto done; 816 goto done;
817 } 817 }
818 818
819 mlog(0, "inode->i_size = %lld\n", inode->i_size); 819 mlog(0, "inode->i_size = %lld\n", inode->i_size);
820 mlog(0, "inode->i_blocks = %llu\n", 820 mlog(0, "inode->i_blocks = %llu\n",
821 (unsigned long long)inode->i_blocks); 821 (unsigned long long)inode->i_blocks);
822 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); 822 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
823 823
824 /* call the kernels journal init function now */ 824 /* call the kernels journal init function now */
825 j_journal = jbd2_journal_init_inode(inode); 825 j_journal = jbd2_journal_init_inode(inode);
826 if (j_journal == NULL) { 826 if (j_journal == NULL) {
827 mlog(ML_ERROR, "Linux journal layer error\n"); 827 mlog(ML_ERROR, "Linux journal layer error\n");
828 status = -EINVAL; 828 status = -EINVAL;
829 goto done; 829 goto done;
830 } 830 }
831 831
832 mlog(0, "Returned from jbd2_journal_init_inode\n"); 832 mlog(0, "Returned from jbd2_journal_init_inode\n");
833 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); 833 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
834 834
835 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & 835 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
836 OCFS2_JOURNAL_DIRTY_FL); 836 OCFS2_JOURNAL_DIRTY_FL);
837 837
838 journal->j_journal = j_journal; 838 journal->j_journal = j_journal;
839 journal->j_inode = inode; 839 journal->j_inode = inode;
840 journal->j_bh = bh; 840 journal->j_bh = bh;
841 841
842 ocfs2_set_journal_params(osb); 842 ocfs2_set_journal_params(osb);
843 843
844 journal->j_state = OCFS2_JOURNAL_LOADED; 844 journal->j_state = OCFS2_JOURNAL_LOADED;
845 845
846 status = 0; 846 status = 0;
847 done: 847 done:
848 if (status < 0) { 848 if (status < 0) {
849 if (inode_lock) 849 if (inode_lock)
850 ocfs2_inode_unlock(inode, 1); 850 ocfs2_inode_unlock(inode, 1);
851 brelse(bh); 851 brelse(bh);
852 if (inode) { 852 if (inode) {
853 OCFS2_I(inode)->ip_open_count--; 853 OCFS2_I(inode)->ip_open_count--;
854 iput(inode); 854 iput(inode);
855 } 855 }
856 } 856 }
857 857
858 mlog_exit(status); 858 mlog_exit(status);
859 return status; 859 return status;
860 } 860 }
861 861
862 static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di) 862 static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
863 { 863 {
864 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1); 864 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
865 } 865 }
866 866
867 static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di) 867 static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
868 { 868 {
869 return le32_to_cpu(di->id1.journal1.ij_recovery_generation); 869 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
870 } 870 }
871 871
/*
 * Set or clear OCFS2_JOURNAL_DIRTY_FL in this node's journal inode and
 * write the inode block back to disk.  When @replayed is set, the
 * recovery generation is bumped so other nodes can see that this
 * journal has been replayed.
 *
 * Returns 0 on success or the negative error from the block write.
 */
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
				      int dirty, int replayed)
{
	int status;
	unsigned int flags;
	struct ocfs2_journal *journal = osb->journal;
	struct buffer_head *bh = journal->j_bh;
	struct ocfs2_dinode *fe;

	mlog_entry_void();

	fe = (struct ocfs2_dinode *)bh->b_data;

	/* The journal bh on the osb always comes from ocfs2_journal_init()
	 * and was validated there inside ocfs2_inode_lock_full().  It's a
	 * code bug if we mess it up. */
	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));

	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	if (dirty)
		flags |= OCFS2_JOURNAL_DIRTY_FL;
	else
		flags &= ~OCFS2_JOURNAL_DIRTY_FL;
	fe->id1.journal1.ij_flags = cpu_to_le32(flags);

	if (replayed)
		ocfs2_bump_recovery_generation(fe);

	/* Recompute the metadata ECC before the block hits disk. */
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
	status = ocfs2_write_block(osb, bh, journal->j_inode);
	if (status < 0)
		mlog_errno(status);

	mlog_exit(status);
	return status;
}
908 908
/*
 * If the journal has been kmalloc'd it needs to be freed after this
 * call.
 *
 * Tears down this node's journal: stops the commit thread, flushes
 * remaining transactions, marks the journal clean on disk, destroys the
 * jbd2 journal and drops the cluster lock taken at init time.
 */
void ocfs2_journal_shutdown(struct ocfs2_super *osb)
{
	struct ocfs2_journal *journal = NULL;
	int status = 0;
	struct inode *inode = NULL;
	int num_running_trans = 0;

	mlog_entry_void();

	BUG_ON(!osb);

	journal = osb->journal;
	if (!journal)
		goto done;

	inode = journal->j_inode;

	if (journal->j_state != OCFS2_JOURNAL_LOADED)
		goto done;

	/* need to inc inode use count - jbd2_journal_destroy will iput. */
	if (!igrab(inode))
		BUG();

	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
	if (num_running_trans > 0)
		mlog(0, "Shutting down journal: must wait on %d "
		     "running transactions!\n",
		     num_running_trans);

	/* Do a commit_cache here. It will flush our journal, *and*
	 * release any locks that are still held.
	 * set the SHUTDOWN flag and release the trans lock.
	 * the commit thread will take the trans lock for us below. */
	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;

	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not
	 * drop the trans_lock (which we want to hold until we
	 * completely destroy the journal. */
	if (osb->commit_task) {
		/* Wait for the commit thread */
		mlog(0, "Waiting for ocfs2commit to exit....\n");
		kthread_stop(osb->commit_task);
		osb->commit_task = NULL;
	}

	/* After the commit thread is gone, no transactions may remain. */
	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);

	if (ocfs2_mount_local(osb)) {
		/* Local mounts have no commit thread; flush synchronously. */
		jbd2_journal_lock_updates(journal->j_journal);
		status = jbd2_journal_flush(journal->j_journal);
		jbd2_journal_unlock_updates(journal->j_journal);
		if (status < 0)
			mlog_errno(status);
	}

	if (status == 0) {
		/*
		 * Do not toggle if flush was unsuccessful otherwise
		 * will leave dirty metadata in a "clean" journal
		 */
		status = ocfs2_journal_toggle_dirty(osb, 0, 0);
		if (status < 0)
			mlog_errno(status);
	}

	/* Shutdown the kernel journal system */
	jbd2_journal_destroy(journal->j_journal);
	journal->j_journal = NULL;

	OCFS2_I(inode)->ip_open_count--;

	/* unlock our journal */
	ocfs2_inode_unlock(inode, 1);

	brelse(journal->j_bh);
	journal->j_bh = NULL;

	journal->j_state = OCFS2_JOURNAL_FREE;

//	up_write(&journal->j_trans_barrier);
done:
	if (inode)
		iput(inode);
	mlog_exit_void();
}
999 999
1000 static void ocfs2_clear_journal_error(struct super_block *sb, 1000 static void ocfs2_clear_journal_error(struct super_block *sb,
1001 journal_t *journal, 1001 journal_t *journal,
1002 int slot) 1002 int slot)
1003 { 1003 {
1004 int olderr; 1004 int olderr;
1005 1005
1006 olderr = jbd2_journal_errno(journal); 1006 olderr = jbd2_journal_errno(journal);
1007 if (olderr) { 1007 if (olderr) {
1008 mlog(ML_ERROR, "File system error %d recorded in " 1008 mlog(ML_ERROR, "File system error %d recorded in "
1009 "journal %u.\n", olderr, slot); 1009 "journal %u.\n", olderr, slot);
1010 mlog(ML_ERROR, "File system on device %s needs checking.\n", 1010 mlog(ML_ERROR, "File system on device %s needs checking.\n",
1011 sb->s_id); 1011 sb->s_id);
1012 1012
1013 jbd2_journal_ack_err(journal); 1013 jbd2_journal_ack_err(journal);
1014 jbd2_journal_clear_err(journal); 1014 jbd2_journal_clear_err(journal);
1015 } 1015 }
1016 } 1016 }
1017 1017
/*
 * Load the journal off disk, clear any prior fs error recorded in it,
 * mark it dirty (bumping the recovery generation if @replayed) and,
 * for cluster mounts (!@local), start the ocfs2cmt commit thread.
 *
 * Returns 0 on success or a negative errno.
 */
int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
{
	int status = 0;
	struct ocfs2_super *osb;

	mlog_entry_void();

	BUG_ON(!journal);

	osb = journal->j_osb;

	status = jbd2_journal_load(journal->j_journal);
	if (status < 0) {
		mlog(ML_ERROR, "Failed to load journal!\n");
		goto done;
	}

	/* Report/ack any error a previous mount left in the journal. */
	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);

	status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	/* Launch the commit thread */
	if (!local) {
		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
					       "ocfs2cmt");
		if (IS_ERR(osb->commit_task)) {
			status = PTR_ERR(osb->commit_task);
			osb->commit_task = NULL;
			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
			     "error=%d", status);
			goto done;
		}
	} else
		osb->commit_task = NULL;

done:
	mlog_exit(status);
	return status;
}
1061 1061
1062 1062
1063 /* 'full' flag tells us whether we clear out all blocks or if we just 1063 /* 'full' flag tells us whether we clear out all blocks or if we just
1064 * mark the journal clean */ 1064 * mark the journal clean */
1065 int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) 1065 int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1066 { 1066 {
1067 int status; 1067 int status;
1068 1068
1069 mlog_entry_void(); 1069 mlog_entry_void();
1070 1070
1071 BUG_ON(!journal); 1071 BUG_ON(!journal);
1072 1072
1073 status = jbd2_journal_wipe(journal->j_journal, full); 1073 status = jbd2_journal_wipe(journal->j_journal, full);
1074 if (status < 0) { 1074 if (status < 0) {
1075 mlog_errno(status); 1075 mlog_errno(status);
1076 goto bail; 1076 goto bail;
1077 } 1077 }
1078 1078
1079 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0); 1079 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
1080 if (status < 0) 1080 if (status < 0)
1081 mlog_errno(status); 1081 mlog_errno(status);
1082 1082
1083 bail: 1083 bail:
1084 mlog_exit(status); 1084 mlog_exit(status);
1085 return status; 1085 return status;
1086 } 1086 }
1087 1087
1088 static int ocfs2_recovery_completed(struct ocfs2_super *osb) 1088 static int ocfs2_recovery_completed(struct ocfs2_super *osb)
1089 { 1089 {
1090 int empty; 1090 int empty;
1091 struct ocfs2_recovery_map *rm = osb->recovery_map; 1091 struct ocfs2_recovery_map *rm = osb->recovery_map;
1092 1092
1093 spin_lock(&osb->osb_lock); 1093 spin_lock(&osb->osb_lock);
1094 empty = (rm->rm_used == 0); 1094 empty = (rm->rm_used == 0);
1095 spin_unlock(&osb->osb_lock); 1095 spin_unlock(&osb->osb_lock);
1096 1096
1097 return empty; 1097 return empty;
1098 } 1098 }
1099 1099
/*
 * Sleep until the recovery map is empty, i.e. all dead nodes have been
 * recovered.  Woken via osb->recovery_event.
 */
void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
{
	wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
}
1104 1104
/*
 * JBD might read a cached version of another node's journal file.  We
 * don't want this as this file changes often and we get no
 * notification on those changes.  The only way to be sure that we've
 * got the most up to date version of those blocks then is to force
 * read them off disk.  Just searching through the buffer cache won't
 * work as there may be pages backing this file which are still marked
 * up to date.  We know things can't change on this file underneath us
 * as we have the lock by now :)
 */
static int ocfs2_force_read_journal(struct inode *inode)
{
	int status = 0;
	int i;
	u64 v_blkno, p_blkno, p_blocks, num_blocks;
/* Number of journal blocks read per batch. */
#define CONCURRENT_JOURNAL_FILL 32ULL
	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];

	mlog_entry_void();

	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);

	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
	v_blkno = 0;
	while (v_blkno < num_blocks) {
		/* Map the next virtual block to a physical extent. */
		status = ocfs2_extent_map_get_blocks(inode, v_blkno,
						     &p_blkno, &p_blocks, NULL);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		if (p_blocks > CONCURRENT_JOURNAL_FILL)
			p_blocks = CONCURRENT_JOURNAL_FILL;

		/* We are reading journal data which should not
		 * be put in the uptodate cache */
		status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
						p_blkno, p_blocks, bhs);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* Drop the buffers immediately; the read itself was the
		 * point, the data is not used here. */
		for(i = 0; i < p_blocks; i++) {
			brelse(bhs[i]);
			bhs[i] = NULL;
		}

		v_blkno += p_blocks;
	}

bail:
	/* brelse(NULL) is a no-op, so releasing the whole array is safe. */
	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
		brelse(bhs[i]);
	mlog_exit(status);
	return status;
}
1163 1163
/*
 * Work item describing second-stage cleanup for one recovered slot.
 * Queued on journal->j_la_cleanups and consumed by
 * ocfs2_complete_recovery().
 */
struct ocfs2_la_recovery_item {
	struct list_head lri_list;		/* entry on j_la_cleanups */
	int lri_slot;				/* slot that was recovered */
	struct ocfs2_dinode *lri_la_dinode;	/* local alloc copy; kfree'd after use */
	struct ocfs2_dinode *lri_tl_dinode;	/* truncate log copy; kfree'd after use */
	struct ocfs2_quota_recovery *lri_qrec;	/* quota recovery state */
};
1171 1171
/* Does the second half of the recovery process. By this point, the
 * node is marked clean and can actually be considered recovered,
 * hence it's no longer in the recovery map, but there's still some
 * cleanup we can do which shouldn't happen within the recovery thread
 * as locking in that context becomes very difficult if we are to take
 * recovering nodes into account.
 *
 * NOTE: This function can and will sleep on recovery of other nodes
 * during cluster locking, just like any other ocfs2 process.
 */
void ocfs2_complete_recovery(struct work_struct *work)
{
	int ret;
	struct ocfs2_journal *journal =
		container_of(work, struct ocfs2_journal, j_recovery_work);
	struct ocfs2_super *osb = journal->j_osb;
	struct ocfs2_dinode *la_dinode, *tl_dinode;
	struct ocfs2_la_recovery_item *item, *n;
	struct ocfs2_quota_recovery *qrec;
	LIST_HEAD(tmp_la_list);

	mlog_entry_void();

	mlog(0, "completing recovery from keventd\n");

	/* Splice pending items onto a private list so j_lock is not held
	 * across the (sleeping) cleanup work below. */
	spin_lock(&journal->j_lock);
	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
	spin_unlock(&journal->j_lock);

	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
		list_del_init(&item->lri_list);

		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);

		ocfs2_wait_on_quotas(osb);

		la_dinode = item->lri_la_dinode;
		if (la_dinode) {
			mlog(0, "Clean up local alloc %llu\n",
			     (unsigned long long)le64_to_cpu(la_dinode->i_blkno));

			ret = ocfs2_complete_local_alloc_recovery(osb,
								  la_dinode);
			if (ret < 0)
				mlog_errno(ret);

			kfree(la_dinode);
		}

		tl_dinode = item->lri_tl_dinode;
		if (tl_dinode) {
			mlog(0, "Clean up truncate log %llu\n",
			     (unsigned long long)le64_to_cpu(tl_dinode->i_blkno));

			ret = ocfs2_complete_truncate_log_recovery(osb,
								   tl_dinode);
			if (ret < 0)
				mlog_errno(ret);

			kfree(tl_dinode);
		}

		ret = ocfs2_recover_orphans(osb, item->lri_slot);
		if (ret < 0)
			mlog_errno(ret);

		qrec = item->lri_qrec;
		if (qrec) {
			mlog(0, "Recovering quota files");
			ret = ocfs2_finish_quota_recovery(osb, qrec,
							  item->lri_slot);
			if (ret < 0)
				mlog_errno(ret);
			/* Recovery info is already freed now */
		}

		kfree(item);
	}

	mlog(0, "Recovery completion\n");
	mlog_exit_void();
}
1254 1254
1255 /* NOTE: This function always eats your references to la_dinode and 1255 /* NOTE: This function always eats your references to la_dinode and
1256 * tl_dinode, either manually on error, or by passing them to 1256 * tl_dinode, either manually on error, or by passing them to
1257 * ocfs2_complete_recovery */ 1257 * ocfs2_complete_recovery */
1258 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 1258 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1259 int slot_num, 1259 int slot_num,
1260 struct ocfs2_dinode *la_dinode, 1260 struct ocfs2_dinode *la_dinode,
1261 struct ocfs2_dinode *tl_dinode, 1261 struct ocfs2_dinode *tl_dinode,
1262 struct ocfs2_quota_recovery *qrec) 1262 struct ocfs2_quota_recovery *qrec)
1263 { 1263 {
1264 struct ocfs2_la_recovery_item *item; 1264 struct ocfs2_la_recovery_item *item;
1265 1265
1266 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); 1266 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
1267 if (!item) { 1267 if (!item) {
1268 /* Though we wish to avoid it, we are in fact safe in 1268 /* Though we wish to avoid it, we are in fact safe in
1269 * skipping local alloc cleanup as fsck.ocfs2 is more 1269 * skipping local alloc cleanup as fsck.ocfs2 is more
1270 * than capable of reclaiming unused space. */ 1270 * than capable of reclaiming unused space. */
1271 if (la_dinode) 1271 if (la_dinode)
1272 kfree(la_dinode); 1272 kfree(la_dinode);
1273 1273
1274 if (tl_dinode) 1274 if (tl_dinode)
1275 kfree(tl_dinode); 1275 kfree(tl_dinode);
1276 1276
1277 if (qrec) 1277 if (qrec)
1278 ocfs2_free_quota_recovery(qrec); 1278 ocfs2_free_quota_recovery(qrec);
1279 1279
1280 mlog_errno(-ENOMEM); 1280 mlog_errno(-ENOMEM);
1281 return; 1281 return;
1282 } 1282 }
1283 1283
1284 INIT_LIST_HEAD(&item->lri_list); 1284 INIT_LIST_HEAD(&item->lri_list);
1285 item->lri_la_dinode = la_dinode; 1285 item->lri_la_dinode = la_dinode;
1286 item->lri_slot = slot_num; 1286 item->lri_slot = slot_num;
1287 item->lri_tl_dinode = tl_dinode; 1287 item->lri_tl_dinode = tl_dinode;
1288 item->lri_qrec = qrec; 1288 item->lri_qrec = qrec;
1289 1289
1290 spin_lock(&journal->j_lock); 1290 spin_lock(&journal->j_lock);
1291 list_add_tail(&item->lri_list, &journal->j_la_cleanups); 1291 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
1292 queue_work(ocfs2_wq, &journal->j_recovery_work); 1292 queue_work(ocfs2_wq, &journal->j_recovery_work);
1293 spin_unlock(&journal->j_lock); 1293 spin_unlock(&journal->j_lock);
1294 } 1294 }
1295 1295
/* Called by the mount code to queue recovery the last part of
 * recovery for its own and offline slot(s). */
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
{
	struct ocfs2_journal *journal = osb->journal;

	/* No need to queue up our truncate_log as regular cleanup will catch
	 * that */
	ocfs2_queue_recovery_completion(journal, osb->slot_num,
					osb->local_alloc_copy, NULL, NULL);
	ocfs2_schedule_truncate_log_flush(osb, 0);

	/* ocfs2_queue_recovery_completion() now owns local_alloc_copy. */
	osb->local_alloc_copy = NULL;
	osb->dirty = 0;

	/* queue to recover orphan slots for all offline slots */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
	ocfs2_queue_replay_slots(osb);
	ocfs2_free_replay_slots(osb);
}
1316 1316
1317 void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) 1317 void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1318 { 1318 {
1319 if (osb->quota_rec) { 1319 if (osb->quota_rec) {
1320 ocfs2_queue_recovery_completion(osb->journal, 1320 ocfs2_queue_recovery_completion(osb->journal,
1321 osb->slot_num, 1321 osb->slot_num,
1322 NULL, 1322 NULL,
1323 NULL, 1323 NULL,
1324 osb->quota_rec); 1324 osb->quota_rec);
1325 osb->quota_rec = NULL; 1325 osb->quota_rec = NULL;
1326 } 1326 }
1327 } 1327 }
1328 1328
1329 static int __ocfs2_recovery_thread(void *arg) 1329 static int __ocfs2_recovery_thread(void *arg)
1330 { 1330 {
1331 int status, node_num, slot_num; 1331 int status, node_num, slot_num;
1332 struct ocfs2_super *osb = arg; 1332 struct ocfs2_super *osb = arg;
1333 struct ocfs2_recovery_map *rm = osb->recovery_map; 1333 struct ocfs2_recovery_map *rm = osb->recovery_map;
1334 int *rm_quota = NULL; 1334 int *rm_quota = NULL;
1335 int rm_quota_used = 0, i; 1335 int rm_quota_used = 0, i;
1336 struct ocfs2_quota_recovery *qrec; 1336 struct ocfs2_quota_recovery *qrec;
1337 1337
1338 mlog_entry_void(); 1338 mlog_entry_void();
1339 1339
1340 status = ocfs2_wait_on_mount(osb); 1340 status = ocfs2_wait_on_mount(osb);
1341 if (status < 0) { 1341 if (status < 0) {
1342 goto bail; 1342 goto bail;
1343 } 1343 }
1344 1344
1345 rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS); 1345 rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
1346 if (!rm_quota) { 1346 if (!rm_quota) {
1347 status = -ENOMEM; 1347 status = -ENOMEM;
1348 goto bail; 1348 goto bail;
1349 } 1349 }
1350 restart: 1350 restart:
1351 status = ocfs2_super_lock(osb, 1); 1351 status = ocfs2_super_lock(osb, 1);
1352 if (status < 0) { 1352 if (status < 0) {
1353 mlog_errno(status); 1353 mlog_errno(status);
1354 goto bail; 1354 goto bail;
1355 } 1355 }
1356 1356
1357 status = ocfs2_compute_replay_slots(osb); 1357 status = ocfs2_compute_replay_slots(osb);
1358 if (status < 0) 1358 if (status < 0)
1359 mlog_errno(status); 1359 mlog_errno(status);
1360 1360
1361 /* queue recovery for our own slot */ 1361 /* queue recovery for our own slot */
1362 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, 1362 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1363 NULL, NULL); 1363 NULL, NULL);
1364 1364
1365 spin_lock(&osb->osb_lock); 1365 spin_lock(&osb->osb_lock);
1366 while (rm->rm_used) { 1366 while (rm->rm_used) {
1367 /* It's always safe to remove entry zero, as we won't 1367 /* It's always safe to remove entry zero, as we won't
1368 * clear it until ocfs2_recover_node() has succeeded. */ 1368 * clear it until ocfs2_recover_node() has succeeded. */
1369 node_num = rm->rm_entries[0]; 1369 node_num = rm->rm_entries[0];
1370 spin_unlock(&osb->osb_lock); 1370 spin_unlock(&osb->osb_lock);
1371 mlog(0, "checking node %d\n", node_num); 1371 mlog(0, "checking node %d\n", node_num);
1372 slot_num = ocfs2_node_num_to_slot(osb, node_num); 1372 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1373 if (slot_num == -ENOENT) { 1373 if (slot_num == -ENOENT) {
1374 status = 0; 1374 status = 0;
1375 mlog(0, "no slot for this node, so no recovery" 1375 mlog(0, "no slot for this node, so no recovery"
1376 "required.\n"); 1376 "required.\n");
1377 goto skip_recovery; 1377 goto skip_recovery;
1378 } 1378 }
1379 mlog(0, "node %d was using slot %d\n", node_num, slot_num); 1379 mlog(0, "node %d was using slot %d\n", node_num, slot_num);
1380 1380
1381 /* It is a bit subtle with quota recovery. We cannot do it 1381 /* It is a bit subtle with quota recovery. We cannot do it
1382 * immediately because we have to obtain cluster locks from 1382 * immediately because we have to obtain cluster locks from
1383 * quota files and we also don't want to just skip it because 1383 * quota files and we also don't want to just skip it because
1384 * then quota usage would be out of sync until some node takes 1384 * then quota usage would be out of sync until some node takes
1385 * the slot. So we remember which nodes need quota recovery 1385 * the slot. So we remember which nodes need quota recovery
1386 * and when everything else is done, we recover quotas. */ 1386 * and when everything else is done, we recover quotas. */
1387 for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); 1387 for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
1388 if (i == rm_quota_used) 1388 if (i == rm_quota_used)
1389 rm_quota[rm_quota_used++] = slot_num; 1389 rm_quota[rm_quota_used++] = slot_num;
1390 1390
1391 status = ocfs2_recover_node(osb, node_num, slot_num); 1391 status = ocfs2_recover_node(osb, node_num, slot_num);
1392 skip_recovery: 1392 skip_recovery:
1393 if (!status) { 1393 if (!status) {
1394 ocfs2_recovery_map_clear(osb, node_num); 1394 ocfs2_recovery_map_clear(osb, node_num);
1395 } else { 1395 } else {
1396 mlog(ML_ERROR, 1396 mlog(ML_ERROR,
1397 "Error %d recovering node %d on device (%u,%u)!\n", 1397 "Error %d recovering node %d on device (%u,%u)!\n",
1398 status, node_num, 1398 status, node_num,
1399 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1399 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1400 mlog(ML_ERROR, "Volume requires unmount.\n"); 1400 mlog(ML_ERROR, "Volume requires unmount.\n");
1401 } 1401 }
1402 1402
1403 spin_lock(&osb->osb_lock); 1403 spin_lock(&osb->osb_lock);
1404 } 1404 }
1405 spin_unlock(&osb->osb_lock); 1405 spin_unlock(&osb->osb_lock);
1406 mlog(0, "All nodes recovered\n"); 1406 mlog(0, "All nodes recovered\n");
1407 1407
1408 /* Refresh all journal recovery generations from disk */ 1408 /* Refresh all journal recovery generations from disk */
1409 status = ocfs2_check_journals_nolocks(osb); 1409 status = ocfs2_check_journals_nolocks(osb);
1410 status = (status == -EROFS) ? 0 : status; 1410 status = (status == -EROFS) ? 0 : status;
1411 if (status < 0) 1411 if (status < 0)
1412 mlog_errno(status); 1412 mlog_errno(status);
1413 1413
1414 /* Now it is right time to recover quotas... We have to do this under 1414 /* Now it is right time to recover quotas... We have to do this under
1415 * superblock lock so that noone can start using the slot (and crash) 1415 * superblock lock so that noone can start using the slot (and crash)
1416 * before we recover it */ 1416 * before we recover it */
1417 for (i = 0; i < rm_quota_used; i++) { 1417 for (i = 0; i < rm_quota_used; i++) {
1418 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); 1418 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
1419 if (IS_ERR(qrec)) { 1419 if (IS_ERR(qrec)) {
1420 status = PTR_ERR(qrec); 1420 status = PTR_ERR(qrec);
1421 mlog_errno(status); 1421 mlog_errno(status);
1422 continue; 1422 continue;
1423 } 1423 }
1424 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], 1424 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
1425 NULL, NULL, qrec); 1425 NULL, NULL, qrec);
1426 } 1426 }
1427 1427
1428 ocfs2_super_unlock(osb, 1); 1428 ocfs2_super_unlock(osb, 1);
1429 1429
1430 /* queue recovery for offline slots */ 1430 /* queue recovery for offline slots */
1431 ocfs2_queue_replay_slots(osb); 1431 ocfs2_queue_replay_slots(osb);
1432 1432
1433 bail: 1433 bail:
1434 mutex_lock(&osb->recovery_lock); 1434 mutex_lock(&osb->recovery_lock);
1435 if (!status && !ocfs2_recovery_completed(osb)) { 1435 if (!status && !ocfs2_recovery_completed(osb)) {
1436 mutex_unlock(&osb->recovery_lock); 1436 mutex_unlock(&osb->recovery_lock);
1437 goto restart; 1437 goto restart;
1438 } 1438 }
1439 1439
1440 ocfs2_free_replay_slots(osb); 1440 ocfs2_free_replay_slots(osb);
1441 osb->recovery_thread_task = NULL; 1441 osb->recovery_thread_task = NULL;
1442 mb(); /* sync with ocfs2_recovery_thread_running */ 1442 mb(); /* sync with ocfs2_recovery_thread_running */
1443 wake_up(&osb->recovery_event); 1443 wake_up(&osb->recovery_event);
1444 1444
1445 mutex_unlock(&osb->recovery_lock); 1445 mutex_unlock(&osb->recovery_lock);
1446 1446
1447 if (rm_quota) 1447 if (rm_quota)
1448 kfree(rm_quota); 1448 kfree(rm_quota);
1449 1449
1450 mlog_exit(status); 1450 mlog_exit(status);
1451 /* no one is callint kthread_stop() for us so the kthread() api 1451 /* no one is callint kthread_stop() for us so the kthread() api
1452 * requires that we call do_exit(). And it isn't exported, but 1452 * requires that we call do_exit(). And it isn't exported, but
1453 * complete_and_exit() seems to be a minimal wrapper around it. */ 1453 * complete_and_exit() seems to be a minimal wrapper around it. */
1454 complete_and_exit(NULL, status); 1454 complete_and_exit(NULL, status);
1455 return status; 1455 return status;
1456 } 1456 }
1457 1457
/*
 * Request recovery of @node_num's slot. Marks the node in the recovery
 * map and, if no recovery thread is already running, spawns the
 * "ocfs2rec" kthread (__ocfs2_recovery_thread) to do the actual work.
 * The whole sequence is serialized by osb->recovery_lock, and it is a
 * no-op once osb->disable_recovery is set (presumably during unmount —
 * confirm against the setter of disable_recovery).
 */
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
		   node_num, osb->node_num);

	mutex_lock(&osb->recovery_lock);
	if (osb->disable_recovery)
		goto out;

	/* People waiting on recovery will wait on
	 * the recovery map to empty. */
	if (ocfs2_recovery_map_set(osb, node_num))
		mlog(0, "node %d already in recovery map.\n", node_num);

	mlog(0, "starting recovery thread...\n");

	/* Only one recovery thread at a time; it will pick up any nodes
	 * added to the recovery map above. */
	if (osb->recovery_thread_task)
		goto out;

	osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
						"ocfs2rec");
	if (IS_ERR(osb->recovery_thread_task)) {
		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
		osb->recovery_thread_task = NULL;
	}

out:
	mutex_unlock(&osb->recovery_lock);
	/* Wake anyone waiting on recovery state changes, even on the
	 * early-exit paths. */
	wake_up(&osb->recovery_event);

	mlog_exit_void();
}
1490 1490
1491 static int ocfs2_read_journal_inode(struct ocfs2_super *osb, 1491 static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1492 int slot_num, 1492 int slot_num,
1493 struct buffer_head **bh, 1493 struct buffer_head **bh,
1494 struct inode **ret_inode) 1494 struct inode **ret_inode)
1495 { 1495 {
1496 int status = -EACCES; 1496 int status = -EACCES;
1497 struct inode *inode = NULL; 1497 struct inode *inode = NULL;
1498 1498
1499 BUG_ON(slot_num >= osb->max_slots); 1499 BUG_ON(slot_num >= osb->max_slots);
1500 1500
1501 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1501 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1502 slot_num); 1502 slot_num);
1503 if (!inode || is_bad_inode(inode)) { 1503 if (!inode || is_bad_inode(inode)) {
1504 mlog_errno(status); 1504 mlog_errno(status);
1505 goto bail; 1505 goto bail;
1506 } 1506 }
1507 SET_INODE_JOURNAL(inode); 1507 SET_INODE_JOURNAL(inode);
1508 1508
1509 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE); 1509 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1510 if (status < 0) { 1510 if (status < 0) {
1511 mlog_errno(status); 1511 mlog_errno(status);
1512 goto bail; 1512 goto bail;
1513 } 1513 }
1514 1514
1515 status = 0; 1515 status = 0;
1516 1516
1517 bail: 1517 bail:
1518 if (inode) { 1518 if (inode) {
1519 if (status || !ret_inode) 1519 if (status || !ret_inode)
1520 iput(inode); 1520 iput(inode);
1521 else 1521 else
1522 *ret_inode = inode; 1522 *ret_inode = inode;
1523 } 1523 }
1524 return status; 1524 return status;
1525 } 1525 }
1526 1526
/* Does the actual journal replay and marks the journal inode as
 * clean. Will only replay if the journal inode is marked dirty. */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
				int node_num,
				int slot_num)
{
	int status;
	int got_lock = 0;	/* set once we hold the journal cluster lock */
	unsigned int flags;
	struct inode *inode = NULL;
	struct ocfs2_dinode *fe;
	journal_t *journal = NULL;
	struct buffer_head *bh = NULL;
	u32 slot_reco_gen;

	/* Dirty read (no cluster lock yet) of the journal inode, just to
	 * sample the slot's current recovery generation. */
	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
	if (status) {
		mlog_errno(status);
		goto done;
	}

	fe = (struct ocfs2_dinode *)bh->b_data;
	slot_reco_gen = ocfs2_get_recovery_generation(fe);
	brelse(bh);
	bh = NULL;

	/*
	 * As the fs recovery is asynchronous, there is a small chance that
	 * another node mounted (and recovered) the slot before the recovery
	 * thread could get the lock. To handle that, we dirty read the journal
	 * inode for that slot to get the recovery generation. If it is
	 * different than what we expected, the slot has been recovered.
	 * If not, it needs recovery.
	 */
	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
		mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		/* -EBUSY is treated by the caller as "someone else did it". */
		status = -EBUSY;
		goto done;
	}

	/* Continue with recovery as the journal has not yet been recovered */

	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
	if (status < 0) {
		mlog(0, "status returned from ocfs2_inode_lock=%d\n", status);
		if (status != -ERESTARTSYS)
			mlog(ML_ERROR, "Could not lock journal!\n");
		goto done;
	}
	got_lock = 1;

	/* bh was refreshed under the lock; re-derive the dinode pointer. */
	fe = (struct ocfs2_dinode *) bh->b_data;

	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	slot_reco_gen = ocfs2_get_recovery_generation(fe);

	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
		mlog(0, "No recovery required for node %d\n", node_num);
		/* Refresh recovery generation for the slot */
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		goto done;
	}

	/* we need to run complete recovery for offline orphan slots */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);

	mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
	     node_num, slot_num,
	     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));

	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);

	/* Pull the whole journal file into memory before replaying it. */
	status = ocfs2_force_read_journal(inode);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	mlog(0, "calling journal_init_inode\n");
	journal = jbd2_journal_init_inode(inode);
	if (journal == NULL) {
		mlog(ML_ERROR, "Linux journal layer error\n");
		status = -EIO;
		goto done;
	}

	/* jbd2_journal_load performs the actual replay of the dirty log. */
	status = jbd2_journal_load(journal);
	if (status < 0) {
		mlog_errno(status);
		/* NOTE(review): the extra igrab appears to balance an iput
		 * done inside jbd2_journal_destroy so our iput at "done:"
		 * stays valid — confirm against jbd2. */
		if (!igrab(inode))
			BUG();
		jbd2_journal_destroy(journal);
		goto done;
	}

	ocfs2_clear_journal_error(osb->sb, journal, slot_num);

	/* wipe the journal */
	mlog(0, "flushing the journal.\n");
	jbd2_journal_lock_updates(journal);
	status = jbd2_journal_flush(journal);
	jbd2_journal_unlock_updates(journal);
	if (status < 0)
		mlog_errno(status);

	/* This will mark the node clean */
	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
	fe->id1.journal1.ij_flags = cpu_to_le32(flags);

	/* Increment recovery generation to indicate successful recovery */
	ocfs2_bump_recovery_generation(fe);
	osb->slot_recovery_generations[slot_num] =
					ocfs2_get_recovery_generation(fe);

	/* Recompute metadata ECC before writing the updated dinode back. */
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
	status = ocfs2_write_block(osb, bh, inode);
	if (status < 0)
		mlog_errno(status);

	/* See the NOTE above: keep a reference across journal teardown. */
	if (!igrab(inode))
		BUG();

	jbd2_journal_destroy(journal);

done:
	/* drop the lock on this nodes journal */
	if (got_lock)
		ocfs2_inode_unlock(inode, 1);

	if (inode)
		iput(inode);

	brelse(bh);

	mlog_exit(status);
	return status;
}
1667 1667
/*
 * Do the most important parts of node recovery:
 *  - Replay its journal
 *  - Stamp a clean local allocator file
 *  - Stamp a clean truncate log
 *  - Mark the node clean
 *
 * If this function completes without error, a node in OCFS2 can be
 * said to have been safely recovered. As a result, failure during the
 * second part of a nodes recovery process (local alloc recovery) is
 * far less concerning.
 */
static int ocfs2_recover_node(struct ocfs2_super *osb,
			      int node_num, int slot_num)
{
	int status = 0;
	struct ocfs2_dinode *la_copy = NULL;
	struct ocfs2_dinode *tl_copy = NULL;

	mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
		   node_num, slot_num, osb->node_num);

	/* Should not ever be called to recover ourselves -- in that
	 * case we should've called ocfs2_journal_load instead. */
	BUG_ON(osb->node_num == node_num);

	status = ocfs2_replay_journal(osb, node_num, slot_num);
	if (status < 0) {
		/* -EBUSY means another node raced us and already
		 * recovered this slot; that is success for us. */
		if (status == -EBUSY) {
			mlog(0, "Skipping recovery for slot %u (node %u) "
			     "as another node has recovered it\n", slot_num,
			     node_num);
			status = 0;
			goto done;
		}
		mlog_errno(status);
		goto done;
	}

	/* Stamp a clean local alloc file AFTER recovering the journal... */
	status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	/* An error from begin_truncate_log_recovery is not
	 * serious enough to warrant halting the rest of
	 * recovery. */
	status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
	if (status < 0)
		mlog_errno(status);

	/* Likewise, this would be a strange but ultimately not so
	 * harmful place to get an error... */
	status = ocfs2_clear_slot(osb, slot_num);
	if (status < 0)
		mlog_errno(status);

	/* This will kfree the memory pointed to by la_copy and tl_copy */
	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
					tl_copy, NULL);

	status = 0;
done:

	mlog_exit(status);
	return status;
}
1737 1737
1738 /* Test node liveness by trylocking his journal. If we get the lock, 1738 /* Test node liveness by trylocking his journal. If we get the lock,
1739 * we drop it here. Return 0 if we got the lock, -EAGAIN if node is 1739 * we drop it here. Return 0 if we got the lock, -EAGAIN if node is
1740 * still alive (we couldn't get the lock) and < 0 on error. */ 1740 * still alive (we couldn't get the lock) and < 0 on error. */
1741 static int ocfs2_trylock_journal(struct ocfs2_super *osb, 1741 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
1742 int slot_num) 1742 int slot_num)
1743 { 1743 {
1744 int status, flags; 1744 int status, flags;
1745 struct inode *inode = NULL; 1745 struct inode *inode = NULL;
1746 1746
1747 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1747 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1748 slot_num); 1748 slot_num);
1749 if (inode == NULL) { 1749 if (inode == NULL) {
1750 mlog(ML_ERROR, "access error\n"); 1750 mlog(ML_ERROR, "access error\n");
1751 status = -EACCES; 1751 status = -EACCES;
1752 goto bail; 1752 goto bail;
1753 } 1753 }
1754 if (is_bad_inode(inode)) { 1754 if (is_bad_inode(inode)) {
1755 mlog(ML_ERROR, "access error (bad inode)\n"); 1755 mlog(ML_ERROR, "access error (bad inode)\n");
1756 iput(inode); 1756 iput(inode);
1757 inode = NULL; 1757 inode = NULL;
1758 status = -EACCES; 1758 status = -EACCES;
1759 goto bail; 1759 goto bail;
1760 } 1760 }
1761 SET_INODE_JOURNAL(inode); 1761 SET_INODE_JOURNAL(inode);
1762 1762
1763 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; 1763 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
1764 status = ocfs2_inode_lock_full(inode, NULL, 1, flags); 1764 status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
1765 if (status < 0) { 1765 if (status < 0) {
1766 if (status != -EAGAIN) 1766 if (status != -EAGAIN)
1767 mlog_errno(status); 1767 mlog_errno(status);
1768 goto bail; 1768 goto bail;
1769 } 1769 }
1770 1770
1771 ocfs2_inode_unlock(inode, 1); 1771 ocfs2_inode_unlock(inode, 1);
1772 bail: 1772 bail:
1773 if (inode) 1773 if (inode)
1774 iput(inode); 1774 iput(inode);
1775 1775
1776 return status; 1776 return status;
1777 } 1777 }
1778 1778
/* Call this underneath ocfs2_super_lock. It also assumes that the
 * slot info struct has been updated from disk. */
int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
{
	unsigned int node_num;
	int status, i;
	u32 gen;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *di;

	/* This is called with the super block cluster lock, so we
	 * know that the slot map can't change underneath us. */

	for (i = 0; i < osb->max_slots; i++) {
		/* Read journal inode to get the recovery generation */
		status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
		di = (struct ocfs2_dinode *)bh->b_data;
		gen = ocfs2_get_recovery_generation(di);
		brelse(bh);
		bh = NULL;

		/* osb_lock protects slot_recovery_generations and the
		 * slot-to-node lookup below. */
		spin_lock(&osb->osb_lock);
		osb->slot_recovery_generations[i] = gen;

		mlog(0, "Slot %u recovery generation is %u\n", i,
		     osb->slot_recovery_generations[i]);

		/* Our own slot never needs liveness probing. */
		if (i == osb->slot_num) {
			spin_unlock(&osb->osb_lock);
			continue;
		}

		/* Empty slot: nothing to recover. */
		status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
		if (status == -ENOENT) {
			spin_unlock(&osb->osb_lock);
			continue;
		}

		/* Already queued for recovery; don't probe it again. */
		if (__ocfs2_recovery_map_test(osb, node_num)) {
			spin_unlock(&osb->osb_lock);
			continue;
		}
		spin_unlock(&osb->osb_lock);

		/* Ok, we have a slot occupied by another node which
		 * is not in the recovery map. We trylock his journal
		 * file here to test if he's alive. */
		status = ocfs2_trylock_journal(osb, i);
		if (!status) {
			/* Since we're called from mount, we know that
			 * the recovery thread can't race us on
			 * setting / checking the recovery bits. */
			ocfs2_recovery_thread(osb, node_num);
		} else if ((status < 0) && (status != -EAGAIN)) {
			mlog_errno(status);
			goto bail;
		}
	}

	status = 0;
bail:
	mlog_exit(status);
	return status;
}
1847 1847
1848 /* 1848 /*
1849 * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some 1849 * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
1850 * randomness to the timeout to minimize multple nodes firing the timer at the 1850 * randomness to the timeout to minimize multple nodes firing the timer at the
1851 * same time. 1851 * same time.
1852 */ 1852 */
1853 static inline unsigned long ocfs2_orphan_scan_timeout(void) 1853 static inline unsigned long ocfs2_orphan_scan_timeout(void)
1854 { 1854 {
1855 unsigned long time; 1855 unsigned long time;
1856 1856
1857 get_random_bytes(&time, sizeof(time)); 1857 get_random_bytes(&time, sizeof(time));
1858 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000); 1858 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1859 return msecs_to_jiffies(time); 1859 return msecs_to_jiffies(time);
1860 } 1860 }
1861 1861
/*
 * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
 * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
 * is done to catch any orphans that are left over in orphan directories.
 *
 * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
 * seconds. It gets an EX lock on os_lockres and checks sequence number
 * stored in LVB. If the sequence number has changed, it means some other
 * node has done the scan. This node skips the scan and tracks the
 * sequence number. If the sequence number didn't change, it means a scan
 * hasn't happened. The node queues a scan and increments the
 * sequence number in the LVB.
 */
void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
{
	struct ocfs2_orphan_scan *os;
	int status, i;
	u32 seqno = 0;

	os = &osb->osb_orphan_scan;

	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
		goto out;

	/* EX lock also reads the cluster-wide sequence number into seqno. */
	status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
	if (status < 0) {
		if (status != -EAGAIN)
			mlog_errno(status);
		goto out;
	}

	/* Do no queue the tasks if the volume is being umounted */
	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
		goto unlock;

	/* Sequence number moved since our last look: some other node
	 * already scanned; just remember the new value and skip. */
	if (os->os_seqno != seqno) {
		os->os_seqno = seqno;
		goto unlock;
	}

	for (i = 0; i < osb->max_slots; i++)
		ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
						NULL);
	/*
	 * We queued a recovery on orphan slots, increment the sequence
	 * number and update LVB so other node will skip the scan for a while
	 */
	seqno++;
	os->os_count++;
	os->os_scantime = CURRENT_TIME;
unlock:
	/* The unlock writes seqno back to the LVB for other nodes. */
	ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
out:
	return;
}
1917 1917
/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
void ocfs2_orphan_scan_work(struct work_struct *work)
{
	struct ocfs2_orphan_scan *os;
	struct ocfs2_super *osb;

	os = container_of(work, struct ocfs2_orphan_scan,
			  os_orphan_scan_work.work);
	osb = os->os_osb;

	/* os_lock serializes this worker against ocfs2_orphan_scan_stop(),
	 * so we never re-arm the delayed work after stop has cancelled it. */
	mutex_lock(&os->os_lock);
	ocfs2_queue_orphan_scan(osb);
	/* Re-arm only while the scan is still active. */
	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
		schedule_delayed_work(&os->os_orphan_scan_work,
				      ocfs2_orphan_scan_timeout());
	mutex_unlock(&os->os_lock);
}
1935 1935
1936 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) 1936 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1937 { 1937 {
1938 struct ocfs2_orphan_scan *os; 1938 struct ocfs2_orphan_scan *os;
1939 1939
1940 os = &osb->osb_orphan_scan; 1940 os = &osb->osb_orphan_scan;
1941 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1941 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1942 mutex_lock(&os->os_lock); 1942 mutex_lock(&os->os_lock);
1943 cancel_delayed_work(&os->os_orphan_scan_work); 1943 cancel_delayed_work(&os->os_orphan_scan_work);
1944 mutex_unlock(&os->os_lock); 1944 mutex_unlock(&os->os_lock);
1945 } 1945 }
1946 1946
1947 int ocfs2_orphan_scan_init(struct ocfs2_super *osb) 1947 int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1948 { 1948 {
1949 struct ocfs2_orphan_scan *os; 1949 struct ocfs2_orphan_scan *os;
1950 1950
1951 os = &osb->osb_orphan_scan; 1951 os = &osb->osb_orphan_scan;
1952 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); 1952 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1953 os->os_osb = osb; 1953 os->os_osb = osb;
1954 os->os_count = 0; 1954 os->os_count = 0;
1955 os->os_seqno = 0;
1955 os->os_scantime = CURRENT_TIME; 1956 os->os_scantime = CURRENT_TIME;
1956 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1957 1958
1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, 1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work,
1959 ocfs2_orphan_scan_work); 1960 ocfs2_orphan_scan_work);
1960 schedule_delayed_work(&os->os_orphan_scan_work, 1961 schedule_delayed_work(&os->os_orphan_scan_work,
1961 ocfs2_orphan_scan_timeout()); 1962 ocfs2_orphan_scan_timeout());
1962 return 0; 1963 return 0;
1963 } 1964 }
1964 1965
/* Context handed to ocfs2_orphan_filldir() while iterating an orphan dir. */
struct ocfs2_orphan_filldir_priv {
	struct inode *head;	/* head of list linked through ip_next_orphan */
	struct ocfs2_super *osb;
};
1969 1970
1970 static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, 1971 static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
1971 loff_t pos, u64 ino, unsigned type) 1972 loff_t pos, u64 ino, unsigned type)
1972 { 1973 {
1973 struct ocfs2_orphan_filldir_priv *p = priv; 1974 struct ocfs2_orphan_filldir_priv *p = priv;
1974 struct inode *iter; 1975 struct inode *iter;
1975 1976
1976 if (name_len == 1 && !strncmp(".", name, 1)) 1977 if (name_len == 1 && !strncmp(".", name, 1))
1977 return 0; 1978 return 0;
1978 if (name_len == 2 && !strncmp("..", name, 2)) 1979 if (name_len == 2 && !strncmp("..", name, 2))
1979 return 0; 1980 return 0;
1980 1981
1981 /* Skip bad inodes so that recovery can continue */ 1982 /* Skip bad inodes so that recovery can continue */
1982 iter = ocfs2_iget(p->osb, ino, 1983 iter = ocfs2_iget(p->osb, ino,
1983 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0); 1984 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
1984 if (IS_ERR(iter)) 1985 if (IS_ERR(iter))
1985 return 0; 1986 return 0;
1986 1987
1987 mlog(0, "queue orphan %llu\n", 1988 mlog(0, "queue orphan %llu\n",
1988 (unsigned long long)OCFS2_I(iter)->ip_blkno); 1989 (unsigned long long)OCFS2_I(iter)->ip_blkno);
1989 /* No locking is required for the next_orphan queue as there 1990 /* No locking is required for the next_orphan queue as there
1990 * is only ever a single process doing orphan recovery. */ 1991 * is only ever a single process doing orphan recovery. */
1991 OCFS2_I(iter)->ip_next_orphan = p->head; 1992 OCFS2_I(iter)->ip_next_orphan = p->head;
1992 p->head = iter; 1993 p->head = iter;
1993 1994
1994 return 0; 1995 return 0;
1995 } 1996 }
1996 1997
/*
 * Walk a slot's orphan directory under the cluster inode lock and
 * collect every orphan inode (via ocfs2_orphan_filldir) onto *head.
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_queue_orphans(struct ocfs2_super *osb,
			       int slot,
			       struct inode **head)
{
	int status;
	struct inode *orphan_dir_inode = NULL;
	struct ocfs2_orphan_filldir_priv priv;
	loff_t pos = 0;

	priv.osb = osb;
	priv.head = *head;

	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
						       ORPHAN_DIR_SYSTEM_INODE,
						       slot);
	if (!orphan_dir_inode) {
		status = -ENOENT;
		mlog_errno(status);
		return status;
	}

	/* i_mutex before the cluster lock; both held across the walk so
	 * entries cannot change while we iget() them. */
	mutex_lock(&orphan_dir_inode->i_mutex);
	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
	if (status < 0) {
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
				   ocfs2_orphan_filldir);
	if (status) {
		mlog_errno(status);
		goto out_cluster;
	}

	/* Hand the (possibly extended) list back to the caller. */
	*head = priv.head;

out_cluster:
	ocfs2_inode_unlock(orphan_dir_inode, 0);
out:
	mutex_unlock(&orphan_dir_inode->i_mutex);
	iput(orphan_dir_inode);
	return status;
}
2041 2042
2042 static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, 2043 static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
2043 int slot) 2044 int slot)
2044 { 2045 {
2045 int ret; 2046 int ret;
2046 2047
2047 spin_lock(&osb->osb_lock); 2048 spin_lock(&osb->osb_lock);
2048 ret = !osb->osb_orphan_wipes[slot]; 2049 ret = !osb->osb_orphan_wipes[slot];
2049 spin_unlock(&osb->osb_lock); 2050 spin_unlock(&osb->osb_lock);
2050 return ret; 2051 return ret;
2051 } 2052 }
2052 2053
/*
 * Advertise that this slot's orphan dir is being recovered and wait
 * for any in-flight orphan wipes on it to drain.
 */
static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
					     int slot)
{
	spin_lock(&osb->osb_lock);
	/* Mark ourselves such that new processes in delete_inode()
	 * know to quit early. */
	ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
	while (osb->osb_orphan_wipes[slot]) {
		/* If any processes are already in the middle of an
		 * orphan wipe on this dir, then we need to wait for
		 * them. */
		spin_unlock(&osb->osb_lock);
		/* Drop the lock while sleeping; recheck the counter under
		 * the lock each time we wake. */
		wait_event_interruptible(osb->osb_wipe_event,
					 ocfs2_orphan_recovery_can_continue(osb, slot));
		spin_lock(&osb->osb_lock);
	}
	spin_unlock(&osb->osb_lock);
}
2071 2072
/* Undo ocfs2_mark_recovering_orphan_dir(): let delete_inode() proceed
 * against this slot's orphan dir again. */
static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
					      int slot)
{
	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
}
2077 2078
2078 /* 2079 /*
2079 * Orphan recovery. Each mounted node has it's own orphan dir which we 2080 * Orphan recovery. Each mounted node has it's own orphan dir which we
2080 * must run during recovery. Our strategy here is to build a list of 2081 * must run during recovery. Our strategy here is to build a list of
2081 * the inodes in the orphan dir and iget/iput them. The VFS does 2082 * the inodes in the orphan dir and iget/iput them. The VFS does
2082 * (most) of the rest of the work. 2083 * (most) of the rest of the work.
2083 * 2084 *
2084 * Orphan recovery can happen at any time, not just mount so we have a 2085 * Orphan recovery can happen at any time, not just mount so we have a
2085 * couple of extra considerations. 2086 * couple of extra considerations.
2086 * 2087 *
2087 * - We grab as many inodes as we can under the orphan dir lock - 2088 * - We grab as many inodes as we can under the orphan dir lock -
2088 * doing iget() outside the orphan dir risks getting a reference on 2089 * doing iget() outside the orphan dir risks getting a reference on
2089 * an invalid inode. 2090 * an invalid inode.
2090 * - We must be sure not to deadlock with other processes on the 2091 * - We must be sure not to deadlock with other processes on the
2091 * system wanting to run delete_inode(). This can happen when they go 2092 * system wanting to run delete_inode(). This can happen when they go
2092 * to lock the orphan dir and the orphan recovery process attempts to 2093 * to lock the orphan dir and the orphan recovery process attempts to
2093 * iget() inside the orphan dir lock. This can be avoided by 2094 * iget() inside the orphan dir lock. This can be avoided by
2094 * advertising our state to ocfs2_delete_inode(). 2095 * advertising our state to ocfs2_delete_inode().
2095 */ 2096 */
2096 static int ocfs2_recover_orphans(struct ocfs2_super *osb, 2097 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2097 int slot) 2098 int slot)
2098 { 2099 {
2099 int ret = 0; 2100 int ret = 0;
2100 struct inode *inode = NULL; 2101 struct inode *inode = NULL;
2101 struct inode *iter; 2102 struct inode *iter;
2102 struct ocfs2_inode_info *oi; 2103 struct ocfs2_inode_info *oi;
2103 2104
2104 mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); 2105 mlog(0, "Recover inodes from orphan dir in slot %d\n", slot);
2105 2106
2106 ocfs2_mark_recovering_orphan_dir(osb, slot); 2107 ocfs2_mark_recovering_orphan_dir(osb, slot);
2107 ret = ocfs2_queue_orphans(osb, slot, &inode); 2108 ret = ocfs2_queue_orphans(osb, slot, &inode);
2108 ocfs2_clear_recovering_orphan_dir(osb, slot); 2109 ocfs2_clear_recovering_orphan_dir(osb, slot);
2109 2110
2110 /* Error here should be noted, but we want to continue with as 2111 /* Error here should be noted, but we want to continue with as
2111 * many queued inodes as we've got. */ 2112 * many queued inodes as we've got. */
2112 if (ret) 2113 if (ret)
2113 mlog_errno(ret); 2114 mlog_errno(ret);
2114 2115
2115 while (inode) { 2116 while (inode) {
2116 oi = OCFS2_I(inode); 2117 oi = OCFS2_I(inode);
2117 mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); 2118 mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno);
2118 2119
2119 iter = oi->ip_next_orphan; 2120 iter = oi->ip_next_orphan;
2120 2121
2121 spin_lock(&oi->ip_lock); 2122 spin_lock(&oi->ip_lock);
2122 /* The remote delete code may have set these on the 2123 /* The remote delete code may have set these on the
2123 * assumption that the other node would wipe them 2124 * assumption that the other node would wipe them
2124 * successfully. If they are still in the node's 2125 * successfully. If they are still in the node's
2125 * orphan dir, we need to reset that state. */ 2126 * orphan dir, we need to reset that state. */
2126 oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); 2127 oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
2127 2128
2128 /* Set the proper information to get us going into 2129 /* Set the proper information to get us going into
2129 * ocfs2_delete_inode. */ 2130 * ocfs2_delete_inode. */
2130 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 2131 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2131 spin_unlock(&oi->ip_lock); 2132 spin_unlock(&oi->ip_lock);
2132 2133
2133 iput(inode); 2134 iput(inode);
2134 2135
2135 inode = iter; 2136 inode = iter;
2136 } 2137 }
2137 2138
2138 return ret; 2139 return ret;
2139 } 2140 }
2140 2141
/*
 * Block until the volume reaches a settled state.  With @quota set we
 * additionally wait for quotas to come up (VOLUME_MOUNTED_QUOTAS).
 * Returns 0 when mounted, -EBUSY if the mount was disabled.
 */
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
{
	/* This check is good because ocfs2 will wait on our recovery
	 * thread before changing it to something other than MOUNTED
	 * or DISABLED. */
	wait_event(osb->osb_mount_event,
		   (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
		   atomic_read(&osb->vol_state) == VOLUME_DISABLED);

	/* If there's an error on mount, then we may never get to the
	 * MOUNTED flag, but this is set right before
	 * dismount_volume() so we can trust it. */
	if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
		mlog(0, "mount error, exiting!\n");
		return -EBUSY;
	}

	return 0;
}
2161 2162
/*
 * Kernel thread that checkpoints the journal whenever transactions
 * accumulate.  Keeps running on shutdown until j_num_trans drains to
 * zero, since final transactions may still be queued during unmount.
 */
static int ocfs2_commit_thread(void *arg)
{
	int status;
	struct ocfs2_super *osb = arg;
	struct ocfs2_journal *journal = osb->journal;

	/* we can trust j_num_trans here because _should_stop() is only set in
	 * shutdown and nobody other than ourselves should be able to start
	 * transactions.  committing on shutdown might take a few iterations
	 * as final transactions put deleted inodes on the list */
	while (!(kthread_should_stop() &&
		 atomic_read(&journal->j_num_trans) == 0)) {

		/* Sleep until there is work or we are asked to stop. */
		wait_event_interruptible(osb->checkpoint_event,
					 atomic_read(&journal->j_num_trans)
					 || kthread_should_stop());

		status = ocfs2_commit_cache(osb);
		if (status < 0)
			mlog_errno(status);

		if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
			mlog(ML_KTHREAD,
			     "commit_thread: %u transactions pending on "
			     "shutdown\n",
			     atomic_read(&journal->j_num_trans));
		}
	}

	return 0;
}
2193 2194
/* Reads all the journal inodes without taking any cluster locks. Used
 * for hard readonly access to determine whether any journal requires
 * recovery.  Also used to refresh the recovery generation numbers after
 * a journal has been recovered by another node.
 */
int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
{
	int ret = 0;
	unsigned int slot;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int journal_dirty = 0;

	for(slot = 0; slot < osb->max_slots; slot++) {
		ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		di = (struct ocfs2_dinode *) di_bh->b_data;

		/* Refresh this slot's recovery generation from disk. */
		osb->slot_recovery_generations[slot] =
			ocfs2_get_recovery_generation(di);

		/* Remember if any slot's journal was left dirty. */
		if (le32_to_cpu(di->id1.journal1.ij_flags) &
		    OCFS2_JOURNAL_DIRTY_FL)
			journal_dirty = 1;

		brelse(di_bh);
		di_bh = NULL;
	}

out:
	/* A dirty journal means recovery is needed; report -EROFS even
	 * if a later slot read failed. */
	if (journal_dirty)
		ret = -EROFS;
	return ret;
}
2232 2233