Blame view

fs/ceph/locks.c 9.87 KB
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1
  #include <linux/ceph/ceph_debug.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
2
3
4
  
  #include <linux/file.h>
  #include <linux/namei.h>
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
5
  #include <linux/random.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
6
7
8
  
  #include "super.h"
  #include "mds_client.h"
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
9
  #include <linux/ceph/pagelist.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
10

eb13e832f   Yan, Zheng   ceph: use fl->fl_...
11
  /*
   * Random value XORed into lock owner pointers by secure_addr();
   * filled in by ceph_flock_init().
   */
  static u64 lock_secret;
9280be24d   Yan, Zheng   ceph: fix file lo...
12
13
  /*
   * Forward declaration: ceph_lock_message() installs this as
   * req->r_wait_for_completion for blocking lock requests, and the
   * definition appears later in the file.
   */
  static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
                                           struct ceph_mds_request *req);
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  
  static inline u64 secure_addr(void *addr)
  {
  	u64 v = lock_secret ^ (u64)(unsigned long)addr;
  	/*
  	 * Set the most significant bit, so that MDS knows the 'owner'
  	 * is sufficient to identify the owner of lock. (old code uses
  	 * both 'owner' and 'pid')
  	 */
  	v |= (1ULL << 63);
  	return v;
  }
  
  /*
   * Fill lock_secret with random bytes. secure_addr() XORs this value
   * into every lock owner pointer.
   */
  void __init ceph_flock_init(void)
  {
  	get_random_bytes(&lock_secret, sizeof(lock_secret));
  }
40819f6fb   Greg Farnum   ceph: add flock/f...
31
32
33
34
  /**
   * Implement fcntl and flock locking functions.
   *
   * Builds one file-lock request, sends it to the MDS and, for
   * CEPH_MDS_OP_GETFILELOCK, copies the reply back into @fl.
   *
   * @lock_type: lock rule stored in the request (callers in this file pass
   *             CEPH_LOCK_FCNTL or CEPH_LOCK_FLOCK)
   * @operation: MDS operation (CEPH_MDS_OP_SETFILELOCK or
   *             CEPH_MDS_OP_GETFILELOCK)
   * @file:      file whose inode the lock applies to
   * @cmd:       CEPH_LOCK_SHARED, CEPH_LOCK_EXCL or CEPH_LOCK_UNLOCK
   * @wait:      non-zero to ask for a blocking lock; forced to 0 unless this
   *             is a SETFILELOCK that is not an unlock
   * @fl:        lock description; overwritten with the reply on a successful
   *             GETFILELOCK
   *
   * Returns 0 on success, or the negative error from creating or executing
   * the MDS request.
   */
  static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
  			     int cmd, u8 wait, struct file_lock *fl)
  {
  	struct inode *inode = file_inode(file);
  	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
  	struct ceph_mds_request *req;
  	int err;
  	u64 length = 0;
  	u64 owner;

  	/* only a "set lock" that actually acquires something may block */
  	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
  		wait = 0;

  	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
  	if (IS_ERR(req))
  		return PTR_ERR(req);
  	req->r_inode = inode;
  	ihold(inode);
  	req->r_num_caps = 1;

  	/* mds requires start and length rather than start and end */
  	if (LLONG_MAX == fl->fl_end)
  		length = 0;
  	else
  		length = fl->fl_end - fl->fl_start + 1;

  	owner = secure_addr(fl->fl_owner);

  	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
  	     "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
  	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
  	     wait, fl->fl_type);

  	req->r_args.filelock_change.rule = lock_type;
  	req->r_args.filelock_change.type = cmd;
  	req->r_args.filelock_change.owner = cpu_to_le64(owner);
  	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
  	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
  	req->r_args.filelock_change.length = cpu_to_le64(length);
  	req->r_args.filelock_change.wait = wait;

  	/* blocking requests use the interruptible wait helper */
  	if (wait)
  		req->r_wait_for_completion = ceph_lock_wait_for_completion;

  	err = ceph_mdsc_do_request(mdsc, inode, req);

  	/*
  	 * Only look at the reply when the request succeeded: on failure
  	 * there may be no filelock reply to read.
  	 */
  	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
  		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
  		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
  			fl->fl_type = F_RDLCK;
  		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
  			fl->fl_type = F_WRLCK;
  		else
  			fl->fl_type = F_UNLCK;

  		/* convert the reply's start/length back to start/end */
  		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
  		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
  						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
  		if (length >= 1)
  			fl->fl_end = length - 1;
  		else
  			fl->fl_end = 0;
  	}

  	ceph_mdsc_put_request(req);
  	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
  	     "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
  	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
  	     length, wait, fl->fl_type, err);
  	return err;
  }
9280be24d   Yan, Zheng   ceph: fix file lo...
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
  /*
   * Wait for a blocking SETFILELOCK request to complete, allowing the wait
   * to be interrupted.
   *
   * If the wait is interrupted, a second SETFILELOCK request is sent with
   * the matching *_INTR rule and type CEPH_LOCK_UNLOCK, copying the other
   * arguments from the original request. If that request succeeds or
   * returns -ERESTARTSYS, the function then waits, uninterruptibly, for the
   * original request to complete.
   *
   * Returns 0 once the original request has completed, or a negative error
   * if the interrupting request could not be created or failed with an
   * error other than -ERESTARTSYS.
   */
  static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
                                           struct ceph_mds_request *req)
  {
  	struct ceph_mds_request *intr_req;
  	struct inode *inode = req->r_inode;
  	int err, lock_type;

  	BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
  	/* pick the "interrupt" rule matching the original lock flavour */
  	if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
  		lock_type = CEPH_LOCK_FCNTL_INTR;
  	else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
  		lock_type = CEPH_LOCK_FLOCK_INTR;
  	else
  		BUG_ON(1);
  	BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);

  	err = wait_for_completion_interruptible(&req->r_completion);
  	if (!err)
  		return 0;

  	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
  	     req->r_tid);

  	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
  					    USE_AUTH_MDS);
  	if (IS_ERR(intr_req))
  		return PTR_ERR(intr_req);

  	intr_req->r_inode = inode;
  	ihold(inode);
  	intr_req->r_num_caps = 1;

  	/* same owner/range as the original, but as an "interrupt" unlock */
  	intr_req->r_args.filelock_change = req->r_args.filelock_change;
  	intr_req->r_args.filelock_change.rule = lock_type;
  	intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;

  	err = ceph_mdsc_do_request(mdsc, inode, intr_req);
  	ceph_mdsc_put_request(intr_req);

  	if (err && err != -ERESTARTSYS)
  		return err;

  	/* the original request must still finish before we return */
  	wait_for_completion(&req->r_completion);
  	return 0;
  }
40819f6fb   Greg Farnum   ceph: add flock/f...
147
148
149
150
151
152
  /**
   * Attempt to set an fcntl lock.
   * For now, this just goes away to the server. Later it may be more awesome.
   */
  int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
  {
40819f6fb   Greg Farnum   ceph: add flock/f...
153
154
155
156
  	u8 lock_cmd;
  	int err;
  	u8 wait = 0;
  	u16 op = CEPH_MDS_OP_SETFILELOCK;
eb70c0ce4   Yan, Zheng   ceph: forbid mand...
157
158
159
160
161
  	if (!(fl->fl_flags & FL_POSIX))
  		return -ENOLCK;
  	/* No mandatory locks */
  	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
  		return -ENOLCK;
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
162
  	dout("ceph_lock, fl_owner: %p", fl->fl_owner);
40819f6fb   Greg Farnum   ceph: add flock/f...
163
164
  
  	/* set wait bit as appropriate, then make command as Ceph expects it*/
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
165
  	if (IS_GETLK(cmd))
40819f6fb   Greg Farnum   ceph: add flock/f...
166
  		op = CEPH_MDS_OP_GETFILELOCK;
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
167
168
  	else if (IS_SETLKW(cmd))
  		wait = 1;
40819f6fb   Greg Farnum   ceph: add flock/f...
169
170
171
172
173
174
175
  
  	if (F_RDLCK == fl->fl_type)
  		lock_cmd = CEPH_LOCK_SHARED;
  	else if (F_WRLCK == fl->fl_type)
  		lock_cmd = CEPH_LOCK_EXCL;
  	else
  		lock_cmd = CEPH_LOCK_UNLOCK;
637ae8d54   Herb Shiu   ceph: pass lock i...
176
  	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
177
  	if (!err) {
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
178
  		if (op != CEPH_MDS_OP_GETFILELOCK) {
a5b10629e   Herb Shiu   ceph: Behave bett...
179
180
181
  			dout("mds locked, locking locally");
  			err = posix_lock_file(file, fl, NULL);
  			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
0c1f91f27   Sage Weil   ceph: unwind canc...
182
183
184
  				/* undo! This should only happen if
  				 * the kernel detects local
  				 * deadlock. */
a5b10629e   Herb Shiu   ceph: Behave bett...
185
186
  				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
  						  CEPH_LOCK_UNLOCK, 0, fl);
0c1f91f27   Sage Weil   ceph: unwind canc...
187
188
  				dout("got %d on posix_lock_file, undid lock",
  				     err);
a5b10629e   Herb Shiu   ceph: Behave bett...
189
  			}
40819f6fb   Greg Farnum   ceph: add flock/f...
190
  		}
40819f6fb   Greg Farnum   ceph: add flock/f...
191
192
193
194
195
196
  	}
  	return err;
  }
  
  int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
  {
40819f6fb   Greg Farnum   ceph: add flock/f...
197
198
  	u8 lock_cmd;
  	int err;
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
199
  	u8 wait = 0;
40819f6fb   Greg Farnum   ceph: add flock/f...
200

eb70c0ce4   Yan, Zheng   ceph: forbid mand...
201
202
203
204
205
  	if (!(fl->fl_flags & FL_FLOCK))
  		return -ENOLCK;
  	/* No mandatory locks */
  	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
  		return -ENOLCK;
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
206
  	dout("ceph_flock, fl_file: %p", fl->fl_file);
40819f6fb   Greg Farnum   ceph: add flock/f...
207

0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
208
209
210
211
  	if (IS_SETLKW(cmd))
  		wait = 1;
  
  	if (F_RDLCK == fl->fl_type)
40819f6fb   Greg Farnum   ceph: add flock/f...
212
  		lock_cmd = CEPH_LOCK_SHARED;
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
213
  	else if (F_WRLCK == fl->fl_type)
40819f6fb   Greg Farnum   ceph: add flock/f...
214
215
216
  		lock_cmd = CEPH_LOCK_EXCL;
  	else
  		lock_cmd = CEPH_LOCK_UNLOCK;
40819f6fb   Greg Farnum   ceph: add flock/f...
217
218
  
  	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
637ae8d54   Herb Shiu   ceph: pass lock i...
219
  				file, lock_cmd, wait, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
220
  	if (!err) {
4f6563677   Benjamin Coddington   Move locks API us...
221
  		err = locks_lock_file_wait(file, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
222
223
224
  		if (err) {
  			ceph_lock_message(CEPH_LOCK_FLOCK,
  					  CEPH_MDS_OP_SETFILELOCK,
637ae8d54   Herb Shiu   ceph: pass lock i...
225
  					  file, CEPH_LOCK_UNLOCK, 0, fl);
4f6563677   Benjamin Coddington   Move locks API us...
226
  			dout("got %d on locks_lock_file_wait, undid lock", err);
40819f6fb   Greg Farnum   ceph: add flock/f...
227
  		}
40819f6fb   Greg Farnum   ceph: add flock/f...
228
229
230
  	}
  	return err;
  }
5263e31e4   Jeff Layton   locks: move flock...
231
232
233
  /*
   * Fills in the passed counter variables, so you can prepare pagelist metadata
   * before calling ceph_encode_locks.
40819f6fb   Greg Farnum   ceph: add flock/f...
234
235
236
   */
  void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
  {
e084c1bd4   Jeff Layton   Revert "locks: ke...
237
  	struct file_lock *lock;
5263e31e4   Jeff Layton   locks: move flock...
238
  	struct file_lock_context *ctx;
40819f6fb   Greg Farnum   ceph: add flock/f...
239
240
241
  
  	*fcntl_count = 0;
  	*flock_count = 0;
5263e31e4   Jeff Layton   locks: move flock...
242
243
  	ctx = inode->i_flctx;
  	if (ctx) {
e084c1bd4   Jeff Layton   Revert "locks: ke...
244
245
246
247
248
249
  		spin_lock(&ctx->flc_lock);
  		list_for_each_entry(lock, &ctx->flc_posix, fl_list)
  			++(*fcntl_count);
  		list_for_each_entry(lock, &ctx->flc_flock, fl_list)
  			++(*flock_count);
  		spin_unlock(&ctx->flc_lock);
40819f6fb   Greg Farnum   ceph: add flock/f...
250
251
252
253
254
255
  	}
  	dout("counted %d flock locks and %d fcntl locks",
  	     *flock_count, *fcntl_count);
  }
  
  /**
39be95e9c   Jim Schutt   ceph: ceph_pageli...
256
   * Encode the flock and fcntl locks for the given inode into the ceph_filelock
1c8c601a8   Jeff Layton   locks: protect mo...
257
   * array. Must be called with inode->i_lock already held.
39be95e9c   Jim Schutt   ceph: ceph_pageli...
258
   * If we encounter more of a specific lock type than expected, return -ENOSPC.
40819f6fb   Greg Farnum   ceph: add flock/f...
259
   */
39be95e9c   Jim Schutt   ceph: ceph_pageli...
260
261
262
  int ceph_encode_locks_to_buffer(struct inode *inode,
  				struct ceph_filelock *flocks,
  				int num_fcntl_locks, int num_flock_locks)
40819f6fb   Greg Farnum   ceph: add flock/f...
263
264
  {
  	struct file_lock *lock;
bd61e0a9c   Jeff Layton   locks: convert po...
265
  	struct file_lock_context *ctx = inode->i_flctx;
40819f6fb   Greg Farnum   ceph: add flock/f...
266
  	int err = 0;
fca4451ac   Greg Farnum   ceph: preallocate...
267
268
  	int seen_fcntl = 0;
  	int seen_flock = 0;
39be95e9c   Jim Schutt   ceph: ceph_pageli...
269
  	int l = 0;
40819f6fb   Greg Farnum   ceph: add flock/f...
270
271
272
  
  	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
  	     num_fcntl_locks);
39be95e9c   Jim Schutt   ceph: ceph_pageli...
273

bd61e0a9c   Jeff Layton   locks: convert po...
274
275
  	if (!ctx)
  		return 0;
6109c8503   Jeff Layton   locks: add a dedi...
276
  	spin_lock(&ctx->flc_lock);
f6762cb2c   Yan, Zheng   ceph: fix ceph_en...
277
  	list_for_each_entry(lock, &ctx->flc_posix, fl_list) {
bd61e0a9c   Jeff Layton   locks: convert po...
278
279
280
281
  		++seen_fcntl;
  		if (seen_fcntl > num_fcntl_locks) {
  			err = -ENOSPC;
  			goto fail;
40819f6fb   Greg Farnum   ceph: add flock/f...
282
  		}
bd61e0a9c   Jeff Layton   locks: convert po...
283
284
285
286
  		err = lock_to_ceph_filelock(lock, &flocks[l]);
  		if (err)
  			goto fail;
  		++l;
40819f6fb   Greg Farnum   ceph: add flock/f...
287
  	}
bd61e0a9c   Jeff Layton   locks: convert po...
288
289
290
291
292
  	list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
  		++seen_flock;
  		if (seen_flock > num_flock_locks) {
  			err = -ENOSPC;
  			goto fail;
40819f6fb   Greg Farnum   ceph: add flock/f...
293
  		}
bd61e0a9c   Jeff Layton   locks: convert po...
294
295
296
297
  		err = lock_to_ceph_filelock(lock, &flocks[l]);
  		if (err)
  			goto fail;
  		++l;
40819f6fb   Greg Farnum   ceph: add flock/f...
298
299
  	}
  fail:
6109c8503   Jeff Layton   locks: add a dedi...
300
  	spin_unlock(&ctx->flc_lock);
40819f6fb   Greg Farnum   ceph: add flock/f...
301
302
  	return err;
  }
39be95e9c   Jim Schutt   ceph: ceph_pageli...
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
  /**
   * Append the already-encoded locks to a pagelist.
   *
   * Layout written, in order: the fcntl lock count (little-endian 32-bit),
   * the fcntl lock entries, the flock lock count (little-endian 32-bit),
   * then the flock lock entries, which are read from @flocks starting at
   * index @num_fcntl_locks.
   *
   * Returns zero on success, or the first non-zero value returned by
   * ceph_pagelist_append().
   */
  int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
  			   struct ceph_pagelist *pagelist,
  			   int num_fcntl_locks, int num_flock_locks)
  {
  	__le32 count;
  	int ret;

  	/* fcntl section: count, then entries */
  	count = cpu_to_le32(num_fcntl_locks);
  	ret = ceph_pagelist_append(pagelist, &count, sizeof(count));
  	if (ret)
  		return ret;

  	ret = ceph_pagelist_append(pagelist, flocks,
  				   num_fcntl_locks * sizeof(*flocks));
  	if (ret)
  		return ret;

  	/* flock section: count, then entries */
  	count = cpu_to_le32(num_flock_locks);
  	ret = ceph_pagelist_append(pagelist, &count, sizeof(count));
  	if (ret)
  		return ret;

  	return ceph_pagelist_append(pagelist,
  				    &flocks[num_fcntl_locks],
  				    num_flock_locks * sizeof(*flocks));
  }
40819f6fb   Greg Farnum   ceph: add flock/f...
337
338
339
340
341
342
343
  /*
   * Given a pointer to a lock, convert it to a ceph filelock
   */
  int lock_to_ceph_filelock(struct file_lock *lock,
  			  struct ceph_filelock *cephlock)
  {
  	int err = 0;
40819f6fb   Greg Farnum   ceph: add flock/f...
344
345
346
  	cephlock->start = cpu_to_le64(lock->fl_start);
  	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
  	cephlock->client = cpu_to_le64(0);
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
347
  	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
130d1f956   Jeff Layton   locks: ensure tha...
348
  	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
40819f6fb   Greg Farnum   ceph: add flock/f...
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
  
  	switch (lock->fl_type) {
  	case F_RDLCK:
  		cephlock->type = CEPH_LOCK_SHARED;
  		break;
  	case F_WRLCK:
  		cephlock->type = CEPH_LOCK_EXCL;
  		break;
  	case F_UNLCK:
  		cephlock->type = CEPH_LOCK_UNLOCK;
  		break;
  	default:
  		dout("Have unknown lock type %d", lock->fl_type);
  		err = -EINVAL;
  	}
  
  	return err;
  }