Blame view

fs/ceph/locks.c 9.81 KB
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1
  #include <linux/ceph/ceph_debug.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
2
3
4
  
  #include <linux/file.h>
  #include <linux/namei.h>
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
5
  #include <linux/random.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
6
7
8
  
  #include "super.h"
  #include "mds_client.h"
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
9
  #include <linux/ceph/pagelist.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
10

eb13e832f   Yan, Zheng   ceph: use fl->fl_...
11
  /*
   * Per-boot random value XORed into lock-owner pointers by secure_addr().
   * Filled in once by ceph_flock_init().
   */
  static u64 lock_secret;
9280be24d   Yan, Zheng   ceph: fix file lo...
12
13
  /*
   * Forward declaration: ceph_lock_message() installs this as
   * req->r_wait_for_completion for blocking lock requests.
   */
  static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
                                           struct ceph_mds_request *req);
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  
  static inline u64 secure_addr(void *addr)
  {
  	u64 v = lock_secret ^ (u64)(unsigned long)addr;
  	/*
  	 * Set the most significant bit, so that MDS knows the 'owner'
  	 * is sufficient to identify the owner of lock. (old code uses
  	 * both 'owner' and 'pid')
  	 */
  	v |= (1ULL << 63);
  	return v;
  }
  
  /*
   * Initialise lock_secret with random bytes; secure_addr() XORs this
   * value into every lock-owner pointer it converts.
   */
  void __init ceph_flock_init(void)
  {
  	get_random_bytes(&lock_secret, sizeof(lock_secret));
  }
40819f6fb   Greg Farnum   ceph: add flock/f...
31
32
33
34
  /**
   * Implement fcntl and flock locking functions: build a file-lock
   * request from @fl, send it to the MDS and, for GETFILELOCK, copy the
   * reply back into @fl.
   *
   * @lock_type: lock rule stored in the request (CEPH_LOCK_FCNTL,
   *             CEPH_LOCK_FLOCK, ...)
   * @operation: MDS operation (CEPH_MDS_OP_SETFILELOCK or
   *             CEPH_MDS_OP_GETFILELOCK)
   * @file:      file whose inode is being locked
   * @cmd:       ceph lock type (CEPH_LOCK_SHARED/EXCL/UNLOCK)
   * @wait:      non-zero to request a blocking lock; ignored (forced to 0)
   *             unless this is a SETFILELOCK that is not an unlock
   * @fl:        lock description; overwritten with the reply contents on a
   *             successful GETFILELOCK
   *
   * Returns 0 on success, or the negative error from request creation or
   * from ceph_mdsc_do_request().
   */
  static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
  			     int cmd, u8 wait, struct file_lock *fl)
  {
  	struct inode *inode = file_inode(file);
  	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
  	struct ceph_mds_request *req;
  	int err;
  	u64 length = 0;
  	u64 owner;

  	/* only a SETFILELOCK that actually takes a lock may block */
  	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
  		wait = 0;

  	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
  	if (IS_ERR(req))
  		return PTR_ERR(req);
  	req->r_inode = inode;
  	ihold(inode);
  	req->r_num_caps = 1;

  	/* mds requires start and length rather than start and end */
  	if (LLONG_MAX == fl->fl_end)
  		length = 0;
  	else
  		length = fl->fl_end - fl->fl_start + 1;

  	owner = secure_addr(fl->fl_owner);

  	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
  	     "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
  	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
  	     wait, fl->fl_type);

  	req->r_args.filelock_change.rule = lock_type;
  	req->r_args.filelock_change.type = cmd;
  	req->r_args.filelock_change.owner = cpu_to_le64(owner);
  	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
  	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
  	req->r_args.filelock_change.length = cpu_to_le64(length);
  	req->r_args.filelock_change.wait = wait;
  	if (wait)
  		req->r_wait_for_completion = ceph_lock_wait_for_completion;

  	err = ceph_mdsc_do_request(mdsc, inode, req);

  	/*
  	 * Only look at the reply when the request succeeded: on failure
  	 * there may be no parsed filelock reply to dereference, and @fl
  	 * must be left as the caller passed it.
  	 */
  	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
  		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
  		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
  			fl->fl_type = F_RDLCK;
  		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
  			fl->fl_type = F_WRLCK;
  		else
  			fl->fl_type = F_UNLCK;

  		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
  		/* from here on 'length' holds start + length, i.e. end + 1 */
  		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
  			 le64_to_cpu(req->r_reply_info.filelock_reply->length);
  		/*
  		 * NOTE(review): the request path above encodes "to EOF" as
  		 * length 0, but a zero reply length is not mapped back to an
  		 * unbounded fl_end here -- confirm against MDS semantics.
  		 */
  		if (length >= 1)
  			fl->fl_end = length - 1;
  		else
  			fl->fl_end = 0;
  	}

  	ceph_mdsc_put_request(req);
  	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
  	     "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
  	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
  	     length, wait, fl->fl_type, err);
  	return err;
  }
9280be24d   Yan, Zheng   ceph: fix file lo...
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
  /*
   * Interruptible wait for a blocking SETFILELOCK request.
   *
   * If the wait completes normally, return 0.  If it is interrupted, send
   * a second SETFILELOCK request that copies the original arguments but
   * uses the matching *_INTR rule and CEPH_LOCK_UNLOCK type (presumably
   * asking the MDS to cancel the pending lock -- confirm against the MDS
   * protocol), then wait uninterruptibly for the original request.
   *
   * Returns 0 once @req has completed, or a negative errno if the second
   * request could not be created or failed with anything other than
   * -ERESTARTSYS.
   */
  static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
                                           struct ceph_mds_request *req)
  {
  	struct ceph_mds_request *intr_req;
  	struct inode *inode = req->r_inode;
  	int err, lock_type;

  	BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
  	/* pick the interrupt rule that corresponds to the original rule */
  	if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
  		lock_type = CEPH_LOCK_FCNTL_INTR;
  	else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
  		lock_type = CEPH_LOCK_FLOCK_INTR;
  	else
  		BUG_ON(1);
  	BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);

  	err = wait_for_completion_interruptible(&req->r_completion);
  	if (!err)
  		return 0;

  	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
  	     req->r_tid);

  	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
  					    USE_AUTH_MDS);
  	if (IS_ERR(intr_req))
  		return PTR_ERR(intr_req);

  	intr_req->r_inode = inode;
  	ihold(inode);
  	intr_req->r_num_caps = 1;

  	/* same owner/pid/range as the original, but as an *_INTR unlock */
  	intr_req->r_args.filelock_change = req->r_args.filelock_change;
  	intr_req->r_args.filelock_change.rule = lock_type;
  	intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;

  	err = ceph_mdsc_do_request(mdsc, inode, intr_req);
  	ceph_mdsc_put_request(intr_req);

  	if (err && err != -ERESTARTSYS)
  		return err;

  	/* the original request must still be allowed to finish */
  	wait_for_completion(&req->r_completion);
  	return 0;
  }
40819f6fb   Greg Farnum   ceph: add flock/f...
147
148
149
150
151
152
  /**
   * Attempt to set an fcntl lock.
   * For now, this just goes away to the server. Later it may be more awesome.
   */
  int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
  {
40819f6fb   Greg Farnum   ceph: add flock/f...
153
154
155
156
  	u8 lock_cmd;
  	int err;
  	u8 wait = 0;
  	u16 op = CEPH_MDS_OP_SETFILELOCK;
eb70c0ce4   Yan, Zheng   ceph: forbid mand...
157
158
159
160
161
  	if (!(fl->fl_flags & FL_POSIX))
  		return -ENOLCK;
  	/* No mandatory locks */
  	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
  		return -ENOLCK;
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
162
  	dout("ceph_lock, fl_owner: %p", fl->fl_owner);
40819f6fb   Greg Farnum   ceph: add flock/f...
163
164
  
  	/* set wait bit as appropriate, then make command as Ceph expects it*/
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
165
  	if (IS_GETLK(cmd))
40819f6fb   Greg Farnum   ceph: add flock/f...
166
  		op = CEPH_MDS_OP_GETFILELOCK;
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
167
168
  	else if (IS_SETLKW(cmd))
  		wait = 1;
40819f6fb   Greg Farnum   ceph: add flock/f...
169
170
171
172
173
174
175
  
  	if (F_RDLCK == fl->fl_type)
  		lock_cmd = CEPH_LOCK_SHARED;
  	else if (F_WRLCK == fl->fl_type)
  		lock_cmd = CEPH_LOCK_EXCL;
  	else
  		lock_cmd = CEPH_LOCK_UNLOCK;
637ae8d54   Herb Shiu   ceph: pass lock i...
176
  	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
177
  	if (!err) {
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
178
  		if (op != CEPH_MDS_OP_GETFILELOCK) {
a5b10629e   Herb Shiu   ceph: Behave bett...
179
180
181
  			dout("mds locked, locking locally");
  			err = posix_lock_file(file, fl, NULL);
  			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
0c1f91f27   Sage Weil   ceph: unwind canc...
182
183
184
  				/* undo! This should only happen if
  				 * the kernel detects local
  				 * deadlock. */
a5b10629e   Herb Shiu   ceph: Behave bett...
185
186
  				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
  						  CEPH_LOCK_UNLOCK, 0, fl);
0c1f91f27   Sage Weil   ceph: unwind canc...
187
188
  				dout("got %d on posix_lock_file, undid lock",
  				     err);
a5b10629e   Herb Shiu   ceph: Behave bett...
189
  			}
40819f6fb   Greg Farnum   ceph: add flock/f...
190
  		}
40819f6fb   Greg Farnum   ceph: add flock/f...
191
192
193
194
195
196
  	}
  	return err;
  }
  
  int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
  {
40819f6fb   Greg Farnum   ceph: add flock/f...
197
198
  	u8 lock_cmd;
  	int err;
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
199
  	u8 wait = 0;
40819f6fb   Greg Farnum   ceph: add flock/f...
200

eb70c0ce4   Yan, Zheng   ceph: forbid mand...
201
202
203
204
205
  	if (!(fl->fl_flags & FL_FLOCK))
  		return -ENOLCK;
  	/* No mandatory locks */
  	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
  		return -ENOLCK;
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
206
  	dout("ceph_flock, fl_file: %p", fl->fl_file);
40819f6fb   Greg Farnum   ceph: add flock/f...
207

0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
208
209
210
211
  	if (IS_SETLKW(cmd))
  		wait = 1;
  
  	if (F_RDLCK == fl->fl_type)
40819f6fb   Greg Farnum   ceph: add flock/f...
212
  		lock_cmd = CEPH_LOCK_SHARED;
0e8e95d6d   Yan, Zheng   ceph: use fl->fl_...
213
  	else if (F_WRLCK == fl->fl_type)
40819f6fb   Greg Farnum   ceph: add flock/f...
214
215
216
  		lock_cmd = CEPH_LOCK_EXCL;
  	else
  		lock_cmd = CEPH_LOCK_UNLOCK;
40819f6fb   Greg Farnum   ceph: add flock/f...
217
218
  
  	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
637ae8d54   Herb Shiu   ceph: pass lock i...
219
  				file, lock_cmd, wait, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
220
221
222
223
224
  	if (!err) {
  		err = flock_lock_file_wait(file, fl);
  		if (err) {
  			ceph_lock_message(CEPH_LOCK_FLOCK,
  					  CEPH_MDS_OP_SETFILELOCK,
637ae8d54   Herb Shiu   ceph: pass lock i...
225
  					  file, CEPH_LOCK_UNLOCK, 0, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
226
227
  			dout("got %d on flock_lock_file_wait, undid lock", err);
  		}
40819f6fb   Greg Farnum   ceph: add flock/f...
228
229
230
231
232
  	}
  	return err;
  }
  
  /**
   * Must be called with lock_flocks() already held. Fills in the passed
   * counter variables, so you can prepare pagelist metadata before calling
   * ceph_encode_locks.
   *
   * Walks inode->i_flock once; each entry is counted as fcntl if FL_POSIX
   * is set, otherwise as flock if FL_FLOCK is set.
   */
  void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
  {
  	struct file_lock *fl = inode->i_flock;

  	*fcntl_count = 0;
  	*flock_count = 0;

  	while (fl != NULL) {
  		if (fl->fl_flags & FL_POSIX)
  			++(*fcntl_count);
  		else if (fl->fl_flags & FL_FLOCK)
  			++(*flock_count);
  		fl = fl->fl_next;
  	}

  	dout("counted %d flock locks and %d fcntl locks",
  	     *flock_count, *fcntl_count);
  }
  
  /**
39be95e9c   Jim Schutt   ceph: ceph_pageli...
255
   * Encode the flock and fcntl locks for the given inode into the ceph_filelock
1c8c601a8   Jeff Layton   locks: protect mo...
256
   * array. Must be called with inode->i_lock already held.
39be95e9c   Jim Schutt   ceph: ceph_pageli...
257
   * If we encounter more of a specific lock type than expected, return -ENOSPC.
40819f6fb   Greg Farnum   ceph: add flock/f...
258
   */
39be95e9c   Jim Schutt   ceph: ceph_pageli...
259
260
261
  int ceph_encode_locks_to_buffer(struct inode *inode,
  				struct ceph_filelock *flocks,
  				int num_fcntl_locks, int num_flock_locks)
40819f6fb   Greg Farnum   ceph: add flock/f...
262
263
  {
  	struct file_lock *lock;
40819f6fb   Greg Farnum   ceph: add flock/f...
264
  	int err = 0;
fca4451ac   Greg Farnum   ceph: preallocate...
265
266
  	int seen_fcntl = 0;
  	int seen_flock = 0;
39be95e9c   Jim Schutt   ceph: ceph_pageli...
267
  	int l = 0;
40819f6fb   Greg Farnum   ceph: add flock/f...
268
269
270
  
  	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
  	     num_fcntl_locks);
39be95e9c   Jim Schutt   ceph: ceph_pageli...
271

40819f6fb   Greg Farnum   ceph: add flock/f...
272
273
  	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
  		if (lock->fl_flags & FL_POSIX) {
fca4451ac   Greg Farnum   ceph: preallocate...
274
275
276
277
278
  			++seen_fcntl;
  			if (seen_fcntl > num_fcntl_locks) {
  				err = -ENOSPC;
  				goto fail;
  			}
39be95e9c   Jim Schutt   ceph: ceph_pageli...
279
  			err = lock_to_ceph_filelock(lock, &flocks[l]);
40819f6fb   Greg Farnum   ceph: add flock/f...
280
281
  			if (err)
  				goto fail;
39be95e9c   Jim Schutt   ceph: ceph_pageli...
282
  			++l;
40819f6fb   Greg Farnum   ceph: add flock/f...
283
  		}
40819f6fb   Greg Farnum   ceph: add flock/f...
284
  	}
40819f6fb   Greg Farnum   ceph: add flock/f...
285
286
  	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
  		if (lock->fl_flags & FL_FLOCK) {
fca4451ac   Greg Farnum   ceph: preallocate...
287
288
289
290
291
  			++seen_flock;
  			if (seen_flock > num_flock_locks) {
  				err = -ENOSPC;
  				goto fail;
  			}
39be95e9c   Jim Schutt   ceph: ceph_pageli...
292
  			err = lock_to_ceph_filelock(lock, &flocks[l]);
40819f6fb   Greg Farnum   ceph: add flock/f...
293
294
  			if (err)
  				goto fail;
39be95e9c   Jim Schutt   ceph: ceph_pageli...
295
  			++l;
40819f6fb   Greg Farnum   ceph: add flock/f...
296
  		}
40819f6fb   Greg Farnum   ceph: add flock/f...
297
298
299
300
  	}
  fail:
  	return err;
  }
39be95e9c   Jim Schutt   ceph: ceph_pageli...
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
  /**
   * Copy the encoded flock and fcntl locks into the pagelist.
   * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
   * sequential flock locks.
   *
   * @flocks is expected to hold the fcntl entries first, immediately
   * followed by the flock entries.
   * Returns zero on success, or the first error reported by
   * ceph_pagelist_append().
   */
  int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
  			   struct ceph_pagelist *pagelist,
  			   int num_fcntl_locks, int num_flock_locks)
  {
  	__le32 count;
  	int ret;

  	/* fcntl section: count, then the entries */
  	count = cpu_to_le32(num_fcntl_locks);
  	ret = ceph_pagelist_append(pagelist, &count, sizeof(count));
  	if (ret)
  		return ret;

  	ret = ceph_pagelist_append(pagelist, flocks,
  				   num_fcntl_locks * sizeof(*flocks));
  	if (ret)
  		return ret;

  	/* flock section: count, then the entries that follow the fcntl ones */
  	count = cpu_to_le32(num_flock_locks);
  	ret = ceph_pagelist_append(pagelist, &count, sizeof(count));
  	if (ret)
  		return ret;

  	return ceph_pagelist_append(pagelist,
  				    &flocks[num_fcntl_locks],
  				    num_flock_locks * sizeof(*flocks));
  }
40819f6fb   Greg Farnum   ceph: add flock/f...
335
336
337
338
339
340
341
  /*
   * Given a pointer to a lock, convert it to a ceph filelock
   */
  int lock_to_ceph_filelock(struct file_lock *lock,
  			  struct ceph_filelock *cephlock)
  {
  	int err = 0;
40819f6fb   Greg Farnum   ceph: add flock/f...
342
343
344
  	cephlock->start = cpu_to_le64(lock->fl_start);
  	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
  	cephlock->client = cpu_to_le64(0);
eb13e832f   Yan, Zheng   ceph: use fl->fl_...
345
  	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
130d1f956   Jeff Layton   locks: ensure tha...
346
  	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
40819f6fb   Greg Farnum   ceph: add flock/f...
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
  
  	switch (lock->fl_type) {
  	case F_RDLCK:
  		cephlock->type = CEPH_LOCK_SHARED;
  		break;
  	case F_WRLCK:
  		cephlock->type = CEPH_LOCK_EXCL;
  		break;
  	case F_UNLCK:
  		cephlock->type = CEPH_LOCK_UNLOCK;
  		break;
  	default:
  		dout("Have unknown lock type %d", lock->fl_type);
  		err = -EINVAL;
  	}
  
  	return err;
  }