Blame view

fs/ceph/locks.c 7.69 KB
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1
  #include <linux/ceph/ceph_debug.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
2
3
4
5
6
7
  
  #include <linux/file.h>
  #include <linux/namei.h>
  
  #include "super.h"
  #include "mds_client.h"
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
8
  #include <linux/ceph/pagelist.h>
40819f6fb   Greg Farnum   ceph: add flock/f...
9
10
11
12
13
  
  /**
   * Implement fcntl and flock locking functions.
   */

  /*
   * Build a file-lock request from @fl, send it to the MDS and wait for
   * the reply.
   *
   * @lock_type: stored in the request's "rule" field; callers in this file
   *             pass CEPH_LOCK_FCNTL or CEPH_LOCK_FLOCK.
   * @operation: MDS operation code.  For CEPH_MDS_OP_GETFILELOCK the reply
   *             is copied back into @fl on success.
   * @file:      open file whose inode the request targets.
   * @cmd:       stored in the request's "type" field; callers in this file
   *             pass CEPH_LOCK_SHARED, CEPH_LOCK_EXCL or CEPH_LOCK_UNLOCK.
   * @wait:      stored in the request's "wait" field.
   * @fl:        lock description (pid, start, end); overwritten with the
   *             reply for a successful CEPH_MDS_OP_GETFILELOCK.
   *
   * Returns 0 on success, or the negative error from
   * ceph_mdsc_create_request() / ceph_mdsc_do_request().
   */
  static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
  			     int cmd, u8 wait, struct file_lock *fl)
  {
  	struct inode *inode = file->f_dentry->d_inode;
  	struct ceph_mds_client *mdsc =
  		ceph_sb_to_client(inode->i_sb)->mdsc;
  	struct ceph_mds_request *req;
  	int err;
  	u64 length = 0;

  	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
  	if (IS_ERR(req))
  		return PTR_ERR(req);
  	/* the request keeps a pointer to the inode, so pin it */
  	req->r_inode = inode;
  	ihold(inode);

  	/* mds requires start and length rather than start and end */
  	if (LLONG_MAX == fl->fl_end)
  		length = 0;
  	else
  		length = fl->fl_end - fl->fl_start + 1;

  	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
  	     "length: %llu, wait: %d, type: %d", (int)lock_type,
  	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
  	     length, wait, fl->fl_type);

  	req->r_args.filelock_change.rule = lock_type;
  	req->r_args.filelock_change.type = cmd;
  	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
  	/* This should be adjusted, but I'm not sure if
  	   namespaces actually get id numbers*/
  	req->r_args.filelock_change.pid_namespace =
  		cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
  	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
  	req->r_args.filelock_change.length = cpu_to_le64(length);
  	req->r_args.filelock_change.wait = wait;

  	err = ceph_mdsc_do_request(mdsc, inode, req);

  	/*
  	 * Only look at the reply when the request succeeded: on failure
  	 * there is no guarantee that filelock_reply points at anything.
  	 */
  	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
  		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
  		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
  			fl->fl_type = F_RDLCK;
  		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
  			fl->fl_type = F_WRLCK;
  		else
  			fl->fl_type = F_UNLCK;

  		/* convert the reply's (start, length) back to (start, end) */
  		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
  		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
  			 le64_to_cpu(req->r_reply_info.filelock_reply->length);
  		if (length >= 1)
  			fl->fl_end = length - 1;
  		else
  			fl->fl_end = 0;
  	}
  	ceph_mdsc_put_request(req);
  	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
  	     "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
  	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
  	     length, wait, fl->fl_type, err);
  	return err;
  }
  
  /**
   * Attempt to set an fcntl lock.
   * For now, this just goes away to the server. Later it may be more awesome.
   */
  int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
  {
40819f6fb   Greg Farnum   ceph: add flock/f...
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  	u8 lock_cmd;
  	int err;
  	u8 wait = 0;
  	u16 op = CEPH_MDS_OP_SETFILELOCK;
  
  	fl->fl_nspid = get_pid(task_tgid(current));
  	dout("ceph_lock, fl_pid:%d", fl->fl_pid);
  
  	/* set wait bit as appropriate, then make command as Ceph expects it*/
  	if (F_SETLKW == cmd)
  		wait = 1;
  	if (F_GETLK == cmd)
  		op = CEPH_MDS_OP_GETFILELOCK;
  
  	if (F_RDLCK == fl->fl_type)
  		lock_cmd = CEPH_LOCK_SHARED;
  	else if (F_WRLCK == fl->fl_type)
  		lock_cmd = CEPH_LOCK_EXCL;
  	else
  		lock_cmd = CEPH_LOCK_UNLOCK;
637ae8d54   Herb Shiu   ceph: pass lock i...
103
  	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
104
  	if (!err) {
a5b10629e   Herb Shiu   ceph: Behave bett...
105
106
107
108
  		if ( op != CEPH_MDS_OP_GETFILELOCK ){
  			dout("mds locked, locking locally");
  			err = posix_lock_file(file, fl, NULL);
  			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
0c1f91f27   Sage Weil   ceph: unwind canc...
109
110
111
  				/* undo! This should only happen if
  				 * the kernel detects local
  				 * deadlock. */
a5b10629e   Herb Shiu   ceph: Behave bett...
112
113
  				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
  						  CEPH_LOCK_UNLOCK, 0, fl);
0c1f91f27   Sage Weil   ceph: unwind canc...
114
115
  				dout("got %d on posix_lock_file, undid lock",
  				     err);
a5b10629e   Herb Shiu   ceph: Behave bett...
116
  			}
40819f6fb   Greg Farnum   ceph: add flock/f...
117
  		}
a5b10629e   Herb Shiu   ceph: Behave bett...
118

0c1f91f27   Sage Weil   ceph: unwind canc...
119
120
121
122
123
  	} else if (err == -ERESTARTSYS) {
  		dout("undoing lock
  ");
  		ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
  				  CEPH_LOCK_UNLOCK, 0, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
124
125
126
127
128
129
  	}
  	return err;
  }
  
  int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
  {
40819f6fb   Greg Farnum   ceph: add flock/f...
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
  	u8 lock_cmd;
  	int err;
  	u8 wait = 1;
  
  	fl->fl_nspid = get_pid(task_tgid(current));
  	dout("ceph_flock, fl_pid:%d", fl->fl_pid);
  
  	/* set wait bit, then clear it out of cmd*/
  	if (cmd & LOCK_NB)
  		wait = 0;
  	cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN);
  	/* set command sequence that Ceph wants to see:
  	   shared lock, exclusive lock, or unlock */
  	if (LOCK_SH == cmd)
  		lock_cmd = CEPH_LOCK_SHARED;
  	else if (LOCK_EX == cmd)
  		lock_cmd = CEPH_LOCK_EXCL;
  	else
  		lock_cmd = CEPH_LOCK_UNLOCK;
40819f6fb   Greg Farnum   ceph: add flock/f...
149
150
  
  	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
637ae8d54   Herb Shiu   ceph: pass lock i...
151
  				file, lock_cmd, wait, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
152
153
154
155
156
  	if (!err) {
  		err = flock_lock_file_wait(file, fl);
  		if (err) {
  			ceph_lock_message(CEPH_LOCK_FLOCK,
  					  CEPH_MDS_OP_SETFILELOCK,
637ae8d54   Herb Shiu   ceph: pass lock i...
157
  					  file, CEPH_LOCK_UNLOCK, 0, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
158
159
  			dout("got %d on flock_lock_file_wait, undid lock", err);
  		}
0c1f91f27   Sage Weil   ceph: unwind canc...
160
161
162
163
164
165
  	} else if (err == -ERESTARTSYS) {
  		dout("undoing lock
  ");
  		ceph_lock_message(CEPH_LOCK_FLOCK,
  				  CEPH_MDS_OP_SETFILELOCK,
  				  file, CEPH_LOCK_UNLOCK, 0, fl);
40819f6fb   Greg Farnum   ceph: add flock/f...
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  	}
  	return err;
  }
  
  /**
   * Walk the inode's i_flock list and report how many entries are fcntl
   * (FL_POSIX) locks and how many are flock (FL_FLOCK) locks, so the caller
   * can prepare pagelist metadata before calling ceph_encode_locks.
   *
   * Both output counters are reset to zero before counting.  An entry with
   * both flags set is counted as an fcntl lock only.
   *
   * Must be called with BKL already held.
   * NOTE(review): ceph_encode_locks names lock_flocks() as the required
   * lock instead - confirm which one applies here.
   */
  void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
  {
  	struct file_lock *fl;

  	*fcntl_count = 0;
  	*flock_count = 0;

  	fl = inode->i_flock;
  	while (fl != NULL) {
  		if (fl->fl_flags & FL_POSIX)
  			*fcntl_count += 1;
  		else if (fl->fl_flags & FL_FLOCK)
  			*flock_count += 1;
  		fl = fl->fl_next;
  	}

  	dout("counted %d flock locks and %d fcntl locks",
  	     *flock_count, *fcntl_count);
  }
  
  /**
   * Encode the flock and fcntl locks for the given inode into the pagelist.
   * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
   * sequential flock locks.
fca4451ac   Greg Farnum   ceph: preallocate...
196
197
198
   * Must be called with lock_flocks() already held.
   * If we encounter more of a specific lock type than expected,
   * we return the value 1.
40819f6fb   Greg Farnum   ceph: add flock/f...
199
200
201
202
203
204
205
   */
  int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
  		      int num_fcntl_locks, int num_flock_locks)
  {
  	struct file_lock *lock;
  	struct ceph_filelock cephlock;
  	int err = 0;
fca4451ac   Greg Farnum   ceph: preallocate...
206
207
  	int seen_fcntl = 0;
  	int seen_flock = 0;
40819f6fb   Greg Farnum   ceph: add flock/f...
208
209
210
211
212
213
214
215
  
  	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
  	     num_fcntl_locks);
  	err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32));
  	if (err)
  		goto fail;
  	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
  		if (lock->fl_flags & FL_POSIX) {
fca4451ac   Greg Farnum   ceph: preallocate...
216
217
218
219
220
  			++seen_fcntl;
  			if (seen_fcntl > num_fcntl_locks) {
  				err = -ENOSPC;
  				goto fail;
  			}
40819f6fb   Greg Farnum   ceph: add flock/f...
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
  			err = lock_to_ceph_filelock(lock, &cephlock);
  			if (err)
  				goto fail;
  			err = ceph_pagelist_append(pagelist, &cephlock,
  					   sizeof(struct ceph_filelock));
  		}
  		if (err)
  			goto fail;
  	}
  
  	err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32));
  	if (err)
  		goto fail;
  	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
  		if (lock->fl_flags & FL_FLOCK) {
fca4451ac   Greg Farnum   ceph: preallocate...
236
237
238
239
240
  			++seen_flock;
  			if (seen_flock > num_flock_locks) {
  				err = -ENOSPC;
  				goto fail;
  			}
40819f6fb   Greg Farnum   ceph: add flock/f...
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
  			err = lock_to_ceph_filelock(lock, &cephlock);
  			if (err)
  				goto fail;
  			err = ceph_pagelist_append(pagelist, &cephlock,
  					   sizeof(struct ceph_filelock));
  		}
  		if (err)
  			goto fail;
  	}
  fail:
  	return err;
  }
  
  /*
   * Given a pointer to a lock, convert it to a ceph filelock
   */
  int lock_to_ceph_filelock(struct file_lock *lock,
  			  struct ceph_filelock *cephlock)
  {
  	int err = 0;
  
  	cephlock->start = cpu_to_le64(lock->fl_start);
  	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
  	cephlock->client = cpu_to_le64(0);
  	cephlock->pid = cpu_to_le64(lock->fl_pid);
ad8453ab0   Alan Cox   ceph: Fix warnings
266
267
  	cephlock->pid_namespace =
  	        cpu_to_le64((u64)(unsigned long)lock->fl_nspid);
40819f6fb   Greg Farnum   ceph: add flock/f...
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
  
  	switch (lock->fl_type) {
  	case F_RDLCK:
  		cephlock->type = CEPH_LOCK_SHARED;
  		break;
  	case F_WRLCK:
  		cephlock->type = CEPH_LOCK_EXCL;
  		break;
  	case F_UNLCK:
  		cephlock->type = CEPH_LOCK_UNLOCK;
  		break;
  	default:
  		dout("Have unknown lock type %d", lock->fl_type);
  		err = -EINVAL;
  	}
  
  	return err;
  }