Blame view

fs/ceph/dir.c 51.1 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
2
  #include <linux/ceph/ceph_debug.h>
2817b000b   Sage Weil   ceph: directory o...
3
4
  
  #include <linux/spinlock.h>
2817b000b   Sage Weil   ceph: directory o...
5
  #include <linux/namei.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
6
  #include <linux/slab.h>
2817b000b   Sage Weil   ceph: directory o...
7
  #include <linux/sched.h>
2cdeb1e47   Andreas Gruenbacher   ceph: Switch to g...
8
  #include <linux/xattr.h>
2817b000b   Sage Weil   ceph: directory o...
9
10
  
  #include "super.h"
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
11
  #include "mds_client.h"
2817b000b   Sage Weil   ceph: directory o...
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
  
  /*
   * Directory operations: readdir, lookup, create, link, unlink,
   * rename, etc.
   */
  
  /*
   * Ceph MDS operations are specified in terms of a base ino and
   * relative path.  Thus, the client can specify an operation on a
   * specific inode (e.g., a getattr due to fstat(2)), or as a path
   * relative to, say, the root directory.
   *
   * Normally, we limit ourselves to strict inode ops (no path component)
   * or dentry operations (a single path component relative to an ino).  The
   * exception to this is open_root_dentry(), which will open the mount
   * point by name.
   */
52dfb8ac0   Sage Weil   ceph: constify de...
29
  const struct dentry_operations ceph_dentry_ops;
2817b000b   Sage Weil   ceph: directory o...
30

37c4efc1d   Yan, Zheng   ceph: periodicall...
31
32
  static bool __dentry_lease_is_valid(struct ceph_dentry_info *di);
  static int __dir_lease_try_check(const struct dentry *dentry);
2817b000b   Sage Weil   ceph: directory o...
33
34
35
  /*
   * Initialize ceph dentry state.
   */
ad5cb123f   Al Viro   ceph: switch to u...
36
  static int ceph_d_init(struct dentry *dentry)
2817b000b   Sage Weil   ceph: directory o...
37
38
  {
  	struct ceph_dentry_info *di;
2678da88f   Xiubo Li   ceph: add ceph_sb...
39
  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb);
2817b000b   Sage Weil   ceph: directory o...
40

99ec26977   Geliang Tang   ceph: use kmem_ca...
41
  	di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
2817b000b   Sage Weil   ceph: directory o...
42
43
  	if (!di)
  		return -ENOMEM;          /* oh well */
2817b000b   Sage Weil   ceph: directory o...
44
45
  	di->dentry = dentry;
  	di->lease_session = NULL;
9b16f03c4   Miklos Szeredi   ceph: don't use -...
46
  	di->time = jiffies;
48d0cbd12   Sage Weil   ceph: handle raci...
47
  	dentry->d_fsdata = di;
37c4efc1d   Yan, Zheng   ceph: periodicall...
48
  	INIT_LIST_HEAD(&di->lease_list);
f9009efac   Xiubo Li   ceph: add dentry ...
49
50
  
  	atomic64_inc(&mdsc->metric.total_dentries);
2817b000b   Sage Weil   ceph: directory o...
51
52
  	return 0;
  }
2817b000b   Sage Weil   ceph: directory o...
53
  /*
   * for f_pos for readdir:
   * - hash order:
   *	(0xff << 52) | ((24 bits hash) << 28) |
   *	(the nth entry has hash collision);
   * - frag+name order;
   *	((frag value) << 28) | (the nth entry in frag);
   */
f3c4ebe65   Yan, Zheng   ceph: using hash ...
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
  #define OFFSET_BITS	28
  #define OFFSET_MASK	((1 << OFFSET_BITS) - 1)
  #define HASH_ORDER	(0xffull << (OFFSET_BITS + 24))
  
  /*
   * Compose a readdir position: @high goes above the low OFFSET_BITS bits,
   * @off is OR-ed into the low bits, and the HASH_ORDER marker bits are
   * set on top when @hash_order is true.
   */
  loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order)
  {
  	loff_t pos = ((loff_t)high << OFFSET_BITS) | (loff_t)off;
  
  	if (hash_order)
  		pos |= HASH_ORDER;
  	return pos;
  }
  
  /* True when every bit of the HASH_ORDER marker is set in @p. */
  static bool is_hash_order(loff_t p)
  {
  	unsigned long long marker = p & HASH_ORDER;
  
  	return marker == HASH_ORDER;
  }
2817b000b   Sage Weil   ceph: directory o...
76
77
  /*
   * Upper part of a readdir position: @p with the low OFFSET_BITS bits
   * shifted out, truncated to unsigned.
   */
  static unsigned fpos_frag(loff_t p)
  {
  	return p >> OFFSET_BITS;
  }
f3c4ebe65   Yan, Zheng   ceph: using hash ...
80
81
82
83
84
  
  /* Hash value of a position: ceph_frag_value() of its frag part. */
  static unsigned fpos_hash(loff_t p)
  {
  	unsigned frag = fpos_frag(p);
  
  	return ceph_frag_value(frag);
  }
2817b000b   Sage Weil   ceph: directory o...
85
86
  /* Low OFFSET_BITS bits of a readdir position. */
  static unsigned fpos_off(loff_t p)
  {
  	return p & OFFSET_MASK;
  }
4d5f5df67   Yan, Zheng   ceph: fix __dcach...
89
90
91
92
93
94
95
  /*
   * Order two readdir positions: the frag parts are compared first with
   * ceph_frag_compare(); on a tie the result is the difference of the
   * offset parts.
   */
  static int fpos_cmp(loff_t l, loff_t r)
  {
  	int frag_order = ceph_frag_compare(fpos_frag(l), fpos_frag(r));
  
  	if (frag_order != 0)
  		return frag_order;
  	return (int)(fpos_off(l) - fpos_off(r));
  }
2817b000b   Sage Weil   ceph: directory o...
96
  /*
fdd4e1583   Yan, Zheng   ceph: rework dcac...
97
98
99
100
101
   * make note of the last dentry we read, so we can
   * continue at the same lexicographical point,
   * regardless of what dir changes take place on the
   * server.
   */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
102
  static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
fdd4e1583   Yan, Zheng   ceph: rework dcac...
103
104
105
106
107
  		            int len, unsigned next_offset)
  {
  	char *buf = kmalloc(len+1, GFP_KERNEL);
  	if (!buf)
  		return -ENOMEM;
bb48bd4dc   Chengguang Xu   ceph: optimize me...
108
109
110
111
112
113
114
  	kfree(dfi->last_name);
  	dfi->last_name = buf;
  	memcpy(dfi->last_name, name, len);
  	dfi->last_name[len] = 0;
  	dfi->next_offset = next_offset;
  	dout("note_last_dentry '%s'
  ", dfi->last_name);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
115
116
  	return 0;
  }
c530cd24c   Yan, Zheng   ceph: search cach...
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  
  /*
   * Look up slot @idx of the readdir cache kept in the page cache of
   * @parent's inode (an array of dentry pointers) and take a reference
   * on the dentry stored there.
   *
   * @cache_ctl remembers the currently mapped page so that consecutive
   * lookups on the same page do not look the page up and kmap it again.
   *
   * Returns:
   *   NULL             - @idx lies at or beyond i_size (end of the cache)
   *   ERR_PTR(-EAGAIN) - the page is missing, the directory is no longer
   *                      complete+ordered, the slot is empty, or the
   *                      dentry is already dead
   *   dentry           - with a reference obtained via
   *                      lockref_get_not_dead()
   */
  static struct dentry *
  __dcache_find_get_entry(struct dentry *parent, u64 idx,
  			struct ceph_readdir_cache_control *cache_ctl)
  {
  	struct inode *dir = d_inode(parent);
  	struct dentry *dentry;
  	unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1;
  	loff_t ptr_pos = idx * sizeof(struct dentry *);
  	pgoff_t ptr_pgoff = ptr_pos >> PAGE_SHIFT;
  
  	if (ptr_pos >= i_size_read(dir))
  		return NULL;
  
  	if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) {
  		ceph_readdir_cache_release(cache_ctl);
  		cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
  		if (!cache_ctl->page) {
  			dout(" page %lu not found\n", ptr_pgoff);
  			return ERR_PTR(-EAGAIN);
  		}
  		/* reading/filling the cache are serialized by
  		   i_mutex, no need to use page lock */
  		unlock_page(cache_ctl->page);
  		cache_ctl->dentries = kmap(cache_ctl->page);
  	}
  
  	/* position of the slot inside the mapped page */
  	cache_ctl->index = idx & idx_mask;
  
  	rcu_read_lock();
  	spin_lock(&parent->d_lock);
  	/* check i_size again here, because empty directory can be
  	 * marked as complete while not holding the i_mutex. */
  	if (ceph_dir_is_complete_ordered(dir) && ptr_pos < i_size_read(dir))
  		dentry = cache_ctl->dentries[cache_ctl->index];
  	else
  		dentry = NULL;
  	spin_unlock(&parent->d_lock);
  	/* only hand out dentries we could still get a reference on */
  	if (dentry && !lockref_get_not_dead(&dentry->d_lockref))
  		dentry = NULL;
  	rcu_read_unlock();
  	return dentry ? : ERR_PTR(-EAGAIN);
  }
fdd4e1583   Yan, Zheng   ceph: rework dcac...
161
  /*
2817b000b   Sage Weil   ceph: directory o...
162
163
   * When possible, we try to satisfy a readdir by peeking at the
   * dcache.  We make this work by carefully ordering dentries on
946e51f2b   Al Viro   move d_rcu from o...
164
   * d_child when we initially get results back from the MDS, and
2817b000b   Sage Weil   ceph: directory o...
165
166
167
   * falling back to a "normal" sync readdir if any dentries in the dir
   * are dropped.
   *
2f276c511   Yan, Zheng   ceph: use i_relea...
168
   * Complete dir indicates that we have all dentries in the dir.  It is
2817b000b   Sage Weil   ceph: directory o...
169
170
171
   * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
   * the MDS if/when the directory is modified).
   */
a30be7cb2   Yan, Zheng   ceph: skip invali...
172
  static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
97aeb6bf9   Yan, Zheng   ceph: use atomic_...
173
  			    int shared_gen)
2817b000b   Sage Weil   ceph: directory o...
174
  {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
175
  	struct ceph_dir_file_info *dfi = file->private_data;
b583043e9   Al Viro   kill f_dentry uses
176
  	struct dentry *parent = file->f_path.dentry;
2b0143b5c   David Howells   VFS: normal files...
177
  	struct inode *dir = d_inode(parent);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
178
  	struct dentry *dentry, *last = NULL;
2817b000b   Sage Weil   ceph: directory o...
179
  	struct ceph_dentry_info *di;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
180
  	struct ceph_readdir_cache_control cache_ctl = {};
c530cd24c   Yan, Zheng   ceph: search cach...
181
182
  	u64 idx = 0;
  	int err = 0;
2817b000b   Sage Weil   ceph: directory o...
183

97aeb6bf9   Yan, Zheng   ceph: use atomic_...
184
185
  	dout("__dcache_readdir %p v%u at %llx
  ", dir, (unsigned)shared_gen, ctx->pos);
2817b000b   Sage Weil   ceph: directory o...
186

c530cd24c   Yan, Zheng   ceph: search cach...
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
  	/* search start position */
  	if (ctx->pos > 2) {
  		u64 count = div_u64(i_size_read(dir), sizeof(struct dentry *));
  		while (count > 0) {
  			u64 step = count >> 1;
  			dentry = __dcache_find_get_entry(parent, idx + step,
  							 &cache_ctl);
  			if (!dentry) {
  				/* use linar search */
  				idx = 0;
  				break;
  			}
  			if (IS_ERR(dentry)) {
  				err = PTR_ERR(dentry);
  				goto out;
  			}
  			di = ceph_dentry(dentry);
  			spin_lock(&dentry->d_lock);
  			if (fpos_cmp(di->offset, ctx->pos) < 0) {
  				idx += step + 1;
  				count -= step + 1;
  			} else {
  				count = step;
  			}
  			spin_unlock(&dentry->d_lock);
  			dput(dentry);
  		}
2817b000b   Sage Weil   ceph: directory o...
214

c530cd24c   Yan, Zheng   ceph: search cach...
215
216
  		dout("__dcache_readdir %p cache idx %llu
  ", dir, idx);
2817b000b   Sage Weil   ceph: directory o...
217
  	}
fdd4e1583   Yan, Zheng   ceph: rework dcac...
218

c530cd24c   Yan, Zheng   ceph: search cach...
219
220
221
222
  	for (;;) {
  		bool emit_dentry = false;
  		dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl);
  		if (!dentry) {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
223
  			dfi->file_info.flags |= CEPH_F_ATEND;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
224
225
  			err = 0;
  			break;
2817b000b   Sage Weil   ceph: directory o...
226
  		}
c530cd24c   Yan, Zheng   ceph: search cach...
227
228
229
  		if (IS_ERR(dentry)) {
  			err = PTR_ERR(dentry);
  			goto out;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
230
  		}
fdd4e1583   Yan, Zheng   ceph: rework dcac...
231
  		spin_lock(&dentry->d_lock);
5495c2d04   Yan, Zheng   ceph: avoid deref...
232
233
234
235
236
237
238
239
240
241
  		di = ceph_dentry(dentry);
  		if (d_unhashed(dentry) ||
  		    d_really_is_negative(dentry) ||
  		    di->lease_shared_gen != shared_gen) {
  			spin_unlock(&dentry->d_lock);
  			dput(dentry);
  			err = -EAGAIN;
  			goto out;
  		}
  		if (fpos_cmp(ctx->pos, di->offset) <= 0) {
37c4efc1d   Yan, Zheng   ceph: periodicall...
242
  			__ceph_dentry_dir_lease_touch(di);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
243
244
  			emit_dentry = true;
  		}
da5029563   Nick Piggin   fs: dcache scale ...
245
  		spin_unlock(&dentry->d_lock);
2817b000b   Sage Weil   ceph: directory o...
246

fdd4e1583   Yan, Zheng   ceph: rework dcac...
247
  		if (emit_dentry) {
f3c4ebe65   Yan, Zheng   ceph: using hash ...
248
249
  			dout(" %llx dentry %p %pd %p
  ", di->offset,
fdd4e1583   Yan, Zheng   ceph: rework dcac...
250
251
252
  			     dentry, dentry, d_inode(dentry));
  			ctx->pos = di->offset;
  			if (!dir_emit(ctx, dentry->d_name.name,
ebce3eb2f   Jeff Layton   ceph: fix inode n...
253
  				      dentry->d_name.len, ceph_present_inode(d_inode(dentry)),
fdd4e1583   Yan, Zheng   ceph: rework dcac...
254
255
256
257
258
259
  				      d_inode(dentry)->i_mode >> 12)) {
  				dput(dentry);
  				err = 0;
  				break;
  			}
  			ctx->pos++;
0081bd83c   Yan, Zheng   ceph: check direc...
260

fdd4e1583   Yan, Zheng   ceph: rework dcac...
261
262
263
264
265
  			if (last)
  				dput(last);
  			last = dentry;
  		} else {
  			dput(dentry);
2817b000b   Sage Weil   ceph: directory o...
266
  		}
fdd4e1583   Yan, Zheng   ceph: rework dcac...
267
  	}
c530cd24c   Yan, Zheng   ceph: search cach...
268
  out:
fdd4e1583   Yan, Zheng   ceph: rework dcac...
269
270
271
272
  	ceph_readdir_cache_release(&cache_ctl);
  	if (last) {
  		int ret;
  		di = ceph_dentry(last);
bb48bd4dc   Chengguang Xu   ceph: optimize me...
273
  		ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
fdd4e1583   Yan, Zheng   ceph: rework dcac...
274
275
276
  				       fpos_off(di->offset) + 1);
  		if (ret < 0)
  			err = ret;
2817b000b   Sage Weil   ceph: directory o...
277
  		dput(last);
84583cfb9   Yan, Zheng   ceph: fix race in...
278
  		/* last_name no longer match cache index */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
279
280
281
  		if (dfi->readdir_cache_idx >= 0) {
  			dfi->readdir_cache_idx = -1;
  			dfi->dir_release_count = 0;
84583cfb9   Yan, Zheng   ceph: fix race in...
282
  		}
fdd4e1583   Yan, Zheng   ceph: rework dcac...
283
  	}
2817b000b   Sage Weil   ceph: directory o...
284
285
  	return err;
  }
bb48bd4dc   Chengguang Xu   ceph: optimize me...
286
  /*
   * Decide whether a new readdir request has to be sent for @pos.
   *
   * True when no reply is buffered in dfi->last_readdir.  Otherwise, for a
   * hash-order position, true when the buffered frag does not contain the
   * position's hash; for a frag-order position, true when the buffered
   * frag differs from the position's frag.
   */
  static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
  {
  	if (!dfi->last_readdir)
  		return true;
  	if (is_hash_order(pos))
  		return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
  	else
  		return dfi->frag != fpos_frag(pos);
  }
77acfa29e   Al Viro   [readdir] convert...
295
  static int ceph_readdir(struct file *file, struct dir_context *ctx)
2817b000b   Sage Weil   ceph: directory o...
296
  {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
297
  	struct ceph_dir_file_info *dfi = file->private_data;
77acfa29e   Al Viro   [readdir] convert...
298
  	struct inode *inode = file_inode(file);
2817b000b   Sage Weil   ceph: directory o...
299
  	struct ceph_inode_info *ci = ceph_inode(inode);
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
300
301
  	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
  	struct ceph_mds_client *mdsc = fsc->mdsc;
8974eebd3   Yan, Zheng   ceph: record 'off...
302
  	int i;
2817b000b   Sage Weil   ceph: directory o...
303
  	int err;
b50c2de51   Yan, Zheng   ceph: choose read...
304
  	unsigned frag = -1;
2817b000b   Sage Weil   ceph: directory o...
305
  	struct ceph_mds_reply_info_parsed *rinfo;
2817b000b   Sage Weil   ceph: directory o...
306

8974eebd3   Yan, Zheng   ceph: record 'off...
307
308
  	dout("readdir %p file %p pos %llx
  ", inode, file, ctx->pos);
bb48bd4dc   Chengguang Xu   ceph: optimize me...
309
  	if (dfi->file_info.flags & CEPH_F_ATEND)
2817b000b   Sage Weil   ceph: directory o...
310
311
312
  		return 0;
  
  	/* always start with . and .. */
77acfa29e   Al Viro   [readdir] convert...
313
  	if (ctx->pos == 0) {
2817b000b   Sage Weil   ceph: directory o...
314
315
  		dout("readdir off 0 -> '.'
  ");
ebce3eb2f   Jeff Layton   ceph: fix inode n...
316
  		if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode),
77acfa29e   Al Viro   [readdir] convert...
317
  			    inode->i_mode >> 12))
2817b000b   Sage Weil   ceph: directory o...
318
  			return 0;
77acfa29e   Al Viro   [readdir] convert...
319
  		ctx->pos = 1;
2817b000b   Sage Weil   ceph: directory o...
320
  	}
77acfa29e   Al Viro   [readdir] convert...
321
  	if (ctx->pos == 1) {
ebce3eb2f   Jeff Layton   ceph: fix inode n...
322
323
324
325
326
327
  		u64 ino;
  		struct dentry *dentry = file->f_path.dentry;
  
  		spin_lock(&dentry->d_lock);
  		ino = ceph_present_inode(dentry->d_parent->d_inode);
  		spin_unlock(&dentry->d_lock);
2817b000b   Sage Weil   ceph: directory o...
328
329
  		dout("readdir off 1 -> '..'
  ");
ebce3eb2f   Jeff Layton   ceph: fix inode n...
330
  		if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12))
2817b000b   Sage Weil   ceph: directory o...
331
  			return 0;
77acfa29e   Al Viro   [readdir] convert...
332
  		ctx->pos = 2;
2817b000b   Sage Weil   ceph: directory o...
333
  	}
be655596b   Sage Weil   ceph: use i_ceph_...
334
  	spin_lock(&ci->i_ceph_lock);
719a2514e   Yan, Zheng   ceph: consider in...
335
336
337
338
  	/* request Fx cap. if have Fx, we don't need to release Fs cap
  	 * for later create/unlink. */
  	__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_WR);
  	/* can we use the dcache? */
fdd4e1583   Yan, Zheng   ceph: rework dcac...
339
  	if (ceph_test_mount_opt(fsc, DCACHE) &&
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
340
  	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
a0dff78da   Sage Weil   ceph: avoid dcach...
341
  	    ceph_snap(inode) != CEPH_SNAPDIR &&
70db4f362   Yan, Zheng   ceph: introduce a...
342
  	    __ceph_dir_is_complete_ordered(ci) &&
1af16d547   Xiubo Li   ceph: add caps pe...
343
  	    __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) {
97aeb6bf9   Yan, Zheng   ceph: use atomic_...
344
  		int shared_gen = atomic_read(&ci->i_shared_gen);
1af16d547   Xiubo Li   ceph: add caps pe...
345

be655596b   Sage Weil   ceph: use i_ceph_...
346
  		spin_unlock(&ci->i_ceph_lock);
a30be7cb2   Yan, Zheng   ceph: skip invali...
347
  		err = __dcache_readdir(file, ctx, shared_gen);
efa4c1206   Sage Weil   ceph: do not carr...
348
  		if (err != -EAGAIN)
2817b000b   Sage Weil   ceph: directory o...
349
  			return err;
efa4c1206   Sage Weil   ceph: do not carr...
350
  	} else {
be655596b   Sage Weil   ceph: use i_ceph_...
351
  		spin_unlock(&ci->i_ceph_lock);
2817b000b   Sage Weil   ceph: directory o...
352
  	}
2817b000b   Sage Weil   ceph: directory o...
353
354
  
  	/* proceed with a normal readdir */
2817b000b   Sage Weil   ceph: directory o...
355
356
  more:
  	/* do we have the correct frag content buffered? */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
357
  	if (need_send_readdir(dfi, ctx->pos)) {
2817b000b   Sage Weil   ceph: directory o...
358
359
360
361
362
  		struct ceph_mds_request *req;
  		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
  			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
  
  		/* discard old result, if any */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
363
364
365
  		if (dfi->last_readdir) {
  			ceph_mdsc_put_request(dfi->last_readdir);
  			dfi->last_readdir = NULL;
393f66209   Sage Weil   ceph: fix possibl...
366
  		}
2817b000b   Sage Weil   ceph: directory o...
367

f3c4ebe65   Yan, Zheng   ceph: using hash ...
368
  		if (is_hash_order(ctx->pos)) {
b50c2de51   Yan, Zheng   ceph: choose read...
369
370
371
372
373
  			/* fragtree isn't always accurate. choose frag
  			 * based on previous reply when possible. */
  			if (frag == (unsigned)-1)
  				frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
  							NULL, NULL);
f3c4ebe65   Yan, Zheng   ceph: using hash ...
374
375
376
  		} else {
  			frag = fpos_frag(ctx->pos);
  		}
2817b000b   Sage Weil   ceph: directory o...
377
378
  		dout("readdir fetching %llx.%llx frag %x offset '%s'
  ",
bb48bd4dc   Chengguang Xu   ceph: optimize me...
379
  		     ceph_vinop(inode), frag, dfi->last_name);
2817b000b   Sage Weil   ceph: directory o...
380
381
382
  		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
  		if (IS_ERR(req))
  			return PTR_ERR(req);
54008399d   Yan, Zheng   ceph: preallocate...
383
384
385
386
387
  		err = ceph_alloc_readdir_reply_buffer(req, inode);
  		if (err) {
  			ceph_mdsc_put_request(req);
  			return err;
  		}
2817b000b   Sage Weil   ceph: directory o...
388
389
  		/* hints to request -> mds selection code */
  		req->r_direct_mode = USE_AUTH_MDS;
5d37ca148   Yan, Zheng   ceph: send LSSNAP...
390
391
392
  		if (op == CEPH_MDS_OP_READDIR) {
  			req->r_direct_hash = ceph_frag_value(frag);
  			__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
87c91a965   Yan, Zheng   ceph: voluntarily...
393
  			req->r_inode_drop = CEPH_CAP_FILE_EXCL;
5d37ca148   Yan, Zheng   ceph: send LSSNAP...
394
  		}
bb48bd4dc   Chengguang Xu   ceph: optimize me...
395
396
  		if (dfi->last_name) {
  			req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
a149bb9a2   Sanidhya Kashyap   ceph: kstrdup() m...
397
398
399
400
  			if (!req->r_path2) {
  				ceph_mdsc_put_request(req);
  				return -ENOMEM;
  			}
79162547b   Yan, Zheng   ceph: make seeky ...
401
402
403
  		} else if (is_hash_order(ctx->pos)) {
  			req->r_args.readdir.offset_hash =
  				cpu_to_le32(fpos_hash(ctx->pos));
a149bb9a2   Sanidhya Kashyap   ceph: kstrdup() m...
404
  		}
79162547b   Yan, Zheng   ceph: make seeky ...
405

bb48bd4dc   Chengguang Xu   ceph: optimize me...
406
407
408
409
  		req->r_dir_release_cnt = dfi->dir_release_count;
  		req->r_dir_ordered_cnt = dfi->dir_ordered_count;
  		req->r_readdir_cache_idx = dfi->readdir_cache_idx;
  		req->r_readdir_offset = dfi->next_offset;
2817b000b   Sage Weil   ceph: directory o...
410
  		req->r_args.readdir.frag = cpu_to_le32(frag);
956d39d63   Yan, Zheng   ceph: define 'end...
411
412
  		req->r_args.readdir.flags =
  				cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
a149bb9a2   Sanidhya Kashyap   ceph: kstrdup() m...
413
414
415
416
  
  		req->r_inode = inode;
  		ihold(inode);
  		req->r_dentry = dget(file->f_path.dentry);
2817b000b   Sage Weil   ceph: directory o...
417
418
419
420
421
  		err = ceph_mdsc_do_request(mdsc, NULL, req);
  		if (err < 0) {
  			ceph_mdsc_put_request(req);
  			return err;
  		}
f3c4ebe65   Yan, Zheng   ceph: using hash ...
422
423
424
425
  		dout("readdir got and parsed readdir result=%d on "
  		     "frag %x, end=%d, complete=%d, hash_order=%d
  ",
  		     err, frag,
2817b000b   Sage Weil   ceph: directory o...
426
  		     (int)req->r_reply_info.dir_end,
f3c4ebe65   Yan, Zheng   ceph: using hash ...
427
428
  		     (int)req->r_reply_info.dir_complete,
  		     (int)req->r_reply_info.hash_order);
2817b000b   Sage Weil   ceph: directory o...
429

81c6aea52   Yan, Zheng   ceph: handle frag...
430
431
432
  		rinfo = &req->r_reply_info;
  		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
  			frag = le32_to_cpu(rinfo->dir_dir->frag);
f3c4ebe65   Yan, Zheng   ceph: using hash ...
433
  			if (!rinfo->hash_order) {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
434
  				dfi->next_offset = req->r_readdir_offset;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
435
436
  				/* adjust ctx->pos to beginning of frag */
  				ctx->pos = ceph_make_fpos(frag,
bb48bd4dc   Chengguang Xu   ceph: optimize me...
437
  							  dfi->next_offset,
f3c4ebe65   Yan, Zheng   ceph: using hash ...
438
439
  							  false);
  			}
81c6aea52   Yan, Zheng   ceph: handle frag...
440
  		}
fdd4e1583   Yan, Zheng   ceph: rework dcac...
441

bb48bd4dc   Chengguang Xu   ceph: optimize me...
442
443
  		dfi->frag = frag;
  		dfi->last_readdir = req;
2817b000b   Sage Weil   ceph: directory o...
444

bc2de10dc   Jeff Layton   ceph: convert boo...
445
  		if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
446
447
  			dfi->readdir_cache_idx = req->r_readdir_cache_idx;
  			if (dfi->readdir_cache_idx < 0) {
fdd4e1583   Yan, Zheng   ceph: rework dcac...
448
  				/* preclude from marking dir ordered */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
449
  				dfi->dir_ordered_count = 0;
8974eebd3   Yan, Zheng   ceph: record 'off...
450
  			} else if (ceph_frag_is_leftmost(frag) &&
bb48bd4dc   Chengguang Xu   ceph: optimize me...
451
  				   dfi->next_offset == 2) {
fdd4e1583   Yan, Zheng   ceph: rework dcac...
452
453
  				/* note dir version at start of readdir so
  				 * we can tell if any dentries get dropped */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
454
455
  				dfi->dir_release_count = req->r_dir_release_cnt;
  				dfi->dir_ordered_count = req->r_dir_ordered_cnt;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
456
457
  			}
  		} else {
4c069a582   Chengguang Xu   ceph: add newline...
458
459
  			dout("readdir !did_prepopulate
  ");
fdd4e1583   Yan, Zheng   ceph: rework dcac...
460
  			/* disable readdir cache */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
461
  			dfi->readdir_cache_idx = -1;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
462
  			/* preclude from marking dir complete */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
463
  			dfi->dir_release_count = 0;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
464
  		}
f3c4ebe65   Yan, Zheng   ceph: using hash ...
465
466
  		/* note next offset and last dentry name */
  		if (rinfo->dir_nr > 0) {
2a5beea3f   Yan, Zheng   ceph: define stru...
467
468
  			struct ceph_mds_reply_dir_entry *rde =
  					rinfo->dir_entries + (rinfo->dir_nr-1);
f3c4ebe65   Yan, Zheng   ceph: using hash ...
469
470
  			unsigned next_offset = req->r_reply_info.dir_end ?
  					2 : (fpos_off(rde->offset) + 1);
bb48bd4dc   Chengguang Xu   ceph: optimize me...
471
  			err = note_last_dentry(dfi, rde->name, rde->name_len,
f3c4ebe65   Yan, Zheng   ceph: using hash ...
472
  					       next_offset);
2817b000b   Sage Weil   ceph: directory o...
473
474
  			if (err)
  				return err;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
475
  		} else if (req->r_reply_info.dir_end) {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
476
  			dfi->next_offset = 2;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
477
  			/* keep last name */
2817b000b   Sage Weil   ceph: directory o...
478
479
  		}
  	}
bb48bd4dc   Chengguang Xu   ceph: optimize me...
480
  	rinfo = &dfi->last_readdir->r_reply_info;
8974eebd3   Yan, Zheng   ceph: record 'off...
481
482
  	dout("readdir frag %x num %d pos %llx chunk first %llx
  ",
bb48bd4dc   Chengguang Xu   ceph: optimize me...
483
  	     dfi->frag, rinfo->dir_nr, ctx->pos,
8974eebd3   Yan, Zheng   ceph: record 'off...
484
  	     rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
77acfa29e   Al Viro   [readdir] convert...
485

8974eebd3   Yan, Zheng   ceph: record 'off...
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
  	i = 0;
  	/* search start position */
  	if (rinfo->dir_nr > 0) {
  		int step, nr = rinfo->dir_nr;
  		while (nr > 0) {
  			step = nr >> 1;
  			if (rinfo->dir_entries[i + step].offset < ctx->pos) {
  				i +=  step + 1;
  				nr -= step + 1;
  			} else {
  				nr = step;
  			}
  		}
  	}
  	for (; i < rinfo->dir_nr; i++) {
  		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
3105c19c4   Sage Weil   ceph: fix readdir...
502

8974eebd3   Yan, Zheng   ceph: record 'off...
503
504
505
506
507
508
  		BUG_ON(rde->offset < ctx->pos);
  
  		ctx->pos = rde->offset;
  		dout("readdir (%d/%d) -> %llx '%.*s' %p
  ",
  		     i, rinfo->dir_nr, ctx->pos,
2a5beea3f   Yan, Zheng   ceph: define stru...
509
  		     rde->name_len, rde->name, &rde->inode.in);
8974eebd3   Yan, Zheng   ceph: record 'off...
510

2a5beea3f   Yan, Zheng   ceph: define stru...
511
  		BUG_ON(!rde->inode.in);
8974eebd3   Yan, Zheng   ceph: record 'off...
512

2a5beea3f   Yan, Zheng   ceph: define stru...
513
  		if (!dir_emit(ctx, rde->name, rde->name_len,
ebce3eb2f   Jeff Layton   ceph: fix inode n...
514
515
  			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
  			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
2817b000b   Sage Weil   ceph: directory o...
516
517
518
519
  			dout("filldir stopping us...
  ");
  			return 0;
  		}
77acfa29e   Al Viro   [readdir] convert...
520
  		ctx->pos++;
2817b000b   Sage Weil   ceph: directory o...
521
  	}
bb48bd4dc   Chengguang Xu   ceph: optimize me...
522
523
  	ceph_mdsc_put_request(dfi->last_readdir);
  	dfi->last_readdir = NULL;
b50c2de51   Yan, Zheng   ceph: choose read...
524

bb48bd4dc   Chengguang Xu   ceph: optimize me...
525
526
  	if (dfi->next_offset > 2) {
  		frag = dfi->frag;
2817b000b   Sage Weil   ceph: directory o...
527
528
529
530
  		goto more;
  	}
  
  	/* more frags? */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
531
532
  	if (!ceph_frag_is_rightmost(dfi->frag)) {
  		frag = ceph_frag_next(dfi->frag);
f3c4ebe65   Yan, Zheng   ceph: using hash ...
533
534
  		if (is_hash_order(ctx->pos)) {
  			loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
bb48bd4dc   Chengguang Xu   ceph: optimize me...
535
  							dfi->next_offset, true);
f3c4ebe65   Yan, Zheng   ceph: using hash ...
536
537
538
539
  			if (new_pos > ctx->pos)
  				ctx->pos = new_pos;
  			/* keep last_name */
  		} else {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
540
541
542
543
  			ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
  							false);
  			kfree(dfi->last_name);
  			dfi->last_name = NULL;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
544
  		}
2817b000b   Sage Weil   ceph: directory o...
545
546
547
548
  		dout("readdir next frag is %x
  ", frag);
  		goto more;
  	}
bb48bd4dc   Chengguang Xu   ceph: optimize me...
549
  	dfi->file_info.flags |= CEPH_F_ATEND;
2817b000b   Sage Weil   ceph: directory o...
550
551
552
553
554
555
  
  	/*
  	 * if dir_release_count still matches the dir, no dentries
  	 * were released during the whole readdir, and we should have
  	 * the complete dir contents in our cache.
  	 */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
556
557
  	if (atomic64_read(&ci->i_release_count) ==
  					dfi->dir_release_count) {
fdd4e1583   Yan, Zheng   ceph: rework dcac...
558
  		spin_lock(&ci->i_ceph_lock);
bb48bd4dc   Chengguang Xu   ceph: optimize me...
559
560
  		if (dfi->dir_ordered_count ==
  				atomic64_read(&ci->i_ordered_count)) {
70db4f362   Yan, Zheng   ceph: introduce a...
561
562
  			dout(" marking %p complete and ordered
  ", inode);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
563
564
  			/* use i_size to track number of entries in
  			 * readdir cache */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
565
566
  			BUG_ON(dfi->readdir_cache_idx < 0);
  			i_size_write(inode, dfi->readdir_cache_idx *
fdd4e1583   Yan, Zheng   ceph: rework dcac...
567
568
  				     sizeof(struct dentry*));
  		} else {
70db4f362   Yan, Zheng   ceph: introduce a...
569
570
  			dout(" marking %p complete
  ", inode);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
571
  		}
bb48bd4dc   Chengguang Xu   ceph: optimize me...
572
573
  		__ceph_dir_set_complete(ci, dfi->dir_release_count,
  					dfi->dir_ordered_count);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
574
  		spin_unlock(&ci->i_ceph_lock);
2817b000b   Sage Weil   ceph: directory o...
575
  	}
2817b000b   Sage Weil   ceph: directory o...
576

77acfa29e   Al Viro   [readdir] convert...
577
578
  	dout("readdir %p file %p done.
  ", inode, file);
2817b000b   Sage Weil   ceph: directory o...
579
580
  	return 0;
  }
bb48bd4dc   Chengguang Xu   ceph: optimize me...
581
  static void reset_readdir(struct ceph_dir_file_info *dfi)
2817b000b   Sage Weil   ceph: directory o...
582
  {
bb48bd4dc   Chengguang Xu   ceph: optimize me...
583
584
585
  	if (dfi->last_readdir) {
  		ceph_mdsc_put_request(dfi->last_readdir);
  		dfi->last_readdir = NULL;
2817b000b   Sage Weil   ceph: directory o...
586
  	}
bb48bd4dc   Chengguang Xu   ceph: optimize me...
587
588
589
590
591
592
  	kfree(dfi->last_name);
  	dfi->last_name = NULL;
  	dfi->dir_release_count = 0;
  	dfi->readdir_cache_idx = -1;
  	dfi->next_offset = 2;  /* compensate for . and .. */
  	dfi->file_info.flags &= ~CEPH_F_ATEND;
2817b000b   Sage Weil   ceph: directory o...
593
  }
8974eebd3   Yan, Zheng   ceph: record 'off...
594
595
596
597
  /*
   * discard buffered readdir content on seekdir(0), or seek to new frag,
   * or seek prior to current chunk
   */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
598
  static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
8974eebd3   Yan, Zheng   ceph: record 'off...
599
600
  {
  	struct ceph_mds_reply_info_parsed *rinfo;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
601
  	loff_t chunk_offset;
8974eebd3   Yan, Zheng   ceph: record 'off...
602
603
  	if (new_pos == 0)
  		return true;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
604
605
606
  	if (is_hash_order(new_pos)) {
  		/* no need to reset last_name for a forward seek when
  		 * dentries are sotred in hash order */
bb48bd4dc   Chengguang Xu   ceph: optimize me...
607
  	} else if (dfi->frag != fpos_frag(new_pos)) {
8974eebd3   Yan, Zheng   ceph: record 'off...
608
  		return true;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
609
  	}
bb48bd4dc   Chengguang Xu   ceph: optimize me...
610
  	rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
8974eebd3   Yan, Zheng   ceph: record 'off...
611
612
  	if (!rinfo || !rinfo->dir_nr)
  		return true;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
613
614
615
  	chunk_offset = rinfo->dir_entries[0].offset;
  	return new_pos < chunk_offset ||
  	       is_hash_order(new_pos) != is_hash_order(chunk_offset);
8974eebd3   Yan, Zheng   ceph: record 'off...
616
  }
965c8e59c   Andrew Morton   lseek: the "whenc...
617
  /*
   * llseek for directories.
   *
   * SEEK_SET and SEEK_CUR are supported; SEEK_END yields -EOPNOTSUPP and
   * any other @whence, or a negative resulting offset, yields -EINVAL.
   * On success the new position is returned.  The whole operation runs
   * under the inode lock.
   */
  static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
  {
  	struct ceph_dir_file_info *dfi = file->private_data;
  	struct inode *inode = file->f_mapping->host;
  	loff_t retval = -EINVAL;

  	inode_lock(inode);

  	switch (whence) {
  	case SEEK_SET:
  		break;
  	case SEEK_CUR:
  		/* make the offset absolute */
  		offset += file->f_pos;
  		break;
  	case SEEK_END:
  		retval = -EOPNOTSUPP;
  		goto out;
  	default:
  		goto out;
  	}

  	if (offset < 0)
  		goto out;

  	if (need_reset_readdir(dfi, offset)) {
  		dout("dir_llseek dropping %p content\n", file);
  		reset_readdir(dfi);
  	} else if (is_hash_order(offset) && offset > file->f_pos) {
  		/* for hash offset, we don't know if a forward seek
  		 * is within same frag */
  		dfi->dir_release_count = 0;
  		dfi->readdir_cache_idx = -1;
  	}

  	/* only touch the file position state when it really moves */
  	if (offset != file->f_pos) {
  		file->f_pos = offset;
  		file->f_version = 0;
  		dfi->file_info.flags &= ~CEPH_F_ATEND;
  	}
  	retval = offset;
  out:
  	inode_unlock(inode);
  	return retval;
  }
  
  /*
468640e32   Sage Weil   ceph: fix ceph_lo...
659
   * Handle lookups for the hidden .snap directory.
2817b000b   Sage Weil   ceph: directory o...
660
   */
468640e32   Sage Weil   ceph: fix ceph_lo...
661
662
  int ceph_handle_snapdir(struct ceph_mds_request *req,
  			struct dentry *dentry, int err)
2817b000b   Sage Weil   ceph: directory o...
663
  {
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
664
  	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
2b0143b5c   David Howells   VFS: normal files...
665
  	struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
2817b000b   Sage Weil   ceph: directory o...
666
667
668
  
  	/* .snap dir? */
  	if (err == -ENOENT &&
455cec0ab   Sage Weil   ceph: no .snap in...
669
  	    ceph_snap(parent) == CEPH_NOSNAP &&
6b8051855   Sage Weil   ceph: allocate an...
670
  	    strcmp(dentry->d_name.name,
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
671
  		   fsc->mount_options->snapdir_name) == 0) {
2817b000b   Sage Weil   ceph: directory o...
672
  		struct inode *inode = ceph_get_snapdir(parent);
a455589f1   Al Viro   assorted conversi...
673
674
675
  		dout("ENOENT on snapdir %p '%pd', linking to snapdir %p
  ",
  		     dentry, dentry, inode);
9358c6d4c   Sage Weil   ceph: fix dentry ...
676
  		BUG_ON(!d_unhashed(dentry));
2817b000b   Sage Weil   ceph: directory o...
677
678
679
  		d_add(dentry, inode);
  		err = 0;
  	}
468640e32   Sage Weil   ceph: fix ceph_lo...
680
681
  	return err;
  }
2817b000b   Sage Weil   ceph: directory o...
682

468640e32   Sage Weil   ceph: fix ceph_lo...
683
684
685
686
687
688
689
690
691
692
693
694
695
696
  /*
   * Figure out final result of a lookup/open request.
   *
   * Mainly, make sure we return the final req->r_dentry (if it already
   * existed) in place of the original VFS-provided dentry when they
   * differ.
   *
   * Gracefully handle the case where the MDS replies with -ENOENT and
   * no trace (which it may do, at its discretion, e.g., if it doesn't
   * care to issue a lease on the negative dentry).
   */
  struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
  				  struct dentry *dentry, int err)
  {
2817b000b   Sage Weil   ceph: directory o...
697
698
699
700
701
702
  	if (err == -ENOENT) {
  		/* no trace? */
  		err = 0;
  		if (!req->r_reply_info.head->is_dentry) {
  			dout("ENOENT and no trace, dentry %p inode %p
  ",
2b0143b5c   David Howells   VFS: normal files...
703
704
  			     dentry, d_inode(dentry));
  			if (d_really_is_positive(dentry)) {
2817b000b   Sage Weil   ceph: directory o...
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
  				d_drop(dentry);
  				err = -ENOENT;
  			} else {
  				d_add(dentry, NULL);
  			}
  		}
  	}
  	if (err)
  		dentry = ERR_PTR(err);
  	else if (dentry != req->r_dentry)
  		dentry = dget(req->r_dentry);   /* we got spliced */
  	else
  		dentry = NULL;
  	return dentry;
  }
3b33f692c   Zhang Zhuoyu   ceph: make logica...
720
  static bool is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
1d1de9160   Sage Weil   ceph: hide /.ceph...
721
722
723
724
  {
  	return ceph_ino(inode) == CEPH_INO_ROOT &&
  		strncmp(dentry->d_name.name, ".ceph", 5) == 0;
  }
2817b000b   Sage Weil   ceph: directory o...
725
726
727
728
729
  /*
   * Look up a single dir entry.  If there is a lookup intent, inform
   * the MDS so that it gets our 'caps wanted' value in a single op.
   */
  static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
00cd8dd3b   Al Viro   stop passing name...
730
  				  unsigned int flags)
2817b000b   Sage Weil   ceph: directory o...
731
  {
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
732
  	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
2678da88f   Xiubo Li   ceph: add ceph_sb...
733
  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
2817b000b   Sage Weil   ceph: directory o...
734
735
  	struct ceph_mds_request *req;
  	int op;
315f24088   Yan, Zheng   ceph: fix securit...
736
  	int mask;
2817b000b   Sage Weil   ceph: directory o...
737
  	int err;
a455589f1   Al Viro   assorted conversi...
738
739
740
  	dout("lookup %p dentry %p '%pd'
  ",
  	     dir, dentry, dentry);
2817b000b   Sage Weil   ceph: directory o...
741
742
743
  
  	if (dentry->d_name.len > NAME_MAX)
  		return ERR_PTR(-ENAMETOOLONG);
2817b000b   Sage Weil   ceph: directory o...
744
  	/* can we conclude ENOENT locally? */
2b0143b5c   David Howells   VFS: normal files...
745
  	if (d_really_is_negative(dentry)) {
2817b000b   Sage Weil   ceph: directory o...
746
747
  		struct ceph_inode_info *ci = ceph_inode(dir);
  		struct ceph_dentry_info *di = ceph_dentry(dentry);
be655596b   Sage Weil   ceph: use i_ceph_...
748
  		spin_lock(&ci->i_ceph_lock);
891f3f5a6   Jeff Layton   ceph: add infrast...
749
750
  		dout(" dir %p flags are 0x%lx
  ", dir, ci->i_ceph_flags);
2817b000b   Sage Weil   ceph: directory o...
751
  		if (strncmp(dentry->d_name.name,
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
752
  			    fsc->mount_options->snapdir_name,
2817b000b   Sage Weil   ceph: directory o...
753
  			    dentry->d_name.len) &&
1d1de9160   Sage Weil   ceph: hide /.ceph...
754
  		    !is_root_ceph_dentry(dir, dentry) &&
e2c3de046   Yan, Zheng   ceph: fix dcache/...
755
  		    ceph_test_mount_opt(fsc, DCACHE) &&
2f276c511   Yan, Zheng   ceph: use i_relea...
756
  		    __ceph_dir_is_complete(ci) &&
1af16d547   Xiubo Li   ceph: add caps pe...
757
  		    __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) {
719a2514e   Yan, Zheng   ceph: consider in...
758
  			__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
be655596b   Sage Weil   ceph: use i_ceph_...
759
  			spin_unlock(&ci->i_ceph_lock);
2817b000b   Sage Weil   ceph: directory o...
760
761
762
  			dout(" dir %p complete, -ENOENT
  ", dir);
  			d_add(dentry, NULL);
97aeb6bf9   Yan, Zheng   ceph: use atomic_...
763
  			di->lease_shared_gen = atomic_read(&ci->i_shared_gen);
2817b000b   Sage Weil   ceph: directory o...
764
765
  			return NULL;
  		}
be655596b   Sage Weil   ceph: use i_ceph_...
766
  		spin_unlock(&ci->i_ceph_lock);
2817b000b   Sage Weil   ceph: directory o...
767
768
769
770
771
772
  	}
  
  	op = ceph_snap(dir) == CEPH_SNAPDIR ?
  		CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
  	req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
  	if (IS_ERR(req))
7e34bc524   Julia Lawall   fs/ceph: Use ERR_...
773
  		return ERR_CAST(req);
2817b000b   Sage Weil   ceph: directory o...
774
775
  	req->r_dentry = dget(dentry);
  	req->r_num_caps = 2;
315f24088   Yan, Zheng   ceph: fix securit...
776
777
778
779
780
  
  	mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
  	if (ceph_security_xattr_wanted(dir))
  		mask |= CEPH_CAP_XATTR_SHARED;
  	req->r_args.getattr.mask = cpu_to_le32(mask);
3dd69aabc   Jeff Layton   ceph: add a new f...
781
782
  	req->r_parent = dir;
  	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
2817b000b   Sage Weil   ceph: directory o...
783
  	err = ceph_mdsc_do_request(mdsc, NULL, req);
468640e32   Sage Weil   ceph: fix ceph_lo...
784
  	err = ceph_handle_snapdir(req, dentry, err);
2817b000b   Sage Weil   ceph: directory o...
785
786
787
788
789
790
791
792
793
794
795
796
797
  	dentry = ceph_finish_lookup(req, dentry, err);
  	ceph_mdsc_put_request(req);  /* will dput(dentry) */
  	dout("lookup result=%p
  ", dentry);
  	return dentry;
  }
  
  /*
   * If we do a create but get no trace back from the MDS, follow up with
   * a lookup (the VFS expects us to link up the provided dentry).
   */
  int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
  {
00cd8dd3b   Al Viro   stop passing name...
798
  	struct dentry *result = ceph_lookup(dir, dentry, 0);
2817b000b   Sage Weil   ceph: directory o...
799
800
801
802
803
  
  	if (result && !IS_ERR(result)) {
  		/*
  		 * We created the item, then did a lookup, and found
  		 * it was already linked to another inode we already
4d41cef27   Yan, Zheng   ceph: return erro...
804
805
806
807
808
809
810
811
  		 * had in our cache (and thus got spliced). To not
  		 * confuse VFS (especially when inode is a directory),
  		 * we don't link our dentry to that inode, return an
  		 * error instead.
  		 *
  		 * This event should be rare and it happens only when
  		 * we talk to old MDS. Recent MDS does not send traceless
  		 * reply for request that creates new inode.
2817b000b   Sage Weil   ceph: directory o...
812
  		 */
5cba372c0   Yan, Zheng   ceph: fix dentry ...
813
  		d_drop(result);
4d41cef27   Yan, Zheng   ceph: return erro...
814
  		return -ESTALE;
2817b000b   Sage Weil   ceph: directory o...
815
816
817
818
819
  	}
  	return PTR_ERR(result);
  }
  
  static int ceph_mknod(struct inode *dir, struct dentry *dentry,
1a67aafb5   Al Viro   switch ->mknod() ...
820
  		      umode_t mode, dev_t rdev)
2817b000b   Sage Weil   ceph: directory o...
821
  {
2678da88f   Xiubo Li   ceph: add ceph_sb...
822
  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
2817b000b   Sage Weil   ceph: directory o...
823
  	struct ceph_mds_request *req;
5c31e92df   Yan, Zheng   ceph: rename stru...
824
  	struct ceph_acl_sec_ctx as_ctx = {};
2817b000b   Sage Weil   ceph: directory o...
825
826
827
828
  	int err;
  
  	if (ceph_snap(dir) != CEPH_NOSNAP)
  		return -EROFS;
0459871c4   Chengguang Xu   ceph: add d_drop ...
829
830
831
832
  	if (ceph_quota_is_max_files_exceeded(dir)) {
  		err = -EDQUOT;
  		goto out;
  	}
b7a292176   Luis Henriques   ceph: quota: supp...
833

5c31e92df   Yan, Zheng   ceph: rename stru...
834
  	err = ceph_pre_init_acls(dir, &mode, &as_ctx);
b1ee94aa5   Yan, Zheng   ceph: include the...
835
  	if (err < 0)
0459871c4   Chengguang Xu   ceph: add d_drop ...
836
  		goto out;
ac6713ccb   Yan, Zheng   ceph: add selinux...
837
838
839
  	err = ceph_security_init_secctx(dentry, mode, &as_ctx);
  	if (err < 0)
  		goto out;
b1ee94aa5   Yan, Zheng   ceph: include the...
840

1a67aafb5   Al Viro   switch ->mknod() ...
841
842
  	dout("mknod in dir %p dentry %p mode 0%ho rdev %d
  ",
2817b000b   Sage Weil   ceph: directory o...
843
844
845
  	     dir, dentry, mode, rdev);
  	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
  	if (IS_ERR(req)) {
b1ee94aa5   Yan, Zheng   ceph: include the...
846
847
  		err = PTR_ERR(req);
  		goto out;
2817b000b   Sage Weil   ceph: directory o...
848
849
850
  	}
  	req->r_dentry = dget(dentry);
  	req->r_num_caps = 2;
3dd69aabc   Jeff Layton   ceph: add a new f...
851
852
  	req->r_parent = dir;
  	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
2817b000b   Sage Weil   ceph: directory o...
853
854
  	req->r_args.mknod.mode = cpu_to_le32(mode);
  	req->r_args.mknod.rdev = cpu_to_le32(rdev);
222b7f90b   Yan, Zheng   ceph: voluntarily...
855
  	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
2817b000b   Sage Weil   ceph: directory o...
856
  	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
5c31e92df   Yan, Zheng   ceph: rename stru...
857
858
859
  	if (as_ctx.pagelist) {
  		req->r_pagelist = as_ctx.pagelist;
  		as_ctx.pagelist = NULL;
b1ee94aa5   Yan, Zheng   ceph: include the...
860
  	}
2817b000b   Sage Weil   ceph: directory o...
861
862
863
864
  	err = ceph_mdsc_do_request(mdsc, dir, req);
  	if (!err && !req->r_reply_info.head->is_dentry)
  		err = ceph_handle_notrace_create(dir, dentry);
  	ceph_mdsc_put_request(req);
b1ee94aa5   Yan, Zheng   ceph: include the...
865
  out:
7221fe4c2   Guangliang Zhao   ceph: add acl for...
866
  	if (!err)
5c31e92df   Yan, Zheng   ceph: rename stru...
867
  		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
b20a95a0d   Yan, Zheng   ceph: add missing...
868
  	else
2817b000b   Sage Weil   ceph: directory o...
869
  		d_drop(dentry);
5c31e92df   Yan, Zheng   ceph: rename stru...
870
  	ceph_release_acl_sec_ctx(&as_ctx);
2817b000b   Sage Weil   ceph: directory o...
871
872
  	return err;
  }
4acdaf27e   Al Viro   switch ->create()...
873
  static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ebfc3b49a   Al Viro   don't pass nameid...
874
  		       bool excl)
2817b000b   Sage Weil   ceph: directory o...
875
  {
2d83bde9a   Miklos Szeredi   ceph: implement i...
876
  	return ceph_mknod(dir, dentry, mode, 0);
2817b000b   Sage Weil   ceph: directory o...
877
878
879
880
881
  }
  
  static int ceph_symlink(struct inode *dir, struct dentry *dentry,
  			    const char *dest)
  {
2678da88f   Xiubo Li   ceph: add ceph_sb...
882
  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
2817b000b   Sage Weil   ceph: directory o...
883
  	struct ceph_mds_request *req;
ac6713ccb   Yan, Zheng   ceph: add selinux...
884
  	struct ceph_acl_sec_ctx as_ctx = {};
2817b000b   Sage Weil   ceph: directory o...
885
886
887
888
  	int err;
  
  	if (ceph_snap(dir) != CEPH_NOSNAP)
  		return -EROFS;
67fcd1514   Chengguang Xu   ceph: add d_drop ...
889
890
891
892
  	if (ceph_quota_is_max_files_exceeded(dir)) {
  		err = -EDQUOT;
  		goto out;
  	}
b7a292176   Luis Henriques   ceph: quota: supp...
893

ac6713ccb   Yan, Zheng   ceph: add selinux...
894
895
896
  	err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx);
  	if (err < 0)
  		goto out;
2817b000b   Sage Weil   ceph: directory o...
897
898
899
900
  	dout("symlink in dir %p dentry %p to '%s'
  ", dir, dentry, dest);
  	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
  	if (IS_ERR(req)) {
b1ee94aa5   Yan, Zheng   ceph: include the...
901
902
  		err = PTR_ERR(req);
  		goto out;
2817b000b   Sage Weil   ceph: directory o...
903
  	}
687265e5a   Yan, Zheng   ceph: switch some...
904
  	req->r_path2 = kstrdup(dest, GFP_KERNEL);
a149bb9a2   Sanidhya Kashyap   ceph: kstrdup() m...
905
906
907
908
909
  	if (!req->r_path2) {
  		err = -ENOMEM;
  		ceph_mdsc_put_request(req);
  		goto out;
  	}
3dd69aabc   Jeff Layton   ceph: add a new f...
910
911
  	req->r_parent = dir;
  	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
a149bb9a2   Sanidhya Kashyap   ceph: kstrdup() m...
912
913
  	req->r_dentry = dget(dentry);
  	req->r_num_caps = 2;
222b7f90b   Yan, Zheng   ceph: voluntarily...
914
  	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
2817b000b   Sage Weil   ceph: directory o...
915
  	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
b748fc7a8   Jeff Layton   ceph: set sec_con...
916
917
918
919
  	if (as_ctx.pagelist) {
  		req->r_pagelist = as_ctx.pagelist;
  		as_ctx.pagelist = NULL;
  	}
2817b000b   Sage Weil   ceph: directory o...
920
921
922
923
  	err = ceph_mdsc_do_request(mdsc, dir, req);
  	if (!err && !req->r_reply_info.head->is_dentry)
  		err = ceph_handle_notrace_create(dir, dentry);
  	ceph_mdsc_put_request(req);
b1ee94aa5   Yan, Zheng   ceph: include the...
924
925
  out:
  	if (err)
2817b000b   Sage Weil   ceph: directory o...
926
  		d_drop(dentry);
ac6713ccb   Yan, Zheng   ceph: add selinux...
927
  	ceph_release_acl_sec_ctx(&as_ctx);
2817b000b   Sage Weil   ceph: directory o...
928
929
  	return err;
  }
18bb1db3e   Al Viro   switch vfs_mkdir(...
930
  /*
   * Create a directory named by @dentry in @dir.  Inside a snapdir
   * (ceph_snap(dir) == CEPH_SNAPDIR) this creates a snapshot instead
   * (MKSNAP); in a CEPH_NOSNAP directory it is a plain MKDIR.
   *
   * Returns 0 on success or a negative errno: -EROFS for any other kind
   * of @dir, -EDQUOT when a MKDIR fails the max-files quota check, or
   * whatever the ACL/security setup or the MDS request reports.  On any
   * failure @dentry is dropped from the dcache.
   */
  static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
  	struct ceph_acl_sec_ctx as_ctx = {};
  	struct ceph_mds_request *req;
  	int err = -EROFS;
  	int op;

  	if (ceph_snap(dir) == CEPH_NOSNAP) {
  		dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
  		op = CEPH_MDS_OP_MKDIR;
  	} else if (ceph_snap(dir) == CEPH_SNAPDIR) {
  		/* mkdir .snap/foo is a MKSNAP */
  		op = CEPH_MDS_OP_MKSNAP;
  		dout("mksnap dir %p snap '%pd' dn %p\n", dir,
  		     dentry, dentry);
  	} else {
  		/* anything else fails with -EROFS */
  		goto out;
  	}

  	/* the max-files quota is only checked for real directories */
  	if (op == CEPH_MDS_OP_MKDIR &&
  	    ceph_quota_is_max_files_exceeded(dir)) {
  		err = -EDQUOT;
  		goto out;
  	}

  	mode |= S_IFDIR;
  	err = ceph_pre_init_acls(dir, &mode, &as_ctx);
  	if (err < 0)
  		goto out;

  	err = ceph_security_init_secctx(dentry, mode, &as_ctx);
  	if (err < 0)
  		goto out;

  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
  	if (IS_ERR(req)) {
  		err = PTR_ERR(req);
  		goto out;
  	}

  	req->r_parent = dir;
  	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
  	req->r_dentry = dget(dentry);
  	req->r_num_caps = 2;
  	req->r_args.mkdir.mode = cpu_to_le32(mode);
  	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
  	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;

  	/* hand the pagelist over to the request */
  	if (as_ctx.pagelist) {
  		req->r_pagelist = as_ctx.pagelist;
  		as_ctx.pagelist = NULL;
  	}

  	err = ceph_mdsc_do_request(mdsc, dir, req);
  	/* success but neither target nor dentry in the reply: look it up */
  	if (!err &&
  	    !req->r_reply_info.head->is_target &&
  	    !req->r_reply_info.head->is_dentry)
  		err = ceph_handle_notrace_create(dir, dentry);
  	ceph_mdsc_put_request(req);
  out:
  	if (err)
  		d_drop(dentry);
  	else
  		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
  	ceph_release_acl_sec_ctx(&as_ctx);
  	return err;
  }
  
  static int ceph_link(struct dentry *old_dentry, struct inode *dir,
  		     struct dentry *dentry)
  {
2678da88f   Xiubo Li   ceph: add ceph_sb...
1000
  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
2817b000b   Sage Weil   ceph: directory o...
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
  	struct ceph_mds_request *req;
  	int err;
  
  	if (ceph_snap(dir) != CEPH_NOSNAP)
  		return -EROFS;
  
  	dout("link in dir %p old_dentry %p dentry %p
  ", dir,
  	     old_dentry, dentry);
  	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
  	if (IS_ERR(req)) {
  		d_drop(dentry);
  		return PTR_ERR(req);
  	}
  	req->r_dentry = dget(dentry);
  	req->r_num_caps = 2;
4b58c9b19   Sage Weil   ceph: do not set ...
1017
  	req->r_old_dentry = dget(old_dentry);
3dd69aabc   Jeff Layton   ceph: add a new f...
1018
1019
  	req->r_parent = dir;
  	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
2817b000b   Sage Weil   ceph: directory o...
1020
1021
  	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
  	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
ad88f23f4   Yan, Zheng   ceph: drop CAP_LI...
1022
  	/* release LINK_SHARED on source inode (mds will lock it) */
d19a0b540   Yan, Zheng   ceph: voluntarily...
1023
  	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
2817b000b   Sage Weil   ceph: directory o...
1024
  	err = ceph_mdsc_do_request(mdsc, dir, req);
70b666c3b   Sage Weil   ceph: use ihold w...
1025
  	if (err) {
2817b000b   Sage Weil   ceph: directory o...
1026
  		d_drop(dentry);
70b666c3b   Sage Weil   ceph: use ihold w...
1027
  	} else if (!req->r_reply_info.head->is_dentry) {
2b0143b5c   David Howells   VFS: normal files...
1028
1029
  		ihold(d_inode(old_dentry));
  		d_instantiate(dentry, d_inode(old_dentry));
70b666c3b   Sage Weil   ceph: use ihold w...
1030
  	}
2817b000b   Sage Weil   ceph: directory o...
1031
1032
1033
  	ceph_mdsc_put_request(req);
  	return err;
  }
2ccb45462   Jeff Layton   ceph: perform asy...
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
  /*
   * Completion callback for an asynchronous unlink request.
   *
   * The result is req->r_err when set, otherwise the result field of the
   * reply.  Any failure other than -EJUKEBOX is recorded on the parent
   * and on the unlinked inode, and a warning is logged.  In all cases the
   * r_old_inode reference is put and the directory caps of the request
   * are released.
   */
  static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
  				 struct ceph_mds_request *req)
  {
  	int result = req->r_err;

  	if (!result)
  		result = le32_to_cpu(req->r_reply_info.head->result);

  	/* If op failed, mark everyone involved for errors */
  	if (result && result != -EJUKEBOX) {
  		int pathlen = 0;
  		u64 base = 0;
  		char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
  						  &base, 0);

  		/* mark error on parent + clear complete */
  		mapping_set_error(req->r_parent->i_mapping, result);
  		ceph_dir_clear_complete(req->r_parent);

  		/* drop the dentry -- we don't know its status */
  		if (!d_unhashed(req->r_dentry))
  			d_drop(req->r_dentry);

  		/* mark inode itself for an error (since metadata is bogus) */
  		mapping_set_error(req->r_old_inode->i_mapping, result);

  		pr_warn("ceph: async unlink failure path=(%llx)%s result=%d!\n",
  			base, IS_ERR(path) ? "<<bad>>" : path, result);
  		ceph_mdsc_free_path(path, pathlen);
  	}

  	iput(req->r_old_inode);
  	ceph_mdsc_release_dir_caps(req);
  }
  
  static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry)
  {
  	struct ceph_inode_info *ci = ceph_inode(dir);
  	struct ceph_dentry_info *di;
  	int got = 0, want = CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_UNLINK;
  
  	spin_lock(&ci->i_ceph_lock);
  	if ((__ceph_caps_issued(ci, NULL) & want) == want) {
  		ceph_take_cap_refs(ci, want, false);
  		got = want;
  	}
  	spin_unlock(&ci->i_ceph_lock);
  
  	/* If we didn't get anything, return 0 */
  	if (!got)
  		return 0;
  
          spin_lock(&dentry->d_lock);
          di = ceph_dentry(dentry);
  	/*
  	 * - We are holding Fx, which implies Fs caps.
  	 * - Only support async unlink for primary linkage
  	 */
  	if (atomic_read(&ci->i_shared_gen) != di->lease_shared_gen ||
  	    !(di->flags & CEPH_DENTRY_PRIMARY_LINK))
  		want = 0;
          spin_unlock(&dentry->d_lock);
  
  	/* Do we still want what we've got? */
  	if (want == got)
  		return got;
  
  	ceph_put_cap_refs(ci, got);
  	return 0;
  }
2817b000b   Sage Weil   ceph: directory o...
1106
  /*
2817b000b   Sage Weil   ceph: directory o...
1107
1108
1109
1110
   * rmdir and unlink are differ only by the metadata op code
   */
  static int ceph_unlink(struct inode *dir, struct dentry *dentry)
  {
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1111
1112
  	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
  	struct ceph_mds_client *mdsc = fsc->mdsc;
2b0143b5c   David Howells   VFS: normal files...
1113
  	struct inode *inode = d_inode(dentry);
2817b000b   Sage Weil   ceph: directory o...
1114
  	struct ceph_mds_request *req;
2ccb45462   Jeff Layton   ceph: perform asy...
1115
  	bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
2817b000b   Sage Weil   ceph: directory o...
1116
1117
1118
1119
1120
  	int err = -EROFS;
  	int op;
  
  	if (ceph_snap(dir) == CEPH_SNAPDIR) {
  		/* rmdir .snap/foo is RMSNAP */
a455589f1   Al Viro   assorted conversi...
1121
1122
  		dout("rmsnap dir %p '%pd' dn %p
  ", dir, dentry, dentry);
2817b000b   Sage Weil   ceph: directory o...
1123
1124
1125
1126
1127
  		op = CEPH_MDS_OP_RMSNAP;
  	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
  		dout("unlink/rmdir dir %p dn %p inode %p
  ",
  		     dir, dentry, inode);
e36cb0b89   David Howells   VFS: (Scripted) C...
1128
  		op = d_is_dir(dentry) ?
2817b000b   Sage Weil   ceph: directory o...
1129
1130
1131
  			CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
  	} else
  		goto out;
2ccb45462   Jeff Layton   ceph: perform asy...
1132
  retry:
2817b000b   Sage Weil   ceph: directory o...
1133
1134
1135
1136
1137
1138
1139
  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
  	if (IS_ERR(req)) {
  		err = PTR_ERR(req);
  		goto out;
  	}
  	req->r_dentry = dget(dentry);
  	req->r_num_caps = 2;
3dd69aabc   Jeff Layton   ceph: add a new f...
1140
  	req->r_parent = dir;
2817b000b   Sage Weil   ceph: directory o...
1141
1142
  	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
  	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
6ef0bc6dd   Zhi Zhang   ceph: flush dirty...
1143
  	req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
2ccb45462   Jeff Layton   ceph: perform asy...
1144
1145
1146
  
  	if (try_async && op == CEPH_MDS_OP_UNLINK &&
  	    (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) {
ebce3eb2f   Jeff Layton   ceph: fix inode n...
1147
  		dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir),
2ccb45462   Jeff Layton   ceph: perform asy...
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
  		     dentry->d_name.len, dentry->d_name.name,
  		     ceph_cap_string(req->r_dir_caps));
  		set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
  		req->r_callback = ceph_async_unlink_cb;
  		req->r_old_inode = d_inode(dentry);
  		ihold(req->r_old_inode);
  		err = ceph_mdsc_submit_request(mdsc, dir, req);
  		if (!err) {
  			/*
  			 * We have enough caps, so we assume that the unlink
  			 * will succeed. Fix up the target inode and dcache.
  			 */
  			drop_nlink(inode);
  			d_delete(dentry);
  		} else if (err == -EJUKEBOX) {
  			try_async = false;
  			ceph_mdsc_put_request(req);
  			goto retry;
  		}
  	} else {
  		set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
  		err = ceph_mdsc_do_request(mdsc, dir, req);
  		if (!err && !req->r_reply_info.head->is_dentry)
  			d_delete(dentry);
  	}
2817b000b   Sage Weil   ceph: directory o...
1173
1174
1175
1176
1177
1178
  	ceph_mdsc_put_request(req);
  out:
  	return err;
  }
  
  static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
1cd66c93b   Miklos Szeredi   fs: make remainin...
1179
1180
  		       struct inode *new_dir, struct dentry *new_dentry,
  		       unsigned int flags)
2817b000b   Sage Weil   ceph: directory o...
1181
  {
2678da88f   Xiubo Li   ceph: add ceph_sb...
1182
  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb);
2817b000b   Sage Weil   ceph: directory o...
1183
  	struct ceph_mds_request *req;
0ea611a3b   Yan, Zheng   ceph: rename snap...
1184
  	int op = CEPH_MDS_OP_RENAME;
2817b000b   Sage Weil   ceph: directory o...
1185
  	int err;
1cd66c93b   Miklos Szeredi   fs: make remainin...
1186
1187
  	if (flags)
  		return -EINVAL;
2817b000b   Sage Weil   ceph: directory o...
1188
1189
  	if (ceph_snap(old_dir) != ceph_snap(new_dir))
  		return -EXDEV;
0ea611a3b   Yan, Zheng   ceph: rename snap...
1190
1191
1192
1193
1194
  	if (ceph_snap(old_dir) != CEPH_NOSNAP) {
  		if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
  			op = CEPH_MDS_OP_RENAMESNAP;
  		else
  			return -EROFS;
dffdcd714   Luis Henriques   ceph: allow renam...
1195
1196
1197
1198
1199
  	} else if (old_dir != new_dir) {
  		err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
  					      new_dir);
  		if (err)
  			return err;
0ea611a3b   Yan, Zheng   ceph: rename snap...
1200
  	}
cafe21a4f   Luis Henriques   ceph: quota: don'...
1201

2817b000b   Sage Weil   ceph: directory o...
1202
1203
1204
  	dout("rename dir %p dentry %p to dir %p dentry %p
  ",
  	     old_dir, old_dentry, new_dir, new_dentry);
0ea611a3b   Yan, Zheng   ceph: rename snap...
1205
  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
2817b000b   Sage Weil   ceph: directory o...
1206
1207
  	if (IS_ERR(req))
  		return PTR_ERR(req);
180061a58   Sage Weil   ceph: avoid usele...
1208
  	ihold(old_dir);
2817b000b   Sage Weil   ceph: directory o...
1209
1210
1211
  	req->r_dentry = dget(new_dentry);
  	req->r_num_caps = 2;
  	req->r_old_dentry = dget(old_dentry);
180061a58   Sage Weil   ceph: avoid usele...
1212
  	req->r_old_dentry_dir = old_dir;
3dd69aabc   Jeff Layton   ceph: add a new f...
1213
1214
  	req->r_parent = new_dir;
  	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
2817b000b   Sage Weil   ceph: directory o...
1215
1216
1217
1218
1219
  	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
  	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
  	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
  	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
  	/* release LINK_RDCACHE on source inode (mds will lock it) */
d19a0b540   Yan, Zheng   ceph: voluntarily...
1220
  	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
6ef0bc6dd   Zhi Zhang   ceph: flush dirty...
1221
1222
1223
1224
  	if (d_really_is_positive(new_dentry)) {
  		req->r_inode_drop =
  			ceph_drop_caps_for_unlink(d_inode(new_dentry));
  	}
2817b000b   Sage Weil   ceph: directory o...
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
  	err = ceph_mdsc_do_request(mdsc, old_dir, req);
  	if (!err && !req->r_reply_info.head->is_dentry) {
  		/*
  		 * Normally d_move() is done by fill_trace (called by
  		 * do_request, above).  If there is no trace, we need
  		 * to do it here.
  		 */
  		d_move(old_dentry, new_dentry);
  	}
  	ceph_mdsc_put_request(req);
  	return err;
  }
81a6cf2d3   Sage Weil   ceph: invalidate ...
1237
  /*
37c4efc1d   Yan, Zheng   ceph: periodicall...
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
   * Move dentry to tail of mdsc->dentry_leases list when lease is updated.
   * Leases at front of the list will expire first. (Assume all leases have
   * similar duration)
   *
   * Called under dentry->d_lock.
   */
  void __ceph_dentry_lease_touch(struct ceph_dentry_info *di)
  {
  	struct dentry *dn = di->dentry;
  	struct ceph_mds_client *mdsc;
  
  	dout("dentry_lease_touch %p %p '%pd'
  ", di, dn, dn);
  
  	di->flags |= CEPH_DENTRY_LEASE_LIST;
  	if (di->flags & CEPH_DENTRY_SHRINK_LIST) {
  		di->flags |= CEPH_DENTRY_REFERENCED;
  		return;
  	}
  
  	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
  	spin_lock(&mdsc->dentry_list_lock);
  	list_move_tail(&di->lease_list, &mdsc->dentry_leases);
  	spin_unlock(&mdsc->dentry_list_lock);
  }
  
  /*
   * Queue @di at the tail of mdsc->dentry_dir_leases, stamping it with the
   * current jiffies and clearing its per-dentry lease state (lease_gen and
   * the LEASE_LIST / REFERENCED flags).
   *
   * Every visible caller holds mdsc->dentry_list_lock around this call.
   */
  static void __dentry_dir_lease_touch(struct ceph_mds_client *mdsc,
  				     struct ceph_dentry_info *di)
  {
  	struct list_head *dir_leases = &mdsc->dentry_dir_leases;
  
  	di->flags &= ~CEPH_DENTRY_LEASE_LIST;
  	di->flags &= ~CEPH_DENTRY_REFERENCED;
  	di->time = jiffies;
  	di->lease_gen = 0;
  	list_move_tail(&di->lease_list, dir_leases);
  }
  
  /*
   * When dir lease is used, add dentry to tail of mdsc->dentry_dir_leases
   * list if it's not in the list, otherwise set 'referenced' flag.
   *
   * A dentry that sits on the dentry lease list with a still-valid lease is
   * left where it is.  A dentry parked on a shrink (dispose) list is only
   * flagged as referenced and has its LEASE_LIST flag cleared.
   *
   * Called under dentry->d_lock.
   */
  void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
  {
  	struct dentry *dn = di->dentry;
  	struct ceph_mds_client *mdsc;
  
  	dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n",
  	     di, dn, dn, di->offset);
  
  	if (!list_empty(&di->lease_list)) {
  		if (di->flags & CEPH_DENTRY_LEASE_LIST) {
  			/* don't remove dentry from dentry lease list
  			 * if its lease is valid */
  			if (__dentry_lease_is_valid(di))
  				return;
  		} else {
  			di->flags |= CEPH_DENTRY_REFERENCED;
  			return;
  		}
  	}
  
  	if (di->flags & CEPH_DENTRY_SHRINK_LIST) {
  		di->flags |= CEPH_DENTRY_REFERENCED;
  		di->flags &= ~CEPH_DENTRY_LEASE_LIST;
  		return;
  	}
  
  	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
  	spin_lock(&mdsc->dentry_list_lock);
  	__dentry_dir_lease_touch(mdsc, di);
  	spin_unlock(&mdsc->dentry_list_lock);
  }
  
  /*
   * Take @di off whichever lease list it is on.  Nothing is done when the
   * entry is on a shrink (dispose) list or is not on any list.
   */
  static void __dentry_lease_unlist(struct ceph_dentry_info *di)
  {
  	struct ceph_mds_client *client;
  
  	if ((di->flags & CEPH_DENTRY_SHRINK_LIST) ||
  	    list_empty(&di->lease_list))
  		return;
  
  	client = ceph_sb_to_client(di->dentry->d_sb)->mdsc;
  	spin_lock(&client->dentry_list_lock);
  	list_del_init(&di->lease_list);
  	spin_unlock(&client->dentry_list_lock);
  }
  
  /*
   * Verdicts returned by the lease-walk check callbacks.  The non-zero
   * values are distinct bits; __dentry_leases_walk() tests them with '&'.
   */
  enum {
  	KEEP	= 0,	/* no bit set: the walk leaves the entry untouched */
  	DELETE	= 1,	/* stale lease: unlist the entry or queue it for disposal */
  	TOUCH	= 2,	/* move the entry to the tail of the dir lease list */
  	STOP	= 4,	/* end the walk after this entry */
  };
  
  /* Parameters and scan budget for one __dentry_leases_walk() pass. */
  struct ceph_lease_walk_control {
  	/* walk mdsc->dentry_dir_leases (true) or mdsc->dentry_leases (false) */
  	bool dir_lease;
  	/* read by __dir_lease_check(): may valid dir leases be invalidated? */
  	bool expire_dir_lease;
  	/* entries still allowed to be examined; decremented by the walk */
  	unsigned long nr_to_scan;
  	/* jiffies added to di->time by __dir_lease_check() before checking */
  	unsigned long dir_lease_ttl;
  };
  
  /*
   * Walk one of the lease lists (selected by lwc->dir_lease) and act on the
   * verdict @check returns for each dentry (KEEP/DELETE/TOUCH/STOP bits).
   *
   * Phase 1 runs under mdsc->dentry_list_lock and only trylocks each
   * dentry's d_lock; entries whose lock is contended are skipped.  Entries
   * judged stale with no remaining references are moved to a private
   * dispose list with an extra reference taken.
   *
   * Phase 2 drains the dispose list: entries that were referenced in the
   * meantime are put back on a lease list, the others are counted as freed.
   * Each one is then released with dput().
   *
   * lwc->nr_to_scan is decremented for every entry examined.
   * Returns the number of entries counted as freed.
   */
  static unsigned long
  __dentry_leases_walk(struct ceph_mds_client *mdsc,
  		     struct ceph_lease_walk_control *lwc,
  		     int (*check)(struct dentry*, void*))
  {
  	struct ceph_dentry_info *di, *tmp;
  	struct dentry *dentry, *last = NULL;
  	struct list_head *list;
  	LIST_HEAD(dispose);
  	unsigned long freed = 0;
  	int ret = 0;
  
  	list = lwc->dir_lease ? &mdsc->dentry_dir_leases : &mdsc->dentry_leases;
  	spin_lock(&mdsc->dentry_list_lock);
  	list_for_each_entry_safe(di, tmp, list, lease_list) {
  		if (!lwc->nr_to_scan)
  			break;
  		--lwc->nr_to_scan;
  
  		dentry = di->dentry;
  		/* reached the first entry this walk moved to the tail */
  		if (last == dentry)
  			break;
  
  		if (!spin_trylock(&dentry->d_lock))
  			continue;
  
  		if (__lockref_is_dead(&dentry->d_lockref)) {
  			list_del_init(&di->lease_list);
  			goto next;
  		}
  
  		ret = check(dentry, lwc);
  		if (ret & TOUCH) {
  			/* move it into tail of dir lease list */
  			__dentry_dir_lease_touch(mdsc, di);
  			if (!last)
  				last = dentry;
  		}
  		if (ret & DELETE) {
  			/* stale lease */
  			di->flags &= ~CEPH_DENTRY_REFERENCED;
  			if (dentry->d_lockref.count > 0) {
  				/* update_dentry_lease() will re-add
  				 * it to lease list, or
  				 * ceph_d_delete() will return 1 when
  				 * last reference is dropped */
  				list_del_init(&di->lease_list);
  			} else {
  				di->flags |= CEPH_DENTRY_SHRINK_LIST;
  				list_move_tail(&di->lease_list, &dispose);
  				dget_dlock(dentry);
  			}
  		}
  next:
  		spin_unlock(&dentry->d_lock);
  		if (ret & STOP)
  			break;
  	}
  	spin_unlock(&mdsc->dentry_list_lock);
  
  	while (!list_empty(&dispose)) {
  		di = list_first_entry(&dispose, struct ceph_dentry_info,
  				      lease_list);
  		dentry = di->dentry;
  		spin_lock(&dentry->d_lock);
  
  		list_del_init(&di->lease_list);
  		di->flags &= ~CEPH_DENTRY_SHRINK_LIST;
  		if (di->flags & CEPH_DENTRY_REFERENCED) {
  			/* touched while on the dispose list: put it back */
  			spin_lock(&mdsc->dentry_list_lock);
  			if (di->flags & CEPH_DENTRY_LEASE_LIST) {
  				list_add_tail(&di->lease_list,
  					      &mdsc->dentry_leases);
  			} else {
  				__dentry_dir_lease_touch(mdsc, di);
  			}
  			spin_unlock(&mdsc->dentry_list_lock);
  		} else {
  			freed++;
  		}
  
  		spin_unlock(&dentry->d_lock);
  		/* ceph_d_delete() does the trick */
  		dput(dentry);
  	}
  	return freed;
  }
  
  /*
   * Walk callback for the dentry lease list; @arg is not used.
   *
   * Returns STOP when the dentry lease is still valid, KEEP when the dir
   * lease check reported -EBUSY, TOUCH when the dir lease is valid, and
   * DELETE otherwise.
   */
  static int __dentry_lease_check(struct dentry *dentry, void *arg)
  {
  	int dir_valid;
  
  	if (__dentry_lease_is_valid(ceph_dentry(dentry)))
  		return STOP;
  
  	dir_valid = __dir_lease_try_check(dentry);
  	if (dir_valid == -EBUSY)
  		return KEEP;
  
  	return dir_valid > 0 ? TOUCH : DELETE;
  }
  
  /*
   * Walk callback for the dir lease list; @arg is the walk's
   * struct ceph_lease_walk_control.
   *
   * Returns KEEP when the dir lease check reported -EBUSY and DELETE when
   * the dir lease is not valid.  For a valid dir lease: STOP while jiffies
   * is before di->time + dir_lease_ttl; TOUCH when expiry is disabled or
   * the dentry is in use or flagged referenced; otherwise the dir lease is
   * invalidated and DELETE is returned.
   */
  static int __dir_lease_check(struct dentry *dentry, void *arg)
  {
  	struct ceph_lease_walk_control *lwc = arg;
  	struct ceph_dentry_info *di = ceph_dentry(dentry);
  
  	int ret = __dir_lease_try_check(dentry);
  	if (ret == -EBUSY)
  		return KEEP;
  	if (ret > 0) {
  		if (time_before(jiffies, di->time + lwc->dir_lease_ttl))
  			return STOP;
  		/* Move dentry to tail of dir lease list if we don't want
  		 * to delete it. So dentries in the list are checked in a
  		 * round robin manner */
  		if (!lwc->expire_dir_lease)
  			return TOUCH;
  		if (dentry->d_lockref.count > 0 ||
  		    (di->flags & CEPH_DENTRY_REFERENCED))
  			return TOUCH;
  		/* invalidate dir lease */
  		di->lease_shared_gen = 0;
  	}
  	return DELETE;
  }
  
  int ceph_trim_dentries(struct ceph_mds_client *mdsc)
  {
  	struct ceph_lease_walk_control lwc;
fe33032da   Yan, Zheng   ceph: add mount o...
1469
  	unsigned long count;
37c4efc1d   Yan, Zheng   ceph: periodicall...
1470
  	unsigned long freed;
fe33032da   Yan, Zheng   ceph: add mount o...
1471
1472
1473
1474
1475
1476
1477
  	spin_lock(&mdsc->caps_list_lock);
          if (mdsc->caps_use_max > 0 &&
              mdsc->caps_use_count > mdsc->caps_use_max)
  		count = mdsc->caps_use_count - mdsc->caps_use_max;
  	else
  		count = 0;
          spin_unlock(&mdsc->caps_list_lock);
37c4efc1d   Yan, Zheng   ceph: periodicall...
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
  	lwc.dir_lease = false;
  	lwc.nr_to_scan  = CEPH_CAPS_PER_RELEASE * 2;
  	freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
  	if (!lwc.nr_to_scan) /* more invalid leases */
  		return -EAGAIN;
  
  	if (lwc.nr_to_scan < CEPH_CAPS_PER_RELEASE)
  		lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE;
  
  	lwc.dir_lease = true;
fe33032da   Yan, Zheng   ceph: add mount o...
1488
1489
  	lwc.expire_dir_lease = freed < count;
  	lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
37c4efc1d   Yan, Zheng   ceph: periodicall...
1490
1491
1492
1493
1494
1495
1496
1497
  	freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
  	if (!lwc.nr_to_scan) /* more to check */
  		return -EAGAIN;
  
  	return freed > 0 ? 1 : 0;
  }
  
  /*
81a6cf2d3   Sage Weil   ceph: invalidate ...
1498
1499
1500
1501
   * Ensure a dentry lease will no longer revalidate.
   */
  void ceph_invalidate_dentry_lease(struct dentry *dentry)
  {
37c4efc1d   Yan, Zheng   ceph: periodicall...
1502
  	struct ceph_dentry_info *di = ceph_dentry(dentry);
81a6cf2d3   Sage Weil   ceph: invalidate ...
1503
  	spin_lock(&dentry->d_lock);
37c4efc1d   Yan, Zheng   ceph: periodicall...
1504
1505
  	di->time = jiffies;
  	di->lease_shared_gen = 0;
f5e17aed3   Jeff Layton   ceph: track prima...
1506
  	di->flags &= ~CEPH_DENTRY_PRIMARY_LINK;
37c4efc1d   Yan, Zheng   ceph: periodicall...
1507
  	__dentry_lease_unlist(di);
81a6cf2d3   Sage Weil   ceph: invalidate ...
1508
1509
  	spin_unlock(&dentry->d_lock);
  }
2817b000b   Sage Weil   ceph: directory o...
1510
1511
1512
1513
1514
  
  /*
   * Check if dentry lease is valid.  If not, delete the lease.  Try to
   * renew if the least is more than half up.
   */
1e9c2eb68   Yan, Zheng   ceph: delete stal...
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
  static bool __dentry_lease_is_valid(struct ceph_dentry_info *di)
  {
  	struct ceph_mds_session *session;
  
  	if (!di->lease_gen)
  		return false;
  
  	session = di->lease_session;
  	if (session) {
  		u32 gen;
  		unsigned long ttl;
  
  		spin_lock(&session->s_gen_ttl_lock);
  		gen = session->s_cap_gen;
  		ttl = session->s_cap_ttl;
  		spin_unlock(&session->s_gen_ttl_lock);
  
  		if (di->lease_gen == gen &&
  		    time_before(jiffies, ttl) &&
  		    time_before(jiffies, di->time))
  			return true;
  	}
  	di->lease_gen = 0;
  	return false;
  }
8f2a98ef3   Yan, Zheng   ceph: ensure d_na...
1540
  static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags)
2817b000b   Sage Weil   ceph: directory o...
1541
1542
  {
  	struct ceph_dentry_info *di;
2817b000b   Sage Weil   ceph: directory o...
1543
  	struct ceph_mds_session *session = NULL;
2817b000b   Sage Weil   ceph: directory o...
1544
  	u32 seq = 0;
1e9c2eb68   Yan, Zheng   ceph: delete stal...
1545
  	int valid = 0;
2817b000b   Sage Weil   ceph: directory o...
1546
1547
1548
  
  	spin_lock(&dentry->d_lock);
  	di = ceph_dentry(dentry);
1e9c2eb68   Yan, Zheng   ceph: delete stal...
1549
1550
  	if (di && __dentry_lease_is_valid(di)) {
  		valid = 1;
2817b000b   Sage Weil   ceph: directory o...
1551

1e9c2eb68   Yan, Zheng   ceph: delete stal...
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
  		if (di->lease_renew_after &&
  		    time_after(jiffies, di->lease_renew_after)) {
  			/*
  			 * We should renew. If we're in RCU walk mode
  			 * though, we can't do that so just return
  			 * -ECHILD.
  			 */
  			if (flags & LOOKUP_RCU) {
  				valid = -ECHILD;
  			} else {
  				session = ceph_get_mds_session(di->lease_session);
  				seq = di->lease_seq;
  				di->lease_renew_after = 0;
  				di->lease_renew_from = jiffies;
2817b000b   Sage Weil   ceph: directory o...
1566
  			}
2817b000b   Sage Weil   ceph: directory o...
1567
1568
1569
1570
1571
  		}
  	}
  	spin_unlock(&dentry->d_lock);
  
  	if (session) {
8f2a98ef3   Yan, Zheng   ceph: ensure d_na...
1572
  		ceph_mdsc_lease_send_msg(session, dentry,
2817b000b   Sage Weil   ceph: directory o...
1573
1574
1575
1576
1577
1578
1579
1580
1581
  					 CEPH_MDS_LEASE_RENEW, seq);
  		ceph_put_mds_session(session);
  	}
  	dout("dentry_lease_is_valid - dentry %p = %d
  ", dentry, valid);
  	return valid;
  }
  
  /*
   * Non-blocking check of the dir lease covering @dentry.
   *
   * Returns 1 when the parent directory's shared generation matches the
   * one recorded in the dentry and CEPH_CAP_FILE_SHARED is issued, 0 when
   * the lease is not valid (the recorded generation is then zeroed), and
   * -EBUSY when the parent's i_ceph_lock could not be taken without
   * waiting.
   *
   * Called under dentry->d_lock.
   */
  static int __dir_lease_try_check(const struct dentry *dentry)
  {
  	struct ceph_dentry_info *di = ceph_dentry(dentry);
  	struct inode *dir;
  	struct ceph_inode_info *ci;
  	int valid = 0;
  
  	if (!di->lease_shared_gen)
  		return 0;
  	if (IS_ROOT(dentry))
  		return 0;
  
  	dir = d_inode(dentry->d_parent);
  	ci = ceph_inode(dir);
  
  	if (spin_trylock(&ci->i_ceph_lock)) {
  		if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen &&
  		    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0))
  			valid = 1;
  		spin_unlock(&ci->i_ceph_lock);
  	} else {
  		valid = -EBUSY;
  	}
  
  	if (!valid)
  		di->lease_shared_gen = 0;
  	return valid;
  }
  
  /*
2817b000b   Sage Weil   ceph: directory o...
1614
1615
   * Check if directory-wide content lease/cap is valid.
   */
719a2514e   Yan, Zheng   ceph: consider in...
1616
1617
  static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry,
  			      struct ceph_mds_client *mdsc)
2817b000b   Sage Weil   ceph: directory o...
1618
1619
  {
  	struct ceph_inode_info *ci = ceph_inode(dir);
feab6ac25   Yan, Zheng   ceph: fix dir_lea...
1620
1621
  	int valid;
  	int shared_gen;
2817b000b   Sage Weil   ceph: directory o...
1622

be655596b   Sage Weil   ceph: use i_ceph_...
1623
  	spin_lock(&ci->i_ceph_lock);
feab6ac25   Yan, Zheng   ceph: fix dir_lea...
1624
  	valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
719a2514e   Yan, Zheng   ceph: consider in...
1625
1626
1627
1628
  	if (valid) {
  		__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD);
  		shared_gen = atomic_read(&ci->i_shared_gen);
  	}
be655596b   Sage Weil   ceph: use i_ceph_...
1629
  	spin_unlock(&ci->i_ceph_lock);
feab6ac25   Yan, Zheng   ceph: fix dir_lea...
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
  	if (valid) {
  		struct ceph_dentry_info *di;
  		spin_lock(&dentry->d_lock);
  		di = ceph_dentry(dentry);
  		if (dir == d_inode(dentry->d_parent) &&
  		    di && di->lease_shared_gen == shared_gen)
  			__ceph_dentry_dir_lease_touch(di);
  		else
  			valid = 0;
  		spin_unlock(&dentry->d_lock);
  	}
  	dout("dir_lease_is_valid dir %p v%u dentry %p = %d
  ",
  	     dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid);
2817b000b   Sage Weil   ceph: directory o...
1644
1645
1646
1647
1648
1649
  	return valid;
  }
  
  /*
   * Check if cached dentry can be trusted.
   */
0b728e191   Al Viro   stop passing name...
1650
  static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
2817b000b   Sage Weil   ceph: directory o...
1651
  {
bf1c6aca9   Sage Weil   ceph: protect d_p...
1652
  	int valid = 0;
641235d8f   Yan, Zheng   ceph: kill ceph_g...
1653
  	struct dentry *parent;
aa8dd8167   Al Viro   ceph: fix RCU cas...
1654
  	struct inode *dir, *inode;
719a2514e   Yan, Zheng   ceph: consider in...
1655
  	struct ceph_mds_client *mdsc;
34286d666   Nick Piggin   fs: rcu-walk awar...
1656

f49d1e058   Jeff Layton   ceph: handle LOOK...
1657
  	if (flags & LOOKUP_RCU) {
52953d559   Seraphime Kirkovski   ceph: cleanup ACC...
1658
  		parent = READ_ONCE(dentry->d_parent);
f49d1e058   Jeff Layton   ceph: handle LOOK...
1659
1660
1661
  		dir = d_inode_rcu(parent);
  		if (!dir)
  			return -ECHILD;
aa8dd8167   Al Viro   ceph: fix RCU cas...
1662
  		inode = d_inode_rcu(dentry);
f49d1e058   Jeff Layton   ceph: handle LOOK...
1663
1664
1665
  	} else {
  		parent = dget_parent(dentry);
  		dir = d_inode(parent);
aa8dd8167   Al Viro   ceph: fix RCU cas...
1666
  		inode = d_inode(dentry);
f49d1e058   Jeff Layton   ceph: handle LOOK...
1667
  	}
34286d666   Nick Piggin   fs: rcu-walk awar...
1668

0eb308531   Xiubo Li   ceph: print dentr...
1669
1670
  	dout("d_revalidate %p '%pd' inode %p offset 0x%llx
  ", dentry,
aa8dd8167   Al Viro   ceph: fix RCU cas...
1671
  	     dentry, inode, ceph_dentry(dentry)->offset);
2817b000b   Sage Weil   ceph: directory o...
1672

719a2514e   Yan, Zheng   ceph: consider in...
1673
  	mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
2817b000b   Sage Weil   ceph: directory o...
1674
1675
  	/* always trust cached snapped dentries, snapdir dentry */
  	if (ceph_snap(dir) != CEPH_NOSNAP) {
a455589f1   Al Viro   assorted conversi...
1676
1677
  		dout("d_revalidate %p '%pd' inode %p is SNAPPED
  ", dentry,
aa8dd8167   Al Viro   ceph: fix RCU cas...
1678
  		     dentry, inode);
bf1c6aca9   Sage Weil   ceph: protect d_p...
1679
  		valid = 1;
aa8dd8167   Al Viro   ceph: fix RCU cas...
1680
  	} else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
bf1c6aca9   Sage Weil   ceph: protect d_p...
1681
  		valid = 1;
14fb9c9ef   Jeff Layton   ceph: allow dentr...
1682
  	} else {
8f2a98ef3   Yan, Zheng   ceph: ensure d_na...
1683
  		valid = dentry_lease_is_valid(dentry, flags);
14fb9c9ef   Jeff Layton   ceph: allow dentr...
1684
1685
  		if (valid == -ECHILD)
  			return valid;
719a2514e   Yan, Zheng   ceph: consider in...
1686
  		if (valid || dir_lease_is_valid(dir, dentry, mdsc)) {
aa8dd8167   Al Viro   ceph: fix RCU cas...
1687
1688
  			if (inode)
  				valid = ceph_is_any_caps(inode);
14fb9c9ef   Jeff Layton   ceph: allow dentr...
1689
1690
1691
  			else
  				valid = 1;
  		}
2817b000b   Sage Weil   ceph: directory o...
1692
  	}
2817b000b   Sage Weil   ceph: directory o...
1693

200fd27c8   Yan, Zheng   ceph: use lookup ...
1694
  	if (!valid) {
200fd27c8   Yan, Zheng   ceph: use lookup ...
1695
  		struct ceph_mds_request *req;
1097680d7   Jeff Layton   ceph: fix endiann...
1696
1697
  		int op, err;
  		u32 mask;
200fd27c8   Yan, Zheng   ceph: use lookup ...
1698

f49d1e058   Jeff Layton   ceph: handle LOOK...
1699
1700
  		if (flags & LOOKUP_RCU)
  			return -ECHILD;
f9009efac   Xiubo Li   ceph: add dentry ...
1701
  		percpu_counter_inc(&mdsc->metric.d_lease_mis);
200fd27c8   Yan, Zheng   ceph: use lookup ...
1702
  		op = ceph_snap(dir) == CEPH_SNAPDIR ?
5eb9f6040   Jeff Layton   ceph: do a LOOKUP...
1703
  			CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
200fd27c8   Yan, Zheng   ceph: use lookup ...
1704
1705
1706
  		req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
  		if (!IS_ERR(req)) {
  			req->r_dentry = dget(dentry);
5eb9f6040   Jeff Layton   ceph: do a LOOKUP...
1707
1708
  			req->r_num_caps = 2;
  			req->r_parent = dir;
200fd27c8   Yan, Zheng   ceph: use lookup ...
1709
1710
1711
1712
  
  			mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
  			if (ceph_security_xattr_wanted(dir))
  				mask |= CEPH_CAP_XATTR_SHARED;
1097680d7   Jeff Layton   ceph: fix endiann...
1713
  			req->r_args.getattr.mask = cpu_to_le32(mask);
200fd27c8   Yan, Zheng   ceph: use lookup ...
1714

200fd27c8   Yan, Zheng   ceph: use lookup ...
1715
  			err = ceph_mdsc_do_request(mdsc, NULL, req);
c3f4688a0   Jeff Layton   ceph: don't set r...
1716
1717
1718
1719
1720
1721
1722
1723
1724
  			switch (err) {
  			case 0:
  				if (d_really_is_positive(dentry) &&
  				    d_inode(dentry) == req->r_target_inode)
  					valid = 1;
  				break;
  			case -ENOENT:
  				if (d_really_is_negative(dentry))
  					valid = 1;
df561f668   Gustavo A. R. Silva   treewide: Use fal...
1725
  				fallthrough;
c3f4688a0   Jeff Layton   ceph: don't set r...
1726
1727
  			default:
  				break;
200fd27c8   Yan, Zheng   ceph: use lookup ...
1728
1729
1730
1731
1732
1733
  			}
  			ceph_mdsc_put_request(req);
  			dout("d_revalidate %p lookup result=%d
  ",
  			     dentry, err);
  		}
f9009efac   Xiubo Li   ceph: add dentry ...
1734
1735
  	} else {
  		percpu_counter_inc(&mdsc->metric.d_lease_hit);
200fd27c8   Yan, Zheng   ceph: use lookup ...
1736
  	}
bf1c6aca9   Sage Weil   ceph: protect d_p...
1737
1738
  	dout("d_revalidate %p %s
  ", dentry, valid ? "valid" : "invalid");
37c4efc1d   Yan, Zheng   ceph: periodicall...
1739
  	if (!valid)
9215aeea6   Yan, Zheng   ceph: check inode...
1740
  		ceph_dir_clear_complete(dir);
641235d8f   Yan, Zheng   ceph: kill ceph_g...
1741

f49d1e058   Jeff Layton   ceph: handle LOOK...
1742
1743
  	if (!(flags & LOOKUP_RCU))
  		dput(parent);
bf1c6aca9   Sage Weil   ceph: protect d_p...
1744
  	return valid;
2817b000b   Sage Weil   ceph: directory o...
1745
1746
1747
  }
  
  /*
   * Delete unused dentry that doesn't have valid lease
   *
   * Returns 1 when the dentry should be deleted, 0 when it should be kept:
   * negative dentries, dentries of snapshot inodes, and dentries with a
   * valid dentry lease or a non-zero dir lease check result are kept.
   *
   * Called under dentry->d_lock.
   */
  static int ceph_d_delete(const struct dentry *dentry)
  {
  	struct ceph_dentry_info *di;
  
  	/* won't release caps */
  	if (d_really_is_negative(dentry))
  		return 0;
  	if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
  		return 0;
  	/* valid lease? */
  	di = ceph_dentry(dentry);
  	if (di) {
  		if (__dentry_lease_is_valid(di))
  			return 0;
  		/* non-zero covers both a valid dir lease and -EBUSY */
  		if (__dir_lease_try_check(dentry))
  			return 0;
  	}
  	return 1;
  }
  
  /*
147851d2d   Sage Weil   ceph: rename dent...
1773
   * Release our ceph_dentry_info.
2817b000b   Sage Weil   ceph: directory o...
1774
   */
147851d2d   Sage Weil   ceph: rename dent...
1775
  static void ceph_d_release(struct dentry *dentry)
2817b000b   Sage Weil   ceph: directory o...
1776
1777
  {
  	struct ceph_dentry_info *di = ceph_dentry(dentry);
f9009efac   Xiubo Li   ceph: add dentry ...
1778
  	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
2817b000b   Sage Weil   ceph: directory o...
1779

147851d2d   Sage Weil   ceph: rename dent...
1780
1781
  	dout("d_release %p
  ", dentry);
5b484a513   Jeff Layton   ceph: clear d_fsi...
1782

f9009efac   Xiubo Li   ceph: add dentry ...
1783
  	atomic64_dec(&fsc->mdsc->metric.total_dentries);
5b484a513   Jeff Layton   ceph: clear d_fsi...
1784
  	spin_lock(&dentry->d_lock);
37c4efc1d   Yan, Zheng   ceph: periodicall...
1785
  	__dentry_lease_unlist(di);
5b484a513   Jeff Layton   ceph: clear d_fsi...
1786
1787
  	dentry->d_fsdata = NULL;
  	spin_unlock(&dentry->d_lock);
3d8eb7a94   Sage Weil   ceph: remove unne...
1788
1789
1790
  	if (di->lease_session)
  		ceph_put_mds_session(di->lease_session);
  	kmem_cache_free(ceph_dentry_cachep, di);
2817b000b   Sage Weil   ceph: directory o...
1791
  }
b58dc4100   Sage Weil   ceph: clear paren...
1792
1793
1794
1795
1796
1797
1798
1799
  /*
   * When the VFS prunes a dentry from the cache, we need to clear the
   * complete flag on the parent directory.
   *
   * Called under dentry->d_lock.
   */
  static void ceph_d_prune(struct dentry *dentry)
  {
5495c2d04   Yan, Zheng   ceph: avoid deref...
1800
1801
1802
1803
1804
  	struct ceph_inode_info *dir_ci;
  	struct ceph_dentry_info *di;
  
  	dout("ceph_d_prune %pd %p
  ", dentry, dentry);
b58dc4100   Sage Weil   ceph: clear paren...
1805
1806
  
  	/* do we have a valid parent? */
8842b3be9   Sage Weil   ceph: clean up us...
1807
  	if (IS_ROOT(dentry))
b58dc4100   Sage Weil   ceph: clear paren...
1808
  		return;
5495c2d04   Yan, Zheng   ceph: avoid deref...
1809
1810
1811
  	/* we hold d_lock, so d_parent is stable */
  	dir_ci = ceph_inode(d_inode(dentry->d_parent));
  	if (dir_ci->i_vino.snap == CEPH_SNAPDIR)
b58dc4100   Sage Weil   ceph: clear paren...
1812
  		return;
2817b000b   Sage Weil   ceph: directory o...
1813

5495c2d04   Yan, Zheng   ceph: avoid deref...
1814
1815
  	/* who calls d_delete() should also disable dcache readdir */
  	if (d_really_is_negative(dentry))
18fc8abdb   Al Viro   ceph: unify dentr...
1816
  		return;
5495c2d04   Yan, Zheng   ceph: avoid deref...
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
  	/* d_fsdata does not get cleared until d_release */
  	if (!d_unhashed(dentry)) {
  		__ceph_dir_clear_complete(dir_ci);
  		return;
  	}
  
  	/* Disable dcache readdir just in case that someone called d_drop()
  	 * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED
  	 * properly (dcache readdir is still enabled) */
  	di = ceph_dentry(dentry);
  	if (di->offset > 0 &&
  	    di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen))
  		__ceph_dir_clear_ordered(dir_ci);
b58dc4100   Sage Weil   ceph: clear paren...
1830
  }
2817b000b   Sage Weil   ceph: directory o...
1831
1832
1833
1834
1835
1836
1837
1838
  
  /*
   * read() on a dir.  This weird interface hack only works if mounted
   * with '-o dirstat'.
   *
   * The first read on an open directory formats the directory's entry and
   * recursive statistics into a buffer kept in the file's private data;
   * this and later reads are served from that buffer.
   *
   * Returns the number of bytes copied (0 at or past the end of the text,
   * or for a zero-length read), -EISDIR without the dirstat mount option,
   * -ENOMEM if the buffer cannot be allocated, -EFAULT if nothing could be
   * copied to @buf, or -EINVAL for a negative *ppos.
   */
  static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
  			     loff_t *ppos)
  {
  	struct ceph_dir_file_info *dfi = file->private_data;
  	struct inode *inode = file_inode(file);
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	const int bufsize = 1024;
  
  	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
  		return -EISDIR;
  
  	if (!dfi->dir_info) {
  		dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
  		if (!dfi->dir_info)
  			return -ENOMEM;
  		dfi->dir_info_len =
  			snprintf(dfi->dir_info, bufsize,
  				"entries:   %20lld\n"
  				" files:    %20lld\n"
  				" subdirs:  %20lld\n"
  				"rentries:  %20lld\n"
  				" rfiles:   %20lld\n"
  				" rsubdirs: %20lld\n"
  				"rbytes:    %20lld\n"
  				"rctime:    %10lld.%09ld\n",
  				ci->i_files + ci->i_subdirs,
  				ci->i_files,
  				ci->i_subdirs,
  				ci->i_rfiles + ci->i_rsubdirs,
  				ci->i_rfiles,
  				ci->i_rsubdirs,
  				ci->i_rbytes,
  				ci->i_rctime.tv_sec,
  				ci->i_rctime.tv_nsec);
  	}
  
  	/*
  	 * The helper clamps to the remaining text, advances *ppos by the
  	 * bytes actually copied, and returns 0 (not -EFAULT) when size is 0.
  	 */
  	return simple_read_from_buffer(buf, size, ppos, dfi->dir_info,
  				       dfi->dir_info_len);
  }
2817b000b   Sage Weil   ceph: directory o...
1888

2817b000b   Sage Weil   ceph: directory o...
1889

6c0f3af72   Sage Weil   ceph: add dir_lay...
1890
1891
1892
1893
  /*
   * Return name hash for a given dentry.  This is dependent on
   * the parent directory's hash function.
   */
e5f86dc37   Sage Weil   ceph: avoid d_par...
1894
  unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
6c0f3af72   Sage Weil   ceph: add dir_lay...
1895
  {
6c0f3af72   Sage Weil   ceph: add dir_lay...
1896
  	struct ceph_inode_info *dci = ceph_inode(dir);
76a495d66   Jeff Layton   ceph: ensure d_na...
1897
  	unsigned hash;
6c0f3af72   Sage Weil   ceph: add dir_lay...
1898
1899
1900
1901
1902
1903
1904
  
  	switch (dci->i_dir_layout.dl_dir_hash) {
  	case 0:	/* for backward compat */
  	case CEPH_STR_HASH_LINUX:
  		return dn->d_name.hash;
  
  	default:
76a495d66   Jeff Layton   ceph: ensure d_na...
1905
1906
  		spin_lock(&dn->d_lock);
  		hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
6c0f3af72   Sage Weil   ceph: add dir_lay...
1907
  				     dn->d_name.name, dn->d_name.len);
76a495d66   Jeff Layton   ceph: ensure d_na...
1908
1909
  		spin_unlock(&dn->d_lock);
  		return hash;
6c0f3af72   Sage Weil   ceph: add dir_lay...
1910
1911
  	}
  }
2817b000b   Sage Weil   ceph: directory o...
1912
1913
  const struct file_operations ceph_dir_fops = {
  	.read = ceph_read_dir,
77acfa29e   Al Viro   [readdir] convert...
1914
  	.iterate = ceph_readdir,
2817b000b   Sage Weil   ceph: directory o...
1915
1916
1917
1918
  	.llseek = ceph_dir_llseek,
  	.open = ceph_open,
  	.release = ceph_release,
  	.unlocked_ioctl = ceph_ioctl,
18bd6caae   Arnd Bergmann   ceph: fix compat_...
1919
  	.compat_ioctl = compat_ptr_ioctl,
da819c815   Yan, Zheng   ceph: fix directo...
1920
  	.fsync = ceph_fsync,
597817ddb   Yan, Zheng   ceph: support fil...
1921
1922
  	.lock = ceph_lock,
  	.flock = ceph_flock,
2817b000b   Sage Weil   ceph: directory o...
1923
  };
38c48b5f0   Yan, Zheng   ceph: provide sep...
1924
1925
1926
1927
1928
1929
  /*
   * File operations for snapdir directories: only readdir, llseek, open
   * and release are provided.
   */
  const struct file_operations ceph_snapdir_fops = {
  	.iterate = ceph_readdir,
  	.llseek = ceph_dir_llseek,
  	.open = ceph_open,
  	.release = ceph_release,
  };
2817b000b   Sage Weil   ceph: directory o...
1930
1931
1932
1933
1934
  const struct inode_operations ceph_dir_iops = {
  	.lookup = ceph_lookup,
  	.permission = ceph_permission,
  	.getattr = ceph_getattr,
  	.setattr = ceph_setattr,
2817b000b   Sage Weil   ceph: directory o...
1935
  	.listxattr = ceph_listxattr,
7221fe4c2   Guangliang Zhao   ceph: add acl for...
1936
  	.get_acl = ceph_get_acl,
72466d0b9   Sage Weil   ceph: fix posix A...
1937
  	.set_acl = ceph_set_acl,
2817b000b   Sage Weil   ceph: directory o...
1938
1939
1940
1941
1942
1943
1944
1945
  	.mknod = ceph_mknod,
  	.symlink = ceph_symlink,
  	.mkdir = ceph_mkdir,
  	.link = ceph_link,
  	.unlink = ceph_unlink,
  	.rmdir = ceph_unlink,
  	.rename = ceph_rename,
  	.create = ceph_create,
2d83bde9a   Miklos Szeredi   ceph: implement i...
1946
  	.atomic_open = ceph_atomic_open,
2817b000b   Sage Weil   ceph: directory o...
1947
  };
38c48b5f0   Yan, Zheng   ceph: provide sep...
1948
1949
1950
1951
1952
1953
  const struct inode_operations ceph_snapdir_iops = {
  	.lookup = ceph_lookup,
  	.permission = ceph_permission,
  	.getattr = ceph_getattr,
  	.mkdir = ceph_mkdir,
  	.rmdir = ceph_unlink,
0ea611a3b   Yan, Zheng   ceph: rename snap...
1954
  	.rename = ceph_rename,
38c48b5f0   Yan, Zheng   ceph: provide sep...
1955
  };
52dfb8ac0   Sage Weil   ceph: constify de...
1956
  const struct dentry_operations ceph_dentry_ops = {
2817b000b   Sage Weil   ceph: directory o...
1957
  	.d_revalidate = ceph_d_revalidate,
1e9c2eb68   Yan, Zheng   ceph: delete stal...
1958
  	.d_delete = ceph_d_delete,
147851d2d   Sage Weil   ceph: rename dent...
1959
  	.d_release = ceph_d_release,
b58dc4100   Sage Weil   ceph: clear paren...
1960
  	.d_prune = ceph_d_prune,
ad5cb123f   Al Viro   ceph: switch to u...
1961
  	.d_init = ceph_d_init,
2817b000b   Sage Weil   ceph: directory o...
1962
  };