Blame view
fs/ceph/dir.c
51.1 KB
b24413180
|
1 |
// SPDX-License-Identifier: GPL-2.0 |
3d14c5d2b
|
2 |
#include <linux/ceph/ceph_debug.h> |
2817b000b
|
3 4 |
#include <linux/spinlock.h> |
2817b000b
|
5 |
#include <linux/namei.h> |
5a0e3ad6a
|
6 |
#include <linux/slab.h> |
2817b000b
|
7 |
#include <linux/sched.h> |
2cdeb1e47
|
8 |
#include <linux/xattr.h> |
2817b000b
|
9 10 |
#include "super.h" |
3d14c5d2b
|
11 |
#include "mds_client.h" |
2817b000b
|
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
/* * Directory operations: readdir, lookup, create, link, unlink, * rename, etc. */ /* * Ceph MDS operations are specified in terms of a base ino and * relative path. Thus, the client can specify an operation on a * specific inode (e.g., a getattr due to fstat(2)), or as a path * relative to, say, the root directory. * * Normally, we limit ourselves to strict inode ops (no path component) * or dentry operations (a single path component relative to an ino). The * exception to this is open_root_dentry(), which will open the mount * point by name. */ |
52dfb8ac0
|
29 |
const struct dentry_operations ceph_dentry_ops; |
2817b000b
|
30 |
|
37c4efc1d
|
31 32 |
static bool __dentry_lease_is_valid(struct ceph_dentry_info *di); static int __dir_lease_try_check(const struct dentry *dentry); |
2817b000b
|
33 34 35 |
/* * Initialize ceph dentry state. */ |
ad5cb123f
|
36 |
static int ceph_d_init(struct dentry *dentry) |
2817b000b
|
37 38 |
{ struct ceph_dentry_info *di; |
2678da88f
|
39 |
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb); |
2817b000b
|
40 |
|
99ec26977
|
41 |
di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL); |
2817b000b
|
42 43 |
if (!di) return -ENOMEM; /* oh well */ |
2817b000b
|
44 45 |
di->dentry = dentry; di->lease_session = NULL; |
9b16f03c4
|
46 |
di->time = jiffies; |
48d0cbd12
|
47 |
dentry->d_fsdata = di; |
37c4efc1d
|
48 |
INIT_LIST_HEAD(&di->lease_list); |
f9009efac
|
49 50 |
atomic64_inc(&mdsc->metric.total_dentries); |
2817b000b
|
51 52 |
return 0; } |
2817b000b
|
53 |
/* |
f3c4ebe65
|
54 55 56 57 58 59 |
* for f_pos for readdir: * - hash order: * (0xff << 52) | ((24 bits hash) << 28) | * (the nth entry has hash collision); * - frag+name order; * ((frag value) << 28) | (the nth entry in frag); |
2817b000b
|
60 |
*/ |
f3c4ebe65
|
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
#define OFFSET_BITS 28 #define OFFSET_MASK ((1 << OFFSET_BITS) - 1) #define HASH_ORDER (0xffull << (OFFSET_BITS + 24)) loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order) { loff_t fpos = ((loff_t)high << 28) | (loff_t)off; if (hash_order) fpos |= HASH_ORDER; return fpos; } static bool is_hash_order(loff_t p) { return (p & HASH_ORDER) == HASH_ORDER; } |
2817b000b
|
76 77 |
/* Everything in @p above the low OFFSET_BITS bits, truncated to unsigned. */
static unsigned fpos_frag(loff_t p)
{
	return (unsigned)(p >> OFFSET_BITS);
}
f3c4ebe65
|
80 81 82 83 84 |
/* ceph_frag_value() applied to the upper (frag) part of position @p. */
static unsigned fpos_hash(loff_t p)
{
	unsigned upper = fpos_frag(p);

	return ceph_frag_value(upper);
}
2817b000b
|
85 86 |
/* The low OFFSET_BITS bits of position @p. */
static unsigned fpos_off(loff_t p)
{
	return (unsigned)(p & OFFSET_MASK);
}
4d5f5df67
|
89 90 91 92 93 94 95 |
/*
 * Order two readdir positions: first by their frag parts (as decided by
 * ceph_frag_compare()), then by their offsets within the frag.
 * Returns <0, 0 or >0.
 */
static int fpos_cmp(loff_t l, loff_t r)
{
	int frag_order = ceph_frag_compare(fpos_frag(l), fpos_frag(r));

	if (frag_order != 0)
		return frag_order;

	/* same frag: fall back to the difference of the offsets */
	return (int)(fpos_off(l) - fpos_off(r));
}
2817b000b
|
96 |
/* |
fdd4e1583
|
97 98 99 100 101 |
* make note of the last dentry we read, so we can * continue at the same lexicographical point, * regardless of what dir changes take place on the * server. */ |
bb48bd4dc
|
102 |
static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, |
fdd4e1583
|
103 104 105 106 107 |
int len, unsigned next_offset) { char *buf = kmalloc(len+1, GFP_KERNEL); if (!buf) return -ENOMEM; |
bb48bd4dc
|
108 109 110 111 112 113 114 |
kfree(dfi->last_name); dfi->last_name = buf; memcpy(dfi->last_name, name, len); dfi->last_name[len] = 0; dfi->next_offset = next_offset; dout("note_last_dentry '%s' ", dfi->last_name); |
fdd4e1583
|
115 116 |
return 0; } |
c530cd24c
|
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
/*
 * Fetch entry @idx of the readdir cache kept in @parent's page cache and
 * take a reference on it.
 *
 * Returns NULL when @idx lies at or beyond the directory's i_size (used
 * here as the size of the cached pointer array), ERR_PTR(-EAGAIN) when
 * the cache page is missing, the directory is no longer complete and
 * ordered, or the dentry could not be referenced, and the referenced
 * dentry otherwise.  @cache_ctl carries the currently mapped page from
 * one call to the next.
 */
static struct dentry *
__dcache_find_get_entry(struct dentry *parent, u64 idx,
			struct ceph_readdir_cache_control *cache_ctl)
{
	const unsigned slot_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1;
	struct inode *dir = d_inode(parent);
	loff_t ptr_pos = idx * sizeof(struct dentry *);
	pgoff_t ptr_pgoff = ptr_pos >> PAGE_SHIFT;
	struct dentry *found = NULL;

	if (ptr_pos >= i_size_read(dir))
		return NULL;

	/* switch pages only when the wanted slot is not on the mapped one */
	if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) {
		ceph_readdir_cache_release(cache_ctl);
		cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
		if (!cache_ctl->page) {
			dout(" page %lu not found ", ptr_pgoff);
			return ERR_PTR(-EAGAIN);
		}
		/* reading/filling the cache are serialized by
		   i_mutex, no need to use page lock */
		unlock_page(cache_ctl->page);
		cache_ctl->dentries = kmap(cache_ctl->page);
	}

	cache_ctl->index = idx & slot_mask;

	rcu_read_lock();
	spin_lock(&parent->d_lock);
	/* check i_size again here, because empty directory can be
	 * marked as complete while not holding the i_mutex. */
	if (ceph_dir_is_complete_ordered(dir) && ptr_pos < i_size_read(dir))
		found = cache_ctl->dentries[cache_ctl->index];
	spin_unlock(&parent->d_lock);

	/* a dentry whose lockref is already dead cannot be used */
	if (found && !lockref_get_not_dead(&found->d_lockref))
		found = NULL;
	rcu_read_unlock();

	if (!found)
		return ERR_PTR(-EAGAIN);
	return found;
}
fdd4e1583
|
161 |
/* |
2817b000b
|
162 163 |
* When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on |
946e51f2b
|
164 |
* d_child when we initially get results back from the MDS, and |
2817b000b
|
165 166 167 |
* falling back to a "normal" sync readdir if any dentries in the dir * are dropped. * |
2f276c511
|
168 |
* Complete dir indicates that we have all dentries in the dir. It is |
2817b000b
|
169 170 171 |
* defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by * the MDS if/when the directory is modified). */ |
a30be7cb2
|
172 |
static int __dcache_readdir(struct file *file, struct dir_context *ctx, |
97aeb6bf9
|
173 |
int shared_gen) |
2817b000b
|
174 |
{ |
bb48bd4dc
|
175 |
struct ceph_dir_file_info *dfi = file->private_data; |
b583043e9
|
176 |
struct dentry *parent = file->f_path.dentry; |
2b0143b5c
|
177 |
struct inode *dir = d_inode(parent); |
fdd4e1583
|
178 |
struct dentry *dentry, *last = NULL; |
2817b000b
|
179 |
struct ceph_dentry_info *di; |
fdd4e1583
|
180 |
struct ceph_readdir_cache_control cache_ctl = {}; |
c530cd24c
|
181 182 |
u64 idx = 0; int err = 0; |
2817b000b
|
183 |
|
97aeb6bf9
|
184 185 |
dout("__dcache_readdir %p v%u at %llx ", dir, (unsigned)shared_gen, ctx->pos); |
2817b000b
|
186 |
|
c530cd24c
|
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
/* search start position */ if (ctx->pos > 2) { u64 count = div_u64(i_size_read(dir), sizeof(struct dentry *)); while (count > 0) { u64 step = count >> 1; dentry = __dcache_find_get_entry(parent, idx + step, &cache_ctl); if (!dentry) { /* use linar search */ idx = 0; break; } if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out; } di = ceph_dentry(dentry); spin_lock(&dentry->d_lock); if (fpos_cmp(di->offset, ctx->pos) < 0) { idx += step + 1; count -= step + 1; } else { count = step; } spin_unlock(&dentry->d_lock); dput(dentry); } |
2817b000b
|
214 |
|
c530cd24c
|
215 216 |
dout("__dcache_readdir %p cache idx %llu ", dir, idx); |
2817b000b
|
217 |
} |
fdd4e1583
|
218 |
|
c530cd24c
|
219 220 221 222 |
for (;;) { bool emit_dentry = false; dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl); if (!dentry) { |
bb48bd4dc
|
223 |
dfi->file_info.flags |= CEPH_F_ATEND; |
fdd4e1583
|
224 225 |
err = 0; break; |
2817b000b
|
226 |
} |
c530cd24c
|
227 228 229 |
if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out; |
fdd4e1583
|
230 |
} |
fdd4e1583
|
231 |
spin_lock(&dentry->d_lock); |
5495c2d04
|
232 233 234 235 236 237 238 239 240 241 |
di = ceph_dentry(dentry); if (d_unhashed(dentry) || d_really_is_negative(dentry) || di->lease_shared_gen != shared_gen) { spin_unlock(&dentry->d_lock); dput(dentry); err = -EAGAIN; goto out; } if (fpos_cmp(ctx->pos, di->offset) <= 0) { |
37c4efc1d
|
242 |
__ceph_dentry_dir_lease_touch(di); |
fdd4e1583
|
243 244 |
emit_dentry = true; } |
da5029563
|
245 |
spin_unlock(&dentry->d_lock); |
2817b000b
|
246 |
|
fdd4e1583
|
247 |
if (emit_dentry) { |
f3c4ebe65
|
248 249 |
dout(" %llx dentry %p %pd %p ", di->offset, |
fdd4e1583
|
250 251 252 |
dentry, dentry, d_inode(dentry)); ctx->pos = di->offset; if (!dir_emit(ctx, dentry->d_name.name, |
ebce3eb2f
|
253 |
dentry->d_name.len, ceph_present_inode(d_inode(dentry)), |
fdd4e1583
|
254 255 256 257 258 259 |
d_inode(dentry)->i_mode >> 12)) { dput(dentry); err = 0; break; } ctx->pos++; |
0081bd83c
|
260 |
|
fdd4e1583
|
261 262 263 264 265 |
if (last) dput(last); last = dentry; } else { dput(dentry); |
2817b000b
|
266 |
} |
fdd4e1583
|
267 |
} |
c530cd24c
|
268 |
out: |
fdd4e1583
|
269 270 271 272 |
ceph_readdir_cache_release(&cache_ctl); if (last) { int ret; di = ceph_dentry(last); |
bb48bd4dc
|
273 |
ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len, |
fdd4e1583
|
274 275 276 |
fpos_off(di->offset) + 1); if (ret < 0) err = ret; |
2817b000b
|
277 |
dput(last); |
84583cfb9
|
278 |
/* last_name no longer match cache index */ |
bb48bd4dc
|
279 280 281 |
if (dfi->readdir_cache_idx >= 0) { dfi->readdir_cache_idx = -1; dfi->dir_release_count = 0; |
84583cfb9
|
282 |
} |
fdd4e1583
|
283 |
} |
2817b000b
|
284 285 |
return err; } |
bb48bd4dc
|
286 |
/*
 * Decide whether a new readdir request is needed for position @pos:
 * true when nothing is buffered in @dfi, or when the buffered frag does
 * not cover @pos.
 */
static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
{
	bool frag_covers_pos;

	/* nothing buffered at all */
	if (!dfi->last_readdir)
		return true;

	if (is_hash_order(pos))
		frag_covers_pos =
			ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
	else
		frag_covers_pos = (dfi->frag == fpos_frag(pos));

	return !frag_covers_pos;
}
77acfa29e
|
295 |
static int ceph_readdir(struct file *file, struct dir_context *ctx) |
2817b000b
|
296 |
{ |
bb48bd4dc
|
297 |
struct ceph_dir_file_info *dfi = file->private_data; |
77acfa29e
|
298 |
struct inode *inode = file_inode(file); |
2817b000b
|
299 |
struct ceph_inode_info *ci = ceph_inode(inode); |
3d14c5d2b
|
300 301 |
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; |
8974eebd3
|
302 |
int i; |
2817b000b
|
303 |
int err; |
b50c2de51
|
304 |
unsigned frag = -1; |
2817b000b
|
305 |
struct ceph_mds_reply_info_parsed *rinfo; |
2817b000b
|
306 |
|
8974eebd3
|
307 308 |
dout("readdir %p file %p pos %llx ", inode, file, ctx->pos); |
bb48bd4dc
|
309 |
if (dfi->file_info.flags & CEPH_F_ATEND) |
2817b000b
|
310 311 312 |
return 0; /* always start with . and .. */ |
77acfa29e
|
313 |
if (ctx->pos == 0) { |
2817b000b
|
314 315 |
dout("readdir off 0 -> '.' "); |
ebce3eb2f
|
316 |
if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode), |
77acfa29e
|
317 |
inode->i_mode >> 12)) |
2817b000b
|
318 |
return 0; |
77acfa29e
|
319 |
ctx->pos = 1; |
2817b000b
|
320 |
} |
77acfa29e
|
321 |
if (ctx->pos == 1) { |
ebce3eb2f
|
322 323 324 325 326 327 |
u64 ino; struct dentry *dentry = file->f_path.dentry; spin_lock(&dentry->d_lock); ino = ceph_present_inode(dentry->d_parent->d_inode); spin_unlock(&dentry->d_lock); |
2817b000b
|
328 329 |
dout("readdir off 1 -> '..' "); |
ebce3eb2f
|
330 |
if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12)) |
2817b000b
|
331 |
return 0; |
77acfa29e
|
332 |
ctx->pos = 2; |
2817b000b
|
333 |
} |
be655596b
|
334 |
spin_lock(&ci->i_ceph_lock); |
719a2514e
|
335 336 337 338 |
/* request Fx cap. if have Fx, we don't need to release Fs cap * for later create/unlink. */ __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_WR); /* can we use the dcache? */ |
fdd4e1583
|
339 |
if (ceph_test_mount_opt(fsc, DCACHE) && |
3d14c5d2b
|
340 |
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
a0dff78da
|
341 |
ceph_snap(inode) != CEPH_SNAPDIR && |
70db4f362
|
342 |
__ceph_dir_is_complete_ordered(ci) && |
1af16d547
|
343 |
__ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) { |
97aeb6bf9
|
344 |
int shared_gen = atomic_read(&ci->i_shared_gen); |
1af16d547
|
345 |
|
be655596b
|
346 |
spin_unlock(&ci->i_ceph_lock); |
a30be7cb2
|
347 |
err = __dcache_readdir(file, ctx, shared_gen); |
efa4c1206
|
348 |
if (err != -EAGAIN) |
2817b000b
|
349 |
return err; |
efa4c1206
|
350 |
} else { |
be655596b
|
351 |
spin_unlock(&ci->i_ceph_lock); |
2817b000b
|
352 |
} |
2817b000b
|
353 354 |
/* proceed with a normal readdir */ |
2817b000b
|
355 356 |
more: /* do we have the correct frag content buffered? */ |
bb48bd4dc
|
357 |
if (need_send_readdir(dfi, ctx->pos)) { |
2817b000b
|
358 359 360 361 362 |
struct ceph_mds_request *req; int op = ceph_snap(inode) == CEPH_SNAPDIR ? CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; /* discard old result, if any */ |
bb48bd4dc
|
363 364 365 |
if (dfi->last_readdir) { ceph_mdsc_put_request(dfi->last_readdir); dfi->last_readdir = NULL; |
393f66209
|
366 |
} |
2817b000b
|
367 |
|
f3c4ebe65
|
368 |
if (is_hash_order(ctx->pos)) { |
b50c2de51
|
369 370 371 372 373 |
/* fragtree isn't always accurate. choose frag * based on previous reply when possible. */ if (frag == (unsigned)-1) frag = ceph_choose_frag(ci, fpos_hash(ctx->pos), NULL, NULL); |
f3c4ebe65
|
374 375 376 |
} else { frag = fpos_frag(ctx->pos); } |
2817b000b
|
377 378 |
dout("readdir fetching %llx.%llx frag %x offset '%s' ", |
bb48bd4dc
|
379 |
ceph_vinop(inode), frag, dfi->last_name); |
2817b000b
|
380 381 382 |
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); |
54008399d
|
383 384 385 386 387 |
err = ceph_alloc_readdir_reply_buffer(req, inode); if (err) { ceph_mdsc_put_request(req); return err; } |
2817b000b
|
388 389 |
/* hints to request -> mds selection code */ req->r_direct_mode = USE_AUTH_MDS; |
5d37ca148
|
390 391 392 |
if (op == CEPH_MDS_OP_READDIR) { req->r_direct_hash = ceph_frag_value(frag); __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); |
87c91a965
|
393 |
req->r_inode_drop = CEPH_CAP_FILE_EXCL; |
5d37ca148
|
394 |
} |
bb48bd4dc
|
395 396 |
if (dfi->last_name) { req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL); |
a149bb9a2
|
397 398 399 400 |
if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; } |
79162547b
|
401 402 403 |
} else if (is_hash_order(ctx->pos)) { req->r_args.readdir.offset_hash = cpu_to_le32(fpos_hash(ctx->pos)); |
a149bb9a2
|
404 |
} |
79162547b
|
405 |
|
bb48bd4dc
|
406 407 408 409 |
req->r_dir_release_cnt = dfi->dir_release_count; req->r_dir_ordered_cnt = dfi->dir_ordered_count; req->r_readdir_cache_idx = dfi->readdir_cache_idx; req->r_readdir_offset = dfi->next_offset; |
2817b000b
|
410 |
req->r_args.readdir.frag = cpu_to_le32(frag); |
956d39d63
|
411 412 |
req->r_args.readdir.flags = cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS); |
a149bb9a2
|
413 414 415 416 |
req->r_inode = inode; ihold(inode); req->r_dentry = dget(file->f_path.dentry); |
2817b000b
|
417 418 419 420 421 |
err = ceph_mdsc_do_request(mdsc, NULL, req); if (err < 0) { ceph_mdsc_put_request(req); return err; } |
f3c4ebe65
|
422 423 424 425 |
dout("readdir got and parsed readdir result=%d on " "frag %x, end=%d, complete=%d, hash_order=%d ", err, frag, |
2817b000b
|
426 |
(int)req->r_reply_info.dir_end, |
f3c4ebe65
|
427 428 |
(int)req->r_reply_info.dir_complete, (int)req->r_reply_info.hash_order); |
2817b000b
|
429 |
|
81c6aea52
|
430 431 432 |
rinfo = &req->r_reply_info; if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { frag = le32_to_cpu(rinfo->dir_dir->frag); |
f3c4ebe65
|
433 |
if (!rinfo->hash_order) { |
bb48bd4dc
|
434 |
dfi->next_offset = req->r_readdir_offset; |
f3c4ebe65
|
435 436 |
/* adjust ctx->pos to beginning of frag */ ctx->pos = ceph_make_fpos(frag, |
bb48bd4dc
|
437 |
dfi->next_offset, |
f3c4ebe65
|
438 439 |
false); } |
81c6aea52
|
440 |
} |
fdd4e1583
|
441 |
|
bb48bd4dc
|
442 443 |
dfi->frag = frag; dfi->last_readdir = req; |
2817b000b
|
444 |
|
bc2de10dc
|
445 |
if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) { |
bb48bd4dc
|
446 447 |
dfi->readdir_cache_idx = req->r_readdir_cache_idx; if (dfi->readdir_cache_idx < 0) { |
fdd4e1583
|
448 |
/* preclude from marking dir ordered */ |
bb48bd4dc
|
449 |
dfi->dir_ordered_count = 0; |
8974eebd3
|
450 |
} else if (ceph_frag_is_leftmost(frag) && |
bb48bd4dc
|
451 |
dfi->next_offset == 2) { |
fdd4e1583
|
452 453 |
/* note dir version at start of readdir so * we can tell if any dentries get dropped */ |
bb48bd4dc
|
454 455 |
dfi->dir_release_count = req->r_dir_release_cnt; dfi->dir_ordered_count = req->r_dir_ordered_cnt; |
fdd4e1583
|
456 457 |
} } else { |
4c069a582
|
458 459 |
dout("readdir !did_prepopulate "); |
fdd4e1583
|
460 |
/* disable readdir cache */ |
bb48bd4dc
|
461 |
dfi->readdir_cache_idx = -1; |
fdd4e1583
|
462 |
/* preclude from marking dir complete */ |
bb48bd4dc
|
463 |
dfi->dir_release_count = 0; |
fdd4e1583
|
464 |
} |
f3c4ebe65
|
465 466 |
/* note next offset and last dentry name */ if (rinfo->dir_nr > 0) { |
2a5beea3f
|
467 468 |
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + (rinfo->dir_nr-1); |
f3c4ebe65
|
469 470 |
unsigned next_offset = req->r_reply_info.dir_end ? 2 : (fpos_off(rde->offset) + 1); |
bb48bd4dc
|
471 |
err = note_last_dentry(dfi, rde->name, rde->name_len, |
f3c4ebe65
|
472 |
next_offset); |
2817b000b
|
473 474 |
if (err) return err; |
f3c4ebe65
|
475 |
} else if (req->r_reply_info.dir_end) { |
bb48bd4dc
|
476 |
dfi->next_offset = 2; |
f3c4ebe65
|
477 |
/* keep last name */ |
2817b000b
|
478 479 |
} } |
bb48bd4dc
|
480 |
rinfo = &dfi->last_readdir->r_reply_info; |
8974eebd3
|
481 482 |
dout("readdir frag %x num %d pos %llx chunk first %llx ", |
bb48bd4dc
|
483 |
dfi->frag, rinfo->dir_nr, ctx->pos, |
8974eebd3
|
484 |
rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); |
77acfa29e
|
485 |
|
8974eebd3
|
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 |
i = 0; /* search start position */ if (rinfo->dir_nr > 0) { int step, nr = rinfo->dir_nr; while (nr > 0) { step = nr >> 1; if (rinfo->dir_entries[i + step].offset < ctx->pos) { i += step + 1; nr -= step + 1; } else { nr = step; } } } for (; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; |
3105c19c4
|
502 |
|
8974eebd3
|
503 504 505 506 507 508 |
BUG_ON(rde->offset < ctx->pos); ctx->pos = rde->offset; dout("readdir (%d/%d) -> %llx '%.*s' %p ", i, rinfo->dir_nr, ctx->pos, |
2a5beea3f
|
509 |
rde->name_len, rde->name, &rde->inode.in); |
8974eebd3
|
510 |
|
2a5beea3f
|
511 |
BUG_ON(!rde->inode.in); |
8974eebd3
|
512 |
|
2a5beea3f
|
513 |
if (!dir_emit(ctx, rde->name, rde->name_len, |
ebce3eb2f
|
514 515 |
ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), le32_to_cpu(rde->inode.in->mode) >> 12)) { |
2817b000b
|
516 517 518 519 |
dout("filldir stopping us... "); return 0; } |
77acfa29e
|
520 |
ctx->pos++; |
2817b000b
|
521 |
} |
bb48bd4dc
|
522 523 |
ceph_mdsc_put_request(dfi->last_readdir); dfi->last_readdir = NULL; |
b50c2de51
|
524 |
|
bb48bd4dc
|
525 526 |
if (dfi->next_offset > 2) { frag = dfi->frag; |
2817b000b
|
527 528 529 530 |
goto more; } /* more frags? */ |
bb48bd4dc
|
531 532 |
if (!ceph_frag_is_rightmost(dfi->frag)) { frag = ceph_frag_next(dfi->frag); |
f3c4ebe65
|
533 534 |
if (is_hash_order(ctx->pos)) { loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), |
bb48bd4dc
|
535 |
dfi->next_offset, true); |
f3c4ebe65
|
536 537 538 539 |
if (new_pos > ctx->pos) ctx->pos = new_pos; /* keep last_name */ } else { |
bb48bd4dc
|
540 541 542 543 |
ctx->pos = ceph_make_fpos(frag, dfi->next_offset, false); kfree(dfi->last_name); dfi->last_name = NULL; |
f3c4ebe65
|
544 |
} |
2817b000b
|
545 546 547 548 |
dout("readdir next frag is %x ", frag); goto more; } |
bb48bd4dc
|
549 |
dfi->file_info.flags |= CEPH_F_ATEND; |
2817b000b
|
550 551 552 553 554 555 |
/* * if dir_release_count still matches the dir, no dentries * were released during the whole readdir, and we should have * the complete dir contents in our cache. */ |
bb48bd4dc
|
556 557 |
if (atomic64_read(&ci->i_release_count) == dfi->dir_release_count) { |
fdd4e1583
|
558 |
spin_lock(&ci->i_ceph_lock); |
bb48bd4dc
|
559 560 |
if (dfi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { |
70db4f362
|
561 562 |
dout(" marking %p complete and ordered ", inode); |
fdd4e1583
|
563 564 |
/* use i_size to track number of entries in * readdir cache */ |
bb48bd4dc
|
565 566 |
BUG_ON(dfi->readdir_cache_idx < 0); i_size_write(inode, dfi->readdir_cache_idx * |
fdd4e1583
|
567 568 |
sizeof(struct dentry*)); } else { |
70db4f362
|
569 570 |
dout(" marking %p complete ", inode); |
fdd4e1583
|
571 |
} |
bb48bd4dc
|
572 573 |
__ceph_dir_set_complete(ci, dfi->dir_release_count, dfi->dir_ordered_count); |
fdd4e1583
|
574 |
spin_unlock(&ci->i_ceph_lock); |
2817b000b
|
575 |
} |
2817b000b
|
576 |
|
77acfa29e
|
577 578 |
dout("readdir %p file %p done. ", inode, file); |
2817b000b
|
579 580 |
return 0; } |
bb48bd4dc
|
581 |
static void reset_readdir(struct ceph_dir_file_info *dfi) |
2817b000b
|
582 |
{ |
bb48bd4dc
|
583 584 585 |
if (dfi->last_readdir) { ceph_mdsc_put_request(dfi->last_readdir); dfi->last_readdir = NULL; |
2817b000b
|
586 |
} |
bb48bd4dc
|
587 588 589 590 591 592 |
kfree(dfi->last_name); dfi->last_name = NULL; dfi->dir_release_count = 0; dfi->readdir_cache_idx = -1; dfi->next_offset = 2; /* compensate for . and .. */ dfi->file_info.flags &= ~CEPH_F_ATEND; |
2817b000b
|
593 |
} |
8974eebd3
|
594 595 596 597 |
/* * discard buffered readdir content on seekdir(0), or seek to new frag, * or seek prior to current chunk */ |
bb48bd4dc
|
598 |
static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
{
	struct ceph_mds_reply_info_parsed *rinfo;
	loff_t chunk_offset;

	/* seekdir(0) always starts over */
	if (new_pos == 0)
		return true;

	/*
	 * A frag-order position in a different frag needs a reset.  For
	 * hash order there is no need to reset last_name for a forward
	 * seek, because dentries are sorted in hash order.
	 */
	if (!is_hash_order(new_pos) && dfi->frag != fpos_frag(new_pos))
		return true;

	/* nothing (or an empty chunk) buffered: nothing worth keeping */
	if (!dfi->last_readdir)
		return true;
	rinfo = &dfi->last_readdir->r_reply_info;
	if (!rinfo->dir_nr)
		return true;

	/* reset when seeking before the chunk or across ordering modes */
	chunk_offset = rinfo->dir_entries[0].offset;
	if (new_pos < chunk_offset)
		return true;
	return is_hash_order(new_pos) != is_hash_order(chunk_offset);
}
965c8e59c
|
617 |
/*
 * llseek for a ceph directory.
 *
 * SEEK_SET and SEEK_CUR are supported; SEEK_END yields -EOPNOTSUPP and
 * any other @whence (or a negative resulting offset) yields -EINVAL.
 * On success the new position is returned and buffered readdir state is
 * dropped or invalidated as needed.  The whole operation runs under the
 * inode lock.
 */
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ceph_dir_file_info *dfi = file->private_data;
	struct inode *inode = file->f_mapping->host;
	loff_t retval;

	inode_lock(inode);
	retval = -EINVAL;
	/* every arm ends explicitly; nothing relies on falling through */
	switch (whence) {
	case SEEK_CUR:
		offset += file->f_pos;
		break;
	case SEEK_SET:
		break;
	case SEEK_END:
		retval = -EOPNOTSUPP;
		goto out;
	default:
		goto out;
	}

	if (offset >= 0) {
		if (need_reset_readdir(dfi, offset)) {
			dout("dir_llseek dropping %p content ", file);
			reset_readdir(dfi);
		} else if (is_hash_order(offset) && offset > file->f_pos) {
			/* for hash offset, we don't know if a forward seek
			 * is within same frag */
			dfi->dir_release_count = 0;
			dfi->readdir_cache_idx = -1;
		}

		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
			/* position moved, so we are no longer at the end */
			dfi->file_info.flags &= ~CEPH_F_ATEND;
		}
		retval = offset;
	}
out:
	inode_unlock(inode);
	return retval;
}

/*
 * Handle lookups for the hidden .snap directory.
 */
468640e32
|
661 662 |
int ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *dentry, int err) |
2817b000b
|
663 |
{ |
3d14c5d2b
|
664 |
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
2b0143b5c
|
665 |
struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */ |
2817b000b
|
666 667 668 |
/* .snap dir? */ if (err == -ENOENT && |
455cec0ab
|
669 |
ceph_snap(parent) == CEPH_NOSNAP && |
6b8051855
|
670 |
strcmp(dentry->d_name.name, |
3d14c5d2b
|
671 |
fsc->mount_options->snapdir_name) == 0) { |
2817b000b
|
672 |
struct inode *inode = ceph_get_snapdir(parent); |
a455589f1
|
673 674 675 |
dout("ENOENT on snapdir %p '%pd', linking to snapdir %p ", dentry, dentry, inode); |
9358c6d4c
|
676 |
BUG_ON(!d_unhashed(dentry)); |
2817b000b
|
677 678 679 |
d_add(dentry, inode); err = 0; } |
468640e32
|
680 681 |
return err; } |
2817b000b
|
682 |
|
468640e32
|
683 684 685 686 687 688 689 690 691 692 693 694 695 696 |
/* * Figure out final result of a lookup/open request. * * Mainly, make sure we return the final req->r_dentry (if it already * existed) in place of the original VFS-provided dentry when they * differ. * * Gracefully handle the case where the MDS replies with -ENOENT and * no trace (which it may do, at its discretion, e.g., if it doesn't * care to issue a lease on the negative dentry). */ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err) { |
2817b000b
|
697 698 699 700 701 702 |
if (err == -ENOENT) { /* no trace? */ err = 0; if (!req->r_reply_info.head->is_dentry) { dout("ENOENT and no trace, dentry %p inode %p ", |
2b0143b5c
|
703 704 |
dentry, d_inode(dentry)); if (d_really_is_positive(dentry)) { |
2817b000b
|
705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 |
d_drop(dentry); err = -ENOENT; } else { d_add(dentry, NULL); } } } if (err) dentry = ERR_PTR(err); else if (dentry != req->r_dentry) dentry = dget(req->r_dentry); /* we got spliced */ else dentry = NULL; return dentry; } |
3b33f692c
|
720 |
static bool is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) |
1d1de9160
|
721 722 723 724 |
{ return ceph_ino(inode) == CEPH_INO_ROOT && strncmp(dentry->d_name.name, ".ceph", 5) == 0; } |
2817b000b
|
725 726 727 728 729 |
/* * Look up a single dir entry. If there is a lookup intent, inform * the MDS so that it gets our 'caps wanted' value in a single op. */ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, |
00cd8dd3b
|
730 |
unsigned int flags) |
2817b000b
|
731 |
{ |
3d14c5d2b
|
732 |
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
2678da88f
|
733 |
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); |
2817b000b
|
734 735 |
struct ceph_mds_request *req; int op; |
315f24088
|
736 |
int mask; |
2817b000b
|
737 |
int err; |
a455589f1
|
738 739 740 |
dout("lookup %p dentry %p '%pd' ", dir, dentry, dentry); |
2817b000b
|
741 742 743 |
if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); |
2817b000b
|
744 |
/* can we conclude ENOENT locally? */ |
2b0143b5c
|
745 |
if (d_really_is_negative(dentry)) { |
2817b000b
|
746 747 |
struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_dentry_info *di = ceph_dentry(dentry); |
be655596b
|
748 |
spin_lock(&ci->i_ceph_lock); |
891f3f5a6
|
749 750 |
dout(" dir %p flags are 0x%lx ", dir, ci->i_ceph_flags); |
2817b000b
|
751 |
if (strncmp(dentry->d_name.name, |
3d14c5d2b
|
752 |
fsc->mount_options->snapdir_name, |
2817b000b
|
753 |
dentry->d_name.len) && |
1d1de9160
|
754 |
!is_root_ceph_dentry(dir, dentry) && |
e2c3de046
|
755 |
ceph_test_mount_opt(fsc, DCACHE) && |
2f276c511
|
756 |
__ceph_dir_is_complete(ci) && |
1af16d547
|
757 |
__ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) { |
719a2514e
|
758 |
__ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); |
be655596b
|
759 |
spin_unlock(&ci->i_ceph_lock); |
2817b000b
|
760 761 762 |
dout(" dir %p complete, -ENOENT ", dir); d_add(dentry, NULL); |
97aeb6bf9
|
763 |
di->lease_shared_gen = atomic_read(&ci->i_shared_gen); |
2817b000b
|
764 765 |
return NULL; } |
be655596b
|
766 |
spin_unlock(&ci->i_ceph_lock); |
2817b000b
|
767 768 769 770 771 772 |
} op = ceph_snap(dir) == CEPH_SNAPDIR ? CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (IS_ERR(req)) |
7e34bc524
|
773 |
return ERR_CAST(req); |
2817b000b
|
774 775 |
req->r_dentry = dget(dentry); req->r_num_caps = 2; |
315f24088
|
776 777 778 779 780 |
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; req->r_args.getattr.mask = cpu_to_le32(mask); |
3dd69aabc
|
781 782 |
req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); |
2817b000b
|
783 |
err = ceph_mdsc_do_request(mdsc, NULL, req); |
468640e32
|
784 |
err = ceph_handle_snapdir(req, dentry, err); |
2817b000b
|
785 786 787 788 789 790 791 792 793 794 795 796 797 |
dentry = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); /* will dput(dentry) */ dout("lookup result=%p ", dentry); return dentry; } /* * If we do a create but get no trace back from the MDS, follow up with * a lookup (the VFS expects us to link up the provided dentry). */ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) { |
00cd8dd3b
|
798 |
struct dentry *result = ceph_lookup(dir, dentry, 0); |
2817b000b
|
799 800 801 802 803 |
if (result && !IS_ERR(result)) { /* * We created the item, then did a lookup, and found * it was already linked to another inode we already |
4d41cef27
|
804 805 806 807 808 809 810 811 |
* had in our cache (and thus got spliced). To not * confuse VFS (especially when inode is a directory), * we don't link our dentry to that inode, return an * error instead. * * This event should be rare and it happens only when * we talk to old MDS. Recent MDS does not send traceless * reply for request that creates new inode. |
2817b000b
|
812 |
*/ |
5cba372c0
|
813 |
d_drop(result); |
4d41cef27
|
814 |
return -ESTALE; |
2817b000b
|
815 816 817 818 819 |
} return PTR_ERR(result); } static int ceph_mknod(struct inode *dir, struct dentry *dentry, |
1a67aafb5
|
820 |
umode_t mode, dev_t rdev) |
2817b000b
|
821 |
{ |
2678da88f
|
822 |
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); |
2817b000b
|
823 |
struct ceph_mds_request *req; |
5c31e92df
|
824 |
struct ceph_acl_sec_ctx as_ctx = {}; |
2817b000b
|
825 826 827 828 |
int err; if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; |
0459871c4
|
829 830 831 832 |
if (ceph_quota_is_max_files_exceeded(dir)) { err = -EDQUOT; goto out; } |
b7a292176
|
833 |
|
5c31e92df
|
834 |
err = ceph_pre_init_acls(dir, &mode, &as_ctx); |
b1ee94aa5
|
835 |
if (err < 0) |
0459871c4
|
836 |
goto out; |
ac6713ccb
|
837 838 839 |
err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out; |
b1ee94aa5
|
840 |
|
1a67aafb5
|
841 842 |
dout("mknod in dir %p dentry %p mode 0%ho rdev %d ", |
2817b000b
|
843 844 845 |
dir, dentry, mode, rdev); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); if (IS_ERR(req)) { |
b1ee94aa5
|
846 847 |
err = PTR_ERR(req); goto out; |
2817b000b
|
848 849 850 |
} req->r_dentry = dget(dentry); req->r_num_caps = 2; |
3dd69aabc
|
851 852 |
req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); |
2817b000b
|
853 854 |
req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); |
222b7f90b
|
855 |
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; |
2817b000b
|
856 |
req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
5c31e92df
|
857 858 859 |
if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; as_ctx.pagelist = NULL; |
b1ee94aa5
|
860 |
} |
2817b000b
|
861 862 863 864 |
err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); |
b1ee94aa5
|
865 |
out: |
7221fe4c2
|
866 |
if (!err) |
5c31e92df
|
867 |
ceph_init_inode_acls(d_inode(dentry), &as_ctx); |
b20a95a0d
|
868 |
else |
2817b000b
|
869 |
d_drop(dentry); |
5c31e92df
|
870 |
ceph_release_acl_sec_ctx(&as_ctx); |
2817b000b
|
871 872 |
return err; } |
4acdaf27e
|
873 |
static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
ebfc3b49a
|
874 |
bool excl) |
2817b000b
|
875 |
{ |
2d83bde9a
|
876 |
return ceph_mknod(dir, dentry, mode, 0); |
2817b000b
|
877 878 879 880 881 |
} static int ceph_symlink(struct inode *dir, struct dentry *dentry, const char *dest) { |
2678da88f
|
882 |
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); |
2817b000b
|
883 |
struct ceph_mds_request *req; |
ac6713ccb
|
884 |
struct ceph_acl_sec_ctx as_ctx = {}; |
2817b000b
|
885 886 887 888 |
int err; if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; |
67fcd1514
|
889 890 891 892 |
if (ceph_quota_is_max_files_exceeded(dir)) { err = -EDQUOT; goto out; } |
b7a292176
|
893 |
|
ac6713ccb
|
894 895 896 |
err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx); if (err < 0) goto out; |
2817b000b
|
897 898 899 900 |
dout("symlink in dir %p dentry %p to '%s' ", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { |
b1ee94aa5
|
901 902 |
err = PTR_ERR(req); goto out; |
2817b000b
|
903 |
} |
687265e5a
|
904 |
req->r_path2 = kstrdup(dest, GFP_KERNEL); |
a149bb9a2
|
905 906 907 908 909 |
if (!req->r_path2) { err = -ENOMEM; ceph_mdsc_put_request(req); goto out; } |
3dd69aabc
|
910 911 |
req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); |
a149bb9a2
|
912 913 |
req->r_dentry = dget(dentry); req->r_num_caps = 2; |
222b7f90b
|
914 |
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; |
2817b000b
|
915 |
req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
b748fc7a8
|
916 917 918 919 |
if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; as_ctx.pagelist = NULL; } |
2817b000b
|
920 921 922 923 |
err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); |
b1ee94aa5
|
924 925 |
out: if (err) |
2817b000b
|
926 |
d_drop(dentry); |
ac6713ccb
|
927 |
ceph_release_acl_sec_ctx(&as_ctx); |
2817b000b
|
928 929 |
return err; } |
18bb1db3e
|
930 |
static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
2817b000b
|
931 |
{ |
2678da88f
|
932 |
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); |
2817b000b
|
933 |
struct ceph_mds_request *req; |
5c31e92df
|
934 |
struct ceph_acl_sec_ctx as_ctx = {}; |
2817b000b
|
935 936 937 938 939 940 |
int err = -EROFS; int op; if (ceph_snap(dir) == CEPH_SNAPDIR) { /* mkdir .snap/foo is a MKSNAP */ op = CEPH_MDS_OP_MKSNAP; |
a455589f1
|
941 942 943 |
dout("mksnap dir %p snap '%pd' dn %p ", dir, dentry, dentry); |
2817b000b
|
944 |
} else if (ceph_snap(dir) == CEPH_NOSNAP) { |
18bb1db3e
|
945 946 |
dout("mkdir dir %p dn %p mode 0%ho ", dir, dentry, mode); |
2817b000b
|
947 948 949 950 |
op = CEPH_MDS_OP_MKDIR; } else { goto out; } |
b1ee94aa5
|
951 |
|
259636690
|
952 953 |
if (op == CEPH_MDS_OP_MKDIR && ceph_quota_is_max_files_exceeded(dir)) { |
b7a292176
|
954 955 956 |
err = -EDQUOT; goto out; } |
b1ee94aa5
|
957 |
mode |= S_IFDIR; |
5c31e92df
|
958 |
err = ceph_pre_init_acls(dir, &mode, &as_ctx); |
b1ee94aa5
|
959 960 |
if (err < 0) goto out; |
ac6713ccb
|
961 962 963 |
err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out; |
b1ee94aa5
|
964 |
|
2817b000b
|
965 966 967 968 969 970 971 972 |
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; } req->r_dentry = dget(dentry); req->r_num_caps = 2; |
3dd69aabc
|
973 974 |
req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); |
2817b000b
|
975 |
req->r_args.mkdir.mode = cpu_to_le32(mode); |
222b7f90b
|
976 |
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; |
2817b000b
|
977 |
req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
5c31e92df
|
978 979 980 |
if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; as_ctx.pagelist = NULL; |
b1ee94aa5
|
981 |
} |
2817b000b
|
982 |
err = ceph_mdsc_do_request(mdsc, dir, req); |
275dd19ea
|
983 984 985 |
if (!err && !req->r_reply_info.head->is_target && !req->r_reply_info.head->is_dentry) |
2817b000b
|
986 987 988 |
err = ceph_handle_notrace_create(dir, dentry); ceph_mdsc_put_request(req); out: |
b20a95a0d
|
989 |
if (!err) |
5c31e92df
|
990 |
ceph_init_inode_acls(d_inode(dentry), &as_ctx); |
b20a95a0d
|
991 |
else |
2817b000b
|
992 |
d_drop(dentry); |
5c31e92df
|
993 |
ceph_release_acl_sec_ctx(&as_ctx); |
2817b000b
|
994 995 996 997 998 999 |
return err; } static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { |
2678da88f
|
1000 |
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); |
2817b000b
|
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 |
struct ceph_mds_request *req; int err; if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; dout("link in dir %p old_dentry %p dentry %p ", dir, old_dentry, dentry); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); if (IS_ERR(req)) { d_drop(dentry); return PTR_ERR(req); } req->r_dentry = dget(dentry); req->r_num_caps = 2; |
4b58c9b19
|
1017 |
req->r_old_dentry = dget(old_dentry); |
3dd69aabc
|
1018 1019 |
req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); |
2817b000b
|
1020 1021 |
req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
ad88f23f4
|
1022 |
/* release LINK_SHARED on source inode (mds will lock it) */ |
d19a0b540
|
1023 |
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; |
2817b000b
|
1024 |
err = ceph_mdsc_do_request(mdsc, dir, req); |
70b666c3b
|
1025 |
if (err) { |
2817b000b
|
1026 |
d_drop(dentry); |
70b666c3b
|
1027 |
} else if (!req->r_reply_info.head->is_dentry) { |
2b0143b5c
|
1028 1029 |
ihold(d_inode(old_dentry)); d_instantiate(dentry, d_inode(old_dentry)); |
70b666c3b
|
1030 |
} |
2817b000b
|
1031 1032 1033 |
ceph_mdsc_put_request(req); return err; } |
2ccb45462
|
1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 |
static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { int result = req->r_err ? req->r_err : le32_to_cpu(req->r_reply_info.head->result); if (result == -EJUKEBOX) goto out; /* If op failed, mark everyone involved for errors */ if (result) { |
2a575f138
|
1045 1046 |
int pathlen = 0; u64 base = 0; |
2ccb45462
|
1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 |
char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, &base, 0); /* mark error on parent + clear complete */ mapping_set_error(req->r_parent->i_mapping, result); ceph_dir_clear_complete(req->r_parent); /* drop the dentry -- we don't know its status */ if (!d_unhashed(req->r_dentry)) d_drop(req->r_dentry); /* mark inode itself for an error (since metadata is bogus) */ mapping_set_error(req->r_old_inode->i_mapping, result); pr_warn("ceph: async unlink failure path=(%llx)%s result=%d! ", base, IS_ERR(path) ? "<<bad>>" : path, result); ceph_mdsc_free_path(path, pathlen); } out: iput(req->r_old_inode); ceph_mdsc_release_dir_caps(req); } static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry) { struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_dentry_info *di; int got = 0, want = CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_UNLINK; spin_lock(&ci->i_ceph_lock); if ((__ceph_caps_issued(ci, NULL) & want) == want) { ceph_take_cap_refs(ci, want, false); got = want; } spin_unlock(&ci->i_ceph_lock); /* If we didn't get anything, return 0 */ if (!got) return 0; spin_lock(&dentry->d_lock); di = ceph_dentry(dentry); /* * - We are holding Fx, which implies Fs caps. * - Only support async unlink for primary linkage */ if (atomic_read(&ci->i_shared_gen) != di->lease_shared_gen || !(di->flags & CEPH_DENTRY_PRIMARY_LINK)) want = 0; spin_unlock(&dentry->d_lock); /* Do we still want what we've got? */ if (want == got) return got; ceph_put_cap_refs(ci, got); return 0; } |
2817b000b
|
1106 |
/* |
2817b000b
|
1107 1108 1109 1110 |
* rmdir and unlink are differ only by the metadata op code */ static int ceph_unlink(struct inode *dir, struct dentry *dentry) { |
3d14c5d2b
|
1111 1112 |
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; |
2b0143b5c
|
1113 |
struct inode *inode = d_inode(dentry); |
2817b000b
|
1114 |
struct ceph_mds_request *req; |
2ccb45462
|
1115 |
bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); |
2817b000b
|
1116 1117 1118 1119 1120 |
int err = -EROFS; int op; if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ |
a455589f1
|
1121 1122 |
dout("rmsnap dir %p '%pd' dn %p ", dir, dentry, dentry); |
2817b000b
|
1123 1124 1125 1126 1127 |
op = CEPH_MDS_OP_RMSNAP; } else if (ceph_snap(dir) == CEPH_NOSNAP) { dout("unlink/rmdir dir %p dn %p inode %p ", dir, dentry, inode); |
e36cb0b89
|
1128 |
op = d_is_dir(dentry) ? |
2817b000b
|
1129 1130 1131 |
CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else goto out; |
2ccb45462
|
1132 |
retry: |
2817b000b
|
1133 1134 1135 1136 1137 1138 1139 |
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; } req->r_dentry = dget(dentry); req->r_num_caps = 2; |
3dd69aabc
|
1140 |
req->r_parent = dir; |
2817b000b
|
1141 1142 |
req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
6ef0bc6dd
|
1143 |
req->r_inode_drop = ceph_drop_caps_for_unlink(inode); |
2ccb45462
|
1144 1145 1146 |
if (try_async && op == CEPH_MDS_OP_UNLINK && (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) { |
ebce3eb2f
|
1147 |
dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir), |
2ccb45462
|
1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 |
dentry->d_name.len, dentry->d_name.name, ceph_cap_string(req->r_dir_caps)); set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags); req->r_callback = ceph_async_unlink_cb; req->r_old_inode = d_inode(dentry); ihold(req->r_old_inode); err = ceph_mdsc_submit_request(mdsc, dir, req); if (!err) { /* * We have enough caps, so we assume that the unlink * will succeed. Fix up the target inode and dcache. */ drop_nlink(inode); d_delete(dentry); } else if (err == -EJUKEBOX) { try_async = false; ceph_mdsc_put_request(req); goto retry; } } else { set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) d_delete(dentry); } |
2817b000b
|
1173 1174 1175 1176 1177 1178 |
ceph_mdsc_put_request(req); out: return err; } static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, |
1cd66c93b
|
1179 1180 |
struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) |
2817b000b
|
1181 |
{ |
2678da88f
|
1182 |
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb); |
2817b000b
|
1183 |
struct ceph_mds_request *req; |
0ea611a3b
|
1184 |
int op = CEPH_MDS_OP_RENAME; |
2817b000b
|
1185 |
int err; |
1cd66c93b
|
1186 1187 |
if (flags) return -EINVAL; |
2817b000b
|
1188 1189 |
if (ceph_snap(old_dir) != ceph_snap(new_dir)) return -EXDEV; |
0ea611a3b
|
1190 1191 1192 1193 1194 |
if (ceph_snap(old_dir) != CEPH_NOSNAP) { if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR) op = CEPH_MDS_OP_RENAMESNAP; else return -EROFS; |
dffdcd714
|
1195 1196 1197 1198 1199 |
} else if (old_dir != new_dir) { err = ceph_quota_check_rename(mdsc, d_inode(old_dentry), new_dir); if (err) return err; |
0ea611a3b
|
1200 |
} |
cafe21a4f
|
1201 |
|
2817b000b
|
1202 1203 1204 |
dout("rename dir %p dentry %p to dir %p dentry %p ", old_dir, old_dentry, new_dir, new_dentry); |
0ea611a3b
|
1205 |
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); |
2817b000b
|
1206 1207 |
if (IS_ERR(req)) return PTR_ERR(req); |
180061a58
|
1208 |
ihold(old_dir); |
2817b000b
|
1209 1210 1211 |
req->r_dentry = dget(new_dentry); req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); |
180061a58
|
1212 |
req->r_old_dentry_dir = old_dir; |
3dd69aabc
|
1213 1214 |
req->r_parent = new_dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); |
2817b000b
|
1215 1216 1217 1218 1219 |
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_RDCACHE on source inode (mds will lock it) */ |
d19a0b540
|
1220 |
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; |
6ef0bc6dd
|
1221 1222 1223 1224 |
if (d_really_is_positive(new_dentry)) { req->r_inode_drop = ceph_drop_caps_for_unlink(d_inode(new_dentry)); } |
2817b000b
|
1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 |
err = ceph_mdsc_do_request(mdsc, old_dir, req); if (!err && !req->r_reply_info.head->is_dentry) { /* * Normally d_move() is done by fill_trace (called by * do_request, above). If there is no trace, we need * to do it here. */ d_move(old_dentry, new_dentry); } ceph_mdsc_put_request(req); return err; } |
81a6cf2d3
|
1237 |
/* |
37c4efc1d
|
1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 |
* Move dentry to tail of mdsc->dentry_leases list when lease is updated. * Leases at front of the list will expire first. (Assume all leases have * similar duration) * * Called under dentry->d_lock. */ void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; struct ceph_mds_client *mdsc; dout("dentry_lease_touch %p %p '%pd' ", di, dn, dn); di->flags |= CEPH_DENTRY_LEASE_LIST; if (di->flags & CEPH_DENTRY_SHRINK_LIST) { di->flags |= CEPH_DENTRY_REFERENCED; return; } mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_move_tail(&di->lease_list, &mdsc->dentry_leases); spin_unlock(&mdsc->dentry_list_lock); } static void __dentry_dir_lease_touch(struct ceph_mds_client* mdsc, struct ceph_dentry_info *di) { di->flags &= ~(CEPH_DENTRY_LEASE_LIST | CEPH_DENTRY_REFERENCED); di->lease_gen = 0; di->time = jiffies; list_move_tail(&di->lease_list, &mdsc->dentry_dir_leases); } /* * When dir lease is used, add dentry to tail of mdsc->dentry_dir_leases * list if it's not in the list, otherwise set 'referenced' flag. * * Called under dentry->d_lock. */ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; struct ceph_mds_client *mdsc; |
0eb308531
|
1283 1284 |
dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx) ", |
37c4efc1d
|
1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 |
di, dn, dn, di->offset); if (!list_empty(&di->lease_list)) { if (di->flags & CEPH_DENTRY_LEASE_LIST) { /* don't remove dentry from dentry lease list * if its lease is valid */ if (__dentry_lease_is_valid(di)) return; } else { di->flags |= CEPH_DENTRY_REFERENCED; return; } } if (di->flags & CEPH_DENTRY_SHRINK_LIST) { di->flags |= CEPH_DENTRY_REFERENCED; di->flags &= ~CEPH_DENTRY_LEASE_LIST; return; } mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); __dentry_dir_lease_touch(mdsc, di), spin_unlock(&mdsc->dentry_list_lock); } static void __dentry_lease_unlist(struct ceph_dentry_info *di) { struct ceph_mds_client *mdsc; if (di->flags & CEPH_DENTRY_SHRINK_LIST) return; if (list_empty(&di->lease_list)) return; mdsc = ceph_sb_to_client(di->dentry->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_del_init(&di->lease_list); spin_unlock(&mdsc->dentry_list_lock); } enum { KEEP = 0, DELETE = 1, TOUCH = 2, STOP = 4, }; struct ceph_lease_walk_control { bool dir_lease; |
fe33032da
|
1334 |
bool expire_dir_lease; |
37c4efc1d
|
1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 |
unsigned long nr_to_scan; unsigned long dir_lease_ttl; }; static unsigned long __dentry_leases_walk(struct ceph_mds_client *mdsc, struct ceph_lease_walk_control *lwc, int (*check)(struct dentry*, void*)) { struct ceph_dentry_info *di, *tmp; struct dentry *dentry, *last = NULL; struct list_head* list; LIST_HEAD(dispose); unsigned long freed = 0; int ret = 0; list = lwc->dir_lease ? &mdsc->dentry_dir_leases : &mdsc->dentry_leases; spin_lock(&mdsc->dentry_list_lock); list_for_each_entry_safe(di, tmp, list, lease_list) { if (!lwc->nr_to_scan) break; --lwc->nr_to_scan; dentry = di->dentry; if (last == dentry) break; if (!spin_trylock(&dentry->d_lock)) continue; |
516162b92
|
1364 |
if (__lockref_is_dead(&dentry->d_lockref)) { |
37c4efc1d
|
1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 |
list_del_init(&di->lease_list); goto next; } ret = check(dentry, lwc); if (ret & TOUCH) { /* move it into tail of dir lease list */ __dentry_dir_lease_touch(mdsc, di); if (!last) last = dentry; } if (ret & DELETE) { /* stale lease */ di->flags &= ~CEPH_DENTRY_REFERENCED; if (dentry->d_lockref.count > 0) { /* update_dentry_lease() will re-add * it to lease list, or * ceph_d_delete() will return 1 when * last reference is dropped */ list_del_init(&di->lease_list); } else { di->flags |= CEPH_DENTRY_SHRINK_LIST; list_move_tail(&di->lease_list, &dispose); dget_dlock(dentry); } } next: spin_unlock(&dentry->d_lock); if (ret & STOP) break; } spin_unlock(&mdsc->dentry_list_lock); while (!list_empty(&dispose)) { di = list_first_entry(&dispose, struct ceph_dentry_info, lease_list); dentry = di->dentry; spin_lock(&dentry->d_lock); list_del_init(&di->lease_list); di->flags &= ~CEPH_DENTRY_SHRINK_LIST; if (di->flags & CEPH_DENTRY_REFERENCED) { spin_lock(&mdsc->dentry_list_lock); if (di->flags & CEPH_DENTRY_LEASE_LIST) { list_add_tail(&di->lease_list, &mdsc->dentry_leases); } else { __dentry_dir_lease_touch(mdsc, di); } spin_unlock(&mdsc->dentry_list_lock); } else { freed++; } spin_unlock(&dentry->d_lock); /* ceph_d_delete() does the trick */ dput(dentry); } return freed; } static int __dentry_lease_check(struct dentry *dentry, void *arg) { struct ceph_dentry_info *di = ceph_dentry(dentry); int ret; if (__dentry_lease_is_valid(di)) return STOP; ret = __dir_lease_try_check(dentry); if (ret == -EBUSY) return KEEP; if (ret > 0) return TOUCH; return DELETE; } static int __dir_lease_check(struct dentry *dentry, void *arg) { struct ceph_lease_walk_control *lwc = arg; struct ceph_dentry_info *di = ceph_dentry(dentry); int ret = __dir_lease_try_check(dentry); if (ret == -EBUSY) return KEEP; if (ret > 0) { if (time_before(jiffies, di->time + lwc->dir_lease_ttl)) return STOP; /* Move dentry to tail of dir lease list if we don't want * to delete it. 
So dentries in the list are checked in a * round robin manner */ |
fe33032da
|
1455 1456 1457 1458 1459 1460 1461 |
if (!lwc->expire_dir_lease) return TOUCH; if (dentry->d_lockref.count > 0 || (di->flags & CEPH_DENTRY_REFERENCED)) return TOUCH; /* invalidate dir lease */ di->lease_shared_gen = 0; |
37c4efc1d
|
1462 1463 1464 1465 1466 1467 1468 |
} return DELETE; } int ceph_trim_dentries(struct ceph_mds_client *mdsc) { struct ceph_lease_walk_control lwc; |
fe33032da
|
1469 |
unsigned long count; |
37c4efc1d
|
1470 |
unsigned long freed; |
fe33032da
|
1471 1472 1473 1474 1475 1476 1477 |
spin_lock(&mdsc->caps_list_lock); if (mdsc->caps_use_max > 0 && mdsc->caps_use_count > mdsc->caps_use_max) count = mdsc->caps_use_count - mdsc->caps_use_max; else count = 0; spin_unlock(&mdsc->caps_list_lock); |
37c4efc1d
|
1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 |
lwc.dir_lease = false; lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2; freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check); if (!lwc.nr_to_scan) /* more invalid leases */ return -EAGAIN; if (lwc.nr_to_scan < CEPH_CAPS_PER_RELEASE) lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE; lwc.dir_lease = true; |
fe33032da
|
1488 1489 |
lwc.expire_dir_lease = freed < count; lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ; |
37c4efc1d
|
1490 1491 1492 1493 1494 1495 1496 1497 |
freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check); if (!lwc.nr_to_scan) /* more to check */ return -EAGAIN; return freed > 0 ? 1 : 0; } /* |
81a6cf2d3
|
1498 1499 1500 1501 |
* Ensure a dentry lease will no longer revalidate. */ void ceph_invalidate_dentry_lease(struct dentry *dentry) { |
37c4efc1d
|
1502 |
struct ceph_dentry_info *di = ceph_dentry(dentry); |
81a6cf2d3
|
1503 |
spin_lock(&dentry->d_lock); |
37c4efc1d
|
1504 1505 |
di->time = jiffies; di->lease_shared_gen = 0; |
f5e17aed3
|
1506 |
di->flags &= ~CEPH_DENTRY_PRIMARY_LINK; |
37c4efc1d
|
1507 |
__dentry_lease_unlist(di); |
81a6cf2d3
|
1508 1509 |
spin_unlock(&dentry->d_lock); } |
2817b000b
|
1510 1511 1512 1513 1514 |
/*
 * Check if dentry lease is valid.  If not, delete the lease.  Try to
 * renew if the lease is more than half up.
 */
1e9c2eb68
|
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 |
static bool __dentry_lease_is_valid(struct ceph_dentry_info *di) { struct ceph_mds_session *session; if (!di->lease_gen) return false; session = di->lease_session; if (session) { u32 gen; unsigned long ttl; spin_lock(&session->s_gen_ttl_lock); gen = session->s_cap_gen; ttl = session->s_cap_ttl; spin_unlock(&session->s_gen_ttl_lock); if (di->lease_gen == gen && time_before(jiffies, ttl) && time_before(jiffies, di->time)) return true; } di->lease_gen = 0; return false; } |
8f2a98ef3
|
1540 |
static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) |
2817b000b
|
1541 1542 |
{ struct ceph_dentry_info *di; |
2817b000b
|
1543 |
struct ceph_mds_session *session = NULL; |
2817b000b
|
1544 |
u32 seq = 0; |
1e9c2eb68
|
1545 |
int valid = 0; |
2817b000b
|
1546 1547 1548 |
spin_lock(&dentry->d_lock); di = ceph_dentry(dentry); |
1e9c2eb68
|
1549 1550 |
if (di && __dentry_lease_is_valid(di)) { valid = 1; |
2817b000b
|
1551 |
|
1e9c2eb68
|
1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 |
if (di->lease_renew_after && time_after(jiffies, di->lease_renew_after)) { /* * We should renew. If we're in RCU walk mode * though, we can't do that so just return * -ECHILD. */ if (flags & LOOKUP_RCU) { valid = -ECHILD; } else { session = ceph_get_mds_session(di->lease_session); seq = di->lease_seq; di->lease_renew_after = 0; di->lease_renew_from = jiffies; |
2817b000b
|
1566 |
} |
2817b000b
|
1567 1568 1569 1570 1571 |
} } spin_unlock(&dentry->d_lock); if (session) { |
8f2a98ef3
|
1572 |
ceph_mdsc_lease_send_msg(session, dentry, |
2817b000b
|
1573 1574 1575 1576 1577 1578 1579 1580 1581 |
CEPH_MDS_LEASE_RENEW, seq); ceph_put_mds_session(session); } dout("dentry_lease_is_valid - dentry %p = %d ", dentry, valid); return valid; } /* |
1e9c2eb68
|
1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 |
* Called under dentry->d_lock. */ static int __dir_lease_try_check(const struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); struct inode *dir; struct ceph_inode_info *ci; int valid = 0; if (!di->lease_shared_gen) return 0; if (IS_ROOT(dentry)) return 0; dir = d_inode(dentry->d_parent); ci = ceph_inode(dir); if (spin_trylock(&ci->i_ceph_lock)) { if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 0)) valid = 1; spin_unlock(&ci->i_ceph_lock); } else { valid = -EBUSY; } if (!valid) di->lease_shared_gen = 0; return valid; } /* |
2817b000b
|
1614 1615 |
* Check if directory-wide content lease/cap is valid. */ |
719a2514e
|
1616 1617 |
static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, struct ceph_mds_client *mdsc) |
2817b000b
|
1618 1619 |
{ struct ceph_inode_info *ci = ceph_inode(dir); |
feab6ac25
|
1620 1621 |
int valid; int shared_gen; |
2817b000b
|
1622 |
|
be655596b
|
1623 |
spin_lock(&ci->i_ceph_lock); |
feab6ac25
|
1624 |
valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); |
719a2514e
|
1625 1626 1627 1628 |
if (valid) { __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); shared_gen = atomic_read(&ci->i_shared_gen); } |
be655596b
|
1629 |
spin_unlock(&ci->i_ceph_lock); |
feab6ac25
|
1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 |
if (valid) { struct ceph_dentry_info *di; spin_lock(&dentry->d_lock); di = ceph_dentry(dentry); if (dir == d_inode(dentry->d_parent) && di && di->lease_shared_gen == shared_gen) __ceph_dentry_dir_lease_touch(di); else valid = 0; spin_unlock(&dentry->d_lock); } dout("dir_lease_is_valid dir %p v%u dentry %p = %d ", dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid); |
2817b000b
|
1644 1645 1646 1647 1648 1649 |
return valid; } /* * Check if cached dentry can be trusted. */ |
0b728e191
|
1650 |
static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) |
2817b000b
|
1651 |
{ |
bf1c6aca9
|
1652 |
int valid = 0; |
641235d8f
|
1653 |
struct dentry *parent; |
aa8dd8167
|
1654 |
struct inode *dir, *inode; |
719a2514e
|
1655 |
struct ceph_mds_client *mdsc; |
34286d666
|
1656 |
|
f49d1e058
|
1657 |
if (flags & LOOKUP_RCU) { |
52953d559
|
1658 |
parent = READ_ONCE(dentry->d_parent); |
f49d1e058
|
1659 1660 1661 |
dir = d_inode_rcu(parent); if (!dir) return -ECHILD; |
aa8dd8167
|
1662 |
inode = d_inode_rcu(dentry); |
f49d1e058
|
1663 1664 1665 |
} else { parent = dget_parent(dentry); dir = d_inode(parent); |
aa8dd8167
|
1666 |
inode = d_inode(dentry); |
f49d1e058
|
1667 |
} |
34286d666
|
1668 |
|
0eb308531
|
1669 1670 |
dout("d_revalidate %p '%pd' inode %p offset 0x%llx ", dentry, |
aa8dd8167
|
1671 |
dentry, inode, ceph_dentry(dentry)->offset); |
2817b000b
|
1672 |
|
719a2514e
|
1673 |
mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; |
2817b000b
|
1674 1675 |
/* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { |
a455589f1
|
1676 1677 |
dout("d_revalidate %p '%pd' inode %p is SNAPPED ", dentry, |
aa8dd8167
|
1678 |
dentry, inode); |
bf1c6aca9
|
1679 |
valid = 1; |
aa8dd8167
|
1680 |
} else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { |
bf1c6aca9
|
1681 |
valid = 1; |
14fb9c9ef
|
1682 |
} else { |
8f2a98ef3
|
1683 |
valid = dentry_lease_is_valid(dentry, flags); |
14fb9c9ef
|
1684 1685 |
if (valid == -ECHILD) return valid; |
719a2514e
|
1686 |
if (valid || dir_lease_is_valid(dir, dentry, mdsc)) { |
aa8dd8167
|
1687 1688 |
if (inode) valid = ceph_is_any_caps(inode); |
14fb9c9ef
|
1689 1690 1691 |
else valid = 1; } |
2817b000b
|
1692 |
} |
2817b000b
|
1693 |
|
200fd27c8
|
1694 |
if (!valid) { |
200fd27c8
|
1695 |
struct ceph_mds_request *req; |
1097680d7
|
1696 1697 |
int op, err; u32 mask; |
200fd27c8
|
1698 |
|
f49d1e058
|
1699 1700 |
if (flags & LOOKUP_RCU) return -ECHILD; |
f9009efac
|
1701 |
percpu_counter_inc(&mdsc->metric.d_lease_mis); |
200fd27c8
|
1702 |
op = ceph_snap(dir) == CEPH_SNAPDIR ? |
5eb9f6040
|
1703 |
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; |
200fd27c8
|
1704 1705 1706 |
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (!IS_ERR(req)) { req->r_dentry = dget(dentry); |
5eb9f6040
|
1707 1708 |
req->r_num_caps = 2; req->r_parent = dir; |
200fd27c8
|
1709 1710 1711 1712 |
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; |
1097680d7
|
1713 |
req->r_args.getattr.mask = cpu_to_le32(mask); |
200fd27c8
|
1714 |
|
200fd27c8
|
1715 |
err = ceph_mdsc_do_request(mdsc, NULL, req); |
c3f4688a0
|
1716 1717 1718 1719 1720 1721 1722 1723 1724 |
switch (err) { case 0: if (d_really_is_positive(dentry) && d_inode(dentry) == req->r_target_inode) valid = 1; break; case -ENOENT: if (d_really_is_negative(dentry)) valid = 1; |
df561f668
|
1725 |
fallthrough; |
c3f4688a0
|
1726 1727 |
default: break; |
200fd27c8
|
1728 1729 1730 1731 1732 1733 |
} ceph_mdsc_put_request(req); dout("d_revalidate %p lookup result=%d ", dentry, err); } |
f9009efac
|
1734 1735 |
} else { percpu_counter_inc(&mdsc->metric.d_lease_hit); |
200fd27c8
|
1736 |
} |
bf1c6aca9
|
1737 1738 |
dout("d_revalidate %p %s ", dentry, valid ? "valid" : "invalid"); |
37c4efc1d
|
1739 |
if (!valid) |
9215aeea6
|
1740 |
ceph_dir_clear_complete(dir); |
641235d8f
|
1741 |
|
f49d1e058
|
1742 1743 |
if (!(flags & LOOKUP_RCU)) dput(parent); |
bf1c6aca9
|
1744 |
return valid; |
2817b000b
|
1745 1746 1747 |
} /* |
1e9c2eb68
|
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 |
* Delete unused dentry that doesn't have valid lease * * Called under dentry->d_lock. */ static int ceph_d_delete(const struct dentry *dentry) { struct ceph_dentry_info *di; /* won't release caps */ if (d_really_is_negative(dentry)) return 0; if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) return 0; /* vaild lease? */ di = ceph_dentry(dentry); if (di) { if (__dentry_lease_is_valid(di)) return 0; if (__dir_lease_try_check(dentry)) return 0; } return 1; } /* |
147851d2d
|
1773 |
* Release our ceph_dentry_info. |
2817b000b
|
1774 |
*/ |
147851d2d
|
1775 |
static void ceph_d_release(struct dentry *dentry) |
2817b000b
|
1776 1777 |
{ struct ceph_dentry_info *di = ceph_dentry(dentry); |
f9009efac
|
1778 |
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
2817b000b
|
1779 |
|
147851d2d
|
1780 1781 |
dout("d_release %p ", dentry); |
5b484a513
|
1782 |
|
f9009efac
|
1783 |
atomic64_dec(&fsc->mdsc->metric.total_dentries); |
5b484a513
|
1784 |
spin_lock(&dentry->d_lock); |
37c4efc1d
|
1785 |
__dentry_lease_unlist(di); |
5b484a513
|
1786 1787 |
dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); |
3d8eb7a94
|
1788 1789 1790 |
if (di->lease_session) ceph_put_mds_session(di->lease_session); kmem_cache_free(ceph_dentry_cachep, di); |
2817b000b
|
1791 |
} |
b58dc4100
|
1792 1793 1794 1795 1796 1797 1798 1799 |
/* * When the VFS prunes a dentry from the cache, we need to clear the * complete flag on the parent directory. * * Called under dentry->d_lock. */ static void ceph_d_prune(struct dentry *dentry) { |
5495c2d04
|
1800 1801 1802 1803 1804 |
struct ceph_inode_info *dir_ci; struct ceph_dentry_info *di; dout("ceph_d_prune %pd %p ", dentry, dentry); |
b58dc4100
|
1805 1806 |
/* do we have a valid parent? */ |
8842b3be9
|
1807 |
if (IS_ROOT(dentry)) |
b58dc4100
|
1808 |
return; |
5495c2d04
|
1809 1810 1811 |
/* we hold d_lock, so d_parent is stable */ dir_ci = ceph_inode(d_inode(dentry->d_parent)); if (dir_ci->i_vino.snap == CEPH_SNAPDIR) |
b58dc4100
|
1812 |
return; |
2817b000b
|
1813 |
|
5495c2d04
|
1814 1815 |
/* who calls d_delete() should also disable dcache readdir */ if (d_really_is_negative(dentry)) |
18fc8abdb
|
1816 |
return; |
5495c2d04
|
1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 |
/* d_fsdata does not get cleared until d_release */ if (!d_unhashed(dentry)) { __ceph_dir_clear_complete(dir_ci); return; } /* Disable dcache readdir just in case that someone called d_drop() * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED * properly (dcache readdir is still enabled) */ di = ceph_dentry(dentry); if (di->offset > 0 && di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen)) __ceph_dir_clear_ordered(dir_ci); |
b58dc4100
|
1830 |
} |
2817b000b
|
1831 1832 1833 1834 1835 1836 1837 1838 |
/*
 * read() on a dir.  This weird interface hack only works if mounted
 * with '-o dirstat'.
 *
 * On the first read of an open directory a text summary of the inode's
 * entry counts, recursive counts, recursive byte total and rctime is
 * formatted, one field per line, and cached in the per-file info; this
 * and later reads copy out of that cached text.
 *
 * Returns the number of bytes copied to @buf, 0 once *ppos is at or past
 * the end of the text, -EISDIR if the DIRSTAT mount option is not set,
 * -ENOMEM if the text buffer cannot be allocated, or -EFAULT if nothing
 * at all could be copied to user space.
 */
static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
			     loff_t *ppos)
{
	struct ceph_dir_file_info *dfi = file->private_data;
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	int left;
	const int bufsize = 1024;

	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
		return -EISDIR;

	if (!dfi->dir_info) {
		dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
		if (!dfi->dir_info)
			return -ENOMEM;
		/*
		 * NOTE(review): the column padding between each label and
		 * its value may originally have been wider than one space;
		 * confirm against the upstream file.
		 */
		dfi->dir_info_len =
			snprintf(dfi->dir_info, bufsize,
				"entries: %20lld\n"
				" files: %20lld\n"
				" subdirs: %20lld\n"
				"rentries: %20lld\n"
				" rfiles: %20lld\n"
				" rsubdirs: %20lld\n"
				"rbytes: %20lld\n"
				"rctime: %10lld.%09ld\n",
				ci->i_files + ci->i_subdirs,
				ci->i_files,
				ci->i_subdirs,
				ci->i_rfiles + ci->i_rsubdirs,
				ci->i_rfiles,
				ci->i_rsubdirs,
				ci->i_rbytes,
				ci->i_rctime.tv_sec,
				ci->i_rctime.tv_nsec);
	}

	if (*ppos >= dfi->dir_info_len)
		return 0;
	/* never copy past the end of the cached text */
	size = min_t(unsigned, size, dfi->dir_info_len-*ppos);
	left = copy_to_user(buf, dfi->dir_info + *ppos, size);
	if (left == size)
		return -EFAULT;
	/* a partial copy is reported as a short read */
	*ppos += (size - left);
	return size - left;
}
2817b000b
|
1888 |
|
2817b000b
|
1889 |
|
6c0f3af72
|
1890 1891 1892 1893 |
/* * Return name hash for a given dentry. This is dependent on * the parent directory's hash function. */ |
e5f86dc37
|
1894 |
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) |
6c0f3af72
|
1895 |
{ |
6c0f3af72
|
1896 |
struct ceph_inode_info *dci = ceph_inode(dir); |
76a495d66
|
1897 |
unsigned hash; |
6c0f3af72
|
1898 1899 1900 1901 1902 1903 1904 |
switch (dci->i_dir_layout.dl_dir_hash) { case 0: /* for backward compat */ case CEPH_STR_HASH_LINUX: return dn->d_name.hash; default: |
76a495d66
|
1905 1906 |
spin_lock(&dn->d_lock); hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash, |
6c0f3af72
|
1907 |
dn->d_name.name, dn->d_name.len); |
76a495d66
|
1908 1909 |
spin_unlock(&dn->d_lock); return hash; |
6c0f3af72
|
1910 1911 |
} } |
2817b000b
|
1912 1913 |
const struct file_operations ceph_dir_fops = { .read = ceph_read_dir, |
77acfa29e
|
1914 |
.iterate = ceph_readdir, |
2817b000b
|
1915 1916 1917 1918 |
.llseek = ceph_dir_llseek, .open = ceph_open, .release = ceph_release, .unlocked_ioctl = ceph_ioctl, |
18bd6caae
|
1919 |
.compat_ioctl = compat_ptr_ioctl, |
da819c815
|
1920 |
.fsync = ceph_fsync, |
597817ddb
|
1921 1922 |
.lock = ceph_lock, .flock = ceph_flock, |
2817b000b
|
1923 |
}; |
38c48b5f0
|
1924 1925 1926 1927 1928 1929 |
/*
 * Reduced file operations table: only open/release, iterate and llseek
 * are provided (no read, ioctl, fsync or lock handlers).
 */
const struct file_operations ceph_snapdir_fops = {
	.open		= ceph_open,
	.release	= ceph_release,
	.iterate	= ceph_readdir,
	.llseek		= ceph_dir_llseek,
};
2817b000b
|
1930 1931 1932 1933 1934 |
const struct inode_operations ceph_dir_iops = { .lookup = ceph_lookup, .permission = ceph_permission, .getattr = ceph_getattr, .setattr = ceph_setattr, |
2817b000b
|
1935 |
.listxattr = ceph_listxattr, |
7221fe4c2
|
1936 |
.get_acl = ceph_get_acl, |
72466d0b9
|
1937 |
.set_acl = ceph_set_acl, |
2817b000b
|
1938 1939 1940 1941 1942 1943 1944 1945 |
.mknod = ceph_mknod, .symlink = ceph_symlink, .mkdir = ceph_mkdir, .link = ceph_link, .unlink = ceph_unlink, .rmdir = ceph_unlink, .rename = ceph_rename, .create = ceph_create, |
2d83bde9a
|
1946 |
.atomic_open = ceph_atomic_open, |
2817b000b
|
1947 |
}; |
38c48b5f0
|
1948 1949 1950 1951 1952 1953 |
const struct inode_operations ceph_snapdir_iops = { .lookup = ceph_lookup, .permission = ceph_permission, .getattr = ceph_getattr, .mkdir = ceph_mkdir, .rmdir = ceph_unlink, |
0ea611a3b
|
1954 |
.rename = ceph_rename, |
38c48b5f0
|
1955 |
}; |
52dfb8ac0
|
1956 |
const struct dentry_operations ceph_dentry_ops = { |
2817b000b
|
1957 |
.d_revalidate = ceph_d_revalidate, |
1e9c2eb68
|
1958 |
.d_delete = ceph_d_delete, |
147851d2d
|
1959 |
.d_release = ceph_d_release, |
b58dc4100
|
1960 |
.d_prune = ceph_d_prune, |
ad5cb123f
|
1961 |
.d_init = ceph_d_init, |
2817b000b
|
1962 |
}; |