Blame view

fs/ceph/inode.c 60.5 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
2
  #include <linux/ceph/ceph_debug.h>
355da1eb7   Sage Weil   ceph: inode opera...
3
4
5
  
  #include <linux/module.h>
  #include <linux/fs.h>
355da1eb7   Sage Weil   ceph: inode opera...
6
7
8
9
  #include <linux/slab.h>
  #include <linux/string.h>
  #include <linux/uaccess.h>
  #include <linux/kernel.h>
355da1eb7   Sage Weil   ceph: inode opera...
10
11
  #include <linux/writeback.h>
  #include <linux/vmalloc.h>
2cdeb1e47   Andreas Gruenbacher   ceph: Switch to g...
12
  #include <linux/xattr.h>
4db658ea0   Linus Torvalds   ceph: Fix up afte...
13
  #include <linux/posix_acl.h>
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
14
  #include <linux/random.h>
a407846ef   Yan, Zheng   ceph: don't assum...
15
  #include <linux/sort.h>
355da1eb7   Sage Weil   ceph: inode opera...
16
17
  
  #include "super.h"
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
18
  #include "mds_client.h"
99ccbd229   Milosz Tanski   ceph: use fscache...
19
  #include "cache.h"
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
20
  #include <linux/ceph/decode.h>
355da1eb7   Sage Weil   ceph: inode opera...
21
22
23
24
25
26
27
28
29
30
31
32
33
34
  
  /*
   * Ceph inode operations
   *
   * Implement basic inode helpers (get, alloc) and inode ops (getattr,
   * setattr, etc.), xattr helpers, and helpers for assimilating
   * metadata returned by the MDS into our cache.
   *
   * Also define helpers for doing asynchronous writeback, invalidation,
   * and truncation for the benefit of those who can't afford to block
   * (typically because they are in the message handler path).
   */
  
  static const struct inode_operations ceph_symlink_iops;
3c6f6b79a   Sage Weil   ceph: cleanup asy...
35
36
37
  static void ceph_invalidate_work(struct work_struct *work);
  static void ceph_writeback_work(struct work_struct *work);
  static void ceph_vmtruncate_work(struct work_struct *work);
355da1eb7   Sage Weil   ceph: inode opera...
38
39
40
41
  
  /*
   * find or create an inode, given the ceph ino number
   */
ad1fee96c   Yehuda Sadeh   ceph: add ino32 m...
42
43
44
45
46
47
  /*
   * "set" callback handed to iget5_locked() by ceph_get_inode(): record
   * the ceph vino passed through @data in the ceph inode and derive
   * i_ino from it.  Always returns 0.
   */
  static int ceph_set_ino_cb(struct inode *inode, void *data)
  {
  	struct ceph_vino *vino = data;

  	ceph_inode(inode)->i_vino = *vino;
  	inode->i_ino = ceph_vino_to_ino(*vino);
  	return 0;
  }
355da1eb7   Sage Weil   ceph: inode opera...
48
49
50
51
52
53
  struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
  {
  	struct inode *inode;
  	ino_t t = ceph_vino_to_ino(vino);
  
  	inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino);
d37b1d994   Markus Elfring   ceph: adjust 36 c...
54
  	if (!inode)
355da1eb7   Sage Weil   ceph: inode opera...
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
  		return ERR_PTR(-ENOMEM);
  	if (inode->i_state & I_NEW) {
  		dout("get_inode created new inode %p %llx.%llx ino %llx
  ",
  		     inode, ceph_vinop(inode), (u64)inode->i_ino);
  		unlock_new_inode(inode);
  	}
  
  	dout("get_inode on %lu=%llx.%llx got %p
  ", inode->i_ino, vino.ino,
  	     vino.snap, inode);
  	return inode;
  }
  
  /*
   * get/constuct snapdir inode for a given directory
   */
  struct inode *ceph_get_snapdir(struct inode *parent)
  {
  	struct ceph_vino vino = {
  		.ino = ceph_ino(parent),
  		.snap = CEPH_SNAPDIR,
  	};
  	struct inode *inode = ceph_get_inode(parent->i_sb, vino);
b377ff13b   Sage Weil   ceph: initialize ...
79
  	struct ceph_inode_info *ci = ceph_inode(inode);
355da1eb7   Sage Weil   ceph: inode opera...
80
81
82
  
  	BUG_ON(!S_ISDIR(parent->i_mode));
  	if (IS_ERR(inode))
7e34bc524   Julia Lawall   fs/ceph: Use ERR_...
83
  		return inode;
355da1eb7   Sage Weil   ceph: inode opera...
84
85
86
  	inode->i_mode = parent->i_mode;
  	inode->i_uid = parent->i_uid;
  	inode->i_gid = parent->i_gid;
38c48b5f0   Yan, Zheng   ceph: provide sep...
87
88
  	inode->i_op = &ceph_snapdir_iops;
  	inode->i_fop = &ceph_snapdir_fops;
b377ff13b   Sage Weil   ceph: initialize ...
89
90
  	ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
  	ci->i_rbytes = 0;
355da1eb7   Sage Weil   ceph: inode opera...
91
92
93
94
95
96
97
  	return inode;
  }
  
  const struct inode_operations ceph_file_iops = {
  	.permission = ceph_permission,
  	.setattr = ceph_setattr,
  	.getattr = ceph_getattr,
355da1eb7   Sage Weil   ceph: inode opera...
98
  	.listxattr = ceph_listxattr,
7221fe4c2   Guangliang Zhao   ceph: add acl for...
99
  	.get_acl = ceph_get_acl,
72466d0b9   Sage Weil   ceph: fix posix A...
100
  	.set_acl = ceph_set_acl,
355da1eb7   Sage Weil   ceph: inode opera...
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  };
  
  
  /*
   * We use a 'frag tree' to keep track of the MDS's directory fragments
   * for a given inode (usually there is just a single fragment).  We
   * need to know when a child frag is delegated to a new MDS, or when
   * it is flagged as replicated, so we can direct our requests
   * accordingly.
   */
  
  /*
   * find/create a frag in the tree
   *
   * Walks ci->i_fragtree looking for @f.  If no node matches, a new one
   * is allocated with GFP_NOFS, initialised to split_by = 0, mds = -1,
   * ndist = 0 and linked into the tree at the position the walk ended.
   *
   * Returns the existing or new frag, or ERR_PTR(-ENOMEM) on allocation
   * failure.  The callers in this file hold i_fragtree_mutex.
   */
  static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci,
  						    u32 f)
  {
  	struct rb_node **p;
  	struct rb_node *parent = NULL;
  	struct ceph_inode_frag *frag;
  	int c;

  	p = &ci->i_fragtree.rb_node;
  	while (*p) {
  		parent = *p;
  		frag = rb_entry(parent, struct ceph_inode_frag, node);
  		c = ceph_frag_compare(f, frag->frag);
  		if (c < 0)
  			p = &(*p)->rb_left;
  		else if (c > 0)
  			p = &(*p)->rb_right;
  		else
  			return frag;
  	}

  	frag = kmalloc(sizeof(*frag), GFP_NOFS);
  	if (!frag)
  		return ERR_PTR(-ENOMEM);

  	frag->frag = f;
  	frag->split_by = 0;
  	frag->mds = -1;
  	frag->ndist = 0;

  	/* @parent/@p still describe the empty slot where the search ended */
  	rb_link_node(&frag->node, parent, p);
  	rb_insert_color(&frag->node, &ci->i_fragtree);

  	dout("get_or_create_frag added %llx.%llx frag %x\n",
  	     ceph_vinop(&ci->vfs_inode), f);
  	return frag;
  }
  
  /*
   * Look up the exact frag @f in ci->i_fragtree.
   *
   * Returns the matching node, or NULL when the tree has no entry for @f.
   */
  struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
  {
  	struct rb_node *cur;

  	for (cur = ci->i_fragtree.rb_node; cur; ) {
  		struct ceph_inode_frag *candidate =
  			rb_entry(cur, struct ceph_inode_frag, node);
  		int order = ceph_frag_compare(f, candidate->frag);

  		if (order == 0)
  			return candidate;
  		cur = (order < 0) ? cur->rb_left : cur->rb_right;
  	}
  	return NULL;
  }
  
  /*
   * Choose frag containing the given value @v.  If @pfrag is
   * specified, copy the frag delegation info to the caller if
   * it is present.
   */
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
180
181
  static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
  			      struct ceph_inode_frag *pfrag, int *found)
355da1eb7   Sage Weil   ceph: inode opera...
182
183
184
185
186
187
188
189
  {
  	u32 t = ceph_frag_make(0, 0);
  	struct ceph_inode_frag *frag;
  	unsigned nway, i;
  	u32 n;
  
  	if (found)
  		*found = 0;
355da1eb7   Sage Weil   ceph: inode opera...
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
  	while (1) {
  		WARN_ON(!ceph_frag_contains_value(t, v));
  		frag = __ceph_find_frag(ci, t);
  		if (!frag)
  			break; /* t is a leaf */
  		if (frag->split_by == 0) {
  			if (pfrag)
  				memcpy(pfrag, frag, sizeof(*pfrag));
  			if (found)
  				*found = 1;
  			break;
  		}
  
  		/* choose child */
  		nway = 1 << frag->split_by;
  		dout("choose_frag(%x) %x splits by %d (%d ways)
  ", v, t,
  		     frag->split_by, nway);
  		for (i = 0; i < nway; i++) {
  			n = ceph_frag_make_child(t, frag->split_by, i);
  			if (ceph_frag_contains_value(n, v)) {
  				t = n;
  				break;
  			}
  		}
  		BUG_ON(i == nway);
  	}
  	dout("choose_frag(%x) = %x
  ", v, t);
355da1eb7   Sage Weil   ceph: inode opera...
219
220
  	return t;
  }
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
221
222
223
224
225
226
227
228
229
  /*
   * Locked wrapper around __ceph_choose_frag(): performs the lookup with
   * ci->i_fragtree_mutex held and returns its result unchanged.
   */
  u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
  		     struct ceph_inode_frag *pfrag, int *found)
  {
  	u32 chosen;

  	mutex_lock(&ci->i_fragtree_mutex);
  	chosen = __ceph_choose_frag(ci, v, pfrag, found);
  	mutex_unlock(&ci->i_fragtree_mutex);

  	return chosen;
  }
355da1eb7   Sage Weil   ceph: inode opera...
230
231
232
233
234
235
236
237
238
239
240
241
242
  /*
   * Process dirfrag (delegation) info from the mds.  Include leaf
   * fragment in tree ONLY if ndist > 0.  Otherwise, only
   * branches/splits are included in i_fragtree)
   */
  static int ceph_fill_dirfrag(struct inode *inode,
  			     struct ceph_mds_reply_dirfrag *dirinfo)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	struct ceph_inode_frag *frag;
  	u32 id = le32_to_cpu(dirinfo->frag);
  	int mds = le32_to_cpu(dirinfo->auth);
  	int ndist = le32_to_cpu(dirinfo->ndist);
8d08503c1   Yan, Zheng   ceph: remember su...
243
  	int diri_auth = -1;
355da1eb7   Sage Weil   ceph: inode opera...
244
245
  	int i;
  	int err = 0;
8d08503c1   Yan, Zheng   ceph: remember su...
246
247
248
249
  	spin_lock(&ci->i_ceph_lock);
  	if (ci->i_auth_cap)
  		diri_auth = ci->i_auth_cap->mds;
  	spin_unlock(&ci->i_ceph_lock);
421721195   Yan, Zheng   ceph: fix dir_aut...
250
251
  	if (mds == -1) /* CDIR_AUTH_PARENT */
  		mds = diri_auth;
355da1eb7   Sage Weil   ceph: inode opera...
252
  	mutex_lock(&ci->i_fragtree_mutex);
8d08503c1   Yan, Zheng   ceph: remember su...
253
  	if (ndist == 0 && mds == diri_auth) {
355da1eb7   Sage Weil   ceph: inode opera...
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
  		/* no delegation info needed. */
  		frag = __ceph_find_frag(ci, id);
  		if (!frag)
  			goto out;
  		if (frag->split_by == 0) {
  			/* tree leaf, remove */
  			dout("fill_dirfrag removed %llx.%llx frag %x"
  			     " (no ref)
  ", ceph_vinop(inode), id);
  			rb_erase(&frag->node, &ci->i_fragtree);
  			kfree(frag);
  		} else {
  			/* tree branch, keep and clear */
  			dout("fill_dirfrag cleared %llx.%llx frag %x"
  			     " referral
  ", ceph_vinop(inode), id);
  			frag->mds = -1;
  			frag->ndist = 0;
  		}
  		goto out;
  	}
  
  
  	/* find/add this frag to store mds delegation info */
  	frag = __get_or_create_frag(ci, id);
  	if (IS_ERR(frag)) {
  		/* this is not the end of the world; we can continue
  		   with bad/inaccurate delegation info */
  		pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x
  ",
  		       ceph_vinop(inode), le32_to_cpu(dirinfo->frag));
  		err = -ENOMEM;
  		goto out;
  	}
  
  	frag->mds = mds;
  	frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP);
  	for (i = 0; i < frag->ndist; i++)
  		frag->dist[i] = le32_to_cpu(dirinfo->dist[i]);
  	dout("fill_dirfrag %llx.%llx frag %x ndist=%d
  ",
  	     ceph_vinop(inode), frag->frag, frag->ndist);
  
  out:
  	mutex_unlock(&ci->i_fragtree_mutex);
  	return err;
  }
a407846ef   Yan, Zheng   ceph: don't assum...
301
302
303
304
  static int frag_tree_split_cmp(const void *l, const void *r)
  {
  	struct ceph_frag_tree_split *ls = (struct ceph_frag_tree_split*)l;
  	struct ceph_frag_tree_split *rs = (struct ceph_frag_tree_split*)r;
fe2ed4251   Jeff Layton   ceph: fix endiann...
305
306
  	return ceph_frag_compare(le32_to_cpu(ls->frag),
  				 le32_to_cpu(rs->frag));
a407846ef   Yan, Zheng   ceph: don't assum...
307
  }
a4b7431f3   Yan, Zheng   ceph: keep leaf f...
308
309
310
311
312
313
314
315
  /*
   * Is frag @f a direct child of the split node @frag?
   *
   * With no parent (@frag == NULL) only the root frag qualifies.
   * Otherwise @f must have exactly split_by more bits than the parent
   * and its value must fall inside the parent's range.
   */
  static bool is_frag_child(u32 f, struct ceph_inode_frag *frag)
  {
  	if (!frag)
  		return f == ceph_frag_make(0, 0);

  	if (ceph_frag_bits(f) == ceph_frag_bits(frag->frag) + frag->split_by)
  		return ceph_frag_contains_value(frag->frag,
  						ceph_frag_value(f));

  	return false;
  }
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
316
317
318
319
320
  static int ceph_fill_fragtree(struct inode *inode,
  			      struct ceph_frag_tree_head *fragtree,
  			      struct ceph_mds_reply_dirfrag *dirinfo)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
a4b7431f3   Yan, Zheng   ceph: keep leaf f...
321
  	struct ceph_inode_frag *frag, *prev_frag = NULL;
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
322
  	struct rb_node *rb_node;
1b1bc16d6   Yan, Zheng   ceph: improve fra...
323
324
  	unsigned i, split_by, nsplits;
  	u32 id;
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
325
326
327
328
  	bool update = false;
  
  	mutex_lock(&ci->i_fragtree_mutex);
  	nsplits = le32_to_cpu(fragtree->nsplits);
1b1bc16d6   Yan, Zheng   ceph: improve fra...
329
330
331
  	if (nsplits != ci->i_fragtree_nsplits) {
  		update = true;
  	} else if (nsplits) {
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
  		i = prandom_u32() % nsplits;
  		id = le32_to_cpu(fragtree->splits[i].frag);
  		if (!__ceph_find_frag(ci, id))
  			update = true;
  	} else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) {
  		rb_node = rb_first(&ci->i_fragtree);
  		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
  		if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node))
  			update = true;
  	}
  	if (!update && dirinfo) {
  		id = le32_to_cpu(dirinfo->frag);
  		if (id != __ceph_choose_frag(ci, id, NULL, NULL))
  			update = true;
  	}
  	if (!update)
  		goto out_unlock;
a407846ef   Yan, Zheng   ceph: don't assum...
349
350
351
352
  	if (nsplits > 1) {
  		sort(fragtree->splits, nsplits, sizeof(fragtree->splits[0]),
  		     frag_tree_split_cmp, NULL);
  	}
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
353
354
355
356
357
  	dout("fill_fragtree %llx.%llx
  ", ceph_vinop(inode));
  	rb_node = rb_first(&ci->i_fragtree);
  	for (i = 0; i < nsplits; i++) {
  		id = le32_to_cpu(fragtree->splits[i].frag);
1b1bc16d6   Yan, Zheng   ceph: improve fra...
358
359
360
361
362
363
364
365
  		split_by = le32_to_cpu(fragtree->splits[i].by);
  		if (split_by == 0 || ceph_frag_bits(id) + split_by > 24) {
  			pr_err("fill_fragtree %llx.%llx invalid split %d/%u, "
  			       "frag %x split by %d
  ", ceph_vinop(inode),
  			       i, nsplits, id, split_by);
  			continue;
  		}
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
366
367
368
369
370
371
372
373
374
375
376
  		frag = NULL;
  		while (rb_node) {
  			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
  			if (ceph_frag_compare(frag->frag, id) >= 0) {
  				if (frag->frag != id)
  					frag = NULL;
  				else
  					rb_node = rb_next(rb_node);
  				break;
  			}
  			rb_node = rb_next(rb_node);
a4b7431f3   Yan, Zheng   ceph: keep leaf f...
377
378
379
380
  			/* delete stale split/leaf node */
  			if (frag->split_by > 0 ||
  			    !is_frag_child(frag->frag, prev_frag)) {
  				rb_erase(&frag->node, &ci->i_fragtree);
1b1bc16d6   Yan, Zheng   ceph: improve fra...
381
382
  				if (frag->split_by > 0)
  					ci->i_fragtree_nsplits--;
a4b7431f3   Yan, Zheng   ceph: keep leaf f...
383
384
  				kfree(frag);
  			}
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
385
386
387
388
389
390
391
  			frag = NULL;
  		}
  		if (!frag) {
  			frag = __get_or_create_frag(ci, id);
  			if (IS_ERR(frag))
  				continue;
  		}
1b1bc16d6   Yan, Zheng   ceph: improve fra...
392
393
394
  		if (frag->split_by == 0)
  			ci->i_fragtree_nsplits++;
  		frag->split_by = split_by;
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
395
396
  		dout(" frag %x split by %d
  ", frag->frag, frag->split_by);
a4b7431f3   Yan, Zheng   ceph: keep leaf f...
397
  		prev_frag = frag;
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
398
399
400
401
  	}
  	while (rb_node) {
  		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
  		rb_node = rb_next(rb_node);
a4b7431f3   Yan, Zheng   ceph: keep leaf f...
402
403
404
405
  		/* delete stale split/leaf node */
  		if (frag->split_by > 0 ||
  		    !is_frag_child(frag->frag, prev_frag)) {
  			rb_erase(&frag->node, &ci->i_fragtree);
1b1bc16d6   Yan, Zheng   ceph: improve fra...
406
407
  			if (frag->split_by > 0)
  				ci->i_fragtree_nsplits--;
a4b7431f3   Yan, Zheng   ceph: keep leaf f...
408
409
  			kfree(frag);
  		}
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
410
411
412
413
414
  	}
  out_unlock:
  	mutex_unlock(&ci->i_fragtree_mutex);
  	return 0;
  }
355da1eb7   Sage Weil   ceph: inode opera...
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
  
  /*
   * initialize a newly allocated inode.
   */
  struct inode *ceph_alloc_inode(struct super_block *sb)
  {
  	struct ceph_inode_info *ci;
  	int i;
  
  	ci = kmem_cache_alloc(ceph_inode_cachep, GFP_NOFS);
  	if (!ci)
  		return NULL;
  
  	dout("alloc_inode %p
  ", &ci->vfs_inode);
be655596b   Sage Weil   ceph: use i_ceph_...
430
  	spin_lock_init(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
431
  	ci->i_version = 0;
31c542a19   Yan, Zheng   ceph: add inline ...
432
  	ci->i_inline_version = 0;
355da1eb7   Sage Weil   ceph: inode opera...
433
434
  	ci->i_time_warp_seq = 0;
  	ci->i_ceph_flags = 0;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
435
436
437
438
  	atomic64_set(&ci->i_ordered_count, 1);
  	atomic64_set(&ci->i_release_count, 1);
  	atomic64_set(&ci->i_complete_seq[0], 0);
  	atomic64_set(&ci->i_complete_seq[1], 0);
355da1eb7   Sage Weil   ceph: inode opera...
439
  	ci->i_symlink = NULL;
6c0f3af72   Sage Weil   ceph: add dir_lay...
440
  	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
30c156d99   Yan, Zheng   libceph: rados po...
441
  	RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
6c0f3af72   Sage Weil   ceph: add dir_lay...
442

355da1eb7   Sage Weil   ceph: inode opera...
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
  	ci->i_fragtree = RB_ROOT;
  	mutex_init(&ci->i_fragtree_mutex);
  
  	ci->i_xattrs.blob = NULL;
  	ci->i_xattrs.prealloc_blob = NULL;
  	ci->i_xattrs.dirty = false;
  	ci->i_xattrs.index = RB_ROOT;
  	ci->i_xattrs.count = 0;
  	ci->i_xattrs.names_size = 0;
  	ci->i_xattrs.vals_size = 0;
  	ci->i_xattrs.version = 0;
  	ci->i_xattrs.index_version = 0;
  
  	ci->i_caps = RB_ROOT;
  	ci->i_auth_cap = NULL;
  	ci->i_dirty_caps = 0;
  	ci->i_flushing_caps = 0;
  	INIT_LIST_HEAD(&ci->i_dirty_item);
  	INIT_LIST_HEAD(&ci->i_flushing_item);
f66fd9f09   Yan, Zheng   ceph: pre-allocat...
462
  	ci->i_prealloc_cap_flush = NULL;
e4500b5e3   Yan, Zheng   ceph: use list in...
463
  	INIT_LIST_HEAD(&ci->i_cap_flush_list);
355da1eb7   Sage Weil   ceph: inode opera...
464
465
466
467
  	init_waitqueue_head(&ci->i_cap_wq);
  	ci->i_hold_caps_min = 0;
  	ci->i_hold_caps_max = 0;
  	INIT_LIST_HEAD(&ci->i_cap_delay_list);
355da1eb7   Sage Weil   ceph: inode opera...
468
469
470
  	INIT_LIST_HEAD(&ci->i_cap_snaps);
  	ci->i_head_snapc = NULL;
  	ci->i_snap_caps = 0;
774a6a118   Yan, Zheng   ceph: reduce i_nr...
471
  	for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
355da1eb7   Sage Weil   ceph: inode opera...
472
  		ci->i_nr_by_mode[i] = 0;
b0d7c2231   Yan, Zheng   ceph: introduce i...
473
  	mutex_init(&ci->i_truncate_mutex);
355da1eb7   Sage Weil   ceph: inode opera...
474
475
476
477
478
479
480
481
482
483
484
485
486
  	ci->i_truncate_seq = 0;
  	ci->i_truncate_size = 0;
  	ci->i_truncate_pending = 0;
  
  	ci->i_max_size = 0;
  	ci->i_reported_size = 0;
  	ci->i_wanted_max_size = 0;
  	ci->i_requested_max_size = 0;
  
  	ci->i_pin_ref = 0;
  	ci->i_rd_ref = 0;
  	ci->i_rdcache_ref = 0;
  	ci->i_wr_ref = 0;
d3d0720d4   Henry C Chang   ceph: do not use ...
487
  	ci->i_wb_ref = 0;
355da1eb7   Sage Weil   ceph: inode opera...
488
489
490
491
492
  	ci->i_wrbuffer_ref = 0;
  	ci->i_wrbuffer_ref_head = 0;
  	ci->i_shared_gen = 0;
  	ci->i_rdcache_gen = 0;
  	ci->i_rdcache_revoking = 0;
355da1eb7   Sage Weil   ceph: inode opera...
493
  	INIT_LIST_HEAD(&ci->i_unsafe_dirops);
68cd5b4b7   Yan, Zheng   ceph: make fsync(...
494
  	INIT_LIST_HEAD(&ci->i_unsafe_iops);
355da1eb7   Sage Weil   ceph: inode opera...
495
496
497
498
499
  	spin_lock_init(&ci->i_unsafe_lock);
  
  	ci->i_snap_realm = NULL;
  	INIT_LIST_HEAD(&ci->i_snap_realm_item);
  	INIT_LIST_HEAD(&ci->i_snap_flush_item);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
500
501
  	INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
  	INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);
355da1eb7   Sage Weil   ceph: inode opera...
502
503
  
  	INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
99ccbd229   Milosz Tanski   ceph: use fscache...
504
  	ceph_fscache_inode_init(ci);
355da1eb7   Sage Weil   ceph: inode opera...
505
506
  	return &ci->vfs_inode;
  }
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
507
508
509
510
  static void ceph_i_callback(struct rcu_head *head)
  {
  	struct inode *inode = container_of(head, struct inode, i_rcu);
  	struct ceph_inode_info *ci = ceph_inode(inode);
fa0d7e3de   Nick Piggin   fs: icache RCU fr...
511
512
  	kmem_cache_free(ceph_inode_cachep, ci);
  }
355da1eb7   Sage Weil   ceph: inode opera...
513
514
515
516
517
518
519
520
  void ceph_destroy_inode(struct inode *inode)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	struct ceph_inode_frag *frag;
  	struct rb_node *n;
  
  	dout("destroy_inode %p ino %llx.%llx
  ", inode, ceph_vinop(inode));
99ccbd229   Milosz Tanski   ceph: use fscache...
521
  	ceph_fscache_unregister_inode_cookie(ci);
355da1eb7   Sage Weil   ceph: inode opera...
522
  	ceph_queue_caps_release(inode);
8b218b8a4   Sage Weil   ceph: fix inode r...
523
524
  	/*
  	 * we may still have a snap_realm reference if there are stray
d9df27835   Yan, Zheng   ceph: pre-allocat...
525
  	 * caps in i_snap_caps.
8b218b8a4   Sage Weil   ceph: fix inode r...
526
527
528
  	 */
  	if (ci->i_snap_realm) {
  		struct ceph_mds_client *mdsc =
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
529
  			ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
8b218b8a4   Sage Weil   ceph: fix inode r...
530
531
532
533
534
535
536
537
538
  		struct ceph_snap_realm *realm = ci->i_snap_realm;
  
  		dout(" dropping residual ref to snap realm %p
  ", realm);
  		spin_lock(&realm->inodes_with_caps_lock);
  		list_del_init(&ci->i_snap_realm_item);
  		spin_unlock(&realm->inodes_with_caps_lock);
  		ceph_put_snap_realm(mdsc, realm);
  	}
355da1eb7   Sage Weil   ceph: inode opera...
539
540
541
542
543
544
  	kfree(ci->i_symlink);
  	while ((n = rb_first(&ci->i_fragtree)) != NULL) {
  		frag = rb_entry(n, struct ceph_inode_frag, node);
  		rb_erase(n, &ci->i_fragtree);
  		kfree(frag);
  	}
1b1bc16d6   Yan, Zheng   ceph: improve fra...
545
  	ci->i_fragtree_nsplits = 0;
355da1eb7   Sage Weil   ceph: inode opera...
546
547
  
  	__ceph_destroy_xattrs(ci);
b6c1d5b81   Sage Weil   ceph: simplify ce...
548
549
550
551
  	if (ci->i_xattrs.blob)
  		ceph_buffer_put(ci->i_xattrs.blob);
  	if (ci->i_xattrs.prealloc_blob)
  		ceph_buffer_put(ci->i_xattrs.prealloc_blob);
355da1eb7   Sage Weil   ceph: inode opera...
552

779fe0fb8   Yan, Zheng   ceph: rados pool ...
553
  	ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
30c156d99   Yan, Zheng   libceph: rados po...
554

fa0d7e3de   Nick Piggin   fs: icache RCU fr...
555
  	call_rcu(&inode->i_rcu, ceph_i_callback);
355da1eb7   Sage Weil   ceph: inode opera...
556
  }
9f12bd119   Yan, Zheng   ceph: drop unconn...
557
558
559
560
561
562
563
564
565
  /* Unconditionally returns 1; @inode itself is not examined. */
  int ceph_drop_inode(struct inode *inode)
  {
  	/*
  	 * A positive dentry and its corresponding inode always arrive
  	 * together in an MDS reply, so there is no need to keep the inode
  	 * in the cache after dropping all its aliases.
  	 */
  	return 1;
  }
224a7542b   Yan, Zheng   ceph: tolerate ba...
566
567
568
569
  /*
   * Number of 512-byte blocks needed to hold @size bytes, rounded up.
   *
   * The round-up is derived from the low 9 bits instead of adding 511
   * to @size first, so a size within 511 bytes of the u64 maximum does
   * not wrap around to a near-zero block count.
   */
  static inline blkcnt_t calc_inode_blocks(u64 size)
  {
  	return (size >> 9) + ((size & ((1 << 9) - 1)) != 0);
  }
355da1eb7   Sage Weil   ceph: inode opera...
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
  /*
   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
   * careful because either the client or MDS may have more up to date
   * info, depending on which capabilities are held, and whether
   * time_warp_seq or truncate_seq have increased.  (Ordinarily, mtime
   * and size are monotonically increasing, except when utimes() or
   * truncate() increments the corresponding _seq values.)
   */
  /*
   * Apply size/truncate info to @inode.
   *
   * The size is taken when @truncate_seq is newer than ours, or equal
   * with a larger @size; a non-zero size on a directory is logged and
   * forced to 0.  If truncate_seq advanced it is recorded, and a
   * truncate is marked pending when cache/buffer caps are in @issued,
   * the mapping is mmapped, or file caps are wanted.  truncate_size is
   * updated whenever @truncate_seq is not older than ours and the value
   * differs.
   *
   * Returns 1 if a truncate was marked pending (fscache is invalidated
   * in that case), otherwise 0.
   */
  int ceph_fill_file_size(struct inode *inode, int issued,
  			u32 truncate_seq, u64 truncate_size, u64 size)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	int queue_trunc = 0;

  	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
  	    (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
  		dout("size %lld -> %llu\n", inode->i_size, size);
  		if (size > 0 && S_ISDIR(inode->i_mode)) {
  			pr_err("fill_file_size non-zero size for directory\n");
  			size = 0;
  		}
  		i_size_write(inode, size);
  		inode->i_blocks = calc_inode_blocks(size);
  		ci->i_reported_size = size;
  		if (truncate_seq != ci->i_truncate_seq) {
  			dout("truncate_seq %u -> %u\n",
  			     ci->i_truncate_seq, truncate_seq);
  			ci->i_truncate_seq = truncate_seq;

  			/* the MDS should have revoked these caps */
  			WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL |
  					       CEPH_CAP_FILE_RD |
  					       CEPH_CAP_FILE_WR |
  					       CEPH_CAP_FILE_LAZYIO));
  			/*
  			 * If we hold relevant caps, or in the case where we're
  			 * not the only client referencing this file and we
  			 * don't hold those caps, then we need to check whether
  			 * the file is either opened or mmaped
  			 */
  			if ((issued & (CEPH_CAP_FILE_CACHE|
  				       CEPH_CAP_FILE_BUFFER)) ||
  			    mapping_mapped(inode->i_mapping) ||
  			    __ceph_caps_file_wanted(ci)) {
  				ci->i_truncate_pending++;
  				queue_trunc = 1;
  			}
  		}
  	}
  	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 &&
  	    ci->i_truncate_size != truncate_size) {
  		dout("truncate_size %lld -> %llu\n", ci->i_truncate_size,
  		     truncate_size);
  		ci->i_truncate_size = truncate_size;
  	}

  	if (queue_trunc)
  		ceph_fscache_invalidate(inode);

  	return queue_trunc;
  }
  
  /*
   * Apply ctime/mtime/atime to @inode.
   *
   * If any of the FILE_EXCL/FILE_WR/FILE_BUFFER/AUTH_EXCL/XATTR_EXCL
   * caps are in @issued, ctime only moves forward, and mtime/atime are
   * replaced outright when @time_warp_seq is newer than ours, advanced
   * to the later value when it is equal, and left alone when it is
   * older and FILE_EXCL is issued.  Without those caps the supplied
   * times are taken as-is provided @time_warp_seq is not older than
   * ours.  An older time_warp_seq in any other case is only reported
   * via dout().
   */
  void ceph_fill_file_time(struct inode *inode, int issued,
  			 u64 time_warp_seq, struct timespec *ctime,
  			 struct timespec *mtime, struct timespec *atime)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	int warn = 0;

  	if (issued & (CEPH_CAP_FILE_EXCL|
  		      CEPH_CAP_FILE_WR|
  		      CEPH_CAP_FILE_BUFFER|
  		      CEPH_CAP_AUTH_EXCL|
  		      CEPH_CAP_XATTR_EXCL)) {
  		if (timespec_compare(ctime, &inode->i_ctime) > 0) {
  			dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
  			     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
  			     ctime->tv_sec, ctime->tv_nsec);
  			inode->i_ctime = *ctime;
  		}
  		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
  			/* the MDS did a utimes() */
  			dout("mtime %ld.%09ld -> %ld.%09ld "
  			     "tw %d -> %d\n",
  			     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
  			     mtime->tv_sec, mtime->tv_nsec,
  			     ci->i_time_warp_seq, (int)time_warp_seq);

  			inode->i_mtime = *mtime;
  			inode->i_atime = *atime;
  			ci->i_time_warp_seq = time_warp_seq;
  		} else if (time_warp_seq == ci->i_time_warp_seq) {
  			/* nobody did utimes(); take the max */
  			if (timespec_compare(mtime, &inode->i_mtime) > 0) {
  				dout("mtime %ld.%09ld -> %ld.%09ld inc\n",
  				     inode->i_mtime.tv_sec,
  				     inode->i_mtime.tv_nsec,
  				     mtime->tv_sec, mtime->tv_nsec);
  				inode->i_mtime = *mtime;
  			}
  			if (timespec_compare(atime, &inode->i_atime) > 0) {
  				dout("atime %ld.%09ld -> %ld.%09ld inc\n",
  				     inode->i_atime.tv_sec,
  				     inode->i_atime.tv_nsec,
  				     atime->tv_sec, atime->tv_nsec);
  				inode->i_atime = *atime;
  			}
  		} else if (issued & CEPH_CAP_FILE_EXCL) {
  			/* we did a utimes(); ignore mds values */
  		} else {
  			warn = 1;
  		}
  	} else {
  		/* we have no write|excl caps; whatever the MDS says is true */
  		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
  			inode->i_ctime = *ctime;
  			inode->i_mtime = *mtime;
  			inode->i_atime = *atime;
  			ci->i_time_warp_seq = time_warp_seq;
  		} else {
  			warn = 1;
  		}
  	}
  	if (warn) /* time_warp_seq shouldn't go backwards */
  		dout("%p mds time_warp_seq %llu < %u\n",
  		     inode, time_warp_seq, ci->i_time_warp_seq);
  }
  
  /*
   * Populate an inode based on info from mds.  May be called on new or
   * existing inodes.
   */
01deead04   Yan, Zheng   ceph: use getattr...
710
  static int fill_inode(struct inode *inode, struct page *locked_page,
355da1eb7   Sage Weil   ceph: inode opera...
711
712
713
714
715
716
  		      struct ceph_mds_reply_info_in *iinfo,
  		      struct ceph_mds_reply_dirfrag *dirinfo,
  		      struct ceph_mds_session *session,
  		      unsigned long ttl_from, int cap_fmode,
  		      struct ceph_cap_reservation *caps_reservation)
  {
d9df27835   Yan, Zheng   ceph: pre-allocat...
717
  	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
355da1eb7   Sage Weil   ceph: inode opera...
718
719
  	struct ceph_mds_reply_inode *info = iinfo->in;
  	struct ceph_inode_info *ci = ceph_inode(inode);
f98a128a5   Yan, Zheng   ceph: update inod...
720
  	int issued = 0, implemented, new_issued;
355da1eb7   Sage Weil   ceph: inode opera...
721
  	struct timespec mtime, atime, ctime;
355da1eb7   Sage Weil   ceph: inode opera...
722
  	struct ceph_buffer *xattr_blob = NULL;
779fe0fb8   Yan, Zheng   ceph: rados pool ...
723
  	struct ceph_string *pool_ns = NULL;
d9df27835   Yan, Zheng   ceph: pre-allocat...
724
  	struct ceph_cap *new_cap = NULL;
355da1eb7   Sage Weil   ceph: inode opera...
725
  	int err = 0;
d9df27835   Yan, Zheng   ceph: pre-allocat...
726
  	bool wake = false;
f98a128a5   Yan, Zheng   ceph: update inod...
727
728
  	bool queue_trunc = false;
  	bool new_version = false;
31c542a19   Yan, Zheng   ceph: add inline ...
729
  	bool fill_inline = false;
355da1eb7   Sage Weil   ceph: inode opera...
730
731
732
733
734
  
  	dout("fill_inode %p ino %llx.%llx v %llu had %llu
  ",
  	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
  	     ci->i_version);
d9df27835   Yan, Zheng   ceph: pre-allocat...
735
736
737
  	/* prealloc new cap struct */
  	if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP)
  		new_cap = ceph_get_cap(mdsc, caps_reservation);
355da1eb7   Sage Weil   ceph: inode opera...
738
739
740
741
742
743
  	/*
  	 * prealloc xattr data, if it looks like we'll need it.  only
  	 * if len > 4 (meaning there are actually xattrs; the first 4
  	 * bytes are the xattr count).
  	 */
  	if (iinfo->xattr_len > 4) {
b6c1d5b81   Sage Weil   ceph: simplify ce...
744
  		xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS);
355da1eb7   Sage Weil   ceph: inode opera...
745
746
747
748
749
  		if (!xattr_blob)
  			pr_err("fill_inode ENOMEM xattr blob %d bytes
  ",
  			       iinfo->xattr_len);
  	}
779fe0fb8   Yan, Zheng   ceph: rados pool ...
750
751
752
  	if (iinfo->pool_ns_len > 0)
  		pool_ns = ceph_find_or_create_string(iinfo->pool_ns_data,
  						     iinfo->pool_ns_len);
be655596b   Sage Weil   ceph: use i_ceph_...
753
  	spin_lock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
754
755
756
  
  	/*
  	 * provided version will be odd if inode value is projected,
8bd59e018   Sage Weil   ceph: fix version...
757
758
759
760
761
762
763
764
  	 * even if stable.  skip the update if we have newer stable
  	 * info (ours>=theirs, e.g. due to racing mds replies), unless
  	 * we are getting projected (unstable) info (in which case the
  	 * version is odd, and we want ours>theirs).
  	 *   us   them
  	 *   2    2     skip
  	 *   3    2     skip
  	 *   3    3     update
355da1eb7   Sage Weil   ceph: inode opera...
765
  	 */
f98a128a5   Yan, Zheng   ceph: update inod...
766
767
768
769
  	if (ci->i_version == 0 ||
  	    ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
  	     le64_to_cpu(info->version) > (ci->i_version & ~1)))
  		new_version = true;
355da1eb7   Sage Weil   ceph: inode opera...
770
771
  	issued = __ceph_caps_issued(ci, &implemented);
  	issued |= implemented | __ceph_caps_dirty(ci);
f98a128a5   Yan, Zheng   ceph: update inod...
772
  	new_issued = ~issued & le32_to_cpu(info->cap.caps);
355da1eb7   Sage Weil   ceph: inode opera...
773
774
775
776
777
  
  	/* update inode */
  	ci->i_version = le64_to_cpu(info->version);
  	inode->i_version++;
  	inode->i_rdev = le32_to_cpu(info->rdev);
f98a128a5   Yan, Zheng   ceph: update inod...
778
  	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
355da1eb7   Sage Weil   ceph: inode opera...
779

f98a128a5   Yan, Zheng   ceph: update inod...
780
781
  	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
  	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
355da1eb7   Sage Weil   ceph: inode opera...
782
  		inode->i_mode = le32_to_cpu(info->mode);
ab871b903   Eric W. Biederman   ceph: Translate i...
783
784
  		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
  		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
355da1eb7   Sage Weil   ceph: inode opera...
785
786
  		dout("%p mode 0%o uid.gid %d.%d
  ", inode, inode->i_mode,
bd2bae6a6   Eric W. Biederman   ceph: Convert kui...
787
788
  		     from_kuid(&init_user_ns, inode->i_uid),
  		     from_kgid(&init_user_ns, inode->i_gid));
355da1eb7   Sage Weil   ceph: inode opera...
789
  	}
f98a128a5   Yan, Zheng   ceph: update inod...
790
791
  	if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
  	    (issued & CEPH_CAP_LINK_EXCL) == 0)
bfe868486   Miklos Szeredi   filesystems: add ...
792
  		set_nlink(inode, le32_to_cpu(info->nlink));
355da1eb7   Sage Weil   ceph: inode opera...
793

f98a128a5   Yan, Zheng   ceph: update inod...
794
795
796
797
798
799
800
801
802
803
804
805
  	if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
  		/* be careful with mtime, atime, size */
  		ceph_decode_timespec(&atime, &info->atime);
  		ceph_decode_timespec(&mtime, &info->mtime);
  		ceph_decode_timespec(&ctime, &info->ctime);
  		ceph_fill_file_time(inode, issued,
  				le32_to_cpu(info->time_warp_seq),
  				&ctime, &mtime, &atime);
  	}
  
  	if (new_version ||
  	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
7627151ea   Yan, Zheng   libceph: define n...
806
  		s64 old_pool = ci->i_layout.pool_id;
779fe0fb8   Yan, Zheng   ceph: rados pool ...
807
  		struct ceph_string *old_ns;
7627151ea   Yan, Zheng   libceph: define n...
808
  		ceph_file_layout_from_legacy(&ci->i_layout, &info->layout);
779fe0fb8   Yan, Zheng   ceph: rados pool ...
809
810
811
812
813
  		old_ns = rcu_dereference_protected(ci->i_layout.pool_ns,
  					lockdep_is_held(&ci->i_ceph_lock));
  		rcu_assign_pointer(ci->i_layout.pool_ns, pool_ns);
  
  		if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns)
10183a695   Yan, Zheng   ceph: check OSD c...
814
  			ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
10183a695   Yan, Zheng   ceph: check OSD c...
815

779fe0fb8   Yan, Zheng   ceph: rados pool ...
816
  		pool_ns = old_ns;
10183a695   Yan, Zheng   ceph: check OSD c...
817

f98a128a5   Yan, Zheng   ceph: update inod...
818
819
820
821
822
823
824
825
826
827
828
829
830
  		queue_trunc = ceph_fill_file_size(inode, issued,
  					le32_to_cpu(info->truncate_seq),
  					le64_to_cpu(info->truncate_size),
  					le64_to_cpu(info->size));
  		/* only update max_size on auth cap */
  		if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
  		    ci->i_max_size != le64_to_cpu(info->max_size)) {
  			dout("max_size %lld -> %llu
  ", ci->i_max_size,
  					le64_to_cpu(info->max_size));
  			ci->i_max_size = le64_to_cpu(info->max_size);
  		}
  	}
355da1eb7   Sage Weil   ceph: inode opera...
831
832
833
  
  	/* xattrs */
  	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
508b32d86   Yan, Zheng   ceph: request xat...
834
  	if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))  &&
355da1eb7   Sage Weil   ceph: inode opera...
835
836
837
838
839
840
841
842
  	    le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) {
  		if (ci->i_xattrs.blob)
  			ceph_buffer_put(ci->i_xattrs.blob);
  		ci->i_xattrs.blob = xattr_blob;
  		if (xattr_blob)
  			memcpy(ci->i_xattrs.blob->vec.iov_base,
  			       iinfo->xattr_data, iinfo->xattr_len);
  		ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
7221fe4c2   Guangliang Zhao   ceph: add acl for...
843
  		ceph_forget_all_cached_acls(inode);
a6424e48c   Sage Weil   ceph: fix xattr d...
844
  		xattr_blob = NULL;
355da1eb7   Sage Weil   ceph: inode opera...
845
846
847
  	}
  
  	inode->i_mapping->a_ops = &ceph_aops;
355da1eb7   Sage Weil   ceph: inode opera...
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
  
  	switch (inode->i_mode & S_IFMT) {
  	case S_IFIFO:
  	case S_IFBLK:
  	case S_IFCHR:
  	case S_IFSOCK:
  		init_special_inode(inode, inode->i_mode, inode->i_rdev);
  		inode->i_op = &ceph_file_iops;
  		break;
  	case S_IFREG:
  		inode->i_op = &ceph_file_iops;
  		inode->i_fop = &ceph_file_fops;
  		break;
  	case S_IFLNK:
  		inode->i_op = &ceph_symlink_iops;
  		if (!ci->i_symlink) {
810339ec2   Xi Wang   ceph: avoid panic...
864
  			u32 symlen = iinfo->symlink_len;
355da1eb7   Sage Weil   ceph: inode opera...
865
  			char *sym;
be655596b   Sage Weil   ceph: use i_ceph_...
866
  			spin_unlock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
867

224a7542b   Yan, Zheng   ceph: tolerate ba...
868
869
870
871
872
873
874
875
  			if (symlen != i_size_read(inode)) {
  				pr_err("fill_inode %llx.%llx BAD symlink "
  					"size %lld
  ", ceph_vinop(inode),
  					i_size_read(inode));
  				i_size_write(inode, symlen);
  				inode->i_blocks = calc_inode_blocks(symlen);
  			}
810339ec2   Xi Wang   ceph: avoid panic...
876

355da1eb7   Sage Weil   ceph: inode opera...
877
  			err = -ENOMEM;
810339ec2   Xi Wang   ceph: avoid panic...
878
  			sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
355da1eb7   Sage Weil   ceph: inode opera...
879
880
  			if (!sym)
  				goto out;
355da1eb7   Sage Weil   ceph: inode opera...
881

be655596b   Sage Weil   ceph: use i_ceph_...
882
  			spin_lock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
883
884
885
886
887
  			if (!ci->i_symlink)
  				ci->i_symlink = sym;
  			else
  				kfree(sym); /* lost a race */
  		}
ac194dccd   Al Viro   ceph: switch to s...
888
  		inode->i_link = ci->i_symlink;
355da1eb7   Sage Weil   ceph: inode opera...
889
890
891
892
  		break;
  	case S_IFDIR:
  		inode->i_op = &ceph_dir_iops;
  		inode->i_fop = &ceph_dir_fops;
14303d20f   Sage Weil   ceph: implement D...
893
  		ci->i_dir_layout = iinfo->dir_layout;
355da1eb7   Sage Weil   ceph: inode opera...
894
895
896
897
898
899
  		ci->i_files = le64_to_cpu(info->files);
  		ci->i_subdirs = le64_to_cpu(info->subdirs);
  		ci->i_rbytes = le64_to_cpu(info->rbytes);
  		ci->i_rfiles = le64_to_cpu(info->rfiles);
  		ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
  		ceph_decode_timespec(&ci->i_rctime, &info->rctime);
355da1eb7   Sage Weil   ceph: inode opera...
900
901
902
903
904
905
  		break;
  	default:
  		pr_err("fill_inode %llx.%llx BAD mode 0%o
  ",
  		       ceph_vinop(inode), inode->i_mode);
  	}
355da1eb7   Sage Weil   ceph: inode opera...
906
907
908
  	/* were we issued a capability? */
  	if (info->cap.caps) {
  		if (ceph_snap(inode) == CEPH_NOSNAP) {
2f92b3d0a   Yan, Zheng   ceph: properly ma...
909
  			unsigned caps = le32_to_cpu(info->cap.caps);
355da1eb7   Sage Weil   ceph: inode opera...
910
911
  			ceph_add_cap(inode, session,
  				     le64_to_cpu(info->cap.cap_id),
2f92b3d0a   Yan, Zheng   ceph: properly ma...
912
  				     cap_fmode, caps,
355da1eb7   Sage Weil   ceph: inode opera...
913
914
915
916
  				     le32_to_cpu(info->cap.wanted),
  				     le32_to_cpu(info->cap.seq),
  				     le32_to_cpu(info->cap.mseq),
  				     le64_to_cpu(info->cap.realm),
d9df27835   Yan, Zheng   ceph: pre-allocat...
917
  				     info->cap.flags, &new_cap);
2f92b3d0a   Yan, Zheng   ceph: properly ma...
918
919
920
921
922
923
924
925
926
  
  			/* set dir completion flag? */
  			if (S_ISDIR(inode->i_mode) &&
  			    ci->i_files == 0 && ci->i_subdirs == 0 &&
  			    (caps & CEPH_CAP_FILE_SHARED) &&
  			    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
  			    !__ceph_dir_is_complete(ci)) {
  				dout(" marking %p complete (empty)
  ", inode);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
927
  				i_size_write(inode, 0);
2f92b3d0a   Yan, Zheng   ceph: properly ma...
928
  				__ceph_dir_set_complete(ci,
fdd4e1583   Yan, Zheng   ceph: rework dcac...
929
930
  					atomic64_read(&ci->i_release_count),
  					atomic64_read(&ci->i_ordered_count));
2f92b3d0a   Yan, Zheng   ceph: properly ma...
931
  			}
d9df27835   Yan, Zheng   ceph: pre-allocat...
932
  			wake = true;
355da1eb7   Sage Weil   ceph: inode opera...
933
  		} else {
355da1eb7   Sage Weil   ceph: inode opera...
934
935
936
937
938
939
  			dout(" %p got snap_caps %s
  ", inode,
  			     ceph_cap_string(le32_to_cpu(info->cap.caps)));
  			ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
  			if (cap_fmode >= 0)
  				__ceph_get_fmode(ci, cap_fmode);
355da1eb7   Sage Weil   ceph: inode opera...
940
  		}
04d000eb3   Sage Weil   ceph: fix open fi...
941
  	} else if (cap_fmode >= 0) {
f3ae1b97b   Fabian Frederick   fs/ceph: replace ...
942
943
  		pr_warn("mds issued no caps on %llx.%llx
  ",
04d000eb3   Sage Weil   ceph: fix open fi...
944
945
  			   ceph_vinop(inode));
  		__ceph_get_fmode(ci, cap_fmode);
355da1eb7   Sage Weil   ceph: inode opera...
946
  	}
31c542a19   Yan, Zheng   ceph: add inline ...
947
948
949
950
951
952
  
  	if (iinfo->inline_version > 0 &&
  	    iinfo->inline_version >= ci->i_inline_version) {
  		int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
  		ci->i_inline_version = iinfo->inline_version;
  		if (ci->i_inline_version != CEPH_INLINE_NONE &&
01deead04   Yan, Zheng   ceph: use getattr...
953
954
  		    (locked_page ||
  		     (le32_to_cpu(info->cap.caps) & cache_caps)))
31c542a19   Yan, Zheng   ceph: add inline ...
955
956
  			fill_inline = true;
  	}
be655596b   Sage Weil   ceph: use i_ceph_...
957
  	spin_unlock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
958

31c542a19   Yan, Zheng   ceph: add inline ...
959
  	if (fill_inline)
01deead04   Yan, Zheng   ceph: use getattr...
960
  		ceph_fill_inline_data(inode, locked_page,
31c542a19   Yan, Zheng   ceph: add inline ...
961
  				      iinfo->inline_data, iinfo->inline_len);
d9df27835   Yan, Zheng   ceph: pre-allocat...
962
963
  	if (wake)
  		wake_up_all(&ci->i_cap_wq);
355da1eb7   Sage Weil   ceph: inode opera...
964
965
  	/* queue truncate if we saw i_size decrease */
  	if (queue_trunc)
3c6f6b79a   Sage Weil   ceph: cleanup asy...
966
  		ceph_queue_vmtruncate(inode);
355da1eb7   Sage Weil   ceph: inode opera...
967
968
  
  	/* populate frag tree */
3e7fbe9ce   Yan, Zheng   ceph: introduce c...
969
970
  	if (S_ISDIR(inode->i_mode))
  		ceph_fill_fragtree(inode, &info->fragtree, dirinfo);
355da1eb7   Sage Weil   ceph: inode opera...
971
972
973
974
975
976
  
  	/* update delegation info? */
  	if (dirinfo)
  		ceph_fill_dirfrag(inode, dirinfo);
  
  	err = 0;
355da1eb7   Sage Weil   ceph: inode opera...
977
  out:
d9df27835   Yan, Zheng   ceph: pre-allocat...
978
979
  	if (new_cap)
  		ceph_put_cap(mdsc, new_cap);
b6c1d5b81   Sage Weil   ceph: simplify ce...
980
981
  	if (xattr_blob)
  		ceph_buffer_put(xattr_blob);
779fe0fb8   Yan, Zheng   ceph: rados pool ...
982
  	ceph_put_string(pool_ns);
355da1eb7   Sage Weil   ceph: inode opera...
983
984
985
986
987
988
989
990
991
  	return err;
  }
  
  /*
   * caller should hold session s_mutex.
   */
  static void update_dentry_lease(struct dentry *dentry,
  				struct ceph_mds_reply_lease *lease,
  				struct ceph_mds_session *session,
f5d55f039   Jeff Layton   ceph: vet the tar...
992
993
994
  				unsigned long from_time,
  				struct ceph_vino *tgt_vino,
  				struct ceph_vino *dir_vino)
355da1eb7   Sage Weil   ceph: inode opera...
995
996
997
998
999
1000
  {
  	struct ceph_dentry_info *di = ceph_dentry(dentry);
  	long unsigned duration = le32_to_cpu(lease->duration_ms);
  	long unsigned ttl = from_time + (duration * HZ) / 1000;
  	long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
  	struct inode *dir;
481f001ff   Yan, Zheng   ceph: update ceph...
1001
  	struct ceph_mds_session *old_lease_session = NULL;
355da1eb7   Sage Weil   ceph: inode opera...
1002

f5d55f039   Jeff Layton   ceph: vet the tar...
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
  	/*
  	 * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
  	 * we expect a negative dentry.
  	 */
  	if (!tgt_vino && d_really_is_positive(dentry))
  		return;
  
  	if (tgt_vino && (d_really_is_negative(dentry) ||
  			!ceph_ino_compare(d_inode(dentry), tgt_vino)))
  		return;
355da1eb7   Sage Weil   ceph: inode opera...
1013
  	spin_lock(&dentry->d_lock);
2f90b852e   Sage Weil   ceph: ignore leas...
1014
1015
1016
  	dout("update_dentry_lease %p duration %lu ms ttl %lu
  ",
  	     dentry, duration, ttl);
355da1eb7   Sage Weil   ceph: inode opera...
1017

2b0143b5c   David Howells   VFS: normal files...
1018
  	dir = d_inode(dentry->d_parent);
18fc8abdb   Al Viro   ceph: unify dentr...
1019

f5d55f039   Jeff Layton   ceph: vet the tar...
1020
1021
1022
  	/* make sure parent matches dir_vino */
  	if (!ceph_ino_compare(dir, dir_vino))
  		goto out_unlock;
18fc8abdb   Al Viro   ceph: unify dentr...
1023
1024
1025
  	/* only track leases on regular dentries */
  	if (ceph_snap(dir) != CEPH_NOSNAP)
  		goto out_unlock;
355da1eb7   Sage Weil   ceph: inode opera...
1026
  	di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
2f90b852e   Sage Weil   ceph: ignore leas...
1027
  	if (duration == 0)
355da1eb7   Sage Weil   ceph: inode opera...
1028
1029
1030
  		goto out_unlock;
  
  	if (di->lease_gen == session->s_cap_gen &&
9b16f03c4   Miklos Szeredi   ceph: don't use -...
1031
  	    time_before(ttl, di->time))
355da1eb7   Sage Weil   ceph: inode opera...
1032
  		goto out_unlock;  /* we already have a newer lease. */
481f001ff   Yan, Zheng   ceph: update ceph...
1033
1034
1035
1036
  	if (di->lease_session && di->lease_session != session) {
  		old_lease_session = di->lease_session;
  		di->lease_session = NULL;
  	}
355da1eb7   Sage Weil   ceph: inode opera...
1037
1038
1039
1040
1041
1042
1043
1044
1045
  
  	ceph_dentry_lru_touch(dentry);
  
  	if (!di->lease_session)
  		di->lease_session = ceph_get_mds_session(session);
  	di->lease_gen = session->s_cap_gen;
  	di->lease_seq = le32_to_cpu(lease->seq);
  	di->lease_renew_after = half_ttl;
  	di->lease_renew_from = 0;
9b16f03c4   Miklos Szeredi   ceph: don't use -...
1046
  	di->time = ttl;
355da1eb7   Sage Weil   ceph: inode opera...
1047
1048
  out_unlock:
  	spin_unlock(&dentry->d_lock);
481f001ff   Yan, Zheng   ceph: update ceph...
1049
1050
  	if (old_lease_session)
  		ceph_put_mds_session(old_lease_session);
355da1eb7   Sage Weil   ceph: inode opera...
1051
1052
1053
1054
1055
  }
  
  /*
   * Attach inode @in to the negative dentry @dn via d_splice_alias().
   *
   * Returns the dentry that ends up attached to the inode: @dn itself, or
   * the alias d_splice_alias() handed back (in which case the reference on
   * @dn is dropped here).  On failure the ERR_PTR from d_splice_alias() is
   * returned and @dn is left for the caller to release.
   *
   * caller must hold directory i_mutex for this to be safe.
   */
  static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
  {
  	struct dentry *alias;

  	BUG_ON(d_inode(dn));

  	/* dn must be unhashed */
  	if (!d_unhashed(dn))
  		d_drop(dn);

  	alias = d_splice_alias(in, dn);

  	if (IS_ERR(alias)) {
  		pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
  		       PTR_ERR(alias), dn, in, ceph_vinop(in));
  		/*
  		 * Caller should release 'dn' in the case of error.
  		 * If 'req->r_dentry' is passed to this function,
  		 * caller should leave 'req->r_dentry' untouched.
  		 */
  		return alias;
  	}

  	if (!alias) {
  		/* no alias was returned: dn itself now carries the inode */
  		BUG_ON(!ceph_dentry(dn));
  		dout("dn %p attached to %p ino %llx.%llx\n",
  		     dn, d_inode(dn), ceph_vinop(d_inode(dn)));
  		return dn;
  	}

  	/* a different dentry was returned: use it and drop ours */
  	dout("dn %p (%d) spliced with %p (%d) "
  	     "inode %p ino %llx.%llx\n",
  	     dn, d_count(dn),
  	     alias, d_count(alias),
  	     d_inode(alias), ceph_vinop(d_inode(alias)));
  	dput(dn);
  	return alias;
  }
  
  /*
   * Incorporate results into the local cache.  This is either just
   * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
   * after a lookup).
   *
   * A reply may contain
   *         a directory inode along with a dentry.
   *  and/or a target inode
   *
   * Called with snap_rwsem (read).
   */
f5a03b080   Jeff Layton   ceph: drop sessio...
1107
  int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
355da1eb7   Sage Weil   ceph: inode opera...
1108
  {
f5a03b080   Jeff Layton   ceph: drop sessio...
1109
  	struct ceph_mds_session *session = req->r_session;
355da1eb7   Sage Weil   ceph: inode opera...
1110
1111
  	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
  	struct inode *in = NULL;
f5d55f039   Jeff Layton   ceph: vet the tar...
1112
  	struct ceph_vino tvino, dvino;
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1113
  	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
355da1eb7   Sage Weil   ceph: inode opera...
1114
1115
1116
1117
1118
  	int err = 0;
  
  	dout("fill_trace %p is_dentry %d is_target %d
  ", req,
  	     rinfo->head->is_dentry, rinfo->head->is_target);
355da1eb7   Sage Weil   ceph: inode opera...
1119
1120
1121
  	if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
  		dout("fill_trace reply is empty!
  ");
3dd69aabc   Jeff Layton   ceph: add a new f...
1122
  		if (rinfo->head->result == 0 && req->r_parent)
167c9e352   Sage Weil   ceph: use common ...
1123
  			ceph_invalidate_dir_request(req);
355da1eb7   Sage Weil   ceph: inode opera...
1124
1125
1126
1127
  		return 0;
  	}
  
  	if (rinfo->head->is_dentry) {
3dd69aabc   Jeff Layton   ceph: add a new f...
1128
  		struct inode *dir = req->r_parent;
5b1daecd5   Sage Weil   ceph: properly ha...
1129

6c5e50fa6   Sage Weil   ceph: tolerate (a...
1130
  		if (dir) {
01deead04   Yan, Zheng   ceph: use getattr...
1131
1132
  			err = fill_inode(dir, NULL,
  					 &rinfo->diri, rinfo->dirfrag,
6c5e50fa6   Sage Weil   ceph: tolerate (a...
1133
1134
1135
  					 session, req->r_request_started, -1,
  					 &req->r_caps_reservation);
  			if (err < 0)
19913b4ea   Yan, Zheng   ceph: add get_nam...
1136
  				goto done;
6c5e50fa6   Sage Weil   ceph: tolerate (a...
1137
1138
1139
  		} else {
  			WARN_ON_ONCE(1);
  		}
19913b4ea   Yan, Zheng   ceph: add get_nam...
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
  
  		if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
  			struct qstr dname;
  			struct dentry *dn, *parent;
  
  			BUG_ON(!rinfo->head->is_target);
  			BUG_ON(req->r_dentry);
  
  			parent = d_find_any_alias(dir);
  			BUG_ON(!parent);
  
  			dname.name = rinfo->dname;
  			dname.len = rinfo->dname_len;
8387ff257   Linus Torvalds   vfs: make the str...
1153
  			dname.hash = full_name_hash(parent, dname.name, dname.len);
f5d55f039   Jeff Layton   ceph: vet the tar...
1154
1155
  			tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
  			tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
19913b4ea   Yan, Zheng   ceph: add get_nam...
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
  retry_lookup:
  			dn = d_lookup(parent, &dname);
  			dout("d_lookup on parent=%p name=%.*s got %p
  ",
  			     parent, dname.len, dname.name, dn);
  
  			if (!dn) {
  				dn = d_alloc(parent, &dname);
  				dout("d_alloc %p '%.*s' = %p
  ", parent,
  				     dname.len, dname.name, dn);
d37b1d994   Markus Elfring   ceph: adjust 36 c...
1167
  				if (!dn) {
19913b4ea   Yan, Zheng   ceph: add get_nam...
1168
1169
1170
1171
  					dput(parent);
  					err = -ENOMEM;
  					goto done;
  				}
ad5cb123f   Al Viro   ceph: switch to u...
1172
  				err = 0;
2b0143b5c   David Howells   VFS: normal files...
1173
  			} else if (d_really_is_positive(dn) &&
f5d55f039   Jeff Layton   ceph: vet the tar...
1174
1175
  				   (ceph_ino(d_inode(dn)) != tvino.ino ||
  				    ceph_snap(d_inode(dn)) != tvino.snap)) {
19913b4ea   Yan, Zheng   ceph: add get_nam...
1176
1177
  				dout(" dn %p points to wrong inode %p
  ",
2b0143b5c   David Howells   VFS: normal files...
1178
  				     dn, d_inode(dn));
19913b4ea   Yan, Zheng   ceph: add get_nam...
1179
1180
1181
1182
1183
1184
1185
1186
  				d_delete(dn);
  				dput(dn);
  				goto retry_lookup;
  			}
  
  			req->r_dentry = dn;
  			dput(parent);
  		}
5b1daecd5   Sage Weil   ceph: properly ha...
1187
  	}
86b58d131   Yan, Zheng   ceph: initialize ...
1188
  	if (rinfo->head->is_target) {
f5d55f039   Jeff Layton   ceph: vet the tar...
1189
1190
  		tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
  		tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
86b58d131   Yan, Zheng   ceph: initialize ...
1191

f5d55f039   Jeff Layton   ceph: vet the tar...
1192
  		in = ceph_get_inode(sb, tvino);
86b58d131   Yan, Zheng   ceph: initialize ...
1193
1194
1195
1196
1197
  		if (IS_ERR(in)) {
  			err = PTR_ERR(in);
  			goto done;
  		}
  		req->r_target_inode = in;
01deead04   Yan, Zheng   ceph: use getattr...
1198
  		err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
86b58d131   Yan, Zheng   ceph: initialize ...
1199
  				session, req->r_request_started,
bc2de10dc   Jeff Layton   ceph: convert boo...
1200
1201
  				(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
  				rinfo->head->result == 0) ?  req->r_fmode : -1,
86b58d131   Yan, Zheng   ceph: initialize ...
1202
1203
1204
1205
1206
1207
1208
1209
  				&req->r_caps_reservation);
  		if (err < 0) {
  			pr_err("fill_inode badness %p %llx.%llx
  ",
  				in, ceph_vinop(in));
  			goto done;
  		}
  	}
9358c6d4c   Sage Weil   ceph: fix dentry ...
1210
1211
1212
1213
  	/*
  	 * ignore null lease/binding on snapdir ENOENT, or else we
  	 * will have trouble splicing in the virtual snapdir later
  	 */
3dd69aabc   Jeff Layton   ceph: add a new f...
1214
1215
1216
  	if (rinfo->head->is_dentry &&
              !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
  	    test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
9358c6d4c   Sage Weil   ceph: fix dentry ...
1217
  	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1218
  					       fsc->mount_options->snapdir_name,
9358c6d4c   Sage Weil   ceph: fix dentry ...
1219
  					       req->r_dentry->d_name.len))) {
355da1eb7   Sage Weil   ceph: inode opera...
1220
1221
1222
1223
1224
  		/*
  		 * lookup link rename   : null -> possibly existing inode
  		 * mknod symlink mkdir  : null -> new inode
  		 * unlink               : linked -> null
  		 */
3dd69aabc   Jeff Layton   ceph: add a new f...
1225
  		struct inode *dir = req->r_parent;
355da1eb7   Sage Weil   ceph: inode opera...
1226
1227
1228
1229
1230
  		struct dentry *dn = req->r_dentry;
  		bool have_dir_cap, have_lease;
  
  		BUG_ON(!dn);
  		BUG_ON(!dir);
2b0143b5c   David Howells   VFS: normal files...
1231
  		BUG_ON(d_inode(dn->d_parent) != dir);
f5d55f039   Jeff Layton   ceph: vet the tar...
1232
1233
1234
1235
1236
1237
  
  		dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
  		dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
  
  		BUG_ON(ceph_ino(dir) != dvino.ino);
  		BUG_ON(ceph_snap(dir) != dvino.snap);
355da1eb7   Sage Weil   ceph: inode opera...
1238

355da1eb7   Sage Weil   ceph: inode opera...
1239
1240
1241
1242
1243
1244
1245
  		/* do we have a lease on the whole dir? */
  		have_dir_cap =
  			(le32_to_cpu(rinfo->diri.in->cap.caps) &
  			 CEPH_CAP_FILE_SHARED);
  
  		/* do we have a dn lease? */
  		have_lease = have_dir_cap ||
2f90b852e   Sage Weil   ceph: ignore leas...
1246
  			le32_to_cpu(rinfo->dlease->duration_ms);
355da1eb7   Sage Weil   ceph: inode opera...
1247
1248
1249
1250
1251
1252
  		if (!have_lease)
  			dout("fill_trace  no dentry lease or dir cap
  ");
  
  		/* rename? */
  		if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
0a8a70f96   Yan, Zheng   ceph: clear direc...
1253
1254
  			struct inode *olddir = req->r_old_dentry_dir;
  			BUG_ON(!olddir);
a455589f1   Al Viro   assorted conversi...
1255
1256
  			dout(" src %p '%pd' dst %p '%pd'
  ",
355da1eb7   Sage Weil   ceph: inode opera...
1257
  			     req->r_old_dentry,
a455589f1   Al Viro   assorted conversi...
1258
1259
  			     req->r_old_dentry,
  			     dn, dn);
355da1eb7   Sage Weil   ceph: inode opera...
1260
1261
1262
  			dout("fill_trace doing d_move %p -> %p
  ",
  			     req->r_old_dentry, dn);
c10f5e12b   Sage Weil   ceph: clear dir c...
1263

fdd4e1583   Yan, Zheng   ceph: rework dcac...
1264
1265
1266
  			/* d_move screws up sibling dentries' offsets */
  			ceph_dir_clear_ordered(dir);
  			ceph_dir_clear_ordered(olddir);
355da1eb7   Sage Weil   ceph: inode opera...
1267
  			d_move(req->r_old_dentry, dn);
a455589f1   Al Viro   assorted conversi...
1268
1269
1270
  			dout(" src %p '%pd' dst %p '%pd'
  ",
  			     req->r_old_dentry,
355da1eb7   Sage Weil   ceph: inode opera...
1271
  			     req->r_old_dentry,
a455589f1   Al Viro   assorted conversi...
1272
  			     dn, dn);
81a6cf2d3   Sage Weil   ceph: invalidate ...
1273

c4a29f26d   Sage Weil   ceph: ensure rena...
1274
1275
  			/* ensure target dentry is invalidated, despite
  			   rehashing bug in vfs_rename_dir */
81a6cf2d3   Sage Weil   ceph: invalidate ...
1276
  			ceph_invalidate_dentry_lease(dn);
99ccbd229   Milosz Tanski   ceph: use fscache...
1277
1278
  			dout("dn %p gets new offset %lld
  ", req->r_old_dentry,
1cd3935be   Sage Weil   ceph: set dn offs...
1279
  			     ceph_dentry(req->r_old_dentry)->offset);
81a6cf2d3   Sage Weil   ceph: invalidate ...
1280

355da1eb7   Sage Weil   ceph: inode opera...
1281
  			dn = req->r_old_dentry;  /* use old_dentry */
355da1eb7   Sage Weil   ceph: inode opera...
1282
1283
1284
1285
1286
1287
  		}
  
  		/* null dentry? */
  		if (!rinfo->head->is_target) {
  			dout("fill_trace null dentry
  ");
2b0143b5c   David Howells   VFS: normal files...
1288
  			if (d_really_is_positive(dn)) {
70db4f362   Yan, Zheng   ceph: introduce a...
1289
  				ceph_dir_clear_ordered(dir);
355da1eb7   Sage Weil   ceph: inode opera...
1290
1291
1292
  				dout("d_delete %p
  ", dn);
  				d_delete(dn);
80d025ffe   Jeff Layton   ceph: don't updat...
1293
1294
  			} else if (have_lease) {
  				if (d_unhashed(dn))
f8b31710e   Al Viro   ceph_fill_trace()...
1295
  					d_add(dn, NULL);
355da1eb7   Sage Weil   ceph: inode opera...
1296
1297
  				update_dentry_lease(dn, rinfo->dlease,
  						    session,
f5d55f039   Jeff Layton   ceph: vet the tar...
1298
1299
  						    req->r_request_started,
  						    NULL, &dvino);
355da1eb7   Sage Weil   ceph: inode opera...
1300
1301
1302
1303
1304
  			}
  			goto done;
  		}
  
  		/* attach proper inode */
2b0143b5c   David Howells   VFS: normal files...
1305
  		if (d_really_is_negative(dn)) {
70db4f362   Yan, Zheng   ceph: introduce a...
1306
  			ceph_dir_clear_ordered(dir);
86b58d131   Yan, Zheng   ceph: initialize ...
1307
  			ihold(in);
f7380af04   Al Viro   ceph: don't bothe...
1308
  			dn = splice_dentry(dn, in);
355da1eb7   Sage Weil   ceph: inode opera...
1309
1310
1311
1312
1313
  			if (IS_ERR(dn)) {
  				err = PTR_ERR(dn);
  				goto done;
  			}
  			req->r_dentry = dn;  /* may have spliced */
2b0143b5c   David Howells   VFS: normal files...
1314
  		} else if (d_really_is_positive(dn) && d_inode(dn) != in) {
355da1eb7   Sage Weil   ceph: inode opera...
1315
1316
  			dout(" %p links to %p %llx.%llx, not %llx.%llx
  ",
2b0143b5c   David Howells   VFS: normal files...
1317
  			     dn, d_inode(dn), ceph_vinop(d_inode(dn)),
86b58d131   Yan, Zheng   ceph: initialize ...
1318
  			     ceph_vinop(in));
200fd27c8   Yan, Zheng   ceph: use lookup ...
1319
  			d_invalidate(dn);
355da1eb7   Sage Weil   ceph: inode opera...
1320
  			have_lease = false;
355da1eb7   Sage Weil   ceph: inode opera...
1321
  		}
f5d55f039   Jeff Layton   ceph: vet the tar...
1322
1323
1324
  		if (have_lease) {
  			tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
  			tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
355da1eb7   Sage Weil   ceph: inode opera...
1325
  			update_dentry_lease(dn, rinfo->dlease, session,
f5d55f039   Jeff Layton   ceph: vet the tar...
1326
1327
1328
  					    req->r_request_started,
  					    &tvino, &dvino);
  		}
355da1eb7   Sage Weil   ceph: inode opera...
1329
1330
  		dout(" final dn %p
  ", dn);
bc2de10dc   Jeff Layton   ceph: convert boo...
1331
1332
1333
  	} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
  		    req->r_op == CEPH_MDS_OP_MKSNAP) &&
  		   !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
355da1eb7   Sage Weil   ceph: inode opera...
1334
  		struct dentry *dn = req->r_dentry;
3dd69aabc   Jeff Layton   ceph: add a new f...
1335
  		struct inode *dir = req->r_parent;
355da1eb7   Sage Weil   ceph: inode opera...
1336
1337
1338
  
  		/* fill out a snapdir LOOKUPSNAP dentry */
  		BUG_ON(!dn);
0a8a70f96   Yan, Zheng   ceph: clear direc...
1339
1340
  		BUG_ON(!dir);
  		BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
355da1eb7   Sage Weil   ceph: inode opera...
1341
1342
  		dout(" linking snapped dir %p to dn %p
  ", in, dn);
70db4f362   Yan, Zheng   ceph: introduce a...
1343
  		ceph_dir_clear_ordered(dir);
86b58d131   Yan, Zheng   ceph: initialize ...
1344
  		ihold(in);
f7380af04   Al Viro   ceph: don't bothe...
1345
  		dn = splice_dentry(dn, in);
355da1eb7   Sage Weil   ceph: inode opera...
1346
1347
1348
1349
1350
  		if (IS_ERR(dn)) {
  			err = PTR_ERR(dn);
  			goto done;
  		}
  		req->r_dentry = dn;  /* may have spliced */
cdde7c435   Jeff Layton   ceph: call update...
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
  	} else if (rinfo->head->is_dentry) {
  		struct ceph_vino *ptvino = NULL;
  
  		if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
  		    le32_to_cpu(rinfo->dlease->duration_ms)) {
  			dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
  			dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
  
  			if (rinfo->head->is_target) {
  				tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
  				tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
  				ptvino = &tvino;
  			}
  
  			update_dentry_lease(req->r_dentry, rinfo->dlease,
  				session, req->r_request_started, ptvino,
  				&dvino);
  		} else {
  			dout("%s: no dentry lease or dir cap
  ", __func__);
  		}
355da1eb7   Sage Weil   ceph: inode opera...
1372
  	}
355da1eb7   Sage Weil   ceph: inode opera...
1373
1374
1375
1376
1377
1378
1379
1380
1381
  done:
  	dout("fill_trace done err=%d
  ", err);
  	return err;
  }
  
  /*
   * Prepopulate our cache with readdir results, leases, etc.
   */
79f9f99ad   Sage Weil   ceph: prepopulate...
1382
1383
1384
1385
1386
1387
1388
  static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
  					   struct ceph_mds_session *session)
  {
  	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
  	int i, err = 0;
  
  	for (i = 0; i < rinfo->dir_nr; i++) {
2a5beea3f   Yan, Zheng   ceph: define stru...
1389
  		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
79f9f99ad   Sage Weil   ceph: prepopulate...
1390
1391
1392
  		struct ceph_vino vino;
  		struct inode *in;
  		int rc;
2a5beea3f   Yan, Zheng   ceph: define stru...
1393
1394
  		vino.ino = le64_to_cpu(rde->inode.in->ino);
  		vino.snap = le64_to_cpu(rde->inode.in->snapid);
79f9f99ad   Sage Weil   ceph: prepopulate...
1395
1396
1397
1398
1399
1400
1401
1402
  
  		in = ceph_get_inode(req->r_dentry->d_sb, vino);
  		if (IS_ERR(in)) {
  			err = PTR_ERR(in);
  			dout("new_inode badness got %d
  ", err);
  			continue;
  		}
2a5beea3f   Yan, Zheng   ceph: define stru...
1403
  		rc = fill_inode(in, NULL, &rde->inode, NULL, session,
79f9f99ad   Sage Weil   ceph: prepopulate...
1404
1405
1406
1407
1408
1409
  				req->r_request_started, -1,
  				&req->r_caps_reservation);
  		if (rc < 0) {
  			pr_err("fill_inode badness on %p got %d
  ", in, rc);
  			err = rc;
79f9f99ad   Sage Weil   ceph: prepopulate...
1410
  		}
209ae762a   Yan, Zheng   ceph: fix inode r...
1411
  		iput(in);
79f9f99ad   Sage Weil   ceph: prepopulate...
1412
1413
1414
1415
  	}
  
  	return err;
  }
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1416
1417
1418
1419
  void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl)
  {
  	if (ctl->page) {
  		kunmap(ctl->page);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1420
  		put_page(ctl->page);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1421
1422
1423
1424
1425
1426
1427
1428
1429
  		ctl->page = NULL;
  	}
  }
  
  static int fill_readdir_cache(struct inode *dir, struct dentry *dn,
  			      struct ceph_readdir_cache_control *ctl,
  			      struct ceph_mds_request *req)
  {
  	struct ceph_inode_info *ci = ceph_inode(dir);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1430
  	unsigned nsize = PAGE_SIZE / sizeof(struct dentry*);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1431
1432
1433
1434
1435
  	unsigned idx = ctl->index % nsize;
  	pgoff_t pgoff = ctl->index / nsize;
  
  	if (!ctl->page || pgoff != page_index(ctl->page)) {
  		ceph_readdir_cache_release(ctl);
af5e5eb57   Yan, Zheng   ceph: fix race du...
1436
1437
1438
1439
  		if (idx == 0)
  			ctl->page = grab_cache_page(&dir->i_data, pgoff);
  		else
  			ctl->page = find_lock_page(&dir->i_data, pgoff);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1440
1441
  		if (!ctl->page) {
  			ctl->index = -1;
af5e5eb57   Yan, Zheng   ceph: fix race du...
1442
  			return idx == 0 ? -ENOMEM : 0;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1443
1444
1445
1446
1447
  		}
  		/* reading/filling the cache are serialized by
  		 * i_mutex, no need to use page lock */
  		unlock_page(ctl->page);
  		ctl->dentries = kmap(ctl->page);
af5e5eb57   Yan, Zheng   ceph: fix race du...
1448
  		if (idx == 0)
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
1449
  			memset(ctl->dentries, 0, PAGE_SIZE);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
  	}
  
  	if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) &&
  	    req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) {
  		dout("readdir cache dn %p idx %d
  ", dn, ctl->index);
  		ctl->dentries[idx] = dn;
  		ctl->index++;
  	} else {
  		dout("disable readdir cache
  ");
  		ctl->index = -1;
  	}
  	return 0;
  }
355da1eb7   Sage Weil   ceph: inode opera...
1465
1466
1467
1468
  int ceph_readdir_prepopulate(struct ceph_mds_request *req,
  			     struct ceph_mds_session *session)
  {
  	struct dentry *parent = req->r_dentry;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
1469
  	struct ceph_inode_info *ci = ceph_inode(d_inode(parent));
355da1eb7   Sage Weil   ceph: inode opera...
1470
1471
1472
1473
  	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
  	struct qstr dname;
  	struct dentry *dn;
  	struct inode *in;
315f24088   Yan, Zheng   ceph: fix securit...
1474
  	int err = 0, skipped = 0, ret, i;
355da1eb7   Sage Weil   ceph: inode opera...
1475
  	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
81c6aea52   Yan, Zheng   ceph: handle frag...
1476
  	u32 frag = le32_to_cpu(rhead->args.readdir.frag);
f3c4ebe65   Yan, Zheng   ceph: using hash ...
1477
1478
  	u32 last_hash = 0;
  	u32 fpos_offset;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1479
  	struct ceph_readdir_cache_control cache_ctl = {};
bc2de10dc   Jeff Layton   ceph: convert boo...
1480
  	if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1481
  		return readdir_prepopulate_inodes_only(req, session);
81c6aea52   Yan, Zheng   ceph: handle frag...
1482

79162547b   Yan, Zheng   ceph: make seeky ...
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
  	if (rinfo->hash_order) {
  		if (req->r_path2) {
  			last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
  						  req->r_path2,
  						  strlen(req->r_path2));
  			last_hash = ceph_frag_value(last_hash);
  		} else if (rinfo->offset_hash) {
  			/* mds understands offset_hash */
  			WARN_ON_ONCE(req->r_readdir_offset != 2);
  			last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
  		}
f3c4ebe65   Yan, Zheng   ceph: using hash ...
1494
  	}
81c6aea52   Yan, Zheng   ceph: handle frag...
1495
1496
1497
1498
1499
1500
  	if (rinfo->dir_dir &&
  	    le32_to_cpu(rinfo->dir_dir->frag) != frag) {
  		dout("readdir_prepopulate got new frag %x -> %x
  ",
  		     frag, le32_to_cpu(rinfo->dir_dir->frag));
  		frag = le32_to_cpu(rinfo->dir_dir->frag);
f3c4ebe65   Yan, Zheng   ceph: using hash ...
1501
  		if (!rinfo->hash_order)
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1502
  			req->r_readdir_offset = 2;
81c6aea52   Yan, Zheng   ceph: handle frag...
1503
  	}
355da1eb7   Sage Weil   ceph: inode opera...
1504
1505
  
  	if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
355da1eb7   Sage Weil   ceph: inode opera...
1506
1507
1508
1509
1510
1511
1512
1513
  		dout("readdir_prepopulate %d items under SNAPDIR dn %p
  ",
  		     rinfo->dir_nr, parent);
  	} else {
  		dout("readdir_prepopulate %d items under dn %p
  ",
  		     rinfo->dir_nr, parent);
  		if (rinfo->dir_dir)
2b0143b5c   David Howells   VFS: normal files...
1514
  			ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
355da1eb7   Sage Weil   ceph: inode opera...
1515

8d45b911a   Yan, Zheng   ceph: don't fill ...
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
  		if (ceph_frag_is_leftmost(frag) &&
  		    req->r_readdir_offset == 2 &&
  		    !(rinfo->hash_order && last_hash)) {
  			/* note dir version at start of readdir so we can
  			 * tell if any dentries get dropped */
  			req->r_dir_release_cnt =
  				atomic64_read(&ci->i_release_count);
  			req->r_dir_ordered_cnt =
  				atomic64_read(&ci->i_ordered_count);
  			req->r_readdir_cache_idx = 0;
  		}
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1527
1528
1529
  	}
  
  	cache_ctl.index = req->r_readdir_cache_idx;
f3c4ebe65   Yan, Zheng   ceph: using hash ...
1530
  	fpos_offset = req->r_readdir_offset;
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1531

86b58d131   Yan, Zheng   ceph: initialize ...
1532
  	/* FIXME: release caps/leases if error occurs */
355da1eb7   Sage Weil   ceph: inode opera...
1533
  	for (i = 0; i < rinfo->dir_nr; i++) {
2a5beea3f   Yan, Zheng   ceph: define stru...
1534
  		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
f5d55f039   Jeff Layton   ceph: vet the tar...
1535
  		struct ceph_vino tvino, dvino;
355da1eb7   Sage Weil   ceph: inode opera...
1536

2a5beea3f   Yan, Zheng   ceph: define stru...
1537
1538
  		dname.name = rde->name;
  		dname.len = rde->name_len;
8387ff257   Linus Torvalds   vfs: make the str...
1539
  		dname.hash = full_name_hash(parent, dname.name, dname.len);
355da1eb7   Sage Weil   ceph: inode opera...
1540

f5d55f039   Jeff Layton   ceph: vet the tar...
1541
1542
  		tvino.ino = le64_to_cpu(rde->inode.in->ino);
  		tvino.snap = le64_to_cpu(rde->inode.in->snapid);
355da1eb7   Sage Weil   ceph: inode opera...
1543

f3c4ebe65   Yan, Zheng   ceph: using hash ...
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
  		if (rinfo->hash_order) {
  			u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
  						 rde->name, rde->name_len);
  			hash = ceph_frag_value(hash);
  			if (hash != last_hash)
  				fpos_offset = 2;
  			last_hash = hash;
  			rde->offset = ceph_make_fpos(hash, fpos_offset++, true);
  		} else {
  			rde->offset = ceph_make_fpos(frag, fpos_offset++, false);
  		}
355da1eb7   Sage Weil   ceph: inode opera...
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
  
  retry_lookup:
  		dn = d_lookup(parent, &dname);
  		dout("d_lookup on parent=%p name=%.*s got %p
  ",
  		     parent, dname.len, dname.name, dn);
  
  		if (!dn) {
  			dn = d_alloc(parent, &dname);
  			dout("d_alloc %p '%.*s' = %p
  ", parent,
  			     dname.len, dname.name, dn);
d37b1d994   Markus Elfring   ceph: adjust 36 c...
1567
  			if (!dn) {
355da1eb7   Sage Weil   ceph: inode opera...
1568
1569
1570
1571
1572
  				dout("d_alloc badness
  ");
  				err = -ENOMEM;
  				goto out;
  			}
2b0143b5c   David Howells   VFS: normal files...
1573
  		} else if (d_really_is_positive(dn) &&
f5d55f039   Jeff Layton   ceph: vet the tar...
1574
1575
  			   (ceph_ino(d_inode(dn)) != tvino.ino ||
  			    ceph_snap(d_inode(dn)) != tvino.snap)) {
355da1eb7   Sage Weil   ceph: inode opera...
1576
1577
  			dout(" dn %p points to wrong inode %p
  ",
2b0143b5c   David Howells   VFS: normal files...
1578
  			     dn, d_inode(dn));
355da1eb7   Sage Weil   ceph: inode opera...
1579
1580
1581
  			d_delete(dn);
  			dput(dn);
  			goto retry_lookup;
355da1eb7   Sage Weil   ceph: inode opera...
1582
  		}
355da1eb7   Sage Weil   ceph: inode opera...
1583
  		/* inode */
2b0143b5c   David Howells   VFS: normal files...
1584
1585
  		if (d_really_is_positive(dn)) {
  			in = d_inode(dn);
355da1eb7   Sage Weil   ceph: inode opera...
1586
  		} else {
f5d55f039   Jeff Layton   ceph: vet the tar...
1587
  			in = ceph_get_inode(parent->d_sb, tvino);
ac1f12ef5   Dan Carpenter   ceph: ceph_get_in...
1588
  			if (IS_ERR(in)) {
355da1eb7   Sage Weil   ceph: inode opera...
1589
1590
  				dout("new_inode badness
  ");
2744c171d   Al Viro   ceph: don't abuse...
1591
  				d_drop(dn);
355da1eb7   Sage Weil   ceph: inode opera...
1592
  				dput(dn);
ac1f12ef5   Dan Carpenter   ceph: ceph_get_in...
1593
  				err = PTR_ERR(in);
355da1eb7   Sage Weil   ceph: inode opera...
1594
1595
  				goto out;
  			}
355da1eb7   Sage Weil   ceph: inode opera...
1596
  		}
2a5beea3f   Yan, Zheng   ceph: define stru...
1597
  		ret = fill_inode(in, NULL, &rde->inode, NULL, session,
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1598
1599
1600
  				 req->r_request_started, -1,
  				 &req->r_caps_reservation);
  		if (ret < 0) {
355da1eb7   Sage Weil   ceph: inode opera...
1601
1602
  			pr_err("fill_inode badness on %p
  ", in);
2b0143b5c   David Howells   VFS: normal files...
1603
  			if (d_really_is_negative(dn))
86b58d131   Yan, Zheng   ceph: initialize ...
1604
1605
  				iput(in);
  			d_drop(dn);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1606
  			err = ret;
d69ed05a8   Sage Weil   ceph: handle spli...
1607
  			goto next_item;
355da1eb7   Sage Weil   ceph: inode opera...
1608
  		}
86b58d131   Yan, Zheng   ceph: initialize ...
1609

2b0143b5c   David Howells   VFS: normal files...
1610
  		if (d_really_is_negative(dn)) {
315f24088   Yan, Zheng   ceph: fix securit...
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
  			struct dentry *realdn;
  
  			if (ceph_security_xattr_deadlock(in)) {
  				dout(" skip splicing dn %p to inode %p"
  				     " (security xattr deadlock)
  ", dn, in);
  				iput(in);
  				skipped++;
  				goto next_item;
  			}
  
  			realdn = splice_dentry(dn, in);
5cba372c0   Yan, Zheng   ceph: fix dentry ...
1623
1624
1625
  			if (IS_ERR(realdn)) {
  				err = PTR_ERR(realdn);
  				d_drop(dn);
86b58d131   Yan, Zheng   ceph: initialize ...
1626
1627
1628
  				dn = NULL;
  				goto next_item;
  			}
5cba372c0   Yan, Zheng   ceph: fix dentry ...
1629
  			dn = realdn;
86b58d131   Yan, Zheng   ceph: initialize ...
1630
  		}
f3c4ebe65   Yan, Zheng   ceph: using hash ...
1631
  		ceph_dentry(dn)->offset = rde->offset;
86b58d131   Yan, Zheng   ceph: initialize ...
1632

f5d55f039   Jeff Layton   ceph: vet the tar...
1633
  		dvino = ceph_vino(d_inode(parent));
2a5beea3f   Yan, Zheng   ceph: define stru...
1634
  		update_dentry_lease(dn, rde->lease, req->r_session,
f5d55f039   Jeff Layton   ceph: vet the tar...
1635
  				    req->r_request_started, &tvino, &dvino);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1636

315f24088   Yan, Zheng   ceph: fix securit...
1637
  		if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1638
1639
1640
1641
1642
  			ret = fill_readdir_cache(d_inode(parent), dn,
  						 &cache_ctl, req);
  			if (ret < 0)
  				err = ret;
  		}
d69ed05a8   Sage Weil   ceph: handle spli...
1643
1644
1645
  next_item:
  		if (dn)
  			dput(dn);
355da1eb7   Sage Weil   ceph: inode opera...
1646
  	}
355da1eb7   Sage Weil   ceph: inode opera...
1647
  out:
315f24088   Yan, Zheng   ceph: fix securit...
1648
  	if (err == 0 && skipped == 0) {
bc2de10dc   Jeff Layton   ceph: convert boo...
1649
  		set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags);
fdd4e1583   Yan, Zheng   ceph: rework dcac...
1650
1651
1652
  		req->r_readdir_cache_idx = cache_ctl.index;
  	}
  	ceph_readdir_cache_release(&cache_ctl);
355da1eb7   Sage Weil   ceph: inode opera...
1653
1654
1655
1656
  	dout("readdir_prepopulate done
  ");
  	return err;
  }
efb0ca765   Yan, Zheng   ceph: update the ...
1657
  /*
   * Update the inode's size (and the block count derived from it by
   * calc_inode_blocks()) while holding i_ceph_lock.
   *
   * Returns the result of __ceph_should_report_size() evaluated under the
   * same lock, after the new size has been written.
   */
  bool ceph_inode_set_size(struct inode *inode, loff_t size)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	bool report;

  	spin_lock(&ci->i_ceph_lock);

  	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
  	i_size_write(inode, size);
  	inode->i_blocks = calc_inode_blocks(size);
  	report = __ceph_should_report_size(ci);

  	spin_unlock(&ci->i_ceph_lock);

  	return report;
  }
  
  /*
   * Write back inode data in a worker thread.  (This can't be done
   * in the message handler context.)
   */
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1678
1679
  void ceph_queue_writeback(struct inode *inode)
  {
15a2015fb   Sage Weil   ceph: fix iput ra...
1680
  	ihold(inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1681
1682
  	if (queue_work(ceph_inode_to_client(inode)->wb_wq,
  		       &ceph_inode(inode)->i_wb_work)) {
2c27c9a57   Sage Weil   ceph: fix typo in...
1683
1684
  		dout("ceph_queue_writeback %p
  ", inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1685
  	} else {
2c27c9a57   Sage Weil   ceph: fix typo in...
1686
1687
  		dout("ceph_queue_writeback %p failed
  ", inode);
15a2015fb   Sage Weil   ceph: fix iput ra...
1688
  		iput(inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1689
1690
1691
1692
  	}
  }
  
  static void ceph_writeback_work(struct work_struct *work)
355da1eb7   Sage Weil   ceph: inode opera...
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
  {
  	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
  						  i_wb_work);
  	struct inode *inode = &ci->vfs_inode;
  
  	dout("writeback %p
  ", inode);
  	filemap_fdatawrite(&inode->i_data);
  	iput(inode);
  }
  
  /*
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1705
1706
1707
1708
   * queue an async invalidation
   */
  void ceph_queue_invalidate(struct inode *inode)
  {
15a2015fb   Sage Weil   ceph: fix iput ra...
1709
  	ihold(inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1710
1711
1712
1713
  	if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
  		       &ceph_inode(inode)->i_pg_inv_work)) {
  		dout("ceph_queue_invalidate %p
  ", inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1714
1715
1716
  	} else {
  		dout("ceph_queue_invalidate %p failed
  ", inode);
15a2015fb   Sage Weil   ceph: fix iput ra...
1717
  		iput(inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1718
1719
1720
1721
  	}
  }
  
  /*
355da1eb7   Sage Weil   ceph: inode opera...
1722
1723
1724
   * Invalidate inode pages in a worker thread.  (This can't be done
   * in the message handler context.)
   */
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1725
  static void ceph_invalidate_work(struct work_struct *work)
355da1eb7   Sage Weil   ceph: inode opera...
1726
1727
1728
1729
  {
  	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
  						  i_pg_inv_work);
  	struct inode *inode = &ci->vfs_inode;
6c93df5db   Yan, Zheng   ceph: don't call ...
1730
  	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
355da1eb7   Sage Weil   ceph: inode opera...
1731
1732
  	u32 orig_gen;
  	int check = 0;
b0d7c2231   Yan, Zheng   ceph: introduce i...
1733
  	mutex_lock(&ci->i_truncate_mutex);
6c93df5db   Yan, Zheng   ceph: don't call ...
1734

52953d559   Seraphime Kirkovski   ceph: cleanup ACC...
1735
  	if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
6c93df5db   Yan, Zheng   ceph: don't call ...
1736
1737
1738
1739
1740
1741
1742
1743
  		pr_warn_ratelimited("invalidate_pages %p %lld forced umount
  ",
  				    inode, ceph_ino(inode));
  		mapping_set_error(inode->i_mapping, -EIO);
  		truncate_pagecache(inode, 0);
  		mutex_unlock(&ci->i_truncate_mutex);
  		goto out;
  	}
be655596b   Sage Weil   ceph: use i_ceph_...
1744
  	spin_lock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
1745
1746
1747
  	dout("invalidate_pages %p gen %d revoking %d
  ", inode,
  	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
cd045cb42   Sage Weil   ceph: fix rdcache...
1748
  	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
9563f88c1   Yan, Zheng   ceph: fix cache r...
1749
1750
  		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
  			check = 1;
be655596b   Sage Weil   ceph: use i_ceph_...
1751
  		spin_unlock(&ci->i_ceph_lock);
b0d7c2231   Yan, Zheng   ceph: introduce i...
1752
  		mutex_unlock(&ci->i_truncate_mutex);
355da1eb7   Sage Weil   ceph: inode opera...
1753
1754
1755
  		goto out;
  	}
  	orig_gen = ci->i_rdcache_gen;
be655596b   Sage Weil   ceph: use i_ceph_...
1756
  	spin_unlock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
1757

9abd4db71   Yan, Zheng   ceph: don't use t...
1758
1759
1760
1761
  	if (invalidate_inode_pages2(inode->i_mapping) < 0) {
  		pr_err("invalidate_pages %p fails
  ", inode);
  	}
355da1eb7   Sage Weil   ceph: inode opera...
1762

be655596b   Sage Weil   ceph: use i_ceph_...
1763
  	spin_lock(&ci->i_ceph_lock);
cd045cb42   Sage Weil   ceph: fix rdcache...
1764
1765
  	if (orig_gen == ci->i_rdcache_gen &&
  	    orig_gen == ci->i_rdcache_revoking) {
355da1eb7   Sage Weil   ceph: inode opera...
1766
1767
1768
  		dout("invalidate_pages %p gen %d successful
  ", inode,
  		     ci->i_rdcache_gen);
cd045cb42   Sage Weil   ceph: fix rdcache...
1769
  		ci->i_rdcache_revoking--;
355da1eb7   Sage Weil   ceph: inode opera...
1770
1771
  		check = 1;
  	} else {
cd045cb42   Sage Weil   ceph: fix rdcache...
1772
1773
1774
1775
  		dout("invalidate_pages %p gen %d raced, now %d revoking %d
  ",
  		     inode, orig_gen, ci->i_rdcache_gen,
  		     ci->i_rdcache_revoking);
9563f88c1   Yan, Zheng   ceph: fix cache r...
1776
1777
  		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
  			check = 1;
355da1eb7   Sage Weil   ceph: inode opera...
1778
  	}
be655596b   Sage Weil   ceph: use i_ceph_...
1779
  	spin_unlock(&ci->i_ceph_lock);
b0d7c2231   Yan, Zheng   ceph: introduce i...
1780
  	mutex_unlock(&ci->i_truncate_mutex);
9563f88c1   Yan, Zheng   ceph: fix cache r...
1781
  out:
355da1eb7   Sage Weil   ceph: inode opera...
1782
1783
  	if (check)
  		ceph_check_caps(ci, 0, NULL);
355da1eb7   Sage Weil   ceph: inode opera...
1784
1785
1786
1787
1788
  	iput(inode);
  }
  
  
  /*
3f99969f4   Yan, Zheng   ceph: acquire i_m...
1789
   * called by trunc_wq;
355da1eb7   Sage Weil   ceph: inode opera...
1790
1791
1792
   *
   * We also truncate in a separate thread as well.
   */
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1793
  static void ceph_vmtruncate_work(struct work_struct *work)
355da1eb7   Sage Weil   ceph: inode opera...
1794
1795
1796
1797
1798
1799
1800
  {
  	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
  						  i_vmtruncate_work);
  	struct inode *inode = &ci->vfs_inode;
  
  	dout("vmtruncate_work %p
  ", inode);
b415bf4f9   Yan, Zheng   ceph: fix pending...
1801
  	__ceph_do_pending_vmtruncate(inode);
355da1eb7   Sage Weil   ceph: inode opera...
1802
1803
1804
1805
  	iput(inode);
  }
  
  /*
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1806
1807
1808
1809
1810
1811
   * Queue an async vmtruncate.  If we fail to queue work, we will handle
   * the truncation the next time we call __ceph_do_pending_vmtruncate.
   */
  void ceph_queue_vmtruncate(struct inode *inode)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
15a2015fb   Sage Weil   ceph: fix iput ra...
1812
  	ihold(inode);
99ccbd229   Milosz Tanski   ceph: use fscache...
1813

640ef79d2   Cheng Renquan   ceph: use ceph_sb...
1814
  	if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1815
1816
1817
  		       &ci->i_vmtruncate_work)) {
  		dout("ceph_queue_vmtruncate %p
  ", inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1818
1819
1820
1821
  	} else {
  		dout("ceph_queue_vmtruncate %p failed, pending=%d
  ",
  		     inode, ci->i_truncate_pending);
15a2015fb   Sage Weil   ceph: fix iput ra...
1822
  		iput(inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
1823
1824
1825
1826
  	}
  }
  
  /*
355da1eb7   Sage Weil   ceph: inode opera...
1827
1828
1829
   * Make sure any pending truncation is applied before doing anything
   * that may depend on it.
   */
b415bf4f9   Yan, Zheng   ceph: fix pending...
1830
  void __ceph_do_pending_vmtruncate(struct inode *inode)
355da1eb7   Sage Weil   ceph: inode opera...
1831
1832
1833
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	u64 to;
a85f50b6e   Yan, Zheng   ceph: Fix __ceph_...
1834
  	int wrbuffer_refs, finish = 0;
355da1eb7   Sage Weil   ceph: inode opera...
1835

b0d7c2231   Yan, Zheng   ceph: introduce i...
1836
  	mutex_lock(&ci->i_truncate_mutex);
355da1eb7   Sage Weil   ceph: inode opera...
1837
  retry:
be655596b   Sage Weil   ceph: use i_ceph_...
1838
  	spin_lock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
1839
1840
1841
  	if (ci->i_truncate_pending == 0) {
  		dout("__do_pending_vmtruncate %p none pending
  ", inode);
be655596b   Sage Weil   ceph: use i_ceph_...
1842
  		spin_unlock(&ci->i_ceph_lock);
b0d7c2231   Yan, Zheng   ceph: introduce i...
1843
  		mutex_unlock(&ci->i_truncate_mutex);
355da1eb7   Sage Weil   ceph: inode opera...
1844
1845
1846
1847
1848
1849
1850
1851
  		return;
  	}
  
  	/*
  	 * make sure any dirty snapped pages are flushed before we
  	 * possibly truncate them.. so write AND block!
  	 */
  	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
c8fd0d37f   Yan, Zheng   ceph: handle race...
1852
1853
1854
1855
1856
1857
1858
1859
1860
  		struct ceph_cap_snap *capsnap;
  		to = ci->i_truncate_size;
  		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
  			// MDS should have revoked Frw caps
  			WARN_ON_ONCE(capsnap->writing);
  			if (capsnap->dirty_pages && capsnap->size > to)
  				to = capsnap->size;
  		}
  		spin_unlock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
1861
1862
1863
  		dout("__do_pending_vmtruncate %p flushing snaps first
  ",
  		     inode);
c8fd0d37f   Yan, Zheng   ceph: handle race...
1864
1865
  
  		truncate_pagecache(inode, to);
355da1eb7   Sage Weil   ceph: inode opera...
1866
1867
1868
1869
  		filemap_write_and_wait_range(&inode->i_data, 0,
  					     inode->i_sb->s_maxbytes);
  		goto retry;
  	}
b0d7c2231   Yan, Zheng   ceph: introduce i...
1870
1871
  	/* there should be no reader or writer */
  	WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref);
355da1eb7   Sage Weil   ceph: inode opera...
1872
1873
1874
1875
1876
  	to = ci->i_truncate_size;
  	wrbuffer_refs = ci->i_wrbuffer_ref;
  	dout("__do_pending_vmtruncate %p (%d) to %lld
  ", inode,
  	     ci->i_truncate_pending, to);
be655596b   Sage Weil   ceph: use i_ceph_...
1877
  	spin_unlock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
1878

4e217b5dc   Yan, Zheng   ceph: use truncat...
1879
  	truncate_pagecache(inode, to);
355da1eb7   Sage Weil   ceph: inode opera...
1880

be655596b   Sage Weil   ceph: use i_ceph_...
1881
  	spin_lock(&ci->i_ceph_lock);
a85f50b6e   Yan, Zheng   ceph: Fix __ceph_...
1882
1883
1884
1885
  	if (to == ci->i_truncate_size) {
  		ci->i_truncate_pending = 0;
  		finish = 1;
  	}
be655596b   Sage Weil   ceph: use i_ceph_...
1886
  	spin_unlock(&ci->i_ceph_lock);
a85f50b6e   Yan, Zheng   ceph: Fix __ceph_...
1887
1888
  	if (!finish)
  		goto retry;
355da1eb7   Sage Weil   ceph: inode opera...
1889

b0d7c2231   Yan, Zheng   ceph: introduce i...
1890
  	mutex_unlock(&ci->i_truncate_mutex);
355da1eb7   Sage Weil   ceph: inode opera...
1891
1892
  	if (wrbuffer_refs == 0)
  		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
a85f50b6e   Yan, Zheng   ceph: Fix __ceph_...
1893
1894
  
  	wake_up_all(&ci->i_cap_wq);
355da1eb7   Sage Weil   ceph: inode opera...
1895
  }
355da1eb7   Sage Weil   ceph: inode opera...
1896
1897
1898
  /*
   * symlinks
   */
355da1eb7   Sage Weil   ceph: inode opera...
1899
  static const struct inode_operations ceph_symlink_iops = {
6b2553918   Al Viro   replace ->follow_...
1900
  	.get_link = simple_get_link,
0b9326725   Yan, Zheng   ceph: fix symlink...
1901
1902
  	.setattr = ceph_setattr,
  	.getattr = ceph_getattr,
0b9326725   Yan, Zheng   ceph: fix symlink...
1903
  	.listxattr = ceph_listxattr,
355da1eb7   Sage Weil   ceph: inode opera...
1904
  };
a26feccab   Andreas Gruenbacher   ceph: Get rid of ...
1905
  int __ceph_setattr(struct inode *inode, struct iattr *attr)
355da1eb7   Sage Weil   ceph: inode opera...
1906
  {
355da1eb7   Sage Weil   ceph: inode opera...
1907
  	struct ceph_inode_info *ci = ceph_inode(inode);
355da1eb7   Sage Weil   ceph: inode opera...
1908
1909
  	const unsigned int ia_valid = attr->ia_valid;
  	struct ceph_mds_request *req;
a26feccab   Andreas Gruenbacher   ceph: Get rid of ...
1910
  	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
f66fd9f09   Yan, Zheng   ceph: pre-allocat...
1911
  	struct ceph_cap_flush *prealloc_cf;
355da1eb7   Sage Weil   ceph: inode opera...
1912
1913
1914
1915
  	int issued;
  	int release = 0, dirtied = 0;
  	int mask = 0;
  	int err = 0;
fca65b4ad   Sage Weil   ceph: do not call...
1916
  	int inode_dirty_flags = 0;
604d1b024   Yan, Zheng   ceph: take snap_r...
1917
  	bool lock_snap_rwsem = false;
355da1eb7   Sage Weil   ceph: inode opera...
1918

f66fd9f09   Yan, Zheng   ceph: pre-allocat...
1919
1920
1921
  	prealloc_cf = ceph_alloc_cap_flush();
  	if (!prealloc_cf)
  		return -ENOMEM;
355da1eb7   Sage Weil   ceph: inode opera...
1922
1923
  	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
  				       USE_AUTH_MDS);
f66fd9f09   Yan, Zheng   ceph: pre-allocat...
1924
1925
  	if (IS_ERR(req)) {
  		ceph_free_cap_flush(prealloc_cf);
355da1eb7   Sage Weil   ceph: inode opera...
1926
  		return PTR_ERR(req);
f66fd9f09   Yan, Zheng   ceph: pre-allocat...
1927
  	}
355da1eb7   Sage Weil   ceph: inode opera...
1928

be655596b   Sage Weil   ceph: use i_ceph_...
1929
  	spin_lock(&ci->i_ceph_lock);
355da1eb7   Sage Weil   ceph: inode opera...
1930
  	issued = __ceph_caps_issued(ci, NULL);
604d1b024   Yan, Zheng   ceph: take snap_r...
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
  
  	if (!ci->i_head_snapc &&
  	    (issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) {
  		lock_snap_rwsem = true;
  		if (!down_read_trylock(&mdsc->snap_rwsem)) {
  			spin_unlock(&ci->i_ceph_lock);
  			down_read(&mdsc->snap_rwsem);
  			spin_lock(&ci->i_ceph_lock);
  			issued = __ceph_caps_issued(ci, NULL);
  		}
  	}
355da1eb7   Sage Weil   ceph: inode opera...
1942
1943
1944
1945
1946
1947
  	dout("setattr %p issued %s
  ", inode, ceph_cap_string(issued));
  
  	if (ia_valid & ATTR_UID) {
  		dout("setattr %p uid %d -> %d
  ", inode,
bd2bae6a6   Eric W. Biederman   ceph: Convert kui...
1948
1949
  		     from_kuid(&init_user_ns, inode->i_uid),
  		     from_kuid(&init_user_ns, attr->ia_uid));
355da1eb7   Sage Weil   ceph: inode opera...
1950
1951
1952
1953
  		if (issued & CEPH_CAP_AUTH_EXCL) {
  			inode->i_uid = attr->ia_uid;
  			dirtied |= CEPH_CAP_AUTH_EXCL;
  		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
ab871b903   Eric W. Biederman   ceph: Translate i...
1954
1955
1956
  			   !uid_eq(attr->ia_uid, inode->i_uid)) {
  			req->r_args.setattr.uid = cpu_to_le32(
  				from_kuid(&init_user_ns, attr->ia_uid));
355da1eb7   Sage Weil   ceph: inode opera...
1957
1958
1959
1960
1961
1962
1963
  			mask |= CEPH_SETATTR_UID;
  			release |= CEPH_CAP_AUTH_SHARED;
  		}
  	}
  	if (ia_valid & ATTR_GID) {
  		dout("setattr %p gid %d -> %d
  ", inode,
bd2bae6a6   Eric W. Biederman   ceph: Convert kui...
1964
1965
  		     from_kgid(&init_user_ns, inode->i_gid),
  		     from_kgid(&init_user_ns, attr->ia_gid));
355da1eb7   Sage Weil   ceph: inode opera...
1966
1967
1968
1969
  		if (issued & CEPH_CAP_AUTH_EXCL) {
  			inode->i_gid = attr->ia_gid;
  			dirtied |= CEPH_CAP_AUTH_EXCL;
  		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
ab871b903   Eric W. Biederman   ceph: Translate i...
1970
1971
1972
  			   !gid_eq(attr->ia_gid, inode->i_gid)) {
  			req->r_args.setattr.gid = cpu_to_le32(
  				from_kgid(&init_user_ns, attr->ia_gid));
355da1eb7   Sage Weil   ceph: inode opera...
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
  			mask |= CEPH_SETATTR_GID;
  			release |= CEPH_CAP_AUTH_SHARED;
  		}
  	}
  	if (ia_valid & ATTR_MODE) {
  		dout("setattr %p mode 0%o -> 0%o
  ", inode, inode->i_mode,
  		     attr->ia_mode);
  		if (issued & CEPH_CAP_AUTH_EXCL) {
  			inode->i_mode = attr->ia_mode;
  			dirtied |= CEPH_CAP_AUTH_EXCL;
  		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
  			   attr->ia_mode != inode->i_mode) {
7221fe4c2   Guangliang Zhao   ceph: add acl for...
1986
  			inode->i_mode = attr->ia_mode;
355da1eb7   Sage Weil   ceph: inode opera...
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
  			req->r_args.setattr.mode = cpu_to_le32(attr->ia_mode);
  			mask |= CEPH_SETATTR_MODE;
  			release |= CEPH_CAP_AUTH_SHARED;
  		}
  	}
  
  	if (ia_valid & ATTR_ATIME) {
  		dout("setattr %p atime %ld.%ld -> %ld.%ld
  ", inode,
  		     inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
  		     attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
  		if (issued & CEPH_CAP_FILE_EXCL) {
  			ci->i_time_warp_seq++;
  			inode->i_atime = attr->ia_atime;
  			dirtied |= CEPH_CAP_FILE_EXCL;
  		} else if ((issued & CEPH_CAP_FILE_WR) &&
  			   timespec_compare(&inode->i_atime,
  					    &attr->ia_atime) < 0) {
  			inode->i_atime = attr->ia_atime;
  			dirtied |= CEPH_CAP_FILE_WR;
  		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
  			   !timespec_equal(&inode->i_atime, &attr->ia_atime)) {
  			ceph_encode_timespec(&req->r_args.setattr.atime,
  					     &attr->ia_atime);
  			mask |= CEPH_SETATTR_ATIME;
  			release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD |
  				CEPH_CAP_FILE_WR;
  		}
  	}
  	if (ia_valid & ATTR_MTIME) {
  		dout("setattr %p mtime %ld.%ld -> %ld.%ld
  ", inode,
  		     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
  		     attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
  		if (issued & CEPH_CAP_FILE_EXCL) {
  			ci->i_time_warp_seq++;
  			inode->i_mtime = attr->ia_mtime;
  			dirtied |= CEPH_CAP_FILE_EXCL;
  		} else if ((issued & CEPH_CAP_FILE_WR) &&
  			   timespec_compare(&inode->i_mtime,
  					    &attr->ia_mtime) < 0) {
  			inode->i_mtime = attr->ia_mtime;
  			dirtied |= CEPH_CAP_FILE_WR;
  		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
  			   !timespec_equal(&inode->i_mtime, &attr->ia_mtime)) {
  			ceph_encode_timespec(&req->r_args.setattr.mtime,
  					     &attr->ia_mtime);
  			mask |= CEPH_SETATTR_MTIME;
  			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
  				CEPH_CAP_FILE_WR;
  		}
  	}
  	if (ia_valid & ATTR_SIZE) {
  		dout("setattr %p size %lld -> %lld
  ", inode,
  		     inode->i_size, attr->ia_size);
355da1eb7   Sage Weil   ceph: inode opera...
2043
2044
  		if ((issued & CEPH_CAP_FILE_EXCL) &&
  		    attr->ia_size > inode->i_size) {
99c88e690   Yan, Zheng   ceph: use i_size_...
2045
  			i_size_write(inode, attr->ia_size);
224a7542b   Yan, Zheng   ceph: tolerate ba...
2046
  			inode->i_blocks = calc_inode_blocks(attr->ia_size);
355da1eb7   Sage Weil   ceph: inode opera...
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
  			ci->i_reported_size = attr->ia_size;
  			dirtied |= CEPH_CAP_FILE_EXCL;
  		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
  			   attr->ia_size != inode->i_size) {
  			req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
  			req->r_args.setattr.old_size =
  				cpu_to_le64(inode->i_size);
  			mask |= CEPH_SETATTR_SIZE;
  			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
  				CEPH_CAP_FILE_WR;
  		}
  	}
  
  	/* these do nothing */
  	if (ia_valid & ATTR_CTIME) {
  		bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
  					 ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
  		dout("setattr %p ctime %ld.%ld -> %ld.%ld (%s)
  ", inode,
  		     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
  		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
  		     only ? "ctime only" : "ignored");
355da1eb7   Sage Weil   ceph: inode opera...
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
  		if (only) {
  			/*
  			 * if kernel wants to dirty ctime but nothing else,
  			 * we need to choose a cap to dirty under, or do
  			 * a almost-no-op setattr
  			 */
  			if (issued & CEPH_CAP_AUTH_EXCL)
  				dirtied |= CEPH_CAP_AUTH_EXCL;
  			else if (issued & CEPH_CAP_FILE_EXCL)
  				dirtied |= CEPH_CAP_FILE_EXCL;
  			else if (issued & CEPH_CAP_XATTR_EXCL)
  				dirtied |= CEPH_CAP_XATTR_EXCL;
  			else
  				mask |= CEPH_SETATTR_CTIME;
  		}
  	}
  	if (ia_valid & ATTR_FILE)
  		dout("setattr %p ATTR_FILE ... hrm!
  ", inode);
  
  	if (dirtied) {
f66fd9f09   Yan, Zheng   ceph: pre-allocat...
2090
2091
  		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
  							   &prealloc_cf);
4ca2fea6f   Yan, Zheng   ceph: unify inode...
2092
  		inode->i_ctime = attr->ia_ctime;
355da1eb7   Sage Weil   ceph: inode opera...
2093
2094
2095
  	}
  
  	release &= issued;
be655596b   Sage Weil   ceph: use i_ceph_...
2096
  	spin_unlock(&ci->i_ceph_lock);
604d1b024   Yan, Zheng   ceph: take snap_r...
2097
2098
  	if (lock_snap_rwsem)
  		up_read(&mdsc->snap_rwsem);
355da1eb7   Sage Weil   ceph: inode opera...
2099

fca65b4ad   Sage Weil   ceph: do not call...
2100
2101
  	if (inode_dirty_flags)
  		__mark_inode_dirty(inode, inode_dirty_flags);
7221fe4c2   Guangliang Zhao   ceph: add acl for...
2102

355da1eb7   Sage Weil   ceph: inode opera...
2103
  	if (mask) {
70b666c3b   Sage Weil   ceph: use ihold w...
2104
2105
  		req->r_inode = inode;
  		ihold(inode);
355da1eb7   Sage Weil   ceph: inode opera...
2106
2107
2108
  		req->r_inode_drop = release;
  		req->r_args.setattr.mask = cpu_to_le32(mask);
  		req->r_num_caps = 1;
4ca2fea6f   Yan, Zheng   ceph: unify inode...
2109
  		req->r_stamp = attr->ia_ctime;
752c8bdcf   Sage Weil   ceph: do not chai...
2110
  		err = ceph_mdsc_do_request(mdsc, NULL, req);
355da1eb7   Sage Weil   ceph: inode opera...
2111
2112
2113
2114
2115
2116
  	}
  	dout("setattr %p result=%d (%s locally, %d remote)
  ", inode, err,
  	     ceph_cap_string(dirtied), mask);
  
  	ceph_mdsc_put_request(req);
f66fd9f09   Yan, Zheng   ceph: pre-allocat...
2117
  	ceph_free_cap_flush(prealloc_cf);
8179a101e   Yan, Zheng   ceph: fix recursi...
2118
2119
2120
  
  	if (err >= 0 && (mask & CEPH_SETATTR_SIZE))
  		__ceph_do_pending_vmtruncate(inode);
355da1eb7   Sage Weil   ceph: inode opera...
2121
2122
2123
2124
  	return err;
  }
  
  /*
   * setattr entry point.
   *
   * Inodes that are not the live (CEPH_NOSNAP) version are rejected with
   * -EROFS.  Otherwise the request is vetted by setattr_prepare() and then
   * applied by __ceph_setattr().  If that did not fail and the mode is
   * among the changed attributes, posix_acl_chmod() is called with the new
   * mode and its result becomes the return value.
   */
  int ceph_setattr(struct dentry *dentry, struct iattr *attr)
  {
  	struct inode *inode = d_inode(dentry);
  	int ret;
  
  	if (ceph_snap(inode) != CEPH_NOSNAP)
  		return -EROFS;
  
  	ret = setattr_prepare(dentry, attr);
  	if (ret)
  		return ret;
  
  	ret = __ceph_setattr(inode, attr);
  	if (ret < 0)
  		return ret;
  
  	/* a mode change also has to be pushed through posix_acl_chmod() */
  	if (attr->ia_valid & ATTR_MODE)
  		ret = posix_acl_chmod(inode, attr->ia_mode);
  
  	return ret;
  }
  
  /*
355da1eb7   Sage Weil   ceph: inode opera...
2146
2147
2148
   * Verify that we have a lease on the given mask.  If not,
   * do a getattr against an mds.
   */
01deead04   Yan, Zheng   ceph: use getattr...
2149
2150
  int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
  		      int mask, bool force)
355da1eb7   Sage Weil   ceph: inode opera...
2151
  {
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
2152
2153
  	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
  	struct ceph_mds_client *mdsc = fsc->mdsc;
355da1eb7   Sage Weil   ceph: inode opera...
2154
2155
2156
2157
2158
2159
2160
2161
  	struct ceph_mds_request *req;
  	int err;
  
  	if (ceph_snap(inode) == CEPH_SNAPDIR) {
  		dout("do_getattr inode %p SNAPDIR
  ", inode);
  		return 0;
  	}
01deead04   Yan, Zheng   ceph: use getattr...
2162
2163
2164
  	dout("do_getattr inode %p mask %s mode 0%o
  ",
  	     inode, ceph_cap_string(mask), inode->i_mode);
508b32d86   Yan, Zheng   ceph: request xat...
2165
  	if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
355da1eb7   Sage Weil   ceph: inode opera...
2166
2167
2168
2169
2170
  		return 0;
  
  	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
  	if (IS_ERR(req))
  		return PTR_ERR(req);
70b666c3b   Sage Weil   ceph: use ihold w...
2171
2172
  	req->r_inode = inode;
  	ihold(inode);
355da1eb7   Sage Weil   ceph: inode opera...
2173
2174
  	req->r_num_caps = 1;
  	req->r_args.getattr.mask = cpu_to_le32(mask);
01deead04   Yan, Zheng   ceph: use getattr...
2175
  	req->r_locked_page = locked_page;
355da1eb7   Sage Weil   ceph: inode opera...
2176
  	err = ceph_mdsc_do_request(mdsc, NULL, req);
01deead04   Yan, Zheng   ceph: use getattr...
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
  	if (locked_page && err == 0) {
  		u64 inline_version = req->r_reply_info.targeti.inline_version;
  		if (inline_version == 0) {
  			/* the reply is supposed to contain inline data */
  			err = -EINVAL;
  		} else if (inline_version == CEPH_INLINE_NONE) {
  			err = -ENODATA;
  		} else {
  			err = req->r_reply_info.targeti.inline_len;
  		}
  	}
355da1eb7   Sage Weil   ceph: inode opera...
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
  	ceph_mdsc_put_request(req);
  	dout("do_getattr result=%d
  ", err);
  	return err;
  }
  
  
  /*
   * Check inode permissions.  We verify we have a valid value for
   * the AUTH cap, then call the generic handler.
   */
10556cb21   Al Viro   ->permission() sa...
2199
  int ceph_permission(struct inode *inode, int mask)
355da1eb7   Sage Weil   ceph: inode opera...
2200
  {
b74c79e99   Nick Piggin   fs: provide rcu-w...
2201
  	int err;
10556cb21   Al Viro   ->permission() sa...
2202
  	if (mask & MAY_NOT_BLOCK)
b74c79e99   Nick Piggin   fs: provide rcu-w...
2203
  		return -ECHILD;
508b32d86   Yan, Zheng   ceph: request xat...
2204
  	err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false);
355da1eb7   Sage Weil   ceph: inode opera...
2205
2206
  
  	if (!err)
2830ba7f3   Al Viro   ->permission() sa...
2207
  		err = generic_permission(inode, mask);
355da1eb7   Sage Weil   ceph: inode opera...
2208
2209
2210
2211
2212
2213
2214
  	return err;
  }
  
  /*
   * Get all attributes.  Hopefully somedata we'll have a statlite()
   * and can limit the fields we require to be accurate.
   */
a528d35e8   David Howells   statx: Add a syst...
2215
2216
  int ceph_getattr(const struct path *path, struct kstat *stat,
  		 u32 request_mask, unsigned int flags)
355da1eb7   Sage Weil   ceph: inode opera...
2217
  {
a528d35e8   David Howells   statx: Add a syst...
2218
  	struct inode *inode = d_inode(path->dentry);
232d4b013   Sage Weil   ceph: move direct...
2219
  	struct ceph_inode_info *ci = ceph_inode(inode);
355da1eb7   Sage Weil   ceph: inode opera...
2220
  	int err;
508b32d86   Yan, Zheng   ceph: request xat...
2221
  	err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false);
355da1eb7   Sage Weil   ceph: inode opera...
2222
2223
  	if (!err) {
  		generic_fillattr(inode, stat);
ad1fee96c   Yehuda Sadeh   ceph: add ino32 m...
2224
  		stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
355da1eb7   Sage Weil   ceph: inode opera...
2225
2226
2227
2228
  		if (ceph_snap(inode) != CEPH_NOSNAP)
  			stat->dev = ceph_snap(inode);
  		else
  			stat->dev = 0;
232d4b013   Sage Weil   ceph: move direct...
2229
  		if (S_ISDIR(inode->i_mode)) {
1c1266bb9   Yehuda Sadeh   ceph: fix getattr...
2230
2231
2232
2233
2234
  			if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
  						RBYTES))
  				stat->size = ci->i_rbytes;
  			else
  				stat->size = ci->i_files + ci->i_subdirs;
232d4b013   Sage Weil   ceph: move direct...
2235
  			stat->blocks = 0;
355da1eb7   Sage Weil   ceph: inode opera...
2236
  			stat->blksize = 65536;
232d4b013   Sage Weil   ceph: move direct...
2237
  		}
355da1eb7   Sage Weil   ceph: inode opera...
2238
2239
2240
  	}
  	return err;
  }