Blame view

fs/ceph/super.h 26.2 KB
de57606c2   Sage Weil   ceph: client types
1
2
  #ifndef _FS_CEPH_SUPER_H
  #define _FS_CEPH_SUPER_H
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
3
  #include <linux/ceph/ceph_debug.h>
de57606c2   Sage Weil   ceph: client types
4
5
6
7
8
9
10
11
12
  
  #include <asm/unaligned.h>
  #include <linux/backing-dev.h>
  #include <linux/completion.h>
  #include <linux/exportfs.h>
  #include <linux/fs.h>
  #include <linux/mempool.h>
  #include <linux/pagemap.h>
  #include <linux/wait.h>
f1a3d5721   Stephen Rothwell   ceph: update for ...
13
  #include <linux/writeback.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
14
  #include <linux/slab.h>
de57606c2   Sage Weil   ceph: client types
15

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
16
  #include <linux/ceph/libceph.h>
de57606c2   Sage Weil   ceph: client types
17
18
19
20
21
22
23
24
  
  /* f_type in struct statfs */
  #define CEPH_SUPER_MAGIC 0x00c36400
  
  /* large granularity for statfs utilization stats to facilitate
   * large volume sizes on 32-bit machines. */
  #define CEPH_BLOCK_SHIFT   20  /* 1 MB */
  #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
25
26
27
  #define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
  #define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
  #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
ad1fee96c   Yehuda Sadeh   ceph: add ino32 m...
28
  #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
a40dc6cc2   Sage Weil   ceph: enable/disa...
29
  #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
6a2593823   Sage Weil   ceph: specify sup...
30

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
31
  #define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
de57606c2   Sage Weil   ceph: client types
32

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
33
34
35
36
  /*
   * Set mount option flag CEPH_MOUNT_OPT_<opt> in (fsc)->mount_options->flags.
   *
   * Expands to one parenthesized expression with no trailing semicolon: the
   * caller supplies the ';', so the macro is safe as the sole statement of
   * an if/else branch.
   */
  #define ceph_set_mount_opt(fsc, opt) \
  	((fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt)
  /* Evaluates to 1 if CEPH_MOUNT_OPT_<opt> is set in the mount flags, else 0. */
  #define ceph_test_mount_opt(fsc, opt) \
  	(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
de57606c2   Sage Weil   ceph: client types
37

83817e35c   Sage Weil   ceph: rename rsiz...
38
39
  #define CEPH_RSIZE_DEFAULT             0           /* max read size */
  #define CEPH_RASIZE_DEFAULT            (8192*1024) /* readahead */
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
40
41
42
  #define CEPH_MAX_READDIR_DEFAULT        1024
  #define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
  #define CEPH_SNAPDIRNAME_DEFAULT        ".snap"
de57606c2   Sage Weil   ceph: client types
43

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
44
  /*
   * Per-mount tunables for a ceph filesystem client.  The leading run of
   * plain int fields is compared bytewise; the trailing pointer member is
   * compared separately (see the note inside the struct).
   */
  struct ceph_mount_options {
6e19a16ef   Sage Weil   ceph: clean up mo...
45
  	int flags;            /* CEPH_MOUNT_OPT_* bits */
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
46
  	int sb_flags;
83817e35c   Sage Weil   ceph: rename rsiz...
47
48
49
  	int wsize;            /* max write size */
  	int rsize;            /* max read size */
  	int rasize;           /* max readahead */
6e19a16ef   Sage Weil   ceph: clean up mo...
50
51
52
  	int congestion_kb;    /* max writeback in flight */
  	int caps_wanted_delay_min, caps_wanted_delay_max;
  	int cap_release_safety;
23804d91f   Sage Weil   ceph: specify max...
53
54
  	int max_readdir;       /* max readdir result (entries) */
  	int max_readdir_bytes; /* max readdir result (bytes) */
de57606c2   Sage Weil   ceph: client types
55

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
56
57
58
59
  	/*
  	 * everything above this point can be memcmp'd; everything below
  	 * is handled in compare_mount_options()
  	 */
de57606c2   Sage Weil   ceph: client types
60

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
61
62
  	char *snapdir_name;   /* default ".snap" */
  };
de57606c2   Sage Weil   ceph: client types
63

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
64
  struct ceph_fs_client {
de57606c2   Sage Weil   ceph: client types
65
  	struct super_block *sb;
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
66
67
  	struct ceph_mount_options *mount_options;
  	struct ceph_client *client;
de57606c2   Sage Weil   ceph: client types
68

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
69
  	unsigned long mount_state;
85ccce43a   Sage Weil   ceph: clean up re...
70
  	int min_caps;                  /* min caps i added */
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
71
  	struct ceph_mds_client *mdsc;
de57606c2   Sage Weil   ceph: client types
72
73
74
75
76
77
  
  	/* writeback */
  	mempool_t *wb_pagevec_pool;
  	struct workqueue_struct *wb_wq;
  	struct workqueue_struct *pg_inv_wq;
  	struct workqueue_struct *trunc_wq;
2baba2501   Yehuda Sadeh   ceph: writeback c...
78
  	atomic_long_t writeback_count;
de57606c2   Sage Weil   ceph: client types
79
80
  
  	struct backing_dev_info backing_dev_info;
0743304d8   Sage Weil   ceph: fix debugfs...
81
82
  
  #ifdef CONFIG_DEBUG_FS
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
83
  	struct dentry *debugfs_dentry_lru, *debugfs_caps;
2baba2501   Yehuda Sadeh   ceph: writeback c...
84
  	struct dentry *debugfs_congestion_kb;
06edf046d   Sage Weil   ceph: include lin...
85
  	struct dentry *debugfs_bdi;
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
86
  	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
0743304d8   Sage Weil   ceph: fix debugfs...
87
  #endif
de57606c2   Sage Weil   ceph: client types
88
  };
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
89

de57606c2   Sage Weil   ceph: client types
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  /*
   * File i/o capability.  This tracks shared state with the metadata
   * server that allows us to cache or writeback attributes or to read
   * and write data.  For any given inode, we should have one or more
   * capabilities, one issued by each metadata server, and our
   * cumulative access is the OR of all issued capabilities.
   *
   * Each cap is referenced by the inode's i_caps rbtree and by per-mds
   * session capability lists.
   */
  struct ceph_cap {
  	struct ceph_inode_info *ci;
  	struct rb_node ci_node;          /* per-ci cap tree */
  	struct ceph_mds_session *session;
  	struct list_head session_caps;   /* per-session caplist */
  	int mds;
  	u64 cap_id;       /* unique cap id (mds provided) */
  	int issued;       /* latest, from the mds */
  	int implemented;  /* implemented superset of issued (for revocation) */
  	int mds_wanted;
685f9a5d1   Sage Weil   ceph: do not conf...
110
111
  	u32 seq, issue_seq, mseq;
  	u32 cap_gen;      /* active/stale cycle */
de57606c2   Sage Weil   ceph: client types
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  	unsigned long last_used;
  	struct list_head caps_item;
  };
  
  #define CHECK_CAPS_NODELAY    1  /* do not delay any further */
  #define CHECK_CAPS_AUTHONLY   2  /* only check auth cap */
  #define CHECK_CAPS_FLUSH      4  /* flush any dirty caps */
  
  /*
   * Snapped cap state that is pending flush to mds.  When a snapshot occurs,
   * we first complete any in-process sync writes and writeback any dirty
   * data before flushing the snapped state (tracked here) back to the MDS.
   */
  struct ceph_cap_snap {
  	atomic_t nref;
  	struct ceph_inode_info *ci;
  	struct list_head ci_item, flushing_item;
  
  	u64 follows, flush_tid;
  	int issued, dirty;
  	struct ceph_snap_context *context;
5706b27de   Al Viro   ceph: propagate u...
133
  	umode_t mode;
de57606c2   Sage Weil   ceph: client types
134
135
  	uid_t uid;
  	gid_t gid;
4a625be47   Sage Weil   ceph: include dir...
136
  	struct ceph_buffer *xattr_blob;
de57606c2   Sage Weil   ceph: client types
137
138
139
140
141
142
143
144
145
146
147
  	u64 xattr_version;
  
  	u64 size;
  	struct timespec mtime, atime, ctime;
  	u64 time_warp_seq;
  	int writing;   /* a sync write is still in progress */
  	int dirty_pages;     /* dirty pages awaiting writeback */
  };
  
  static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
  {
4a625be47   Sage Weil   ceph: include dir...
148
149
150
  	if (atomic_dec_and_test(&capsnap->nref)) {
  		if (capsnap->xattr_blob)
  			ceph_buffer_put(capsnap->xattr_blob);
de57606c2   Sage Weil   ceph: client types
151
  		kfree(capsnap);
4a625be47   Sage Weil   ceph: include dir...
152
  	}
de57606c2   Sage Weil   ceph: client types
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
  }
  
  /*
   * The frag tree describes how a directory is fragmented, potentially across
   * multiple metadata servers.  It is also used to indicate points where
   * metadata authority is delegated, and whether/where metadata is replicated.
   *
   * A _leaf_ frag will be present in the i_fragtree IFF there is
   * delegation info.  That is, if mds >= 0 || ndist > 0.
   */
  #define CEPH_MAX_DIRFRAG_REP 4
  
  /*
   * One entry of an inode's frag tree (i_fragtree), linked in via @node.
   */
  struct ceph_inode_frag {
  	struct rb_node node;  /* rbtree linkage */
  
  	/* fragtree state */
  	u32 frag;
  	int split_by;         /* i.e. 2^(split_by) children */
  
  	/* delegation and replication info */
  	int mds;              /* -1 if same authority as parent */
  	int ndist;            /* >0 if replicated */
  	/* room for at most CEPH_MAX_DIRFRAG_REP entries; presumably the
  	 * first ndist are valid -- confirm against the code that fills it */
  	int dist[CEPH_MAX_DIRFRAG_REP];
  };
  
  /*
   * We cache inode xattrs as an encoded blob until they are first used,
   * at which point we parse them into an rbtree.
   */
  /*
   * A single parsed xattr: a (name, value) pair with explicit lengths,
   * linked into an rbtree via @node.
   */
  struct ceph_inode_xattr {
  	struct rb_node node;  /* rbtree linkage */
  
  	const char *name;
  	int name_len;         /* length of @name */
  	const char *val;
  	int val_len;          /* length of @val */
  	int dirty;
  
  	/* NOTE(review): presumably non-zero when name/val were allocated
  	 * separately and must be freed with the entry -- confirm in xattr.c */
  	int should_free_name;
  	int should_free_val;
  };
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
194
195
196
197
  /*
   * Ceph dentry state
   */
  struct ceph_dentry_info {
b58dc4100   Sage Weil   ceph: clear paren...
198
  	unsigned long flags;
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
199
200
201
202
203
204
205
206
207
  	struct ceph_mds_session *lease_session;
  	u32 lease_gen, lease_shared_gen;
  	u32 lease_seq;
  	unsigned long lease_renew_after, lease_renew_from;
  	struct list_head lru;
  	struct dentry *dentry;
  	u64 time;
  	u64 offset;
  };
b58dc4100   Sage Weil   ceph: clear paren...
208
209
210
211
212
213
  /*
   * dentry flags
   *
   * The locking for D_COMPLETE is a bit odd:
   *  - we can clear it at almost any time (see ceph_d_prune)
   *  - it is only meaningful if:
be655596b   Sage Weil   ceph: use i_ceph_...
214
   *    - we hold dir inode i_ceph_lock
b58dc4100   Sage Weil   ceph: clear paren...
215
216
217
218
   *    - we hold dir FILE_SHARED caps
   *    - the dentry D_COMPLETE is set
   */
  #define CEPH_D_COMPLETE 1  /* if set, d_u.d_subdirs is complete directory */
de57606c2   Sage Weil   ceph: client types
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
  /*
   * Per-inode xattr state: the still-encoded blob plus the rbtree index
   * (@index) and bookkeeping counters.
   */
  struct ceph_inode_xattrs_info {
  	/*
  	 * (still encoded) xattr blob. we avoid the overhead of parsing
  	 * this until someone actually calls getxattr, etc.
  	 *
  	 * blob->vec.iov_len == 4 implies there are no xattrs; blob ==
  	 * NULL means we don't know.
  	 */
  	struct ceph_buffer *blob, *prealloc_blob;
  
  	struct rb_root index; /* rbtree root; presumably of ceph_inode_xattr */
  	bool dirty;
  	int count;
  	int names_size;
  	int vals_size;
  	/* NOTE(review): presumably blob version vs. version the index was
  	 * built from -- confirm in xattr.c */
  	u64 version, index_version;
  };
  
  /*
   * Ceph inode.
   */
de57606c2   Sage Weil   ceph: client types
240
241
  struct ceph_inode_info {
  	struct ceph_vino i_vino;   /* ceph ino + snap */
be655596b   Sage Weil   ceph: use i_ceph_...
242
  	spinlock_t i_ceph_lock;
de57606c2   Sage Weil   ceph: client types
243
244
245
246
247
  	u64 i_version;
  	u32 i_time_warp_seq;
  
  	unsigned i_ceph_flags;
  	unsigned long i_release_count;
6c0f3af72   Sage Weil   ceph: add dir_lay...
248
  	struct ceph_dir_layout i_dir_layout;
de57606c2   Sage Weil   ceph: client types
249
250
251
252
253
254
255
  	struct ceph_file_layout i_layout;
  	char *i_symlink;
  
  	/* for dirs */
  	struct timespec i_rctime;
  	u64 i_rbytes, i_rfiles, i_rsubdirs;
  	u64 i_files, i_subdirs;
c6ffe1001   Sage Weil   ceph: use new D_C...
256
  	u64 i_max_offset;  /* largest readdir offset, set with D_COMPLETE */
de57606c2   Sage Weil   ceph: client types
257
258
259
260
261
  
  	struct rb_root i_fragtree;
  	struct mutex i_fragtree_mutex;
  
  	struct ceph_inode_xattrs_info i_xattrs;
be655596b   Sage Weil   ceph: use i_ceph_...
262
  	/* capabilities.  protected _both_ by i_ceph_lock and cap->session's
de57606c2   Sage Weil   ceph: client types
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
  	 * s_mutex. */
  	struct rb_root i_caps;           /* cap list */
  	struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
  	unsigned i_dirty_caps, i_flushing_caps;     /* mask of dirtied fields */
  	struct list_head i_dirty_item, i_flushing_item;
  	u64 i_cap_flush_seq;
  	/* we need to track cap writeback on a per-cap-bit basis, to allow
  	 * overlapping, pipelined cap flushes to the mds.  we can probably
  	 * reduce the tid to 8 bits if we're concerned about inode size. */
  	u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS];
  	wait_queue_head_t i_cap_wq;      /* threads waiting on a capability */
  	unsigned long i_hold_caps_min; /* jiffies */
  	unsigned long i_hold_caps_max; /* jiffies */
  	struct list_head i_cap_delay_list;  /* for delayed cap release to mds */
  	int i_cap_exporting_mds;         /* to handle cap migration between */
  	unsigned i_cap_exporting_mseq;   /*  mds's. */
  	unsigned i_cap_exporting_issued;
  	struct ceph_cap_reservation i_cap_migration_resv;
  	struct list_head i_cap_snaps;   /* snapped state pending flush to mds */
7d8cb26d7   Sage Weil   ceph: maintain i_...
282
283
  	struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
  						    dirty|flushing caps */
de57606c2   Sage Weil   ceph: client types
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
  	unsigned i_snap_caps;           /* cap bits for snapped files */
  
  	int i_nr_by_mode[CEPH_FILE_MODE_NUM];  /* open file counts */
  
  	u32 i_truncate_seq;        /* last truncate to smaller size */
  	u64 i_truncate_size;       /*  and the size we last truncated down to */
  	int i_truncate_pending;    /*  still need to call vmtruncate */
  
  	u64 i_max_size;            /* max file size authorized by mds */
  	u64 i_reported_size; /* (max_)size reported to or requested of mds */
  	u64 i_wanted_max_size;     /* offset we'd like to write to */
  	u64 i_requested_max_size;  /* max_size we've requested */
  
  	/* held references to caps */
  	int i_pin_ref;
d3d0720d4   Henry C Chang   ceph: do not use ...
299
  	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
de57606c2   Sage Weil   ceph: client types
300
301
  	int i_wrbuffer_ref, i_wrbuffer_ref_head;
  	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
cd045cb42   Sage Weil   ceph: fix rdcache...
302
  	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
de57606c2   Sage Weil   ceph: client types
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
  	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
  
  	struct list_head i_unsafe_writes; /* uncommitted sync writes */
  	struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
  	spinlock_t i_unsafe_lock;
  
  	struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
  	int i_snap_realm_counter; /* snap realm (if caps) */
  	struct list_head i_snap_realm_item;
  	struct list_head i_snap_flush_item;
  
  	struct work_struct i_wb_work;  /* writeback work */
  	struct work_struct i_pg_inv_work;  /* page invalidation work */
  
  	struct work_struct i_vmtruncate_work;
  
  	struct inode vfs_inode; /* at end */
  };
  
  static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
  {
fbbccec9c   Noah Watkins   ceph: replace lis...
324
  	return container_of(inode, struct ceph_inode_info, vfs_inode);
de57606c2   Sage Weil   ceph: client types
325
  }
ad1fee96c   Yehuda Sadeh   ceph: add ino32 m...
326
327
328
329
330
331
332
333
334
  static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
  {
  	return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
  }
  
  /* Return the ceph fs client kept in @sb->s_fs_info. */
  static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
  {
  	struct ceph_fs_client *fsc = (struct ceph_fs_client *)sb->s_fs_info;

  	return fsc;
  }
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
335
336
337
338
339
340
341
342
  static inline struct ceph_vino ceph_vino(struct inode *inode)
  {
  	return ceph_inode(inode)->i_vino;
  }
  
  /*
   * ino_t is <64 bits on many architectures, blech.
   *
ad1fee96c   Yehuda Sadeh   ceph: add ino32 m...
343
344
345
346
347
   *               i_ino (kernel inode)   st_ino (userspace)
   * i386          32                     32
   * x86_64+ino32  64                     32
   * x86_64        64                     64
   */
3310f7541   Amon Ott   ceph: fix 32-bit ...
348
  /*
   * Fold a 64-bit ceph ino into 32 bits by XORing its high and low halves.
   * A folded value of 0 is replaced by 1, so the result is never 0.
   */
  static inline u32 ceph_ino_to_ino32(__u64 vino)
  {
  	u32 low = (u32)(vino & 0xffffffff);
  	u32 high = (u32)(vino >> 32);
  	u32 folded = low ^ high;

  	return folded ? folded : 1;
  }
  
  /*
   * kernel i_ino value
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
359
360
361
   */
  static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
  {
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
362
  #if BITS_PER_LONG == 32
3310f7541   Amon Ott   ceph: fix 32-bit ...
363
364
365
  	return ceph_ino_to_ino32(vino.ino);
  #else
  	return (ino_t)vino.ino;
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
366
  #endif
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
367
  }
ad1fee96c   Yehuda Sadeh   ceph: add ino32 m...
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
  /*
   * user-visible ino (stat, filldir)
   */
  static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
  {
  #if BITS_PER_LONG != 32
  	/* fold to 32 bits only when the INO32 mount option is set */
  	if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
  		return ceph_ino_to_ino32(ino);
  #endif
  	/* BITS_PER_LONG == 32, or INO32 not set: pass through unchanged */
  	return ino;
  }
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
  /*
   * for printf-style formatting: expands to TWO comma-separated arguments
   * (the ino, then the snap, of inode @i).  It is deliberately not wrapped
   * in parentheses, and @i is evaluated twice.
   */
  #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
  
  static inline u64 ceph_ino(struct inode *inode)
  {
  	return ceph_inode(inode)->i_vino.ino;
  }
  static inline u64 ceph_snap(struct inode *inode)
  {
  	return ceph_inode(inode)->i_vino.snap;
  }
  
  static inline int ceph_ino_compare(struct inode *inode, void *data)
  {
  	struct ceph_vino *pvino = (struct ceph_vino *)data;
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	return ci->i_vino.ino == pvino->ino &&
  		ci->i_vino.snap == pvino->snap;
  }
  
  /*
   * Look up an inode on @sb by ceph vino via ilookup5(), keyed on the
   * kernel ino derived from @vino and matched exactly on (ino, snap).
   */
  static inline struct inode *ceph_find_inode(struct super_block *sb,
  					    struct ceph_vino vino)
  {
  	return ilookup5(sb, ceph_vino_to_ino(vino), ceph_ino_compare, &vino);
  }
  
  
  /*
   * Ceph inode flags (CEPH_I_*) and helpers.
   */
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
415
416
417
  #define CEPH_I_NODELAY   4  /* do not delay cap release */
  #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
  #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
de57606c2   Sage Weil   ceph: client types
418
419
420
  static inline void ceph_i_clear(struct inode *inode, unsigned mask)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
be655596b   Sage Weil   ceph: use i_ceph_...
421
  	spin_lock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
422
  	ci->i_ceph_flags &= ~mask;
be655596b   Sage Weil   ceph: use i_ceph_...
423
  	spin_unlock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
424
425
426
427
428
  }
  
  static inline void ceph_i_set(struct inode *inode, unsigned mask)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
be655596b   Sage Weil   ceph: use i_ceph_...
429
  	spin_lock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
430
  	ci->i_ceph_flags |= mask;
be655596b   Sage Weil   ceph: use i_ceph_...
431
  	spin_unlock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
432
433
434
435
436
437
  }
  
  static inline bool ceph_i_test(struct inode *inode, unsigned mask)
  {
  	struct ceph_inode_info *ci = ceph_inode(inode);
  	bool r;
be655596b   Sage Weil   ceph: use i_ceph_...
438
  	spin_lock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
439
  	r = (ci->i_ceph_flags & mask) == mask;
be655596b   Sage Weil   ceph: use i_ceph_...
440
  	spin_unlock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
  	return r;
  }
  
  
  /* find a specific frag @f */
  extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
  						u32 f);
  
  /*
   * choose fragment for value @v.  copy frag content to pfrag, if leaf
   * exists
   */
  extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
  			    struct ceph_inode_frag *pfrag,
  			    int *found);
de57606c2   Sage Weil   ceph: client types
456
457
458
459
460
461
462
463
464
  /* Return the ceph per-dentry state stored in @dentry->d_fsdata. */
  static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
  {
  	struct ceph_dentry_info *di;

  	di = (struct ceph_dentry_info *)dentry->d_fsdata;
  	return di;
  }
  
  /*
   * Pack a readdir position: @frag in the upper 32 bits, @off in the lower
   * 32 bits of the returned loff_t.
   *
   * The value is assembled as an unsigned 64-bit quantity and converted
   * once at the end, so a @frag with its top bit set never left-shifts
   * into the sign bit of a signed type (undefined behaviour in C).
   */
  static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
  {
  	return (loff_t)(((u64)frag << 32) | (u64)off);
  }
de57606c2   Sage Weil   ceph: client types
465
  /*
c6ffe1001   Sage Weil   ceph: use new D_C...
466
467
468
469
470
471
472
   * set/clear directory D_COMPLETE flag
   */
  void ceph_dir_set_complete(struct inode *inode);
  void ceph_dir_clear_complete(struct inode *inode);
  bool ceph_dir_test_complete(struct inode *inode);
  
  /*
de57606c2   Sage Weil   ceph: client types
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
   * caps helpers
   */
  static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
  {
  	return !RB_EMPTY_ROOT(&ci->i_caps);
  }
  
  extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
  extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
  extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
  				    struct ceph_cap *cap);
  
  static inline int ceph_caps_issued(struct ceph_inode_info *ci)
  {
  	int issued;
be655596b   Sage Weil   ceph: use i_ceph_...
488
  	spin_lock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
489
  	issued = __ceph_caps_issued(ci, NULL);
be655596b   Sage Weil   ceph: use i_ceph_...
490
  	spin_unlock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
491
492
493
494
495
496
497
  	return issued;
  }
  
  static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
  					int touch)
  {
  	int r;
be655596b   Sage Weil   ceph: use i_ceph_...
498
  	spin_lock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
499
  	r = __ceph_caps_issued_mask(ci, mask, touch);
be655596b   Sage Weil   ceph: use i_ceph_...
500
  	spin_unlock(&ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
501
502
503
504
505
506
507
  	return r;
  }
  
  /* Union of the inode's dirty and currently-flushing cap masks. */
  static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
  {
  	unsigned dirty = ci->i_dirty_caps;
  	unsigned flushing = ci->i_flushing_caps;

  	return dirty | flushing;
  }
fca65b4ad   Sage Weil   ceph: do not call...
508
  extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
de57606c2   Sage Weil   ceph: client types
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
  
  extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
  extern int __ceph_caps_used(struct ceph_inode_info *ci);
  
  extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
  
  /*
   * Caps we want: the union of what open file modes want and what is in
   * use through cap refs (buffered/cached data).  Wanting FILE_BUFFER
   * also implies wanting FILE_EXCL.
   */
  static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
  {
  	int wanted = __ceph_caps_file_wanted(ci);

  	wanted |= __ceph_caps_used(ci);
  	/* we want EXCL if dirty data */
  	return (wanted & CEPH_CAP_FILE_BUFFER) ?
  		(wanted | CEPH_CAP_FILE_EXCL) : wanted;
  }
  
  /* what the mds thinks we want */
  extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
37151668b   Yehuda Sadeh   ceph: do caps acc...
528
529
530
531
532
533
534
  extern void ceph_caps_init(struct ceph_mds_client *mdsc);
  extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
  extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
  extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
  			     struct ceph_cap_reservation *ctx, int need);
  extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
  			       struct ceph_cap_reservation *ctx);
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
535
  extern void ceph_reservation_status(struct ceph_fs_client *client,
de57606c2   Sage Weil   ceph: client types
536
  				    int *total, int *avail, int *used,
85ccce43a   Sage Weil   ceph: clean up re...
537
  				    int *reserved, int *min);
de57606c2   Sage Weil   ceph: client types
538

de57606c2   Sage Weil   ceph: client types
539

de57606c2   Sage Weil   ceph: client types
540
541
542
543
  
  /*
   * we keep buffered readdir results attached to file->private_data
   */
4918b6d14   Sage Weil   ceph: add F_SYNC ...
544
  #define CEPH_F_SYNC     1
9cfa1098d   Sage Weil   ceph: use flag bi...
545
  #define CEPH_F_ATEND    2
4918b6d14   Sage Weil   ceph: add F_SYNC ...
546

de57606c2   Sage Weil   ceph: client types
547
  struct ceph_file_info {
252c6728d   Sage Weil   ceph: add flags f...
548
549
  	short fmode;     /* initialized on open */
  	short flags;     /* CEPH_F_* */
de57606c2   Sage Weil   ceph: client types
550
551
552
553
  
  	/* readdir: position within the dir */
  	u32 frag;
  	struct ceph_mds_request *last_readdir;
de57606c2   Sage Weil   ceph: client types
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
  
  	/* readdir: position within a frag */
  	unsigned offset;       /* offset of last chunk, adjusted for . and .. */
  	u64 next_offset;       /* offset of next chunk (last_name's + 1) */
  	char *last_name;       /* last entry in previous chunk */
  	struct dentry *dentry; /* next dentry (for dcache readdir) */
  	unsigned long dir_release_count;
  
  	/* used for -o dirstat read() on directory thing */
  	char *dir_info;
  	int dir_info_len;
  };
  
  
  
  /*
de57606c2   Sage Weil   ceph: client types
570
571
572
573
574
575
576
577
578
579
580
   * A "snap realm" describes a subset of the file hierarchy sharing
   * the same set of snapshots that apply to it.  The realms themselves
   * are organized into a hierarchy, such that children inherit (some of)
   * the snapshots of their parents.
   *
   * All inodes within the realm that have capabilities are linked into a
   * per-realm list.
   */
  struct ceph_snap_realm {
  	u64 ino;
  	atomic_t nref;
a105f00cf   Sage Weil   ceph: use rbtree ...
581
  	struct rb_node node;
de57606c2   Sage Weil   ceph: client types
582
583
584
585
586
587
588
589
590
591
592
593
594
595
  	u64 created, seq;
  	u64 parent_ino;
  	u64 parent_since;   /* snapid when our current parent became so */
  
  	u64 *prior_parent_snaps;      /* snaps inherited from any parents we */
  	int num_prior_parent_snaps;   /*  had prior to parent_since */
  	u64 *snaps;                   /* snaps specific to this realm */
  	int num_snaps;
  
  	struct ceph_snap_realm *parent;
  	struct list_head children;       /* list of child realms */
  	struct list_head child_item;
  
  	struct list_head empty_item;     /* if i have ref==0 */
ae00d4f37   Sage Weil   ceph: fix cap_sna...
596
  	struct list_head dirty_item;     /* if realm needs new context */
de57606c2   Sage Weil   ceph: client types
597
598
599
600
601
602
  	/* the current set of snaps for this realm */
  	struct ceph_snap_context *cached_context;
  
  	struct list_head inodes_with_caps;
  	spinlock_t inodes_with_caps_lock;
  };
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
603
  static inline int default_congestion_kb(void)
de57606c2   Sage Weil   ceph: client types
604
  {
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
  	int congestion_kb;
  
  	/*
  	 * Copied from NFS
  	 *
  	 * congestion size, scale with available memory.
  	 *
  	 *  64MB:    8192k
  	 * 128MB:   11585k
  	 * 256MB:   16384k
  	 * 512MB:   23170k
  	 *   1GB:   32768k
  	 *   2GB:   46340k
  	 *   4GB:   65536k
  	 *   8GB:   92681k
  	 *  16GB:  131072k
  	 *
  	 * This allows larger machines to have larger/more transfers.
  	 * Limit the default to 256M
  	 */
  	congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
  	if (congestion_kb > 256*1024)
  		congestion_kb = 256*1024;
  
  	return congestion_kb;
de57606c2   Sage Weil   ceph: client types
630
631
632
633
634
635
636
637
638
639
640
641
642
643
  }
  
  
  
  /* snap.c */
  struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
  					       u64 ino);
  extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
  				struct ceph_snap_realm *realm);
  extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
  				struct ceph_snap_realm *realm);
  extern int ceph_update_snap_trace(struct ceph_mds_client *m,
  				  void *p, void *e, bool deletion);
  extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
2600d2dd5   Sage Weil   ceph: drop messag...
644
  			     struct ceph_mds_session *session,
de57606c2   Sage Weil   ceph: client types
645
  			     struct ceph_msg *msg);
fc837c8f0   Sage Weil   ceph: queue_cap_s...
646
  extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
de57606c2   Sage Weil   ceph: client types
647
648
649
650
651
652
653
654
655
656
657
658
659
660
  extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
  				  struct ceph_cap_snap *capsnap);
  extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
  
  /*
   * a cap_snap is "pending" if it is still awaiting an in-progress
   * sync write (that may/may not still update size, mtime, etc.).
   */
  static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
  {
  	return !list_empty(&ci->i_cap_snaps) &&
  		list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap,
  			   ci_item)->writing;
  }
de57606c2   Sage Weil   ceph: client types
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
  /* inode.c */
  extern const struct inode_operations ceph_file_iops;
  
  extern struct inode *ceph_alloc_inode(struct super_block *sb);
  extern void ceph_destroy_inode(struct inode *inode);
  
  extern struct inode *ceph_get_inode(struct super_block *sb,
  				    struct ceph_vino vino);
  extern struct inode *ceph_get_snapdir(struct inode *parent);
  extern int ceph_fill_file_size(struct inode *inode, int issued,
  			       u32 truncate_seq, u64 truncate_size, u64 size);
  extern void ceph_fill_file_time(struct inode *inode, int issued,
  				u64 time_warp_seq, struct timespec *ctime,
  				struct timespec *mtime, struct timespec *atime);
  extern int ceph_fill_trace(struct super_block *sb,
  			   struct ceph_mds_request *req,
  			   struct ceph_mds_session *session);
  extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
  				    struct ceph_mds_session *session);
  
  extern int ceph_inode_holds_cap(struct inode *inode, int mask);
  
  extern int ceph_inode_set_size(struct inode *inode, loff_t size);
de57606c2   Sage Weil   ceph: client types
684
  extern void __ceph_do_pending_vmtruncate(struct inode *inode);
3c6f6b79a   Sage Weil   ceph: cleanup asy...
685
686
687
688
  extern void ceph_queue_vmtruncate(struct inode *inode);
  
  extern void ceph_queue_invalidate(struct inode *inode);
  extern void ceph_queue_writeback(struct inode *inode);
de57606c2   Sage Weil   ceph: client types
689
690
  
  extern int ceph_do_getattr(struct inode *inode, int mask);
10556cb21   Al Viro   ->permission() sa...
691
  extern int ceph_permission(struct inode *inode, int mask);
de57606c2   Sage Weil   ceph: client types
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
  extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
  extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
  			struct kstat *stat);
  
  /*
   * xattr.c
   *
   * The four dentry-based prototypes below omit their parameter names.
   * NOTE(review): the argument order appears to follow the VFS xattr
   * hooks (dentry, name, value buffer, size, flags) -- confirm against
   * the definitions in xattr.c.
   */
  extern int ceph_setxattr(struct dentry *, const char *, const void *,
  			 size_t, int);
  extern ssize_t ceph_getxattr(struct dentry *, const char *, void *, size_t);
  extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
  extern int ceph_removexattr(struct dentry *, const char *);
  /* the __ceph_* helpers work directly on the ceph_inode_info, not a dentry */
  extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
  extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
  
  /*
   * caps.c
   *
   * Declarations only; the definitions live in caps.c.
   */
  /*
   * Returns a const string for the int @c.
   * NOTE(review): presumably a printable form of a cap mask -- confirm.
   */
  extern const char *ceph_cap_string(int c);
  /* takes the MDS session together with a struct ceph_msg */
  extern void ceph_handle_caps(struct ceph_mds_session *session,
  			     struct ceph_msg *msg);
  /*
   * Note the long run of adjacent integer parameters (@issued, @wanted,
   * @cap, @seq are all plain unsigned): the compiler cannot catch
   * arguments passed in the wrong order, so check call sites carefully.
   */
  extern int ceph_add_cap(struct inode *inode,
  			struct ceph_mds_session *session, u64 cap_id,
  			int fmode, unsigned issued, unsigned wanted,
  			unsigned cap, unsigned seq, u64 realmino, int flags,
  			struct ceph_cap_reservation *caps_reservation);
7c1332b8c   Sage Weil   ceph: fix iterate...
714
  extern void __ceph_remove_cap(struct ceph_cap *cap);
de57606c2   Sage Weil   ceph: client types
715
716
  static inline void ceph_remove_cap(struct ceph_cap *cap)
  {
be655596b   Sage Weil   ceph: use i_ceph_...
717
  	spin_lock(&cap->ci->i_ceph_lock);
7c1332b8c   Sage Weil   ceph: fix iterate...
718
  	__ceph_remove_cap(cap);
be655596b   Sage Weil   ceph: use i_ceph_...
719
  	spin_unlock(&cap->ci->i_ceph_lock);
de57606c2   Sage Weil   ceph: client types
720
  }
37151668b   Yehuda Sadeh   ceph: do caps acc...
721
722
  extern void ceph_put_cap(struct ceph_mds_client *mdsc,
  			 struct ceph_cap *cap);
de57606c2   Sage Weil   ceph: client types
723
724
  
  extern void ceph_queue_caps_release(struct inode *inode);
f1a3d5721   Stephen Rothwell   ceph: update for ...
725
  extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
02c24a821   Josef Bacik   fs: push i_mutex ...
726
727
  extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
  		      int datasync);
de57606c2   Sage Weil   ceph: client types
728
729
  extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
  				    struct ceph_mds_session *session);
2bc50259f   Greg Farnum   ceph: add ceph_ge...
730
731
  extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
  					     int mds);
de57606c2   Sage Weil   ceph: client types
732
733
734
735
736
737
  extern int ceph_get_cap_mds(struct inode *inode);
  extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
  extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
  extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
  				       struct ceph_snap_context *snapc);
  /*
   * @psession is a pointer to a session pointer, so the callee is able to
   * replace the caller's session pointer.
   * NOTE(review): whether and when it does so, and the meaning of @again,
   * are defined in caps.c -- confirm there.
   */
  extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
  			       struct ceph_mds_session **psession,
  			       int again);
de57606c2   Sage Weil   ceph: client types
740
741
  extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
  			    struct ceph_mds_session *session);
afcdaea3f   Sage Weil   ceph: flush dirty...
742
743
  extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
  extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
de57606c2   Sage Weil   ceph: client types
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
  
  /*
   * The two release encoders share a signature shape; the inode variant
   * takes one extra flag, @force.
   * NOTE(review): @p is a void ** -- presumably the caller's encode
   * cursor, advanced past whatever gets written; confirm in caps.c.
   */
  extern int ceph_encode_inode_release(void **p, struct inode *inode,
  				     int mds, int drop, int unless, int force);
  extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
  				      int mds, int drop, int unless);
  
  /*
   * @need and @want are passed by value; @got is an int pointer through
   * which a result can be handed back to the caller.
   * NOTE(review): exact semantics of @got and @endoff live in caps.c.
   */
  extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
  			 int *got, loff_t endoff);
  
  /*
   * Open files are counted per mode: bump the counter for @mode by one.
   * @mode indexes ci->i_nr_by_mode[] directly and is not range-checked.
   */
  static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
  {
  	ci->i_nr_by_mode[mode] += 1;
  }
  extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
  
  /* addr.c */
  /* ceph_aops is declared here only; one declaration per object is enough */
  extern const struct address_space_operations ceph_aops;
  extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
  
  /* file.c */
  extern const struct file_operations ceph_file_fops;
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
767
768
769
770
771
772
773
  extern int ceph_copy_to_page_vector(struct page **pages,
  				    const char *data,
  				    loff_t off, size_t len);
  extern int ceph_copy_from_page_vector(struct page **pages,
  				    char *data,
  				    loff_t off, size_t len);
  extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
de57606c2   Sage Weil   ceph: client types
774
775
776
777
778
  extern int ceph_open(struct inode *inode, struct file *file);
  extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
  				       struct nameidata *nd, int mode,
  				       int locked_dir);
  extern int ceph_release(struct inode *inode, struct file *filp);
de57606c2   Sage Weil   ceph: client types
779
780
781
782
  
  /* dir.c */
  extern const struct file_operations ceph_dir_fops;
  extern const struct inode_operations ceph_dir_iops;
52dfb8ac0   Sage Weil   ceph: constify de...
783
  /* three dentry_operations tables, declared in a single declarator list */
  extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
  	ceph_snapdir_dentry_ops;
  
  extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
468640e32   Sage Weil   ceph: fix ceph_lo...
787
788
  extern int ceph_handle_snapdir(struct ceph_mds_request *req,
  			       struct dentry *dentry, int err);
de57606c2   Sage Weil   ceph: client types
789
790
791
792
793
794
  extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
  					 struct dentry *dentry, int err);
  
  extern void ceph_dentry_lru_add(struct dentry *dn);
  extern void ceph_dentry_lru_touch(struct dentry *dn);
  extern void ceph_dentry_lru_del(struct dentry *dn);
81a6cf2d3   Sage Weil   ceph: invalidate ...
795
  extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
e5f86dc37   Sage Weil   ceph: avoid d_par...
796
  extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
5f21c96dd   Sage Weil   ceph: protect acc...
797
  extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
de57606c2   Sage Weil   ceph: client types
798
799
800
801
802
803
804
805
806
807
808
809
810
  
  /*
   * our d_ops vary depending on whether the inode is live,
   * snapshotted (read-only), or a virtual ".snap" directory.
   *
   * This prototype is written without 'extern', unlike most others in
   * this header; for a function declaration that does not change its
   * linkage.
   * NOTE(review): presumably returns 0 or a negative errno -- confirm
   * in dir.c.
   */
  int ceph_init_dentry(struct dentry *dentry);
  
  
  /* ioctl.c */
  extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
  
  /* export.c */
  extern const struct export_operations ceph_export_ops;
40819f6fb   Greg Farnum   ceph: add flock/f...
811
812
813
814
815
816
817
  /*
   * locks.c
   *
   * ceph_lock() and ceph_flock() share one signature: (file, cmd, fl).
   */
  extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
  extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
  /*
   * ceph_count_locks() returns nothing directly; its two results travel
   * through the int pointers @p_num and @f_num.  ceph_encode_locks()
   * takes a matching pair of counts by value (@p_locks, @f_locks).
   * NOTE(review): presumably "p" = POSIX (fcntl) locks and "f" = flock
   * locks -- confirm in locks.c.
   */
  extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
  extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p,
  			     int p_locks, int f_locks);
  /* fills the caller-supplied struct ceph_filelock @c from @fl */
  /* NOTE(review): direction inferred from the name -- confirm in locks.c */
  extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
818
819
820
  /*
   * debugfs.c
   *
   * Init/cleanup pair operating on a struct ceph_fs_client.  Only init
   * can report failure (it returns int; cleanup returns void).
   * NOTE(review): presumably 0 on success and a negative errno on
   * failure -- confirm in debugfs.c.
   */
  extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
  extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
de57606c2   Sage Weil   ceph: client types
821
  #endif /* _FS_CEPH_SUPER_H */