Commit 6c0f3af72cb1622a66962a1180c36ef8c41be8e2

Authored by Sage Weil
1 parent 3c0eee3fe6

ceph: add dir_layout to inode

Add a ceph_dir_layout to the inode, and calculate dentry hash values based
on the parent directory's specified dir_hash function.  This is needed
because the old default Linux dcache hash function is extremely weak and
leads to a poor distribution of files among dir fragments.

Signed-off-by: Sage Weil <sage@newdream.net>

Showing 6 changed files with 41 additions and 4 deletions Side-by-side Diff

... ... @@ -1216,6 +1216,26 @@
1216 1216 }
1217 1217 }
1218 1218  
  1219 +/*
  1220 + * Return name hash for a given dentry. This is dependent on
  1221 + * the parent directory's hash function.
  1222 + */
  1223 +unsigned ceph_dentry_hash(struct dentry *dn)
  1224 +{
  1225 + struct inode *dir = dn->d_parent->d_inode;
  1226 + struct ceph_inode_info *dci = ceph_inode(dir);
  1227 +
  1228 + switch (dci->i_dir_layout.dl_dir_hash) {
  1229 + case 0: /* for backward compat */
  1230 + case CEPH_STR_HASH_LINUX:
  1231 + return dn->d_name.hash;
  1232 +
  1233 + default:
  1234 + return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
  1235 + dn->d_name.name, dn->d_name.len);
  1236 + }
  1237 +}
  1238 +
1219 1239 const struct file_operations ceph_dir_fops = {
1220 1240 .read = ceph_read_dir,
1221 1241 .readdir = ceph_readdir,
... ... @@ -59,7 +59,7 @@
59 59 dout("encode_fh %p connectable\n", dentry);
60 60 cfh->ino = ceph_ino(dentry->d_inode);
61 61 cfh->parent_ino = ceph_ino(parent->d_inode);
62   - cfh->parent_name_hash = parent->d_name.hash;
  62 + cfh->parent_name_hash = ceph_dentry_hash(parent);
63 63 *max_len = connected_handle_length;
64 64 type = 2;
65 65 } else if (*max_len >= handle_length) {
... ... @@ -297,6 +297,8 @@
297 297 ci->i_release_count = 0;
298 298 ci->i_symlink = NULL;
299 299  
  300 + memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
  301 +
300 302 ci->i_fragtree = RB_ROOT;
301 303 mutex_init(&ci->i_fragtree_mutex);
302 304  
... ... @@ -239,6 +239,7 @@
239 239 unsigned i_ceph_flags;
240 240 unsigned long i_release_count;
241 241  
  242 + struct ceph_dir_layout i_dir_layout;
242 243 struct ceph_file_layout i_layout;
243 244 char *i_symlink;
244 245  
... ... @@ -768,6 +769,7 @@
768 769 extern void ceph_dentry_lru_touch(struct dentry *dn);
769 770 extern void ceph_dentry_lru_del(struct dentry *dn);
770 771 extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
  772 +extern unsigned ceph_dentry_hash(struct dentry *dn);
771 773  
772 774 /*
773 775 * our d_ops vary depending on whether the inode is live,
include/linux/ceph/ceph_fs.h
... ... @@ -43,6 +43,10 @@
43 43 #define CEPH_FEATURE_NOSRCADDR (1<<1)
44 44 #define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
45 45 #define CEPH_FEATURE_FLOCK (1<<3)
  46 +#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
  47 +#define CEPH_FEATURE_MONNAMES (1<<5)
  48 +#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
  49 +#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
46 50  
47 51  
48 52 /*
49 53  
... ... @@ -55,10 +59,10 @@
55 59 __le32 fl_stripe_count; /* over this many objects */
56 60 __le32 fl_object_size; /* until objects are this big, then move to
57 61 new objects */
58   - __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */
  62 + __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */
59 63  
60 64 /* pg -> disk layout */
61   - __le32 fl_object_stripe_unit; /* for per-object parity, if any */
  65 + __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
62 66  
63 67 /* object -> pg layout */
64 68 __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
... ... @@ -69,6 +73,12 @@
69 73  
70 74 int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
71 75  
  76 +struct ceph_dir_layout {
  77 + __u8 dl_dir_hash; /* see ceph_hash.h for ids */
  78 + __u8 dl_unused1;
  79 + __u16 dl_unused2;
  80 + __u32 dl_unused3;
  81 +} __attribute__ ((packed));
72 82  
73 83 /* crypto algorithms */
74 84 #define CEPH_CRYPTO_NONE 0x0
... ... @@ -457,7 +467,7 @@
457 467 struct ceph_timespec rctime;
458 468 struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
459 469 } __attribute__ ((packed));
460   -/* followed by frag array, then symlink string, then xattr blob */
  470 +/* followed by frag array, symlink string, dir layout, xattr blob */
461 471  
462 472 /* reply_lease follows dname, and reply_inode */
463 473 struct ceph_mds_reply_lease {
net/ceph/ceph_hash.c
1 1  
2 2 #include <linux/ceph/types.h>
  3 +#include <linux/module.h>
3 4  
4 5 /*
5 6 * Robert Jenkin's hash function.
... ... @@ -104,6 +105,7 @@
104 105 return -1;
105 106 }
106 107 }
  108 +EXPORT_SYMBOL(ceph_str_hash);
107 109  
108 110 const char *ceph_str_hash_name(int type)
109 111 {
... ... @@ -116,4 +118,5 @@
116 118 return "unknown";
117 119 }
118 120 }
  121 +EXPORT_SYMBOL(ceph_str_hash_name);