Commit 6c0f3af72cb1622a66962a1180c36ef8c41be8e2
1 parent
3c0eee3fe6
Exists in
master
and in
4 other branches
ceph: add dir_layout to inode
Add a ceph_dir_layout to the inode, and calculate dentry hash values based on the parent directory's specified dir_hash function. This is needed because the old default Linux dcache hash function is extremely weak and leads to a poor distribution of files among dir fragments. Signed-off-by: Sage Weil <sage@newdream.net>
Showing 6 changed files with 41 additions and 4 deletions Side-by-side Diff
fs/ceph/dir.c
... | ... | @@ -1216,6 +1216,26 @@ |
1216 | 1216 | } |
1217 | 1217 | } |
1218 | 1218 | |
1219 | +/* | |
1220 | + * Return name hash for a given dentry. This is dependent on | |
1221 | + * the parent directory's hash function. | |
1222 | + */ | |
1223 | +unsigned ceph_dentry_hash(struct dentry *dn) | |
1224 | +{ | |
1225 | + struct inode *dir = dn->d_parent->d_inode; | |
1226 | + struct ceph_inode_info *dci = ceph_inode(dir); | |
1227 | + | |
1228 | + switch (dci->i_dir_layout.dl_dir_hash) { | |
1229 | + case 0: /* for backward compat */ | |
1230 | + case CEPH_STR_HASH_LINUX: | |
1231 | + return dn->d_name.hash; | |
1232 | + | |
1233 | + default: | |
1234 | + return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, | |
1235 | + dn->d_name.name, dn->d_name.len); | |
1236 | + } | |
1237 | +} | |
1238 | + | |
1219 | 1239 | const struct file_operations ceph_dir_fops = { |
1220 | 1240 | .read = ceph_read_dir, |
1221 | 1241 | .readdir = ceph_readdir, |
fs/ceph/export.c
... | ... | @@ -59,7 +59,7 @@ |
59 | 59 | dout("encode_fh %p connectable\n", dentry); |
60 | 60 | cfh->ino = ceph_ino(dentry->d_inode); |
61 | 61 | cfh->parent_ino = ceph_ino(parent->d_inode); |
62 | - cfh->parent_name_hash = parent->d_name.hash; | |
62 | + cfh->parent_name_hash = ceph_dentry_hash(parent); | |
63 | 63 | *max_len = connected_handle_length; |
64 | 64 | type = 2; |
65 | 65 | } else if (*max_len >= handle_length) { |
fs/ceph/inode.c
fs/ceph/super.h
... | ... | @@ -239,6 +239,7 @@ |
239 | 239 | unsigned i_ceph_flags; |
240 | 240 | unsigned long i_release_count; |
241 | 241 | |
242 | + struct ceph_dir_layout i_dir_layout; | |
242 | 243 | struct ceph_file_layout i_layout; |
243 | 244 | char *i_symlink; |
244 | 245 | |
... | ... | @@ -768,6 +769,7 @@ |
768 | 769 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
769 | 770 | extern void ceph_dentry_lru_del(struct dentry *dn); |
770 | 771 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); |
772 | +extern unsigned ceph_dentry_hash(struct dentry *dn); | |
771 | 773 | |
772 | 774 | /* |
773 | 775 | * our d_ops vary depending on whether the inode is live, |
include/linux/ceph/ceph_fs.h
... | ... | @@ -43,6 +43,10 @@ |
43 | 43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) |
44 | 44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) |
45 | 45 | #define CEPH_FEATURE_FLOCK (1<<3) |
46 | +#define CEPH_FEATURE_SUBSCRIBE2 (1<<4) | |
47 | +#define CEPH_FEATURE_MONNAMES (1<<5) | |
48 | +#define CEPH_FEATURE_RECONNECT_SEQ (1<<6) | |
49 | +#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) | |
46 | 50 | |
47 | 51 | |
48 | 52 | /* |
49 | 53 | |
... | ... | @@ -55,10 +59,10 @@ |
55 | 59 | __le32 fl_stripe_count; /* over this many objects */ |
56 | 60 | __le32 fl_object_size; /* until objects are this big, then move to |
57 | 61 | new objects */ |
58 | - __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ | |
62 | + __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */ | |
59 | 63 | |
60 | 64 | /* pg -> disk layout */ |
61 | - __le32 fl_object_stripe_unit; /* for per-object parity, if any */ | |
65 | + __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ | |
62 | 66 | |
63 | 67 | /* object -> pg layout */ |
64 | 68 | __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ |
... | ... | @@ -69,6 +73,12 @@ |
69 | 73 | |
70 | 74 | int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); |
71 | 75 | |
76 | +struct ceph_dir_layout { | |
77 | + __u8 dl_dir_hash; /* see ceph_hash.h for ids */ | |
78 | + __u8 dl_unused1; | |
79 | + __u16 dl_unused2; | |
80 | + __u32 dl_unused3; | |
81 | +} __attribute__ ((packed)); | |
72 | 82 | |
73 | 83 | /* crypto algorithms */ |
74 | 84 | #define CEPH_CRYPTO_NONE 0x0 |
... | ... | @@ -457,7 +467,7 @@ |
457 | 467 | struct ceph_timespec rctime; |
458 | 468 | struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ |
459 | 469 | } __attribute__ ((packed)); |
460 | -/* followed by frag array, then symlink string, then xattr blob */ | |
470 | +/* followed by frag array, symlink string, dir layout, xattr blob */ | |
461 | 471 | |
462 | 472 | /* reply_lease follows dname, and reply_inode */ |
463 | 473 | struct ceph_mds_reply_lease { |
net/ceph/ceph_hash.c
1 | 1 | |
2 | 2 | #include <linux/ceph/types.h> |
3 | +#include <linux/module.h> | |
3 | 4 | |
4 | 5 | /* |
5 | 6 | * Robert Jenkin's hash function. |
... | ... | @@ -104,6 +105,7 @@ |
104 | 105 | return -1; |
105 | 106 | } |
106 | 107 | } |
108 | +EXPORT_SYMBOL(ceph_str_hash); | |
107 | 109 | |
108 | 110 | const char *ceph_str_hash_name(int type) |
109 | 111 | { |
... | ... | @@ -116,4 +118,5 @@ |
116 | 118 | return "unknown"; |
117 | 119 | } |
118 | 120 | } |
121 | +EXPORT_SYMBOL(ceph_str_hash_name); |