Commit 83418978827324918a8cd25ce5227312de1d4468

Authored by Mark Fasheh
1 parent 7cdfc3a1c3

ocfs2: Cache extent records

The extent map code was ripped out earlier because of an inability to deal
with holes. This patch adds back a simpler caching scheme requiring far less
code.

Our old extent map caching was designed back when metadata block caching in
OCFS2 didn't work very well, resulting in many disk reads. These days our
metadata caching is much better, so it no longer causes unnecessary disk
reads. As a result, extent caching doesn't have to be as fancy, nor does it
have to cache as many extents. Keeping the last 3 extents seen should be
sufficient to give us a small performance boost on some streaming workloads.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
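
Before diving into the diff, here is a minimal, self-contained sketch of the
idea described above: remember the last few extents seen, keep them in
most-recently-used order, and drop the oldest entry when a new extent arrives
and the cache is full. It uses a plain array and userspace C rather than the
kernel's list_head-based ocfs2_extent_map, and it leaves out the merge and
truncate handling the patch adds; all names below are illustrative, not the
ocfs2 functions.

/*
 * Illustrative stand-in for the patch's per-inode extent cache: an
 * array of at most three extents kept in most-recently-used order.
 */
#include <stdio.h>
#include <string.h>

#define CACHE_ITEMS 3                   /* mirrors OCFS2_MAX_EXTENT_MAP_ITEMS */

struct cached_extent {
        unsigned int cpos;              /* first logical cluster covered */
        unsigned int phys;              /* matching physical cluster */
        unsigned int clusters;          /* extent length in clusters */
};

struct extent_cache {
        struct cached_extent item[CACHE_ITEMS];  /* item[0] is most recent */
        unsigned int nr;
};

/* Move item[i] to the front so repeated lookups stay cheap. */
static void cache_promote(struct extent_cache *ec, unsigned int i)
{
        struct cached_extent hit = ec->item[i];

        memmove(&ec->item[1], &ec->item[0], i * sizeof(hit));
        ec->item[0] = hit;
}

/* Return 1 and fill *phys on a hit, 0 on a miss. */
static int cache_lookup(struct extent_cache *ec, unsigned int cpos,
                        unsigned int *phys)
{
        for (unsigned int i = 0; i < ec->nr; i++) {
                struct cached_extent *ce = &ec->item[i];

                if (cpos >= ce->cpos && cpos < ce->cpos + ce->clusters) {
                        *phys = ce->phys + (cpos - ce->cpos);
                        cache_promote(ec, i);
                        return 1;
                }
        }
        return 0;
}

/* Insert at the front; a full cache silently drops its oldest entry. */
static void cache_insert(struct extent_cache *ec, struct cached_extent ce)
{
        if (ec->nr < CACHE_ITEMS)
                ec->nr++;
        memmove(&ec->item[1], &ec->item[0], (ec->nr - 1) * sizeof(ce));
        ec->item[0] = ce;
}

int main(void)
{
        struct extent_cache ec = { .nr = 0 };
        unsigned int phys;

        cache_insert(&ec, (struct cached_extent){ .cpos = 0, .phys = 100, .clusters = 8 });
        cache_insert(&ec, (struct cached_extent){ .cpos = 8, .phys = 300, .clusters = 4 });

        if (cache_lookup(&ec, 10, &phys))
                printf("cluster 10 -> physical %u\n", phys);   /* prints 302 */
        if (!cache_lookup(&ec, 20, &phys))
                printf("cluster 20 -> miss, walk the extent tree\n");
        return 0;
}

A hit answers the logical-to-physical question without touching the on-disk
extent tree, which is where the small win on streaming I/O comes from.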

Showing 7 changed files with 289 additions and 0 deletions

fs/ocfs2/alloc.c
... ... @@ -2417,6 +2417,8 @@
2417 2417 status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
2418 2418 if (status < 0)
2419 2419 mlog_errno(status);
  2420 + else
  2421 + ocfs2_extent_map_insert_rec(inode, &rec);
2420 2422  
2421 2423 bail:
2422 2424 if (bh)
... ... @@ -3640,6 +3642,9 @@
3640 3642 mlog_errno(status);
3641 3643 goto bail;
3642 3644 }
  3645 +
  3646 + ocfs2_extent_map_trunc(inode, new_highest_cpos);
  3647 +
3643 3648 start:
3644 3649 /*
3645 3650 * Check that we still have allocation to delete.
fs/ocfs2/dlmglue.c
... ... @@ -1613,6 +1613,8 @@
1613 1613 * for the inode metadata. */
1614 1614 ocfs2_metadata_cache_purge(inode);
1615 1615  
  1616 + ocfs2_extent_map_trunc(inode, 0);
  1617 +
1616 1618 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
1617 1619 mlog(0, "Trusting LVB on inode %llu\n",
1618 1620 (unsigned long long)oi->ip_blkno);
fs/ocfs2/extent_map.c
... ... @@ -39,6 +39,254 @@
39 39 #include "buffer_head_io.h"
40 40  
41 41 /*
  42 + * The extent caching implementation is intentionally trivial.
  43 + *
  44 + * We only cache a small number of extents stored directly on the
  45 + * inode, so linear order operations are acceptable. If we ever want
  46 + * to increase the size of the extent map, then these algorithms must
  47 + * get smarter.
  48 + */
  49 +
  50 +void ocfs2_extent_map_init(struct inode *inode)
  51 +{
  52 + struct ocfs2_inode_info *oi = OCFS2_I(inode);
  53 +
  54 + oi->ip_extent_map.em_num_items = 0;
  55 + INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  56 +}
  57 +
  58 +static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  59 + unsigned int cpos,
  60 + struct ocfs2_extent_map_item **ret_emi)
  61 +{
  62 + unsigned int range;
  63 + struct ocfs2_extent_map_item *emi;
  64 +
  65 + *ret_emi = NULL;
  66 +
  67 + list_for_each_entry(emi, &em->em_list, ei_list) {
  68 + range = emi->ei_cpos + emi->ei_clusters;
  69 +
  70 + if (cpos >= emi->ei_cpos && cpos < range) {
  71 + list_move(&emi->ei_list, &em->em_list);
  72 +
  73 + *ret_emi = emi;
  74 + break;
  75 + }
  76 + }
  77 +}
  78 +
  79 +static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  80 + unsigned int *phys, unsigned int *len,
  81 + unsigned int *flags)
  82 +{
  83 + unsigned int coff;
  84 + struct ocfs2_inode_info *oi = OCFS2_I(inode);
  85 + struct ocfs2_extent_map_item *emi;
  86 +
  87 + spin_lock(&oi->ip_lock);
  88 +
  89 + __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  90 + if (emi) {
  91 + coff = cpos - emi->ei_cpos;
  92 + *phys = emi->ei_phys + coff;
  93 + if (len)
  94 + *len = emi->ei_clusters - coff;
  95 + if (flags)
  96 + *flags = emi->ei_flags;
  97 + }
  98 +
  99 + spin_unlock(&oi->ip_lock);
  100 +
  101 + if (emi == NULL)
  102 + return -ENOENT;
  103 +
  104 + return 0;
  105 +}
  106 +
  107 +/*
  108 + * Forget about all clusters equal to or greater than cpos.
  109 + */
  110 +void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
  111 +{
  112 + struct list_head *p, *n;
  113 + struct ocfs2_extent_map_item *emi;
  114 + struct ocfs2_inode_info *oi = OCFS2_I(inode);
  115 + struct ocfs2_extent_map *em = &oi->ip_extent_map;
  116 + LIST_HEAD(tmp_list);
  117 + unsigned int range;
  118 +
  119 + spin_lock(&oi->ip_lock);
  120 + list_for_each_safe(p, n, &em->em_list) {
  121 + emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
  122 +
  123 + if (emi->ei_cpos >= cpos) {
  124 + /* Full truncate of this record. */
  125 + list_move(&emi->ei_list, &tmp_list);
  126 + BUG_ON(em->em_num_items == 0);
  127 + em->em_num_items--;
  128 + continue;
  129 + }
  130 +
  131 + range = emi->ei_cpos + emi->ei_clusters;
  132 + if (range > cpos) {
  133 + /* Partial truncate */
  134 + emi->ei_clusters = cpos - emi->ei_cpos;
  135 + }
  136 + }
  137 + spin_unlock(&oi->ip_lock);
  138 +
  139 + list_for_each_safe(p, n, &tmp_list) {
  140 + emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
  141 + list_del(&emi->ei_list);
  142 + kfree(emi);
  143 + }
  144 +}
  145 +
  146 +/*
  148 + * Is any part of emi2 contained within emi1?
  148 + */
  149 +static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
  150 + struct ocfs2_extent_map_item *emi2)
  151 +{
  152 + unsigned int range1, range2;
  153 +
  154 + /*
  155 + * Check if logical start of emi2 is inside emi1
  156 + */
  157 + range1 = emi1->ei_cpos + emi1->ei_clusters;
  158 + if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
  159 + return 1;
  160 +
  161 + /*
  162 + * Check if logical end of emi2 is inside emi1
  163 + */
  164 + range2 = emi2->ei_cpos + emi2->ei_clusters;
  165 + if (range2 > emi1->ei_cpos && range2 <= range1)
  166 + return 1;
  167 +
  168 + return 0;
  169 +}
  170 +
  171 +static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
  172 + struct ocfs2_extent_map_item *src)
  173 +{
  174 + dest->ei_cpos = src->ei_cpos;
  175 + dest->ei_phys = src->ei_phys;
  176 + dest->ei_clusters = src->ei_clusters;
  177 + dest->ei_flags = src->ei_flags;
  178 +}
  179 +
  180 +/*
  181 + * Try to merge emi with ins. Returns 1 if merge succeeds, zero
  182 + * otherwise.
  183 + */
  184 +static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
  185 + struct ocfs2_extent_map_item *ins)
  186 +{
  187 + /*
  188 + * Handle contiguousness
  189 + */
  190 + if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
  191 + ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
  192 + ins->ei_flags == emi->ei_flags) {
  193 + emi->ei_clusters += ins->ei_clusters;
  194 + return 1;
  195 + } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
  196 + (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
  197 + ins->ei_flags == emi->ei_flags) {
  198 + emi->ei_phys = ins->ei_phys;
  199 + emi->ei_cpos = ins->ei_cpos;
  200 + emi->ei_clusters += ins->ei_clusters;
  201 + return 1;
  202 + }
  203 +
  204 + /*
  205 + * Overlapping extents - this shouldn't happen unless we've
  206 + * split an extent to change its flags. That is exceedingly
  207 + * rare, so there's no sense in trying to optimize it yet.
  208 + */
  209 + if (ocfs2_ei_is_contained(emi, ins) ||
  210 + ocfs2_ei_is_contained(ins, emi)) {
  211 + ocfs2_copy_emi_fields(emi, ins);
  212 + return 1;
  213 + }
  214 +
  215 + /* No merge was possible. */
  216 + return 0;
  217 +}
  218 +
  219 +/*
  220 + * In order to reduce complexity on the caller, this insert function
  221 + * is intentionally liberal in what it will accept.
  222 + *
  223 + * The only rule is that the truncate call *must* be used whenever
  224 + * records have been deleted. This avoids inserting overlapping
  225 + * records with different physical mappings.
  226 + */
  227 +void ocfs2_extent_map_insert_rec(struct inode *inode,
  228 + struct ocfs2_extent_rec *rec)
  229 +{
  230 + struct ocfs2_inode_info *oi = OCFS2_I(inode);
  231 + struct ocfs2_extent_map *em = &oi->ip_extent_map;
  232 + struct ocfs2_extent_map_item *emi, *new_emi = NULL;
  233 + struct ocfs2_extent_map_item ins;
  234 +
  235 + ins.ei_cpos = le32_to_cpu(rec->e_cpos);
  236 + ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
  237 + le64_to_cpu(rec->e_blkno));
  238 + ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
  239 + ins.ei_flags = rec->e_flags;
  240 +
  241 +search:
  242 + spin_lock(&oi->ip_lock);
  243 +
  244 + list_for_each_entry(emi, &em->em_list, ei_list) {
  245 + if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
  246 + list_move(&emi->ei_list, &em->em_list);
  247 + spin_unlock(&oi->ip_lock);
  248 + goto out;
  249 + }
  250 + }
  251 +
  252 + /*
  253 + * No item could be merged.
  254 + *
  255 + * Either allocate and add a new item, or overwrite the least recently
  256 + * inserted.
  257 + */
  258 +
  259 + if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
  260 + if (new_emi == NULL) {
  261 + spin_unlock(&oi->ip_lock);
  262 +
  263 + new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
  264 + if (new_emi == NULL)
  265 + goto out;
  266 +
  267 + goto search;
  268 + }
  269 +
  270 + ocfs2_copy_emi_fields(new_emi, &ins);
  271 + list_add(&new_emi->ei_list, &em->em_list);
  272 + em->em_num_items++;
  273 + new_emi = NULL;
  274 + } else {
  275 + BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
  276 + emi = list_entry(em->em_list.prev,
  277 + struct ocfs2_extent_map_item, ei_list);
  278 + list_move(&emi->ei_list, &em->em_list);
  279 + ocfs2_copy_emi_fields(emi, &ins);
  280 + }
  281 +
  282 + spin_unlock(&oi->ip_lock);
  283 +
  284 +out:
  285 + if (new_emi)
  286 + kfree(new_emi);
  287 +}
  288 +
  289 +/*
42 290 * Return the 1st index within el which contains an extent start
43 291 * larger than v_cluster.
44 292 */
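
For reference, the merging that ocfs2_try_to_merge_extent_map() performs
above boils down to: fold the inserted record into a cached one when the two
are contiguous both logically and physically (in either direction), or when
one is contained in the other. A hedged, standalone illustration of the
contiguity case follows; the struct and function names are simplified
stand-ins, not the kernel code.

#include <stdio.h>

/* Simplified extent: logical start, physical start, length in clusters. */
struct ext_demo {
        unsigned int cpos, phys, clusters;
};

/* Fold ins into cur when they are contiguous; 1 on success, 0 otherwise. */
static int try_merge_demo(struct ext_demo *cur, const struct ext_demo *ins)
{
        /* ins begins exactly where cur ends, logically and physically. */
        if (ins->cpos == cur->cpos + cur->clusters &&
            ins->phys == cur->phys + cur->clusters) {
                cur->clusters += ins->clusters;
                return 1;
        }
        /* ins ends exactly where cur begins, logically and physically. */
        if (ins->cpos + ins->clusters == cur->cpos &&
            ins->phys + ins->clusters == cur->phys) {
                cur->cpos = ins->cpos;
                cur->phys = ins->phys;
                cur->clusters += ins->clusters;
                return 1;
        }
        return 0;       /* the kernel code additionally handles containment */
}

int main(void)
{
        struct ext_demo cur = { .cpos = 0, .phys = 100, .clusters = 4 };
        struct ext_demo next = { .cpos = 4, .phys = 104, .clusters = 2 };

        if (try_merge_demo(&cur, &next))
                printf("merged: cpos=%u phys=%u clusters=%u\n",
                       cur.cpos, cur.phys, cur.clusters);      /* 0 100 6 */
        return 0;
}

Requiring both the logical and the physical offsets to line up (plus matching
flags, in the kernel code) keeps a merged record meaning exactly what its
pieces meant individually.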
... ... @@ -174,6 +422,11 @@
174 422 struct ocfs2_extent_rec *rec;
175 423 u32 coff;
176 424  
  425 + ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
  426 + num_clusters, extent_flags);
  427 + if (ret == 0)
  428 + goto out;
  429 +
177 430 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
178 431 &di_bh, OCFS2_BH_CACHED, inode);
179 432 if (ret) {
... ... @@ -245,6 +498,8 @@
245 498 *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
246 499  
247 500 flags = rec->e_flags;
  501 +
  502 + ocfs2_extent_map_insert_rec(inode, rec);
248 503 }
249 504  
250 505 if (extent_flags)
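
The two hunks above wire the cache into ocfs2_get_clusters(): try the cache
first, and only on a miss read the inode and walk the extent list, caching
the record that the walk finds. A hedged, self-contained outline of that
pattern follows; everything suffixed _demo is a stand-in, not a kernel
interface.

#include <stdio.h>

/* Toy cache remembering a single mapping, enough to show the flow. */
static struct {
        unsigned int cpos, phys, clusters;
        int valid;
} cache_demo;

/* Cache-first lookup: 0 on a hit, -1 on a miss. */
static int cache_lookup_demo(unsigned int cpos, unsigned int *phys)
{
        if (cache_demo.valid &&
            cpos >= cache_demo.cpos &&
            cpos < cache_demo.cpos + cache_demo.clusters) {
                *phys = cache_demo.phys + (cpos - cache_demo.cpos);
                return 0;
        }
        return -1;
}

/* Stand-in for the extent-tree walk; it also "inserts" the found record. */
static void tree_lookup_demo(unsigned int cpos, unsigned int *phys)
{
        cache_demo.cpos = cpos & ~7u;           /* pretend 8-cluster extents */
        cache_demo.phys = 1000 + cache_demo.cpos;
        cache_demo.clusters = 8;
        cache_demo.valid = 1;
        *phys = cache_demo.phys + (cpos - cache_demo.cpos);
}

static unsigned int get_clusters_demo(unsigned int cpos)
{
        unsigned int phys;

        if (cache_lookup_demo(cpos, &phys) == 0)
                return phys;                    /* hit: no metadata read */
        tree_lookup_demo(cpos, &phys);          /* miss: walk, then cache */
        return phys;
}

int main(void)
{
        printf("first lookup:  %u\n", get_clusters_demo(3));   /* miss -> 1003 */
        printf("second lookup: %u\n", get_clusters_demo(5));   /* hit  -> 1005 */
        return 0;
}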
fs/ocfs2/extent_map.h
... ... @@ -25,6 +25,26 @@
25 25 #ifndef _EXTENT_MAP_H
26 26 #define _EXTENT_MAP_H
27 27  
  28 +struct ocfs2_extent_map_item {
  29 + unsigned int ei_cpos;
  30 + unsigned int ei_phys;
  31 + unsigned int ei_clusters;
  32 + unsigned int ei_flags;
  33 +
  34 + struct list_head ei_list;
  35 +};
  36 +
  37 +#define OCFS2_MAX_EXTENT_MAP_ITEMS 3
  38 +struct ocfs2_extent_map {
  39 + unsigned int em_num_items;
  40 + struct list_head em_list;
  41 +};
  42 +
  43 +void ocfs2_extent_map_init(struct inode *inode);
  44 +void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cluster);
  45 +void ocfs2_extent_map_insert_rec(struct inode *inode,
  46 + struct ocfs2_extent_rec *rec);
  47 +
28 48 int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
29 49 u32 *num_clusters, unsigned int *extent_flags);
30 50 int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
fs/ocfs2/inode.c
... ... @@ -1008,6 +1008,8 @@
1008 1008 "Clear inode of %llu, inode has io markers\n",
1009 1009 (unsigned long long)oi->ip_blkno);
1010 1010  
  1011 + ocfs2_extent_map_trunc(inode, 0);
  1012 +
1011 1013 status = ocfs2_drop_inode_locks(inode);
1012 1014 if (status < 0)
1013 1015 mlog_errno(status);
fs/ocfs2/inode.h
... ... @@ -26,6 +26,8 @@
26 26 #ifndef OCFS2_INODE_H
27 27 #define OCFS2_INODE_H
28 28  
  29 +#include "extent_map.h"
  30 +
29 31 /* OCFS2 Inode Private Data */
30 32 struct ocfs2_inode_info
31 33 {
... ... @@ -62,6 +64,8 @@
62 64 unsigned long ip_last_trans;
63 65  
64 66 struct ocfs2_caching_info ip_metadata_cache;
  67 +
  68 + struct ocfs2_extent_map ip_extent_map;
65 69  
66 70 struct inode vfs_inode;
67 71 };
fs/ocfs2/super.c
... ... @@ -942,6 +942,7 @@
942 942 oi->ip_flags = 0;
943 943 oi->ip_open_count = 0;
944 944 spin_lock_init(&oi->ip_lock);
  945 + ocfs2_extent_map_init(&oi->vfs_inode);
945 946 INIT_LIST_HEAD(&oi->ip_io_markers);
946 947 oi->ip_created_trans = 0;
947 948 oi->ip_last_trans = 0;