Commit 65d45231b56efb3db51eb441e2c68f8252ecdd12

Authored by Kent Overstreet
1 parent ee811287c9

bcache: Abstract out stuff needed for sorting

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
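
What the commit does, in brief: the key-validation, key-merging and sort-fixup code that used to branch on b->level inside bset.c moves into a new extents.c, and each btree node now carries a const ops table (struct btree_keys_ops) selected by its level, with the generic code dispatching through it. Below is a minimal, self-contained userspace sketch of that dispatch pattern; the names (struct keys_ops, node_init, etc.) mirror the new structures but the types are simplified stand-ins, not the real bcache definitions.

/*
 * Minimal model of the abstraction this commit introduces: each node
 * carries a const ops table chosen by its level, and generic code
 * dispatches through it instead of branching on b->level.
 */
#include <stdbool.h>
#include <stdio.h>

struct key { unsigned long long offset, size; };

struct keys_ops {
	bool (*key_invalid)(const struct key *);
	/* May be NULL: only leaf (extent) nodes support merging. */
	bool (*key_merge)(struct key *l, struct key *r);
};

static bool extent_invalid(const struct key *k)
{
	return !k->size || k->size > k->offset;
}

static bool extent_merge(struct key *l, struct key *r)
{
	/* The real adjacency and size checks live in bch_extent_merge();
	 * here we only care that leaf nodes *have* a merge hook. */
	l->offset = r->offset;
	l->size += r->size;
	return true;
}

static bool btree_ptr_invalid(const struct key *k)
{
	return !k->size;
}

static const struct keys_ops extent_keys_ops = {
	.key_invalid	= extent_invalid,
	.key_merge	= extent_merge,
};

static const struct keys_ops btree_keys_ops = {
	.key_invalid	= btree_ptr_invalid,
	/* .key_merge left NULL: btree pointer keys are never merged */
};

struct node {
	int level;
	const struct keys_ops *ops;
};

static void node_init(struct node *b, int level)
{
	b->level = level;
	b->ops = level ? &btree_keys_ops : &extent_keys_ops;
}

/* Mirrors the new inline wrappers added to btree.h. */
static bool node_key_invalid(struct node *b, const struct key *k)
{
	return b->ops->key_invalid(k);
}

static bool node_key_merge(struct node *b, struct key *l, struct key *r)
{
	return b->ops->key_merge ? b->ops->key_merge(l, r) : false;
}

int main(void)
{
	struct node leaf, interior;
	struct key l = { .offset = 16, .size = 8 };
	struct key r = { .offset = 24, .size = 8 };

	node_init(&leaf, 0);
	node_init(&interior, 1);

	printf("invalid?        %d\n", node_key_invalid(&leaf, &l));	    /* 0 */
	printf("leaf merge:     %d\n", node_key_merge(&leaf, &l, &r));	    /* 1 */
	printf("interior merge: %d\n", node_key_merge(&interior, &l, &r));  /* 0 */
	return 0;
}
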

Showing 9 changed files with 423 additions and 289 deletions

drivers/md/bcache/Makefile
1 1  
2 2 obj-$(CONFIG_BCACHE) += bcache.o
3 3  
4   -bcache-y := alloc.o btree.o bset.o io.o journal.o writeback.o\
5   - movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o
  4 +bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
  5 + io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
  6 + util.o writeback.o
6 7  
7 8 CFLAGS_request.o += -Iblock
drivers/md/bcache/bset.c
... ... @@ -63,140 +63,6 @@
63 63 bch_keylist_bytes(l));
64 64 }
65 65  
66   -/* Pointer validation */
67   -
68   -static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
69   -{
70   - unsigned i;
71   -
72   - for (i = 0; i < KEY_PTRS(k); i++)
73   - if (ptr_available(c, k, i)) {
74   - struct cache *ca = PTR_CACHE(c, k, i);
75   - size_t bucket = PTR_BUCKET_NR(c, k, i);
76   - size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
77   -
78   - if (KEY_SIZE(k) + r > c->sb.bucket_size ||
79   - bucket < ca->sb.first_bucket ||
80   - bucket >= ca->sb.nbuckets)
81   - return true;
82   - }
83   -
84   - return false;
85   -}
86   -
87   -bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
88   -{
89   - char buf[80];
90   -
91   - if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
92   - goto bad;
93   -
94   - if (__ptr_invalid(c, k))
95   - goto bad;
96   -
97   - return false;
98   -bad:
99   - bch_bkey_to_text(buf, sizeof(buf), k);
100   - cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
101   - return true;
102   -}
103   -
104   -bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k)
105   -{
106   - char buf[80];
107   -
108   - if (!KEY_SIZE(k))
109   - return true;
110   -
111   - if (KEY_SIZE(k) > KEY_OFFSET(k))
112   - goto bad;
113   -
114   - if (__ptr_invalid(c, k))
115   - goto bad;
116   -
117   - return false;
118   -bad:
119   - bch_bkey_to_text(buf, sizeof(buf), k);
120   - cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
121   - return true;
122   -}
123   -
124   -static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k,
125   - unsigned ptr)
126   -{
127   - struct bucket *g = PTR_BUCKET(b->c, k, ptr);
128   - char buf[80];
129   -
130   - if (mutex_trylock(&b->c->bucket_lock)) {
131   - if (b->level) {
132   - if (KEY_DIRTY(k) ||
133   - g->prio != BTREE_PRIO ||
134   - (b->c->gc_mark_valid &&
135   - GC_MARK(g) != GC_MARK_METADATA))
136   - goto err;
137   -
138   - } else {
139   - if (g->prio == BTREE_PRIO)
140   - goto err;
141   -
142   - if (KEY_DIRTY(k) &&
143   - b->c->gc_mark_valid &&
144   - GC_MARK(g) != GC_MARK_DIRTY)
145   - goto err;
146   - }
147   - mutex_unlock(&b->c->bucket_lock);
148   - }
149   -
150   - return false;
151   -err:
152   - mutex_unlock(&b->c->bucket_lock);
153   - bch_bkey_to_text(buf, sizeof(buf), k);
154   - btree_bug(b,
155   -"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
156   - buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
157   - g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
158   - return true;
159   -}
160   -
161   -bool bch_ptr_bad(struct btree *b, const struct bkey *k)
162   -{
163   - struct bucket *g;
164   - unsigned i, stale;
165   -
166   - if (!bkey_cmp(k, &ZERO_KEY) ||
167   - !KEY_PTRS(k) ||
168   - bch_ptr_invalid(b, k))
169   - return true;
170   -
171   - for (i = 0; i < KEY_PTRS(k); i++)
172   - if (!ptr_available(b->c, k, i))
173   - return true;
174   -
175   - if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
176   - return false;
177   -
178   - for (i = 0; i < KEY_PTRS(k); i++) {
179   - g = PTR_BUCKET(b->c, k, i);
180   - stale = ptr_stale(b->c, k, i);
181   -
182   - btree_bug_on(stale > 96, b,
183   - "key too stale: %i, need_gc %u",
184   - stale, b->c->need_gc);
185   -
186   - btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
187   - b, "stale dirty pointer");
188   -
189   - if (stale)
190   - return true;
191   -
192   - if (expensive_debug_checks(b->c) &&
193   - ptr_bad_expensive_checks(b, k, i))
194   - return true;
195   - }
196   -
197   - return false;
198   -}
199   -
200 66 /* Key/pointer manipulation */
201 67  
202 68 void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
... ... @@ -251,57 +117,6 @@
251 117 return true;
252 118 }
253 119  
254   -static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
255   -{
256   - return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
257   - ~((uint64_t)1 << 63);
258   -}
259   -
260   -/* Tries to merge l and r: l should be lower than r
261   - * Returns true if we were able to merge. If we did merge, l will be the merged
262   - * key, r will be untouched.
263   - */
264   -bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
265   -{
266   - unsigned i;
267   -
268   - if (key_merging_disabled(b->c))
269   - return false;
270   -
271   - if (KEY_PTRS(l) != KEY_PTRS(r) ||
272   - KEY_DIRTY(l) != KEY_DIRTY(r) ||
273   - bkey_cmp(l, &START_KEY(r)))
274   - return false;
275   -
276   - for (i = 0; i < KEY_PTRS(l); i++)
277   - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
278   - PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
279   - return false;
280   -
281   - /* Keys with no pointers aren't restricted to one bucket and could
282   - * overflow KEY_SIZE
283   - */
284   - if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
285   - SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
286   - SET_KEY_SIZE(l, USHRT_MAX);
287   -
288   - bch_cut_front(l, r);
289   - return false;
290   - }
291   -
292   - if (KEY_CSUM(l)) {
293   - if (KEY_CSUM(r))
294   - l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
295   - else
296   - SET_KEY_CSUM(l, 0);
297   - }
298   -
299   - SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
300   - SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
301   -
302   - return true;
303   -}
304   -
305 120 /* Auxiliary search trees */
306 121  
307 122 /* 32 bits total: */
... ... @@ -1099,85 +914,6 @@
1099 914 return 0;
1100 915 }
1101 916  
1102   -static void sort_key_next(struct btree_iter *iter,
1103   - struct btree_iter_set *i)
1104   -{
1105   - i->k = bkey_next(i->k);
1106   -
1107   - if (i->k == i->end)
1108   - *i = iter->data[--iter->used];
1109   -}
1110   -
1111   -/*
1112   - * Returns true if l > r - unless l == r, in which case returns true if l is
1113   - * older than r.
1114   - *
1115   - * Necessary for btree_sort_fixup() - if there are multiple keys that compare
1116   - * equal in different sets, we have to process them newest to oldest.
1117   - */
1118   -static inline bool sort_extent_cmp(struct btree_iter_set l,
1119   - struct btree_iter_set r)
1120   -{
1121   - int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
1122   -
1123   - return c ? c > 0 : l.k < r.k;
1124   -}
1125   -
1126   -static inline bool sort_cmp(struct btree_iter_set l,
1127   - struct btree_iter_set r)
1128   -{
1129   - int64_t c = bkey_cmp(l.k, r.k);
1130   -
1131   - return c ? c > 0 : l.k < r.k;
1132   -}
1133   -
1134   -static struct bkey *btree_sort_fixup_extents(struct btree_iter *iter,
1135   - struct bkey *tmp)
1136   -{
1137   - while (iter->used > 1) {
1138   - struct btree_iter_set *top = iter->data, *i = top + 1;
1139   -
1140   - if (iter->used > 2 &&
1141   - sort_extent_cmp(i[0], i[1]))
1142   - i++;
1143   -
1144   - if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
1145   - break;
1146   -
1147   - if (!KEY_SIZE(i->k)) {
1148   - sort_key_next(iter, i);
1149   - heap_sift(iter, i - top, sort_extent_cmp);
1150   - continue;
1151   - }
1152   -
1153   - if (top->k > i->k) {
1154   - if (bkey_cmp(top->k, i->k) >= 0)
1155   - sort_key_next(iter, i);
1156   - else
1157   - bch_cut_front(top->k, i->k);
1158   -
1159   - heap_sift(iter, i - top, sort_extent_cmp);
1160   - } else {
1161   - /* can't happen because of comparison func */
1162   - BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
1163   -
1164   - if (bkey_cmp(i->k, top->k) < 0) {
1165   - bkey_copy(tmp, top->k);
1166   -
1167   - bch_cut_back(&START_KEY(i->k), tmp);
1168   - bch_cut_front(i->k, top->k);
1169   - heap_sift(iter, 0, btree_iter_cmp);
1170   -
1171   - return tmp;
1172   - } else {
1173   - bch_cut_back(&START_KEY(i->k), top->k);
1174   - }
1175   - }
1176   - }
1177   -
1178   - return NULL;
1179   -}
1180   -
1181 917 static void btree_mergesort(struct btree *b, struct bset *out,
1182 918 struct btree_iter *iter,
1183 919 bool fixup, bool remove_stale)
... ... @@ -1185,25 +921,22 @@
1185 921 int i;
1186 922 struct bkey *k, *last = NULL;
1187 923 BKEY_PADDED(k) tmp;
1188   - btree_iter_cmp_fn *cmp = b->level
1189   - ? sort_cmp
1190   - : sort_extent_cmp;
1191 924 bool (*bad)(struct btree *, const struct bkey *) = remove_stale
1192 925 ? bch_ptr_bad
1193 926 : bch_ptr_invalid;
1194 927  
1195 928 /* Heapify the iterator, using our comparison function */
1196 929 for (i = iter->used / 2 - 1; i >= 0; --i)
1197   - heap_sift(iter, i, cmp);
  930 + heap_sift(iter, i, b->ops->sort_cmp);
1198 931  
1199 932 while (!btree_iter_end(iter)) {
1200   - if (fixup && !b->level)
1201   - k = btree_sort_fixup_extents(iter, &tmp.k);
  933 + if (b->ops->sort_fixup && fixup)
  934 + k = b->ops->sort_fixup(iter, &tmp.k);
1202 935 else
1203 936 k = NULL;
1204 937  
1205 938 if (!k)
1206   - k = __bch_btree_iter_next(iter, cmp);
  939 + k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
1207 940  
1208 941 if (bad(b, k))
1209 942 continue;
... ... @@ -1211,8 +944,7 @@
1211 944 if (!last) {
1212 945 last = out->start;
1213 946 bkey_copy(last, k);
1214   - } else if (b->level ||
1215   - !bch_bkey_try_merge(b, last, k)) {
  947 + } else if (!bch_bkey_try_merge(b, last, k)) {
1216 948 last = bkey_next(last);
1217 949 bkey_copy(last, k);
1218 950 }
... ... @@ -1300,6 +1032,7 @@
1300 1032  
1301 1033 EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
1302 1034 }
  1035 +EXPORT_SYMBOL(bch_btree_sort_partial);
1303 1036  
1304 1037 void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
1305 1038 struct bset_sort_state *state)
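
With the level branch gone, btree_mergesort() heapifies its iterator with b->ops->sort_cmp and pops keys through __bch_btree_iter_next() using the same comparator. The following is a self-contained sketch of that comparator-driven multi-way merge, with arrays of ints standing in for bsets; the heap code is illustrative only, not the heap_sift() macro from util.h.

/*
 * Sketch of the multi-way merge shape used by btree_mergesort(): a
 * small binary heap of per-set cursors ordered by a comparison
 * callback, so one merge loop serves both key orderings.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cursor {
	const int *k;	/* next key in this set */
	const int *end;
};

struct iter {
	size_t used;
	struct cursor data[8];
};

/* Matches the commit's convention: returns true if l sorts after r. */
typedef bool (*iter_cmp_fn)(struct cursor l, struct cursor r);

static bool key_cmp(struct cursor l, struct cursor r)
{
	return *l.k > *r.k;
}

static void sift(struct iter *iter, size_t i, iter_cmp_fn cmp)
{
	for (;;) {
		size_t l = i * 2 + 1, r = l + 1, smallest = i;
		struct cursor tmp;

		if (l < iter->used && cmp(iter->data[smallest], iter->data[l]))
			smallest = l;
		if (r < iter->used && cmp(iter->data[smallest], iter->data[r]))
			smallest = r;
		if (smallest == i)
			break;

		tmp = iter->data[i];
		iter->data[i] = iter->data[smallest];
		iter->data[smallest] = tmp;
		i = smallest;
	}
}

/* Pop the smallest key; drop the cursor once its set is exhausted. */
static const int *iter_next(struct iter *iter, iter_cmp_fn cmp)
{
	const int *k = iter->data[0].k++;

	if (iter->data[0].k == iter->data[0].end)
		iter->data[0] = iter->data[--iter->used];
	sift(iter, 0, cmp);
	return k;
}

int main(void)
{
	static const int a[] = { 1, 4, 9 }, b[] = { 2, 3, 8 }, c[] = { 5, 7 };
	struct iter iter = {
		.used = 3,
		.data = { { a, a + 3 }, { b, b + 3 }, { c, c + 2 } },
	};
	size_t i;

	/* Heapify, then pop until empty: emits keys in sorted order. */
	for (i = iter.used / 2; i-- > 0; )
		sift(&iter, i, key_cmp);
	while (iter.used)
		printf("%d ", *iter_next(&iter, key_cmp));
	printf("\n");	/* 1 2 3 4 5 7 8 9 */
	return 0;
}
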
drivers/md/bcache/bset.h
... ... @@ -376,14 +376,6 @@
376 376  
377 377 struct cache_set;
378 378 const char *bch_ptr_status(struct cache_set *, const struct bkey *);
379   -bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
380   -bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
381   -bool bch_btree_ptr_bad(struct btree *, const struct bkey *);
382   -bool bch_extent_ptr_bad(struct btree *, const struct bkey *);
383   -
384   -bool bch_ptr_bad(struct btree *, const struct bkey *);
385   -
386   -bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
387 379  
388 380 int bch_bset_print_stats(struct cache_set *, char *);
389 381  
drivers/md/bcache/btree.c
... ... @@ -23,6 +23,7 @@
23 23 #include "bcache.h"
24 24 #include "btree.h"
25 25 #include "debug.h"
  26 +#include "extents.h"
26 27 #include "writeback.h"
27 28  
28 29 #include <linux/slab.h>
... ... @@ -930,6 +931,11 @@
930 931 lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_);
931 932 b->level = level;
932 933 b->parent = (void *) ~0UL;
  934 +
  935 + if (!b->level)
  936 + b->ops = &bch_extent_keys_ops;
  937 + else
  938 + b->ops = &bch_btree_keys_ops;
933 939  
934 940 mca_reinit(b);
935 941  
drivers/md/bcache/btree.h
... ... @@ -113,7 +113,28 @@
113 113 int prio_blocked;
114 114 };
115 115  
  116 +struct btree_keys_ops {
  117 + bool (*sort_cmp)(struct btree_iter_set,
  118 + struct btree_iter_set);
  119 + struct bkey *(*sort_fixup)(struct btree_iter *,
  120 + struct bkey *);
  121 + bool (*key_invalid)(struct btree *,
  122 + const struct bkey *);
  123 + bool (*key_bad)(struct btree *,
  124 + const struct bkey *);
  125 + bool (*key_merge)(struct btree *,
  126 + struct bkey *, struct bkey *);
  127 +
  128 +
  129 + /*
  130 + * Only used for deciding whether to use START_KEY(k) or just the key
  131 + * itself in a couple places
  132 + */
  133 + bool is_extents;
  134 +};
  135 +
116 136 struct btree {
  137 + const struct btree_keys_ops *ops;
117 138 /* Hottest entries first */
118 139 struct hlist_node hash;
119 140  
... ... @@ -232,10 +253,23 @@
232 253  
233 254 static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
234 255 {
235   - if (b->level)
236   - return bch_btree_ptr_invalid(b->c, k);
237   - else
238   - return bch_extent_ptr_invalid(b->c, k);
  256 + return b->ops->key_invalid(b, k);
  257 +}
  258 +
  259 +static inline bool bch_ptr_bad(struct btree *b, const struct bkey *k)
  260 +{
  261 + return b->ops->key_bad(b, k);
  262 +}
  263 +
  264 +/*
  265 + * Tries to merge l and r: l should be lower than r
  266 + * Returns true if we were able to merge. If we did merge, l will be the merged
  267 + * key, r will be untouched.
  268 + */
  269 +static inline bool bch_bkey_try_merge(struct btree *b,
  270 + struct bkey *l, struct bkey *r)
  271 +{
  272 + return b->ops->key_merge ? b->ops->key_merge(b, l, r) : false;
239 273 }
240 274  
241 275 void bkey_put(struct cache_set *c, struct bkey *k);
drivers/md/bcache/debug.c
... ... @@ -145,6 +145,7 @@
145 145 bkey_copy(&v->key, &b->key);
146 146 v->written = 0;
147 147 v->level = b->level;
  148 + v->ops = b->ops;
148 149  
149 150 bio = bch_bbio_alloc(b->c);
150 151 bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev;
drivers/md/bcache/extents.c
  1 +/*
  2 + * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
  3 + *
  4 + * Uses a block device as cache for other block devices; optimized for SSDs.
  5 + * All allocation is done in buckets, which should match the erase block size
  6 + * of the device.
  7 + *
  8 + * Buckets containing cached data are kept on a heap sorted by priority;
  9 + * bucket priority is increased on cache hit, and periodically all the buckets
  10 + * on the heap have their priority scaled down. This currently is just used as
  11 + * an LRU but in the future should allow for more intelligent heuristics.
  12 + *
  13 + * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
  14 + * counter. Garbage collection is used to remove stale pointers.
  15 + *
  16 + * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
  17 + * as keys are inserted we only sort the pages that have not yet been written.
  18 + * When garbage collection is run, we resort the entire node.
  19 + *
  20 + * All configuration is done via sysfs; see Documentation/bcache.txt.
  21 + */
  22 +
  23 +#include "bcache.h"
  24 +#include "btree.h"
  25 +#include "debug.h"
  26 +#include "extents.h"
  27 +#include "writeback.h"
  28 +
  29 +static void sort_key_next(struct btree_iter *iter,
  30 + struct btree_iter_set *i)
  31 +{
  32 + i->k = bkey_next(i->k);
  33 +
  34 + if (i->k == i->end)
  35 + *i = iter->data[--iter->used];
  36 +}
  37 +
  38 +static bool bch_key_sort_cmp(struct btree_iter_set l,
  39 + struct btree_iter_set r)
  40 +{
  41 + int64_t c = bkey_cmp(l.k, r.k);
  42 +
  43 + return c ? c > 0 : l.k < r.k;
  44 +}
  45 +
  46 +static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
  47 +{
  48 + unsigned i;
  49 +
  50 + for (i = 0; i < KEY_PTRS(k); i++)
  51 + if (ptr_available(c, k, i)) {
  52 + struct cache *ca = PTR_CACHE(c, k, i);
  53 + size_t bucket = PTR_BUCKET_NR(c, k, i);
  54 + size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
  55 +
  56 + if (KEY_SIZE(k) + r > c->sb.bucket_size ||
  57 + bucket < ca->sb.first_bucket ||
  58 + bucket >= ca->sb.nbuckets)
  59 + return true;
  60 + }
  61 +
  62 + return false;
  63 +}
  64 +
  65 +/* Btree ptrs */
  66 +
  67 +bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
  68 +{
  69 + char buf[80];
  70 +
  71 + if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
  72 + goto bad;
  73 +
  74 + if (__ptr_invalid(c, k))
  75 + goto bad;
  76 +
  77 + return false;
  78 +bad:
  79 + bch_bkey_to_text(buf, sizeof(buf), k);
  80 + cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
  81 + return true;
  82 +}
  83 +
  84 +static bool bch_btree_ptr_invalid(struct btree *b, const struct bkey *k)
  85 +{
  86 + return __bch_btree_ptr_invalid(b->c, k);
  87 +}
  88 +
  89 +static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
  90 +{
  91 + unsigned i;
  92 + char buf[80];
  93 + struct bucket *g;
  94 +
  95 + if (mutex_trylock(&b->c->bucket_lock)) {
  96 + for (i = 0; i < KEY_PTRS(k); i++)
  97 + if (ptr_available(b->c, k, i)) {
  98 + g = PTR_BUCKET(b->c, k, i);
  99 +
  100 + if (KEY_DIRTY(k) ||
  101 + g->prio != BTREE_PRIO ||
  102 + (b->c->gc_mark_valid &&
  103 + GC_MARK(g) != GC_MARK_METADATA))
  104 + goto err;
  105 + }
  106 +
  107 + mutex_unlock(&b->c->bucket_lock);
  108 + }
  109 +
  110 + return false;
  111 +err:
  112 + mutex_unlock(&b->c->bucket_lock);
  113 + bch_bkey_to_text(buf, sizeof(buf), k);
  114 + btree_bug(b,
  115 +"inconsistent btree pointer %s: bucket %li pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
  116 + buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
  117 + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
  118 + return true;
  119 +}
  120 +
  121 +static bool bch_btree_ptr_bad(struct btree *b, const struct bkey *k)
  122 +{
  123 + unsigned i;
  124 +
  125 + if (!bkey_cmp(k, &ZERO_KEY) ||
  126 + !KEY_PTRS(k) ||
  127 + bch_ptr_invalid(b, k))
  128 + return true;
  129 +
  130 + for (i = 0; i < KEY_PTRS(k); i++)
  131 + if (!ptr_available(b->c, k, i) ||
  132 + ptr_stale(b->c, k, i))
  133 + return true;
  134 +
  135 + if (expensive_debug_checks(b->c) &&
  136 + btree_ptr_bad_expensive(b, k))
  137 + return true;
  138 +
  139 + return false;
  140 +}
  141 +
  142 +const struct btree_keys_ops bch_btree_keys_ops = {
  143 + .sort_cmp = bch_key_sort_cmp,
  144 + .key_invalid = bch_btree_ptr_invalid,
  145 + .key_bad = bch_btree_ptr_bad,
  146 +};
  147 +
  148 +/* Extents */
  149 +
  150 +/*
  151 + * Returns true if l > r - unless l == r, in which case returns true if l is
  152 + * older than r.
  153 + *
  154 + * Necessary for btree_sort_fixup() - if there are multiple keys that compare
  155 + * equal in different sets, we have to process them newest to oldest.
  156 + */
  157 +static bool bch_extent_sort_cmp(struct btree_iter_set l,
  158 + struct btree_iter_set r)
  159 +{
  160 + int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
  161 +
  162 + return c ? c > 0 : l.k < r.k;
  163 +}
  164 +
  165 +static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
  166 + struct bkey *tmp)
  167 +{
  168 + while (iter->used > 1) {
  169 + struct btree_iter_set *top = iter->data, *i = top + 1;
  170 +
  171 + if (iter->used > 2 &&
  172 + bch_extent_sort_cmp(i[0], i[1]))
  173 + i++;
  174 +
  175 + if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
  176 + break;
  177 +
  178 + if (!KEY_SIZE(i->k)) {
  179 + sort_key_next(iter, i);
  180 + heap_sift(iter, i - top, bch_extent_sort_cmp);
  181 + continue;
  182 + }
  183 +
  184 + if (top->k > i->k) {
  185 + if (bkey_cmp(top->k, i->k) >= 0)
  186 + sort_key_next(iter, i);
  187 + else
  188 + bch_cut_front(top->k, i->k);
  189 +
  190 + heap_sift(iter, i - top, bch_extent_sort_cmp);
  191 + } else {
  192 + /* can't happen because of comparison func */
  193 + BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
  194 +
  195 + if (bkey_cmp(i->k, top->k) < 0) {
  196 + bkey_copy(tmp, top->k);
  197 +
  198 + bch_cut_back(&START_KEY(i->k), tmp);
  199 + bch_cut_front(i->k, top->k);
  200 + heap_sift(iter, 0, bch_extent_sort_cmp);
  201 +
  202 + return tmp;
  203 + } else {
  204 + bch_cut_back(&START_KEY(i->k), top->k);
  205 + }
  206 + }
  207 + }
  208 +
  209 + return NULL;
  210 +}
  211 +
  212 +static bool bch_extent_invalid(struct btree *b, const struct bkey *k)
  213 +{
  214 + char buf[80];
  215 +
  216 + if (!KEY_SIZE(k))
  217 + return true;
  218 +
  219 + if (KEY_SIZE(k) > KEY_OFFSET(k))
  220 + goto bad;
  221 +
  222 + if (__ptr_invalid(b->c, k))
  223 + goto bad;
  224 +
  225 + return false;
  226 +bad:
  227 + bch_bkey_to_text(buf, sizeof(buf), k);
  228 + cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
  229 + return true;
  230 +}
  231 +
  232 +static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
  233 + unsigned ptr)
  234 +{
  235 + struct bucket *g = PTR_BUCKET(b->c, k, ptr);
  236 + char buf[80];
  237 +
  238 + if (mutex_trylock(&b->c->bucket_lock)) {
  239 + if (b->c->gc_mark_valid &&
  240 + ((GC_MARK(g) != GC_MARK_DIRTY &&
  241 + KEY_DIRTY(k)) ||
  242 + GC_MARK(g) == GC_MARK_METADATA))
  243 + goto err;
  244 +
  245 + if (g->prio == BTREE_PRIO)
  246 + goto err;
  247 +
  248 + mutex_unlock(&b->c->bucket_lock);
  249 + }
  250 +
  251 + return false;
  252 +err:
  253 + mutex_unlock(&b->c->bucket_lock);
  254 + bch_bkey_to_text(buf, sizeof(buf), k);
  255 + btree_bug(b,
  256 +"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
  257 + buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
  258 + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
  259 + return true;
  260 +}
  261 +
  262 +static bool bch_extent_bad(struct btree *b, const struct bkey *k)
  263 +{
  264 + struct bucket *g;
  265 + unsigned i, stale;
  266 +
  267 + if (!KEY_PTRS(k) ||
  268 + bch_extent_invalid(b, k))
  269 + return true;
  270 +
  271 + for (i = 0; i < KEY_PTRS(k); i++)
  272 + if (!ptr_available(b->c, k, i))
  273 + return true;
  274 +
  275 + if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
  276 + return false;
  277 +
  278 + for (i = 0; i < KEY_PTRS(k); i++) {
  279 + g = PTR_BUCKET(b->c, k, i);
  280 + stale = ptr_stale(b->c, k, i);
  281 +
  282 + btree_bug_on(stale > 96, b,
  283 + "key too stale: %i, need_gc %u",
  284 + stale, b->c->need_gc);
  285 +
  286 + btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
  287 + b, "stale dirty pointer");
  288 +
  289 + if (stale)
  290 + return true;
  291 +
  292 + if (expensive_debug_checks(b->c) &&
  293 + bch_extent_bad_expensive(b, k, i))
  294 + return true;
  295 + }
  296 +
  297 + return false;
  298 +}
  299 +
  300 +static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
  301 +{
  302 + return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
  303 + ~((uint64_t)1 << 63);
  304 +}
  305 +
  306 +static bool bch_extent_merge(struct btree *b, struct bkey *l, struct bkey *r)
  307 +{
  308 + unsigned i;
  309 +
  310 + if (key_merging_disabled(b->c))
  311 + return false;
  312 +
  313 + if (KEY_PTRS(l) != KEY_PTRS(r) ||
  314 + KEY_DIRTY(l) != KEY_DIRTY(r) ||
  315 + bkey_cmp(l, &START_KEY(r)))
  316 + return false;
  317 +
  318 + for (i = 0; i < KEY_PTRS(l); i++)
  319 + if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
  320 + PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
  321 + return false;
  322 +
  323 + /* Keys with no pointers aren't restricted to one bucket and could
  324 + * overflow KEY_SIZE
  325 + */
  326 + if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
  327 + SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
  328 + SET_KEY_SIZE(l, USHRT_MAX);
  329 +
  330 + bch_cut_front(l, r);
  331 + return false;
  332 + }
  333 +
  334 + if (KEY_CSUM(l)) {
  335 + if (KEY_CSUM(r))
  336 + l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
  337 + else
  338 + SET_KEY_CSUM(l, 0);
  339 + }
  340 +
  341 + SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
  342 + SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
  343 +
  344 + return true;
  345 +}
  346 +
  347 +const struct btree_keys_ops bch_extent_keys_ops = {
  348 + .sort_cmp = bch_extent_sort_cmp,
  349 + .sort_fixup = bch_extent_sort_fixup,
  350 + .key_invalid = bch_extent_invalid,
  351 + .key_bad = bch_extent_bad,
  352 + .key_merge = bch_extent_merge,
  353 + .is_extents = true,
  354 +};
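
bch_extent_merge() (moved here from bset.c's bch_bkey_try_merge) relies on bcache's extent key encoding: KEY_OFFSET is the end of the extent and KEY_SIZE its length, so START_KEY is offset minus size, and two extents can merge when the left one ends exactly where the right one starts. A small standalone model of that arithmetic, including the USHRT_MAX size clamp, using plain structs in place of struct bkey:

/*
 * Worked model of the extent-merge arithmetic: keys store their *end*
 * offset plus a length, so adjacency means l->offset == start of r.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct ekey {
	unsigned long long offset;	/* end of the extent, in sectors */
	unsigned int size;		/* length, in sectors */
};

static unsigned long long start_of(const struct ekey *k)
{
	return k->offset - k->size;
}

/* Returns true if r was folded into l; mirrors the shape of the real code. */
static bool try_merge(struct ekey *l, struct ekey *r)
{
	if (l->offset != start_of(r))		/* not adjacent */
		return false;

	if ((unsigned long long)l->size + r->size > USHRT_MAX) {
		/* Grow l up to the limit; the real code then trims r
		 * with bch_cut_front() and still reports no merge. */
		l->offset += USHRT_MAX - l->size;
		l->size = USHRT_MAX;
		return false;
	}

	l->offset += r->size;
	l->size += r->size;
	return true;
}

int main(void)
{
	struct ekey l = { .offset = 128, .size = 64 };	/* sectors 64..128 */
	struct ekey r = { .offset = 192, .size = 64 };	/* sectors 128..192 */

	if (try_merge(&l, &r))
		printf("merged: %llu..%llu\n", start_of(&l), l.offset);
	return 0;
}
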
drivers/md/bcache/extents.h
  1 +#ifndef _BCACHE_EXTENTS_H
  2 +#define _BCACHE_EXTENTS_H
  3 +
  4 +extern const struct btree_keys_ops bch_btree_keys_ops;
  5 +extern const struct btree_keys_ops bch_extent_keys_ops;
  6 +
  7 +struct bkey;
  8 +struct cache_set;
  9 +
  10 +bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
  11 +
  12 +#endif /* _BCACHE_EXTENTS_H */
drivers/md/bcache/super.c
... ... @@ -9,6 +9,7 @@
9 9 #include "bcache.h"
10 10 #include "btree.h"
11 11 #include "debug.h"
  12 +#include "extents.h"
12 13 #include "request.h"
13 14 #include "writeback.h"
14 15  
... ... @@ -399,7 +400,7 @@
399 400 {
400 401 struct bkey *k = &j->uuid_bucket;
401 402  
402   - if (bch_btree_ptr_invalid(c, k))
  403 + if (__bch_btree_ptr_invalid(c, k))
403 404 return "bad uuid pointer";
404 405  
405 406 bkey_copy(&c->uuid_bucket, k);
... ... @@ -1575,7 +1576,7 @@
1575 1576 k = &j->btree_root;
1576 1577  
1577 1578 err = "bad btree root";
1578   - if (bch_btree_ptr_invalid(c, k))
  1579 + if (__bch_btree_ptr_invalid(c, k))
1579 1580 goto err;
1580 1581  
1581 1582 err = "error reading btree root";