Commit 65d45231b56efb3db51eb441e2c68f8252ecdd12
1 parent: ee811287c9
Exists in master and in 16 other branches
bcache: Abstract out stuff needed for sorting
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Showing 9 changed files with 423 additions and 289 deletions
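In short: the pointer-validation, sort-comparison, sort-fixup, and key-merging code moves out of bset.c into a new extents.c, and the scattered b->level branches are replaced by a per-node btree_keys_ops vtable. Leaf nodes get bch_extent_keys_ops (which provides sort_fixup and key_merge), interior nodes get bch_btree_keys_ops, and the generic code in bset.c and btree.h dispatches through b->ops.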
drivers/md/bcache/Makefile
1 | 1 | |
2 | 2 | obj-$(CONFIG_BCACHE) += bcache.o |
3 | 3 | |
4 | -bcache-y := alloc.o btree.o bset.o io.o journal.o writeback.o\ | |
5 | - movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o | |
4 | +bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ | |
5 | + io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ | |
6 | + util.o writeback.o | |
6 | 7 | |
7 | 8 | CFLAGS_request.o += -Iblock |
drivers/md/bcache/bset.c
... | ... | @@ -63,140 +63,6 @@ |
63 | 63 | bch_keylist_bytes(l)); |
64 | 64 | } |
65 | 65 | |
66 | -/* Pointer validation */ | |
67 | - | |
68 | -static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) | |
69 | -{ | |
70 | - unsigned i; | |
71 | - | |
72 | - for (i = 0; i < KEY_PTRS(k); i++) | |
73 | - if (ptr_available(c, k, i)) { | |
74 | - struct cache *ca = PTR_CACHE(c, k, i); | |
75 | - size_t bucket = PTR_BUCKET_NR(c, k, i); | |
76 | - size_t r = bucket_remainder(c, PTR_OFFSET(k, i)); | |
77 | - | |
78 | - if (KEY_SIZE(k) + r > c->sb.bucket_size || | |
79 | - bucket < ca->sb.first_bucket || | |
80 | - bucket >= ca->sb.nbuckets) | |
81 | - return true; | |
82 | - } | |
83 | - | |
84 | - return false; | |
85 | -} | |
86 | - | |
87 | -bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k) | |
88 | -{ | |
89 | - char buf[80]; | |
90 | - | |
91 | - if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)) | |
92 | - goto bad; | |
93 | - | |
94 | - if (__ptr_invalid(c, k)) | |
95 | - goto bad; | |
96 | - | |
97 | - return false; | |
98 | -bad: | |
99 | - bch_bkey_to_text(buf, sizeof(buf), k); | |
100 | - cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k)); | |
101 | - return true; | |
102 | -} | |
103 | - | |
104 | -bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k) | |
105 | -{ | |
106 | - char buf[80]; | |
107 | - | |
108 | - if (!KEY_SIZE(k)) | |
109 | - return true; | |
110 | - | |
111 | - if (KEY_SIZE(k) > KEY_OFFSET(k)) | |
112 | - goto bad; | |
113 | - | |
114 | - if (__ptr_invalid(c, k)) | |
115 | - goto bad; | |
116 | - | |
117 | - return false; | |
118 | -bad: | |
119 | - bch_bkey_to_text(buf, sizeof(buf), k); | |
120 | - cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k)); | |
121 | - return true; | |
122 | -} | |
123 | - | |
124 | -static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k, | |
125 | - unsigned ptr) | |
126 | -{ | |
127 | - struct bucket *g = PTR_BUCKET(b->c, k, ptr); | |
128 | - char buf[80]; | |
129 | - | |
130 | - if (mutex_trylock(&b->c->bucket_lock)) { | |
131 | - if (b->level) { | |
132 | - if (KEY_DIRTY(k) || | |
133 | - g->prio != BTREE_PRIO || | |
134 | - (b->c->gc_mark_valid && | |
135 | - GC_MARK(g) != GC_MARK_METADATA)) | |
136 | - goto err; | |
137 | - | |
138 | - } else { | |
139 | - if (g->prio == BTREE_PRIO) | |
140 | - goto err; | |
141 | - | |
142 | - if (KEY_DIRTY(k) && | |
143 | - b->c->gc_mark_valid && | |
144 | - GC_MARK(g) != GC_MARK_DIRTY) | |
145 | - goto err; | |
146 | - } | |
147 | - mutex_unlock(&b->c->bucket_lock); | |
148 | - } | |
149 | - | |
150 | - return false; | |
151 | -err: | |
152 | - mutex_unlock(&b->c->bucket_lock); | |
153 | - bch_bkey_to_text(buf, sizeof(buf), k); | |
154 | - btree_bug(b, | |
155 | -"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", | |
156 | - buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin), | |
157 | - g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); | |
158 | - return true; | |
159 | -} | |
160 | - | |
161 | -bool bch_ptr_bad(struct btree *b, const struct bkey *k) | |
162 | -{ | |
163 | - struct bucket *g; | |
164 | - unsigned i, stale; | |
165 | - | |
166 | - if (!bkey_cmp(k, &ZERO_KEY) || | |
167 | - !KEY_PTRS(k) || | |
168 | - bch_ptr_invalid(b, k)) | |
169 | - return true; | |
170 | - | |
171 | - for (i = 0; i < KEY_PTRS(k); i++) | |
172 | - if (!ptr_available(b->c, k, i)) | |
173 | - return true; | |
174 | - | |
175 | - if (!expensive_debug_checks(b->c) && KEY_DIRTY(k)) | |
176 | - return false; | |
177 | - | |
178 | - for (i = 0; i < KEY_PTRS(k); i++) { | |
179 | - g = PTR_BUCKET(b->c, k, i); | |
180 | - stale = ptr_stale(b->c, k, i); | |
181 | - | |
182 | - btree_bug_on(stale > 96, b, | |
183 | - "key too stale: %i, need_gc %u", | |
184 | - stale, b->c->need_gc); | |
185 | - | |
186 | - btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k), | |
187 | - b, "stale dirty pointer"); | |
188 | - | |
189 | - if (stale) | |
190 | - return true; | |
191 | - | |
192 | - if (expensive_debug_checks(b->c) && | |
193 | - ptr_bad_expensive_checks(b, k, i)) | |
194 | - return true; | |
195 | - } | |
196 | - | |
197 | - return false; | |
198 | -} | |
199 | - | |
200 | 66 | /* Key/pointer manipulation */ |
201 | 67 | |
202 | 68 | void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src, |
... | ... | @@ -251,57 +117,6 @@ |
251 | 117 | return true; |
252 | 118 | } |
253 | 119 | |
254 | -static uint64_t merge_chksums(struct bkey *l, struct bkey *r) | |
255 | -{ | |
256 | - return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) & | |
257 | - ~((uint64_t)1 << 63); | |
258 | -} | |
259 | - | |
260 | -/* Tries to merge l and r: l should be lower than r | |
261 | - * Returns true if we were able to merge. If we did merge, l will be the merged | |
262 | - * key, r will be untouched. | |
263 | - */ | |
264 | -bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r) | |
265 | -{ | |
266 | - unsigned i; | |
267 | - | |
268 | - if (key_merging_disabled(b->c)) | |
269 | - return false; | |
270 | - | |
271 | - if (KEY_PTRS(l) != KEY_PTRS(r) || | |
272 | - KEY_DIRTY(l) != KEY_DIRTY(r) || | |
273 | - bkey_cmp(l, &START_KEY(r))) | |
274 | - return false; | |
275 | - | |
276 | - for (i = 0; i < KEY_PTRS(l); i++) | |
277 | - if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || | |
278 | - PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) | |
279 | - return false; | |
280 | - | |
281 | - /* Keys with no pointers aren't restricted to one bucket and could | |
282 | - * overflow KEY_SIZE | |
283 | - */ | |
284 | - if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) { | |
285 | - SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l)); | |
286 | - SET_KEY_SIZE(l, USHRT_MAX); | |
287 | - | |
288 | - bch_cut_front(l, r); | |
289 | - return false; | |
290 | - } | |
291 | - | |
292 | - if (KEY_CSUM(l)) { | |
293 | - if (KEY_CSUM(r)) | |
294 | - l->ptr[KEY_PTRS(l)] = merge_chksums(l, r); | |
295 | - else | |
296 | - SET_KEY_CSUM(l, 0); | |
297 | - } | |
298 | - | |
299 | - SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r)); | |
300 | - SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r)); | |
301 | - | |
302 | - return true; | |
303 | -} | |
304 | - | |
305 | 120 | /* Auxiliary search trees */ |
306 | 121 | |
307 | 122 | /* 32 bits total: */ |
... | ... | @@ -1099,85 +914,6 @@ |
1099 | 914 | return 0; |
1100 | 915 | } |
1101 | 916 | |
1102 | -static void sort_key_next(struct btree_iter *iter, | |
1103 | - struct btree_iter_set *i) | |
1104 | -{ | |
1105 | - i->k = bkey_next(i->k); | |
1106 | - | |
1107 | - if (i->k == i->end) | |
1108 | - *i = iter->data[--iter->used]; | |
1109 | -} | |
1110 | - | |
1111 | -/* | |
1112 | - * Returns true if l > r - unless l == r, in which case returns true if l is | |
1113 | - * older than r. | |
1114 | - * | |
1115 | - * Necessary for btree_sort_fixup() - if there are multiple keys that compare | |
1116 | - * equal in different sets, we have to process them newest to oldest. | |
1117 | - */ | |
1118 | -static inline bool sort_extent_cmp(struct btree_iter_set l, | |
1119 | - struct btree_iter_set r) | |
1120 | -{ | |
1121 | - int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); | |
1122 | - | |
1123 | - return c ? c > 0 : l.k < r.k; | |
1124 | -} | |
1125 | - | |
1126 | -static inline bool sort_cmp(struct btree_iter_set l, | |
1127 | - struct btree_iter_set r) | |
1128 | -{ | |
1129 | - int64_t c = bkey_cmp(l.k, r.k); | |
1130 | - | |
1131 | - return c ? c > 0 : l.k < r.k; | |
1132 | -} | |
1133 | - | |
1134 | -static struct bkey *btree_sort_fixup_extents(struct btree_iter *iter, | |
1135 | - struct bkey *tmp) | |
1136 | -{ | |
1137 | - while (iter->used > 1) { | |
1138 | - struct btree_iter_set *top = iter->data, *i = top + 1; | |
1139 | - | |
1140 | - if (iter->used > 2 && | |
1141 | - sort_extent_cmp(i[0], i[1])) | |
1142 | - i++; | |
1143 | - | |
1144 | - if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) | |
1145 | - break; | |
1146 | - | |
1147 | - if (!KEY_SIZE(i->k)) { | |
1148 | - sort_key_next(iter, i); | |
1149 | - heap_sift(iter, i - top, sort_extent_cmp); | |
1150 | - continue; | |
1151 | - } | |
1152 | - | |
1153 | - if (top->k > i->k) { | |
1154 | - if (bkey_cmp(top->k, i->k) >= 0) | |
1155 | - sort_key_next(iter, i); | |
1156 | - else | |
1157 | - bch_cut_front(top->k, i->k); | |
1158 | - | |
1159 | - heap_sift(iter, i - top, sort_extent_cmp); | |
1160 | - } else { | |
1161 | - /* can't happen because of comparison func */ | |
1162 | - BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); | |
1163 | - | |
1164 | - if (bkey_cmp(i->k, top->k) < 0) { | |
1165 | - bkey_copy(tmp, top->k); | |
1166 | - | |
1167 | - bch_cut_back(&START_KEY(i->k), tmp); | |
1168 | - bch_cut_front(i->k, top->k); | |
1169 | - heap_sift(iter, 0, btree_iter_cmp); | |
1170 | - | |
1171 | - return tmp; | |
1172 | - } else { | |
1173 | - bch_cut_back(&START_KEY(i->k), top->k); | |
1174 | - } | |
1175 | - } | |
1176 | - } | |
1177 | - | |
1178 | - return NULL; | |
1179 | -} | |
1180 | - | |
1181 | 917 | static void btree_mergesort(struct btree *b, struct bset *out, |
1182 | 918 | struct btree_iter *iter, |
1183 | 919 | bool fixup, bool remove_stale) |
... | ... | @@ -1185,25 +921,22 @@ |
1185 | 921 | int i; |
1186 | 922 | struct bkey *k, *last = NULL; |
1187 | 923 | BKEY_PADDED(k) tmp; |
1188 | - btree_iter_cmp_fn *cmp = b->level | |
1189 | - ? sort_cmp | |
1190 | - : sort_extent_cmp; | |
1191 | 924 | bool (*bad)(struct btree *, const struct bkey *) = remove_stale |
1192 | 925 | ? bch_ptr_bad |
1193 | 926 | : bch_ptr_invalid; |
1194 | 927 | |
1195 | 928 | /* Heapify the iterator, using our comparison function */ |
1196 | 929 | for (i = iter->used / 2 - 1; i >= 0; --i) |
1197 | - heap_sift(iter, i, cmp); | |
930 | + heap_sift(iter, i, b->ops->sort_cmp); | |
1198 | 931 | |
1199 | 932 | while (!btree_iter_end(iter)) { |
1200 | - if (fixup && !b->level) | |
1201 | - k = btree_sort_fixup_extents(iter, &tmp.k); | |
933 | + if (b->ops->sort_fixup && fixup) | |
934 | + k = b->ops->sort_fixup(iter, &tmp.k); | |
1202 | 935 | else |
1203 | 936 | k = NULL; |
1204 | 937 | |
1205 | 938 | if (!k) |
1206 | - k = __bch_btree_iter_next(iter, cmp); | |
939 | + k = __bch_btree_iter_next(iter, b->ops->sort_cmp); | |
1207 | 940 | |
1208 | 941 | if (bad(b, k)) |
1209 | 942 | continue; |
... | ... | @@ -1211,8 +944,7 @@ |
1211 | 944 | if (!last) { |
1212 | 945 | last = out->start; |
1213 | 946 | bkey_copy(last, k); |
1214 | - } else if (b->level || | |
1215 | - !bch_bkey_try_merge(b, last, k)) { | |
947 | + } else if (!bch_bkey_try_merge(b, last, k)) { | |
1216 | 948 | last = bkey_next(last); |
1217 | 949 | bkey_copy(last, k); |
1218 | 950 | } |
... | ... | @@ -1300,6 +1032,7 @@ |
1300 | 1032 | |
1301 | 1033 | EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); |
1302 | 1034 | } |
1035 | +EXPORT_SYMBOL(bch_btree_sort_partial); | |
1303 | 1036 | |
1304 | 1037 | void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter, |
1305 | 1038 | struct bset_sort_state *state) |
drivers/md/bcache/bset.h
... | ... | @@ -376,14 +376,6 @@ |
376 | 376 | |
377 | 377 | struct cache_set; |
378 | 378 | const char *bch_ptr_status(struct cache_set *, const struct bkey *); |
379 | -bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); | |
380 | -bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *); | |
381 | -bool bch_btree_ptr_bad(struct btree *, const struct bkey *); | |
382 | -bool bch_extent_ptr_bad(struct btree *, const struct bkey *); | |
383 | - | |
384 | -bool bch_ptr_bad(struct btree *, const struct bkey *); | |
385 | - | |
386 | -bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); | |
387 | 379 | |
388 | 380 | int bch_bset_print_stats(struct cache_set *, char *); |
389 | 381 |
drivers/md/bcache/btree.c
... | ... | @@ -23,6 +23,7 @@ |
23 | 23 | #include "bcache.h" |
24 | 24 | #include "btree.h" |
25 | 25 | #include "debug.h" |
26 | +#include "extents.h" | |
26 | 27 | #include "writeback.h" |
27 | 28 | |
28 | 29 | #include <linux/slab.h> |
... | ... | @@ -930,6 +931,11 @@ |
930 | 931 | lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); |
931 | 932 | b->level = level; |
932 | 933 | b->parent = (void *) ~0UL; |
934 | + | |
935 | + if (!b->level) | |
936 | + b->ops = &bch_extent_keys_ops; | |
937 | + else | |
938 | + b->ops = &bch_btree_keys_ops; | |
933 | 939 | |
934 | 940 | mca_reinit(b); |
935 | 941 |
drivers/md/bcache/btree.h
... | ... | @@ -113,7 +113,28 @@ |
113 | 113 | int prio_blocked; |
114 | 114 | }; |
115 | 115 | |
116 | +struct btree_keys_ops { | |
117 | + bool (*sort_cmp)(struct btree_iter_set, | |
118 | + struct btree_iter_set); | |
119 | + struct bkey *(*sort_fixup)(struct btree_iter *, | |
120 | + struct bkey *); | |
121 | + bool (*key_invalid)(struct btree *, | |
122 | + const struct bkey *); | |
123 | + bool (*key_bad)(struct btree *, | |
124 | + const struct bkey *); | |
125 | + bool (*key_merge)(struct btree *, | |
126 | + struct bkey *, struct bkey *); | |
127 | + | |
128 | + | |
129 | + /* | |
130 | + * Only used for deciding whether to use START_KEY(k) or just the key | |
131 | + * itself in a couple places | |
132 | + */ | |
133 | + bool is_extents; | |
134 | +}; | |
135 | + | |
116 | 136 | struct btree { |
137 | + const struct btree_keys_ops *ops; | |
117 | 138 | /* Hottest entries first */ |
118 | 139 | struct hlist_node hash; |
119 | 140 | |
... | ... | @@ -232,10 +253,23 @@ |
232 | 253 | |
233 | 254 | static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k) |
234 | 255 | { |
235 | - if (b->level) | |
236 | - return bch_btree_ptr_invalid(b->c, k); | |
237 | - else | |
238 | - return bch_extent_ptr_invalid(b->c, k); | |
256 | + return b->ops->key_invalid(b, k); | |
257 | +} | |
258 | + | |
259 | +static inline bool bch_ptr_bad(struct btree *b, const struct bkey *k) | |
260 | +{ | |
261 | + return b->ops->key_bad(b, k); | |
262 | +} | |
263 | + | |
264 | +/* | |
265 | + * Tries to merge l and r: l should be lower than r | |
266 | + * Returns true if we were able to merge. If we did merge, l will be the merged | |
267 | + * key, r will be untouched. | |
268 | + */ | |
269 | +static inline bool bch_bkey_try_merge(struct btree *b, | |
270 | + struct bkey *l, struct bkey *r) | |
271 | +{ | |
272 | + return b->ops->key_merge ? b->ops->key_merge(b, l, r) : false; | |
239 | 273 | } |
240 | 274 | |
241 | 275 | void bkey_put(struct cache_set *c, struct bkey *k); |
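
The inline wrappers above are the whole dispatch layer: callers keep using bch_ptr_invalid(), bch_ptr_bad() and bch_bkey_try_merge(), while the node's ops table decides what they mean, and a NULL hook (key_merge on interior nodes) means the operation is simply absent. Below is a minimal standalone sketch of the same pattern, using toy types rather than the kernel's btree/bkey structs; everything in it is illustrative, not kernel code.

/* Toy model of the btree_keys_ops dispatch; all names here are made up. */
#include <stdbool.h>
#include <stdio.h>

struct node;

struct keys_ops {
	/* NULL means "this node type never merges keys" */
	bool	(*key_merge)(struct node *, int *l, int *r);
	bool	is_extents;
};

struct node {
	const struct keys_ops	*ops;
	int			level;
};

/* Leaf (extent) nodes can merge adjacent keys; here: sum two ints */
static bool extent_merge(struct node *n, int *l, int *r)
{
	(void) n;
	*l += *r;
	return true;
}

static const struct keys_ops extent_ops = {
	.key_merge	= extent_merge,
	.is_extents	= true,
};

static const struct keys_ops btree_ops = { 0 };	/* no key_merge hook */

/* Mirrors the new bch_bkey_try_merge(): one generic caller, per-type hook */
static bool try_merge(struct node *n, int *l, int *r)
{
	return n->ops->key_merge ? n->ops->key_merge(n, l, r) : false;
}

int main(void)
{
	struct node leaf = { &extent_ops, 0 };
	struct node interior = { &btree_ops, 1 };
	int a = 1, b = 2;

	printf("leaf merged: %d, a = %d\n", try_merge(&leaf, &a, &b), a);
	printf("interior merged: %d\n", try_merge(&interior, &a, &b));
	return 0;
}

The leaf node merges (prints 1 with a = 3), the interior node declines (prints 0); this is the same reason btree_mergesort() above no longer needs a b->level check in front of bch_bkey_try_merge().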
drivers/md/bcache/debug.c
drivers/md/bcache/extents.c
1 | +/* | |
2 | + * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> | |
3 | + * | |
4 | + * Uses a block device as cache for other block devices; optimized for SSDs. | |
5 | + * All allocation is done in buckets, which should match the erase block size | |
6 | + * of the device. | |
7 | + * | |
8 | + * Buckets containing cached data are kept on a heap sorted by priority; | |
9 | + * bucket priority is increased on cache hit, and periodically all the buckets | |
10 | + * on the heap have their priority scaled down. This currently is just used as | |
11 | + * an LRU but in the future should allow for more intelligent heuristics. | |
12 | + * | |
13 | + * Buckets have an 8 bit counter; freeing is accomplished by incrementing the | |
14 | + * counter. Garbage collection is used to remove stale pointers. | |
15 | + * | |
16 | + * Indexing is done via a btree; nodes are not necessarily fully sorted, rather | |
17 | + * as keys are inserted we only sort the pages that have not yet been written. | |
18 | + * When garbage collection is run, we resort the entire node. | |
19 | + * | |
20 | + * All configuration is done via sysfs; see Documentation/bcache.txt. | |
21 | + */ | |
22 | + | |
23 | +#include "bcache.h" | |
24 | +#include "btree.h" | |
25 | +#include "debug.h" | |
26 | +#include "extents.h" | |
27 | +#include "writeback.h" | |
28 | + | |
29 | +static void sort_key_next(struct btree_iter *iter, | |
30 | + struct btree_iter_set *i) | |
31 | +{ | |
32 | + i->k = bkey_next(i->k); | |
33 | + | |
34 | + if (i->k == i->end) | |
35 | + *i = iter->data[--iter->used]; | |
36 | +} | |
37 | + | |
38 | +static bool bch_key_sort_cmp(struct btree_iter_set l, | |
39 | + struct btree_iter_set r) | |
40 | +{ | |
41 | + int64_t c = bkey_cmp(l.k, r.k); | |
42 | + | |
43 | + return c ? c > 0 : l.k < r.k; | |
44 | +} | |
45 | + | |
46 | +static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) | |
47 | +{ | |
48 | + unsigned i; | |
49 | + | |
50 | + for (i = 0; i < KEY_PTRS(k); i++) | |
51 | + if (ptr_available(c, k, i)) { | |
52 | + struct cache *ca = PTR_CACHE(c, k, i); | |
53 | + size_t bucket = PTR_BUCKET_NR(c, k, i); | |
54 | + size_t r = bucket_remainder(c, PTR_OFFSET(k, i)); | |
55 | + | |
56 | + if (KEY_SIZE(k) + r > c->sb.bucket_size || | |
57 | + bucket < ca->sb.first_bucket || | |
58 | + bucket >= ca->sb.nbuckets) | |
59 | + return true; | |
60 | + } | |
61 | + | |
62 | + return false; | |
63 | +} | |
64 | + | |
65 | +/* Btree ptrs */ | |
66 | + | |
67 | +bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k) | |
68 | +{ | |
69 | + char buf[80]; | |
70 | + | |
71 | + if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)) | |
72 | + goto bad; | |
73 | + | |
74 | + if (__ptr_invalid(c, k)) | |
75 | + goto bad; | |
76 | + | |
77 | + return false; | |
78 | +bad: | |
79 | + bch_bkey_to_text(buf, sizeof(buf), k); | |
80 | + cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k)); | |
81 | + return true; | |
82 | +} | |
83 | + | |
84 | +static bool bch_btree_ptr_invalid(struct btree *b, const struct bkey *k) | |
85 | +{ | |
86 | + return __bch_btree_ptr_invalid(b->c, k); | |
87 | +} | |
88 | + | |
89 | +static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k) | |
90 | +{ | |
91 | + unsigned i; | |
92 | + char buf[80]; | |
93 | + struct bucket *g; | |
94 | + | |
95 | + if (mutex_trylock(&b->c->bucket_lock)) { | |
96 | + for (i = 0; i < KEY_PTRS(k); i++) | |
97 | + if (ptr_available(b->c, k, i)) { | |
98 | + g = PTR_BUCKET(b->c, k, i); | |
99 | + | |
100 | + if (KEY_DIRTY(k) || | |
101 | + g->prio != BTREE_PRIO || | |
102 | + (b->c->gc_mark_valid && | |
103 | + GC_MARK(g) != GC_MARK_METADATA)) | |
104 | + goto err; | |
105 | + } | |
106 | + | |
107 | + mutex_unlock(&b->c->bucket_lock); | |
108 | + } | |
109 | + | |
110 | + return false; | |
111 | +err: | |
112 | + mutex_unlock(&b->c->bucket_lock); | |
113 | + bch_bkey_to_text(buf, sizeof(buf), k); | |
114 | + btree_bug(b, | |
115 | +"inconsistent btree pointer %s: bucket %li pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", | |
116 | + buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), | |
117 | + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); | |
118 | + return true; | |
119 | +} | |
120 | + | |
121 | +static bool bch_btree_ptr_bad(struct btree *b, const struct bkey *k) | |
122 | +{ | |
123 | + unsigned i; | |
124 | + | |
125 | + if (!bkey_cmp(k, &ZERO_KEY) || | |
126 | + !KEY_PTRS(k) || | |
127 | + bch_ptr_invalid(b, k)) | |
128 | + return true; | |
129 | + | |
130 | + for (i = 0; i < KEY_PTRS(k); i++) | |
131 | + if (!ptr_available(b->c, k, i) || | |
132 | + ptr_stale(b->c, k, i)) | |
133 | + return true; | |
134 | + | |
135 | + if (expensive_debug_checks(b->c) && | |
136 | + btree_ptr_bad_expensive(b, k)) | |
137 | + return true; | |
138 | + | |
139 | + return false; | |
140 | +} | |
141 | + | |
142 | +const struct btree_keys_ops bch_btree_keys_ops = { | |
143 | + .sort_cmp = bch_key_sort_cmp, | |
144 | + .key_invalid = bch_btree_ptr_invalid, | |
145 | + .key_bad = bch_btree_ptr_bad, | |
146 | +}; | |
147 | + | |
148 | +/* Extents */ | |
149 | + | |
150 | +/* | |
151 | + * Returns true if l > r - unless l == r, in which case returns true if l is | |
152 | + * older than r. | |
153 | + * | |
154 | + * Necessary for btree_sort_fixup() - if there are multiple keys that compare | |
155 | + * equal in different sets, we have to process them newest to oldest. | |
156 | + */ | |
157 | +static bool bch_extent_sort_cmp(struct btree_iter_set l, | |
158 | + struct btree_iter_set r) | |
159 | +{ | |
160 | + int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); | |
161 | + | |
162 | + return c ? c > 0 : l.k < r.k; | |
163 | +} | |
164 | + | |
165 | +static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, | |
166 | + struct bkey *tmp) | |
167 | +{ | |
168 | + while (iter->used > 1) { | |
169 | + struct btree_iter_set *top = iter->data, *i = top + 1; | |
170 | + | |
171 | + if (iter->used > 2 && | |
172 | + bch_extent_sort_cmp(i[0], i[1])) | |
173 | + i++; | |
174 | + | |
175 | + if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) | |
176 | + break; | |
177 | + | |
178 | + if (!KEY_SIZE(i->k)) { | |
179 | + sort_key_next(iter, i); | |
180 | + heap_sift(iter, i - top, bch_extent_sort_cmp); | |
181 | + continue; | |
182 | + } | |
183 | + | |
184 | + if (top->k > i->k) { | |
185 | + if (bkey_cmp(top->k, i->k) >= 0) | |
186 | + sort_key_next(iter, i); | |
187 | + else | |
188 | + bch_cut_front(top->k, i->k); | |
189 | + | |
190 | + heap_sift(iter, i - top, bch_extent_sort_cmp); | |
191 | + } else { | |
192 | + /* can't happen because of comparison func */ | |
193 | + BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); | |
194 | + | |
195 | + if (bkey_cmp(i->k, top->k) < 0) { | |
196 | + bkey_copy(tmp, top->k); | |
197 | + | |
198 | + bch_cut_back(&START_KEY(i->k), tmp); | |
199 | + bch_cut_front(i->k, top->k); | |
200 | + heap_sift(iter, 0, bch_extent_sort_cmp); | |
201 | + | |
202 | + return tmp; | |
203 | + } else { | |
204 | + bch_cut_back(&START_KEY(i->k), top->k); | |
205 | + } | |
206 | + } | |
207 | + } | |
208 | + | |
209 | + return NULL; | |
210 | +} | |
211 | + | |
212 | +static bool bch_extent_invalid(struct btree *b, const struct bkey *k) | |
213 | +{ | |
214 | + char buf[80]; | |
215 | + | |
216 | + if (!KEY_SIZE(k)) | |
217 | + return true; | |
218 | + | |
219 | + if (KEY_SIZE(k) > KEY_OFFSET(k)) | |
220 | + goto bad; | |
221 | + | |
222 | + if (__ptr_invalid(b->c, k)) | |
223 | + goto bad; | |
224 | + | |
225 | + return false; | |
226 | +bad: | |
227 | + bch_bkey_to_text(buf, sizeof(buf), k); | |
228 | + cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k)); | |
229 | + return true; | |
230 | +} | |
231 | + | |
232 | +static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, | |
233 | + unsigned ptr) | |
234 | +{ | |
235 | + struct bucket *g = PTR_BUCKET(b->c, k, ptr); | |
236 | + char buf[80]; | |
237 | + | |
238 | + if (mutex_trylock(&b->c->bucket_lock)) { | |
239 | + if (b->c->gc_mark_valid && | |
240 | + ((GC_MARK(g) != GC_MARK_DIRTY && | |
241 | + KEY_DIRTY(k)) || | |
242 | + GC_MARK(g) == GC_MARK_METADATA)) | |
243 | + goto err; | |
244 | + | |
245 | + if (g->prio == BTREE_PRIO) | |
246 | + goto err; | |
247 | + | |
248 | + mutex_unlock(&b->c->bucket_lock); | |
249 | + } | |
250 | + | |
251 | + return false; | |
252 | +err: | |
253 | + mutex_unlock(&b->c->bucket_lock); | |
254 | + bch_bkey_to_text(buf, sizeof(buf), k); | |
255 | + btree_bug(b, | |
256 | +"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", | |
257 | + buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin), | |
258 | + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); | |
259 | + return true; | |
260 | +} | |
261 | + | |
262 | +static bool bch_extent_bad(struct btree *b, const struct bkey *k) | |
263 | +{ | |
264 | + struct bucket *g; | |
265 | + unsigned i, stale; | |
266 | + | |
267 | + if (!KEY_PTRS(k) || | |
268 | + bch_extent_invalid(b, k)) | |
269 | + return true; | |
270 | + | |
271 | + for (i = 0; i < KEY_PTRS(k); i++) | |
272 | + if (!ptr_available(b->c, k, i)) | |
273 | + return true; | |
274 | + | |
275 | + if (!expensive_debug_checks(b->c) && KEY_DIRTY(k)) | |
276 | + return false; | |
277 | + | |
278 | + for (i = 0; i < KEY_PTRS(k); i++) { | |
279 | + g = PTR_BUCKET(b->c, k, i); | |
280 | + stale = ptr_stale(b->c, k, i); | |
281 | + | |
282 | + btree_bug_on(stale > 96, b, | |
283 | + "key too stale: %i, need_gc %u", | |
284 | + stale, b->c->need_gc); | |
285 | + | |
286 | + btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k), | |
287 | + b, "stale dirty pointer"); | |
288 | + | |
289 | + if (stale) | |
290 | + return true; | |
291 | + | |
292 | + if (expensive_debug_checks(b->c) && | |
293 | + bch_extent_bad_expensive(b, k, i)) | |
294 | + return true; | |
295 | + } | |
296 | + | |
297 | + return false; | |
298 | +} | |
299 | + | |
300 | +static uint64_t merge_chksums(struct bkey *l, struct bkey *r) | |
301 | +{ | |
302 | + return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) & | |
303 | + ~((uint64_t)1 << 63); | |
304 | +} | |
305 | + | |
306 | +static bool bch_extent_merge(struct btree *b, struct bkey *l, struct bkey *r) | |
307 | +{ | |
308 | + unsigned i; | |
309 | + | |
310 | + if (key_merging_disabled(b->c)) | |
311 | + return false; | |
312 | + | |
313 | + if (KEY_PTRS(l) != KEY_PTRS(r) || | |
314 | + KEY_DIRTY(l) != KEY_DIRTY(r) || | |
315 | + bkey_cmp(l, &START_KEY(r))) | |
316 | + return false; | |
317 | + | |
318 | + for (i = 0; i < KEY_PTRS(l); i++) | |
319 | + if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || | |
320 | + PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) | |
321 | + return false; | |
322 | + | |
323 | + /* Keys with no pointers aren't restricted to one bucket and could | |
324 | + * overflow KEY_SIZE | |
325 | + */ | |
326 | + if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) { | |
327 | + SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l)); | |
328 | + SET_KEY_SIZE(l, USHRT_MAX); | |
329 | + | |
330 | + bch_cut_front(l, r); | |
331 | + return false; | |
332 | + } | |
333 | + | |
334 | + if (KEY_CSUM(l)) { | |
335 | + if (KEY_CSUM(r)) | |
336 | + l->ptr[KEY_PTRS(l)] = merge_chksums(l, r); | |
337 | + else | |
338 | + SET_KEY_CSUM(l, 0); | |
339 | + } | |
340 | + | |
341 | + SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r)); | |
342 | + SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r)); | |
343 | + | |
344 | + return true; | |
345 | +} | |
346 | + | |
347 | +const struct btree_keys_ops bch_extent_keys_ops = { | |
348 | + .sort_cmp = bch_extent_sort_cmp, | |
349 | + .sort_fixup = bch_extent_sort_fixup, | |
350 | + .key_invalid = bch_extent_invalid, | |
351 | + .key_bad = bch_extent_bad, | |
352 | + .key_merge = bch_extent_merge, | |
353 | + .is_extents = true, | |
354 | +}; |
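
bch_extent_merge() reads a key as naming the end of its extent: assuming KEY_OFFSET(k) is the end sector and KEY_SIZE(k) the length, k covers sectors [KEY_OFFSET(k) - KEY_SIZE(k), KEY_OFFSET(k)), and START_KEY(r) is r shifted back to its start. So l and r merge only when l ends exactly where r starts (bkey_cmp(l, &START_KEY(r)) == 0) and each of l's pointers, advanced by KEY_SIZE(l) sectors, lands on the matching pointer of r within the same bucket. A worked example of the arithmetic, under those assumed macro semantics:

    l: KEY_OFFSET 100, KEY_SIZE 20   -> covers [80, 100)
    r: KEY_OFFSET 130, KEY_SIZE 30   -> covers [100, 130)
    START_KEY(r): 130 - 30 = 100 = KEY_OFFSET(l), so they abut
    merged l: KEY_OFFSET 100 + 30 = 130, KEY_SIZE 20 + 30 = 50 -> [80, 130)

If the combined size would overflow USHRT_MAX, the function instead grows l to exactly USHRT_MAX sectors, trims the overlap off the front of r with bch_cut_front(), and returns false, because r still holds the remainder.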
drivers/md/bcache/extents.h
1 | +#ifndef _BCACHE_EXTENTS_H | |
2 | +#define _BCACHE_EXTENTS_H | |
3 | + | |
4 | +extern const struct btree_keys_ops bch_btree_keys_ops; | |
5 | +extern const struct btree_keys_ops bch_extent_keys_ops; | |
6 | + | |
7 | +struct bkey; | |
8 | +struct cache_set; | |
9 | + | |
10 | +bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); | |
11 | + | |
12 | +#endif /* _BCACHE_EXTENTS_H */ |
drivers/md/bcache/super.c
... | ... | @@ -9,6 +9,7 @@ |
9 | 9 | #include "bcache.h" |
10 | 10 | #include "btree.h" |
11 | 11 | #include "debug.h" |
12 | +#include "extents.h" | |
12 | 13 | #include "request.h" |
13 | 14 | #include "writeback.h" |
14 | 15 | |
... | ... | @@ -399,7 +400,7 @@ |
399 | 400 | { |
400 | 401 | struct bkey *k = &j->uuid_bucket; |
401 | 402 | |
402 | - if (bch_btree_ptr_invalid(c, k)) | |
403 | + if (__bch_btree_ptr_invalid(c, k)) | |
403 | 404 | return "bad uuid pointer"; |
404 | 405 | |
405 | 406 | bkey_copy(&c->uuid_bucket, k); |
... | ... | @@ -1575,7 +1576,7 @@ |
1575 | 1576 | k = &j->btree_root; |
1576 | 1577 | |
1577 | 1578 | err = "bad btree root"; |
1578 | - if (bch_btree_ptr_invalid(c, k)) | |
1579 | + if (__bch_btree_ptr_invalid(c, k)) | |
1579 | 1580 | goto err; |
1580 | 1581 | |
1581 | 1582 | err = "error reading btree root"; |
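
Both super.c call sites validate pointers read straight from the journal (the uuid bucket and the btree root) during registration, before any struct btree exists to dispatch through, which is why they move to the cache_set-based __bch_btree_ptr_invalid() rather than the new per-node key_invalid hook.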