Commit 72c270612bd33192fa836ad0f2939af1ca218292
1 parent 279afbad4e
bcache: Write out full stripes
Now that we're tracking dirty data per stripe, we can add two optimizations for raid5/6:

* If a stripe is already dirty, force writes to that stripe to writeback mode - to help build up full stripes of dirty data
* When flushing dirty data, preferentially write out full stripes first if there are any.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
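The second optimization rides on the per-stripe dirty-sector counters: during refill, a dirty key is preferred only if every stripe it covers is completely dirty. Below is a minimal standalone sketch of that check, mirroring the logic of dirty_full_stripe_pred() added by this patch; struct toy_stripes and covers_only_full_stripes() are hypothetical simplified stand-ins for the kernel's per-device bookkeeping, not real bcache structures.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for the per-device stripe bookkeeping; the real
 * fields live in struct bcache_device (stripe_size_bits,
 * stripe_sectors_dirty). */
struct toy_stripes {
	unsigned stripe_size_bits;	/* stripe size in sectors = 1 << bits */
	unsigned *sectors_dirty;	/* dirty-sector count per stripe */
};

/* Mirrors dirty_full_stripe_pred(): walk the stripes covered by the key
 * [start, start + nr_sectors) and accept it only if every one of them is
 * completely dirty, i.e. its counter equals the stripe size. */
static bool covers_only_full_stripes(const struct toy_stripes *s,
				     uint64_t start, unsigned nr_sectors)
{
	uint64_t stripe = start >> s->stripe_size_bits;
	unsigned stripe_size = 1U << s->stripe_size_bits;

	while (1) {
		if (s->sectors_dirty[stripe] != stripe_size)
			return false;		/* partial stripe: leave it for later */
		if (nr_sectors <= stripe_size)
			return true;		/* all covered stripes were full */
		nr_sectors -= stripe_size;
		stripe++;
	}
}

A stripe only counts as full when its dirty-sector counter equals the stripe size, which is why the patch also exposes stripe_size and partial_stripes_expensive through sysfs for inspection.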
Showing 9 changed files with 121 additions and 37 deletions
drivers/md/bcache/bcache.h
... | ... | @@ -387,8 +387,6 @@ |
387 | 387 | typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); |
388 | 388 | |
389 | 389 | struct keybuf { |
390 | - keybuf_pred_fn *key_predicate; | |
391 | - | |
392 | 390 | struct bkey last_scanned; |
393 | 391 | spinlock_t lock; |
394 | 392 | |
... | ... | @@ -532,6 +530,7 @@ |
532 | 530 | unsigned sequential_merge:1; |
533 | 531 | unsigned verify:1; |
534 | 532 | |
533 | + unsigned partial_stripes_expensive:1; | |
535 | 534 | unsigned writeback_metadata:1; |
536 | 535 | unsigned writeback_running:1; |
537 | 536 | unsigned char writeback_percent; |
drivers/md/bcache/btree.c
... | ... | @@ -2252,7 +2252,8 @@ |
2252 | 2252 | } |
2253 | 2253 | |
2254 | 2254 | static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, |
2255 | - struct keybuf *buf, struct bkey *end) | |
2255 | + struct keybuf *buf, struct bkey *end, | |
2256 | + keybuf_pred_fn *pred) | |
2256 | 2257 | { |
2257 | 2258 | struct btree_iter iter; |
2258 | 2259 | bch_btree_iter_init(b, &iter, &buf->last_scanned); |
... | ... | @@ -2271,7 +2272,7 @@ |
2271 | 2272 | if (bkey_cmp(&buf->last_scanned, end) >= 0) |
2272 | 2273 | break; |
2273 | 2274 | |
2274 | - if (buf->key_predicate(buf, k)) { | |
2275 | + if (pred(buf, k)) { | |
2275 | 2276 | struct keybuf_key *w; |
2276 | 2277 | |
2277 | 2278 | spin_lock(&buf->lock); |
... | ... | @@ -2290,7 +2291,7 @@ |
2290 | 2291 | if (!k) |
2291 | 2292 | break; |
2292 | 2293 | |
2293 | - btree(refill_keybuf, k, b, op, buf, end); | |
2294 | + btree(refill_keybuf, k, b, op, buf, end, pred); | |
2294 | 2295 | /* |
2295 | 2296 | * Might get an error here, but can't really do anything |
2296 | 2297 | * and it'll get logged elsewhere. Just read what we |
... | ... | @@ -2308,7 +2309,7 @@ |
2308 | 2309 | } |
2309 | 2310 | |
2310 | 2311 | void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, |
2311 | - struct bkey *end) | |
2312 | + struct bkey *end, keybuf_pred_fn *pred) | |
2312 | 2313 | { |
2313 | 2314 | struct bkey start = buf->last_scanned; |
2314 | 2315 | struct btree_op op; |
... | ... | @@ -2316,7 +2317,7 @@ |
2316 | 2317 | |
2317 | 2318 | cond_resched(); |
2318 | 2319 | |
2319 | - btree_root(refill_keybuf, c, &op, buf, end); | |
2320 | + btree_root(refill_keybuf, c, &op, buf, end, pred); | |
2320 | 2321 | closure_sync(&op.cl); |
2321 | 2322 | |
2322 | 2323 | pr_debug("found %s keys from %llu:%llu to %llu:%llu", |
... | ... | @@ -2402,7 +2403,8 @@ |
2402 | 2403 | |
2403 | 2404 | struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, |
2404 | 2405 | struct keybuf *buf, |
2405 | - struct bkey *end) | |
2406 | + struct bkey *end, | |
2407 | + keybuf_pred_fn *pred) | |
2406 | 2408 | { |
2407 | 2409 | struct keybuf_key *ret; |
2408 | 2410 | |
2409 | 2411 | |
2410 | 2412 | |
... | ... | @@ -2416,15 +2418,14 @@ |
2416 | 2418 | break; |
2417 | 2419 | } |
2418 | 2420 | |
2419 | - bch_refill_keybuf(c, buf, end); | |
2421 | + bch_refill_keybuf(c, buf, end, pred); | |
2420 | 2422 | } |
2421 | 2423 | |
2422 | 2424 | return ret; |
2423 | 2425 | } |
2424 | 2426 | |
2425 | -void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn) | |
2427 | +void bch_keybuf_init(struct keybuf *buf) | |
2426 | 2428 | { |
2427 | - buf->key_predicate = fn; | |
2428 | 2429 | buf->last_scanned = MAX_KEY; |
2429 | 2430 | buf->keys = RB_ROOT; |
2430 | 2431 |
drivers/md/bcache/btree.h
... | ... | @@ -391,14 +391,15 @@ |
391 | 391 | int bch_btree_check(struct cache_set *, struct btree_op *); |
392 | 392 | uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); |
393 | 393 | |
394 | -void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *); | |
395 | -void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *); | |
394 | +void bch_keybuf_init(struct keybuf *); | |
395 | +void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *, | |
396 | + keybuf_pred_fn *); | |
396 | 397 | bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, |
397 | 398 | struct bkey *); |
398 | 399 | void bch_keybuf_del(struct keybuf *, struct keybuf_key *); |
399 | 400 | struct keybuf_key *bch_keybuf_next(struct keybuf *); |
400 | -struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, | |
401 | - struct keybuf *, struct bkey *); | |
401 | +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *, | |
402 | + struct bkey *, keybuf_pred_fn *); | |
402 | 403 | |
403 | 404 | #endif |
drivers/md/bcache/debug.c
... | ... | @@ -357,7 +357,7 @@ |
357 | 357 | if (i->bytes) |
358 | 358 | break; |
359 | 359 | |
360 | - w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY); | |
360 | + w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred); | |
361 | 361 | if (!w) |
362 | 362 | break; |
363 | 363 | |
... | ... | @@ -380,7 +380,7 @@ |
380 | 380 | |
381 | 381 | file->private_data = i; |
382 | 382 | i->c = c; |
383 | - bch_keybuf_init(&i->keys, dump_pred); | |
383 | + bch_keybuf_init(&i->keys); | |
384 | 384 | i->keys.last_scanned = KEY(0, 0, 0); |
385 | 385 | |
386 | 386 | return 0; |
drivers/md/bcache/movinggc.c
... | ... | @@ -136,7 +136,8 @@ |
136 | 136 | /* XXX: if we error, background writeback could stall indefinitely */ |
137 | 137 | |
138 | 138 | while (!test_bit(CACHE_SET_STOPPING, &c->flags)) { |
139 | - w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY); | |
139 | + w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, | |
140 | + &MAX_KEY, moving_pred); | |
140 | 141 | if (!w) |
141 | 142 | break; |
142 | 143 | |
... | ... | @@ -248,6 +249,6 @@ |
248 | 249 | |
249 | 250 | void bch_moving_init_cache_set(struct cache_set *c) |
250 | 251 | { |
251 | - bch_keybuf_init(&c->moving_gc_keys, moving_pred); | |
252 | + bch_keybuf_init(&c->moving_gc_keys); | |
252 | 253 | } |
drivers/md/bcache/request.c
... | ... | @@ -22,8 +22,6 @@ |
22 | 22 | |
23 | 23 | #define CUTOFF_CACHE_ADD 95 |
24 | 24 | #define CUTOFF_CACHE_READA 90 |
25 | -#define CUTOFF_WRITEBACK 50 | |
26 | -#define CUTOFF_WRITEBACK_SYNC 75 | |
27 | 25 | |
28 | 26 | struct kmem_cache *bch_search_cache; |
29 | 27 | |
... | ... | @@ -998,17 +996,6 @@ |
998 | 996 | cached_dev_bio_complete(cl); |
999 | 997 | } |
1000 | 998 | |
1001 | -static bool should_writeback(struct cached_dev *dc, struct bio *bio) | |
1002 | -{ | |
1003 | - unsigned threshold = (bio->bi_rw & REQ_SYNC) | |
1004 | - ? CUTOFF_WRITEBACK_SYNC | |
1005 | - : CUTOFF_WRITEBACK; | |
1006 | - | |
1007 | - return !atomic_read(&dc->disk.detaching) && | |
1008 | - cache_mode(dc, bio) == CACHE_MODE_WRITEBACK && | |
1009 | - dc->disk.c->gc_stats.in_use < threshold; | |
1010 | -} | |
1011 | - | |
1012 | 999 | static void request_write(struct cached_dev *dc, struct search *s) |
1013 | 1000 | { |
1014 | 1001 | struct closure *cl = &s->cl; |
1015 | 1002 | |
... | ... | @@ -1030,11 +1017,15 @@ |
1030 | 1017 | if (bio->bi_rw & REQ_DISCARD) |
1031 | 1018 | goto skip; |
1032 | 1019 | |
1020 | + if (should_writeback(dc, s->orig_bio, | |
1021 | + cache_mode(dc, bio), | |
1022 | + s->op.skip)) { | |
1023 | + s->op.skip = false; | |
1024 | + s->writeback = true; | |
1025 | + } | |
1026 | + | |
1033 | 1027 | if (s->op.skip) |
1034 | 1028 | goto skip; |
1035 | - | |
1036 | - if (should_writeback(dc, s->orig_bio)) | |
1037 | - s->writeback = true; | |
1038 | 1029 | |
1039 | 1030 | trace_bcache_write(s->orig_bio, s->writeback, s->op.skip); |
1040 | 1031 |
drivers/md/bcache/sysfs.c
... | ... | @@ -81,6 +81,9 @@ |
81 | 81 | rw_attribute(writeback_rate_d_smooth); |
82 | 82 | read_attribute(writeback_rate_debug); |
83 | 83 | |
84 | +read_attribute(stripe_size); | |
85 | +read_attribute(partial_stripes_expensive); | |
86 | + | |
84 | 87 | rw_attribute(synchronous); |
85 | 88 | rw_attribute(journal_delay_ms); |
86 | 89 | rw_attribute(discard); |
... | ... | @@ -147,6 +150,9 @@ |
147 | 150 | sysfs_hprint(dirty_data, |
148 | 151 | bcache_dev_sectors_dirty(&dc->disk) << 9); |
149 | 152 | |
153 | + sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9); | |
154 | + var_printf(partial_stripes_expensive, "%u"); | |
155 | + | |
150 | 156 | var_printf(sequential_merge, "%i"); |
151 | 157 | var_hprint(sequential_cutoff); |
152 | 158 | var_hprint(readahead); |
... | ... | @@ -286,6 +292,8 @@ |
286 | 292 | &sysfs_writeback_rate_d_smooth, |
287 | 293 | &sysfs_writeback_rate_debug, |
288 | 294 | &sysfs_dirty_data, |
295 | + &sysfs_stripe_size, | |
296 | + &sysfs_partial_stripes_expensive, | |
289 | 297 | &sysfs_sequential_cutoff, |
290 | 298 | &sysfs_sequential_merge, |
291 | 299 | &sysfs_clear_stats, |
drivers/md/bcache/writeback.c
... | ... | @@ -108,6 +108,31 @@ |
108 | 108 | return KEY_DIRTY(k); |
109 | 109 | } |
110 | 110 | |
111 | +static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k) | |
112 | +{ | |
113 | + uint64_t stripe; | |
114 | + unsigned nr_sectors = KEY_SIZE(k); | |
115 | + struct cached_dev *dc = container_of(buf, struct cached_dev, | |
116 | + writeback_keys); | |
117 | + unsigned stripe_size = 1 << dc->disk.stripe_size_bits; | |
118 | + | |
119 | + if (!KEY_DIRTY(k)) | |
120 | + return false; | |
121 | + | |
122 | + stripe = KEY_START(k) >> dc->disk.stripe_size_bits; | |
123 | + while (1) { | |
124 | + if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) != | |
125 | + stripe_size) | |
126 | + return false; | |
127 | + | |
128 | + if (nr_sectors <= stripe_size) | |
129 | + return true; | |
130 | + | |
131 | + nr_sectors -= stripe_size; | |
132 | + stripe++; | |
133 | + } | |
134 | +} | |
135 | + | |
111 | 136 | static void dirty_init(struct keybuf_key *w) |
112 | 137 | { |
113 | 138 | struct dirty_io *io = w->private; |
114 | 139 | |
... | ... | @@ -152,8 +177,23 @@ |
152 | 177 | searched_from_start = true; |
153 | 178 | } |
154 | 179 | |
155 | - bch_refill_keybuf(dc->disk.c, buf, &end); | |
180 | + if (dc->partial_stripes_expensive) { | |
181 | + uint64_t i; | |
156 | 182 | |
183 | + for (i = 0; i < dc->disk.nr_stripes; i++) | |
184 | + if (atomic_read(dc->disk.stripe_sectors_dirty + i) == | |
185 | + 1 << dc->disk.stripe_size_bits) | |
186 | + goto full_stripes; | |
187 | + | |
188 | + goto normal_refill; | |
189 | +full_stripes: | |
190 | + bch_refill_keybuf(dc->disk.c, buf, &end, | |
191 | + dirty_full_stripe_pred); | |
192 | + } else { | |
193 | +normal_refill: | |
194 | + bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); | |
195 | + } | |
196 | + | |
157 | 197 | if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { |
158 | 198 | /* Searched the entire btree - delay awhile */ |
159 | 199 | |
... | ... | @@ -446,7 +486,7 @@ |
446 | 486 | closure_init_unlocked(&dc->writeback); |
447 | 487 | init_rwsem(&dc->writeback_lock); |
448 | 488 | |
449 | - bch_keybuf_init(&dc->writeback_keys, dirty_pred); | |
489 | + bch_keybuf_init(&dc->writeback_keys); | |
450 | 490 | |
451 | 491 | dc->writeback_metadata = true; |
452 | 492 | dc->writeback_running = true; |
drivers/md/bcache/writeback.h
1 | 1 | #ifndef _BCACHE_WRITEBACK_H |
2 | 2 | #define _BCACHE_WRITEBACK_H |
3 | 3 | |
4 | +#define CUTOFF_WRITEBACK 40 | |
5 | +#define CUTOFF_WRITEBACK_SYNC 70 | |
6 | + | |
4 | 7 | static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) |
5 | 8 | { |
6 | 9 | uint64_t i, ret = 0; |
... | ... | @@ -9,6 +12,46 @@ |
9 | 12 | ret += atomic_read(d->stripe_sectors_dirty + i); |
10 | 13 | |
11 | 14 | return ret; |
15 | +} | |
16 | + | |
17 | +static inline bool bcache_dev_stripe_dirty(struct bcache_device *d, | |
18 | + uint64_t offset, | |
19 | + unsigned nr_sectors) | |
20 | +{ | |
21 | + uint64_t stripe = offset >> d->stripe_size_bits; | |
22 | + | |
23 | + while (1) { | |
24 | + if (atomic_read(d->stripe_sectors_dirty + stripe)) | |
25 | + return true; | |
26 | + | |
27 | + if (nr_sectors <= 1 << d->stripe_size_bits) | |
28 | + return false; | |
29 | + | |
30 | + nr_sectors -= 1 << d->stripe_size_bits; | |
31 | + stripe++; | |
32 | + } | |
33 | +} | |
34 | + | |
35 | +static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, | |
36 | + unsigned cache_mode, bool would_skip) | |
37 | +{ | |
38 | + unsigned in_use = dc->disk.c->gc_stats.in_use; | |
39 | + | |
40 | + if (cache_mode != CACHE_MODE_WRITEBACK || | |
41 | + atomic_read(&dc->disk.detaching) || | |
42 | + in_use > CUTOFF_WRITEBACK_SYNC) | |
43 | + return false; | |
44 | + | |
45 | + if (dc->partial_stripes_expensive && | |
46 | + bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector, | |
47 | + bio_sectors(bio))) | |
48 | + return true; | |
49 | + | |
50 | + if (would_skip) | |
51 | + return false; | |
52 | + | |
53 | + return bio->bi_rw & REQ_SYNC || | |
54 | + in_use <= CUTOFF_WRITEBACK; | |
12 | 55 | } |
13 | 56 | |
14 | 57 | void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); |
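The first optimization falls out of the ordering inside the new should_writeback() above: the bcache_dev_stripe_dirty() check runs before the would_skip bypass, so a write that lands on an already-dirty stripe is promoted to writeback even when the sequential heuristic would otherwise skip the cache. The small self-contained demonstration below is an illustrative assumption, not kernel code: toy_dev, toy_should_writeback() and the numbers in main() are made up, and the cache-mode/detaching checks of the real function are omitted, but the decision order matches the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CUTOFF_WRITEBACK	40
#define CUTOFF_WRITEBACK_SYNC	70

/* Hypothetical simplified device: just the fields the decision needs. */
struct toy_dev {
	unsigned stripe_size_bits;	/* stripe size in sectors = 1 << bits */
	unsigned *stripe_sectors_dirty;	/* dirty-sector count per stripe */
	bool partial_stripes_expensive;
};

/* Mirrors bcache_dev_stripe_dirty(): does any stripe touched by
 * [offset, offset + nr_sectors) already contain dirty data? */
static bool stripe_dirty(const struct toy_dev *d, uint64_t offset,
			 unsigned nr_sectors)
{
	uint64_t stripe = offset >> d->stripe_size_bits;

	while (1) {
		if (d->stripe_sectors_dirty[stripe])
			return true;
		if (nr_sectors <= (1U << d->stripe_size_bits))
			return false;
		nr_sectors -= 1U << d->stripe_size_bits;
		stripe++;
	}
}

/* Same ordering as the patched should_writeback(); cache-mode and
 * detaching checks omitted for brevity. */
static bool toy_should_writeback(const struct toy_dev *d, uint64_t offset,
				 unsigned nr_sectors, bool sync,
				 bool would_skip, unsigned in_use)
{
	if (in_use > CUTOFF_WRITEBACK_SYNC)
		return false;

	/* The partial-stripe check runs *before* the skip heuristic. */
	if (d->partial_stripes_expensive &&
	    stripe_dirty(d, offset, nr_sectors))
		return true;

	if (would_skip)
		return false;

	return sync || in_use <= CUTOFF_WRITEBACK;
}

int main(void)
{
	unsigned dirty[4] = { 0, 12, 0, 0 };	/* stripe 1 is partially dirty */
	struct toy_dev d = {
		.stripe_size_bits = 4,		/* 16-sector stripes, for the example */
		.stripe_sectors_dirty = dirty,
		.partial_stripes_expensive = true,
	};

	/* Sector 20 falls in stripe 1 (20 >> 4 == 1), which already holds
	 * dirty data, so the write is promoted to writeback even though the
	 * sequential heuristic (would_skip) would have bypassed the cache. */
	printf("%d\n", toy_should_writeback(&d, 20, 8, false, true, 30));
	return 0;
}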