Commit 72c270612bd33192fa836ad0f2939af1ca218292

Authored by Kent Overstreet
1 parent 279afbad4e

bcache: Write out full stripes

Now that we're tracking dirty data per stripe, we can add two
optimizations for raid5/6:

 * If a stripe is already dirty, force writes to that stripe to
   writeback mode - to help build up full stripes of dirty data

 * When flushing dirty data, preferentially write out full stripes first
   if there are any.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
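
In code terms, the two changes above come down to the following condensed sketches, drawn from the writeback.h and writeback.c hunks further down; they restate the logic rather than reproduce the patch verbatim.

First, the write path: should_writeback() (moved into writeback.h) now takes the cache mode and the would-skip decision as arguments, and forces writeback whenever the target stripe already holds dirty data, so partially dirty stripes keep filling up:

    static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
                                        unsigned cache_mode, bool would_skip)
    {
            unsigned in_use = dc->disk.c->gc_stats.in_use;

            if (cache_mode != CACHE_MODE_WRITEBACK ||
                atomic_read(&dc->disk.detaching) ||
                in_use > CUTOFF_WRITEBACK_SYNC)
                    return false;

            /* New: a write touching an already-dirty stripe is always
             * written back, even if it would otherwise be skipped. */
            if (dc->partial_stripes_expensive &&
                bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
                                        bio_sectors(bio)))
                    return true;

            if (would_skip)
                    return false;

            return bio->bi_rw & REQ_SYNC || in_use <= CUTOFF_WRITEBACK;
    }

Second, the flush path: the keybuf refill in writeback.c first uses a stricter predicate that only accepts dirty keys whose stripes are completely dirty, and falls back to the ordinary dirty_pred when no full stripes exist. A simplified version of that predicate (the helper name below is illustrative, not the one in the patch):

    static bool key_touches_only_full_stripes(struct cached_dev *dc,
                                              struct bkey *k)
    {
            unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
            uint64_t stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
            unsigned nr_sectors = KEY_SIZE(k);

            if (!KEY_DIRTY(k))
                    return false;

            while (1) {
                    /* every stripe the key spans must be fully dirty */
                    if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
                        stripe_size)
                            return false;

                    if (nr_sectors <= stripe_size)
                            return true;

                    nr_sectors -= stripe_size;
                    stripe++;
            }
    }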

Showing 9 changed files with 121 additions and 37 deletions

drivers/md/bcache/bcache.h
... ... @@ -387,8 +387,6 @@
387 387 typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
388 388  
389 389 struct keybuf {
390   - keybuf_pred_fn *key_predicate;
391   -
392 390 struct bkey last_scanned;
393 391 spinlock_t lock;
394 392  
... ... @@ -532,6 +530,7 @@
532 530 unsigned sequential_merge:1;
533 531 unsigned verify:1;
534 532  
  533 + unsigned partial_stripes_expensive:1;
535 534 unsigned writeback_metadata:1;
536 535 unsigned writeback_running:1;
537 536 unsigned char writeback_percent;
drivers/md/bcache/btree.c
... ... @@ -2252,7 +2252,8 @@
2252 2252 }
2253 2253  
2254 2254 static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
2255   - struct keybuf *buf, struct bkey *end)
  2255 + struct keybuf *buf, struct bkey *end,
  2256 + keybuf_pred_fn *pred)
2256 2257 {
2257 2258 struct btree_iter iter;
2258 2259 bch_btree_iter_init(b, &iter, &buf->last_scanned);
... ... @@ -2271,7 +2272,7 @@
2271 2272 if (bkey_cmp(&buf->last_scanned, end) >= 0)
2272 2273 break;
2273 2274  
2274   - if (buf->key_predicate(buf, k)) {
  2275 + if (pred(buf, k)) {
2275 2276 struct keybuf_key *w;
2276 2277  
2277 2278 spin_lock(&buf->lock);
... ... @@ -2290,7 +2291,7 @@
2290 2291 if (!k)
2291 2292 break;
2292 2293  
2293   - btree(refill_keybuf, k, b, op, buf, end);
  2294 + btree(refill_keybuf, k, b, op, buf, end, pred);
2294 2295 /*
2295 2296 * Might get an error here, but can't really do anything
2296 2297 * and it'll get logged elsewhere. Just read what we
... ... @@ -2308,7 +2309,7 @@
2308 2309 }
2309 2310  
2310 2311 void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
2311   - struct bkey *end)
  2312 + struct bkey *end, keybuf_pred_fn *pred)
2312 2313 {
2313 2314 struct bkey start = buf->last_scanned;
2314 2315 struct btree_op op;
... ... @@ -2316,7 +2317,7 @@
2316 2317  
2317 2318 cond_resched();
2318 2319  
2319   - btree_root(refill_keybuf, c, &op, buf, end);
  2320 + btree_root(refill_keybuf, c, &op, buf, end, pred);
2320 2321 closure_sync(&op.cl);
2321 2322  
2322 2323 pr_debug("found %s keys from %llu:%llu to %llu:%llu",
... ... @@ -2402,7 +2403,8 @@
2402 2403  
2403 2404 struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
2404 2405 struct keybuf *buf,
2405   - struct bkey *end)
  2406 + struct bkey *end,
  2407 + keybuf_pred_fn *pred)
2406 2408 {
2407 2409 struct keybuf_key *ret;
2408 2410  
2409 2411 while (1) {
2410 2412 ret = bch_keybuf_next(buf);
... ... @@ -2416,15 +2418,14 @@
2416 2418 break;
2417 2419 }
2418 2420  
2419   - bch_refill_keybuf(c, buf, end);
  2421 + bch_refill_keybuf(c, buf, end, pred);
2420 2422 }
2421 2423  
2422 2424 return ret;
2423 2425 }
2424 2426  
2425   -void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
  2427 +void bch_keybuf_init(struct keybuf *buf)
2426 2428 {
2427   - buf->key_predicate = fn;
2428 2429 buf->last_scanned = MAX_KEY;
2429 2430 buf->keys = RB_ROOT;
2430 2431  
drivers/md/bcache/btree.h
... ... @@ -391,14 +391,15 @@
391 391 int bch_btree_check(struct cache_set *, struct btree_op *);
392 392 uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
393 393  
394   -void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *);
395   -void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *);
  394 +void bch_keybuf_init(struct keybuf *);
  395 +void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
  396 + keybuf_pred_fn *);
396 397 bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
397 398 struct bkey *);
398 399 void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
399 400 struct keybuf_key *bch_keybuf_next(struct keybuf *);
400   -struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
401   - struct keybuf *, struct bkey *);
  401 +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
  402 + struct bkey *, keybuf_pred_fn *);
402 403  
403 404 #endif
drivers/md/bcache/debug.c
... ... @@ -357,7 +357,7 @@
357 357 if (i->bytes)
358 358 break;
359 359  
360   - w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
  360 + w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred);
361 361 if (!w)
362 362 break;
363 363  
... ... @@ -380,7 +380,7 @@
380 380  
381 381 file->private_data = i;
382 382 i->c = c;
383   - bch_keybuf_init(&i->keys, dump_pred);
  383 + bch_keybuf_init(&i->keys);
384 384 i->keys.last_scanned = KEY(0, 0, 0);
385 385  
386 386 return 0;
drivers/md/bcache/movinggc.c
... ... @@ -136,7 +136,8 @@
136 136 /* XXX: if we error, background writeback could stall indefinitely */
137 137  
138 138 while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
139   - w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY);
  139 + w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
  140 + &MAX_KEY, moving_pred);
140 141 if (!w)
141 142 break;
142 143  
... ... @@ -248,6 +249,6 @@
248 249  
249 250 void bch_moving_init_cache_set(struct cache_set *c)
250 251 {
251   - bch_keybuf_init(&c->moving_gc_keys, moving_pred);
  252 + bch_keybuf_init(&c->moving_gc_keys);
252 253 }
drivers/md/bcache/request.c
... ... @@ -22,8 +22,6 @@
22 22  
23 23 #define CUTOFF_CACHE_ADD 95
24 24 #define CUTOFF_CACHE_READA 90
25   -#define CUTOFF_WRITEBACK 50
26   -#define CUTOFF_WRITEBACK_SYNC 75
27 25  
28 26 struct kmem_cache *bch_search_cache;
29 27  
... ... @@ -998,17 +996,6 @@
998 996 cached_dev_bio_complete(cl);
999 997 }
1000 998  
1001   -static bool should_writeback(struct cached_dev *dc, struct bio *bio)
1002   -{
1003   - unsigned threshold = (bio->bi_rw & REQ_SYNC)
1004   - ? CUTOFF_WRITEBACK_SYNC
1005   - : CUTOFF_WRITEBACK;
1006   -
1007   - return !atomic_read(&dc->disk.detaching) &&
1008   - cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
1009   - dc->disk.c->gc_stats.in_use < threshold;
1010   -}
1011   -
1012 999 static void request_write(struct cached_dev *dc, struct search *s)
1013 1000 {
1014 1001 struct closure *cl = &s->cl;
1015 1002  
... ... @@ -1030,11 +1017,15 @@
1030 1017 if (bio->bi_rw & REQ_DISCARD)
1031 1018 goto skip;
1032 1019  
  1020 + if (should_writeback(dc, s->orig_bio,
  1021 + cache_mode(dc, bio),
  1022 + s->op.skip)) {
  1023 + s->op.skip = false;
  1024 + s->writeback = true;
  1025 + }
  1026 +
1033 1027 if (s->op.skip)
1034 1028 goto skip;
1035   -
1036   - if (should_writeback(dc, s->orig_bio))
1037   - s->writeback = true;
1038 1029  
1039 1030 trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
1040 1031  
drivers/md/bcache/sysfs.c
... ... @@ -81,6 +81,9 @@
81 81 rw_attribute(writeback_rate_d_smooth);
82 82 read_attribute(writeback_rate_debug);
83 83  
  84 +read_attribute(stripe_size);
  85 +read_attribute(partial_stripes_expensive);
  86 +
84 87 rw_attribute(synchronous);
85 88 rw_attribute(journal_delay_ms);
86 89 rw_attribute(discard);
... ... @@ -147,6 +150,9 @@
147 150 sysfs_hprint(dirty_data,
148 151 bcache_dev_sectors_dirty(&dc->disk) << 9);
149 152  
  153 + sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9);
  154 + var_printf(partial_stripes_expensive, "%u");
  155 +
150 156 var_printf(sequential_merge, "%i");
151 157 var_hprint(sequential_cutoff);
152 158 var_hprint(readahead);
... ... @@ -286,6 +292,8 @@
286 292 &sysfs_writeback_rate_d_smooth,
287 293 &sysfs_writeback_rate_debug,
288 294 &sysfs_dirty_data,
  295 + &sysfs_stripe_size,
  296 + &sysfs_partial_stripes_expensive,
289 297 &sysfs_sequential_cutoff,
290 298 &sysfs_sequential_merge,
291 299 &sysfs_clear_stats,
drivers/md/bcache/writeback.c
... ... @@ -108,6 +108,31 @@
108 108 return KEY_DIRTY(k);
109 109 }
110 110  
  111 +static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k)
  112 +{
  113 + uint64_t stripe;
  114 + unsigned nr_sectors = KEY_SIZE(k);
  115 + struct cached_dev *dc = container_of(buf, struct cached_dev,
  116 + writeback_keys);
  117 + unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
  118 +
  119 + if (!KEY_DIRTY(k))
  120 + return false;
  121 +
  122 + stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
  123 + while (1) {
  124 + if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
  125 + stripe_size)
  126 + return false;
  127 +
  128 + if (nr_sectors <= stripe_size)
  129 + return true;
  130 +
  131 + nr_sectors -= stripe_size;
  132 + stripe++;
  133 + }
  134 +}
  135 +
111 136 static void dirty_init(struct keybuf_key *w)
112 137 {
113 138 struct dirty_io *io = w->private;
114 139  
... ... @@ -152,8 +177,23 @@
152 177 searched_from_start = true;
153 178 }
154 179  
155   - bch_refill_keybuf(dc->disk.c, buf, &end);
  180 + if (dc->partial_stripes_expensive) {
  181 + uint64_t i;
156 182  
  183 + for (i = 0; i < dc->disk.nr_stripes; i++)
  184 + if (atomic_read(dc->disk.stripe_sectors_dirty + i) ==
  185 + 1 << dc->disk.stripe_size_bits)
  186 + goto full_stripes;
  187 +
  188 + goto normal_refill;
  189 +full_stripes:
  190 + bch_refill_keybuf(dc->disk.c, buf, &end,
  191 + dirty_full_stripe_pred);
  192 + } else {
  193 +normal_refill:
  194 + bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
  195 + }
  196 +
157 197 if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) {
158 198 /* Searched the entire btree - delay awhile */
159 199  
... ... @@ -446,7 +486,7 @@
446 486 closure_init_unlocked(&dc->writeback);
447 487 init_rwsem(&dc->writeback_lock);
448 488  
449   - bch_keybuf_init(&dc->writeback_keys, dirty_pred);
  489 + bch_keybuf_init(&dc->writeback_keys);
450 490  
451 491 dc->writeback_metadata = true;
452 492 dc->writeback_running = true;
drivers/md/bcache/writeback.h
1 1 #ifndef _BCACHE_WRITEBACK_H
2 2 #define _BCACHE_WRITEBACK_H
3 3  
  4 +#define CUTOFF_WRITEBACK 40
  5 +#define CUTOFF_WRITEBACK_SYNC 70
  6 +
4 7 static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
5 8 {
6 9 uint64_t i, ret = 0;
... ... @@ -9,6 +12,46 @@
9 12 ret += atomic_read(d->stripe_sectors_dirty + i);
10 13  
11 14 return ret;
  15 +}
  16 +
  17 +static inline bool bcache_dev_stripe_dirty(struct bcache_device *d,
  18 + uint64_t offset,
  19 + unsigned nr_sectors)
  20 +{
  21 + uint64_t stripe = offset >> d->stripe_size_bits;
  22 +
  23 + while (1) {
  24 + if (atomic_read(d->stripe_sectors_dirty + stripe))
  25 + return true;
  26 +
  27 + if (nr_sectors <= 1 << d->stripe_size_bits)
  28 + return false;
  29 +
  30 + nr_sectors -= 1 << d->stripe_size_bits;
  31 + stripe++;
  32 + }
  33 +}
  34 +
  35 +static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
  36 + unsigned cache_mode, bool would_skip)
  37 +{
  38 + unsigned in_use = dc->disk.c->gc_stats.in_use;
  39 +
  40 + if (cache_mode != CACHE_MODE_WRITEBACK ||
  41 + atomic_read(&dc->disk.detaching) ||
  42 + in_use > CUTOFF_WRITEBACK_SYNC)
  43 + return false;
  44 +
  45 + if (dc->partial_stripes_expensive &&
  46 + bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
  47 + bio_sectors(bio)))
  48 + return true;
  49 +
  50 + if (would_skip)
  51 + return false;
  52 +
  53 + return bio->bi_rw & REQ_SYNC ||
  54 + in_use <= CUTOFF_WRITEBACK;
12 55 }
13 56  
14 57 void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);