Commit 67f455486d2ea20b2d94d6adf5b9b783d079e321

Authored by NeilBrown
1 parent bd8839e03b

md/raid56: Don't perform reads to support writes until stripe is ready.

If it is found that we need to pre-read some blocks before a write
can succeed, we normally set STRIPE_DELAYED and don't actually perform
the read until STRIPE_PREREAD_ACTIVE subsequently gets set.

However for a degraded RAID6 we currently perform the reads as soon
as we see that a write is pending.  This significantly hurts
throughput.

So:
 - when handle_stripe_dirtying finds a block that it wants on a device
   that is failed, set STRIPE_DELAYED, instead of doing nothing, and
 - when fetch_block detects that a read might be required to satisfy a
   write, only perform the read if STRIPE_PREREAD_ACTIVE is set,
   and if we would actually need to read something to complete the write.

This also helps RAID5, though less often as RAID5 supports a
read-modify-write cycle.  For RAID5 the read is performed too early
only if the write is not a full 4K-aligned write (i.e. not an
R5_OVERWRITE).

Also clean up a couple of horrible bits of formatting.

Reported-by: Patrik Horník <patrik@dsl.sk>
Signed-off-by: NeilBrown <neilb@suse.de>

Showing 1 changed file with 18 additions and 12 deletions Side-by-side Diff

... ... @@ -292,9 +292,12 @@
292 292 BUG_ON(atomic_read(&conf->active_stripes)==0);
293 293 if (test_bit(STRIPE_HANDLE, &sh->state)) {
294 294 if (test_bit(STRIPE_DELAYED, &sh->state) &&
295   - !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  295 + !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
296 296 list_add_tail(&sh->lru, &conf->delayed_list);
297   - else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
  297 + if (atomic_read(&conf->preread_active_stripes)
  298 + < IO_THRESHOLD)
  299 + md_wakeup_thread(conf->mddev->thread);
  300 + } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
298 301 sh->bm_seq - conf->seq_write > 0)
299 302 list_add_tail(&sh->lru, &conf->bitmap_list);
300 303 else {
301 304  
... ... @@ -2886,8 +2889,11 @@
2886 2889 (s->failed >= 1 && fdev[0]->toread) ||
2887 2890 (s->failed >= 2 && fdev[1]->toread) ||
2888 2891 (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
  2892 + (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
2889 2893 !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
2890   - (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
  2894 + (sh->raid_conf->level == 6 && s->failed && s->to_write &&
  2895 + s->to_write < sh->raid_conf->raid_disks - 2 &&
  2896 + (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
2891 2897 /* we would like to get this block, possibly by computing it,
2892 2898 * otherwise read it if the backing disk is insync
2893 2899 */
... ... @@ -3086,7 +3092,8 @@
3086 3092 !test_bit(R5_LOCKED, &dev->flags) &&
3087 3093 !(test_bit(R5_UPTODATE, &dev->flags) ||
3088 3094 test_bit(R5_Wantcompute, &dev->flags))) {
3089   - if (test_bit(R5_Insync, &dev->flags)) rcw++;
  3095 + if (test_bit(R5_Insync, &dev->flags))
  3096 + rcw++;
3090 3097 else
3091 3098 rcw += 2*disks;
3092 3099 }
... ... @@ -3107,10 +3114,10 @@
3107 3114 !(test_bit(R5_UPTODATE, &dev->flags) ||
3108 3115 test_bit(R5_Wantcompute, &dev->flags)) &&
3109 3116 test_bit(R5_Insync, &dev->flags)) {
3110   - if (
3111   - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
3112   - pr_debug("Read_old block "
3113   - "%d for r-m-w\n", i);
  3117 + if (test_bit(STRIPE_PREREAD_ACTIVE,
  3118 + &sh->state)) {
  3119 + pr_debug("Read_old block %d for r-m-w\n",
  3120 + i);
3114 3121 set_bit(R5_LOCKED, &dev->flags);
3115 3122 set_bit(R5_Wantread, &dev->flags);
3116 3123 s->locked++;
... ... @@ -3133,10 +3140,9 @@
3133 3140 !(test_bit(R5_UPTODATE, &dev->flags) ||
3134 3141 test_bit(R5_Wantcompute, &dev->flags))) {
3135 3142 rcw++;
3136   - if (!test_bit(R5_Insync, &dev->flags))
3137   - continue; /* it's a failed drive */
3138   - if (
3139   - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
  3143 + if (test_bit(R5_Insync, &dev->flags) &&
  3144 + test_bit(STRIPE_PREREAD_ACTIVE,
  3145 + &sh->state)) {
3140 3146 pr_debug("Read_old block "
3141 3147 "%d for Reconstruct\n", i);
3142 3148 set_bit(R5_LOCKED, &dev->flags);