Commit 67f455486d2ea20b2d94d6adf5b9b783d079e321
md/raid56: Don't perform reads to support writes until stripe is ready.
If it is found that we need to pre-read some blocks before a write can succeed, we normally set STRIPE_DELAYED and don't actually perform the read until STRIPE_PREREAD_ACTIVE subsequently gets set.

However, for a degraded RAID6 we currently perform the reads as soon as we see that a write is pending. This significantly hurts throughput.

So:
 - when handle_stripe_dirtying finds a block that it wants on a device that is failed, set STRIPE_DELAYED instead of doing nothing, and
 - when fetch_block detects that a read might be required to satisfy a write, only perform the read if STRIPE_PREREAD_ACTIVE is set, and if we would actually need to read something to complete the write.

This also helps RAID5, though less often, as RAID5 supports a read-modify-write cycle. For RAID5 the read is performed too early only if the write is not a full 4K aligned write (i.e. not an R5_OVERWRITE write).

Also clean up a couple of horrible bits of formatting.

Reported-by: Patrik Horník <patrik@dsl.sk>
Signed-off-by: NeilBrown <neilb@suse.de>
Showing 1 changed file with 18 additions and 12 deletions Side-by-side Diff
... | ... | @@ -292,9 +292,12 @@ |
292 | 292 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
293 | 293 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
294 | 294 | if (test_bit(STRIPE_DELAYED, &sh->state) && |
295 | - !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | |
295 | + !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | |
296 | 296 | list_add_tail(&sh->lru, &conf->delayed_list); |
297 | - else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | |
297 | + if (atomic_read(&conf->preread_active_stripes) | |
298 | + < IO_THRESHOLD) | |
299 | + md_wakeup_thread(conf->mddev->thread); | |
300 | + } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | |
298 | 301 | sh->bm_seq - conf->seq_write > 0) |
299 | 302 | list_add_tail(&sh->lru, &conf->bitmap_list); |
300 | 303 | else { |
301 | 304 | |
... | ... | @@ -2886,8 +2889,11 @@ |
2886 | 2889 | (s->failed >= 1 && fdev[0]->toread) || |
2887 | 2890 | (s->failed >= 2 && fdev[1]->toread) || |
2888 | 2891 | (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && |
2892 | + (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) && | |
2889 | 2893 | !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || |
2890 | - (sh->raid_conf->level == 6 && s->failed && s->to_write))) { | |
2894 | + (sh->raid_conf->level == 6 && s->failed && s->to_write && | |
2895 | + s->to_write < sh->raid_conf->raid_disks - 2 && | |
2896 | + (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) { | |
2891 | 2897 | /* we would like to get this block, possibly by computing it, |
2892 | 2898 | * otherwise read it if the backing disk is insync |
2893 | 2899 | */ |
... | ... | @@ -3086,7 +3092,8 @@ |
3086 | 3092 | !test_bit(R5_LOCKED, &dev->flags) && |
3087 | 3093 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3088 | 3094 | test_bit(R5_Wantcompute, &dev->flags))) { |
3089 | - if (test_bit(R5_Insync, &dev->flags)) rcw++; | |
3095 | + if (test_bit(R5_Insync, &dev->flags)) | |
3096 | + rcw++; | |
3090 | 3097 | else |
3091 | 3098 | rcw += 2*disks; |
3092 | 3099 | } |
... | ... | @@ -3107,10 +3114,10 @@ |
3107 | 3114 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3108 | 3115 | test_bit(R5_Wantcompute, &dev->flags)) && |
3109 | 3116 | test_bit(R5_Insync, &dev->flags)) { |
3110 | - if ( | |
3111 | - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | |
3112 | - pr_debug("Read_old block " | |
3113 | - "%d for r-m-w\n", i); | |
3117 | + if (test_bit(STRIPE_PREREAD_ACTIVE, | |
3118 | + &sh->state)) { | |
3119 | + pr_debug("Read_old block %d for r-m-w\n", | |
3120 | + i); | |
3114 | 3121 | set_bit(R5_LOCKED, &dev->flags); |
3115 | 3122 | set_bit(R5_Wantread, &dev->flags); |
3116 | 3123 | s->locked++; |
... | ... | @@ -3133,10 +3140,9 @@ |
3133 | 3140 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3134 | 3141 | test_bit(R5_Wantcompute, &dev->flags))) { |
3135 | 3142 | rcw++; |
3136 | - if (!test_bit(R5_Insync, &dev->flags)) | |
3137 | - continue; /* it's a failed drive */ | |
3138 | - if ( | |
3139 | - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | |
3143 | + if (test_bit(R5_Insync, &dev->flags) && | |
3144 | + test_bit(STRIPE_PREREAD_ACTIVE, | |
3145 | + &sh->state)) { | |
3140 | 3146 | pr_debug("Read_old block " |
3141 | 3147 | "%d for Reconstruct\n", i); |
3142 | 3148 | set_bit(R5_LOCKED, &dev->flags); |
-
mentioned in commit a40687
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit a40687
-
mentioned in commit a40687
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8
-
mentioned in commit b1b02f
-
mentioned in commit b1b02f
-
mentioned in commit ad3ab8