Commit 3b080b2564287be91605bfd1d5ee985696e61d3c

Authored by Wang Shilong
Committed by Chris Mason
1 parent 68bb462d42

Btrfs: scrub raid56 stripes in the right way

Steps to reproduce:
 # mkfs.btrfs -f /dev/sda[8-11] -m raid5 -d raid5
 # mount /dev/sda8 /mnt
 # btrfs scrub start -BR /mnt
 # echo $? <--unverified errors make return value be 3

This is because we don't set up the right mapping between physical
and logical addresses for raid56, which causes checksum mismatches.
But we find that everything is fine later when rechecking using
btrfs_map_block().

This patch fixes the problem by setting up the right mappings, and
we only verify data stripes' checksums.

Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>

Showing 1 changed file with 89 additions and 19 deletions Side-by-side Diff

... ... @@ -2235,6 +2235,47 @@
2235 2235 return 0;
2236 2236 }
2237 2237  
  2238 +/*
  2239 + * Given a physical address on stripe @num of @map, calculate its
  2240 + * logical offset and store it in @offset. If this is a parity stripe, it will store
  2241 + * the leftmost data stripe's logical offset (computed as last_offset + j * stripe_len).
  2242 + * The stored value is not rebased; the visible callers in scrub_stripe() add `base` to it.
  2243 + * Returns 0 if it is a data stripe, 1 if it is a parity stripe.
  2244 + */
  2245 +static int get_raid56_logic_offset(u64 physical, int num,
  2246 + struct map_lookup *map, u64 *offset)
  2247 +{
  2248 + int i;
  2249 + int j = 0;
  2250 + u64 stripe_nr;
  2251 + u64 last_offset;
  2252 + int stripe_index;
  2253 + int rot;
  2254 +
  2255 + last_offset = (physical - map->stripes[num].physical) *
  2256 + nr_data_stripes(map); /* offset into this stripe, scaled by the data-stripe count */
  2257 + *offset = last_offset;
  2258 + for (i = 0; i < nr_data_stripes(map); i++) {
  2259 + *offset = last_offset + i * map->stripe_len; /* candidate offset for the i-th data stripe */
  2260 +
  2261 + stripe_nr = *offset;
  2262 + do_div(stripe_nr, map->stripe_len);
  2263 + do_div(stripe_nr, nr_data_stripes(map));
  2264 +
  2265 + /* Work out the disk rotation on this stripe-set */
  2266 + rot = do_div(stripe_nr, map->num_stripes);
  2267 + /* calculate which stripe this data locates */
  2268 + rot += i;
  2269 + stripe_index = do_div(rot, map->num_stripes); /* NOTE(review): rot is an int, but do_div() is documented for a 64-bit dividend - confirm; a plain % may be intended */
  2270 + if (stripe_index == num)
  2271 + return 0; /* *offset already holds the candidate that maps onto stripe @num */
  2272 + if (stripe_index < num)
  2273 + j++; /* count candidates whose stripe index is below @num */
  2274 + }
  2275 + *offset = last_offset + j * map->stripe_len; /* no candidate mapped onto @num: treated as parity */
  2276 + return 1;
  2277 +}
  2278 +
2238 2279 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2239 2280 struct map_lookup *map,
2240 2281 struct btrfs_device *scrub_dev,
... ... @@ -2256,6 +2297,7 @@
2256 2297 u64 physical;
2257 2298 u64 logical;
2258 2299 u64 logic_end;
  2300 + u64 physical_end;
2259 2301 u64 generation;
2260 2302 int mirror_num;
2261 2303 struct reada_control *reada1;
2262 2304  
2263 2305  
... ... @@ -2269,16 +2311,10 @@
2269 2311 u64 extent_len;
2270 2312 struct btrfs_device *extent_dev;
2271 2313 int extent_mirror_num;
2272   - int stop_loop;
  2314 + int stop_loop = 0;
2273 2315  
2274   - if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2275   - BTRFS_BLOCK_GROUP_RAID6)) {
2276   - if (num >= nr_data_stripes(map)) {
2277   - return 0;
2278   - }
2279   - }
2280   -
2281 2316 nstripes = length;
  2317 + physical = map->stripes[num].physical;
2282 2318 offset = 0;
2283 2319 do_div(nstripes, map->stripe_len);
2284 2320 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
... ... @@ -2296,6 +2332,11 @@
2296 2332 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2297 2333 increment = map->stripe_len;
2298 2334 mirror_num = num % map->num_stripes + 1;
  2335 + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
  2336 + BTRFS_BLOCK_GROUP_RAID6)) {
  2337 + get_raid56_logic_offset(physical, num, map, &offset);
  2338 + increment = map->stripe_len * nr_data_stripes(map);
  2339 + mirror_num = 1;
2299 2340 } else {
2300 2341 increment = map->stripe_len;
2301 2342 mirror_num = 1;
... ... @@ -2319,7 +2360,15 @@
2319 2360 * to not hold off transaction commits
2320 2361 */
2321 2362 logical = base + offset;
2322   -
  2363 + physical_end = physical + nstripes * map->stripe_len;
  2364 + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
  2365 + BTRFS_BLOCK_GROUP_RAID6)) {
  2366 + get_raid56_logic_offset(physical_end, num,
  2367 + map, &logic_end);
  2368 + logic_end += base;
  2369 + } else {
  2370 + logic_end = logical + increment * nstripes;
  2371 + }
2323 2372 wait_event(sctx->list_wait,
2324 2373 atomic_read(&sctx->bios_in_flight) == 0);
2325 2374 scrub_blocked_if_needed(fs_info);
... ... @@ -2328,7 +2377,7 @@
2328 2377 key_start.objectid = logical;
2329 2378 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2330 2379 key_start.offset = (u64)0;
2331   - key_end.objectid = base + offset + nstripes * increment;
  2380 + key_end.objectid = logic_end;
2332 2381 key_end.type = BTRFS_METADATA_ITEM_KEY;
2333 2382 key_end.offset = (u64)-1;
2334 2383 reada1 = btrfs_reada_add(root, &key_start, &key_end);
... ... @@ -2338,7 +2387,7 @@
2338 2387 key_start.offset = logical;
2339 2388 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2340 2389 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2341   - key_end.offset = base + offset + nstripes * increment;
  2390 + key_end.offset = logic_end;
2342 2391 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2343 2392  
2344 2393 if (!IS_ERR(reada1))
2345 2394  
... ... @@ -2356,11 +2405,17 @@
2356 2405 /*
2357 2406 * now find all extents for each stripe and scrub them
2358 2407 */
2359   - logical = base + offset;
2360   - physical = map->stripes[num].physical;
2361   - logic_end = logical + increment * nstripes;
2362 2408 ret = 0;
2363   - while (logical < logic_end) {
  2409 + while (physical < physical_end) {
  2410 + /* for raid56, we skip parity stripe */
  2411 + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
  2412 + BTRFS_BLOCK_GROUP_RAID6)) {
  2413 + ret = get_raid56_logic_offset(physical, num,
  2414 + map, &logical);
  2415 + logical += base;
  2416 + if (ret)
  2417 + goto skip;
  2418 + }
2364 2419 /*
2365 2420 * canceled?
2366 2421 */
2367 2422  
... ... @@ -2504,15 +2559,29 @@
2504 2559 scrub_free_csums(sctx);
2505 2560 if (extent_logical + extent_len <
2506 2561 key.objectid + bytes) {
2507   - logical += increment;
2508   - physical += map->stripe_len;
2509   -
  2562 + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
  2563 + BTRFS_BLOCK_GROUP_RAID6)) {
  2564 + /*
  2565 + * loop until we find next data stripe
  2566 + * or we have finished all stripes.
  2567 + */
  2568 + do {
  2569 + physical += map->stripe_len;
  2570 + ret = get_raid56_logic_offset(
  2571 + physical, num,
  2572 + map, &logical);
  2573 + logical += base;
  2574 + } while (physical < physical_end && ret);
  2575 + } else {
  2576 + physical += map->stripe_len;
  2577 + logical += increment;
  2578 + }
2510 2579 if (logical < key.objectid + bytes) {
2511 2580 cond_resched();
2512 2581 goto again;
2513 2582 }
2514 2583  
2515   - if (logical >= logic_end) {
  2584 + if (physical >= physical_end) {
2516 2585 stop_loop = 1;
2517 2586 break;
2518 2587 }
... ... @@ -2521,6 +2590,7 @@
2521 2590 path->slots[0]++;
2522 2591 }
2523 2592 btrfs_release_path(path);
  2593 +skip:
2524 2594 logical += increment;
2525 2595 physical += map->stripe_len;
2526 2596 spin_lock(&sctx->stat_lock);