Commit 22c3f2fff68abf1ccf88e1a72f61bfede7b91da0

Authored by Linus Torvalds

Merge tag 'md-3.9-fixes' of git://neil.brown.name/md

Pull md fixes from NeilBrown:
 "A few bugfixes for md

   - recent regressions in raid5
   - recent regressions in dm-raid
   - a few instances of CONFIG_MULTICORE_RAID456 linger

  Several tagged for -stable"

* tag 'md-3.9-fixes' of git://neil.brown.name/md:
  md: remove CONFIG_MULTICORE_RAID456 entirely
  md/raid5: ensure sync and DISCARD don't happen at the same time.
  MD: Prevent sysfs operations on uninitialized kobjects
  MD RAID5: Avoid accessing gendisk or queue structs when not available
  md/raid5: schedule_construction should abort if nothing to do.
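
The sysfs and gendisk/queue fixes share one defensive pattern: when the raid5
personality is driven by dm-raid, or the array is not fully set up yet,
mddev->kobj.sd, mddev->gendisk and mddev->queue can be NULL, so the sysfs and
block-trace calls are now guarded before use. Here is a minimal userspace
sketch of that guard pattern; struct array, notify_degraded and trace_remap
are illustrative stand-ins, not the kernel API.

#include <stdio.h>

/* Simplified stand-in for an md array: the sysfs directory and the
 * gendisk may be absent when the array is driven by dm-raid or is not
 * fully initialized yet. */
struct array {
	void *sysfs_dir;	/* models mddev->kobj.sd */
	void *gendisk;		/* models mddev->gendisk / mddev->queue */
	int degraded;
};

/* Only touch sysfs when the directory actually exists, mirroring the
 * "if (removed && mddev->kobj.sd)" check added in md.c. */
static void notify_degraded(struct array *a, int removed)
{
	if (removed && a->sysfs_dir)
		printf("sysfs_notify: degraded=%d\n", a->degraded);
}

/* Only emit block-trace events when a gendisk is attached, mirroring
 * the "if (conf->mddev->gendisk)" checks added in raid5.c. */
static void trace_remap(struct array *a, unsigned long long sector)
{
	if (a->gendisk)
		printf("trace_block_bio_remap: sector=%llu\n", sector);
}

int main(void)
{
	static int present;	/* dummy non-NULL target */
	struct array dm_backed = { .sysfs_dir = NULL, .gendisk = NULL, .degraded = 1 };
	struct array md_backed = { .sysfs_dir = &present, .gendisk = &present, .degraded = 1 };

	notify_degraded(&dm_backed, 1);	/* skipped: no sysfs directory yet */
	notify_degraded(&md_backed, 1);	/* emitted */
	trace_remap(&dm_backed, 8);	/* skipped: no gendisk attached */
	trace_remap(&md_backed, 8);	/* traced */
	return 0;
}

The same check-before-use shape appears below in md.c (sysfs_notify), md.h
(sysfs_create_link/sysfs_remove_link) and raid5.c (trace_block_bio_remap,
blk_add_trace_msg, trace_block_unplug).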

Showing 6 changed files

arch/tile/configs/tilegx_defconfig
... ... @@ -330,7 +330,6 @@
330 330 CONFIG_MD_RAID1=m
331 331 CONFIG_MD_RAID10=m
332 332 CONFIG_MD_RAID456=m
333   -CONFIG_MULTICORE_RAID456=y
334 333 CONFIG_MD_FAULTY=m
335 334 CONFIG_BLK_DEV_DM=m
336 335 CONFIG_DM_DEBUG=y
arch/tile/configs/tilepro_defconfig
... ... @@ -324,7 +324,6 @@
324 324 CONFIG_MD_RAID1=m
325 325 CONFIG_MD_RAID10=m
326 326 CONFIG_MD_RAID456=m
327   -CONFIG_MULTICORE_RAID456=y
328 327 CONFIG_MD_FAULTY=m
329 328 CONFIG_BLK_DEV_DM=m
330 329 CONFIG_DM_DEBUG=y
drivers/md/md.c
... ... @@ -7663,10 +7663,8 @@
7663 7663 removed++;
7664 7664 }
7665 7665 }
7666   - if (removed)
7667   - sysfs_notify(&mddev->kobj, NULL,
7668   - "degraded");
7669   -
  7666 + if (removed && mddev->kobj.sd)
  7667 + sysfs_notify(&mddev->kobj, NULL, "degraded");
7670 7668  
7671 7669 rdev_for_each(rdev, mddev) {
7672 7670 if (rdev->raid_disk >= 0 &&
drivers/md/md.h
... ... @@ -506,7 +506,7 @@
506 506 static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
507 507 {
508 508 char nm[20];
509   - if (!test_bit(Replacement, &rdev->flags)) {
  509 + if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
510 510 sprintf(nm, "rd%d", rdev->raid_disk);
511 511 return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
512 512 } else
... ... @@ -516,7 +516,7 @@
516 516 static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
517 517 {
518 518 char nm[20];
519   - if (!test_bit(Replacement, &rdev->flags)) {
  519 + if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
520 520 sprintf(nm, "rd%d", rdev->raid_disk);
521 521 sysfs_remove_link(&mddev->kobj, nm);
522 522 }
drivers/md/raid5.c
... ... @@ -671,9 +671,11 @@
671 671 bi->bi_next = NULL;
672 672 if (rrdev)
673 673 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
674   - trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
675   - bi, disk_devt(conf->mddev->gendisk),
676   - sh->dev[i].sector);
  674 +
  675 + if (conf->mddev->gendisk)
  676 + trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
  677 + bi, disk_devt(conf->mddev->gendisk),
  678 + sh->dev[i].sector);
677 679 generic_make_request(bi);
678 680 }
679 681 if (rrdev) {
... ... @@ -701,9 +703,10 @@
701 703 rbi->bi_io_vec[0].bv_offset = 0;
702 704 rbi->bi_size = STRIPE_SIZE;
703 705 rbi->bi_next = NULL;
704   - trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
705   - rbi, disk_devt(conf->mddev->gendisk),
706   - sh->dev[i].sector);
  706 + if (conf->mddev->gendisk)
  707 + trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
  708 + rbi, disk_devt(conf->mddev->gendisk),
  709 + sh->dev[i].sector);
707 710 generic_make_request(rbi);
708 711 }
709 712 if (!rdev && !rrdev) {
710 713  
... ... @@ -2280,18 +2283,7 @@
2280 2283 int level = conf->level;
2281 2284  
2282 2285 if (rcw) {
2283   - /* if we are not expanding this is a proper write request, and
2284   - * there will be bios with new data to be drained into the
2285   - * stripe cache
2286   - */
2287   - if (!expand) {
2288   - sh->reconstruct_state = reconstruct_state_drain_run;
2289   - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
2290   - } else
2291   - sh->reconstruct_state = reconstruct_state_run;
2292 2286  
2293   - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
2294   -
2295 2287 for (i = disks; i--; ) {
2296 2288 struct r5dev *dev = &sh->dev[i];
2297 2289  
... ... @@ -2303,6 +2295,21 @@
2303 2295 s->locked++;
2304 2296 }
2305 2297 }
  2298 + /* if we are not expanding this is a proper write request, and
  2299 + * there will be bios with new data to be drained into the
  2300 + * stripe cache
  2301 + */
  2302 + if (!expand) {
  2303 + if (!s->locked)
  2304 + /* False alarm, nothing to do */
  2305 + return;
  2306 + sh->reconstruct_state = reconstruct_state_drain_run;
  2307 + set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
  2308 + } else
  2309 + sh->reconstruct_state = reconstruct_state_run;
  2310 +
  2311 + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
  2312 +
2306 2313 if (s->locked + conf->max_degraded == disks)
2307 2314 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
2308 2315 atomic_inc(&conf->pending_full_writes);
... ... @@ -2311,11 +2318,6 @@
2311 2318 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
2312 2319 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
2313 2320  
2314   - sh->reconstruct_state = reconstruct_state_prexor_drain_run;
2315   - set_bit(STRIPE_OP_PREXOR, &s->ops_request);
2316   - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
2317   - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
2318   -
2319 2321 for (i = disks; i--; ) {
2320 2322 struct r5dev *dev = &sh->dev[i];
2321 2323 if (i == pd_idx)
... ... @@ -2330,6 +2332,13 @@
2330 2332 s->locked++;
2331 2333 }
2332 2334 }
  2335 + if (!s->locked)
  2336 + /* False alarm - nothing to do */
  2337 + return;
  2338 + sh->reconstruct_state = reconstruct_state_prexor_drain_run;
  2339 + set_bit(STRIPE_OP_PREXOR, &s->ops_request);
  2340 + set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
  2341 + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
2333 2342 }
2334 2343  
2335 2344 /* keep the parity disk(s) locked while asynchronous operations
... ... @@ -2564,6 +2573,8 @@
2564 2573 int i;
2565 2574  
2566 2575 clear_bit(STRIPE_SYNCING, &sh->state);
  2576 + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
  2577 + wake_up(&conf->wait_for_overlap);
2567 2578 s->syncing = 0;
2568 2579 s->replacing = 0;
2569 2580 /* There is nothing more to do for sync/check/repair.
... ... @@ -2737,6 +2748,7 @@
2737 2748 {
2738 2749 int i;
2739 2750 struct r5dev *dev;
  2751 + int discard_pending = 0;
2740 2752  
2741 2753 for (i = disks; i--; )
2742 2754 if (sh->dev[i].written) {
2743 2755  
... ... @@ -2765,10 +2777,24 @@
2765 2777 STRIPE_SECTORS,
2766 2778 !test_bit(STRIPE_DEGRADED, &sh->state),
2767 2779 0);
2768   - }
2769   - } else if (test_bit(R5_Discard, &sh->dev[i].flags))
2770   - clear_bit(R5_Discard, &sh->dev[i].flags);
  2780 + } else if (test_bit(R5_Discard, &dev->flags))
  2781 + discard_pending = 1;
  2782 + }
  2783 + if (!discard_pending &&
  2784 + test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
  2785 + clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
  2786 + clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
  2787 + if (sh->qd_idx >= 0) {
  2788 + clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
  2789 + clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
  2790 + }
  2791 + /* now that discard is done we can proceed with any sync */
  2792 + clear_bit(STRIPE_DISCARD, &sh->state);
  2793 + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
  2794 + set_bit(STRIPE_HANDLE, &sh->state);
2771 2795  
  2796 + }
  2797 +
2772 2798 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
2773 2799 if (atomic_dec_and_test(&conf->pending_full_writes))
2774 2800 md_wakeup_thread(conf->mddev->thread);
... ... @@ -2826,8 +2852,10 @@
2826 2852 set_bit(STRIPE_HANDLE, &sh->state);
2827 2853 if (rmw < rcw && rmw > 0) {
2828 2854 /* prefer read-modify-write, but need to get some data */
2829   - blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
2830   - (unsigned long long)sh->sector, rmw);
  2855 + if (conf->mddev->queue)
  2856 + blk_add_trace_msg(conf->mddev->queue,
  2857 + "raid5 rmw %llu %d",
  2858 + (unsigned long long)sh->sector, rmw);
2831 2859 for (i = disks; i--; ) {
2832 2860 struct r5dev *dev = &sh->dev[i];
2833 2861 if ((dev->towrite || i == sh->pd_idx) &&
... ... @@ -2877,7 +2905,7 @@
2877 2905 }
2878 2906 }
2879 2907 }
2880   - if (rcw)
  2908 + if (rcw && conf->mddev->queue)
2881 2909 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
2882 2910 (unsigned long long)sh->sector,
2883 2911 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
... ... @@ -3417,9 +3445,15 @@
3417 3445 return;
3418 3446 }
3419 3447  
3420   - if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
3421   - set_bit(STRIPE_SYNCING, &sh->state);
3422   - clear_bit(STRIPE_INSYNC, &sh->state);
  3448 + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
  3449 + spin_lock(&sh->stripe_lock);
  3450 + /* Cannot process 'sync' concurrently with 'discard' */
  3451 + if (!test_bit(STRIPE_DISCARD, &sh->state) &&
  3452 + test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
  3453 + set_bit(STRIPE_SYNCING, &sh->state);
  3454 + clear_bit(STRIPE_INSYNC, &sh->state);
  3455 + }
  3456 + spin_unlock(&sh->stripe_lock);
3423 3457 }
3424 3458 clear_bit(STRIPE_DELAYED, &sh->state);
3425 3459  
... ... @@ -3579,6 +3613,8 @@
3579 3613 test_bit(STRIPE_INSYNC, &sh->state)) {
3580 3614 md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
3581 3615 clear_bit(STRIPE_SYNCING, &sh->state);
  3616 + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
  3617 + wake_up(&conf->wait_for_overlap);
3582 3618 }
3583 3619  
3584 3620 /* If the failed drives are just a ReadError, then we might need
... ... @@ -3982,9 +4018,10 @@
3982 4018 atomic_inc(&conf->active_aligned_reads);
3983 4019 spin_unlock_irq(&conf->device_lock);
3984 4020  
3985   - trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
3986   - align_bi, disk_devt(mddev->gendisk),
3987   - raid_bio->bi_sector);
  4021 + if (mddev->gendisk)
  4022 + trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
  4023 + align_bi, disk_devt(mddev->gendisk),
  4024 + raid_bio->bi_sector);
3988 4025 generic_make_request(align_bi);
3989 4026 return 1;
3990 4027 } else {
... ... @@ -4078,7 +4115,8 @@
4078 4115 }
4079 4116 spin_unlock_irq(&conf->device_lock);
4080 4117 }
4081   - trace_block_unplug(mddev->queue, cnt, !from_schedule);
  4118 + if (mddev->queue)
  4119 + trace_block_unplug(mddev->queue, cnt, !from_schedule);
4082 4120 kfree(cb);
4083 4121 }
4084 4122  
... ... @@ -4141,6 +4179,13 @@
4141 4179 sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
4142 4180 prepare_to_wait(&conf->wait_for_overlap, &w,
4143 4181 TASK_UNINTERRUPTIBLE);
  4182 + set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
  4183 + if (test_bit(STRIPE_SYNCING, &sh->state)) {
  4184 + release_stripe(sh);
  4185 + schedule();
  4186 + goto again;
  4187 + }
  4188 + clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
4144 4189 spin_lock_irq(&sh->stripe_lock);
4145 4190 for (d = 0; d < conf->raid_disks; d++) {
4146 4191 if (d == sh->pd_idx || d == sh->qd_idx)
... ... @@ -4153,6 +4198,7 @@
4153 4198 goto again;
4154 4199 }
4155 4200 }
  4201 + set_bit(STRIPE_DISCARD, &sh->state);
4156 4202 finish_wait(&conf->wait_for_overlap, &w);
4157 4203 for (d = 0; d < conf->raid_disks; d++) {
4158 4204 if (d == sh->pd_idx || d == sh->qd_idx)
drivers/md/raid5.h
... ... @@ -221,10 +221,6 @@
221 221 struct stripe_operations {
222 222 int target, target2;
223 223 enum sum_check_flags zero_sum_result;
224   - #ifdef CONFIG_MULTICORE_RAID456
225   - unsigned long request;
226   - wait_queue_head_t wait_for_ops;
227   - #endif
228 224 } ops;
229 225 struct r5dev {
230 226 /* rreq and rvec are used for the replacement device when
... ... @@ -323,6 +319,7 @@
323 319 STRIPE_COMPUTE_RUN,
324 320 STRIPE_OPS_REQ_PENDING,
325 321 STRIPE_ON_UNPLUG_LIST,
  322 + STRIPE_DISCARD,
326 323 };
327 324  
328 325 /*
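
For "md/raid5: ensure sync and DISCARD don't happen at the same time", the new
STRIPE_DISCARD flag and sh->stripe_lock make the two paths exclude each other:
handle_stripe() only promotes STRIPE_SYNC_REQUESTED to STRIPE_SYNCING while no
discard owns the stripe, and make_discard_request() backs off (via R5_Overlap
and the overlap waitqueue) while a sync is running. A simplified userspace
model of that hand-off is sketched here; struct stripe, try_start_sync and
try_start_discard are illustrative names, and a pthread mutex stands in for
stripe_lock.

#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>

/* Simplified stripe state: only the two flags that matter here. */
struct stripe {
	pthread_mutex_t lock;	/* stands in for sh->stripe_lock */
	bool discard;		/* models STRIPE_DISCARD */
	bool syncing;		/* models STRIPE_SYNCING */
};

/* Sync side: mirrors the new handle_stripe() check -- only move
 * SYNC_REQUESTED to SYNCING when no discard is in flight. */
static bool try_start_sync(struct stripe *sh)
{
	bool started = false;

	pthread_mutex_lock(&sh->lock);
	if (!sh->discard) {
		sh->syncing = true;
		started = true;
	}
	pthread_mutex_unlock(&sh->lock);
	return started;
}

/* Discard side: mirrors make_discard_request() -- back off while a
 * sync owns the stripe (the real code sets R5_Overlap and sleeps on
 * wait_for_overlap), otherwise mark the stripe as discarding. */
static bool try_start_discard(struct stripe *sh)
{
	bool started = false;

	pthread_mutex_lock(&sh->lock);
	if (!sh->syncing) {
		sh->discard = true;
		started = true;
	}
	pthread_mutex_unlock(&sh->lock);
	return started;		/* caller retries later on false */
}

int main(void)
{
	struct stripe sh = { .discard = false, .syncing = false };

	pthread_mutex_init(&sh.lock, NULL);
	printf("discard starts: %d\n", try_start_discard(&sh));	/* 1 */
	printf("sync starts:    %d\n", try_start_sync(&sh));		/* 0: excluded */
	sh.discard = false;	/* discard completes, as in handle_stripe_clean_event() */
	printf("sync starts:    %d\n", try_start_sync(&sh));		/* 1 */
	return 0;
}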