Commit 5e5e3e78ed9038b8f7112835d07084eefb9daa47

Authored by NeilBrown
1 parent e4424fee18

md: Fix handling of raid5 array which is being reshaped to fewer devices.

When a raid5 (or raid6) array is being reshaped to have fewer devices,
conf->raid_disks is the new, and hence smaller, number of devices.
However sometimes we want to use a number which is the total number of
currently required devices - the larger of the 'old' and 'new' sizes.
Before we implemented reducing the number of devices, this was always
'new', i.e. ->raid_disks.
Now we need max(raid_disks, previous_raid_disks) in those places.
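
To make the arithmetic concrete, here is a minimal standalone sketch
(ours, not part of the patch) of how the two counts relate during a
shrinking reshape, using the delta_disks convention visible in the
diff below:

	#include <stdio.h>

	#define max(a, b) ((a) > (b) ? (a) : (b))

	int main(void)
	{
		/* Hypothetical reshape from 6 devices down to 4. */
		int raid_disks = 4;	/* new, smaller count: ->raid_disks */
		int delta_disks = -2;	/* negative while shrinking */
		int previous_raid_disks = raid_disks - delta_disks; /* 6 */

		/* Allocations and loops must cover the larger geometry. */
		printf("%d\n", max(raid_disks, previous_raid_disks)); /* 6 */
		return 0;
	}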

This particularly affects assembling an array that was shutdown while
in the middle of a reshape to fewer devices.

md.c needs a similar fix when interpreting the md metadata.

Signed-off-by: NeilBrown <neilb@suse.de>

Showing 2 changed files with 19 additions and 20 deletions

drivers/md/md.c
... ... @@ -2631,7 +2631,7 @@
2631 2631 rdev->desc_nr = i++;
2632 2632 rdev->raid_disk = rdev->desc_nr;
2633 2633 set_bit(In_sync, &rdev->flags);
2634   - } else if (rdev->raid_disk >= mddev->raid_disks) {
  2634 + } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
2635 2635 rdev->raid_disk = -1;
2636 2636 clear_bit(In_sync, &rdev->flags);
2637 2637 }
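
The new bound works because min(0, delta_disks) is nonzero only while
shrinking. A hedged restatement of the expression (the helper name is
ours, not the kernel's):

	/* Restates the bound used above: the highest valid raid_disk
	 * slot is the larger of the old and new device counts. */
	static int active_raid_disks(int raid_disks, int delta_disks)
	{
		/* While shrinking, delta_disks < 0, so subtracting it
		 * yields the old, larger count; otherwise the bound is
		 * just raid_disks. */
		return raid_disks - (delta_disks < 0 ? delta_disks : 0);
	}

With raid_disks = 4 and delta_disks = -2 the bound is 6, so members
that exist only in the old geometry (slots 4 and 5) survive assembly
instead of being kicked out of the array.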
drivers/md/raid5.c
... ... @@ -1238,22 +1238,22 @@
1238 1238 static int grow_one_stripe(raid5_conf_t *conf)
1239 1239 {
1240 1240 struct stripe_head *sh;
  1241 + int disks = max(conf->raid_disks, conf->previous_raid_disks);
1241 1242 sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
1242 1243 if (!sh)
1243 1244 return 0;
1244   - memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
  1245 + memset(sh, 0, sizeof(*sh) + (disks-1)*sizeof(struct r5dev));
1245 1246 sh->raid_conf = conf;
1246 1247 spin_lock_init(&sh->lock);
1247 1248 #ifdef CONFIG_MULTICORE_RAID456
1248 1249 init_waitqueue_head(&sh->ops.wait_for_ops);
1249 1250 #endif
1250 1251  
1251   - if (grow_buffers(sh, conf->raid_disks)) {
1252   - shrink_buffers(sh, conf->raid_disks);
  1252 + if (grow_buffers(sh, disks)) {
  1253 + shrink_buffers(sh, disks);
1253 1254 kmem_cache_free(conf->slab_cache, sh);
1254 1255 return 0;
1255 1256 }
1256   - sh->disks = conf->raid_disks;
1257 1257 /* we just created an active stripe so... */
1258 1258 atomic_set(&sh->count, 1);
1259 1259 atomic_inc(&conf->active_stripes);
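
Note that the deleted sh->disks assignment needs no replacement: as
best we recall from raid5.c of this era (treat as an approximation,
it is not shown in this diff), init_stripe() re-derives the field
each time a stripe is activated, so the value set at grow time was
never load-bearing:

	/* In init_stripe(), roughly: the stripe's width is chosen per
	 * use, according to the geometry it is about to serve. */
	sh->disks = previous ? conf->previous_raid_disks
			     : conf->raid_disks;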
... ... @@ -1265,7 +1265,7 @@
1265 1265 static int grow_stripes(raid5_conf_t *conf, int num)
1266 1266 {
1267 1267 struct kmem_cache *sc;
1268   - int devs = conf->raid_disks;
  1268 + int devs = max(conf->raid_disks, conf->previous_raid_disks);
1269 1269  
1270 1270 sprintf(conf->cache_name[0],
1271 1271 "raid%d-%s", conf->level, mdname(conf->mddev));
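The hunk cuts off just after the cache-name sprintf, but devs feeds
the slab object size a few lines further down, roughly (recalled from
this era of raid5.c, not shown in the hunk):

	sc = kmem_cache_create(conf->cache_name[conf->active_name],
			       sizeof(struct stripe_head) +
			       (devs - 1) * sizeof(struct r5dev),
			       0, 0, NULL);

This matches the memset in grow_one_stripe() above: each stripe_head
carries devs - 1 extra r5dev slots appended to it, so the cache must
be created for the larger of the old and new geometries.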
... ... @@ -3540,9 +3540,10 @@
3540 3540 {
3541 3541 raid5_conf_t *conf = mddev->private;
3542 3542 int i;
  3543 + int devs = max(conf->raid_disks, conf->previous_raid_disks);
3543 3544  
3544 3545 rcu_read_lock();
3545   - for (i = 0; i < conf->raid_disks; i++) {
  3546 + for (i = 0; i < devs; i++) {
3546 3547 mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
3547 3548 if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
3548 3549 struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
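The same rule applies to this per-device walk: while shrinking, the
members that are leaving the array still sit in conf->disks[] beyond
index raid_disks - 1 and can still have I/O pending, so iterating
only the new, smaller count would skip them.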
... ... @@ -4562,13 +4563,9 @@
4562 4563  
4563 4564 if (!sectors)
4564 4565 sectors = mddev->dev_sectors;
4565   - if (!raid_disks) {
  4566 + if (!raid_disks)
4566 4567 /* size is defined by the smallest of previous and new size */
4567   - if (conf->raid_disks < conf->previous_raid_disks)
4568   - raid_disks = conf->raid_disks;
4569   - else
4570   - raid_disks = conf->previous_raid_disks;
4571   - }
  4568 + raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
4572 4569  
4573 4570 sectors &= ~((sector_t)mddev->chunk_sectors - 1);
4574 4571 sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
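
For context, raid5_size() goes on to derive the exported capacity
from this count, roughly (recalled, not shown in the hunk):

	return sectors * (raid_disks - conf->max_degraded);

Using min() here keeps the advertised size at what both the old and
new geometries can safely provide while the reshape is in flight.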
... ... @@ -4669,7 +4666,7 @@
4669 4666 }
4670 4667 per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
4671 4668 }
4672   - scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
  4669 + scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
4673 4670 if (!scribble) {
4674 4671 err = -ENOMEM;
4675 4672 break;
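
This pairs with the setup_conf() change below: conf->scribble_len is
now computed once from max_disks, so the per-CPU scribble buffers
automatically cover the larger geometry instead of being re-derived
from the possibly smaller conf->raid_disks.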
... ... @@ -4690,7 +4687,7 @@
4690 4687 static raid5_conf_t *setup_conf(mddev_t *mddev)
4691 4688 {
4692 4689 raid5_conf_t *conf;
4693   - int raid_disk, memory;
  4690 + int raid_disk, memory, max_disks;
4694 4691 mdk_rdev_t *rdev;
4695 4692 struct disk_info *disk;
4696 4693  
... ... @@ -4740,13 +4737,14 @@
4740 4737 conf->bypass_threshold = BYPASS_THRESHOLD;
4741 4738  
4742 4739 conf->raid_disks = mddev->raid_disks;
4743   - conf->scribble_len = scribble_len(conf->raid_disks);
4744 4740 if (mddev->reshape_position == MaxSector)
4745 4741 conf->previous_raid_disks = mddev->raid_disks;
4746 4742 else
4747 4743 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
  4744 + max_disks = max(conf->raid_disks, conf->previous_raid_disks);
  4745 + conf->scribble_len = scribble_len(max_disks);
4748 4746  
4749   - conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
  4747 + conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
4750 4748 GFP_KERNEL);
4751 4749 if (!conf->disks)
4752 4750 goto abort;
... ... @@ -4764,7 +4762,7 @@
4764 4762  
4765 4763 list_for_each_entry(rdev, &mddev->disks, same_set) {
4766 4764 raid_disk = rdev->raid_disk;
4767   - if (raid_disk >= conf->raid_disks
  4765 + if (raid_disk >= max_disks
4768 4766 || raid_disk < 0)
4769 4767 continue;
4770 4768 disk = conf->disks + raid_disk;
... ... @@ -4796,7 +4794,7 @@
4796 4794 }
4797 4795  
4798 4796 memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
4799   - conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
  4797 + max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
4800 4798 if (grow_stripes(conf, conf->max_nr_stripes)) {
4801 4799 printk(KERN_ERR
4802 4800 "raid5: couldn't allocate %dkB for buffers\n", memory);
... ... @@ -4921,7 +4919,8 @@
4921 4919 test_bit(In_sync, &rdev->flags))
4922 4920 working_disks++;
4923 4921  
4924   - mddev->degraded = conf->raid_disks - working_disks;
  4922 + mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
  4923 + - working_disks);
4925 4924  
4926 4925 if (mddev->degraded > conf->max_degraded) {
4927 4926 printk(KERN_ERR "raid5: not enough operational devices for %s"
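
A worked example (our numbers, not from the patch): when shrinking a
raid5 from 6 to 4 devices with two of the six members missing,
working_disks is 4. The old arithmetic gave degraded = 4 - 4 = 0 and
would have started the array, even though stripes still laid out in
the old geometry need the missing members; the fixed arithmetic gives
degraded = 6 - 4 = 2, which exceeds raid5's max_degraded of 1, so the
array correctly refuses to start.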