Commit 69335ef3bc5b766f34db2d688be1d35313138bca

Authored by NeilBrown
1 parent 3a6de2924a

md/raid10: prepare data structures for handling replacement.

Allow each slot in the RAID10 to have 2 devices, the want_replacement
and the replacement.

Also allow an r10bio to have 2 bios, and for resync/recovery allocate the
second bio if there are any replacement devices.

Signed-off-by: NeilBrown <neilb@suse.de>

Showing 2 changed files with 78 additions and 31 deletions (side-by-side diff).

... ... @@ -73,7 +73,8 @@
73 73 struct r10conf *conf = data;
74 74 int size = offsetof(struct r10bio, devs[conf->copies]);
75 75  
76   - /* allocate a r10bio with room for raid_disks entries in the bios array */
  76 + /* allocate a r10bio with room for raid_disks entries in the
  77 + * bios array */
77 78 return kzalloc(size, gfp_flags);
78 79 }
79 80  
80 81  
... ... @@ -123,12 +124,19 @@
123 124 if (!bio)
124 125 goto out_free_bio;
125 126 r10_bio->devs[j].bio = bio;
  127 + if (!conf->have_replacement)
  128 + continue;
  129 + bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
  130 + if (!bio)
  131 + goto out_free_bio;
  132 + r10_bio->devs[j].repl_bio = bio;
126 133 }
127 134 /*
128 135 * Allocate RESYNC_PAGES data pages and attach them
129 136 * where needed.
130 137 */
131 138 for (j = 0 ; j < nalloc; j++) {
  139 + struct bio *rbio = r10_bio->devs[j].repl_bio;
132 140 bio = r10_bio->devs[j].bio;
133 141 for (i = 0; i < RESYNC_PAGES; i++) {
134 142 if (j == 1 && !test_bit(MD_RECOVERY_SYNC,
... ... @@ -143,6 +151,8 @@
143 151 goto out_free_pages;
144 152  
145 153 bio->bi_io_vec[i].bv_page = page;
  154 + if (rbio)
  155 + rbio->bi_io_vec[i].bv_page = page;
146 156 }
147 157 }
148 158  
149 159  
... ... @@ -156,8 +166,11 @@
156 166 safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
157 167 j = -1;
158 168 out_free_bio:
159   - while ( ++j < nalloc )
  169 + while (++j < nalloc) {
160 170 bio_put(r10_bio->devs[j].bio);
  171 + if (r10_bio->devs[j].repl_bio)
  172 + bio_put(r10_bio->devs[j].repl_bio);
  173 + }
161 174 r10bio_pool_free(r10_bio, conf);
162 175 return NULL;
163 176 }
... ... @@ -178,6 +191,9 @@
178 191 }
179 192 bio_put(bio);
180 193 }
  194 + bio = r10bio->devs[j].repl_bio;
  195 + if (bio)
  196 + bio_put(bio);
181 197 }
182 198 r10bio_pool_free(r10bio, conf);
183 199 }
... ... @@ -191,6 +207,10 @@
191 207 if (!BIO_SPECIAL(*bio))
192 208 bio_put(*bio);
193 209 *bio = NULL;
  210 + bio = &r10_bio->devs[i].repl_bio;
  211 + if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio))
  212 + bio_put(*bio);
  213 + *bio = NULL;
194 214 }
195 215 }
196 216  
197 217  
198 218  
199 219  
200 220  
... ... @@ -275,19 +295,27 @@
275 295 * Find the disk number which triggered given bio
276 296 */
277 297 static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
278   - struct bio *bio, int *slotp)
  298 + struct bio *bio, int *slotp, int *replp)
279 299 {
280 300 int slot;
  301 + int repl = 0;
281 302  
282   - for (slot = 0; slot < conf->copies; slot++)
  303 + for (slot = 0; slot < conf->copies; slot++) {
283 304 if (r10_bio->devs[slot].bio == bio)
284 305 break;
  306 + if (r10_bio->devs[slot].repl_bio == bio) {
  307 + repl = 1;
  308 + break;
  309 + }
  310 + }
285 311  
286 312 BUG_ON(slot == conf->copies);
287 313 update_head_pos(slot, r10_bio);
288 314  
289 315 if (slotp)
290 316 *slotp = slot;
  317 + if (replp)
  318 + *replp = repl;
291 319 return r10_bio->devs[slot].devnum;
292 320 }
293 321  
... ... @@ -368,7 +396,7 @@
368 396 struct r10conf *conf = r10_bio->mddev->private;
369 397 int slot;
370 398  
371   - dev = find_bio_disk(conf, r10_bio, bio, &slot);
  399 + dev = find_bio_disk(conf, r10_bio, bio, &slot, NULL);
372 400  
373 401 /*
374 402 * this branch is our 'one mirror IO has finished' event handler:
... ... @@ -1025,6 +1053,7 @@
1025 1053 */
1026 1054 plugged = mddev_check_plugged(mddev);
1027 1055  
  1056 + r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
1028 1057 raid10_find_phys(conf, r10_bio);
1029 1058 retry_write:
1030 1059 blocked_rdev = NULL;
... ... @@ -1431,7 +1460,7 @@
1431 1460 struct r10conf *conf = r10_bio->mddev->private;
1432 1461 int d;
1433 1462  
1434   - d = find_bio_disk(conf, r10_bio, bio, NULL);
  1463 + d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
1435 1464  
1436 1465 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
1437 1466 set_bit(R10BIO_Uptodate, &r10_bio->state);
... ... @@ -1493,7 +1522,7 @@
1493 1522 int bad_sectors;
1494 1523 int slot;
1495 1524  
1496   - d = find_bio_disk(conf, r10_bio, bio, &slot);
  1525 + d = find_bio_disk(conf, r10_bio, bio, &slot, NULL);
1497 1526  
1498 1527 if (!uptodate) {
1499 1528 set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
1500 1529  
... ... @@ -2271,9 +2300,14 @@
2271 2300 static int init_resync(struct r10conf *conf)
2272 2301 {
2273 2302 int buffs;
  2303 + int i;
2274 2304  
2275 2305 buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
2276 2306 BUG_ON(conf->r10buf_pool);
  2307 + conf->have_replacement = 0;
  2308 + for (i = 0; i < conf->raid_disks; i++)
  2309 + if (conf->mirrors[i].replacement)
  2310 + conf->have_replacement = 1;
2277 2311 conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
2278 2312 if (!conf->r10buf_pool)
2279 2313 return -ENOMEM;
... ... @@ -2,7 +2,7 @@
2 2 #define _RAID10_H
3 3  
4 4 struct mirror_info {
5   - struct md_rdev *rdev;
  5 + struct md_rdev *rdev, *replacement;
6 6 sector_t head_position;
7 7 int recovery_disabled; /* matches
8 8 * mddev->recovery_disabled
9 9  
... ... @@ -18,12 +18,13 @@
18 18 spinlock_t device_lock;
19 19  
20 20 /* geometry */
21   - int near_copies; /* number of copies laid out raid0 style */
  21 + int near_copies; /* number of copies laid out
  22 + * raid0 style */
22 23 int far_copies; /* number of copies laid out
23 24 * at large strides across drives
24 25 */
25   - int far_offset; /* far_copies are offset by 1 stripe
26   - * instead of many
  26 + int far_offset; /* far_copies are offset by 1
  27 + * stripe instead of many
27 28 */
28 29 int copies; /* near_copies * far_copies.
29 30 * must be <= raid_disks
30 31  
... ... @@ -34,10 +35,11 @@
34 35 * 1 stripe.
35 36 */
36 37  
37   - sector_t dev_sectors; /* temp copy of mddev->dev_sectors */
  38 + sector_t dev_sectors; /* temp copy of
  39 + * mddev->dev_sectors */
38 40  
39   - int chunk_shift; /* shift from chunks to sectors */
40   - sector_t chunk_mask;
  41 + int chunk_shift; /* shift from chunks to sectors */
  42 + sector_t chunk_mask;
41 43  
42 44 struct list_head retry_list;
43 45 /* queue pending writes and submit them on unplug */
44 46  
45 47  
... ... @@ -45,20 +47,22 @@
45 47 int pending_count;
46 48  
47 49 spinlock_t resync_lock;
48   - int nr_pending;
49   - int nr_waiting;
50   - int nr_queued;
51   - int barrier;
  50 + int nr_pending;
  51 + int nr_waiting;
  52 + int nr_queued;
  53 + int barrier;
52 54 sector_t next_resync;
53 55 int fullsync; /* set to 1 if a full sync is needed,
54 56 * (fresh device added).
55 57 * Cleared when a sync completes.
56 58 */
57   -
  59 + int have_replacement; /* There is at least one
  60 + * replacement device.
  61 + */
58 62 wait_queue_head_t wait_barrier;
59 63  
60   - mempool_t *r10bio_pool;
61   - mempool_t *r10buf_pool;
  64 + mempool_t *r10bio_pool;
  65 + mempool_t *r10buf_pool;
62 66 struct page *tmppage;
63 67  
64 68 /* When taking over an array from a different personality, we store
65 69  
... ... @@ -98,11 +102,18 @@
98 102 * When resyncing we also use one for each copy.
99 103 * When reconstructing, we use 2 bios, one for read, one for write.
100 104 * We choose the number when they are allocated.
  105 + * We sometimes need an extra bio to write to the replacement.
101 106 */
102 107 struct {
103   - struct bio *bio;
104   - sector_t addr;
105   - int devnum;
  108 + struct bio *bio;
  109 + union {
  110 + struct bio *repl_bio; /* used for resync and
  111 + * writes */
  112 + struct md_rdev *rdev; /* used for reads
  113 + * (read_slot >= 0) */
  114 + };
  115 + sector_t addr;
  116 + int devnum;
106 117 } devs[0];
107 118 };
108 119  
109 120  
110 121  
... ... @@ -121,18 +132,20 @@
121 132 #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
122 133  
123 134 /* bits for r10bio.state */
124   -#define R10BIO_Uptodate 0
125   -#define R10BIO_IsSync 1
126   -#define R10BIO_IsRecover 2
127   -#define R10BIO_Degraded 3
  135 +enum r10bio_state {
  136 + R10BIO_Uptodate,
  137 + R10BIO_IsSync,
  138 + R10BIO_IsRecover,
  139 + R10BIO_Degraded,
128 140 /* Set ReadError on bios that experience a read error
129 141 * so that raid10d knows what to do with them.
130 142 */
131   -#define R10BIO_ReadError 4
  143 + R10BIO_ReadError,
132 144 /* If a write for this request means we can clear some
133 145 * known-bad-block records, we set this flag.
134 146 */
135   -#define R10BIO_MadeGood 5
136   -#define R10BIO_WriteError 6
  147 + R10BIO_MadeGood,
  148 + R10BIO_WriteError,
  149 +};
137 150 #endif