Blame view
drivers/md/dm-raid1.c
34.1 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 |
/* * Copyright (C) 2003 Sistina Software Limited. |
1f965b194 dm raid1: separat... |
3 |
* Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
1da177e4c Linux-2.6.12-rc2 |
4 5 6 |
* * This file is released under the GPL. */ |
06386bbfd dm raid1: handle ... |
7 |
#include "dm-bio-record.h" |
1da177e4c Linux-2.6.12-rc2 |
8 |
|
1da177e4c Linux-2.6.12-rc2 |
9 10 11 12 13 |
#include <linux/init.h> #include <linux/mempool.h> #include <linux/module.h> #include <linux/pagemap.h> #include <linux/slab.h> |
1da177e4c Linux-2.6.12-rc2 |
14 |
#include <linux/workqueue.h> |
1f965b194 dm raid1: separat... |
15 |
#include <linux/device-mapper.h> |
a765e20ee dm: move include ... |
16 17 18 |
#include <linux/dm-io.h> #include <linux/dm-dirty-log.h> #include <linux/dm-kcopyd.h> |
1f965b194 dm raid1: separat... |
19 |
#include <linux/dm-region-hash.h> |
1da177e4c Linux-2.6.12-rc2 |
20 |
|
72d948616 [PATCH] dm: impro... |
21 |
/* Message prefix for this target. */
#define DM_MSG_PREFIX "raid1"

/* Maximum number of regions recovered in parallel. */
#define MAX_RECOVERY 1

/*
 * Feature bit stored in mirror_set.features, and the predicate that
 * tests it on any object exposing a ->features member.
 */
#define DM_RAID1_HANDLE_ERRORS 0x01
#define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)

/* Wait queue woken by wakeup_all_recovery_waiters(). */
static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
1da177e4c Linux-2.6.12-rc2 |
29 |
|
1da177e4c Linux-2.6.12-rc2 |
30 |
/*----------------------------------------------------------------- |
e4c8b3ba3 [PATCH] dm: mirro... |
31 32 |
* Mirror set structures. *---------------------------------------------------------------*/ |
72f4b3141 dm raid1: handle ... |
33 34 |
/*
 * Kinds of failure a mirror leg can report.  fail_mirror() uses each
 * value as a bit number in struct mirror's error_type word, so the
 * numbering must stay dense and start at zero.
 */
enum dm_raid1_error {
	DM_RAID1_WRITE_ERROR,	/* reported from write_callback() */
	DM_RAID1_FLUSH_ERROR,	/* reported from mirror_flush() */
	DM_RAID1_SYNC_ERROR,	/* reported from recovery_complete() */
	DM_RAID1_READ_ERROR	/* reported from read_callback() */
};
e4c8b3ba3 [PATCH] dm: mirro... |
39 |
struct mirror { |
aa5617c55 dm raid1: add mir... |
40 |
struct mirror_set *ms; |
e4c8b3ba3 [PATCH] dm: mirro... |
41 |
atomic_t error_count; |
39ed7adb1 dm-raid1 breakage... |
42 |
unsigned long error_type; |
e4c8b3ba3 [PATCH] dm: mirro... |
43 44 45 46 47 48 49 |
struct dm_dev *dev; sector_t offset; }; struct mirror_set { struct dm_target *ti; struct list_head list; |
1f965b194 dm raid1: separat... |
50 |
|
a8e6afa23 dm raid1: add han... |
51 |
uint64_t features; |
e4c8b3ba3 [PATCH] dm: mirro... |
52 |
|
72f4b3141 dm raid1: handle ... |
53 |
spinlock_t lock; /* protects the lists */ |
e4c8b3ba3 [PATCH] dm: mirro... |
54 55 |
struct bio_list reads; struct bio_list writes; |
72f4b3141 dm raid1: handle ... |
56 |
struct bio_list failures; |
047885076 dm raid1: add fra... |
57 |
struct bio_list holds; /* bios are waiting until suspend */ |
e4c8b3ba3 [PATCH] dm: mirro... |
58 |
|
1f965b194 dm raid1: separat... |
59 60 |
struct dm_region_hash *rh; struct dm_kcopyd_client *kcopyd_client; |
88be163ab dm raid1: update ... |
61 |
struct dm_io_client *io_client; |
06386bbfd dm raid1: handle ... |
62 |
mempool_t *read_record_pool; |
88be163ab dm raid1: update ... |
63 |
|
e4c8b3ba3 [PATCH] dm: mirro... |
64 65 66 |
/* recovery */ region_t nr_regions; int in_sync; |
fc1ff9588 dm raid1: handle ... |
67 |
int log_failure; |
929be8fcb dm raid1: hold al... |
68 |
int leg_failure; |
b80aa7a0c dm raid1: fix EIO... |
69 |
atomic_t suspend; |
e4c8b3ba3 [PATCH] dm: mirro... |
70 |
|
72f4b3141 dm raid1: handle ... |
71 |
atomic_t default_mirror; /* Default mirror */ |
e4c8b3ba3 [PATCH] dm: mirro... |
72 |
|
6ad36fe2b dm raid1: one kmi... |
73 74 |
struct workqueue_struct *kmirrord_wq; struct work_struct kmirrord_work; |
a2aebe03b dm raid1: use timer |
75 76 |
struct timer_list timer; unsigned long timer_pending; |
72f4b3141 dm raid1: handle ... |
77 |
struct work_struct trigger_event; |
6ad36fe2b dm raid1: one kmi... |
78 |
|
1f965b194 dm raid1: separat... |
79 |
unsigned nr_mirrors; |
e4c8b3ba3 [PATCH] dm: mirro... |
80 81 |
struct mirror mirror[0]; }; |
1f965b194 dm raid1: separat... |
82 |
static void wakeup_mirrord(void *context) |
1da177e4c Linux-2.6.12-rc2 |
83 |
{ |
1f965b194 dm raid1: separat... |
84 |
struct mirror_set *ms = context; |
1da177e4c Linux-2.6.12-rc2 |
85 |
|
6ad36fe2b dm raid1: one kmi... |
86 87 |
queue_work(ms->kmirrord_wq, &ms->kmirrord_work); } |
a2aebe03b dm raid1: use timer |
88 89 90 91 92 |
static void delayed_wake_fn(unsigned long data) { struct mirror_set *ms = (struct mirror_set *) data; clear_bit(0, &ms->timer_pending); |
1f965b194 dm raid1: separat... |
93 |
wakeup_mirrord(ms); |
a2aebe03b dm raid1: use timer |
94 95 96 97 98 99 100 101 102 103 104 105 |
} static void delayed_wake(struct mirror_set *ms) { if (test_and_set_bit(0, &ms->timer_pending)) return; ms->timer.expires = jiffies + HZ / 5; ms->timer.data = (unsigned long) ms; ms->timer.function = delayed_wake_fn; add_timer(&ms->timer); } |
1f965b194 dm raid1: separat... |
106 |
static void wakeup_all_recovery_waiters(void *context) |
1da177e4c Linux-2.6.12-rc2 |
107 |
{ |
1f965b194 dm raid1: separat... |
108 |
wake_up_all(&_kmirrord_recovery_stopped); |
1da177e4c Linux-2.6.12-rc2 |
109 |
} |
1f965b194 dm raid1: separat... |
110 |
static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) |
1da177e4c Linux-2.6.12-rc2 |
111 112 |
{ unsigned long flags; |
1da177e4c Linux-2.6.12-rc2 |
113 |
int should_wake = 0; |
1f965b194 dm raid1: separat... |
114 |
struct bio_list *bl; |
1da177e4c Linux-2.6.12-rc2 |
115 |
|
1f965b194 dm raid1: separat... |
116 117 118 119 120 |
bl = (rw == WRITE) ? &ms->writes : &ms->reads; spin_lock_irqsave(&ms->lock, flags); should_wake = !(bl->head); bio_list_add(bl, bio); spin_unlock_irqrestore(&ms->lock, flags); |
1da177e4c Linux-2.6.12-rc2 |
121 122 |
if (should_wake) |
1f965b194 dm raid1: separat... |
123 |
wakeup_mirrord(ms); |
1da177e4c Linux-2.6.12-rc2 |
124 |
} |
1f965b194 dm raid1: separat... |
125 |
static void dispatch_bios(void *context, struct bio_list *bio_list) |
1da177e4c Linux-2.6.12-rc2 |
126 |
{ |
1f965b194 dm raid1: separat... |
127 128 |
struct mirror_set *ms = context; struct bio *bio; |
1da177e4c Linux-2.6.12-rc2 |
129 |
|
1f965b194 dm raid1: separat... |
130 131 |
while ((bio = bio_list_pop(bio_list))) queue_bio(ms, bio, WRITE); |
1da177e4c Linux-2.6.12-rc2 |
132 |
} |
06386bbfd dm raid1: handle ... |
133 134 135 136 137 |
#define MIN_READ_RECORDS 20 struct dm_raid1_read_record { struct mirror *m; struct dm_bio_details details; }; |
95f8fac8d dm raid1: switch ... |
138 |
static struct kmem_cache *_dm_raid1_read_record_cache; |
1da177e4c Linux-2.6.12-rc2 |
139 140 141 142 143 144 |
/* * Every mirror should look like this one. */ #define DEFAULT_MIRROR 0 /* |
06386bbfd dm raid1: handle ... |
145 146 |
* This is yucky. We squirrel the mirror struct away inside * bi_next for read/write buffers. This is safe since the bh |
1da177e4c Linux-2.6.12-rc2 |
147 148 |
* doesn't get submitted to the lower levels of block layer. */ |
06386bbfd dm raid1: handle ... |
149 |
static struct mirror *bio_get_m(struct bio *bio) |
1da177e4c Linux-2.6.12-rc2 |
150 |
{ |
06386bbfd dm raid1: handle ... |
151 |
return (struct mirror *) bio->bi_next; |
1da177e4c Linux-2.6.12-rc2 |
152 |
} |
06386bbfd dm raid1: handle ... |
153 |
static void bio_set_m(struct bio *bio, struct mirror *m) |
1da177e4c Linux-2.6.12-rc2 |
154 |
{ |
06386bbfd dm raid1: handle ... |
155 |
bio->bi_next = (struct bio *) m; |
1da177e4c Linux-2.6.12-rc2 |
156 |
} |
72f4b3141 dm raid1: handle ... |
157 158 159 160 161 162 163 164 165 166 167 168 |
/*
 * Return the leg currently selected as default (primary).  The choice
 * is kept as an index in ms->default_mirror.
 */
static struct mirror *get_default_mirror(struct mirror_set *ms)
{
	int idx = atomic_read(&ms->default_mirror);

	return ms->mirror + idx;
}

/*
 * Make @m the default leg of its own set by recording its index within
 * the set's mirror[] array.
 */
static void set_default_mirror(struct mirror *m)
{
	struct mirror_set *set = m->ms;

	atomic_set(&set->default_mirror, m - set->mirror);
}
87968ddd2 dm raid1: abstrac... |
169 170 171 172 173 174 175 176 177 178 |
/*
 * Return the lowest-indexed leg whose error_count is still zero, or
 * NULL when every leg has recorded an error.
 */
static struct mirror *get_valid_mirror(struct mirror_set *ms)
{
	unsigned idx;

	for (idx = 0; idx < ms->nr_mirrors; idx++) {
		struct mirror *leg = &ms->mirror[idx];

		if (atomic_read(&leg->error_count) == 0)
			return leg;
	}

	return NULL;
}
72f4b3141 dm raid1: handle ... |
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
/* fail_mirror * @m: mirror device to fail * @error_type: one of the enum's, DM_RAID1_*_ERROR * * If errors are being handled, record the type of * error encountered for this device. If this type * of error has already been recorded, we can return; * otherwise, we must signal userspace by triggering * an event. Additionally, if the device is the * primary device, we must choose a new primary, but * only if the mirror is in-sync. * * This function must not block. */ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type) { struct mirror_set *ms = m->ms; struct mirror *new; |
929be8fcb dm raid1: hold al... |
197 |
ms->leg_failure = 1; |
72f4b3141 dm raid1: handle ... |
198 199 200 201 202 203 204 205 206 |
/* * error_count is used for nothing more than a * simple way to tell if a device has encountered * errors. */ atomic_inc(&m->error_count); if (test_and_set_bit(error_type, &m->error_type)) return; |
d460c65a6 dm raid1: fix err... |
207 208 |
if (!errors_handled(ms)) return; |
72f4b3141 dm raid1: handle ... |
209 210 211 212 213 214 215 216 217 218 219 220 |
if (m != get_default_mirror(ms)) goto out; if (!ms->in_sync) { /* * Better to issue requests to same failing device * than to risk returning corrupt data. */ DMERR("Primary mirror (%s) failed while out-of-sync: " "Reads may fail.", m->dev->name); goto out; } |
87968ddd2 dm raid1: abstrac... |
221 222 223 224 |
new = get_valid_mirror(ms); if (new) set_default_mirror(new); else |
72f4b3141 dm raid1: handle ... |
225 226 227 228 229 |
DMWARN("All sides of mirror have failed."); out: schedule_work(&ms->trigger_event); } |
c0da3748b dm raid1: impleme... |
230 231 232 233 234 235 236 237 238 |
static int mirror_flush(struct dm_target *ti) { struct mirror_set *ms = ti->private; unsigned long error_bits; unsigned int i; struct dm_io_region io[ms->nr_mirrors]; struct mirror *m; struct dm_io_request io_req = { |
d87f4c14f dm: implement REQ... |
239 |
.bi_rw = WRITE_FLUSH, |
c0da3748b dm raid1: impleme... |
240 |
.mem.type = DM_IO_KMEM, |
5fc2ffeab dm raid1: support... |
241 |
.mem.ptr.addr = NULL, |
c0da3748b dm raid1: impleme... |
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
.client = ms->io_client, }; for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) { io[i].bdev = m->dev->bdev; io[i].sector = 0; io[i].count = 0; } error_bits = -1; dm_io(&io_req, ms->nr_mirrors, io, &error_bits); if (unlikely(error_bits != 0)) { for (i = 0; i < ms->nr_mirrors; i++) if (test_bit(i, &error_bits)) fail_mirror(ms->mirror + i, |
64b30c46e dm raid1: report ... |
257 |
DM_RAID1_FLUSH_ERROR); |
c0da3748b dm raid1: impleme... |
258 259 260 261 262 |
return -EIO; } return 0; } |
1da177e4c Linux-2.6.12-rc2 |
263 264 265 266 267 268 269 |
/*----------------------------------------------------------------- * Recovery. * * When a mirror is first activated we may find that some regions * are in the no-sync state. We have to recover these by * recopying from the default mirror to all the others. *---------------------------------------------------------------*/ |
4cdc1d1fa dm io: write erro... |
270 |
static void recovery_complete(int read_err, unsigned long write_err, |
1da177e4c Linux-2.6.12-rc2 |
271 272 |
void *context) { |
1f965b194 dm raid1: separat... |
273 274 |
struct dm_region *reg = context; struct mirror_set *ms = dm_rh_region_context(reg); |
8f0205b79 dm raid1: handle ... |
275 |
int m, bit = 0; |
1da177e4c Linux-2.6.12-rc2 |
276 |
|
8f0205b79 dm raid1: handle ... |
277 |
if (read_err) { |
f44db678e dm raid1: handle ... |
278 279 |
/* Read error means the failure of default mirror. */ DMERR_LIMIT("Unable to read primary mirror during recovery"); |
8f0205b79 dm raid1: handle ... |
280 281 |
fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR); } |
f44db678e dm raid1: handle ... |
282 |
|
8f0205b79 dm raid1: handle ... |
283 |
if (write_err) { |
4cdc1d1fa dm io: write erro... |
284 |
DMERR_LIMIT("Write error during recovery (error = 0x%lx)", |
f44db678e dm raid1: handle ... |
285 |
write_err); |
8f0205b79 dm raid1: handle ... |
286 287 288 289 290 291 292 293 294 295 296 297 298 |
/* * Bits correspond to devices (excluding default mirror). * The default mirror cannot change during recovery. */ for (m = 0; m < ms->nr_mirrors; m++) { if (&ms->mirror[m] == get_default_mirror(ms)) continue; if (test_bit(bit, &write_err)) fail_mirror(ms->mirror + m, DM_RAID1_SYNC_ERROR); bit++; } } |
f44db678e dm raid1: handle ... |
299 |
|
1f965b194 dm raid1: separat... |
300 |
dm_rh_recovery_end(reg, !(read_err || write_err)); |
1da177e4c Linux-2.6.12-rc2 |
301 |
} |
1f965b194 dm raid1: separat... |
302 |
static int recover(struct mirror_set *ms, struct dm_region *reg) |
1da177e4c Linux-2.6.12-rc2 |
303 304 |
{ int r; |
1f965b194 dm raid1: separat... |
305 |
unsigned i; |
eb69aca5d dm kcopyd: clean ... |
306 |
struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; |
1da177e4c Linux-2.6.12-rc2 |
307 308 |
struct mirror *m; unsigned long flags = 0; |
1f965b194 dm raid1: separat... |
309 310 |
region_t key = dm_rh_get_region_key(reg); sector_t region_size = dm_rh_get_region_size(ms->rh); |
1da177e4c Linux-2.6.12-rc2 |
311 312 |
/* fill in the source */ |
72f4b3141 dm raid1: handle ... |
313 |
m = get_default_mirror(ms); |
1da177e4c Linux-2.6.12-rc2 |
314 |
from.bdev = m->dev->bdev; |
1f965b194 dm raid1: separat... |
315 316 |
from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key); if (key == (ms->nr_regions - 1)) { |
1da177e4c Linux-2.6.12-rc2 |
317 318 319 320 |
/* * The final region may be smaller than * region_size. */ |
1f965b194 dm raid1: separat... |
321 |
from.count = ms->ti->len & (region_size - 1); |
1da177e4c Linux-2.6.12-rc2 |
322 |
if (!from.count) |
1f965b194 dm raid1: separat... |
323 |
from.count = region_size; |
1da177e4c Linux-2.6.12-rc2 |
324 |
} else |
1f965b194 dm raid1: separat... |
325 |
from.count = region_size; |
1da177e4c Linux-2.6.12-rc2 |
326 327 328 |
/* fill in the destinations */ for (i = 0, dest = to; i < ms->nr_mirrors; i++) { |
72f4b3141 dm raid1: handle ... |
329 |
if (&ms->mirror[i] == get_default_mirror(ms)) |
1da177e4c Linux-2.6.12-rc2 |
330 331 332 333 |
continue; m = ms->mirror + i; dest->bdev = m->dev->bdev; |
1f965b194 dm raid1: separat... |
334 |
dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key); |
1da177e4c Linux-2.6.12-rc2 |
335 336 337 338 339 |
dest->count = from.count; dest++; } /* hand to kcopyd */ |
f7c83e2e4 dm raid1: kcopyd ... |
340 341 |
if (!errors_handled(ms)) set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); |
eb69aca5d dm kcopyd: clean ... |
342 343 |
r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags, recovery_complete, reg); |
1da177e4c Linux-2.6.12-rc2 |
344 345 346 347 348 349 |
return r; } static void do_recovery(struct mirror_set *ms) { |
1f965b194 dm raid1: separat... |
350 351 |
struct dm_region *reg; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
1da177e4c Linux-2.6.12-rc2 |
352 |
int r; |
1da177e4c Linux-2.6.12-rc2 |
353 354 355 356 |
/* * Start quiescing some regions. */ |
1f965b194 dm raid1: separat... |
357 |
dm_rh_recovery_prepare(ms->rh); |
1da177e4c Linux-2.6.12-rc2 |
358 359 360 361 |
/* * Copy any already quiesced regions. */ |
1f965b194 dm raid1: separat... |
362 |
while ((reg = dm_rh_recovery_start(ms->rh))) { |
1da177e4c Linux-2.6.12-rc2 |
363 364 |
r = recover(ms, reg); if (r) |
1f965b194 dm raid1: separat... |
365 |
dm_rh_recovery_end(reg, 0); |
1da177e4c Linux-2.6.12-rc2 |
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 |
} /* * Update the in sync flag. */ if (!ms->in_sync && (log->type->get_sync_count(log) == ms->nr_regions)) { /* the sync is complete */ dm_table_event(ms->ti->table); ms->in_sync = 1; } } /*----------------------------------------------------------------- * Reads *---------------------------------------------------------------*/ static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector) { |
06386bbfd dm raid1: handle ... |
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 |
struct mirror *m = get_default_mirror(ms); do { if (likely(!atomic_read(&m->error_count))) return m; if (m-- == ms->mirror) m += ms->nr_mirrors; } while (m != get_default_mirror(ms)); return NULL; } static int default_ok(struct mirror *m) { struct mirror *default_mirror = get_default_mirror(m->ms); return !atomic_read(&default_mirror->error_count); } static int mirror_available(struct mirror_set *ms, struct bio *bio) { |
1f965b194 dm raid1: separat... |
406 407 |
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); region_t region = dm_rh_bio_to_region(ms->rh, bio); |
06386bbfd dm raid1: handle ... |
408 |
|
1f965b194 dm raid1: separat... |
409 |
if (log->type->in_sync(log, region, 0)) |
06386bbfd dm raid1: handle ... |
410 411 412 |
return choose_mirror(ms, bio->bi_sector) ? 1 : 0; return 0; |
1da177e4c Linux-2.6.12-rc2 |
413 414 415 416 417 |
} /* * remap a buffer to a particular mirror. */ |
06386bbfd dm raid1: handle ... |
418 419 |
static sector_t map_sector(struct mirror *m, struct bio *bio) { |
4184153f9 dm raid1: support... |
420 421 |
if (unlikely(!bio->bi_size)) return 0; |
b441a262e dm: use dm_target... |
422 |
return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector); |
06386bbfd dm raid1: handle ... |
423 424 425 |
} static void map_bio(struct mirror *m, struct bio *bio) |
1da177e4c Linux-2.6.12-rc2 |
426 427 |
{ bio->bi_bdev = m->dev->bdev; |
06386bbfd dm raid1: handle ... |
428 429 |
bio->bi_sector = map_sector(m, bio); } |
22a1ceb1e dm io: clean inte... |
430 |
static void map_region(struct dm_io_region *io, struct mirror *m, |
06386bbfd dm raid1: handle ... |
431 432 433 434 435 436 |
struct bio *bio) { io->bdev = m->dev->bdev; io->sector = map_sector(m, bio); io->count = bio->bi_size >> 9; } |
047885076 dm raid1: add fra... |
437 438 439 |
static void hold_bio(struct mirror_set *ms, struct bio *bio) { /* |
f07030409 dm raid1: fix dea... |
440 441 |
* Lock is required to avoid race condition during suspend * process. |
047885076 dm raid1: add fra... |
442 |
*/ |
f07030409 dm raid1: fix dea... |
443 |
spin_lock_irq(&ms->lock); |
047885076 dm raid1: add fra... |
444 |
if (atomic_read(&ms->suspend)) { |
f07030409 dm raid1: fix dea... |
445 446 447 448 449 |
spin_unlock_irq(&ms->lock); /* * If device is suspended, complete the bio. */ |
047885076 dm raid1: add fra... |
450 451 452 453 454 455 456 457 458 459 |
if (dm_noflush_suspending(ms->ti)) bio_endio(bio, DM_ENDIO_REQUEUE); else bio_endio(bio, -EIO); return; } /* * Hold bio until the suspend is complete. */ |
047885076 dm raid1: add fra... |
460 461 462 |
bio_list_add(&ms->holds, bio); spin_unlock_irq(&ms->lock); } |
06386bbfd dm raid1: handle ... |
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 |
/*----------------------------------------------------------------- * Reads *---------------------------------------------------------------*/ static void read_callback(unsigned long error, void *context) { struct bio *bio = context; struct mirror *m; m = bio_get_m(bio); bio_set_m(bio, NULL); if (likely(!error)) { bio_endio(bio, 0); return; } fail_mirror(m, DM_RAID1_READ_ERROR); if (likely(default_ok(m)) || mirror_available(m->ms, bio)) { DMWARN_LIMIT("Read failure on mirror device %s. " "Trying alternative device.", m->dev->name); queue_bio(m->ms, bio, bio_rw(bio)); return; } DMERR_LIMIT("Read failure on mirror device %s. Failing I/O.", m->dev->name); bio_endio(bio, -EIO); } /* Asynchronous read. */ static void read_async_bio(struct mirror *m, struct bio *bio) { |
22a1ceb1e dm io: clean inte... |
497 |
struct dm_io_region io; |
06386bbfd dm raid1: handle ... |
498 499 500 501 502 503 504 505 506 507 508 |
struct dm_io_request io_req = { .bi_rw = READ, .mem.type = DM_IO_BVEC, .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, .notify.fn = read_callback, .notify.context = bio, .client = m->ms->io_client, }; map_region(&io, m, bio); bio_set_m(bio, m); |
1f965b194 dm raid1: separat... |
509 510 511 512 513 514 515 516 |
BUG_ON(dm_io(&io_req, 1, &io, NULL)); } static inline int region_in_sync(struct mirror_set *ms, region_t region, int may_block) { int state = dm_rh_get_state(ms->rh, region, may_block); return state == DM_RH_CLEAN || state == DM_RH_DIRTY; |
1da177e4c Linux-2.6.12-rc2 |
517 518 519 520 521 522 523 524 525 |
} static void do_reads(struct mirror_set *ms, struct bio_list *reads) { region_t region; struct bio *bio; struct mirror *m; while ((bio = bio_list_pop(reads))) { |
1f965b194 dm raid1: separat... |
526 |
region = dm_rh_bio_to_region(ms->rh, bio); |
06386bbfd dm raid1: handle ... |
527 |
m = get_default_mirror(ms); |
1da177e4c Linux-2.6.12-rc2 |
528 529 530 531 |
/* * We can only read balance if the region is in sync. */ |
1f965b194 dm raid1: separat... |
532 |
if (likely(region_in_sync(ms, region, 1))) |
1da177e4c Linux-2.6.12-rc2 |
533 |
m = choose_mirror(ms, bio->bi_sector); |
06386bbfd dm raid1: handle ... |
534 535 |
else if (m && atomic_read(&m->error_count)) m = NULL; |
1da177e4c Linux-2.6.12-rc2 |
536 |
|
06386bbfd dm raid1: handle ... |
537 538 539 540 |
if (likely(m)) read_async_bio(m, bio); else bio_endio(bio, -EIO); |
1da177e4c Linux-2.6.12-rc2 |
541 542 543 544 545 546 547 548 549 550 551 552 553 |
} } /*----------------------------------------------------------------- * Writes. * * We do different things with the write io depending on the * state of the region that it's in: * * SYNC: increment pending, use kcopyd to write to *all* mirrors * RECOVERING: delay the io until recovery completes * NOSYNC: increment pending, just write to the default mirror *---------------------------------------------------------------*/ |
72f4b3141 dm raid1: handle ... |
554 |
|
72f4b3141 dm raid1: handle ... |
555 |
|
1da177e4c Linux-2.6.12-rc2 |
556 557 |
static void write_callback(unsigned long error, void *context) { |
72f4b3141 dm raid1: handle ... |
558 |
unsigned i, ret = 0; |
1da177e4c Linux-2.6.12-rc2 |
559 560 |
struct bio *bio = (struct bio *) context; struct mirror_set *ms; |
72f4b3141 dm raid1: handle ... |
561 562 |
int should_wake = 0; unsigned long flags; |
1da177e4c Linux-2.6.12-rc2 |
563 |
|
06386bbfd dm raid1: handle ... |
564 565 |
ms = bio_get_m(bio)->ms; bio_set_m(bio, NULL); |
1da177e4c Linux-2.6.12-rc2 |
566 567 568 569 570 571 572 |
/* * NOTE: We don't decrement the pending count here, * instead it is done by the targets endio function. * This way we handle both writes to SYNC and NOSYNC * regions with the same code. */ |
60f355ead dm raid1: hold wr... |
573 574 575 576 |
if (likely(!error)) { bio_endio(bio, ret); return; } |
1da177e4c Linux-2.6.12-rc2 |
577 |
|
72f4b3141 dm raid1: handle ... |
578 579 580 |
for (i = 0; i < ms->nr_mirrors; i++) if (test_bit(i, &error)) fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR); |
72f4b3141 dm raid1: handle ... |
581 |
|
60f355ead dm raid1: hold wr... |
582 583 584 585 586 587 588 589 590 591 592 593 |
/* * Need to raise event. Since raising * events can block, we need to do it in * the main thread. */ spin_lock_irqsave(&ms->lock, flags); if (!ms->failures.head) should_wake = 1; bio_list_add(&ms->failures, bio); spin_unlock_irqrestore(&ms->lock, flags); if (should_wake) wakeup_mirrord(ms); |
1da177e4c Linux-2.6.12-rc2 |
594 595 596 597 598 |
} static void do_write(struct mirror_set *ms, struct bio *bio) { unsigned int i; |
22a1ceb1e dm io: clean inte... |
599 |
struct dm_io_region io[ms->nr_mirrors], *dest = io; |
1da177e4c Linux-2.6.12-rc2 |
600 |
struct mirror *m; |
88be163ab dm raid1: update ... |
601 |
struct dm_io_request io_req = { |
d87f4c14f dm: implement REQ... |
602 |
.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), |
88be163ab dm raid1: update ... |
603 604 605 606 607 608 |
.mem.type = DM_IO_BVEC, .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, .notify.fn = write_callback, .notify.context = bio, .client = ms->io_client, }; |
1da177e4c Linux-2.6.12-rc2 |
609 |
|
5fc2ffeab dm raid1: support... |
610 611 612 613 614 |
if (bio->bi_rw & REQ_DISCARD) { io_req.bi_rw |= REQ_DISCARD; io_req.mem.type = DM_IO_KMEM; io_req.mem.ptr.addr = NULL; } |
06386bbfd dm raid1: handle ... |
615 616 |
for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) map_region(dest++, m, bio); |
1da177e4c Linux-2.6.12-rc2 |
617 |
|
06386bbfd dm raid1: handle ... |
618 619 620 621 622 |
/* * Use default mirror because we only need it to retrieve the reference * to the mirror set in write_callback(). */ bio_set_m(bio, get_default_mirror(ms)); |
88be163ab dm raid1: update ... |
623 |
|
1f965b194 dm raid1: separat... |
624 |
BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL)); |
1da177e4c Linux-2.6.12-rc2 |
625 626 627 628 629 630 631 |
} static void do_writes(struct mirror_set *ms, struct bio_list *writes) { int state; struct bio *bio; struct bio_list sync, nosync, recover, *this_list = NULL; |
7513c2a76 dm raid1: add is_... |
632 633 634 |
struct bio_list requeue; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); region_t region; |
1da177e4c Linux-2.6.12-rc2 |
635 636 637 638 639 640 641 642 643 644 |
if (!writes->head) return; /* * Classify each write. */ bio_list_init(&sync); bio_list_init(&nosync); bio_list_init(&recover); |
7513c2a76 dm raid1: add is_... |
645 |
bio_list_init(&requeue); |
1da177e4c Linux-2.6.12-rc2 |
646 647 |
while ((bio = bio_list_pop(writes))) { |
5fc2ffeab dm raid1: support... |
648 649 |
if ((bio->bi_rw & REQ_FLUSH) || (bio->bi_rw & REQ_DISCARD)) { |
4184153f9 dm raid1: support... |
650 651 652 |
bio_list_add(&sync, bio); continue; } |
7513c2a76 dm raid1: add is_... |
653 654 655 656 657 658 659 660 661 |
region = dm_rh_bio_to_region(ms->rh, bio); if (log->type->is_remote_recovering && log->type->is_remote_recovering(log, region)) { bio_list_add(&requeue, bio); continue; } state = dm_rh_get_state(ms->rh, region, 1); |
1da177e4c Linux-2.6.12-rc2 |
662 |
switch (state) { |
1f965b194 dm raid1: separat... |
663 664 |
case DM_RH_CLEAN: case DM_RH_DIRTY: |
1da177e4c Linux-2.6.12-rc2 |
665 666 |
this_list = &sync; break; |
1f965b194 dm raid1: separat... |
667 |
case DM_RH_NOSYNC: |
1da177e4c Linux-2.6.12-rc2 |
668 669 |
this_list = &nosync; break; |
1f965b194 dm raid1: separat... |
670 |
case DM_RH_RECOVERING: |
1da177e4c Linux-2.6.12-rc2 |
671 672 673 674 675 676 677 678 |
this_list = &recover; break; } bio_list_add(this_list, bio); } /* |
7513c2a76 dm raid1: add is_... |
679 680 681 682 683 684 685 |
* Add bios that are delayed due to remote recovery * back on to the write queue */ if (unlikely(requeue.head)) { spin_lock_irq(&ms->lock); bio_list_merge(&ms->writes, &requeue); spin_unlock_irq(&ms->lock); |
69885683d dm raid1: wake km... |
686 |
delayed_wake(ms); |
7513c2a76 dm raid1: add is_... |
687 688 689 |
} /* |
1da177e4c Linux-2.6.12-rc2 |
690 691 692 693 |
* Increment the pending counts for any regions that will * be written to (writes to recover regions are going to * be delayed). */ |
1f965b194 dm raid1: separat... |
694 695 |
dm_rh_inc_pending(ms->rh, &sync); dm_rh_inc_pending(ms->rh, &nosync); |
d2b698644 dm raid1: do not ... |
696 697 698 699 700 701 702 |
/* * If the flush fails on a previous call and succeeds here, * we must not reset the log_failure variable. We need * userspace interaction to do that. */ ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure; |
1da177e4c Linux-2.6.12-rc2 |
703 704 705 706 |
/* * Dispatch io. */ |
5528d17de dm raid1: fail wr... |
707 |
if (unlikely(ms->log_failure) && errors_handled(ms)) { |
b80aa7a0c dm raid1: fix EIO... |
708 709 710 |
spin_lock_irq(&ms->lock); bio_list_merge(&ms->failures, &sync); spin_unlock_irq(&ms->lock); |
1f965b194 dm raid1: separat... |
711 |
wakeup_mirrord(ms); |
b80aa7a0c dm raid1: fix EIO... |
712 |
} else |
fc1ff9588 dm raid1: handle ... |
713 |
while ((bio = bio_list_pop(&sync))) |
b80aa7a0c dm raid1: fix EIO... |
714 |
do_write(ms, bio); |
1da177e4c Linux-2.6.12-rc2 |
715 716 |
while ((bio = bio_list_pop(&recover))) |
1f965b194 dm raid1: separat... |
717 |
dm_rh_delay(ms->rh, bio); |
1da177e4c Linux-2.6.12-rc2 |
718 719 |
while ((bio = bio_list_pop(&nosync))) { |
ede5ea0b8 dm raid1: always ... |
720 721 722 723 724 725 |
if (unlikely(ms->leg_failure) && errors_handled(ms)) { spin_lock_irq(&ms->lock); bio_list_add(&ms->failures, bio); spin_unlock_irq(&ms->lock); wakeup_mirrord(ms); } else { |
929be8fcb dm raid1: hold al... |
726 727 728 |
map_bio(get_default_mirror(ms), bio); generic_make_request(bio); } |
1da177e4c Linux-2.6.12-rc2 |
729 730 |
} } |
72f4b3141 dm raid1: handle ... |
731 732 733 |
static void do_failures(struct mirror_set *ms, struct bio_list *failures) { struct bio *bio; |
0f398a840 dm raid1: use hol... |
734 |
if (likely(!failures->head)) |
72f4b3141 dm raid1: handle ... |
735 |
return; |
b80aa7a0c dm raid1: fix EIO... |
736 737 |
/* * If the log has failed, unattempted writes are being |
0f398a840 dm raid1: use hol... |
738 |
* put on the holds list. We can't issue those writes |
b80aa7a0c dm raid1: fix EIO... |
739 740 741 742 743 744 745 746 747 748 749 750 751 752 |
* until a log has been marked, so we must store them. * * If a 'noflush' suspend is in progress, we can requeue * the I/O's to the core. This give userspace a chance * to reconfigure the mirror, at which point the core * will reissue the writes. If the 'noflush' flag is * not set, we have no choice but to return errors. * * Some writes on the failures list may have been * submitted before the log failure and represent a * failure to write to one of the devices. It is ok * for us to treat them the same and requeue them * as well. */ |
0f398a840 dm raid1: use hol... |
753 |
while ((bio = bio_list_pop(failures))) { |
60f355ead dm raid1: hold wr... |
754 |
if (!ms->log_failure) { |
0f398a840 dm raid1: use hol... |
755 |
ms->in_sync = 0; |
c58098be9 dm raid1: remove ... |
756 |
dm_rh_mark_nosync(ms->rh, bio); |
0f398a840 dm raid1: use hol... |
757 |
} |
60f355ead dm raid1: hold wr... |
758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 |
/* * If all the legs are dead, fail the I/O. * If we have been told to handle errors, hold the bio * and wait for userspace to deal with the problem. * Otherwise pretend that the I/O succeeded. (This would * be wrong if the failed leg returned after reboot and * got replicated back to the good legs.) */ if (!get_valid_mirror(ms)) bio_endio(bio, -EIO); else if (errors_handled(ms)) hold_bio(ms, bio); else bio_endio(bio, 0); |
b80aa7a0c dm raid1: fix EIO... |
773 |
} |
72f4b3141 dm raid1: handle ... |
774 775 776 777 778 779 780 781 782 |
} static void trigger_event(struct work_struct *work) { struct mirror_set *ms = container_of(work, struct mirror_set, trigger_event); dm_table_event(ms->ti->table); } |
1da177e4c Linux-2.6.12-rc2 |
783 784 785 |
/*----------------------------------------------------------------- * kmirrord *---------------------------------------------------------------*/ |
a2aebe03b dm raid1: use timer |
786 |
static void do_mirror(struct work_struct *work) |
1da177e4c Linux-2.6.12-rc2 |
787 |
{ |
1f965b194 dm raid1: separat... |
788 789 |
struct mirror_set *ms = container_of(work, struct mirror_set, kmirrord_work); |
72f4b3141 dm raid1: handle ... |
790 791 |
struct bio_list reads, writes, failures; unsigned long flags; |
1da177e4c Linux-2.6.12-rc2 |
792 |
|
72f4b3141 dm raid1: handle ... |
793 |
spin_lock_irqsave(&ms->lock, flags); |
1da177e4c Linux-2.6.12-rc2 |
794 795 |
reads = ms->reads; writes = ms->writes; |
72f4b3141 dm raid1: handle ... |
796 |
failures = ms->failures; |
1da177e4c Linux-2.6.12-rc2 |
797 798 |
bio_list_init(&ms->reads); bio_list_init(&ms->writes); |
72f4b3141 dm raid1: handle ... |
799 800 |
bio_list_init(&ms->failures); spin_unlock_irqrestore(&ms->lock, flags); |
1da177e4c Linux-2.6.12-rc2 |
801 |
|
1f965b194 dm raid1: separat... |
802 |
dm_rh_update_states(ms->rh, errors_handled(ms)); |
1da177e4c Linux-2.6.12-rc2 |
803 804 805 |
do_recovery(ms); do_reads(ms, &reads); do_writes(ms, &writes); |
72f4b3141 dm raid1: handle ... |
806 |
do_failures(ms, &failures); |
1da177e4c Linux-2.6.12-rc2 |
807 |
} |
1da177e4c Linux-2.6.12-rc2 |
808 809 810 811 812 813 |
/*-----------------------------------------------------------------
 * Target functions
 *---------------------------------------------------------------*/

/*
 * Allocate and initialise a mirror_set with room for @nr_mirrors legs.
 *
 * Creates the read-record mempool, the dm_io client and the region hash
 * (bound to dirty log @dl).  The mirror legs themselves are not filled in
 * here.
 *
 * Returns the new mirror_set, or NULL with ti->error describing the
 * failure.  On failure everything allocated here is released; @dl is not
 * destroyed by this function.
 */
static struct mirror_set *alloc_context(unsigned int nr_mirrors,
					uint32_t region_size,
					struct dm_target *ti,
					struct dm_dirty_log *dl)
{
	struct mirror_set *ms;
	size_t len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors);

	ms = kzalloc(len, GFP_KERNEL);
	if (!ms) {
		ti->error = "Cannot allocate mirror context";
		return NULL;
	}

	spin_lock_init(&ms->lock);
	bio_list_init(&ms->reads);
	bio_list_init(&ms->writes);
	bio_list_init(&ms->failures);
	bio_list_init(&ms->holds);

	ms->ti = ti;
	ms->nr_mirrors = nr_mirrors;
	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
	ms->in_sync = 0;
	ms->log_failure = 0;
	ms->leg_failure = 0;
	atomic_set(&ms->suspend, 0);
	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);

	ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
						_dm_raid1_read_record_cache);
	if (!ms->read_record_pool) {
		ti->error = "Error creating mirror read_record_pool";
		goto bad_pool;
	}

	ms->io_client = dm_io_client_create();
	if (IS_ERR(ms->io_client)) {
		ti->error = "Error creating dm_io client";
		goto bad_io_client;
	}

	ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
				       wakeup_all_recovery_waiters,
				       ms->ti->begin, MAX_RECOVERY,
				       dl, region_size, ms->nr_regions);
	if (IS_ERR(ms->rh)) {
		ti->error = "Error creating dirty region hash";
		goto bad_region_hash;
	}

	return ms;

	/* Unwind in reverse order of construction. */
bad_region_hash:
	dm_io_client_destroy(ms->io_client);
bad_io_client:
	mempool_destroy(ms->read_record_pool);
bad_pool:
	kfree(ms);
	return NULL;
}

/*
 * Tear down a mirror_set built by alloc_context().
 *
 * @m is the number of legs whose devices have been acquired so far; those
 * are released (highest index first) before the io client, region hash,
 * read-record pool and the mirror_set itself are freed.
 */
static void free_context(struct mirror_set *ms, struct dm_target *ti,
			 unsigned int m)
{
	for (; m > 0; m--)
		dm_put_device(ti, ms->mirror[m - 1].dev);

	dm_io_client_destroy(ms->io_client);
	dm_region_hash_destroy(ms->rh);
	mempool_destroy(ms->read_record_pool);
	kfree(ms);
}
1da177e4c Linux-2.6.12-rc2 |
878 879 880 |
static int get_mirror(struct mirror_set *ms, struct dm_target *ti, unsigned int mirror, char **argv) { |
4ee218cd6 [PATCH] dm: remov... |
881 |
unsigned long long offset; |
1da177e4c Linux-2.6.12-rc2 |
882 |
|
4ee218cd6 [PATCH] dm: remov... |
883 |
if (sscanf(argv[1], "%llu", &offset) != 1) { |
72d948616 [PATCH] dm: impro... |
884 |
ti->error = "Invalid offset"; |
1da177e4c Linux-2.6.12-rc2 |
885 886 |
return -EINVAL; } |
8215d6ec5 dm table: remove ... |
887 |
if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), |
1da177e4c Linux-2.6.12-rc2 |
888 |
&ms->mirror[mirror].dev)) { |
72d948616 [PATCH] dm: impro... |
889 |
ti->error = "Device lookup failure"; |
1da177e4c Linux-2.6.12-rc2 |
890 891 |
return -ENXIO; } |
aa5617c55 dm raid1: add mir... |
892 |
ms->mirror[mirror].ms = ms; |
72f4b3141 dm raid1: handle ... |
893 894 |
atomic_set(&(ms->mirror[mirror].error_count), 0); ms->mirror[mirror].error_type = 0; |
1da177e4c Linux-2.6.12-rc2 |
895 896 897 898 |
ms->mirror[mirror].offset = offset; return 0; } |
1da177e4c Linux-2.6.12-rc2 |
899 900 901 |
/* * Create dirty log: log_type #log_params <log_params> */ |
416cd17b1 dm log: clean int... |
902 |
static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, |
1f965b194 dm raid1: separat... |
903 904 |
unsigned argc, char **argv, unsigned *args_used) |
1da177e4c Linux-2.6.12-rc2 |
905 |
{ |
1f965b194 dm raid1: separat... |
906 |
unsigned param_count; |
416cd17b1 dm log: clean int... |
907 |
struct dm_dirty_log *dl; |
1da177e4c Linux-2.6.12-rc2 |
908 909 |
if (argc < 2) { |
72d948616 [PATCH] dm: impro... |
910 |
ti->error = "Insufficient mirror log arguments"; |
1da177e4c Linux-2.6.12-rc2 |
911 912 913 914 |
return NULL; } if (sscanf(argv[1], "%u", ¶m_count) != 1) { |
72d948616 [PATCH] dm: impro... |
915 |
ti->error = "Invalid mirror log argument count"; |
1da177e4c Linux-2.6.12-rc2 |
916 917 918 919 920 921 |
return NULL; } *args_used = 2 + param_count; if (argc < *args_used) { |
72d948616 [PATCH] dm: impro... |
922 |
ti->error = "Insufficient mirror log arguments"; |
1da177e4c Linux-2.6.12-rc2 |
923 924 |
return NULL; } |
c0da3748b dm raid1: impleme... |
925 926 |
dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count, argv + 2); |
1da177e4c Linux-2.6.12-rc2 |
927 |
if (!dl) { |
72d948616 [PATCH] dm: impro... |
928 |
ti->error = "Error creating mirror dirty log"; |
1da177e4c Linux-2.6.12-rc2 |
929 930 |
return NULL; } |
1da177e4c Linux-2.6.12-rc2 |
931 932 |
return dl; } |
a8e6afa23 dm raid1: add han... |
933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 |
static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, unsigned *args_used) { unsigned num_features; struct dm_target *ti = ms->ti; *args_used = 0; if (!argc) return 0; if (sscanf(argv[0], "%u", &num_features) != 1) { ti->error = "Invalid number of features"; return -EINVAL; } argc--; argv++; (*args_used)++; if (num_features > argc) { ti->error = "Not enough arguments to support feature count"; return -EINVAL; } if (!strcmp("handle_errors", argv[0])) ms->features |= DM_RAID1_HANDLE_ERRORS; else { ti->error = "Unrecognised feature requested"; return -EINVAL; } (*args_used)++; return 0; } |
1da177e4c Linux-2.6.12-rc2 |
969 970 971 972 973 |
/* * Construct a mirror mapping: * * log_type #log_params <log_params> * #mirrors [mirror_path offset]{2,} |
a8e6afa23 dm raid1: add han... |
974 |
* [#features <features>] |
1da177e4c Linux-2.6.12-rc2 |
975 976 977 |
* * log_type is "core" or "disk" * #log_params is between 1 and 3 |
a8e6afa23 dm raid1: add han... |
978 979 |
* * If present, features must be "handle_errors". |
1da177e4c Linux-2.6.12-rc2 |
980 |
*/ |
1da177e4c Linux-2.6.12-rc2 |
981 982 983 984 985 |
static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) { int r; unsigned int nr_mirrors, m, args_used; struct mirror_set *ms; |
416cd17b1 dm log: clean int... |
986 |
struct dm_dirty_log *dl; |
1da177e4c Linux-2.6.12-rc2 |
987 988 989 990 991 992 993 994 995 |
dl = create_dirty_log(ti, argc, argv, &args_used); if (!dl) return -EINVAL; argv += args_used; argc -= args_used; if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || |
eb69aca5d dm kcopyd: clean ... |
996 |
nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { |
72d948616 [PATCH] dm: impro... |
997 |
ti->error = "Invalid number of mirrors"; |
416cd17b1 dm log: clean int... |
998 |
dm_dirty_log_destroy(dl); |
1da177e4c Linux-2.6.12-rc2 |
999 1000 1001 1002 |
return -EINVAL; } argv++, argc--; |
a8e6afa23 dm raid1: add han... |
1003 1004 |
if (argc < nr_mirrors * 2) { ti->error = "Too few mirror arguments"; |
416cd17b1 dm log: clean int... |
1005 |
dm_dirty_log_destroy(dl); |
1da177e4c Linux-2.6.12-rc2 |
1006 1007 1008 1009 1010 |
return -EINVAL; } ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl); if (!ms) { |
416cd17b1 dm log: clean int... |
1011 |
dm_dirty_log_destroy(dl); |
1da177e4c Linux-2.6.12-rc2 |
1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 |
return -ENOMEM; } /* Get the mirror parameter sets */ for (m = 0; m < nr_mirrors; m++) { r = get_mirror(ms, ti, m, argv); if (r) { free_context(ms, ti, m); return r; } argv += 2; argc -= 2; } ti->private = ms; |
1f965b194 dm raid1: separat... |
1027 |
ti->split_io = dm_rh_get_region_size(ms->rh); |
4184153f9 dm raid1: support... |
1028 |
ti->num_flush_requests = 1; |
5fc2ffeab dm raid1: support... |
1029 |
ti->num_discard_requests = 1; |
1da177e4c Linux-2.6.12-rc2 |
1030 |
|
9c4376de9 dm: use non reent... |
1031 1032 |
ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); |
6ad36fe2b dm raid1: one kmi... |
1033 1034 |
if (!ms->kmirrord_wq) { DMERR("couldn't start kmirrord"); |
a72cf737e dm raid1: fix lea... |
1035 1036 |
r = -ENOMEM; goto err_free_context; |
6ad36fe2b dm raid1: one kmi... |
1037 1038 |
} INIT_WORK(&ms->kmirrord_work, do_mirror); |
a2aebe03b dm raid1: use timer |
1039 1040 |
init_timer(&ms->timer); ms->timer_pending = 0; |
72f4b3141 dm raid1: handle ... |
1041 |
INIT_WORK(&ms->trigger_event, trigger_event); |
6ad36fe2b dm raid1: one kmi... |
1042 |
|
a8e6afa23 dm raid1: add han... |
1043 |
r = parse_features(ms, argc, argv, &args_used); |
a72cf737e dm raid1: fix lea... |
1044 1045 |
if (r) goto err_destroy_wq; |
a8e6afa23 dm raid1: add han... |
1046 1047 1048 |
argv += args_used; argc -= args_used; |
f44db678e dm raid1: handle ... |
1049 1050 1051 1052 1053 1054 1055 1056 |
/* * Any read-balancing addition depends on the * DM_RAID1_HANDLE_ERRORS flag being present. * This is because the decision to balance depends * on the sync state of a region. If the above * flag is not present, we ignore errors; and * the sync state may be inaccurate. */ |
a8e6afa23 dm raid1: add han... |
1057 1058 |
if (argc) { ti->error = "Too many mirror arguments"; |
a72cf737e dm raid1: fix lea... |
1059 1060 |
r = -EINVAL; goto err_destroy_wq; |
a8e6afa23 dm raid1: add han... |
1061 |
} |
fa34ce730 dm kcopyd: return... |
1062 1063 1064 |
ms->kcopyd_client = dm_kcopyd_client_create(); if (IS_ERR(ms->kcopyd_client)) { r = PTR_ERR(ms->kcopyd_client); |
a72cf737e dm raid1: fix lea... |
1065 |
goto err_destroy_wq; |
fa34ce730 dm kcopyd: return... |
1066 |
} |
1da177e4c Linux-2.6.12-rc2 |
1067 |
|
1f965b194 dm raid1: separat... |
1068 |
wakeup_mirrord(ms); |
1da177e4c Linux-2.6.12-rc2 |
1069 |
return 0; |
a72cf737e dm raid1: fix lea... |
1070 1071 1072 1073 1074 1075 |
err_destroy_wq: destroy_workqueue(ms->kmirrord_wq); err_free_context: free_context(ms, ti, ms->nr_mirrors); return r; |
1da177e4c Linux-2.6.12-rc2 |
1076 1077 1078 1079 1080 |
} static void mirror_dtr(struct dm_target *ti) { struct mirror_set *ms = (struct mirror_set *) ti->private; |
a2aebe03b dm raid1: use timer |
1081 |
del_timer_sync(&ms->timer); |
6ad36fe2b dm raid1: one kmi... |
1082 |
flush_workqueue(ms->kmirrord_wq); |
d5ffa387e dm: dont use flus... |
1083 |
flush_work_sync(&ms->trigger_event); |
eb69aca5d dm kcopyd: clean ... |
1084 |
dm_kcopyd_client_destroy(ms->kcopyd_client); |
6ad36fe2b dm raid1: one kmi... |
1085 |
destroy_workqueue(ms->kmirrord_wq); |
1da177e4c Linux-2.6.12-rc2 |
1086 1087 |
free_context(ms, ti, ms->nr_mirrors); } |
1da177e4c Linux-2.6.12-rc2 |
1088 1089 1090 1091 1092 1093 1094 1095 1096 |
/* * Mirror mapping function */ static int mirror_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; |
06386bbfd dm raid1: handle ... |
1097 |
struct dm_raid1_read_record *read_record = NULL; |
1f965b194 dm raid1: separat... |
1098 |
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
1da177e4c Linux-2.6.12-rc2 |
1099 1100 |
if (rw == WRITE) { |
06386bbfd dm raid1: handle ... |
1101 |
/* Save region for mirror_end_io() handler */ |
1f965b194 dm raid1: separat... |
1102 |
map_context->ll = dm_rh_bio_to_region(ms->rh, bio); |
1da177e4c Linux-2.6.12-rc2 |
1103 |
queue_bio(ms, bio, rw); |
d2a7ad29a [PATCH] dm: map a... |
1104 |
return DM_MAPIO_SUBMITTED; |
1da177e4c Linux-2.6.12-rc2 |
1105 |
} |
1f965b194 dm raid1: separat... |
1106 |
r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0); |
1da177e4c Linux-2.6.12-rc2 |
1107 1108 |
if (r < 0 && r != -EWOULDBLOCK) return r; |
1da177e4c Linux-2.6.12-rc2 |
1109 |
/* |
06386bbfd dm raid1: handle ... |
1110 |
* If region is not in-sync queue the bio. |
1da177e4c Linux-2.6.12-rc2 |
1111 |
*/ |
06386bbfd dm raid1: handle ... |
1112 1113 1114 |
if (!r || (r == -EWOULDBLOCK)) { if (rw == READA) return -EWOULDBLOCK; |
1da177e4c Linux-2.6.12-rc2 |
1115 |
|
1da177e4c Linux-2.6.12-rc2 |
1116 |
queue_bio(ms, bio, rw); |
d2a7ad29a [PATCH] dm: map a... |
1117 |
return DM_MAPIO_SUBMITTED; |
1da177e4c Linux-2.6.12-rc2 |
1118 |
} |
06386bbfd dm raid1: handle ... |
1119 1120 1121 1122 |
/* * The region is in-sync and we can perform reads directly. * Store enough information so we can retry if it fails. */ |
1da177e4c Linux-2.6.12-rc2 |
1123 |
m = choose_mirror(ms, bio->bi_sector); |
06386bbfd dm raid1: handle ... |
1124 |
if (unlikely(!m)) |
1da177e4c Linux-2.6.12-rc2 |
1125 |
return -EIO; |
06386bbfd dm raid1: handle ... |
1126 1127 1128 1129 1130 1131 1132 1133 |
read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); if (likely(read_record)) { dm_bio_record(&read_record->details, bio); map_context->ptr = read_record; read_record->m = m; } map_bio(m, bio); |
d2a7ad29a [PATCH] dm: map a... |
1134 |
return DM_MAPIO_REMAPPED; |
1da177e4c Linux-2.6.12-rc2 |
1135 1136 1137 1138 1139 1140 1141 |
} static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error, union map_info *map_context) { int rw = bio_rw(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; |
06386bbfd dm raid1: handle ... |
1142 1143 1144 |
struct mirror *m = NULL; struct dm_bio_details *bd = NULL; struct dm_raid1_read_record *read_record = map_context->ptr; |
1da177e4c Linux-2.6.12-rc2 |
1145 1146 1147 1148 |
/* * We need to dec pending if this was a write. */ |
06386bbfd dm raid1: handle ... |
1149 |
if (rw == WRITE) { |
d87f4c14f dm: implement REQ... |
1150 |
if (!(bio->bi_rw & REQ_FLUSH)) |
4184153f9 dm raid1: support... |
1151 |
dm_rh_dec(ms->rh, map_context->ll); |
06386bbfd dm raid1: handle ... |
1152 1153 |
return error; } |
1da177e4c Linux-2.6.12-rc2 |
1154 |
|
06386bbfd dm raid1: handle ... |
1155 1156 |
if (error == -EOPNOTSUPP) goto out; |
7b6d91dae block: unify flag... |
1157 |
if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) |
06386bbfd dm raid1: handle ... |
1158 1159 1160 1161 1162 1163 1164 1165 1166 |
goto out; if (unlikely(error)) { if (!read_record) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other * mirror in-sync. */ |
e03f1a842 dm-raid1.c: fix N... |
1167 |
DMERR_LIMIT("Mirror read failed."); |
06386bbfd dm raid1: handle ... |
1168 1169 |
return -EIO; } |
e03f1a842 dm-raid1.c: fix N... |
1170 1171 |
m = read_record->m; |
06386bbfd dm raid1: handle ... |
1172 1173 |
DMERR("Mirror read failed from %s. Trying alternative device.", m->dev->name); |
06386bbfd dm raid1: handle ... |
1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 |
fail_mirror(m, DM_RAID1_READ_ERROR); /* * A failed read is requeued for another attempt using an intact * mirror. */ if (default_ok(m) || mirror_available(ms, bio)) { bd = &read_record->details; dm_bio_restore(bd, bio); mempool_free(read_record, ms->read_record_pool); map_context->ptr = NULL; queue_bio(ms, bio, rw); return 1; } DMERR("All replicated volumes dead, failing I/O"); } out: if (read_record) { mempool_free(read_record, ms->read_record_pool); map_context->ptr = NULL; } return error; |
1da177e4c Linux-2.6.12-rc2 |
1199 |
} |
b80aa7a0c dm raid1: fix EIO... |
1200 |
static void mirror_presuspend(struct dm_target *ti) |
1da177e4c Linux-2.6.12-rc2 |
1201 1202 |
{ struct mirror_set *ms = (struct mirror_set *) ti->private; |
1f965b194 dm raid1: separat... |
1203 |
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
1da177e4c Linux-2.6.12-rc2 |
1204 |
|
047885076 dm raid1: add fra... |
1205 1206 |
struct bio_list holds; struct bio *bio; |
b80aa7a0c dm raid1: fix EIO... |
1207 1208 1209 |
atomic_set(&ms->suspend, 1); /* |
f07030409 dm raid1: fix dea... |
1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 |
* Process bios in the hold list to start recovery waiting * for bios in the hold list. After the process, no bio has * a chance to be added in the hold list because ms->suspend * is set. */ spin_lock_irq(&ms->lock); holds = ms->holds; bio_list_init(&ms->holds); spin_unlock_irq(&ms->lock); while ((bio = bio_list_pop(&holds))) hold_bio(ms, bio); /* |
b80aa7a0c dm raid1: fix EIO... |
1224 1225 1226 |
* We must finish up all the work that we've * generated (i.e. recovery work). */ |
1f965b194 dm raid1: separat... |
1227 |
dm_rh_stop_recovery(ms->rh); |
33184048d [PATCH] dm: raid1... |
1228 |
|
33184048d [PATCH] dm: raid1... |
1229 |
wait_event(_kmirrord_recovery_stopped, |
1f965b194 dm raid1: separat... |
1230 |
!dm_rh_recovery_in_flight(ms->rh)); |
33184048d [PATCH] dm: raid1... |
1231 |
|
b80aa7a0c dm raid1: fix EIO... |
1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 |
if (log->type->presuspend && log->type->presuspend(log)) /* FIXME: need better error handling */ DMWARN("log presuspend failed"); /* * Now that recovery is complete/stopped and the * delayed bios are queued, we need to wait for * the worker thread to complete. This way, * we know that all of our I/O has been pushed. */ flush_workqueue(ms->kmirrord_wq); } static void mirror_postsuspend(struct dm_target *ti) { struct mirror_set *ms = ti->private; |
1f965b194 dm raid1: separat... |
1248 |
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
b80aa7a0c dm raid1: fix EIO... |
1249 |
|
6b3df0d7a dm log: split sus... |
1250 |
if (log->type->postsuspend && log->type->postsuspend(log)) |
1da177e4c Linux-2.6.12-rc2 |
1251 |
/* FIXME: need better error handling */ |
b80aa7a0c dm raid1: fix EIO... |
1252 |
DMWARN("log postsuspend failed"); |
1da177e4c Linux-2.6.12-rc2 |
1253 1254 1255 1256 |
} static void mirror_resume(struct dm_target *ti) { |
b80aa7a0c dm raid1: fix EIO... |
1257 |
struct mirror_set *ms = ti->private; |
1f965b194 dm raid1: separat... |
1258 |
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); |
b80aa7a0c dm raid1: fix EIO... |
1259 1260 |
atomic_set(&ms->suspend, 0); |
1da177e4c Linux-2.6.12-rc2 |
1261 1262 1263 |
if (log->type->resume && log->type->resume(log)) /* FIXME: need better error handling */ DMWARN("log resume failed"); |
1f965b194 dm raid1: separat... |
1264 |
dm_rh_start_recovery(ms->rh); |
1da177e4c Linux-2.6.12-rc2 |
1265 |
} |
af195ac82 dm raid1: report ... |
1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 |
/* * device_status_char * @m: mirror device/leg we want the status of * * We return one character representing the most severe error * we have encountered. * A => Alive - No failures * D => Dead - A write failure occurred leaving mirror out-of-sync * S => Sync - A sychronization failure occurred, mirror out-of-sync * R => Read - A read failure occurred, mirror data unaffected * * Returns: <char> */ static char device_status_char(struct mirror *m) { if (!atomic_read(&(m->error_count))) return 'A'; |
64b30c46e dm raid1: report ... |
1283 1284 |
return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' : (test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' : |
af195ac82 dm raid1: report ... |
1285 1286 1287 |
(test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' : (test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U'; } |
1da177e4c Linux-2.6.12-rc2 |
1288 1289 1290 |
/*
 * Emit target status into @result (at most @maxlen bytes).
 *
 * STATUSTYPE_INFO:  "<#mirrors> <dev>... <sync_count>/<nr_regions> 1
 *                    <one status char per leg> " followed by the dirty
 *                    log's own status.
 * STATUSTYPE_TABLE: the dirty log's table line, then "<#mirrors>" and a
 *                   " <dev> <offset>" pair per leg, plus
 *                   " 1 handle_errors" when that feature is set.
 *
 * Always returns 0.
 */
static int mirror_status(struct dm_target *ti, status_type_t type,
			 char *result, unsigned int maxlen)
{
	unsigned int m, sz = 0;
	struct mirror_set *ms = (struct mirror_set *) ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	/*
	 * One status character per leg plus the terminating NUL.  The
	 * constructor rejects more than DM_KCOPYD_MAX_REGIONS + 1 legs, so
	 * a fixed-size buffer suffices and avoids a variable-length array
	 * on the kernel stack.
	 */
	char buffer[DM_KCOPYD_MAX_REGIONS + 2];

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%d ", ms->nr_mirrors);
		for (m = 0; m < ms->nr_mirrors; m++) {
			DMEMIT("%s ", ms->mirror[m].dev->name);
			buffer[m] = device_status_char(&(ms->mirror[m]));
		}
		buffer[m] = '\0';

		DMEMIT("%llu/%llu 1 %s ",
		      (unsigned long long)log->type->get_sync_count(log),
		      (unsigned long long)ms->nr_regions, buffer);

		sz += log->type->status(log, type, result+sz, maxlen-sz);

		break;

	case STATUSTYPE_TABLE:
		sz = log->type->status(log, type, result, maxlen);

		DMEMIT("%d", ms->nr_mirrors);
		for (m = 0; m < ms->nr_mirrors; m++)
			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
			       (unsigned long long)ms->mirror[m].offset);

		if (ms->features & DM_RAID1_HANDLE_ERRORS)
			DMEMIT(" 1 handle_errors");
	}

	return 0;
}
af4874e03 dm target:s intro... |
1327 1328 1329 1330 1331 1332 1333 1334 1335 |
/*
 * Call @fn for each mirror leg's device, passing the leg's offset and the
 * target length.  Iteration stops at the first non-zero result, which is
 * returned; 0 is returned if every call returned 0.
 */
static int mirror_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct mirror_set *ms = ti->private;
	unsigned i;
	int ret = 0;

	for (i = 0; i < ms->nr_mirrors; i++) {
		ret = fn(ti, ms->mirror[i].dev,
			 ms->mirror[i].offset, ti->len, data);
		if (ret)
			break;
	}

	return ret;
}
1da177e4c Linux-2.6.12-rc2 |
1340 1341 |
static struct target_type mirror_target = { .name = "mirror", |
9c4376de9 dm: use non reent... |
1342 |
.version = {1, 12, 1}, |
1da177e4c Linux-2.6.12-rc2 |
1343 1344 1345 1346 1347 |
.module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, .map = mirror_map, .end_io = mirror_end_io, |
b80aa7a0c dm raid1: fix EIO... |
1348 |
.presuspend = mirror_presuspend, |
1da177e4c Linux-2.6.12-rc2 |
1349 1350 1351 |
.postsuspend = mirror_postsuspend, .resume = mirror_resume, .status = mirror_status, |
af4874e03 dm target:s intro... |
1352 |
.iterate_devices = mirror_iterate_devices, |
1da177e4c Linux-2.6.12-rc2 |
1353 1354 1355 1356 1357 |
}; static int __init dm_mirror_init(void) { int r; |
95f8fac8d dm raid1: switch ... |
1358 1359 1360 1361 1362 1363 |
_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); if (!_dm_raid1_read_record_cache) { DMERR("Can't allocate dm_raid1_read_record cache"); r = -ENOMEM; goto bad_cache; } |
1da177e4c Linux-2.6.12-rc2 |
1364 |
r = dm_register_target(&mirror_target); |
95f8fac8d dm raid1: switch ... |
1365 |
if (r < 0) { |
0cd331243 dm: remove duplic... |
1366 |
DMERR("Failed to register mirror target"); |
95f8fac8d dm raid1: switch ... |
1367 1368 1369 1370 |
goto bad_target; } return 0; |
1da177e4c Linux-2.6.12-rc2 |
1371 |
|
95f8fac8d dm raid1: switch ... |
1372 1373 1374 |
bad_target: kmem_cache_destroy(_dm_raid1_read_record_cache); bad_cache: |
1da177e4c Linux-2.6.12-rc2 |
1375 1376 1377 1378 1379 |
return r; } static void __exit dm_mirror_exit(void) { |
10d3bd09a dm: consolidate t... |
1380 |
dm_unregister_target(&mirror_target); |
95f8fac8d dm raid1: switch ... |
1381 |
kmem_cache_destroy(_dm_raid1_read_record_cache); |
1da177e4c Linux-2.6.12-rc2 |
1382 1383 1384 1385 1386 1387 1388 1389 1390 |
} /* Module hooks */ module_init(dm_mirror_init); module_exit(dm_mirror_exit); MODULE_DESCRIPTION(DM_NAME " mirror target"); MODULE_AUTHOR("Joe Thornber"); MODULE_LICENSE("GPL"); |