Blame view

drivers/md/dm-raid1.c 34.1 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
  /*
   * Copyright (C) 2003 Sistina Software Limited.
1f965b194   Heinz Mauelshagen   dm raid1: separat...
3
   * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4
5
6
   *
   * This file is released under the GPL.
   */
06386bbfd   Jonathan Brassow   dm raid1: handle ...
7
  #include "dm-bio-record.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
12
13
  #include <linux/init.h>
  #include <linux/mempool.h>
  #include <linux/module.h>
  #include <linux/pagemap.h>
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
  #include <linux/workqueue.h>
1f965b194   Heinz Mauelshagen   dm raid1: separat...
15
  #include <linux/device-mapper.h>
a765e20ee   Alasdair G Kergon   dm: move include ...
16
17
18
  #include <linux/dm-io.h>
  #include <linux/dm-dirty-log.h>
  #include <linux/dm-kcopyd.h>
1f965b194   Heinz Mauelshagen   dm raid1: separat...
19
  #include <linux/dm-region-hash.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20

72d948616   Alasdair G Kergon   [PATCH] dm: impro...
21
  #define DM_MSG_PREFIX "raid1"
1f965b194   Heinz Mauelshagen   dm raid1: separat...
22
23
  
  #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
24

a8e6afa23   Jonathan E Brassow   dm raid1: add han...
25
  #define DM_RAID1_HANDLE_ERRORS 0x01
f44db678e   Jonathan Brassow   dm raid1: handle ...
26
  #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
27

33184048d   Jonathan E Brassow   [PATCH] dm: raid1...
28
  static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
  /*-----------------------------------------------------------------
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
31
32
   * Mirror set structures.
   *---------------------------------------------------------------*/
72f4b3141   Jonathan Brassow   dm raid1: handle ...
33
34
  /*
   * Kinds of failure that can be recorded against a mirror leg.
   *
   * fail_mirror() uses the value as a bit number in struct mirror's
   * error_type word, so the order of the enumerators must not change.
   */
  enum dm_raid1_error {
  	DM_RAID1_WRITE_ERROR,	/* reported from write_callback() */
  	DM_RAID1_FLUSH_ERROR,	/* reported from mirror_flush() */
  	DM_RAID1_SYNC_ERROR,	/* reported from recovery_complete() */
  	DM_RAID1_READ_ERROR	/* reported from read_callback() */
  };
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
39
  struct mirror {
aa5617c55   Jonathan Brassow   dm raid1: add mir...
40
  	struct mirror_set *ms;
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
41
  	atomic_t error_count;
39ed7adb1   Al Viro   dm-raid1 breakage...
42
  	unsigned long error_type;
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
43
44
45
46
47
48
49
  	struct dm_dev *dev;
  	sector_t offset;
  };
  
  struct mirror_set {
  	struct dm_target *ti;
  	struct list_head list;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
50

a8e6afa23   Jonathan E Brassow   dm raid1: add han...
51
  	uint64_t features;
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
52

72f4b3141   Jonathan Brassow   dm raid1: handle ...
53
  	spinlock_t lock;	/* protects the lists */
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
54
55
  	struct bio_list reads;
  	struct bio_list writes;
72f4b3141   Jonathan Brassow   dm raid1: handle ...
56
  	struct bio_list failures;
047885076   Mikulas Patocka   dm raid1: add fra...
57
  	struct bio_list holds;	/* bios are waiting until suspend */
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
58

1f965b194   Heinz Mauelshagen   dm raid1: separat...
59
60
  	struct dm_region_hash *rh;
  	struct dm_kcopyd_client *kcopyd_client;
88be163ab   Milan Broz   dm raid1: update ...
61
  	struct dm_io_client *io_client;
06386bbfd   Jonathan Brassow   dm raid1: handle ...
62
  	mempool_t *read_record_pool;
88be163ab   Milan Broz   dm raid1: update ...
63

e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
64
65
66
  	/* recovery */
  	region_t nr_regions;
  	int in_sync;
fc1ff9588   Jonathan Brassow   dm raid1: handle ...
67
  	int log_failure;
929be8fcb   Mikulas Patocka   dm raid1: hold al...
68
  	int leg_failure;
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
69
  	atomic_t suspend;
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
70

72f4b3141   Jonathan Brassow   dm raid1: handle ...
71
  	atomic_t default_mirror;	/* Default mirror */
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
72

6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
73
74
  	struct workqueue_struct *kmirrord_wq;
  	struct work_struct kmirrord_work;
a2aebe03b   Mikulas Patocka   dm raid1: use timer
75
76
  	struct timer_list timer;
  	unsigned long timer_pending;
72f4b3141   Jonathan Brassow   dm raid1: handle ...
77
  	struct work_struct trigger_event;
6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
78

1f965b194   Heinz Mauelshagen   dm raid1: separat...
79
  	unsigned nr_mirrors;
e4c8b3ba3   Neil Brown   [PATCH] dm: mirro...
80
81
  	struct mirror mirror[0];
  };
1f965b194   Heinz Mauelshagen   dm raid1: separat...
82
  static void wakeup_mirrord(void *context)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
84
  	struct mirror_set *ms = context;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
85

6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
86
87
  	queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
  }
a2aebe03b   Mikulas Patocka   dm raid1: use timer
88
89
90
91
92
  /*
   * Timer callback armed by delayed_wake(): clear the "timer pending"
   * bit so a new delayed wake can be armed, then kick the worker.
   *
   * @data: the struct mirror_set, cast to unsigned long.
   */
  static void delayed_wake_fn(unsigned long data)
  {
  	struct mirror_set *set = (struct mirror_set *) data;

  	clear_bit(0, &set->timer_pending);
  	wakeup_mirrord(set);
  }
  
  /*
   * Arrange for the worker to be woken HZ / 5 jiffies from now.
   *
   * Bit 0 of timer_pending is the "already armed" marker: if it was
   * already set nothing is done, so at most one timer is outstanding.
   */
  static void delayed_wake(struct mirror_set *ms)
  {
  	if (!test_and_set_bit(0, &ms->timer_pending)) {
  		ms->timer.function = delayed_wake_fn;
  		ms->timer.data = (unsigned long) ms;
  		ms->timer.expires = jiffies + HZ / 5;
  		add_timer(&ms->timer);
  	}
  }
1f965b194   Heinz Mauelshagen   dm raid1: separat...
106
  static void wakeup_all_recovery_waiters(void *context)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
107
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
108
  	wake_up_all(&_kmirrord_recovery_stopped);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
109
  }
1f965b194   Heinz Mauelshagen   dm raid1: separat...
110
  static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111
112
  {
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113
  	int should_wake = 0;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
114
  	struct bio_list *bl;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115

1f965b194   Heinz Mauelshagen   dm raid1: separat...
116
117
118
119
120
  	bl = (rw == WRITE) ? &ms->writes : &ms->reads;
  	spin_lock_irqsave(&ms->lock, flags);
  	should_wake = !(bl->head);
  	bio_list_add(bl, bio);
  	spin_unlock_irqrestore(&ms->lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
121
122
  
  	if (should_wake)
1f965b194   Heinz Mauelshagen   dm raid1: separat...
123
  		wakeup_mirrord(ms);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
124
  }
1f965b194   Heinz Mauelshagen   dm raid1: separat...
125
  static void dispatch_bios(void *context, struct bio_list *bio_list)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
126
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
127
128
  	struct mirror_set *ms = context;
  	struct bio *bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129

1f965b194   Heinz Mauelshagen   dm raid1: separat...
130
131
  	while ((bio = bio_list_pop(bio_list)))
  		queue_bio(ms, bio, WRITE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132
  }
06386bbfd   Jonathan Brassow   dm raid1: handle ...
133
134
135
136
137
  #define MIN_READ_RECORDS 20
  /*
   * Per-read bookkeeping: a mirror plus saved bio details.
   * NOTE(review): presumably lets a failed read be retried on another
   * leg; the users are outside this part of the file, so confirm there.
   */
  struct dm_raid1_read_record {
  	struct mirror *m;			/* mirror tied to the read */
  	struct dm_bio_details details;		/* saved bio state */
  };
95f8fac8d   Mikulas Patocka   dm raid1: switch ...
138
  static struct kmem_cache *_dm_raid1_read_record_cache;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139
140
141
142
143
144
  /*
   * Every mirror should look like this one.
   */
  #define DEFAULT_MIRROR 0
  
  /*
06386bbfd   Jonathan Brassow   dm raid1: handle ...
145
146
   * This is yucky.  We squirrel the mirror struct away inside
   * bi_next for read/write buffers.  This is safe since the bio
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
147
148
   * doesn't get submitted to the lower levels of block layer.
   */
06386bbfd   Jonathan Brassow   dm raid1: handle ...
149
  static struct mirror *bio_get_m(struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
  {
06386bbfd   Jonathan Brassow   dm raid1: handle ...
151
  	return (struct mirror *) bio->bi_next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
152
  }
06386bbfd   Jonathan Brassow   dm raid1: handle ...
153
  static void bio_set_m(struct bio *bio, struct mirror *m)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
  {
06386bbfd   Jonathan Brassow   dm raid1: handle ...
155
  	bio->bi_next = (struct bio *) m;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
  }
72f4b3141   Jonathan Brassow   dm raid1: handle ...
157
158
159
160
161
162
163
164
165
166
167
168
  /*
   * Return the leg currently selected as default; ms->default_mirror
   * holds its index into ms->mirror[].
   */
  static struct mirror *get_default_mirror(struct mirror_set *ms)
  {
  	int idx = atomic_read(&ms->default_mirror);

  	return ms->mirror + idx;
  }
  
  /*
   * Make @m the default leg of its set by storing its index within
   * ms->mirror[] in ms->default_mirror.
   */
  static void set_default_mirror(struct mirror *m)
  {
  	struct mirror_set *set = m->ms;

  	atomic_set(&set->default_mirror, m - set->mirror);
  }
87968ddd2   Mikulas Patocka   dm raid1: abstrac...
169
170
171
172
173
174
175
176
177
178
  /*
   * Return the lowest-indexed leg whose error_count is zero, or NULL
   * when every leg has recorded an error.
   */
  static struct mirror *get_valid_mirror(struct mirror_set *ms)
  {
  	unsigned idx;

  	for (idx = 0; idx < ms->nr_mirrors; idx++) {
  		struct mirror *leg = &ms->mirror[idx];

  		if (atomic_read(&leg->error_count) == 0)
  			return leg;
  	}

  	return NULL;
  }
72f4b3141   Jonathan Brassow   dm raid1: handle ...
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
  /* fail_mirror
   * @m: mirror device to fail
   * @error_type: one of the enum's, DM_RAID1_*_ERROR
   *
   * Always marks the set as having a failed leg, bumps the leg's
   * error_count and records @error_type in the leg's error bitmap.
   *
   * Nothing more happens if this type of error was already recorded
   * for the device, or if errors are not being handled.  Otherwise
   * userspace is signalled by scheduling the trigger_event work.
   * Additionally, if the device is the primary (default) device, a
   * new primary is chosen, but only if the mirror is in-sync.
   *
   * This function must not block.
   */
  static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
  {
  	struct mirror_set *ms = m->ms;
  	struct mirror *replacement;

  	ms->leg_failure = 1;

  	/*
  	 * error_count is used for nothing more than a
  	 * simple way to tell if a device has encountered
  	 * errors.
  	 */
  	atomic_inc(&m->error_count);

  	/* The bit is recorded even when errors are not handled. */
  	if (test_and_set_bit(error_type, &m->error_type) ||
  	    !errors_handled(ms))
  		return;

  	if (m == get_default_mirror(ms)) {
  		if (!ms->in_sync) {
  			/*
  			 * Better to issue requests to same failing device
  			 * than to risk returning corrupt data.
  			 */
  			DMERR("Primary mirror (%s) failed while out-of-sync: "
  			      "Reads may fail.", m->dev->name);
  		} else {
  			replacement = get_valid_mirror(ms);
  			if (replacement)
  				set_default_mirror(replacement);
  			else
  				DMWARN("All sides of mirror have failed.");
  		}
  	}

  	schedule_work(&ms->trigger_event);
  }
c0da3748b   Mikulas Patocka   dm raid1: impleme...
230
231
232
233
234
235
236
237
238
  static int mirror_flush(struct dm_target *ti)
  {
  	struct mirror_set *ms = ti->private;
  	unsigned long error_bits;
  
  	unsigned int i;
  	struct dm_io_region io[ms->nr_mirrors];
  	struct mirror *m;
  	struct dm_io_request io_req = {
d87f4c14f   Tejun Heo   dm: implement REQ...
239
  		.bi_rw = WRITE_FLUSH,
c0da3748b   Mikulas Patocka   dm raid1: impleme...
240
  		.mem.type = DM_IO_KMEM,
5fc2ffeab   Mike Snitzer   dm raid1: support...
241
  		.mem.ptr.addr = NULL,
c0da3748b   Mikulas Patocka   dm raid1: impleme...
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
  		.client = ms->io_client,
  	};
  
  	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) {
  		io[i].bdev = m->dev->bdev;
  		io[i].sector = 0;
  		io[i].count = 0;
  	}
  
  	error_bits = -1;
  	dm_io(&io_req, ms->nr_mirrors, io, &error_bits);
  	if (unlikely(error_bits != 0)) {
  		for (i = 0; i < ms->nr_mirrors; i++)
  			if (test_bit(i, &error_bits))
  				fail_mirror(ms->mirror + i,
64b30c46e   Mikulas Patocka   dm raid1: report ...
257
  					    DM_RAID1_FLUSH_ERROR);
c0da3748b   Mikulas Patocka   dm raid1: impleme...
258
259
260
261
262
  		return -EIO;
  	}
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
263
264
265
266
267
268
269
  /*-----------------------------------------------------------------
   * Recovery.
   *
   * When a mirror is first activated we may find that some regions
   * are in the no-sync state.  We have to recover these by
   * recopying from the default mirror to all the others.
   *---------------------------------------------------------------*/
4cdc1d1fa   Alasdair G Kergon   dm io: write erro...
270
  static void recovery_complete(int read_err, unsigned long write_err,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
271
272
  			      void *context)
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
273
274
  	struct dm_region *reg = context;
  	struct mirror_set *ms = dm_rh_region_context(reg);
8f0205b79   Jonathan Brassow   dm raid1: handle ...
275
  	int m, bit = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
276

8f0205b79   Jonathan Brassow   dm raid1: handle ...
277
  	if (read_err) {
f44db678e   Jonathan Brassow   dm raid1: handle ...
278
279
  		/* Read error means the failure of default mirror. */
  		DMERR_LIMIT("Unable to read primary mirror during recovery");
8f0205b79   Jonathan Brassow   dm raid1: handle ...
280
281
  		fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR);
  	}
f44db678e   Jonathan Brassow   dm raid1: handle ...
282

8f0205b79   Jonathan Brassow   dm raid1: handle ...
283
  	if (write_err) {
4cdc1d1fa   Alasdair G Kergon   dm io: write erro...
284
  		DMERR_LIMIT("Write error during recovery (error = 0x%lx)",
f44db678e   Jonathan Brassow   dm raid1: handle ...
285
  			    write_err);
8f0205b79   Jonathan Brassow   dm raid1: handle ...
286
287
288
289
290
291
292
293
294
295
296
297
298
  		/*
  		 * Bits correspond to devices (excluding default mirror).
  		 * The default mirror cannot change during recovery.
  		 */
  		for (m = 0; m < ms->nr_mirrors; m++) {
  			if (&ms->mirror[m] == get_default_mirror(ms))
  				continue;
  			if (test_bit(bit, &write_err))
  				fail_mirror(ms->mirror + m,
  					    DM_RAID1_SYNC_ERROR);
  			bit++;
  		}
  	}
f44db678e   Jonathan Brassow   dm raid1: handle ...
299

1f965b194   Heinz Mauelshagen   dm raid1: separat...
300
  	dm_rh_recovery_end(reg, !(read_err || write_err));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
301
  }
1f965b194   Heinz Mauelshagen   dm raid1: separat...
302
  static int recover(struct mirror_set *ms, struct dm_region *reg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
303
304
  {
  	int r;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
305
  	unsigned i;
eb69aca5d   Heinz Mauelshagen   dm kcopyd: clean ...
306
  	struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
308
  	struct mirror *m;
  	unsigned long flags = 0;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
309
310
  	region_t key = dm_rh_get_region_key(reg);
  	sector_t region_size = dm_rh_get_region_size(ms->rh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
311
312
  
  	/* fill in the source */
72f4b3141   Jonathan Brassow   dm raid1: handle ...
313
  	m = get_default_mirror(ms);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
314
  	from.bdev = m->dev->bdev;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
315
316
  	from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
  	if (key == (ms->nr_regions - 1)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
317
318
319
320
  		/*
  		 * The final region may be smaller than
  		 * region_size.
  		 */
1f965b194   Heinz Mauelshagen   dm raid1: separat...
321
  		from.count = ms->ti->len & (region_size - 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
  		if (!from.count)
1f965b194   Heinz Mauelshagen   dm raid1: separat...
323
  			from.count = region_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
324
  	} else
1f965b194   Heinz Mauelshagen   dm raid1: separat...
325
  		from.count = region_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
327
328
  
  	/* fill in the destinations */
  	for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
72f4b3141   Jonathan Brassow   dm raid1: handle ...
329
  		if (&ms->mirror[i] == get_default_mirror(ms))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330
331
332
333
  			continue;
  
  		m = ms->mirror + i;
  		dest->bdev = m->dev->bdev;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
334
  		dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
335
336
337
338
339
  		dest->count = from.count;
  		dest++;
  	}
  
  	/* hand to kcopyd */
f7c83e2e4   Jonathan Brassow   dm raid1: kcopyd ...
340
341
  	if (!errors_handled(ms))
  		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
eb69aca5d   Heinz Mauelshagen   dm kcopyd: clean ...
342
343
  	r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
  			   flags, recovery_complete, reg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
345
346
347
348
349
  
  	return r;
  }
  
  static void do_recovery(struct mirror_set *ms)
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
350
351
  	struct dm_region *reg;
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
  	int r;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
353
354
355
356
  
  	/*
  	 * Start quiescing some regions.
  	 */
1f965b194   Heinz Mauelshagen   dm raid1: separat...
357
  	dm_rh_recovery_prepare(ms->rh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
358
359
360
361
  
  	/*
  	 * Copy any already quiesced regions.
  	 */
1f965b194   Heinz Mauelshagen   dm raid1: separat...
362
  	while ((reg = dm_rh_recovery_start(ms->rh))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
363
364
  		r = recover(ms, reg);
  		if (r)
1f965b194   Heinz Mauelshagen   dm raid1: separat...
365
  			dm_rh_recovery_end(reg, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
  	}
  
  	/*
  	 * Update the in sync flag.
  	 */
  	if (!ms->in_sync &&
  	    (log->type->get_sync_count(log) == ms->nr_regions)) {
  		/* the sync is complete */
  		dm_table_event(ms->ti->table);
  		ms->in_sync = 1;
  	}
  }
  
  /*-----------------------------------------------------------------
   * Reads
   *---------------------------------------------------------------*/
  /*
   * Pick a leg to read from.
   *
   * Starts at the default mirror and walks the legs in descending index
   * order, wrapping from the first leg to the last, until it finds one
   * with no recorded errors or arrives back at the default mirror.
   *
   * @sector is not used.
   *
   * Returns the chosen leg, or NULL if none was found.
   */
  static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
  {
  	struct mirror *m = get_default_mirror(ms);

  	do {
  		if (likely(!atomic_read(&m->error_count)))
  			return m;

  		/*
  		 * Step to the previous leg.  Wrap explicitly rather than
  		 * decrementing first, so that a pointer before the start
  		 * of ms->mirror[] is never formed.
  		 */
  		if (m == ms->mirror)
  			m = ms->mirror + ms->nr_mirrors - 1;
  		else
  			m--;
  	} while (m != get_default_mirror(ms));

  	return NULL;
  }
  
  /*
   * Return non-zero if the default mirror of @m's set has no recorded
   * errors.
   */
  static int default_ok(struct mirror *m)
  {
  	struct mirror *primary = get_default_mirror(m->ms);

  	return atomic_read(&primary->error_count) == 0;
  }
  
  static int mirror_available(struct mirror_set *ms, struct bio *bio)
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
406
407
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
  	region_t region = dm_rh_bio_to_region(ms->rh, bio);
06386bbfd   Jonathan Brassow   dm raid1: handle ...
408

1f965b194   Heinz Mauelshagen   dm raid1: separat...
409
  	if (log->type->in_sync(log, region, 0))
06386bbfd   Jonathan Brassow   dm raid1: handle ...
410
411
412
  		return choose_mirror(ms,  bio->bi_sector) ? 1 : 0;
  
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
414
415
416
417
  }
  
  /*
   * remap a buffer to a particular mirror.
   */
06386bbfd   Jonathan Brassow   dm raid1: handle ...
418
419
  static sector_t map_sector(struct mirror *m, struct bio *bio)
  {
4184153f9   Mikulas Patocka   dm raid1: support...
420
421
  	if (unlikely(!bio->bi_size))
  		return 0;
b441a262e   Alasdair G Kergon   dm: use dm_target...
422
  	return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector);
06386bbfd   Jonathan Brassow   dm raid1: handle ...
423
424
425
  }
  
  static void map_bio(struct mirror *m, struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
426
427
  {
  	bio->bi_bdev = m->dev->bdev;
06386bbfd   Jonathan Brassow   dm raid1: handle ...
428
429
  	bio->bi_sector = map_sector(m, bio);
  }
22a1ceb1e   Heinz Mauelshagen   dm io: clean inte...
430
  static void map_region(struct dm_io_region *io, struct mirror *m,
06386bbfd   Jonathan Brassow   dm raid1: handle ...
431
432
433
434
435
436
  		       struct bio *bio)
  {
  	io->bdev = m->dev->bdev;
  	io->sector = map_sector(m, bio);
  	io->count = bio->bi_size >> 9;
  }
047885076   Mikulas Patocka   dm raid1: add fra...
437
438
439
  static void hold_bio(struct mirror_set *ms, struct bio *bio)
  {
  	/*
f07030409   Takahiro Yasui   dm raid1: fix dea...
440
441
  	 * Lock is required to avoid race condition during suspend
  	 * process.
047885076   Mikulas Patocka   dm raid1: add fra...
442
  	 */
f07030409   Takahiro Yasui   dm raid1: fix dea...
443
  	spin_lock_irq(&ms->lock);
047885076   Mikulas Patocka   dm raid1: add fra...
444
  	if (atomic_read(&ms->suspend)) {
f07030409   Takahiro Yasui   dm raid1: fix dea...
445
446
447
448
449
  		spin_unlock_irq(&ms->lock);
  
  		/*
  		 * If device is suspended, complete the bio.
  		 */
047885076   Mikulas Patocka   dm raid1: add fra...
450
451
452
453
454
455
456
457
458
459
  		if (dm_noflush_suspending(ms->ti))
  			bio_endio(bio, DM_ENDIO_REQUEUE);
  		else
  			bio_endio(bio, -EIO);
  		return;
  	}
  
  	/*
  	 * Hold bio until the suspend is complete.
  	 */
047885076   Mikulas Patocka   dm raid1: add fra...
460
461
462
  	bio_list_add(&ms->holds, bio);
  	spin_unlock_irq(&ms->lock);
  }
06386bbfd   Jonathan Brassow   dm raid1: handle ...
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
  /*-----------------------------------------------------------------
   * Reads
   *---------------------------------------------------------------*/
  /*
   * dm_io completion callback for reads issued by read_async_bio().
   *
   * @error:   non-zero if the read failed.
   * @context: the bio that was read.
   *
   * On success the bio is completed.  On failure the leg is failed with
   * DM_RAID1_READ_ERROR; the bio is then re-queued if the default mirror
   * is healthy or another leg is available, and fails with -EIO if not.
   */
  static void read_callback(unsigned long error, void *context)
  {
  	struct bio *bio = context;
  	struct mirror *m = bio_get_m(bio);

  	/* Take the stashed mirror pointer back out of the bio. */
  	bio_set_m(bio, NULL);

  	if (likely(!error)) {
  		bio_endio(bio, 0);
  		return;
  	}

  	fail_mirror(m, DM_RAID1_READ_ERROR);

  	if (unlikely(!default_ok(m)) && !mirror_available(m->ms, bio)) {
  		DMERR_LIMIT("Read failure on mirror device %s.  Failing I/O.",
  			    m->dev->name);
  		bio_endio(bio, -EIO);
  		return;
  	}

  	DMWARN_LIMIT("Read failure on mirror device %s.  "
  		     "Trying alternative device.",
  		     m->dev->name);
  	queue_bio(m->ms, bio, bio_rw(bio));
  }
  
  /* Asynchronous read. */
  static void read_async_bio(struct mirror *m, struct bio *bio)
  {
22a1ceb1e   Heinz Mauelshagen   dm io: clean inte...
497
  	struct dm_io_region io;
06386bbfd   Jonathan Brassow   dm raid1: handle ...
498
499
500
501
502
503
504
505
506
507
508
  	struct dm_io_request io_req = {
  		.bi_rw = READ,
  		.mem.type = DM_IO_BVEC,
  		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
  		.notify.fn = read_callback,
  		.notify.context = bio,
  		.client = m->ms->io_client,
  	};
  
  	map_region(&io, m, bio);
  	bio_set_m(bio, m);
1f965b194   Heinz Mauelshagen   dm raid1: separat...
509
510
511
512
513
514
515
516
  	BUG_ON(dm_io(&io_req, 1, &io, NULL));
  }
  
  /*
   * Return 1 if the region hash reports @region as DM_RH_CLEAN or
   * DM_RH_DIRTY, 0 for any other state.  @may_block is passed through
   * to dm_rh_get_state().
   */
  static inline int region_in_sync(struct mirror_set *ms, region_t region,
  				 int may_block)
  {
  	switch (dm_rh_get_state(ms->rh, region, may_block)) {
  	case DM_RH_CLEAN:
  	case DM_RH_DIRTY:
  		return 1;
  	default:
  		return 0;
  	}
  }
  
  static void do_reads(struct mirror_set *ms, struct bio_list *reads)
  {
  	region_t region;
  	struct bio *bio;
  	struct mirror *m;
  
  	while ((bio = bio_list_pop(reads))) {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
526
  		region = dm_rh_bio_to_region(ms->rh, bio);
06386bbfd   Jonathan Brassow   dm raid1: handle ...
527
  		m = get_default_mirror(ms);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
528
529
530
531
  
  		/*
  		 * We can only read balance if the region is in sync.
  		 */
1f965b194   Heinz Mauelshagen   dm raid1: separat...
532
  		if (likely(region_in_sync(ms, region, 1)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
533
  			m = choose_mirror(ms, bio->bi_sector);
06386bbfd   Jonathan Brassow   dm raid1: handle ...
534
535
  		else if (m && atomic_read(&m->error_count))
  			m = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
536

06386bbfd   Jonathan Brassow   dm raid1: handle ...
537
538
539
540
  		if (likely(m))
  			read_async_bio(m, bio);
  		else
  			bio_endio(bio, -EIO);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
541
542
543
544
545
546
547
548
549
550
551
552
553
  	}
  }
  
  /*-----------------------------------------------------------------
   * Writes.
   *
   * We do different things with the write io depending on the
   * state of the region that it's in:
   *
   * SYNC: 	increment pending, use kcopyd to write to *all* mirrors
   * RECOVERING:	delay the io until recovery completes
   * NOSYNC:	increment pending, just write to the default mirror
   *---------------------------------------------------------------*/
72f4b3141   Jonathan Brassow   dm raid1: handle ...
554

72f4b3141   Jonathan Brassow   dm raid1: handle ...
555

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
556
557
  static void write_callback(unsigned long error, void *context)
  {
72f4b3141   Jonathan Brassow   dm raid1: handle ...
558
  	unsigned i, ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
559
560
  	struct bio *bio = (struct bio *) context;
  	struct mirror_set *ms;
72f4b3141   Jonathan Brassow   dm raid1: handle ...
561
562
  	int should_wake = 0;
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
563

06386bbfd   Jonathan Brassow   dm raid1: handle ...
564
565
  	ms = bio_get_m(bio)->ms;
  	bio_set_m(bio, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566
567
568
569
570
571
572
  
  	/*
  	 * NOTE: We don't decrement the pending count here,
  	 * instead it is done by the targets endio function.
  	 * This way we handle both writes to SYNC and NOSYNC
  	 * regions with the same code.
  	 */
60f355ead   Mikulas Patocka   dm raid1: hold wr...
573
574
575
576
  	if (likely(!error)) {
  		bio_endio(bio, ret);
  		return;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
577

72f4b3141   Jonathan Brassow   dm raid1: handle ...
578
579
580
  	for (i = 0; i < ms->nr_mirrors; i++)
  		if (test_bit(i, &error))
  			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
72f4b3141   Jonathan Brassow   dm raid1: handle ...
581

60f355ead   Mikulas Patocka   dm raid1: hold wr...
582
583
584
585
586
587
588
589
590
591
592
593
  	/*
  	 * Need to raise event.  Since raising
  	 * events can block, we need to do it in
  	 * the main thread.
  	 */
  	spin_lock_irqsave(&ms->lock, flags);
  	if (!ms->failures.head)
  		should_wake = 1;
  	bio_list_add(&ms->failures, bio);
  	spin_unlock_irqrestore(&ms->lock, flags);
  	if (should_wake)
  		wakeup_mirrord(ms);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
594
595
596
597
598
  }
  
  static void do_write(struct mirror_set *ms, struct bio *bio)
  {
  	unsigned int i;
22a1ceb1e   Heinz Mauelshagen   dm io: clean inte...
599
  	struct dm_io_region io[ms->nr_mirrors], *dest = io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
600
  	struct mirror *m;
88be163ab   Milan Broz   dm raid1: update ...
601
  	struct dm_io_request io_req = {
d87f4c14f   Tejun Heo   dm: implement REQ...
602
  		.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
88be163ab   Milan Broz   dm raid1: update ...
603
604
605
606
607
608
  		.mem.type = DM_IO_BVEC,
  		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
  		.notify.fn = write_callback,
  		.notify.context = bio,
  		.client = ms->io_client,
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
609

5fc2ffeab   Mike Snitzer   dm raid1: support...
610
611
612
613
614
  	if (bio->bi_rw & REQ_DISCARD) {
  		io_req.bi_rw |= REQ_DISCARD;
  		io_req.mem.type = DM_IO_KMEM;
  		io_req.mem.ptr.addr = NULL;
  	}
06386bbfd   Jonathan Brassow   dm raid1: handle ...
615
616
  	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++)
  		map_region(dest++, m, bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
617

06386bbfd   Jonathan Brassow   dm raid1: handle ...
618
619
620
621
622
  	/*
  	 * Use default mirror because we only need it to retrieve the reference
  	 * to the mirror set in write_callback().
  	 */
  	bio_set_m(bio, get_default_mirror(ms));
88be163ab   Milan Broz   dm raid1: update ...
623

1f965b194   Heinz Mauelshagen   dm raid1: separat...
624
  	BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
625
626
627
628
629
630
631
  }
  
  static void do_writes(struct mirror_set *ms, struct bio_list *writes)
  {
  	int state;
  	struct bio *bio;
  	struct bio_list sync, nosync, recover, *this_list = NULL;
7513c2a76   Jonathan Brassow   dm raid1: add is_...
632
633
634
  	struct bio_list requeue;
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
  	region_t region;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
635
636
637
638
639
640
641
642
643
644
  
  	if (!writes->head)
  		return;
  
  	/*
  	 * Classify each write.
  	 */
  	bio_list_init(&sync);
  	bio_list_init(&nosync);
  	bio_list_init(&recover);
7513c2a76   Jonathan Brassow   dm raid1: add is_...
645
  	bio_list_init(&requeue);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
646
647
  
  	while ((bio = bio_list_pop(writes))) {
5fc2ffeab   Mike Snitzer   dm raid1: support...
648
649
  		if ((bio->bi_rw & REQ_FLUSH) ||
  		    (bio->bi_rw & REQ_DISCARD)) {
4184153f9   Mikulas Patocka   dm raid1: support...
650
651
652
  			bio_list_add(&sync, bio);
  			continue;
  		}
7513c2a76   Jonathan Brassow   dm raid1: add is_...
653
654
655
656
657
658
659
660
661
  		region = dm_rh_bio_to_region(ms->rh, bio);
  
  		if (log->type->is_remote_recovering &&
  		    log->type->is_remote_recovering(log, region)) {
  			bio_list_add(&requeue, bio);
  			continue;
  		}
  
  		state = dm_rh_get_state(ms->rh, region, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
662
  		switch (state) {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
663
664
  		case DM_RH_CLEAN:
  		case DM_RH_DIRTY:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
665
666
  			this_list = &sync;
  			break;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
667
  		case DM_RH_NOSYNC:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
668
669
  			this_list = &nosync;
  			break;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
670
  		case DM_RH_RECOVERING:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
671
672
673
674
675
676
677
678
  			this_list = &recover;
  			break;
  		}
  
  		bio_list_add(this_list, bio);
  	}
  
  	/*
7513c2a76   Jonathan Brassow   dm raid1: add is_...
679
680
681
682
683
684
685
  	 * Add bios that are delayed due to remote recovery
  	 * back on to the write queue
  	 */
  	if (unlikely(requeue.head)) {
  		spin_lock_irq(&ms->lock);
  		bio_list_merge(&ms->writes, &requeue);
  		spin_unlock_irq(&ms->lock);
69885683d   Mikulas Patocka   dm raid1: wake km...
686
  		delayed_wake(ms);
7513c2a76   Jonathan Brassow   dm raid1: add is_...
687
688
689
  	}
  
  	/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
690
691
692
693
  	 * Increment the pending counts for any regions that will
  	 * be written to (writes to recover regions are going to
  	 * be delayed).
  	 */
1f965b194   Heinz Mauelshagen   dm raid1: separat...
694
695
  	dm_rh_inc_pending(ms->rh, &sync);
  	dm_rh_inc_pending(ms->rh, &nosync);
d2b698644   Jonathan Brassow   dm raid1: do not ...
696
697
698
699
700
701
702
  
  	/*
  	 * If the flush fails on a previous call and succeeds here,
  	 * we must not reset the log_failure variable.  We need
  	 * userspace interaction to do that.
  	 */
  	ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703
704
705
706
  
  	/*
  	 * Dispatch io.
  	 */
5528d17de   Mikulas Patocka   dm raid1: fail wr...
707
  	if (unlikely(ms->log_failure) && errors_handled(ms)) {
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
708
709
710
  		spin_lock_irq(&ms->lock);
  		bio_list_merge(&ms->failures, &sync);
  		spin_unlock_irq(&ms->lock);
1f965b194   Heinz Mauelshagen   dm raid1: separat...
711
  		wakeup_mirrord(ms);
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
712
  	} else
fc1ff9588   Jonathan Brassow   dm raid1: handle ...
713
  		while ((bio = bio_list_pop(&sync)))
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
714
  			do_write(ms, bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
715
716
  
  	while ((bio = bio_list_pop(&recover)))
1f965b194   Heinz Mauelshagen   dm raid1: separat...
717
  		dm_rh_delay(ms->rh, bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718
719
  
  	while ((bio = bio_list_pop(&nosync))) {
ede5ea0b8   Mikulas Patocka   dm raid1: always ...
720
721
722
723
724
725
  		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
  			spin_lock_irq(&ms->lock);
  			bio_list_add(&ms->failures, bio);
  			spin_unlock_irq(&ms->lock);
  			wakeup_mirrord(ms);
  		} else {
929be8fcb   Mikulas Patocka   dm raid1: hold al...
726
727
728
  			map_bio(get_default_mirror(ms), bio);
  			generic_make_request(bio);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
729
730
  	}
  }
72f4b3141   Jonathan Brassow   dm raid1: handle ...
731
732
733
  static void do_failures(struct mirror_set *ms, struct bio_list *failures)
  {
  	struct bio *bio;
0f398a840   Mikulas Patocka   dm raid1: use hol...
734
  	if (likely(!failures->head))
72f4b3141   Jonathan Brassow   dm raid1: handle ...
735
  		return;
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
736
737
  	/*
  	 * If the log has failed, unattempted writes are being
0f398a840   Mikulas Patocka   dm raid1: use hol...
738
  	 * put on the holds list.  We can't issue those writes
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
739
740
741
742
743
744
745
746
747
748
749
750
751
752
  	 * until a log has been marked, so we must store them.
  	 *
  	 * If a 'noflush' suspend is in progress, we can requeue
  	 * the I/O's to the core.  This give userspace a chance
  	 * to reconfigure the mirror, at which point the core
  	 * will reissue the writes.  If the 'noflush' flag is
  	 * not set, we have no choice but to return errors.
  	 *
  	 * Some writes on the failures list may have been
  	 * submitted before the log failure and represent a
  	 * failure to write to one of the devices.  It is ok
  	 * for us to treat them the same and requeue them
  	 * as well.
  	 */
0f398a840   Mikulas Patocka   dm raid1: use hol...
753
  	while ((bio = bio_list_pop(failures))) {
60f355ead   Mikulas Patocka   dm raid1: hold wr...
754
  		if (!ms->log_failure) {
0f398a840   Mikulas Patocka   dm raid1: use hol...
755
  			ms->in_sync = 0;
c58098be9   Mikulas Patocka   dm raid1: remove ...
756
  			dm_rh_mark_nosync(ms->rh, bio);
0f398a840   Mikulas Patocka   dm raid1: use hol...
757
  		}
60f355ead   Mikulas Patocka   dm raid1: hold wr...
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
  
  		/*
  		 * If all the legs are dead, fail the I/O.
  		 * If we have been told to handle errors, hold the bio
  		 * and wait for userspace to deal with the problem.
  		 * Otherwise pretend that the I/O succeeded. (This would
  		 * be wrong if the failed leg returned after reboot and
  		 * got replicated back to the good legs.)
  		 */
  		if (!get_valid_mirror(ms))
  			bio_endio(bio, -EIO);
  		else if (errors_handled(ms))
  			hold_bio(ms, bio);
  		else
  			bio_endio(bio, 0);
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
773
  	}
72f4b3141   Jonathan Brassow   dm raid1: handle ...
774
775
776
777
778
779
780
781
782
  }
  
  /*
   * Work handler for mirror_set.trigger_event: raises a table event on
   * the table that owns the mirror set embedding @work.
   */
  static void trigger_event(struct work_struct *work)
  {
  	struct mirror_set *ms;

  	ms = container_of(work, struct mirror_set, trigger_event);
  	dm_table_event(ms->ti->table);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
783
784
785
  /*-----------------------------------------------------------------
   * kmirrord
   *---------------------------------------------------------------*/
a2aebe03b   Mikulas Patocka   dm raid1: use timer
786
  static void do_mirror(struct work_struct *work)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
787
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
788
789
  	struct mirror_set *ms = container_of(work, struct mirror_set,
  					     kmirrord_work);
72f4b3141   Jonathan Brassow   dm raid1: handle ...
790
791
  	struct bio_list reads, writes, failures;
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
792

72f4b3141   Jonathan Brassow   dm raid1: handle ...
793
  	spin_lock_irqsave(&ms->lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
794
795
  	reads = ms->reads;
  	writes = ms->writes;
72f4b3141   Jonathan Brassow   dm raid1: handle ...
796
  	failures = ms->failures;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
797
798
  	bio_list_init(&ms->reads);
  	bio_list_init(&ms->writes);
72f4b3141   Jonathan Brassow   dm raid1: handle ...
799
800
  	bio_list_init(&ms->failures);
  	spin_unlock_irqrestore(&ms->lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
801

1f965b194   Heinz Mauelshagen   dm raid1: separat...
802
  	dm_rh_update_states(ms->rh, errors_handled(ms));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
803
804
805
  	do_recovery(ms);
  	do_reads(ms, &reads);
  	do_writes(ms, &writes);
72f4b3141   Jonathan Brassow   dm raid1: handle ...
806
  	do_failures(ms, &failures);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
808
809
810
811
812
813
  /*-----------------------------------------------------------------
   * Target functions
   *---------------------------------------------------------------*/
  /*
   * Allocate and initialise a mirror_set with room for @nr_mirrors legs,
   * together with its read-record mempool, dm_io client and region hash.
   *
   * @nr_mirrors:  number of struct mirror slots to allocate
   * @region_size: region size handed to the region hash
   * @ti:          owning target; ti->error is set on failure
   * @dl:          dirty log handed to the region hash
   *
   * Returns the new mirror_set, or NULL on failure with everything that
   * was created here released again.  @dl itself is not destroyed by
   * this function on failure.
   */
  static struct mirror_set *alloc_context(unsigned int nr_mirrors,
  					uint32_t region_size,
  					struct dm_target *ti,
  					struct dm_dirty_log *dl)
  {
  	struct mirror_set *ms;
  	size_t len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors);

  	ms = kzalloc(len, GFP_KERNEL);
  	if (!ms) {
  		ti->error = "Cannot allocate mirror context";
  		return NULL;
  	}

  	spin_lock_init(&ms->lock);
  	bio_list_init(&ms->reads);
  	bio_list_init(&ms->writes);
  	bio_list_init(&ms->failures);
  	bio_list_init(&ms->holds);

  	ms->ti = ti;
  	ms->nr_mirrors = nr_mirrors;
  	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
  	ms->in_sync = 0;
  	ms->log_failure = 0;
  	ms->leg_failure = 0;
  	atomic_set(&ms->suspend, 0);
  	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);

  	ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
  						_dm_raid1_read_record_cache);
  	if (!ms->read_record_pool) {
  		ti->error = "Error creating mirror read_record_pool";
  		goto bad_free_ms;
  	}

  	ms->io_client = dm_io_client_create();
  	if (IS_ERR(ms->io_client)) {
  		ti->error = "Error creating dm_io client";
  		goto bad_destroy_pool;
  	}

  	ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
  				       wakeup_all_recovery_waiters,
  				       ms->ti->begin, MAX_RECOVERY,
  				       dl, region_size, ms->nr_regions);
  	if (IS_ERR(ms->rh)) {
  		ti->error = "Error creating dirty region hash";
  		goto bad_destroy_io_client;
  	}

  	return ms;

  	/* Unwind in reverse order of construction. */
  bad_destroy_io_client:
  	dm_io_client_destroy(ms->io_client);
  bad_destroy_pool:
  	mempool_destroy(ms->read_record_pool);
  bad_free_ms:
  	kfree(ms);
  	return NULL;
  }
  
  /*
   * Release a mirror_set built by alloc_context().
   *
   * @ms: mirror set to free
   * @ti: target the leg devices were acquired for
   * @m:  number of legs whose device has been acquired; legs m-1 .. 0
   *      are released, in that order
   */
  static void free_context(struct mirror_set *ms, struct dm_target *ti,
  			 unsigned int m)
  {
  	unsigned int leg;

  	for (leg = m; leg > 0; leg--)
  		dm_put_device(ti, ms->mirror[leg - 1].dev);

  	dm_io_client_destroy(ms->io_client);
  	dm_region_hash_destroy(ms->rh);
  	mempool_destroy(ms->read_record_pool);
  	kfree(ms);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
878
879
880
  static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
  		      unsigned int mirror, char **argv)
  {
4ee218cd6   Andrew Morton   [PATCH] dm: remov...
881
  	unsigned long long offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
882

4ee218cd6   Andrew Morton   [PATCH] dm: remov...
883
  	if (sscanf(argv[1], "%llu", &offset) != 1) {
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
884
  		ti->error = "Invalid offset";
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885
886
  		return -EINVAL;
  	}
8215d6ec5   Nikanth Karthikesan   dm table: remove ...
887
  	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
888
  			  &ms->mirror[mirror].dev)) {
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
889
  		ti->error = "Device lookup failure";
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
890
891
  		return -ENXIO;
  	}
aa5617c55   Jonathan Brassow   dm raid1: add mir...
892
  	ms->mirror[mirror].ms = ms;
72f4b3141   Jonathan Brassow   dm raid1: handle ...
893
894
  	atomic_set(&(ms->mirror[mirror].error_count), 0);
  	ms->mirror[mirror].error_type = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
895
896
897
898
  	ms->mirror[mirror].offset = offset;
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
899
900
901
  /*
   * Create dirty log: log_type #log_params <log_params>
   */
416cd17b1   Heinz Mauelshagen   dm log: clean int...
902
  static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
1f965b194   Heinz Mauelshagen   dm raid1: separat...
903
904
  					     unsigned argc, char **argv,
  					     unsigned *args_used)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
905
  {
1f965b194   Heinz Mauelshagen   dm raid1: separat...
906
  	unsigned param_count;
416cd17b1   Heinz Mauelshagen   dm log: clean int...
907
  	struct dm_dirty_log *dl;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
908
909
  
  	if (argc < 2) {
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
910
  		ti->error = "Insufficient mirror log arguments";
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
911
912
913
914
  		return NULL;
  	}
  
  	if (sscanf(argv[1], "%u", &param_count) != 1) {
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
915
  		ti->error = "Invalid mirror log argument count";
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
916
917
918
919
920
921
  		return NULL;
  	}
  
  	*args_used = 2 + param_count;
  
  	if (argc < *args_used) {
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
922
  		ti->error = "Insufficient mirror log arguments";
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
923
924
  		return NULL;
  	}
c0da3748b   Mikulas Patocka   dm raid1: impleme...
925
926
  	dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count,
  				 argv + 2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
927
  	if (!dl) {
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
928
  		ti->error = "Error creating mirror dirty log";
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
929
930
  		return NULL;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
931
932
  	return dl;
  }
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
  /*
   * Parse the optional trailing "[#features <features>]" arguments.
   *
   * @ms:        mirror set whose ->features mask is updated
   * @argc:      number of words left in @argv
   * @argv:      argv[0] is the feature count, followed by that many
   *             feature words
   * @args_used: set to the number of words consumed (0 if @argc is 0)
   *
   * The only recognised feature word is "handle_errors".
   *
   * Returns 0 on success or -EINVAL (with ti->error set) on a malformed
   * count, too few words for the count, or an unknown feature.
   */
  static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
  			  unsigned *args_used)
  {
  	unsigned num_features;
  	unsigned i;
  	struct dm_target *ti = ms->ti;
  	char dummy;

  	*args_used = 0;

  	if (!argc)
  		return 0;

  	/* The trailing %c rejects counts followed by garbage ("1x"). */
  	if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) {
  		ti->error = "Invalid number of features";
  		return -EINVAL;
  	}

  	argc--;
  	argv++;
  	(*args_used)++;

  	if (num_features > argc) {
  		ti->error = "Not enough arguments to support feature count";
  		return -EINVAL;
  	}

  	/*
  	 * Consume exactly num_features words.  A count of 0 must not
  	 * look at argv[0] at all: there may be no word left to read.
  	 */
  	for (i = 0; i < num_features; i++) {
  		if (strcmp("handle_errors", argv[i])) {
  			ti->error = "Unrecognised feature requested";
  			return -EINVAL;
  		}

  		ms->features |= DM_RAID1_HANDLE_ERRORS;
  		(*args_used)++;
  	}

  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969
970
971
972
973
  /*
   * Construct a mirror mapping:
   *
   * log_type #log_params <log_params>
   * #mirrors [mirror_path offset]{2,}
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
974
   * [#features <features>]
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
976
977
   *
   * log_type is "core" or "disk"
   * #log_params is between 1 and 3
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
978
979
   *
   * If present, features must be "handle_errors".
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
980
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
981
982
983
984
985
  static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
  {
  	int r;
  	unsigned int nr_mirrors, m, args_used;
  	struct mirror_set *ms;
416cd17b1   Heinz Mauelshagen   dm log: clean int...
986
  	struct dm_dirty_log *dl;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
987
988
989
990
991
992
993
994
995
  
  	dl = create_dirty_log(ti, argc, argv, &args_used);
  	if (!dl)
  		return -EINVAL;
  
  	argv += args_used;
  	argc -= args_used;
  
  	if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 ||
eb69aca5d   Heinz Mauelshagen   dm kcopyd: clean ...
996
  	    nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) {
72d948616   Alasdair G Kergon   [PATCH] dm: impro...
997
  		ti->error = "Invalid number of mirrors";
416cd17b1   Heinz Mauelshagen   dm log: clean int...
998
  		dm_dirty_log_destroy(dl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
999
1000
1001
1002
  		return -EINVAL;
  	}
  
  	argv++, argc--;
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
1003
1004
  	if (argc < nr_mirrors * 2) {
  		ti->error = "Too few mirror arguments";
416cd17b1   Heinz Mauelshagen   dm log: clean int...
1005
  		dm_dirty_log_destroy(dl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1006
1007
1008
1009
1010
  		return -EINVAL;
  	}
  
  	ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl);
  	if (!ms) {
416cd17b1   Heinz Mauelshagen   dm log: clean int...
1011
  		dm_dirty_log_destroy(dl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
  		return -ENOMEM;
  	}
  
  	/* Get the mirror parameter sets */
  	for (m = 0; m < nr_mirrors; m++) {
  		r = get_mirror(ms, ti, m, argv);
  		if (r) {
  			free_context(ms, ti, m);
  			return r;
  		}
  		argv += 2;
  		argc -= 2;
  	}
  
  	ti->private = ms;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1027
  	ti->split_io = dm_rh_get_region_size(ms->rh);
4184153f9   Mikulas Patocka   dm raid1: support...
1028
  	ti->num_flush_requests = 1;
5fc2ffeab   Mike Snitzer   dm raid1: support...
1029
  	ti->num_discard_requests = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1030

9c4376de9   Tejun Heo   dm: use non reent...
1031
1032
  	ms->kmirrord_wq = alloc_workqueue("kmirrord",
  					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
1033
1034
  	if (!ms->kmirrord_wq) {
  		DMERR("couldn't start kmirrord");
a72cf737e   Dmitry Monakhov   dm raid1: fix lea...
1035
1036
  		r = -ENOMEM;
  		goto err_free_context;
6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
1037
1038
  	}
  	INIT_WORK(&ms->kmirrord_work, do_mirror);
a2aebe03b   Mikulas Patocka   dm raid1: use timer
1039
1040
  	init_timer(&ms->timer);
  	ms->timer_pending = 0;
72f4b3141   Jonathan Brassow   dm raid1: handle ...
1041
  	INIT_WORK(&ms->trigger_event, trigger_event);
6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
1042

a8e6afa23   Jonathan E Brassow   dm raid1: add han...
1043
  	r = parse_features(ms, argc, argv, &args_used);
a72cf737e   Dmitry Monakhov   dm raid1: fix lea...
1044
1045
  	if (r)
  		goto err_destroy_wq;
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
1046
1047
1048
  
  	argv += args_used;
  	argc -= args_used;
f44db678e   Jonathan Brassow   dm raid1: handle ...
1049
1050
1051
1052
1053
1054
1055
1056
  	/*
  	 * Any read-balancing addition depends on the
  	 * DM_RAID1_HANDLE_ERRORS flag being present.
  	 * This is because the decision to balance depends
  	 * on the sync state of a region.  If the above
  	 * flag is not present, we ignore errors; and
  	 * the sync state may be inaccurate.
  	 */
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
1057
1058
  	if (argc) {
  		ti->error = "Too many mirror arguments";
a72cf737e   Dmitry Monakhov   dm raid1: fix lea...
1059
1060
  		r = -EINVAL;
  		goto err_destroy_wq;
a8e6afa23   Jonathan E Brassow   dm raid1: add han...
1061
  	}
fa34ce730   Mikulas Patocka   dm kcopyd: return...
1062
1063
1064
  	ms->kcopyd_client = dm_kcopyd_client_create();
  	if (IS_ERR(ms->kcopyd_client)) {
  		r = PTR_ERR(ms->kcopyd_client);
a72cf737e   Dmitry Monakhov   dm raid1: fix lea...
1065
  		goto err_destroy_wq;
fa34ce730   Mikulas Patocka   dm kcopyd: return...
1066
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1067

1f965b194   Heinz Mauelshagen   dm raid1: separat...
1068
  	wakeup_mirrord(ms);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1069
  	return 0;
a72cf737e   Dmitry Monakhov   dm raid1: fix lea...
1070
1071
1072
1073
1074
1075
  
  err_destroy_wq:
  	destroy_workqueue(ms->kmirrord_wq);
  err_free_context:
  	free_context(ms, ti, ms->nr_mirrors);
  	return r;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1076
1077
1078
1079
1080
  }
  
  static void mirror_dtr(struct dm_target *ti)
  {
  	struct mirror_set *ms = (struct mirror_set *) ti->private;
a2aebe03b   Mikulas Patocka   dm raid1: use timer
1081
  	del_timer_sync(&ms->timer);
6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
1082
  	flush_workqueue(ms->kmirrord_wq);
d5ffa387e   Tejun Heo   dm: dont use flus...
1083
  	flush_work_sync(&ms->trigger_event);
eb69aca5d   Heinz Mauelshagen   dm kcopyd: clean ...
1084
  	dm_kcopyd_client_destroy(ms->kcopyd_client);
6ad36fe2b   Holger Smolinski   dm raid1: one kmi...
1085
  	destroy_workqueue(ms->kmirrord_wq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1086
1087
  	free_context(ms, ti, ms->nr_mirrors);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1088
1089
1090
1091
1092
1093
1094
1095
1096
  /*
   * Mirror mapping function
   */
  static int mirror_map(struct dm_target *ti, struct bio *bio,
  		      union map_info *map_context)
  {
  	int r, rw = bio_rw(bio);
  	struct mirror *m;
  	struct mirror_set *ms = ti->private;
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1097
  	struct dm_raid1_read_record *read_record = NULL;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1098
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1099
1100
  
  	if (rw == WRITE) {
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1101
  		/* Save region for mirror_end_io() handler */
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1102
  		map_context->ll = dm_rh_bio_to_region(ms->rh, bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1103
  		queue_bio(ms, bio, rw);
d2a7ad29a   Kiyoshi Ueda   [PATCH] dm: map a...
1104
  		return DM_MAPIO_SUBMITTED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1105
  	}
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1106
  	r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1107
1108
  	if (r < 0 && r != -EWOULDBLOCK)
  		return r;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1109
  	/*
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1110
  	 * If region is not in-sync queue the bio.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1111
  	 */
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1112
1113
1114
  	if (!r || (r == -EWOULDBLOCK)) {
  		if (rw == READA)
  			return -EWOULDBLOCK;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1115

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1116
  		queue_bio(ms, bio, rw);
d2a7ad29a   Kiyoshi Ueda   [PATCH] dm: map a...
1117
  		return DM_MAPIO_SUBMITTED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1118
  	}
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1119
1120
1121
1122
  	/*
  	 * The region is in-sync and we can perform reads directly.
  	 * Store enough information so we can retry if it fails.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
  	m = choose_mirror(ms, bio->bi_sector);
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1124
  	if (unlikely(!m))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
  		return -EIO;
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1126
1127
1128
1129
1130
1131
1132
1133
  	read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO);
  	if (likely(read_record)) {
  		dm_bio_record(&read_record->details, bio);
  		map_context->ptr = read_record;
  		read_record->m = m;
  	}
  
  	map_bio(m, bio);
d2a7ad29a   Kiyoshi Ueda   [PATCH] dm: map a...
1134
  	return DM_MAPIO_REMAPPED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1135
1136
1137
1138
1139
1140
1141
  }
  
  static int mirror_end_io(struct dm_target *ti, struct bio *bio,
  			 int error, union map_info *map_context)
  {
  	int rw = bio_rw(bio);
  	struct mirror_set *ms = (struct mirror_set *) ti->private;
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1142
1143
1144
  	struct mirror *m = NULL;
  	struct dm_bio_details *bd = NULL;
  	struct dm_raid1_read_record *read_record = map_context->ptr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1145
1146
1147
1148
  
  	/*
  	 * We need to dec pending if this was a write.
  	 */
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1149
  	if (rw == WRITE) {
d87f4c14f   Tejun Heo   dm: implement REQ...
1150
  		if (!(bio->bi_rw & REQ_FLUSH))
4184153f9   Mikulas Patocka   dm raid1: support...
1151
  			dm_rh_dec(ms->rh, map_context->ll);
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1152
1153
  		return error;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1154

06386bbfd   Jonathan Brassow   dm raid1: handle ...
1155
1156
  	if (error == -EOPNOTSUPP)
  		goto out;
7b6d91dae   Christoph Hellwig   block: unify flag...
1157
  	if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1158
1159
1160
1161
1162
1163
1164
1165
1166
  		goto out;
  
  	if (unlikely(error)) {
  		if (!read_record) {
  			/*
  			 * There wasn't enough memory to record necessary
  			 * information for a retry or there was no other
  			 * mirror in-sync.
  			 */
e03f1a842   Adrian Bunk   dm-raid1.c: fix N...
1167
  			DMERR_LIMIT("Mirror read failed.");
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1168
1169
  			return -EIO;
  		}
e03f1a842   Adrian Bunk   dm-raid1.c: fix N...
1170
1171
  
  		m = read_record->m;
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1172
1173
  		DMERR("Mirror read failed from %s. Trying alternative device.",
  		      m->dev->name);
06386bbfd   Jonathan Brassow   dm raid1: handle ...
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
  		fail_mirror(m, DM_RAID1_READ_ERROR);
  
  		/*
  		 * A failed read is requeued for another attempt using an intact
  		 * mirror.
  		 */
  		if (default_ok(m) || mirror_available(ms, bio)) {
  			bd = &read_record->details;
  
  			dm_bio_restore(bd, bio);
  			mempool_free(read_record, ms->read_record_pool);
  			map_context->ptr = NULL;
  			queue_bio(ms, bio, rw);
  			return 1;
  		}
  		DMERR("All replicated volumes dead, failing I/O");
  	}
  
  out:
  	if (read_record) {
  		mempool_free(read_record, ms->read_record_pool);
  		map_context->ptr = NULL;
  	}
  
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1199
  }
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1200
  static void mirror_presuspend(struct dm_target *ti)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1201
1202
  {
  	struct mirror_set *ms = (struct mirror_set *) ti->private;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1203
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1204

047885076   Mikulas Patocka   dm raid1: add fra...
1205
1206
  	struct bio_list holds;
  	struct bio *bio;
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1207
1208
1209
  	atomic_set(&ms->suspend, 1);
  
  	/*
f07030409   Takahiro Yasui   dm raid1: fix dea...
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
  	 * Process bios in the hold list to start recovery waiting
  	 * for bios in the hold list. After the process, no bio has
  	 * a chance to be added in the hold list because ms->suspend
  	 * is set.
  	 */
  	spin_lock_irq(&ms->lock);
  	holds = ms->holds;
  	bio_list_init(&ms->holds);
  	spin_unlock_irq(&ms->lock);
  
  	while ((bio = bio_list_pop(&holds)))
  		hold_bio(ms, bio);
  
  	/*
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1224
1225
1226
  	 * We must finish up all the work that we've
  	 * generated (i.e. recovery work).
  	 */
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1227
  	dm_rh_stop_recovery(ms->rh);
33184048d   Jonathan E Brassow   [PATCH] dm: raid1...
1228

33184048d   Jonathan E Brassow   [PATCH] dm: raid1...
1229
  	wait_event(_kmirrord_recovery_stopped,
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1230
  		   !dm_rh_recovery_in_flight(ms->rh));
33184048d   Jonathan E Brassow   [PATCH] dm: raid1...
1231

b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
  	if (log->type->presuspend && log->type->presuspend(log))
  		/* FIXME: need better error handling */
  		DMWARN("log presuspend failed");
  
  	/*
  	 * Now that recovery is complete/stopped and the
  	 * delayed bios are queued, we need to wait for
  	 * the worker thread to complete.  This way,
  	 * we know that all of our I/O has been pushed.
  	 */
  	flush_workqueue(ms->kmirrord_wq);
  }
  
  static void mirror_postsuspend(struct dm_target *ti)
  {
  	struct mirror_set *ms = ti->private;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1248
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1249

6b3df0d7a   Jonathan Brassow   dm log: split sus...
1250
  	if (log->type->postsuspend && log->type->postsuspend(log))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1251
  		/* FIXME: need better error handling */
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1252
  		DMWARN("log postsuspend failed");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1253
1254
1255
1256
  }
  
  static void mirror_resume(struct dm_target *ti)
  {
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1257
  	struct mirror_set *ms = ti->private;
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1258
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1259
1260
  
  	atomic_set(&ms->suspend, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1261
1262
1263
  	if (log->type->resume && log->type->resume(log))
  		/* FIXME: need better error handling */
  		DMWARN("log resume failed");
1f965b194   Heinz Mauelshagen   dm raid1: separat...
1264
  	dm_rh_start_recovery(ms->rh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1265
  }
af195ac82   Jonathan Brassow   dm raid1: report ...
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
  /*
   * device_status_char
   * @m: mirror device/leg we want the status of
   *
   * We return one character representing the most severe error
   * we have encountered.
   *    A => Alive - No failures
   *    F => Flush - A flush failure occurred
   *    D => Dead - A write failure occurred leaving mirror out-of-sync
   *    S => Sync - A synchronization failure occurred, mirror out-of-sync
   *    R => Read - A read failure occurred, mirror data unaffected
   *    U => Unknown - Errors were counted but no known error type is set
   *
   * Returns: <char>
   */
  static char device_status_char(struct mirror *m)
  {
  	if (!atomic_read(&(m->error_count)))
  		return 'A';
64b30c46e   Mikulas Patocka   dm raid1: report ...
1283
1284
  	return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' :
  		(test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' :
af195ac82   Jonathan Brassow   dm raid1: report ...
1285
1286
1287
  		(test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' :
  		(test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U';
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1288
1289
1290
  /*
   * mirror_status
   * @ti: the mirror target being queried
   * @type: STATUSTYPE_INFO or STATUSTYPE_TABLE
   * @result: output buffer the status text is written into
   * @maxlen: size of @result
   *
   * STATUSTYPE_INFO emits:
   *    <nr_mirrors> <dev name>... <sync count>/<nr_regions> 1 <leg chars> <log status>
   * where <leg chars> holds one device_status_char() per mirror leg.
   *
   * STATUSTYPE_TABLE emits:
   *    <log status><nr_mirrors> [<dev name> <offset>]... [ 1 handle_errors]
   *
   * The local names sz, result and maxlen are used by the DMEMIT() macro
   * and must keep those names.
   *
   * Returns: 0 always.
   */
  static int mirror_status(struct dm_target *ti, status_type_t type,
  			 char *result, unsigned int maxlen)
  {
  	unsigned int m, sz = 0;
  	struct mirror_set *ms = ti->private;
  	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

  	switch (type) {
  	case STATUSTYPE_INFO:
  		DMEMIT("%d ", ms->nr_mirrors);
  		for (m = 0; m < ms->nr_mirrors; m++)
  			DMEMIT("%s ", ms->mirror[m].dev->name);

  		DMEMIT("%llu/%llu 1 ",
  		       (unsigned long long)log->type->get_sync_count(log),
  		       (unsigned long long)ms->nr_regions);

  		/*
  		 * Emit the per-leg health characters straight into the
  		 * result buffer rather than staging them in an on-stack
  		 * array sized by nr_mirrors (a variable-length array).
  		 */
  		for (m = 0; m < ms->nr_mirrors; m++)
  			DMEMIT("%c", device_status_char(&ms->mirror[m]));
  		DMEMIT(" ");

  		/* Let the dirty log append its own status text. */
  		sz += log->type->status(log, type, result + sz, maxlen - sz);

  		break;

  	case STATUSTYPE_TABLE:
  		/* The log's table line comes first; DMEMIT appends after it. */
  		sz = log->type->status(log, type, result, maxlen);

  		DMEMIT("%d", ms->nr_mirrors);
  		for (m = 0; m < ms->nr_mirrors; m++)
  			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
  			       (unsigned long long)ms->mirror[m].offset);

  		if (ms->features & DM_RAID1_HANDLE_ERRORS)
  			DMEMIT(" 1 handle_errors");
  	}

  	return 0;
  }
af4874e03   Mike Snitzer   dm target:s intro...
1327
1328
1329
1330
1331
1332
1333
1334
1335
  /*
   * mirror_iterate_devices
   * @ti: the mirror target
   * @fn: callout invoked once per mirror leg
   * @data: opaque pointer handed through to @fn
   *
   * Calls @fn for each leg in index order with the leg's device, its
   * offset and the target length.  Iteration stops at the first non-zero
   * return from @fn.
   *
   * Returns: the first non-zero value returned by @fn, otherwise 0.
   */
  static int mirror_iterate_devices(struct dm_target *ti,
  				  iterate_devices_callout_fn fn, void *data)
  {
  	struct mirror_set *ms = ti->private;
  	unsigned leg;
  	int rc;

  	for (leg = 0; leg < ms->nr_mirrors; leg++) {
  		rc = fn(ti, ms->mirror[leg].dev,
  			ms->mirror[leg].offset, ti->len, data);
  		if (rc)
  			return rc;
  	}

  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1340
1341
  static struct target_type mirror_target = {
  	.name	 = "mirror",
9c4376de9   Tejun Heo   dm: use non reent...
1342
  	.version = {1, 12, 1},
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1343
1344
1345
1346
1347
  	.module	 = THIS_MODULE,
  	.ctr	 = mirror_ctr,
  	.dtr	 = mirror_dtr,
  	.map	 = mirror_map,
  	.end_io	 = mirror_end_io,
b80aa7a0c   Jonathan Brassow   dm raid1: fix EIO...
1348
  	.presuspend = mirror_presuspend,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1349
1350
1351
  	.postsuspend = mirror_postsuspend,
  	.resume	 = mirror_resume,
  	.status	 = mirror_status,
af4874e03   Mike Snitzer   dm target:s intro...
1352
  	.iterate_devices = mirror_iterate_devices,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1353
1354
1355
1356
1357
  };
  
  /*
   * dm_mirror_init
   *
   * Module initialisation: create the slab cache for
   * struct dm_raid1_read_record, then register the mirror target.  If
   * registration fails, the cache created here is destroyed again.
   *
   * Returns: 0 on success, -ENOMEM if the cache cannot be created, or the
   * negative value returned by dm_register_target().
   */
  static int __init dm_mirror_init(void)
  {
  	int r;

  	_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
  	if (!_dm_raid1_read_record_cache) {
  		DMERR("Can't allocate dm_raid1_read_record cache");
  		return -ENOMEM;
  	}

  	r = dm_register_target(&mirror_target);
  	if (r < 0) {
  		DMERR("Failed to register mirror target");
  		/* Undo the cache creation before reporting the failure. */
  		kmem_cache_destroy(_dm_raid1_read_record_cache);
  		return r;
  	}

  	return 0;
  }
  
  /*
   * dm_mirror_exit
   *
   * Module teardown, in the reverse order of dm_mirror_init(): unregister
   * the mirror target first, then destroy the read-record slab cache.
   */
  static void __exit dm_mirror_exit(void)
  {
  	dm_unregister_target(&mirror_target);

  	kmem_cache_destroy(_dm_raid1_read_record_cache);
  }
  
  /*
   * Module hooks: dm_mirror_init() and dm_mirror_exit() are wired up as
   * the module's init and exit entry points.
   */
  module_init(dm_mirror_init);
  module_exit(dm_mirror_exit);
  
  /* Module metadata: description, author and license. */
  MODULE_DESCRIPTION(DM_NAME " mirror target");
  MODULE_AUTHOR("Joe Thornber");
  MODULE_LICENSE("GPL");