Blame view

drivers/md/raid1.c 76.9 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
  /*
   * raid1.c : Multiple Devices driver for Linux
   *
   * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
   *
   * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
   *
   * RAID-1 management functions.
   *
   * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
   *
96de0e252   Jan Engelhardt   Convert files to ...
12
   * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
   * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
   *
191ea9b2c   NeilBrown   [PATCH] md: raid1...
15
16
17
18
19
20
21
22
23
   * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
   * bitmapped intelligence in resync:
   *
   *      - bitmap marked during normal i/o
   *      - bitmap used to skip nondirty blocks during sync
   *
   * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology:
   * - persistent bitmap code
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
27
28
29
30
31
32
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2, or (at your option)
   * any later version.
   *
   * You should have received a copy of the GNU General Public License
   * (for example /usr/src/linux/COPYING); if not, write to the Free
   * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   */
5a0e3ad6a   Tejun Heo   include cleanup: ...
33
  #include <linux/slab.h>
255707274   Stephen Rothwell   md: build failure...
34
  #include <linux/delay.h>
bff61975b   NeilBrown   md: move lots of ...
35
  #include <linux/blkdev.h>
056075c76   Paul Gortmaker   md: Add module.h ...
36
  #include <linux/module.h>
bff61975b   NeilBrown   md: move lots of ...
37
  #include <linux/seq_file.h>
8bda470e8   Christian Dietrich   md/raid: use prin...
38
  #include <linux/ratelimit.h>
43b2e5d86   NeilBrown   md: move md_k.h f...
39
  #include "md.h"
ef740c372   Christoph Hellwig   md: move headers ...
40
41
  #include "raid1.h"
  #include "bitmap.h"
191ea9b2c   NeilBrown   [PATCH] md: raid1...
42

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
45
46
  /*
   * Number of guaranteed r1bios in case of extreme VM load:
   */
  #define	NR_RAID1_BIOS 256
34db0cd60   NeilBrown   md: add proper wr...
47
48
49
50
51
  /* When there are this many requests queued to be written by
   * the raid1 thread, we become 'congested' to provide back-pressure
   * for writeback.
   */
  static int max_queued_requests = 1024;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52

e80963604   NeilBrown   md/raid1: typedef...
53
54
  /*
   * Forward declarations: put_buf() and call_bio_endio() below call these
   * before their definitions appear.
   */
  static void allow_barrier(struct r1conf *conf);
  static void lower_barrier(struct r1conf *conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
55

dd0fc66fb   Al Viro   [PATCH] gfp flags...
56
  /*
   * Pool allocation callback for r1bio structures.
   *
   * @gfp_flags: allocation flags handed straight to kzalloc().
   * @data:      the struct pool_info describing the array.
   *
   * Returns a zeroed r1bio whose trailing bios[] array has room for
   * pool_info->raid_disks entries, or NULL if kzalloc() fails.
   */
  static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
  {
  	struct pool_info *info = data;
  	/* header plus info->raid_disks slots of the trailing bios[] array */
  	int nbytes = offsetof(struct r1bio, bios[info->raid_disks]);
  
  	return kzalloc(nbytes, gfp_flags);
  }
  
  /*
   * Counterpart of r1bio_pool_alloc(): release an r1bio with kfree().
   * @data is not used.
   */
  static void r1bio_pool_free(void *r1_bio, void *data)
  {
  	kfree(r1_bio);
  }
  
  /* Bytes covered by one resync buffer (64 KiB). */
  #define RESYNC_BLOCK_SIZE (64*1024)
  //#define RESYNC_BLOCK_SIZE PAGE_SIZE
  /* The same amount expressed in 512-byte units (byte count >> 9). */
  #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
  /* Pages needed to hold one resync block, rounded up. */
  #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
  /* NOTE(review): 2 MiB; presumably the resync window size in bytes - users are outside this view. */
  #define RESYNC_WINDOW (2048*1024)
dd0fc66fb   Al Viro   [PATCH] gfp flags...
75
  /*
   * Pool allocation callback for resync buffers.
   *
   * Builds an r1bio with one bio per pool_info->raid_disks slot and backs
   * them with RESYNC_PAGES data pages.  For a user-requested check/repair
   * (MD_RECOVERY_REQUESTED set) every bio gets its own pages; otherwise
   * only bios[0] owns pages and the remaining bios alias the same pages.
   *
   * @gfp_flags: allocation flags used for every allocation made here.
   * @data:      the struct pool_info describing the array.
   *
   * Returns the new r1bio (master_bio set to NULL), or NULL after undoing
   * every partial allocation.
   */
  static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
  {
  	struct pool_info *pi = data;
  	struct r1bio *r1_bio;
  	struct page *pg;
  	struct bio *b;
  	int d, pgno;
  
  	r1_bio = r1bio_pool_alloc(gfp_flags, pi);
  	if (r1_bio == NULL)
  		return NULL;
  
  	/*
  	 * Allocate the bios from the highest slot downwards.  On failure
  	 * 'd' names the slot that could not be filled, so everything above
  	 * it is what out_free_bio has to release.
  	 */
  	d = pi->raid_disks;
  	while (d--) {
  		b = bio_kmalloc(gfp_flags, RESYNC_PAGES);
  		if (b == NULL)
  			goto out_free_bio;
  		r1_bio->bios[d] = b;
  	}
  
  	/*
  	 * Attach data pages: to every bio for a requested check/repair,
  	 * to the first bio only otherwise.  bi_vcnt is advanced after each
  	 * page so the error path knows how many pages to put.
  	 */
  	for (d = test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery) ?
  			pi->raid_disks : 1; d-- ; ) {
  		b = r1_bio->bios[d];
  		pgno = 0;
  		while (pgno < RESYNC_PAGES) {
  			pg = alloc_page(gfp_flags);
  			if (unlikely(pg == NULL))
  				goto out_free_pages;
  
  			b->bi_io_vec[pgno].bv_page = pg;
  			b->bi_vcnt = ++pgno;
  		}
  	}
  
  	/* Plain resync: the other bios share the pages of bios[0]. */
  	if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
  		for (d = 1; d < pi->raid_disks; d++)
  			for (pgno = 0; pgno < RESYNC_PAGES; pgno++)
  				r1_bio->bios[d]->bi_io_vec[pgno].bv_page =
  					r1_bio->bios[0]->bi_io_vec[pgno].bv_page;
  	}
  
  	r1_bio->master_bio = NULL;
  
  	return r1_bio;
  
  out_free_pages:
  	/* drop every page recorded so far (bi_vcnt pages per bio) */
  	for (d = 0; d < pi->raid_disks; d++) {
  		b = r1_bio->bios[d];
  		for (pgno = 0; pgno < b->bi_vcnt; pgno++)
  			put_page(b->bi_io_vec[pgno].bv_page);
  	}
  	/* all bios exist on this path: release from slot 0 upwards */
  	d = -1;
  out_free_bio:
  	for (d++; d < pi->raid_disks; d++)
  		bio_put(r1_bio->bios[d]);
  	r1bio_pool_free(r1_bio, data);
  	return NULL;
  }
  
  static void r1buf_pool_free(void *__r1_bio, void *data)
  {
  	struct pool_info *pi = data;
d11c171e6   NeilBrown   [PATCH] md: allow...
144
  	int i,j;
9f2c9d12b   NeilBrown   md: remove typede...
145
  	struct r1bio *r1bio = __r1_bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146

d11c171e6   NeilBrown   [PATCH] md: allow...
147
148
149
150
151
  	for (i = 0; i < RESYNC_PAGES; i++)
  		for (j = pi->raid_disks; j-- ;) {
  			if (j == 0 ||
  			    r1bio->bios[j]->bi_io_vec[i].bv_page !=
  			    r1bio->bios[0]->bi_io_vec[i].bv_page)
1345b1d8a   NeilBrown   [PATCH] md: defin...
152
  				safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
d11c171e6   NeilBrown   [PATCH] md: allow...
153
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
155
156
157
158
  	for (i=0 ; i < pi->raid_disks; i++)
  		bio_put(r1bio->bios[i]);
  
  	r1bio_pool_free(r1bio, data);
  }
e80963604   NeilBrown   md/raid1: typedef...
159
  static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
160
161
  {
  	int i;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
162
  	for (i = 0; i < conf->raid_disks * 2; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163
  		struct bio **bio = r1_bio->bios + i;
4367af556   NeilBrown   md/raid1: clear b...
164
  		if (!BIO_SPECIAL(*bio))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
165
166
167
168
  			bio_put(*bio);
  		*bio = NULL;
  	}
  }
9f2c9d12b   NeilBrown   md: remove typede...
169
  static void free_r1bio(struct r1bio *r1_bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
170
  {
e80963604   NeilBrown   md/raid1: typedef...
171
  	struct r1conf *conf = r1_bio->mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
172

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
174
175
  	put_all_bios(conf, r1_bio);
  	mempool_free(r1_bio, conf->r1bio_pool);
  }
9f2c9d12b   NeilBrown   md: remove typede...
176
  static void put_buf(struct r1bio *r1_bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
177
  {
e80963604   NeilBrown   md/raid1: typedef...
178
  	struct r1conf *conf = r1_bio->mddev->private;
3e198f782   NeilBrown   [PATCH] md: tidyu...
179
  	int i;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
180
  	for (i = 0; i < conf->raid_disks * 2; i++) {
3e198f782   NeilBrown   [PATCH] md: tidyu...
181
182
183
184
  		struct bio *bio = r1_bio->bios[i];
  		if (bio->bi_end_io)
  			rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185
186
  
  	mempool_free(r1_bio, conf->r1buf_pool);
17999be4a   NeilBrown   [PATCH] md: impro...
187
  	lower_barrier(conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
188
  }
9f2c9d12b   NeilBrown   md: remove typede...
189
  static void reschedule_retry(struct r1bio *r1_bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
191
  {
  	unsigned long flags;
fd01b88c7   NeilBrown   md: remove typede...
192
  	struct mddev *mddev = r1_bio->mddev;
e80963604   NeilBrown   md/raid1: typedef...
193
  	struct r1conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194
195
196
  
  	spin_lock_irqsave(&conf->device_lock, flags);
  	list_add(&r1_bio->retry_list, &conf->retry_list);
ddaf22aba   NeilBrown   [PATCH] md: attem...
197
  	conf->nr_queued ++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
198
  	spin_unlock_irqrestore(&conf->device_lock, flags);
17999be4a   NeilBrown   [PATCH] md: impro...
199
  	wake_up(&conf->wait_barrier);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
200
201
202
203
204
205
206
207
  	md_wakeup_thread(mddev->thread);
  }
  
  /*
   * raid_end_bio_io() is called when we have finished servicing a mirrored
   * operation and are ready to return a success/failure code to the buffer
   * cache layer.
   */
9f2c9d12b   NeilBrown   md: remove typede...
208
  static void call_bio_endio(struct r1bio *r1_bio)
d2eb35acf   NeilBrown   md/raid1: avoid r...
209
210
211
  {
  	struct bio *bio = r1_bio->master_bio;
  	int done;
e80963604   NeilBrown   md/raid1: typedef...
212
  	struct r1conf *conf = r1_bio->mddev->private;
d2eb35acf   NeilBrown   md/raid1: avoid r...
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
  
  	if (bio->bi_phys_segments) {
  		unsigned long flags;
  		spin_lock_irqsave(&conf->device_lock, flags);
  		bio->bi_phys_segments--;
  		done = (bio->bi_phys_segments == 0);
  		spin_unlock_irqrestore(&conf->device_lock, flags);
  	} else
  		done = 1;
  
  	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
  		clear_bit(BIO_UPTODATE, &bio->bi_flags);
  	if (done) {
  		bio_endio(bio, 0);
  		/*
  		 * Wake up any possible resync thread that waits for the device
  		 * to go idle.
  		 */
  		allow_barrier(conf);
  	}
  }
9f2c9d12b   NeilBrown   md: remove typede...
234
  static void raid_end_bio_io(struct r1bio *r1_bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235
236
  {
  	struct bio *bio = r1_bio->master_bio;
4b6d287f6   NeilBrown   [PATCH] md: add w...
237
238
  	/* if nobody has done the final endio yet, do it now */
  	if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
36a4e1fe0   NeilBrown   md: remove PRINTK...
239
240
241
242
243
244
  		pr_debug("raid1: sync end %s on sectors %llu-%llu
  ",
  			 (bio_data_dir(bio) == WRITE) ? "write" : "read",
  			 (unsigned long long) bio->bi_sector,
  			 (unsigned long long) bio->bi_sector +
  			 (bio->bi_size >> 9) - 1);
4b6d287f6   NeilBrown   [PATCH] md: add w...
245

d2eb35acf   NeilBrown   md/raid1: avoid r...
246
  		call_bio_endio(r1_bio);
4b6d287f6   NeilBrown   [PATCH] md: add w...
247
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
249
250
251
252
253
  	free_r1bio(r1_bio);
  }
  
  /*
   * Update disk head position estimator based on IRQ completion info.
   */
9f2c9d12b   NeilBrown   md: remove typede...
254
  static inline void update_head_pos(int disk, struct r1bio *r1_bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
255
  {
e80963604   NeilBrown   md/raid1: typedef...
256
  	struct r1conf *conf = r1_bio->mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
258
259
260
  
  	conf->mirrors[disk].head_position =
  		r1_bio->sector + (r1_bio->sectors);
  }
ba3ae3bee   Namhyung Kim   md/raid1: factor ...
261
262
263
  /*
   * Find the disk number which triggered given bio
   */
9f2c9d12b   NeilBrown   md: remove typede...
264
  static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
ba3ae3bee   Namhyung Kim   md/raid1: factor ...
265
266
  {
  	int mirror;
301946364   NeilBrown   md/raid1: Replac...
267
268
  	struct r1conf *conf = r1_bio->mddev->private;
  	int raid_disks = conf->raid_disks;
ba3ae3bee   Namhyung Kim   md/raid1: factor ...
269

8f19ccb2f   NeilBrown   md/raid1: Allocat...
270
  	for (mirror = 0; mirror < raid_disks * 2; mirror++)
ba3ae3bee   Namhyung Kim   md/raid1: factor ...
271
272
  		if (r1_bio->bios[mirror] == bio)
  			break;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
273
  	BUG_ON(mirror == raid_disks * 2);
ba3ae3bee   Namhyung Kim   md/raid1: factor ...
274
275
276
277
  	update_head_pos(mirror, r1_bio);
  
  	return mirror;
  }
6712ecf8f   NeilBrown   Drop 'size' argum...
278
  static void raid1_end_read_request(struct bio *bio, int error)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
279
280
  {
  	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
9f2c9d12b   NeilBrown   md: remove typede...
281
  	struct r1bio *r1_bio = bio->bi_private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
282
  	int mirror;
e80963604   NeilBrown   md/raid1: typedef...
283
  	struct r1conf *conf = r1_bio->mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
284

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
285
286
287
288
  	mirror = r1_bio->read_disk;
  	/*
  	 * this branch is our 'one mirror IO has finished' event handler:
  	 */
ddaf22aba   NeilBrown   [PATCH] md: attem...
289
  	update_head_pos(mirror, r1_bio);
dd00a99e7   NeilBrown   md: avoid a possi...
290
291
292
293
294
295
  	if (uptodate)
  		set_bit(R1BIO_Uptodate, &r1_bio->state);
  	else {
  		/* If all other devices have failed, we want to return
  		 * the error upwards rather than fail the last device.
  		 * Here we redefine "uptodate" to mean "Don't want to retry"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296
  		 */
dd00a99e7   NeilBrown   md: avoid a possi...
297
298
299
300
301
302
303
304
  		unsigned long flags;
  		spin_lock_irqsave(&conf->device_lock, flags);
  		if (r1_bio->mddev->degraded == conf->raid_disks ||
  		    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
  		     !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
  			uptodate = 1;
  		spin_unlock_irqrestore(&conf->device_lock, flags);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305

dd00a99e7   NeilBrown   md: avoid a possi...
306
  	if (uptodate)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
  		raid_end_bio_io(r1_bio);
dd00a99e7   NeilBrown   md: avoid a possi...
308
  	else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
309
310
311
312
  		/*
  		 * oops, read error:
  		 */
  		char b[BDEVNAME_SIZE];
8bda470e8   Christian Dietrich   md/raid: use prin...
313
314
315
316
317
318
319
320
  		printk_ratelimited(
  			KERN_ERR "md/raid1:%s: %s: "
  			"rescheduling sector %llu
  ",
  			mdname(conf->mddev),
  			bdevname(conf->mirrors[mirror].rdev->bdev,
  				 b),
  			(unsigned long long)r1_bio->sector);
d2eb35acf   NeilBrown   md/raid1: avoid r...
321
  		set_bit(R1BIO_ReadError, &r1_bio->state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
323
324
325
  		reschedule_retry(r1_bio);
  	}
  
  	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
  }
9f2c9d12b   NeilBrown   md: remove typede...
327
  static void close_write(struct r1bio *r1_bio)
cd5ff9a16   NeilBrown   md/raid1: Handle...
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
  {
  	/* it really is the end of this request */
  	if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
  		/* free extra copy of the data pages */
  		int i = r1_bio->behind_page_count;
  		while (i--)
  			safe_put_page(r1_bio->behind_bvecs[i].bv_page);
  		kfree(r1_bio->behind_bvecs);
  		r1_bio->behind_bvecs = NULL;
  	}
  	/* clear the bitmap if all writes complete successfully */
  	bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
  			r1_bio->sectors,
  			!test_bit(R1BIO_Degraded, &r1_bio->state),
  			test_bit(R1BIO_BehindIO, &r1_bio->state));
  	md_write_end(r1_bio->mddev);
  }
9f2c9d12b   NeilBrown   md: remove typede...
345
  static void r1_bio_write_done(struct r1bio *r1_bio)
4e78064f4   NeilBrown   md: Fix possible ...
346
  {
cd5ff9a16   NeilBrown   md/raid1: Handle...
347
348
349
350
351
352
353
  	if (!atomic_dec_and_test(&r1_bio->remaining))
  		return;
  
  	if (test_bit(R1BIO_WriteError, &r1_bio->state))
  		reschedule_retry(r1_bio);
  	else {
  		close_write(r1_bio);
4367af556   NeilBrown   md/raid1: clear b...
354
355
356
357
  		if (test_bit(R1BIO_MadeGood, &r1_bio->state))
  			reschedule_retry(r1_bio);
  		else
  			raid_end_bio_io(r1_bio);
4e78064f4   NeilBrown   md: Fix possible ...
358
359
  	}
  }
6712ecf8f   NeilBrown   Drop 'size' argum...
360
  static void raid1_end_write_request(struct bio *bio, int error)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
361
362
  {
  	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
9f2c9d12b   NeilBrown   md: remove typede...
363
  	struct r1bio *r1_bio = bio->bi_private;
a9701a304   NeilBrown   [PATCH] md: suppo...
364
  	int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
e80963604   NeilBrown   md/raid1: typedef...
365
  	struct r1conf *conf = r1_bio->mddev->private;
04b857f74   NeilBrown   [PATCH] md: Fix s...
366
  	struct bio *to_put = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
367

ba3ae3bee   Namhyung Kim   md/raid1: factor ...
368
  	mirror = find_bio_disk(r1_bio, bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
369

e9c7469bb   Tejun Heo   md: implment REQ_...
370
371
372
  	/*
  	 * 'one mirror IO has finished' event handler:
  	 */
e9c7469bb   Tejun Heo   md: implment REQ_...
373
  	if (!uptodate) {
cd5ff9a16   NeilBrown   md/raid1: Handle...
374
375
  		set_bit(WriteErrorSeen,
  			&conf->mirrors[mirror].rdev->flags);
19d671695   NeilBrown   md/raid1: Mark de...
376
377
378
379
  		if (!test_and_set_bit(WantReplacement,
  				      &conf->mirrors[mirror].rdev->flags))
  			set_bit(MD_RECOVERY_NEEDED, &
  				conf->mddev->recovery);
cd5ff9a16   NeilBrown   md/raid1: Handle...
380
  		set_bit(R1BIO_WriteError, &r1_bio->state);
4367af556   NeilBrown   md/raid1: clear b...
381
  	} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
382
  		/*
e9c7469bb   Tejun Heo   md: implment REQ_...
383
384
385
386
387
388
389
390
  		 * Set R1BIO_Uptodate in our master bio, so that we
  		 * will return a good error code for to the higher
  		 * levels even if IO on some other mirrored buffer
  		 * fails.
  		 *
  		 * The 'master' represents the composite IO operation
  		 * to user-side. So if something waits for IO, then it
  		 * will wait for the 'master' bio.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
391
  		 */
4367af556   NeilBrown   md/raid1: clear b...
392
393
  		sector_t first_bad;
  		int bad_sectors;
cd5ff9a16   NeilBrown   md/raid1: Handle...
394
395
  		r1_bio->bios[mirror] = NULL;
  		to_put = bio;
e9c7469bb   Tejun Heo   md: implment REQ_...
396
  		set_bit(R1BIO_Uptodate, &r1_bio->state);
4367af556   NeilBrown   md/raid1: clear b...
397
398
399
400
401
402
403
404
  		/* Maybe we can clear some bad blocks. */
  		if (is_badblock(conf->mirrors[mirror].rdev,
  				r1_bio->sector, r1_bio->sectors,
  				&first_bad, &bad_sectors)) {
  			r1_bio->bios[mirror] = IO_MADE_GOOD;
  			set_bit(R1BIO_MadeGood, &r1_bio->state);
  		}
  	}
e9c7469bb   Tejun Heo   md: implment REQ_...
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
  	if (behind) {
  		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
  			atomic_dec(&r1_bio->behind_remaining);
  
  		/*
  		 * In behind mode, we ACK the master bio once the I/O
  		 * has safely reached all non-writemostly
  		 * disks. Setting the Returned bit ensures that this
  		 * gets done only once -- we don't ever want to return
  		 * -EIO here, instead we'll wait
  		 */
  		if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
  		    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
  			/* Maybe we can return now */
  			if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
  				struct bio *mbio = r1_bio->master_bio;
36a4e1fe0   NeilBrown   md: remove PRINTK...
421
422
423
424
425
426
  				pr_debug("raid1: behind end write sectors"
  					 " %llu-%llu
  ",
  					 (unsigned long long) mbio->bi_sector,
  					 (unsigned long long) mbio->bi_sector +
  					 (mbio->bi_size >> 9) - 1);
d2eb35acf   NeilBrown   md/raid1: avoid r...
427
  				call_bio_endio(r1_bio);
4b6d287f6   NeilBrown   [PATCH] md: add w...
428
429
430
  			}
  		}
  	}
4367af556   NeilBrown   md/raid1: clear b...
431
432
433
  	if (r1_bio->bios[mirror] == NULL)
  		rdev_dec_pending(conf->mirrors[mirror].rdev,
  				 conf->mddev);
e9c7469bb   Tejun Heo   md: implment REQ_...
434

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
  	/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
436
437
438
  	 * Let's see if all mirrored write operations have finished
  	 * already.
  	 */
af6d7b760   NeilBrown   md/raid1: improve...
439
  	r1_bio_write_done(r1_bio);
c70810b32   NeilBrown   [PATCH] md: refor...
440

04b857f74   NeilBrown   [PATCH] md: Fix s...
441
442
  	if (to_put)
  		bio_put(to_put);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
  }
  
  
  /*
   * This routine returns the disk from which the requested read should
   * be done. There is a per-array 'next expected sequential IO' sector
   * number - if this matches on the next IO then we use the last disk.
   * There is also a per-disk 'last known head position' sector that is
   * maintained from IRQ contexts, both the normal and the resync IO
   * completion handlers update this position correctly. If there is no
   * perfect sequential match then we pick the disk whose head is closest.
   *
   * If there are 2 mirrors in the same 2 devices, performance degrades
   * because position is mirror, not device based.
   *
   * The rdev for the device selected will have nr_pending incremented.
   */
e80963604   NeilBrown   md/raid1: typedef...
460
  static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
  {
af3a2cd6b   NeilBrown   md: Fix read bala...
462
  	const sector_t this_sector = r1_bio->sector;
d2eb35acf   NeilBrown   md/raid1: avoid r...
463
464
  	int sectors;
  	int best_good_sectors;
f3ac8bf7c   NeilBrown   md: tidy up devic...
465
  	int start_disk;
76073054c   NeilBrown   md/raid1: clean u...
466
  	int best_disk;
f3ac8bf7c   NeilBrown   md: tidy up devic...
467
  	int i;
76073054c   NeilBrown   md/raid1: clean u...
468
  	sector_t best_dist;
3cb030020   NeilBrown   md: removing type...
469
  	struct md_rdev *rdev;
f3ac8bf7c   NeilBrown   md: tidy up devic...
470
  	int choose_first;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
472
473
  
  	rcu_read_lock();
  	/*
8ddf9efe6   NeilBrown   [PATCH] md: suppo...
474
  	 * Check if we can balance. We can balance on the whole
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475
476
477
478
  	 * device if no resync is going on, or below the resync window.
  	 * We take the first readable disk when above the resync window.
  	 */
   retry:
d2eb35acf   NeilBrown   md/raid1: avoid r...
479
  	sectors = r1_bio->sectors;
76073054c   NeilBrown   md/raid1: clean u...
480
481
  	best_disk = -1;
  	best_dist = MaxSector;
d2eb35acf   NeilBrown   md/raid1: avoid r...
482
  	best_good_sectors = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
483
484
  	if (conf->mddev->recovery_cp < MaxSector &&
  	    (this_sector + sectors >= conf->next_resync)) {
f3ac8bf7c   NeilBrown   md: tidy up devic...
485
486
487
488
489
  		choose_first = 1;
  		start_disk = 0;
  	} else {
  		choose_first = 0;
  		start_disk = conf->last_used;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
490
  	}
8f19ccb2f   NeilBrown   md/raid1: Allocat...
491
  	for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
76073054c   NeilBrown   md/raid1: clean u...
492
  		sector_t dist;
d2eb35acf   NeilBrown   md/raid1: avoid r...
493
494
  		sector_t first_bad;
  		int bad_sectors;
f3ac8bf7c   NeilBrown   md: tidy up devic...
495
496
497
498
499
500
501
  		int disk = start_disk + i;
  		if (disk >= conf->raid_disks)
  			disk -= conf->raid_disks;
  
  		rdev = rcu_dereference(conf->mirrors[disk].rdev);
  		if (r1_bio->bios[disk] == IO_BLOCKED
  		    || rdev == NULL
76073054c   NeilBrown   md/raid1: clean u...
502
  		    || test_bit(Faulty, &rdev->flags))
f3ac8bf7c   NeilBrown   md: tidy up devic...
503
  			continue;
76073054c   NeilBrown   md/raid1: clean u...
504
505
  		if (!test_bit(In_sync, &rdev->flags) &&
  		    rdev->recovery_offset < this_sector + sectors)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
506
  			continue;
76073054c   NeilBrown   md/raid1: clean u...
507
508
509
  		if (test_bit(WriteMostly, &rdev->flags)) {
  			/* Don't balance among write-mostly, just
  			 * use the first as a last resort */
307729c8b   NeilBrown   md/raid1: perform...
510
511
512
513
514
515
516
517
518
  			if (best_disk < 0) {
  				if (is_badblock(rdev, this_sector, sectors,
  						&first_bad, &bad_sectors)) {
  					if (first_bad < this_sector)
  						/* Cannot use this */
  						continue;
  					best_good_sectors = first_bad - this_sector;
  				} else
  					best_good_sectors = sectors;
76073054c   NeilBrown   md/raid1: clean u...
519
  				best_disk = disk;
307729c8b   NeilBrown   md/raid1: perform...
520
  			}
76073054c   NeilBrown   md/raid1: clean u...
521
522
523
524
525
  			continue;
  		}
  		/* This is a reasonable device to use.  It might
  		 * even be best.
  		 */
d2eb35acf   NeilBrown   md/raid1: avoid r...
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
  		if (is_badblock(rdev, this_sector, sectors,
  				&first_bad, &bad_sectors)) {
  			if (best_dist < MaxSector)
  				/* already have a better device */
  				continue;
  			if (first_bad <= this_sector) {
  				/* cannot read here. If this is the 'primary'
  				 * device, then we must not read beyond
  				 * bad_sectors from another device..
  				 */
  				bad_sectors -= (this_sector - first_bad);
  				if (choose_first && sectors > bad_sectors)
  					sectors = bad_sectors;
  				if (best_good_sectors > sectors)
  					best_good_sectors = sectors;
  
  			} else {
  				sector_t good_sectors = first_bad - this_sector;
  				if (good_sectors > best_good_sectors) {
  					best_good_sectors = good_sectors;
  					best_disk = disk;
  				}
  				if (choose_first)
  					break;
  			}
  			continue;
  		} else
  			best_good_sectors = sectors;
76073054c   NeilBrown   md/raid1: clean u...
554
555
556
557
558
559
560
561
  		dist = abs(this_sector - conf->mirrors[disk].head_position);
  		if (choose_first
  		    /* Don't change to another disk for sequential reads */
  		    || conf->next_seq_sect == this_sector
  		    || dist == 0
  		    /* If device is idle, use it */
  		    || atomic_read(&rdev->nr_pending) == 0) {
  			best_disk = disk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
562
563
  			break;
  		}
76073054c   NeilBrown   md/raid1: clean u...
564
565
566
  		if (dist < best_dist) {
  			best_dist = dist;
  			best_disk = disk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567
  		}
f3ac8bf7c   NeilBrown   md: tidy up devic...
568
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
569

76073054c   NeilBrown   md/raid1: clean u...
570
571
  	if (best_disk >= 0) {
  		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
8ddf9efe6   NeilBrown   [PATCH] md: suppo...
572
573
574
  		if (!rdev)
  			goto retry;
  		atomic_inc(&rdev->nr_pending);
76073054c   NeilBrown   md/raid1: clean u...
575
  		if (test_bit(Faulty, &rdev->flags)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
576
577
578
  			/* cannot risk returning a device that failed
  			 * before we inc'ed nr_pending
  			 */
03c902e17   NeilBrown   [PATCH] md: fix r...
579
  			rdev_dec_pending(rdev, conf->mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
580
581
  			goto retry;
  		}
d2eb35acf   NeilBrown   md/raid1: avoid r...
582
  		sectors = best_good_sectors;
8ddf9efe6   NeilBrown   [PATCH] md: suppo...
583
  		conf->next_seq_sect = this_sector + sectors;
76073054c   NeilBrown   md/raid1: clean u...
584
  		conf->last_used = best_disk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
585
586
  	}
  	rcu_read_unlock();
d2eb35acf   NeilBrown   md/raid1: avoid r...
587
  	*max_sectors = sectors;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
588

76073054c   NeilBrown   md/raid1: clean u...
589
  	return best_disk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
590
  }
fd01b88c7   NeilBrown   md: remove typede...
591
  int md_raid1_congested(struct mddev *mddev, int bits)
0d1292282   NeilBrown   [PATCH] md: defin...
592
  {
e80963604   NeilBrown   md/raid1: typedef...
593
  	struct r1conf *conf = mddev->private;
0d1292282   NeilBrown   [PATCH] md: defin...
594
  	int i, ret = 0;
34db0cd60   NeilBrown   md: add proper wr...
595
596
597
  	if ((bits & (1 << BDI_async_congested)) &&
  	    conf->pending_count >= max_queued_requests)
  		return 1;
0d1292282   NeilBrown   [PATCH] md: defin...
598
  	rcu_read_lock();
301946364   NeilBrown   md/raid1: Replac...
599
  	for (i = 0; i < conf->raid_disks; i++) {
3cb030020   NeilBrown   md: removing type...
600
  		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
0d1292282   NeilBrown   [PATCH] md: defin...
601
  		if (rdev && !test_bit(Faulty, &rdev->flags)) {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
602
  			struct request_queue *q = bdev_get_queue(rdev->bdev);
0d1292282   NeilBrown   [PATCH] md: defin...
603

1ed7242e5   Jonathan Brassow   MD: raid1 changes...
604
  			BUG_ON(!q);
0d1292282   NeilBrown   [PATCH] md: defin...
605
606
607
  			/* Note the '|| 1' - when read_balance prefers
  			 * non-congested targets, it can be removed
  			 */
91a9e99d7   Alexander Beregalov   md/raid1: fix bui...
608
  			if ((bits & (1<<BDI_async_congested)) || 1)
0d1292282   NeilBrown   [PATCH] md: defin...
609
610
611
612
613
614
615
616
  				ret |= bdi_congested(&q->backing_dev_info, bits);
  			else
  				ret &= bdi_congested(&q->backing_dev_info, bits);
  		}
  	}
  	rcu_read_unlock();
  	return ret;
  }
1ed7242e5   Jonathan Brassow   MD: raid1 changes...
617
  EXPORT_SYMBOL_GPL(md_raid1_congested);
0d1292282   NeilBrown   [PATCH] md: defin...
618

1ed7242e5   Jonathan Brassow   MD: raid1 changes...
619
620
  /*
   * Congestion callback: @data is the mddev.  Returns 1 if either the
   * generic md check or the raid1-specific check reports congestion for
   * @bits, 0 otherwise.  The raid1 check is skipped when the generic one
   * already says yes.
   */
  static int raid1_congested(void *data, int bits)
  {
  	struct mddev *mddev = data;
  
  	if (mddev_congested(mddev, bits))
  		return 1;
  	return md_raid1_congested(mddev, bits) ? 1 : 0;
  }
0d1292282   NeilBrown   [PATCH] md: defin...
626

e80963604   NeilBrown   md/raid1: typedef...
627
  /*
   * flush_pending_writes - submit every bio queued on pending_bio_list.
   *
   * The whole list is detached and pending_count reset to 0 while holding
   * conf->device_lock.  With the lock released, bitmap_unplug() is called,
   * waiters on conf->wait_barrier are woken, and each detached bio is
   * handed to generic_make_request() in list order.  If the list is empty
   * the lock is simply dropped.
   */
  static void flush_pending_writes(struct r1conf *conf)
a35e63efa   NeilBrown   md: fix deadlock ...
628
629
630
  {
  	/* Any writes that have been queued but are awaiting
  	 * bitmap updates get flushed here.
a35e63efa   NeilBrown   md: fix deadlock ...
631
  	 */
a35e63efa   NeilBrown   md: fix deadlock ...
632
633
634
635
636
  	spin_lock_irq(&conf->device_lock);
  
  	if (conf->pending_bio_list.head) {
  		struct bio *bio;
  		bio = bio_list_get(&conf->pending_bio_list);
34db0cd60   NeilBrown   md: add proper wr...
637
  		conf->pending_count = 0;
a35e63efa   NeilBrown   md: fix deadlock ...
638
639
640
641
  		spin_unlock_irq(&conf->device_lock);
  		/* flush any pending bitmap writes to
  		 * disk before proceeding w/ I/O */
  		bitmap_unplug(conf->mddev->bitmap);
34db0cd60   NeilBrown   md: add proper wr...
642
  		/* make_request() sleeps on wait_barrier while
  		 * pending_count >= max_queued_requests */
  		wake_up(&conf->wait_barrier);
a35e63efa   NeilBrown   md: fix deadlock ...
643
644
645
646
647
648
649
  
  		while (bio) { /* submit pending writes */
  			struct bio *next = bio->bi_next;
  			bio->bi_next = NULL;
  			generic_make_request(bio);
  			bio = next;
  		}
a35e63efa   NeilBrown   md: fix deadlock ...
650
651
  	} else
  		spin_unlock_irq(&conf->device_lock);
7eaceacca   Jens Axboe   block: remove per...
652
  }
17999be4a   NeilBrown   [PATCH] md: impro...
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
  /* Barriers....
   * Sometimes we need to suspend IO while we do something else,
   * either some resync/recovery, or reconfigure the array.
   * To do this we raise a 'barrier'.
   * The 'barrier' is a counter that can be raised multiple times
   * to count how many activities are happening which preclude
   * normal IO.
   * We can only raise the barrier if there is no pending IO.
   * i.e. if nr_pending == 0.
   * We choose only to raise the barrier if no-one is waiting for the
   * barrier to go down.  This means that as soon as an IO request
   * is ready, no other operations which require a barrier will start
   * until the IO request has had a chance.
   *
   * So: regular IO calls 'wait_barrier'.  When that returns there
   *    is no background IO happening.  It must arrange to call
   *    allow_barrier when it has finished its IO.
   * Background IO must call raise_barrier.  Once that returns
   *    there is no normal IO happening.  It must arrange to call
   *    lower_barrier when the particular background IO completes.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
673
674
   */
  /* raise_barrier() does not return while conf->barrier is at or above
   * this value.
   */
  #define RESYNC_DEPTH 32
e80963604   NeilBrown   md/raid1: typedef...
675
  /*
   * raise_barrier - raise the barrier by one and wait for normal IO to drain.
   *
   * Under conf->resync_lock: waits until nr_waiting is zero, increments
   * conf->barrier, then waits until nr_pending is zero and barrier is
   * below RESYNC_DEPTH.  Note that the depth test is evaluated after the
   * increment.  Both waits sleep on conf->wait_barrier.
   */
  static void raise_barrier(struct r1conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
676
677
  {
  	spin_lock_irq(&conf->resync_lock);
17999be4a   NeilBrown   [PATCH] md: impro...
678
679
680
  
  	/* Wait until no block IO is waiting */
  	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
c3b328ac8   NeilBrown   md: fix up raid1/...
681
  			    conf->resync_lock, );
17999be4a   NeilBrown   [PATCH] md: impro...
682
683
684
  
  	/* block any new IO from starting */
  	conf->barrier++;
046abeede   NeilBrown   md/raid1: fix som...
685
  	/* Now wait for all pending IO to complete */
17999be4a   NeilBrown   [PATCH] md: impro...
686
687
  	wait_event_lock_irq(conf->wait_barrier,
  			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
c3b328ac8   NeilBrown   md: fix up raid1/...
688
  			    conf->resync_lock, );
17999be4a   NeilBrown   [PATCH] md: impro...
689
690
691
  
  	spin_unlock_irq(&conf->resync_lock);
  }
e80963604   NeilBrown   md/raid1: typedef...
692
  /*
   * lower_barrier - drop one level of the barrier.
   *
   * Decrements conf->barrier under conf->resync_lock (irqsave variant)
   * and then wakes everything sleeping on conf->wait_barrier.  BUGs if
   * the barrier is not currently raised; that check is made before the
   * lock is taken.
   */
  static void lower_barrier(struct r1conf *conf)
17999be4a   NeilBrown   [PATCH] md: impro...
693
694
  {
  	unsigned long flags;
709ae4879   NeilBrown   md/raid1: add tak...
695
  	BUG_ON(conf->barrier <= 0);
17999be4a   NeilBrown   [PATCH] md: impro...
696
697
698
699
700
  	spin_lock_irqsave(&conf->resync_lock, flags);
  	conf->barrier--;
  	spin_unlock_irqrestore(&conf->resync_lock, flags);
  	wake_up(&conf->wait_barrier);
  }
e80963604   NeilBrown   md/raid1: typedef...
701
  /*
   * wait_barrier - register one normal IO, waiting for the barrier first.
   *
   * If conf->barrier is non-zero the caller is counted in nr_waiting and
   * sleeps on conf->wait_barrier until the barrier reaches zero.  In all
   * cases nr_pending is then incremented; the matching decrement is done
   * by allow_barrier().  Everything happens under conf->resync_lock.
   */
  static void wait_barrier(struct r1conf *conf)
17999be4a   NeilBrown   [PATCH] md: impro...
702
703
704
705
706
707
  {
  	spin_lock_irq(&conf->resync_lock);
  	if (conf->barrier) {
  		conf->nr_waiting++;
  		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
  				    conf->resync_lock,
c3b328ac8   NeilBrown   md: fix up raid1/...
708
  				    );
17999be4a   NeilBrown   [PATCH] md: impro...
709
  		conf->nr_waiting--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
710
  	}
17999be4a   NeilBrown   [PATCH] md: impro...
711
  	conf->nr_pending++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
712
713
  	spin_unlock_irq(&conf->resync_lock);
  }
e80963604   NeilBrown   md/raid1: typedef...
714
  /*
   * allow_barrier - undo the nr_pending increment made by wait_barrier().
   *
   * Decrements conf->nr_pending under conf->resync_lock (irqsave variant)
   * and wakes sleepers on conf->wait_barrier, e.g. raise_barrier() which
   * waits for nr_pending to reach zero.
   */
  static void allow_barrier(struct r1conf *conf)
17999be4a   NeilBrown   [PATCH] md: impro...
715
716
717
718
719
720
721
  {
  	unsigned long flags;
  	spin_lock_irqsave(&conf->resync_lock, flags);
  	conf->nr_pending--;
  	spin_unlock_irqrestore(&conf->resync_lock, flags);
  	wake_up(&conf->wait_barrier);
  }
e80963604   NeilBrown   md/raid1: typedef...
722
  /*
   * freeze_array - block new IO and wait for in-flight IO to settle.
   *
   * Counterpart of unfreeze_array(), which reverses the barrier and
   * nr_waiting increments made here.
   */
  static void freeze_array(struct r1conf *conf)
ddaf22aba   NeilBrown   [PATCH] md: attem...
723
724
725
726
  {
  	/* stop syncio and normal IO and wait for everything to
  	 * go quiet.
  	 * We increment barrier and nr_waiting, and then
1c830532f   NeilBrown   md: fix possible ...
727
728
729
730
731
732
733
734
  	 * wait until nr_pending match nr_queued+1
  	 * This is called in the context of one normal IO request
  	 * that has failed. Thus any sync request that might be pending
  	 * will be blocked by nr_pending, and we need to wait for
  	 * pending IO requests to complete or be queued for re-try.
  	 * Thus the number queued (nr_queued) plus this request (1)
  	 * must match the number of pending IOs (nr_pending) before
  	 * we continue.
ddaf22aba   NeilBrown   [PATCH] md: attem...
735
736
737
738
739
  	 */
  	spin_lock_irq(&conf->resync_lock);
  	conf->barrier++;
  	conf->nr_waiting++;
  	/* NOTE(review): flush_pending_writes(conf) is passed as the last
  	 * macro argument - presumably it is run while waiting so queued
  	 * writes can still complete; confirm against the definition of
  	 * wait_event_lock_irq.
  	 */
  	wait_event_lock_irq(conf->wait_barrier,
1c830532f   NeilBrown   md: fix possible ...
740
  			    conf->nr_pending == conf->nr_queued+1,
ddaf22aba   NeilBrown   [PATCH] md: attem...
741
  			    conf->resync_lock,
c3b328ac8   NeilBrown   md: fix up raid1/...
742
  			    flush_pending_writes(conf));
ddaf22aba   NeilBrown   [PATCH] md: attem...
743
744
  	spin_unlock_irq(&conf->resync_lock);
  }
e80963604   NeilBrown   md/raid1: typedef...
745
  /*
   * unfreeze_array - undo freeze_array().
   *
   * Under conf->resync_lock, decrements both barrier and nr_waiting and
   * wakes sleepers on conf->wait_barrier.  The wake-up is issued while the
   * lock is still held.
   */
  static void unfreeze_array(struct r1conf *conf)
ddaf22aba   NeilBrown   [PATCH] md: attem...
746
747
748
749
750
751
752
753
  {
  	/* reverse the effect of the freeze */
  	spin_lock_irq(&conf->resync_lock);
  	conf->barrier--;
  	conf->nr_waiting--;
  	wake_up(&conf->wait_barrier);
  	spin_unlock_irq(&conf->resync_lock);
  }
17999be4a   NeilBrown   [PATCH] md: impro...
754

4e78064f4   NeilBrown   md: Fix possible ...
755
  /* duplicate the data pages for behind I/O
4e78064f4   NeilBrown   md: Fix possible ...
756
   *
   * Allocates a zeroed array of bio->bi_vcnt bio_vecs and, for every
   * segment of @bio, copies the bio_vec, gives it a freshly allocated
   * page and copies bv_len bytes at bv_offset from the original page.
   *
   * On success the array is stored in r1_bio->behind_bvecs,
   * behind_page_count is set to bio->bi_vcnt and R1BIO_BehindIO is set in
   * r1_bio->state.  On any allocation failure everything allocated here
   * is released and those fields are left untouched, so the caller can
   * tell the outcome only from R1BIO_BehindIO / behind_bvecs.
   */
9f2c9d12b   NeilBrown   md: remove typede...
757
  static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
4b6d287f6   NeilBrown   [PATCH] md: add w...
758
759
760
  {
  	int i;
  	struct bio_vec *bvec;
2ca68f5ed   NeilBrown   md/raid1: store b...
761
  	struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
4b6d287f6   NeilBrown   [PATCH] md: add w...
762
  					GFP_NOIO);
2ca68f5ed   NeilBrown   md/raid1: store b...
763
  	if (unlikely(!bvecs))
af6d7b760   NeilBrown   md/raid1: improve...
764
  		return;
4b6d287f6   NeilBrown   [PATCH] md: add w...
765

4b6d287f6   NeilBrown   [PATCH] md: add w...
766
  	bio_for_each_segment(bvec, bio, i) {
2ca68f5ed   NeilBrown   md/raid1: store b...
767
768
769
  		/* keep offset/length, but point at a private page */
  		bvecs[i] = *bvec;
  		bvecs[i].bv_page = alloc_page(GFP_NOIO);
  		if (unlikely(!bvecs[i].bv_page))
4b6d287f6   NeilBrown   [PATCH] md: add w...
770
  			goto do_sync_io;
2ca68f5ed   NeilBrown   md/raid1: store b...
771
772
773
  		memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
  		       kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
  		kunmap(bvecs[i].bv_page);
4b6d287f6   NeilBrown   [PATCH] md: add w...
774
775
  		kunmap(bvec->bv_page);
  	}
2ca68f5ed   NeilBrown   md/raid1: store b...
776
  	r1_bio->behind_bvecs = bvecs;
af6d7b760   NeilBrown   md/raid1: improve...
777
778
779
  	r1_bio->behind_page_count = bio->bi_vcnt;
  	set_bit(R1BIO_BehindIO, &r1_bio->state);
  	return;
4b6d287f6   NeilBrown   [PATCH] md: add w...
780
781
  
  do_sync_io:
af6d7b760   NeilBrown   md/raid1: improve...
782
  	/* bvecs came from kzalloc, so entries that were never filled in
  	 * (and the one whose alloc_page failed) have a NULL bv_page */
  	for (i = 0; i < bio->bi_vcnt; i++)
2ca68f5ed   NeilBrown   md/raid1: store b...
783
784
785
  		if (bvecs[i].bv_page)
  			put_page(bvecs[i].bv_page);
  	kfree(bvecs);
36a4e1fe0   NeilBrown   md: remove PRINTK...
786
787
  	pr_debug("%dB behind alloc failed, doing sync I/O
  ", bio->bi_size);
4b6d287f6   NeilBrown   [PATCH] md: add w...
788
  }
b4fdcb02f   Linus Torvalds   Merge branch 'for...
789
  /*
   * make_request - map one incoming bio onto the mirror devices.
   *
   * Common entry: md_write_start(), an interruptible wait while a WRITE
   * overlaps [suspend_lo, suspend_hi), wait_barrier(), then an r1bio is
   * taken from conf->r1bio_pool and initialised from @bio.
   *
   * READ: read_balance() picks a mirror and the number of sectors it can
   * serve.  A clone trimmed to that length is prepared; if it covers the
   * whole r1bio it is issued with generic_make_request(), otherwise the
   * r1bio is passed to reschedule_retry() and a new r1bio is set up for
   * the remainder (label read_again).
   *
   * WRITE: waits while pending_count >= max_queued_requests, selects the
   * target devices under rcu_read_lock() (skipping missing/Faulty devices
   * and known bad blocks, and starting over at retry_write after waiting
   * when a device is Blocked), then clones @bio once per selected device
   * and appends the clones to conf->pending_bio_list.  When the write had
   * to be shortened, a fresh r1bio is allocated for the rest and control
   * returns to retry_write.
   */
  static void make_request(struct mddev *mddev, struct bio * bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
790
  {
e80963604   NeilBrown   md/raid1: typedef...
791
  	struct r1conf *conf = mddev->private;
0f6d02d58   NeilBrown   md: remove typede...
792
  	struct mirror_info *mirror;
9f2c9d12b   NeilBrown   md: remove typede...
793
  	struct r1bio *r1_bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
794
  	struct bio *read_bio;
1f68f0c4b   NeilBrown   md/raid1: avoid w...
795
  	int i, disks;
84255d101   NeilBrown   md: fix possible ...
796
  	struct bitmap *bitmap;
191ea9b2c   NeilBrown   [PATCH] md: raid1...
797
  	unsigned long flags;
a362357b6   Jens Axboe   [BLOCK] Unify the...
798
  	const int rw = bio_data_dir(bio);
2c7d46ec1   NeilBrown   md raid-1/10 Fix ...
799
  	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
e9c7469bb   Tejun Heo   md: implment REQ_...
800
  	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
3cb030020   NeilBrown   md: removing type...
801
  	struct md_rdev *blocked_rdev;
c3b328ac8   NeilBrown   md: fix up raid1/...
802
  	int plugged;
1f68f0c4b   NeilBrown   md/raid1: avoid w...
803
804
805
  	int first_clone;
  	int sectors_handled;
  	int max_sectors;
191ea9b2c   NeilBrown   [PATCH] md: raid1...
806

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807
808
809
810
811
  	/*
  	 * Register the new request and wait if the reconstruction
  	 * thread has put up a bar for new requests.
  	 * Continue immediately if no resync is active currently.
  	 */
62de608da   NeilBrown   [PATCH] md: Impro...
812

3d310eb7b   NeilBrown   [PATCH] md: fix d...
813
  	md_write_start(mddev, bio); /* wait on superblock update early */
6eef4b21f   NeilBrown   md: add honouring...
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
  	if (bio_data_dir(bio) == WRITE &&
  	    bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
  	    bio->bi_sector < mddev->suspend_hi) {
  		/* As the suspend_* range is controlled by
  		 * userspace, we want an interruptible
  		 * wait.
  		 */
  		DEFINE_WAIT(w);
  		for (;;) {
  			flush_signals(current);
  			prepare_to_wait(&conf->wait_barrier,
  					&w, TASK_INTERRUPTIBLE);
  			if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
  			    bio->bi_sector >= mddev->suspend_hi)
  				break;
  			schedule();
  		}
  		finish_wait(&conf->wait_barrier, &w);
  	}
62de608da   NeilBrown   [PATCH] md: Impro...
833

17999be4a   NeilBrown   [PATCH] md: impro...
834
  	wait_barrier(conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
835

84255d101   NeilBrown   md: fix possible ...
836
  	bitmap = mddev->bitmap;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
837
838
839
840
841
842
843
844
845
  	/*
  	 * make_request() can abort the operation when READA is being
  	 * used and no empty request is available.
  	 *
  	 */
  	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
  
  	r1_bio->master_bio = bio;
  	r1_bio->sectors = bio->bi_size >> 9;
191ea9b2c   NeilBrown   [PATCH] md: raid1...
846
  	r1_bio->state = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
847
848
  	r1_bio->mddev = mddev;
  	r1_bio->sector = bio->bi_sector;
d2eb35acf   NeilBrown   md/raid1: avoid r...
849
850
851
852
853
854
855
856
857
  	/* We might need to issue multiple reads to different
  	 * devices if there are bad blocks around, so we keep
  	 * track of the number of reads in bio->bi_phys_segments.
  	 * If this is 0, there is only one r1_bio and no locking
  	 * will be needed when requests complete.  If it is
  	 * non-zero, then it is the number of not-completed requests.
  	 */
  	bio->bi_phys_segments = 0;
  	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
a362357b6   Jens Axboe   [BLOCK] Unify the...
858
  	if (rw == READ) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
859
860
861
  		/*
  		 * read balancing logic:
  		 */
d2eb35acf   NeilBrown   md/raid1: avoid r...
862
863
864
865
  		int rdisk;
  
  read_again:
  		rdisk = read_balance(conf, r1_bio, &max_sectors);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
866
867
868
869
  
  		if (rdisk < 0) {
  			/* couldn't find anywhere to read from */
  			raid_end_bio_io(r1_bio);
5a7bbad27   Christoph Hellwig   block: remove sup...
870
  			return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
871
872
  		}
  		mirror = conf->mirrors + rdisk;
e555190d8   NeilBrown   md/raid1: delay r...
873
874
875
876
877
878
879
880
881
  		if (test_bit(WriteMostly, &mirror->rdev->flags) &&
  		    bitmap) {
  			/* Reading from a write-mostly device must
  			 * take care not to over-take any writes
  			 * that are 'behind'
  			 */
  			wait_event(bitmap->behind_wait,
  				   atomic_read(&bitmap->behind_writes) == 0);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
882
  		r1_bio->read_disk = rdisk;
a167f6632   NeilBrown   md: use separate ...
883
  		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
d2eb35acf   NeilBrown   md/raid1: avoid r...
884
885
  		md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector,
  			    max_sectors);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
886
887
888
889
890
891
  
  		r1_bio->bios[rdisk] = read_bio;
  
  		read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
  		read_bio->bi_bdev = mirror->rdev->bdev;
  		read_bio->bi_end_io = raid1_end_read_request;
7b6d91dae   Christoph Hellwig   block: unify flag...
892
  		read_bio->bi_rw = READ | do_sync;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
893
  		read_bio->bi_private = r1_bio;
d2eb35acf   NeilBrown   md/raid1: avoid r...
894
895
896
897
  		if (max_sectors < r1_bio->sectors) {
  			/* could not read all from this device, so we will
  			 * need another r1_bio.
  			 */
d2eb35acf   NeilBrown   md/raid1: avoid r...
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
  
  			sectors_handled = (r1_bio->sector + max_sectors
  					   - bio->bi_sector);
  			r1_bio->sectors = max_sectors;
  			spin_lock_irq(&conf->device_lock);
  			if (bio->bi_phys_segments == 0)
  				bio->bi_phys_segments = 2;
  			else
  				bio->bi_phys_segments++;
  			spin_unlock_irq(&conf->device_lock);
  			/* Cannot call generic_make_request directly
  			 * as that will be queued in __make_request
  			 * and subsequent mempool_alloc might block waiting
  			 * for it.  So hand bio over to raid1d.
  			 */
  			reschedule_retry(r1_bio);
  
  			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
  
  			r1_bio->master_bio = bio;
  			r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
  			r1_bio->state = 0;
  			r1_bio->mddev = mddev;
  			r1_bio->sector = bio->bi_sector + sectors_handled;
  			goto read_again;
  		} else
  			generic_make_request(read_bio);
5a7bbad27   Christoph Hellwig   block: remove sup...
925
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
926
927
928
929
930
  	}
  
  	/*
  	 * WRITE:
  	 */
34db0cd60   NeilBrown   md: add proper wr...
931
932
933
934
935
  	/* throttle: flush_pending_writes() resets pending_count and
  	 * wakes wait_barrier */
  	if (conf->pending_count >= max_queued_requests) {
  		md_wakeup_thread(mddev->thread);
  		wait_event(conf->wait_barrier,
  			   conf->pending_count < max_queued_requests);
  	}
1f68f0c4b   NeilBrown   md/raid1: avoid w...
936
  	/* first select target devices under rcu_lock and
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
937
938
  	 * inc refcount on their rdev.  Record them by setting
  	 * bios[x] to bio
1f68f0c4b   NeilBrown   md/raid1: avoid w...
939
940
941
942
943
944
  	 * If there are known/acknowledged bad blocks on any device on
  	 * which we have seen a write error, we want to avoid writing those
  	 * blocks.
  	 * This potentially requires several writes to write around
  	 * the bad blocks.  Each set of writes gets its own r1bio
  	 * with a set of bios attached.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
945
  	 */
c3b328ac8   NeilBrown   md: fix up raid1/...
946
  	plugged = mddev_check_plugged(mddev);
8f19ccb2f   NeilBrown   md/raid1: Allocat...
947
  	/* the scan covers 2 * raid_disks slots; only an empty or Faulty
  	 * slot below raid_disks marks the r1bio as Degraded */
  	disks = conf->raid_disks * 2;
6bfe0b499   Dan Williams   md: support block...
948
949
   retry_write:
  	blocked_rdev = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
950
  	rcu_read_lock();
1f68f0c4b   NeilBrown   md/raid1: avoid w...
951
  	max_sectors = r1_bio->sectors;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
952
  	for (i = 0;  i < disks; i++) {
3cb030020   NeilBrown   md: removing type...
953
  		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
6bfe0b499   Dan Williams   md: support block...
954
955
956
957
958
  		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
  			atomic_inc(&rdev->nr_pending);
  			blocked_rdev = rdev;
  			break;
  		}
1f68f0c4b   NeilBrown   md/raid1: avoid w...
959
960
  		r1_bio->bios[i] = NULL;
  		if (!rdev || test_bit(Faulty, &rdev->flags)) {
8f19ccb2f   NeilBrown   md/raid1: Allocat...
961
962
  			if (i < conf->raid_disks)
  				set_bit(R1BIO_Degraded, &r1_bio->state);
1f68f0c4b   NeilBrown   md/raid1: avoid w...
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
  			continue;
  		}
  
  		atomic_inc(&rdev->nr_pending);
  		if (test_bit(WriteErrorSeen, &rdev->flags)) {
  			sector_t first_bad;
  			int bad_sectors;
  			int is_bad;
  
  			is_bad = is_badblock(rdev, r1_bio->sector,
  					     max_sectors,
  					     &first_bad, &bad_sectors);
  			if (is_bad < 0) {
  				/* mustn't write here until the bad block is
  				 * acknowledged*/
  				set_bit(BlockedBadBlocks, &rdev->flags);
  				blocked_rdev = rdev;
  				break;
  			}
  			if (is_bad && first_bad <= r1_bio->sector) {
  				/* Cannot write here at all */
  				bad_sectors -= (r1_bio->sector - first_bad);
  				if (bad_sectors < max_sectors)
  					/* mustn't write more than bad_sectors
  					 * to other devices yet
  					 */
  					max_sectors = bad_sectors;
03c902e17   NeilBrown   [PATCH] md: fix r...
990
  				rdev_dec_pending(rdev, mddev);
1f68f0c4b   NeilBrown   md/raid1: avoid w...
991
992
993
994
995
996
997
998
999
1000
1001
  				/* We don't set R1BIO_Degraded as that
  				 * only applies if the disk is
  				 * missing, so it might be re-added,
  				 * and we want to know to recover this
  				 * chunk.
  				 * In this case the device is here,
  				 * and the fact that this chunk is not
  				 * in-sync is recorded in the bad
  				 * block log
  				 */
  				continue;
964147d5c   NeilBrown   md/raid1: fix cou...
1002
  			}
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1003
1004
1005
1006
1007
1008
1009
  			if (is_bad) {
  				int good_sectors = first_bad - r1_bio->sector;
  				if (good_sectors < max_sectors)
  					max_sectors = good_sectors;
  			}
  		}
  		r1_bio->bios[i] = bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1010
1011
  	}
  	rcu_read_unlock();
6bfe0b499   Dan Williams   md: support block...
1012
1013
1014
1015
1016
1017
1018
  	if (unlikely(blocked_rdev)) {
  		/* Wait for this device to become unblocked */
  		int j;
  
  		/* release the nr_pending references taken above on every
  		 * device already selected (bios[j] set) before index i */
  		for (j = 0; j < i; j++)
  			if (r1_bio->bios[j])
  				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1019
  		r1_bio->state = 0;
6bfe0b499   Dan Williams   md: support block...
1020
1021
1022
1023
1024
  		allow_barrier(conf);
  		/* NOTE(review): the nr_pending reference taken on
  		 * blocked_rdev is not dropped in this function - presumably
  		 * md_wait_for_blocked_rdev() releases it; confirm. */
  		md_wait_for_blocked_rdev(blocked_rdev, mddev);
  		wait_barrier(conf);
  		goto retry_write;
  	}
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
  	if (max_sectors < r1_bio->sectors) {
  		/* We are splitting this write into multiple parts, so
  		 * we need to prepare for allocating another r1_bio.
  		 */
  		r1_bio->sectors = max_sectors;
  		spin_lock_irq(&conf->device_lock);
  		if (bio->bi_phys_segments == 0)
  			bio->bi_phys_segments = 2;
  		else
  			bio->bi_phys_segments++;
  		spin_unlock_irq(&conf->device_lock);
191ea9b2c   NeilBrown   [PATCH] md: raid1...
1036
  	}
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1037
  	sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector;
4b6d287f6   NeilBrown   [PATCH] md: add w...
1038

4e78064f4   NeilBrown   md: Fix possible ...
1039
  	atomic_set(&r1_bio->remaining, 1);
4b6d287f6   NeilBrown   [PATCH] md: add w...
1040
  	atomic_set(&r1_bio->behind_remaining, 0);
06d91a5fe   NeilBrown   [PATCH] md: impro...
1041

1f68f0c4b   NeilBrown   md/raid1: avoid w...
1042
  	first_clone = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1043
1044
1045
1046
  	for (i = 0; i < disks; i++) {
  		struct bio *mbio;
  		if (!r1_bio->bios[i])
  			continue;
a167f6632   NeilBrown   md: use separate ...
1047
  		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
  		md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors);
  
  		if (first_clone) {
  			/* do behind I/O ?
  			 * Not if there are too many, or cannot
  			 * allocate memory, or a reader on WriteMostly
  			 * is waiting for behind writes to flush */
  			if (bitmap &&
  			    (atomic_read(&bitmap->behind_writes)
  			     < mddev->bitmap_info.max_write_behind) &&
  			    !waitqueue_active(&bitmap->behind_wait))
  				alloc_behind_pages(mbio, r1_bio);
  
  			bitmap_startwrite(bitmap, r1_bio->sector,
  					  r1_bio->sectors,
  					  test_bit(R1BIO_BehindIO,
  						   &r1_bio->state));
  			first_clone = 0;
  		}
2ca68f5ed   NeilBrown   md/raid1: store b...
1067
  		if (r1_bio->behind_bvecs) {
4b6d287f6   NeilBrown   [PATCH] md: add w...
1068
1069
1070
1071
1072
1073
1074
  			struct bio_vec *bvec;
  			int j;
  
  			/* Yes, I really want the '__' version so that
  			 * we clear any unused pointer in the io_vec, rather
  			 * than leave them unchanged.  This is important
  			 * because when we come to free the pages, we won't
046abeede   NeilBrown   md/raid1: fix som...
1075
  			 * know the original bi_idx, so we just free
4b6d287f6   NeilBrown   [PATCH] md: add w...
1076
1077
1078
  			 * them all
  			 */
  			__bio_for_each_segment(bvec, mbio, j, 0)
2ca68f5ed   NeilBrown   md/raid1: store b...
1079
  				bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
4b6d287f6   NeilBrown   [PATCH] md: add w...
1080
1081
1082
  			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
  				atomic_inc(&r1_bio->behind_remaining);
  		}
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1083
1084
1085
1086
1087
1088
1089
1090
  		r1_bio->bios[i] = mbio;
  
  		mbio->bi_sector	= (r1_bio->sector +
  				   conf->mirrors[i].rdev->data_offset);
  		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
  		mbio->bi_end_io	= raid1_end_write_request;
  		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
  		mbio->bi_private = r1_bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1091
  		atomic_inc(&r1_bio->remaining);
4e78064f4   NeilBrown   md: Fix possible ...
1092
1093
  		/* queued here; actually submitted later by
  		 * flush_pending_writes() */
  		spin_lock_irqsave(&conf->device_lock, flags);
  		bio_list_add(&conf->pending_bio_list, mbio);
34db0cd60   NeilBrown   md: add proper wr...
1094
  		conf->pending_count++;
4e78064f4   NeilBrown   md: Fix possible ...
1095
  		spin_unlock_irqrestore(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1096
  	}
079fa166a   NeilBrown   md/raid1,10: Remo...
1097
1098
1099
  	/* Mustn't call r1_bio_write_done before this next test,
  	 * as it could result in the bio being freed.
  	 */
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1100
  	if (sectors_handled < (bio->bi_size >> 9)) {
079fa166a   NeilBrown   md/raid1,10: Remo...
1101
  		r1_bio_write_done(r1_bio);
1f68f0c4b   NeilBrown   md/raid1: avoid w...
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
  		/* We need another r1_bio.  It has already been counted
  		 * in bio->bi_phys_segments
  		 */
  		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
  		r1_bio->master_bio = bio;
  		r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
  		r1_bio->state = 0;
  		r1_bio->mddev = mddev;
  		r1_bio->sector = bio->bi_sector + sectors_handled;
  		goto retry_write;
  	}
079fa166a   NeilBrown   md/raid1,10: Remo...
1113
1114
1115
1116
  	r1_bio_write_done(r1_bio);
  
  	/* In case raid1d snuck in to freeze_array */
  	wake_up(&conf->wait_barrier);
c3b328ac8   NeilBrown   md: fix up raid1/...
1117
  	if (do_sync || !bitmap || !plugged)
e3881a681   Lars Ellenberg   [PATCH] md: pass ...
1118
  		md_wakeup_thread(mddev->thread);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1119
  }
fd01b88c7   NeilBrown   md: remove typede...
1120
  /*
   * status - append the RAID1 state summary to @seq.
   *
   * Emits " [<raid_disks>/<raid_disks - degraded>] [", then one character
   * per mirror slot below raid_disks - "U" when the slot holds an rdev
   * with In_sync set, "_" otherwise - and a closing "]".  The slots are
   * read under rcu_read_lock().
   */
  static void status(struct seq_file *seq, struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1121
  {
e80963604   NeilBrown   md/raid1: typedef...
1122
  	struct r1conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
1124
1125
  	int i;
  
  	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
11ce99e62   NeilBrown   [PATCH] md: Remov...
1126
  		   conf->raid_disks - mddev->degraded);
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1127
1128
  	rcu_read_lock();
  	for (i = 0; i < conf->raid_disks; i++) {
3cb030020   NeilBrown   md: removing type...
1129
  		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1130
  		seq_printf(seq, "%s",
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1131
1132
1133
  			   rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
  	}
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1134
1135
  	seq_printf(seq, "]");
  }
fd01b88c7   NeilBrown   md: remove typede...
1136
  /*
   * error - handle a failure reported against @rdev.
   *
   * If @rdev is In_sync and only one working device remains
   * (raid_disks - degraded == 1) the device is NOT failed:
   * conf->recovery_disabled is copied from mddev->recovery_disabled and
   * the function returns.
   *
   * Otherwise Blocked and Faulty are set on @rdev.  If In_sync was set it
   * is cleared, mddev->degraded is incremented under conf->device_lock
   * and MD_RECOVERY_INTR is set in mddev->recovery.  In both cases
   * MD_CHANGE_DEVS is set in mddev->flags and a KERN_ALERT message is
   * printed with the failed device name and the remaining device count.
   */
  static void error(struct mddev *mddev, struct md_rdev *rdev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1137
1138
  {
  	char b[BDEVNAME_SIZE];
e80963604   NeilBrown   md/raid1: typedef...
1139
  	struct r1conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1140
1141
1142
1143
1144
1145
1146
  
  	/*
  	 * If it is not operational, then we have already marked it as dead
  	 * else if it is the last working disks, ignore the error, let the
  	 * next level up know.
  	 * else mark the drive as failed
  	 */
b2d444d7a   NeilBrown   [PATCH] md: conve...
1147
  	if (test_bit(In_sync, &rdev->flags)
4044ba58d   NeilBrown   md: don't retry r...
1148
  	    && (conf->raid_disks - mddev->degraded) == 1) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1149
1150
  		/*
  		 * Don't fail the drive, act as though we were just a
4044ba58d   NeilBrown   md: don't retry r...
1151
1152
1153
  		 * normal single drive.
  		 * However don't try a recovery from this drive as
  		 * it is very likely to fail.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1154
  		 */
5389042ff   NeilBrown   md: change manage...
1155
  		conf->recovery_disabled = mddev->recovery_disabled;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1156
  		return;
4044ba58d   NeilBrown   md: don't retry r...
1157
  	}
de393cdea   NeilBrown   md: make it easie...
1158
  	set_bit(Blocked, &rdev->flags);
c04be0aa8   NeilBrown   [PATCH] md: Impro...
1159
1160
1161
  	if (test_and_clear_bit(In_sync, &rdev->flags)) {
  		unsigned long flags;
  		spin_lock_irqsave(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1162
  		mddev->degraded++;
dd00a99e7   NeilBrown   md: avoid a possi...
1163
  		set_bit(Faulty, &rdev->flags);
c04be0aa8   NeilBrown   [PATCH] md: Impro...
1164
  		spin_unlock_irqrestore(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1165
1166
1167
  		/*
  		 * if recovery is running, make sure it aborts.
  		 */
dfc706450   NeilBrown   md: restart recov...
1168
  		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
dd00a99e7   NeilBrown   md: avoid a possi...
1169
1170
  	} else
  		set_bit(Faulty, &rdev->flags);
850b2b420   NeilBrown   [PATCH] md: repla...
1171
  	set_bit(MD_CHANGE_DEVS, &mddev->flags);
067032bc6   Joe Perches   md: Fix single pr...
1172
1173
1174
1175
1176
  	printk(KERN_ALERT
  	       "md/raid1:%s: Disk failure on %s, disabling device.
  "
  	       "md/raid1:%s: Operation continuing on %d devices.
  ",
9dd1e2faf   NeilBrown   md/raid1: improve...
1177
1178
  	       mdname(mddev), bdevname(rdev->bdev, b),
  	       mdname(mddev), conf->raid_disks - mddev->degraded);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1179
  }
e80963604   NeilBrown   md/raid1: typedef...
1180
  /*
   * print_conf - dump the RAID1 configuration at KERN_DEBUG level.
   *
   * Prints a header line, then either "(!conf)" when @conf is NULL, or a
   * summary with wd = raid_disks - degraded and rd = raid_disks followed
   * by one line per populated mirror slot below raid_disks, where
   * wo = !In_sync and o = !Faulty.  The slots are read under
   * rcu_read_lock().
   */
  static void print_conf(struct r1conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1181
1182
  {
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1183

9dd1e2faf   NeilBrown   md/raid1: improve...
1184
1185
  	printk(KERN_DEBUG "RAID1 conf printout:
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1186
  	if (!conf) {
9dd1e2faf   NeilBrown   md/raid1: improve...
1187
1188
  		printk(KERN_DEBUG "(!conf)
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1189
1190
  		return;
  	}
9dd1e2faf   NeilBrown   md/raid1: improve...
1191
1192
  	printk(KERN_DEBUG " --- wd:%d rd:%d
  ", conf->raid_disks - conf->mddev->degraded,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1193
  		conf->raid_disks);
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1194
  	rcu_read_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1195
1196
  	for (i = 0; i < conf->raid_disks; i++) {
  		char b[BDEVNAME_SIZE];
3cb030020   NeilBrown   md: removing type...
1197
  		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1198
  		if (rdev)
9dd1e2faf   NeilBrown   md/raid1: improve...
1199
1200
  			printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s
  ",
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1201
1202
1203
  			       i, !test_bit(In_sync, &rdev->flags),
  			       !test_bit(Faulty, &rdev->flags),
  			       bdevname(rdev->bdev,b));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1204
  	}
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1205
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1206
  }
e80963604   NeilBrown   md/raid1: typedef...
1207
  /*
   * close_sync - release the resync buffer pool.
   *
   * Calls wait_barrier() immediately followed by allow_barrier(), which
   * has the effect of waiting until conf->barrier has dropped to zero,
   * then destroys conf->r1buf_pool and clears the pointer.
   */
  static void close_sync(struct r1conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1208
  {
17999be4a   NeilBrown   [PATCH] md: impro...
1209
1210
  	wait_barrier(conf);
  	allow_barrier(conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1211
1212
1213
1214
  
  	mempool_destroy(conf->r1buf_pool);
  	conf->r1buf_pool = NULL;
  }
fd01b88c7   NeilBrown   md: remove typede...
1215
  /*
   * raid1_spare_active - mark recovered devices In_sync and update the
   * degraded count.
   *
   * For each slot i below raid_disks:
   *  - if the replacement in mirrors[raid_disks + i] has
   *    recovery_offset == MaxSector, is not Faulty and was not yet In_sync,
   *    it becomes In_sync; the original device, if any, loses In_sync and
   *    is marked Faulty.  The slot is counted unless the original was
   *    In_sync.
   *  - if the original device is present, not Faulty and was not yet
   *    In_sync, it becomes In_sync and the slot is counted.
   *
   * mddev->degraded is reduced by the count under conf->device_lock.
   * Returns the count.
   */
  static int raid1_spare_active(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1216
1217
  {
  	int i;
e80963604   NeilBrown   md/raid1: typedef...
1218
  	struct r1conf *conf = mddev->private;
6b9656205   NeilBrown   md: provide appro...
1219
1220
  	int count = 0;
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1221
1222
1223
  
  	/*
  	 * Find all failed disks within the RAID1 configuration 
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1224
1225
  	 * and mark them readable.
  	 * Called under mddev lock, so rcu protection not needed.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1226
1227
  	 */
  	for (i = 0; i < conf->raid_disks; i++) {
3cb030020   NeilBrown   md: removing type...
1228
  		struct md_rdev *rdev = conf->mirrors[i].rdev;
8c7a2c2bc   NeilBrown   md/raid1: handle ...
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
  		struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
  		if (repl
  		    && repl->recovery_offset == MaxSector
  		    && !test_bit(Faulty, &repl->flags)
  		    && !test_and_set_bit(In_sync, &repl->flags)) {
  			/* replacement has just become active */
  			if (!rdev ||
  			    !test_and_clear_bit(In_sync, &rdev->flags))
  				count++;
  			if (rdev) {
  				/* Replaced device not technically
  				 * faulty, but we need to be sure
  				 * it gets removed and never re-added
  				 */
  				set_bit(Faulty, &rdev->flags);
  				sysfs_notify_dirent_safe(
  					rdev->sysfs_state);
  			}
  		}
ddac7c7e3   NeilBrown   [PATCH] md: Fix i...
1248
1249
  		/* an original just marked Faulty above is skipped here */
  		if (rdev
  		    && !test_bit(Faulty, &rdev->flags)
c04be0aa8   NeilBrown   [PATCH] md: Impro...
1250
  		    && !test_and_set_bit(In_sync, &rdev->flags)) {
6b9656205   NeilBrown   md: provide appro...
1251
  			count++;
654e8b5ab   Jonathan Brassow   MD: raid1 s/sysfs...
1252
  			sysfs_notify_dirent_safe(rdev->sysfs_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1253
1254
  		}
  	}
6b9656205   NeilBrown   md: provide appro...
1255
1256
1257
  	spin_lock_irqsave(&conf->device_lock, flags);
  	mddev->degraded -= count;
  	spin_unlock_irqrestore(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1258
1259
  
  	print_conf(conf);
6b9656205   NeilBrown   md: provide appro...
1260
  	return count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1261
  }
fd01b88c7   NeilBrown   md: remove typede...
1262
  static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1263
  {
e80963604   NeilBrown   md/raid1: typedef...
1264
  	struct r1conf *conf = mddev->private;
199050ea1   Neil Brown   rationalise retur...
1265
  	int err = -EEXIST;
41158c7eb   NeilBrown   [PATCH] md: optim...
1266
  	int mirror = 0;
0f6d02d58   NeilBrown   md: remove typede...
1267
  	struct mirror_info *p;
6c2fce2ef   Neil Brown   Support adding a ...
1268
  	int first = 0;
301946364   NeilBrown   md/raid1: Replac...
1269
  	int last = conf->raid_disks - 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1270

5389042ff   NeilBrown   md: change manage...
1271
1272
  	if (mddev->recovery_disabled == conf->recovery_disabled)
  		return -EBUSY;
6c2fce2ef   Neil Brown   Support adding a ...
1273
1274
  	if (rdev->raid_disk >= 0)
  		first = last = rdev->raid_disk;
7ef449d1e   NeilBrown   md/raid1: If ther...
1275
1276
1277
  	for (mirror = first; mirror <= last; mirror++) {
  		p = conf->mirrors+mirror;
  		if (!p->rdev) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1278

8f6c2e4b3   Martin K. Petersen   md: Use new topol...
1279
1280
  			disk_stack_limits(mddev->gendisk, rdev->bdev,
  					  rdev->data_offset << 9);
627a2d3c2   NeilBrown   md: deal with mer...
1281
1282
1283
1284
1285
  			/* as we don't honour merge_bvec_fn, we must
  			 * never risk violating it, so limit
  			 * ->max_segments to one lying with a single
  			 * page, as a one page request is never in
  			 * violation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1286
  			 */
627a2d3c2   NeilBrown   md: deal with mer...
1287
1288
1289
1290
1291
  			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
  				blk_queue_max_segments(mddev->queue, 1);
  				blk_queue_segment_boundary(mddev->queue,
  							   PAGE_CACHE_SIZE - 1);
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1292
1293
1294
  
  			p->head_position = 0;
  			rdev->raid_disk = mirror;
199050ea1   Neil Brown   rationalise retur...
1295
  			err = 0;
6aea114a7   NeilBrown   [PATCH] md: fix -...
1296
1297
1298
1299
  			/* As all devices are equivalent, we don't need a full recovery
  			 * if this was recently any drive of the array
  			 */
  			if (rdev->saved_raid_disk < 0)
41158c7eb   NeilBrown   [PATCH] md: optim...
1300
  				conf->fullsync = 1;
d6065f7bf   Suzanne Wood   [PATCH] md: provi...
1301
  			rcu_assign_pointer(p->rdev, rdev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1302
1303
  			break;
  		}
7ef449d1e   NeilBrown   md/raid1: If ther...
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
  		if (test_bit(WantReplacement, &p->rdev->flags) &&
  		    p[conf->raid_disks].rdev == NULL) {
  			/* Add this device as a replacement */
  			clear_bit(In_sync, &rdev->flags);
  			set_bit(Replacement, &rdev->flags);
  			rdev->raid_disk = mirror;
  			err = 0;
  			conf->fullsync = 1;
  			rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
  			break;
  		}
  	}
ac5e7113e   Andre Noll   md: Push down dat...
1316
  	md_integrity_add_rdev(rdev, mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1317
  	print_conf(conf);
199050ea1   Neil Brown   rationalise retur...
1318
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1319
  }
b8321b68d   NeilBrown   md: change hot_re...
1320
  static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1321
  {
e80963604   NeilBrown   md/raid1: typedef...
1322
  	struct r1conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1323
  	int err = 0;
b8321b68d   NeilBrown   md: change hot_re...
1324
  	int number = rdev->raid_disk;
0f6d02d58   NeilBrown   md: remove typede...
1325
  	struct mirror_info *p = conf->mirrors+ number;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1326

b014f14c8   NeilBrown   md/raid1: Allow a...
1327
1328
  	if (rdev != p->rdev)
  		p = conf->mirrors + conf->raid_disks + number;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1329
  	print_conf(conf);
b8321b68d   NeilBrown   md: change hot_re...
1330
  	if (rdev == p->rdev) {
b2d444d7a   NeilBrown   [PATCH] md: conve...
1331
  		if (test_bit(In_sync, &rdev->flags) ||
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1332
1333
1334
1335
  		    atomic_read(&rdev->nr_pending)) {
  			err = -EBUSY;
  			goto abort;
  		}
046abeede   NeilBrown   md/raid1: fix som...
1336
  		/* Only remove non-faulty devices if recovery
dfc706450   NeilBrown   md: restart recov...
1337
1338
1339
  		 * is not possible.
  		 */
  		if (!test_bit(Faulty, &rdev->flags) &&
5389042ff   NeilBrown   md: change manage...
1340
  		    mddev->recovery_disabled != conf->recovery_disabled &&
dfc706450   NeilBrown   md: restart recov...
1341
1342
1343
1344
  		    mddev->degraded < conf->raid_disks) {
  			err = -EBUSY;
  			goto abort;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1345
  		p->rdev = NULL;
fbd568a3e   Paul E. McKenney   [PATCH] Change sy...
1346
  		synchronize_rcu();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1347
1348
1349
1350
  		if (atomic_read(&rdev->nr_pending)) {
  			/* lost the race, try later */
  			err = -EBUSY;
  			p->rdev = rdev;
ac5e7113e   Andre Noll   md: Push down dat...
1351
  			goto abort;
8c7a2c2bc   NeilBrown   md/raid1: handle ...
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
  		} else if (conf->mirrors[conf->raid_disks + number].rdev) {
  			/* We just removed a device that is being replaced.
  			 * Move down the replacement.  We drain all IO before
  			 * doing this to avoid confusion.
  			 */
  			struct md_rdev *repl =
  				conf->mirrors[conf->raid_disks + number].rdev;
  			raise_barrier(conf);
  			clear_bit(Replacement, &repl->flags);
  			p->rdev = repl;
  			conf->mirrors[conf->raid_disks + number].rdev = NULL;
  			lower_barrier(conf);
  			clear_bit(WantReplacement, &rdev->flags);
  		} else
b014f14c8   NeilBrown   md/raid1: Allow a...
1366
  			clear_bit(WantReplacement, &rdev->flags);
a91a2785b   Martin K. Petersen   block: Require su...
1367
  		err = md_integrity_register(mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1368
1369
1370
1371
1372
1373
  	}
  abort:
  
  	print_conf(conf);
  	return err;
  }
6712ecf8f   NeilBrown   Drop 'size' argum...
1374
  static void end_sync_read(struct bio *bio, int error)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1375
  {
9f2c9d12b   NeilBrown   md: remove typede...
1376
  	struct r1bio *r1_bio = bio->bi_private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1377

0fc280f60   NeilBrown   md/raid1/ avoid b...
1378
  	update_head_pos(r1_bio->read_disk, r1_bio);
ba3ae3bee   Namhyung Kim   md/raid1: factor ...
1379

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1380
1381
1382
1383
1384
  	/*
  	 * we have read a block, now it needs to be re-written,
  	 * or re-read if the read failed.
  	 * We don't do much here, just schedule handling by raid1d
  	 */
69382e853   NeilBrown   [PATCH] md: bette...
1385
  	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1386
  		set_bit(R1BIO_Uptodate, &r1_bio->state);
d11c171e6   NeilBrown   [PATCH] md: allow...
1387
1388
1389
  
  	if (atomic_dec_and_test(&r1_bio->remaining))
  		reschedule_retry(r1_bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1390
  }
6712ecf8f   NeilBrown   Drop 'size' argum...
1391
  static void end_sync_write(struct bio *bio, int error)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1392
1393
  {
  	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
9f2c9d12b   NeilBrown   md: remove typede...
1394
  	struct r1bio *r1_bio = bio->bi_private;
fd01b88c7   NeilBrown   md: remove typede...
1395
  	struct mddev *mddev = r1_bio->mddev;
e80963604   NeilBrown   md/raid1: typedef...
1396
  	struct r1conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1397
  	int mirror=0;
4367af556   NeilBrown   md/raid1: clear b...
1398
1399
  	sector_t first_bad;
  	int bad_sectors;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1400

ba3ae3bee   Namhyung Kim   md/raid1: factor ...
1401
  	mirror = find_bio_disk(r1_bio, bio);
6b1117d50   NeilBrown   [PATCH] md: Don't...
1402
  	if (!uptodate) {
57dab0bdf   NeilBrown   md: use sector_t ...
1403
  		sector_t sync_blocks = 0;
6b1117d50   NeilBrown   [PATCH] md: Don't...
1404
1405
1406
1407
  		sector_t s = r1_bio->sector;
  		long sectors_to_go = r1_bio->sectors;
  		/* make sure these bits doesn't get cleared. */
  		do {
5e3db645f   NeilBrown   [PATCH] md: fix u...
1408
  			bitmap_end_sync(mddev->bitmap, s,
6b1117d50   NeilBrown   [PATCH] md: Don't...
1409
1410
1411
1412
  					&sync_blocks, 1);
  			s += sync_blocks;
  			sectors_to_go -= sync_blocks;
  		} while (sectors_to_go > 0);
d8f05d299   NeilBrown   md/raid1: record ...
1413
1414
  		set_bit(WriteErrorSeen,
  			&conf->mirrors[mirror].rdev->flags);
19d671695   NeilBrown   md/raid1: Mark de...
1415
1416
1417
1418
  		if (!test_and_set_bit(WantReplacement,
  				      &conf->mirrors[mirror].rdev->flags))
  			set_bit(MD_RECOVERY_NEEDED, &
  				mddev->recovery);
d8f05d299   NeilBrown   md/raid1: record ...
1419
  		set_bit(R1BIO_WriteError, &r1_bio->state);
4367af556   NeilBrown   md/raid1: clear b...
1420
1421
1422
  	} else if (is_badblock(conf->mirrors[mirror].rdev,
  			       r1_bio->sector,
  			       r1_bio->sectors,
3a9f28a51   NeilBrown   md/raid1: improve...
1423
1424
1425
1426
1427
1428
  			       &first_bad, &bad_sectors) &&
  		   !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
  				r1_bio->sector,
  				r1_bio->sectors,
  				&first_bad, &bad_sectors)
  		)
4367af556   NeilBrown   md/raid1: clear b...
1429
  		set_bit(R1BIO_MadeGood, &r1_bio->state);
e3b9703e2   NeilBrown   [PATCH] md: yet a...
1430

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1431
  	if (atomic_dec_and_test(&r1_bio->remaining)) {
4367af556   NeilBrown   md/raid1: clear b...
1432
  		int s = r1_bio->sectors;
d8f05d299   NeilBrown   md/raid1: record ...
1433
1434
  		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
  		    test_bit(R1BIO_WriteError, &r1_bio->state))
4367af556   NeilBrown   md/raid1: clear b...
1435
1436
1437
1438
1439
  			reschedule_retry(r1_bio);
  		else {
  			put_buf(r1_bio);
  			md_done_sync(mddev, s, uptodate);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1440
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1441
  }
3cb030020   NeilBrown   md: removing type...
1442
  /*
   * r1_sync_page_io() - synchronous single-page I/O with error recording.
   *
   * Performs sync_page_io() for @sectors sectors at @sector on @rdev.
   * On failure of a WRITE the device is flagged WriteErrorSeen and
   * WantReplacement (requesting recovery the first time).  Any failure is
   * recorded as a bad block range; if that cannot be recorded the device
   * is failed via md_error().
   *
   * Returns 1 on success, 0 on failure.
   */
  static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
  			    int sectors, struct page *page, int rw)
  {
  	if (!sync_page_io(rdev, sector, sectors << 9, page, rw, false)) {
  		if (rw == WRITE) {
  			set_bit(WriteErrorSeen, &rdev->flags);
  			if (!test_and_set_bit(WantReplacement, &rdev->flags))
  				set_bit(MD_RECOVERY_NEEDED,
  					&rdev->mddev->recovery);
  		}
  		/* need to record an error - either for the block or the device */
  		if (!rdev_set_badblocks(rdev, sector, sectors, 0))
  			md_error(rdev->mddev, rdev);
  		return 0;
  	}

  	/* success */
  	return 1;
  }
9f2c9d12b   NeilBrown   md: remove typede...
1460
  static int fix_sync_read_error(struct r1bio *r1_bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1461
  {
a68e58703   NeilBrown   md/raid1: split o...
1462
1463
1464
1465
1466
1467
1468
  	/* Try some synchronous reads of other devices to get
  	 * good data, much like with normal read errors.  Only
  	 * read into the pages we already have so we don't
  	 * need to re-issue the read request.
  	 * We don't need to freeze the array, because being in an
  	 * active sync request, there is no normal IO, and
  	 * no overlapping syncs.
06f603851   NeilBrown   md/raid1: avoid r...
1469
1470
1471
  	 * We don't need to check is_badblock() again as we
  	 * made sure that anything with a bad block in range
  	 * will have bi_end_io clear.
a68e58703   NeilBrown   md/raid1: split o...
1472
  	 */
fd01b88c7   NeilBrown   md: remove typede...
1473
  	struct mddev *mddev = r1_bio->mddev;
e80963604   NeilBrown   md/raid1: typedef...
1474
  	struct r1conf *conf = mddev->private;
a68e58703   NeilBrown   md/raid1: split o...
1475
1476
1477
1478
1479
1480
1481
1482
1483
  	struct bio *bio = r1_bio->bios[r1_bio->read_disk];
  	sector_t sect = r1_bio->sector;
  	int sectors = r1_bio->sectors;
  	int idx = 0;
  
  	while(sectors) {
  		int s = sectors;
  		int d = r1_bio->read_disk;
  		int success = 0;
3cb030020   NeilBrown   md: removing type...
1484
  		struct md_rdev *rdev;
78d7f5f72   NeilBrown   md/raid1: tidy up...
1485
  		int start;
a68e58703   NeilBrown   md/raid1: split o...
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
  
  		if (s > (PAGE_SIZE>>9))
  			s = PAGE_SIZE >> 9;
  		do {
  			if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
  				/* No rcu protection needed here devices
  				 * can only be removed when no resync is
  				 * active, and resync is currently active
  				 */
  				rdev = conf->mirrors[d].rdev;
9d3d80113   Namhyung Kim   md/raid1: move rd...
1496
  				if (sync_page_io(rdev, sect, s<<9,
a68e58703   NeilBrown   md/raid1: split o...
1497
1498
1499
1500
1501
1502
1503
  						 bio->bi_io_vec[idx].bv_page,
  						 READ, false)) {
  					success = 1;
  					break;
  				}
  			}
  			d++;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1504
  			if (d == conf->raid_disks * 2)
a68e58703   NeilBrown   md/raid1: split o...
1505
1506
  				d = 0;
  		} while (!success && d != r1_bio->read_disk);
78d7f5f72   NeilBrown   md/raid1: tidy up...
1507
  		if (!success) {
a68e58703   NeilBrown   md/raid1: split o...
1508
  			char b[BDEVNAME_SIZE];
3a9f28a51   NeilBrown   md/raid1: improve...
1509
1510
1511
1512
1513
1514
  			int abort = 0;
  			/* Cannot read from anywhere, this block is lost.
  			 * Record a bad block on each device.  If that doesn't
  			 * work just disable and interrupt the recovery.
  			 * Don't fail devices as that won't really help.
  			 */
a68e58703   NeilBrown   md/raid1: split o...
1515
1516
1517
1518
1519
1520
  			printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
  			       " for block %llu
  ",
  			       mdname(mddev),
  			       bdevname(bio->bi_bdev, b),
  			       (unsigned long long)r1_bio->sector);
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1521
  			for (d = 0; d < conf->raid_disks * 2; d++) {
3a9f28a51   NeilBrown   md/raid1: improve...
1522
1523
1524
1525
1526
1527
1528
  				rdev = conf->mirrors[d].rdev;
  				if (!rdev || test_bit(Faulty, &rdev->flags))
  					continue;
  				if (!rdev_set_badblocks(rdev, sect, s, 0))
  					abort = 1;
  			}
  			if (abort) {
d890fa2b0   NeilBrown   md: Fix some bugs...
1529
1530
  				conf->recovery_disabled =
  					mddev->recovery_disabled;
3a9f28a51   NeilBrown   md/raid1: improve...
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
  				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
  				md_done_sync(mddev, r1_bio->sectors, 0);
  				put_buf(r1_bio);
  				return 0;
  			}
  			/* Try next page */
  			sectors -= s;
  			sect += s;
  			idx++;
  			continue;
d11c171e6   NeilBrown   [PATCH] md: allow...
1541
  		}
78d7f5f72   NeilBrown   md/raid1: tidy up...
1542
1543
1544
1545
1546
  
  		start = d;
  		/* write it back and re-read */
  		while (d != r1_bio->read_disk) {
  			if (d == 0)
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1547
  				d = conf->raid_disks * 2;
78d7f5f72   NeilBrown   md/raid1: tidy up...
1548
1549
1550
1551
  			d--;
  			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
  				continue;
  			rdev = conf->mirrors[d].rdev;
d8f05d299   NeilBrown   md/raid1: record ...
1552
1553
1554
  			if (r1_sync_page_io(rdev, sect, s,
  					    bio->bi_io_vec[idx].bv_page,
  					    WRITE) == 0) {
78d7f5f72   NeilBrown   md/raid1: tidy up...
1555
1556
  				r1_bio->bios[d]->bi_end_io = NULL;
  				rdev_dec_pending(rdev, mddev);
9d3d80113   Namhyung Kim   md/raid1: move rd...
1557
  			}
78d7f5f72   NeilBrown   md/raid1: tidy up...
1558
1559
1560
1561
  		}
  		d = start;
  		while (d != r1_bio->read_disk) {
  			if (d == 0)
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1562
  				d = conf->raid_disks * 2;
78d7f5f72   NeilBrown   md/raid1: tidy up...
1563
1564
1565
1566
  			d--;
  			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
  				continue;
  			rdev = conf->mirrors[d].rdev;
d8f05d299   NeilBrown   md/raid1: record ...
1567
1568
1569
  			if (r1_sync_page_io(rdev, sect, s,
  					    bio->bi_io_vec[idx].bv_page,
  					    READ) != 0)
9d3d80113   Namhyung Kim   md/raid1: move rd...
1570
  				atomic_add(s, &rdev->corrected_errors);
78d7f5f72   NeilBrown   md/raid1: tidy up...
1571
  		}
a68e58703   NeilBrown   md/raid1: split o...
1572
1573
1574
1575
  		sectors -= s;
  		sect += s;
  		idx ++;
  	}
78d7f5f72   NeilBrown   md/raid1: tidy up...
1576
  	set_bit(R1BIO_Uptodate, &r1_bio->state);
7ca78d57d   NeilBrown   md/raid1: try fix...
1577
  	set_bit(BIO_UPTODATE, &bio->bi_flags);
a68e58703   NeilBrown   md/raid1: split o...
1578
1579
  	return 1;
  }
9f2c9d12b   NeilBrown   md: remove typede...
1580
  static int process_checks(struct r1bio *r1_bio)
a68e58703   NeilBrown   md/raid1: split o...
1581
1582
1583
1584
1585
1586
1587
1588
  {
  	/* We have read all readable devices.  If we haven't
  	 * got the block, then there is no hope left.
  	 * If we have, then we want to do a comparison
  	 * and skip the write if everything is the same.
  	 * If any blocks failed to read, then we need to
  	 * attempt an over-write
  	 */
fd01b88c7   NeilBrown   md: remove typede...
1589
  	struct mddev *mddev = r1_bio->mddev;
e80963604   NeilBrown   md/raid1: typedef...
1590
  	struct r1conf *conf = mddev->private;
a68e58703   NeilBrown   md/raid1: split o...
1591
1592
  	int primary;
  	int i;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1593
  	for (primary = 0; primary < conf->raid_disks * 2; primary++)
a68e58703   NeilBrown   md/raid1: split o...
1594
1595
1596
1597
1598
1599
1600
  		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
  		    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
  			r1_bio->bios[primary]->bi_end_io = NULL;
  			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
  			break;
  		}
  	r1_bio->read_disk = primary;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1601
  	for (i = 0; i < conf->raid_disks * 2; i++) {
78d7f5f72   NeilBrown   md/raid1: tidy up...
1602
1603
1604
1605
1606
  		int j;
  		int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
  		struct bio *pbio = r1_bio->bios[primary];
  		struct bio *sbio = r1_bio->bios[i];
  		int size;
a68e58703   NeilBrown   md/raid1: split o...
1607

78d7f5f72   NeilBrown   md/raid1: tidy up...
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
  		if (r1_bio->bios[i]->bi_end_io != end_sync_read)
  			continue;
  
  		if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
  			for (j = vcnt; j-- ; ) {
  				struct page *p, *s;
  				p = pbio->bi_io_vec[j].bv_page;
  				s = sbio->bi_io_vec[j].bv_page;
  				if (memcmp(page_address(p),
  					   page_address(s),
  					   PAGE_SIZE))
  					break;
69382e853   NeilBrown   [PATCH] md: bette...
1620
  			}
78d7f5f72   NeilBrown   md/raid1: tidy up...
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
  		} else
  			j = 0;
  		if (j >= 0)
  			mddev->resync_mismatches += r1_bio->sectors;
  		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
  			      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
  			/* No need to write to this device. */
  			sbio->bi_end_io = NULL;
  			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
  			continue;
  		}
  		/* fixup the bio for reuse */
  		sbio->bi_vcnt = vcnt;
  		sbio->bi_size = r1_bio->sectors << 9;
  		sbio->bi_idx = 0;
  		sbio->bi_phys_segments = 0;
  		sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
  		sbio->bi_flags |= 1 << BIO_UPTODATE;
  		sbio->bi_next = NULL;
  		sbio->bi_sector = r1_bio->sector +
  			conf->mirrors[i].rdev->data_offset;
  		sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
  		size = sbio->bi_size;
  		for (j = 0; j < vcnt ; j++) {
  			struct bio_vec *bi;
  			bi = &sbio->bi_io_vec[j];
  			bi->bv_offset = 0;
  			if (size > PAGE_SIZE)
  				bi->bv_len = PAGE_SIZE;
  			else
  				bi->bv_len = size;
  			size -= PAGE_SIZE;
  			memcpy(page_address(bi->bv_page),
  			       page_address(pbio->bi_io_vec[j].bv_page),
  			       PAGE_SIZE);
69382e853   NeilBrown   [PATCH] md: bette...
1656
  		}
78d7f5f72   NeilBrown   md/raid1: tidy up...
1657
  	}
a68e58703   NeilBrown   md/raid1: split o...
1658
1659
  	return 0;
  }
9f2c9d12b   NeilBrown   md: remove typede...
1660
  static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
a68e58703   NeilBrown   md/raid1: split o...
1661
  {
e80963604   NeilBrown   md/raid1: typedef...
1662
  	struct r1conf *conf = mddev->private;
a68e58703   NeilBrown   md/raid1: split o...
1663
  	int i;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1664
  	int disks = conf->raid_disks * 2;
a68e58703   NeilBrown   md/raid1: split o...
1665
1666
1667
  	struct bio *bio, *wbio;
  
  	bio = r1_bio->bios[r1_bio->read_disk];
a68e58703   NeilBrown   md/raid1: split o...
1668
1669
1670
1671
  	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
  		/* ouch - failed to read all of that. */
  		if (!fix_sync_read_error(r1_bio))
  			return;
7ca78d57d   NeilBrown   md/raid1: try fix...
1672
1673
1674
1675
  
  	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
  		if (process_checks(r1_bio) < 0)
  			return;
d11c171e6   NeilBrown   [PATCH] md: allow...
1676
1677
1678
  	/*
  	 * schedule writes
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1679
1680
1681
  	atomic_set(&r1_bio->remaining, 1);
  	for (i = 0; i < disks ; i++) {
  		wbio = r1_bio->bios[i];
3e198f782   NeilBrown   [PATCH] md: tidyu...
1682
1683
1684
1685
  		if (wbio->bi_end_io == NULL ||
  		    (wbio->bi_end_io == end_sync_read &&
  		     (i == r1_bio->read_disk ||
  		      !test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1686
  			continue;
3e198f782   NeilBrown   [PATCH] md: tidyu...
1687
1688
  		wbio->bi_rw = WRITE;
  		wbio->bi_end_io = end_sync_write;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1689
1690
  		atomic_inc(&r1_bio->remaining);
  		md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
191ea9b2c   NeilBrown   [PATCH] md: raid1...
1691

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1692
1693
1694
1695
  		generic_make_request(wbio);
  	}
  
  	if (atomic_dec_and_test(&r1_bio->remaining)) {
191ea9b2c   NeilBrown   [PATCH] md: raid1...
1696
  		/* if we're here, all write(s) have completed, so clean up */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
  		md_done_sync(mddev, r1_bio->sectors, 1);
  		put_buf(r1_bio);
  	}
  }
  
  /*
   * This is a kernel thread which:
   *
   *	1.	Retries failed read operations on working mirrors.
   *	2.	Updates the raid superblock when problems are encountered.
d2eb35acf   NeilBrown   md/raid1: avoid r...
1707
   *	3.	Performs writes following reads for array synchronising.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1708
   */
e80963604   NeilBrown   md/raid1: typedef...
1709
  static void fix_read_error(struct r1conf *conf, int read_disk,
867868fb5   NeilBrown   [PATCH] md: Facto...
1710
1711
  			   sector_t sect, int sectors)
  {
fd01b88c7   NeilBrown   md: remove typede...
1712
  	struct mddev *mddev = conf->mddev;
867868fb5   NeilBrown   [PATCH] md: Facto...
1713
1714
1715
1716
1717
  	while(sectors) {
  		int s = sectors;
  		int d = read_disk;
  		int success = 0;
  		int start;
3cb030020   NeilBrown   md: removing type...
1718
  		struct md_rdev *rdev;
867868fb5   NeilBrown   [PATCH] md: Facto...
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
  
  		if (s > (PAGE_SIZE>>9))
  			s = PAGE_SIZE >> 9;
  
  		do {
  			/* Note: no rcu protection needed here
  			 * as this is synchronous in the raid1d thread
  			 * which is the thread that might remove
  			 * a device.  If raid1d ever becomes multi-threaded....
  			 */
d2eb35acf   NeilBrown   md/raid1: avoid r...
1729
1730
  			sector_t first_bad;
  			int bad_sectors;
867868fb5   NeilBrown   [PATCH] md: Facto...
1731
1732
1733
  			rdev = conf->mirrors[d].rdev;
  			if (rdev &&
  			    test_bit(In_sync, &rdev->flags) &&
d2eb35acf   NeilBrown   md/raid1: avoid r...
1734
1735
  			    is_badblock(rdev, sect, s,
  					&first_bad, &bad_sectors) == 0 &&
ccebd4c41   Jonathan Brassow   md-new-param-to_s...
1736
1737
  			    sync_page_io(rdev, sect, s<<9,
  					 conf->tmppage, READ, false))
867868fb5   NeilBrown   [PATCH] md: Facto...
1738
1739
1740
  				success = 1;
  			else {
  				d++;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1741
  				if (d == conf->raid_disks * 2)
867868fb5   NeilBrown   [PATCH] md: Facto...
1742
1743
1744
1745
1746
  					d = 0;
  			}
  		} while (!success && d != read_disk);
  
  		if (!success) {
d8f05d299   NeilBrown   md/raid1: record ...
1747
  			/* Cannot read from anywhere - mark it bad */
3cb030020   NeilBrown   md: removing type...
1748
  			struct md_rdev *rdev = conf->mirrors[read_disk].rdev;
d8f05d299   NeilBrown   md/raid1: record ...
1749
1750
  			if (!rdev_set_badblocks(rdev, sect, s, 0))
  				md_error(mddev, rdev);
867868fb5   NeilBrown   [PATCH] md: Facto...
1751
1752
1753
1754
1755
1756
  			break;
  		}
  		/* write it back and re-read */
  		start = d;
  		while (d != read_disk) {
  			if (d==0)
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1757
  				d = conf->raid_disks * 2;
867868fb5   NeilBrown   [PATCH] md: Facto...
1758
1759
1760
  			d--;
  			rdev = conf->mirrors[d].rdev;
  			if (rdev &&
d8f05d299   NeilBrown   md/raid1: record ...
1761
1762
1763
  			    test_bit(In_sync, &rdev->flags))
  				r1_sync_page_io(rdev, sect, s,
  						conf->tmppage, WRITE);
867868fb5   NeilBrown   [PATCH] md: Facto...
1764
1765
1766
1767
1768
  		}
  		d = start;
  		while (d != read_disk) {
  			char b[BDEVNAME_SIZE];
  			if (d==0)
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1769
  				d = conf->raid_disks * 2;
867868fb5   NeilBrown   [PATCH] md: Facto...
1770
1771
1772
1773
  			d--;
  			rdev = conf->mirrors[d].rdev;
  			if (rdev &&
  			    test_bit(In_sync, &rdev->flags)) {
d8f05d299   NeilBrown   md/raid1: record ...
1774
1775
  				if (r1_sync_page_io(rdev, sect, s,
  						    conf->tmppage, READ)) {
867868fb5   NeilBrown   [PATCH] md: Facto...
1776
1777
  					atomic_add(s, &rdev->corrected_errors);
  					printk(KERN_INFO
9dd1e2faf   NeilBrown   md/raid1: improve...
1778
  					       "md/raid1:%s: read error corrected "
867868fb5   NeilBrown   [PATCH] md: Facto...
1779
1780
1781
  					       "(%d sectors at %llu on %s)
  ",
  					       mdname(mddev), s,
969b755aa   Randy Dunlap   [PATCH] md: fix p...
1782
1783
  					       (unsigned long long)(sect +
  					           rdev->data_offset),
867868fb5   NeilBrown   [PATCH] md: Facto...
1784
1785
1786
1787
1788
1789
1790
1791
  					       bdevname(rdev->bdev, b));
  				}
  			}
  		}
  		sectors -= s;
  		sect += s;
  	}
  }
cd5ff9a16   NeilBrown   md/raid1: Handle...
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
  /*
   * bi_complete() - bio end_io callback that signals the completion stored
   * in bio->bi_private.
   */
  static void bi_complete(struct bio *bio, int error)
  {
  	struct completion *done = bio->bi_private;

  	complete(done);
  }
  
  static int submit_bio_wait(int rw, struct bio *bio)
  {
  	struct completion event;
  	rw |= REQ_SYNC;
  
  	init_completion(&event);
  	bio->bi_private = &event;
  	bio->bi_end_io = bi_complete;
  	submit_bio(rw, bio);
  	wait_for_completion(&event);
  
  	return test_bit(BIO_UPTODATE, &bio->bi_flags);
  }
9f2c9d12b   NeilBrown   md: remove typede...
1810
  static int narrow_write_error(struct r1bio *r1_bio, int i)
cd5ff9a16   NeilBrown   md/raid1: Handle...
1811
  {
fd01b88c7   NeilBrown   md: remove typede...
1812
  	struct mddev *mddev = r1_bio->mddev;
e80963604   NeilBrown   md/raid1: typedef...
1813
  	struct r1conf *conf = mddev->private;
3cb030020   NeilBrown   md: removing type...
1814
  	struct md_rdev *rdev = conf->mirrors[i].rdev;
cd5ff9a16   NeilBrown   md/raid1: Handle...
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
  	int vcnt, idx;
  	struct bio_vec *vec;
  
  	/* bio has the data to be written to device 'i' where
  	 * we just recently had a write error.
  	 * We repeatedly clone the bio and trim down to one block,
  	 * then try the write.  Where the write fails we record
  	 * a bad block.
  	 * It is conceivable that the bio doesn't exactly align with
  	 * blocks.  We must handle this somehow.
  	 *
  	 * We currently own a reference on the rdev.
  	 */
  
  	int block_sectors;
  	sector_t sector;
  	int sectors;
  	int sect_to_write = r1_bio->sectors;
  	int ok = 1;
  
  	if (rdev->badblocks.shift < 0)
  		return 0;
  
  	block_sectors = 1 << rdev->badblocks.shift;
  	sector = r1_bio->sector;
  	sectors = ((sector + block_sectors)
  		   & ~(sector_t)(block_sectors - 1))
  		- sector;
  
  	if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
  		vcnt = r1_bio->behind_page_count;
  		vec = r1_bio->behind_bvecs;
  		idx = 0;
  		while (vec[idx].bv_page == NULL)
  			idx++;
  	} else {
  		vcnt = r1_bio->master_bio->bi_vcnt;
  		vec = r1_bio->master_bio->bi_io_vec;
  		idx = r1_bio->master_bio->bi_idx;
  	}
  	while (sect_to_write) {
  		struct bio *wbio;
  		if (sectors > sect_to_write)
  			sectors = sect_to_write;
  		/* Write at 'sector' for 'sectors'*/
  
  		wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
  		memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
  		wbio->bi_sector = r1_bio->sector;
  		wbio->bi_rw = WRITE;
  		wbio->bi_vcnt = vcnt;
  		wbio->bi_size = r1_bio->sectors << 9;
  		wbio->bi_idx = idx;
  
  		md_trim_bio(wbio, sector - r1_bio->sector, sectors);
  		wbio->bi_sector += rdev->data_offset;
  		wbio->bi_bdev = rdev->bdev;
  		if (submit_bio_wait(WRITE, wbio) == 0)
  			/* failure! */
  			ok = rdev_set_badblocks(rdev, sector,
  						sectors, 0)
  				&& ok;
  
  		bio_put(wbio);
  		sect_to_write -= sectors;
  		sector += sectors;
  		sectors = block_sectors;
  	}
  	return ok;
  }
e80963604   NeilBrown   md/raid1: typedef...
1885
  static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
62096bce2   NeilBrown   md/raid1: factor ...
1886
1887
1888
  {
  	int m;
  	int s = r1_bio->sectors;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1889
  	for (m = 0; m < conf->raid_disks * 2 ; m++) {
3cb030020   NeilBrown   md: removing type...
1890
  		struct md_rdev *rdev = conf->mirrors[m].rdev;
62096bce2   NeilBrown   md/raid1: factor ...
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
  		struct bio *bio = r1_bio->bios[m];
  		if (bio->bi_end_io == NULL)
  			continue;
  		if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
  		    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
  			rdev_clear_badblocks(rdev, r1_bio->sector, s);
  		}
  		if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
  		    test_bit(R1BIO_WriteError, &r1_bio->state)) {
  			if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
  				md_error(conf->mddev, rdev);
  		}
  	}
  	put_buf(r1_bio);
  	md_done_sync(conf->mddev, s, 1);
  }
e80963604   NeilBrown   md/raid1: typedef...
1907
  static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
62096bce2   NeilBrown   md/raid1: factor ...
1908
1909
  {
  	int m;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
1910
  	for (m = 0; m < conf->raid_disks * 2 ; m++)
62096bce2   NeilBrown   md/raid1: factor ...
1911
  		if (r1_bio->bios[m] == IO_MADE_GOOD) {
3cb030020   NeilBrown   md: removing type...
1912
  			struct md_rdev *rdev = conf->mirrors[m].rdev;
62096bce2   NeilBrown   md/raid1: factor ...
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
  			rdev_clear_badblocks(rdev,
  					     r1_bio->sector,
  					     r1_bio->sectors);
  			rdev_dec_pending(rdev, conf->mddev);
  		} else if (r1_bio->bios[m] != NULL) {
  			/* This drive got a write error.  We need to
  			 * narrow down and record precise write
  			 * errors.
  			 */
  			if (!narrow_write_error(r1_bio, m)) {
  				md_error(conf->mddev,
  					 conf->mirrors[m].rdev);
  				/* an I/O failed, we can't clear the bitmap */
  				set_bit(R1BIO_Degraded, &r1_bio->state);
  			}
  			rdev_dec_pending(conf->mirrors[m].rdev,
  					 conf->mddev);
  		}
  	if (test_bit(R1BIO_WriteError, &r1_bio->state))
  		close_write(r1_bio);
  	raid_end_bio_io(r1_bio);
  }
e80963604   NeilBrown   md/raid1: typedef...
1935
  static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
62096bce2   NeilBrown   md/raid1: factor ...
1936
1937
1938
  {
  	int disk;
  	int max_sectors;
fd01b88c7   NeilBrown   md: remove typede...
1939
  	struct mddev *mddev = conf->mddev;
62096bce2   NeilBrown   md/raid1: factor ...
1940
1941
  	struct bio *bio;
  	char b[BDEVNAME_SIZE];
3cb030020   NeilBrown   md: removing type...
1942
  	struct md_rdev *rdev;
62096bce2   NeilBrown   md/raid1: factor ...
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
  
  	clear_bit(R1BIO_ReadError, &r1_bio->state);
  	/* we got a read error. Maybe the drive is bad.  Maybe just
  	 * the block and we can fix it.
  	 * We freeze all other IO, and try reading the block from
  	 * other devices.  When we find one, we re-write
  	 * and check it that fixes the read error.
  	 * This is all done synchronously while the array is
  	 * frozen
  	 */
  	if (mddev->ro == 0) {
  		freeze_array(conf);
  		fix_read_error(conf, r1_bio->read_disk,
  			       r1_bio->sector, r1_bio->sectors);
  		unfreeze_array(conf);
  	} else
  		md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
  
  	bio = r1_bio->bios[r1_bio->read_disk];
  	bdevname(bio->bi_bdev, b);
  read_more:
  	disk = read_balance(conf, r1_bio, &max_sectors);
  	if (disk == -1) {
  		printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
  		       " read error for block %llu
  ",
  		       mdname(mddev), b, (unsigned long long)r1_bio->sector);
  		raid_end_bio_io(r1_bio);
  	} else {
  		const unsigned long do_sync
  			= r1_bio->master_bio->bi_rw & REQ_SYNC;
  		if (bio) {
  			r1_bio->bios[r1_bio->read_disk] =
  				mddev->ro ? IO_BLOCKED : NULL;
  			bio_put(bio);
  		}
  		r1_bio->read_disk = disk;
  		bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
  		md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors);
  		r1_bio->bios[r1_bio->read_disk] = bio;
  		rdev = conf->mirrors[disk].rdev;
  		printk_ratelimited(KERN_ERR
  				   "md/raid1:%s: redirecting sector %llu"
  				   " to other mirror: %s
  ",
  				   mdname(mddev),
  				   (unsigned long long)r1_bio->sector,
  				   bdevname(rdev->bdev, b));
  		bio->bi_sector = r1_bio->sector + rdev->data_offset;
  		bio->bi_bdev = rdev->bdev;
  		bio->bi_end_io = raid1_end_read_request;
  		bio->bi_rw = READ | do_sync;
  		bio->bi_private = r1_bio;
  		if (max_sectors < r1_bio->sectors) {
  			/* Drat - have to split this up more */
  			struct bio *mbio = r1_bio->master_bio;
  			int sectors_handled = (r1_bio->sector + max_sectors
  					       - mbio->bi_sector);
  			r1_bio->sectors = max_sectors;
  			spin_lock_irq(&conf->device_lock);
  			if (mbio->bi_phys_segments == 0)
  				mbio->bi_phys_segments = 2;
  			else
  				mbio->bi_phys_segments++;
  			spin_unlock_irq(&conf->device_lock);
  			generic_make_request(bio);
  			bio = NULL;
  
  			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
  
  			r1_bio->master_bio = mbio;
  			r1_bio->sectors = (mbio->bi_size >> 9)
  					  - sectors_handled;
  			r1_bio->state = 0;
  			set_bit(R1BIO_ReadError, &r1_bio->state);
  			r1_bio->mddev = mddev;
  			r1_bio->sector = mbio->bi_sector + sectors_handled;
  
  			goto read_more;
  		} else
  			generic_make_request(bio);
  	}
  }
fd01b88c7   NeilBrown   md: remove typede...
2026
  static void raid1d(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2027
  {
9f2c9d12b   NeilBrown   md: remove typede...
2028
  	struct r1bio *r1_bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2029
  	unsigned long flags;
e80963604   NeilBrown   md/raid1: typedef...
2030
  	struct r1conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2031
  	struct list_head *head = &conf->retry_list;
e1dfa0a29   NeilBrown   md: use new plugg...
2032
  	struct blk_plug plug;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2033
2034
  
  	md_check_recovery(mddev);
e1dfa0a29   NeilBrown   md: use new plugg...
2035
2036
  
  	blk_start_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2037
  	for (;;) {
191ea9b2c   NeilBrown   [PATCH] md: raid1...
2038

c3b328ac8   NeilBrown   md: fix up raid1/...
2039
2040
  		if (atomic_read(&mddev->plug_cnt) == 0)
  			flush_pending_writes(conf);
191ea9b2c   NeilBrown   [PATCH] md: raid1...
2041

a35e63efa   NeilBrown   md: fix deadlock ...
2042
2043
2044
  		spin_lock_irqsave(&conf->device_lock, flags);
  		if (list_empty(head)) {
  			spin_unlock_irqrestore(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2045
  			break;
a35e63efa   NeilBrown   md: fix deadlock ...
2046
  		}
9f2c9d12b   NeilBrown   md: remove typede...
2047
  		r1_bio = list_entry(head->prev, struct r1bio, retry_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2048
  		list_del(head->prev);
ddaf22aba   NeilBrown   [PATCH] md: attem...
2049
  		conf->nr_queued--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2050
2051
2052
  		spin_unlock_irqrestore(&conf->device_lock, flags);
  
  		mddev = r1_bio->mddev;
070ec55d0   NeilBrown   md: remove mddev_...
2053
  		conf = mddev->private;
4367af556   NeilBrown   md/raid1: clear b...
2054
  		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
d8f05d299   NeilBrown   md/raid1: record ...
2055
  			if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
62096bce2   NeilBrown   md/raid1: factor ...
2056
2057
2058
  			    test_bit(R1BIO_WriteError, &r1_bio->state))
  				handle_sync_write_finished(conf, r1_bio);
  			else
4367af556   NeilBrown   md/raid1: clear b...
2059
  				sync_request_write(mddev, r1_bio);
cd5ff9a16   NeilBrown   md/raid1: Handle...
2060
  		} else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
62096bce2   NeilBrown   md/raid1: factor ...
2061
2062
2063
2064
2065
  			   test_bit(R1BIO_WriteError, &r1_bio->state))
  			handle_write_finished(conf, r1_bio);
  		else if (test_bit(R1BIO_ReadError, &r1_bio->state))
  			handle_read_error(conf, r1_bio);
  		else
d2eb35acf   NeilBrown   md/raid1: avoid r...
2066
2067
2068
2069
  			/* just a partial read to be scheduled from separate
  			 * context
  			 */
  			generic_make_request(r1_bio->bios[r1_bio->read_disk]);
62096bce2   NeilBrown   md/raid1: factor ...
2070

1d9d52416   NeilBrown   md/raid1/raid10: ...
2071
  		cond_resched();
de393cdea   NeilBrown   md: make it easie...
2072
2073
  		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
  			md_check_recovery(mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2074
  	}
e1dfa0a29   NeilBrown   md: use new plugg...
2075
  	blk_finish_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2076
  }
e80963604   NeilBrown   md/raid1: typedef...
2077
  static int init_resync(struct r1conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2078
2079
2080
2081
  {
  	int buffs;
  
  	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
9e77c485f   Eric Sesterhenn   BUG_ON() Conversi...
2082
  	BUG_ON(conf->r1buf_pool);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
  	conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
  					  conf->poolinfo);
  	if (!conf->r1buf_pool)
  		return -ENOMEM;
  	conf->next_resync = 0;
  	return 0;
  }
  
  /*
   * perform a "sync" on one "block"
   *
   * We need to make sure that no normal I/O request - particularly write
   * requests - conflict with active sync requests.
   *
   * This is achieved by tracking pending requests and a 'barrier' concept
   * that can be installed to exclude normal IO requests.
   */
fd01b88c7   NeilBrown   md: remove typede...
2100
  /*
   * Build and submit one resync/recovery request starting at sector_nr.
   *
   * @mddev:     array being resynced
   * @sector_nr: first sector of this request
   * @skipped:   set to 1 when the returned range was skipped rather than
   *             submitted for I/O
   * @go_faster: when zero, sleep for up to a second if normal I/O is waiting
   *
   * Return value:
   *  - 0 if the resync pool cannot be created, if sector_nr is at or past
   *    the end of the device (sync finished or aborted), or if bad blocks
   *    could not be recorded (MD_RECOVERY_INTR is set in that case);
   *  - with *skipped == 1, the number of sectors that need no I/O;
   *  - otherwise the number of sectors for which read bios were submitted.
   */
  static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster)
  {
  	struct r1conf *conf = mddev->private;
  	struct r1bio *r1_bio;
  	struct bio *bio;
  	sector_t max_sector, nr_sectors;
  	int disk = -1;
  	int i;
  	int wonly = -1;
  	int write_targets = 0, read_targets = 0;
  	sector_t sync_blocks;
  	int still_degraded = 0;
  	int good_sectors = RESYNC_SECTORS;
  	int min_bad = 0; /* number of sectors that are bad in all devices */

  	if (!conf->r1buf_pool)
  		if (init_resync(conf))
  			return 0;

  	max_sector = mddev->dev_sectors;
  	if (sector_nr >= max_sector) {
  		/* If we aborted, we need to abort the
  		 * sync on the 'current' bitmap chunk (there will
  		 * only be one in raid1 resync).
  		 * We can find the current address in mddev->curr_resync
  		 */
  		if (mddev->curr_resync < max_sector) /* aborted */
  			bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
  						&sync_blocks, 1);
  		else /* completed sync */
  			conf->fullsync = 0;

  		bitmap_close_sync(mddev->bitmap);
  		close_sync(conf);
  		return 0;
  	}

  	/* No bitmap, nothing left to recover, nothing requested by the
  	 * user and no full sync pending: the rest of the device is skipped.
  	 */
  	if (mddev->bitmap == NULL &&
  	    mddev->recovery_cp == MaxSector &&
  	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
  	    conf->fullsync == 0) {
  		*skipped = 1;
  		return max_sector - sector_nr;
  	}
  	/* before building a request, check if we can skip these blocks..
  	 * This call to bitmap_start_sync doesn't actually record anything
  	 */
  	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
  	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
  		/* We can skip this block, and probably several more */
  		*skipped = 1;
  		return sync_blocks;
  	}
  	/*
  	 * If there is non-resync activity waiting for a turn,
  	 * and resync is going fast enough,
  	 * then let it through before starting on this new sync request.
  	 */
  	if (!go_faster && conf->nr_waiting)
  		msleep_interruptible(1000);

  	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
  	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
  	raise_barrier(conf);

  	conf->next_resync = sector_nr;

  	rcu_read_lock();
  	/*
  	 * If we get a correctable read error during resync or recovery,
  	 * we might want to read from a different device.  So we
  	 * flag all drives that could conceivably be read from for READ,
  	 * and any others (which will be non-In_sync devices) for WRITE.
  	 * If a read fails, we try reading from something else for which READ
  	 * is OK.
  	 */

  	r1_bio->mddev = mddev;
  	r1_bio->sector = sector_nr;
  	r1_bio->state = 0;
  	set_bit(R1BIO_IsSync, &r1_bio->state);

  	for (i = 0; i < conf->raid_disks * 2; i++) {
  		struct md_rdev *rdev;
  		bio = r1_bio->bios[i];

  		/* take from bio_init */
  		bio->bi_next = NULL;
  		bio->bi_flags &= ~(BIO_POOL_MASK-1);
  		bio->bi_flags |= 1 << BIO_UPTODATE;
  		bio->bi_rw = READ;
  		bio->bi_vcnt = 0;
  		bio->bi_idx = 0;
  		bio->bi_phys_segments = 0;
  		bio->bi_size = 0;
  		bio->bi_end_io = NULL;
  		bio->bi_private = NULL;

  		rdev = rcu_dereference(conf->mirrors[i].rdev);
  		if (rdev == NULL ||
  		    test_bit(Faulty, &rdev->flags)) {
  			if (i < conf->raid_disks)
  				still_degraded = 1;
  		} else if (!test_bit(In_sync, &rdev->flags)) {
  			bio->bi_rw = WRITE;
  			bio->bi_end_io = end_sync_write;
  			write_targets ++;
  		} else {
  			/* may need to read from here */
  			sector_t first_bad = MaxSector;
  			int bad_sectors;

  			if (is_badblock(rdev, sector_nr, good_sectors,
  					&first_bad, &bad_sectors)) {
  				if (first_bad > sector_nr)
  					good_sectors = first_bad - sector_nr;
  				else {
  					bad_sectors -= (sector_nr - first_bad);
  					if (min_bad == 0 ||
  					    min_bad > bad_sectors)
  						min_bad = bad_sectors;
  				}
  			}
  			if (sector_nr < first_bad) {
  				if (test_bit(WriteMostly, &rdev->flags)) {
  					if (wonly < 0)
  						wonly = i;
  				} else {
  					if (disk < 0)
  						disk = i;
  				}
  				bio->bi_rw = READ;
  				bio->bi_end_io = end_sync_read;
  				read_targets++;
  			}
  		}
  		if (bio->bi_end_io) {
  			/* pin the rdev for as long as this bio is in use */
  			atomic_inc(&rdev->nr_pending);
  			bio->bi_sector = sector_nr + rdev->data_offset;
  			bio->bi_bdev = rdev->bdev;
  			bio->bi_private = r1_bio;
  		}
  	}
  	rcu_read_unlock();
  	/* prefer a normal device for reading, fall back to write-mostly */
  	if (disk < 0)
  		disk = wonly;
  	r1_bio->read_disk = disk;

  	if (read_targets == 0 && min_bad > 0) {
  		/* These sectors are bad on all InSync devices, so we
  		 * need to mark them bad on all write targets
  		 */
  		int ok = 1;
  		for (i = 0 ; i < conf->raid_disks * 2 ; i++)
  			if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
  				/* We are outside the RCU read-side section
  				 * here, so rcu_dereference() must not be
  				 * used.  A plain load is enough: nr_pending
  				 * was raised on every write target above.
  				 */
  				struct md_rdev *rdev = conf->mirrors[i].rdev;
  				ok = rdev_set_badblocks(rdev, sector_nr,
  							min_bad, 0
  					) && ok;
  			}
  		set_bit(MD_CHANGE_DEVS, &mddev->flags);
  		*skipped = 1;
  		put_buf(r1_bio);

  		if (!ok) {
  			/* Cannot record the badblocks, so need to
  			 * abort the resync.
  			 * If there are multiple read targets, could just
  			 * fail the really bad ones ???
  			 */
  			conf->recovery_disabled = mddev->recovery_disabled;
  			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
  			return 0;
  		} else
  			return min_bad;

  	}
  	if (min_bad > 0 && min_bad < good_sectors) {
  		/* only resync enough to reach the next bad->good
  		 * transition */
  		good_sectors = min_bad;
  	}

  	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
  		/* extra read targets are also write targets */
  		write_targets += read_targets-1;

  	if (write_targets == 0 || read_targets == 0) {
  		/* There is nowhere to write, so all non-sync
  		 * drives must be failed - so we are finished
  		 */
  		sector_t rv = max_sector - sector_nr;
  		*skipped = 1;
  		put_buf(r1_bio);
  		return rv;
  	}

  	if (max_sector > mddev->resync_max)
  		max_sector = mddev->resync_max; /* Don't do IO beyond here */
  	if (max_sector > sector_nr + good_sectors)
  		max_sector = sector_nr + good_sectors;
  	nr_sectors = 0;
  	sync_blocks = 0;
  	/* Add one page at a time to every active bio until the read bio is
  	 * full, the range is exhausted, or the bitmap says the next chunk
  	 * does not need syncing.
  	 */
  	do {
  		struct page *page;
  		int len = PAGE_SIZE;
  		if (sector_nr + (len>>9) > max_sector)
  			len = (max_sector - sector_nr) << 9;
  		if (len == 0)
  			break;
  		if (sync_blocks == 0) {
  			if (!bitmap_start_sync(mddev->bitmap, sector_nr,
  					       &sync_blocks, still_degraded) &&
  			    !conf->fullsync &&
  			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
  				break;
  			BUG_ON(sync_blocks < (PAGE_SIZE>>9));
  			if ((len >> 9) > sync_blocks)
  				len = sync_blocks<<9;
  		}

  		for (i = 0 ; i < conf->raid_disks * 2; i++) {
  			bio = r1_bio->bios[i];
  			if (bio->bi_end_io) {
  				page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
  				if (bio_add_page(bio, page, len, 0) == 0) {
  					/* stop here */
  					bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
  					/* undo the page already added to
  					 * the earlier bios in this round
  					 */
  					while (i > 0) {
  						i--;
  						bio = r1_bio->bios[i];
  						if (bio->bi_end_io==NULL)
  							continue;
  						/* remove last page from this bio */
  						bio->bi_vcnt--;
  						bio->bi_size -= len;
  						bio->bi_flags &= ~(1<< BIO_SEG_VALID);
  					}
  					goto bio_full;
  				}
  			}
  		}
  		nr_sectors += len>>9;
  		sector_nr += len>>9;
  		sync_blocks -= (len>>9);
  	} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
   bio_full:
  	r1_bio->sectors = nr_sectors;

  	/* For a user-requested sync, we read all readable devices and do a
  	 * compare
  	 */
  	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
  		atomic_set(&r1_bio->remaining, read_targets);
  		for (i = 0; i < conf->raid_disks * 2; i++) {
  			bio = r1_bio->bios[i];
  			if (bio->bi_end_io == end_sync_read) {
  				md_sync_acct(bio->bi_bdev, nr_sectors);
  				generic_make_request(bio);
  			}
  		}
  	} else {
  		atomic_set(&r1_bio->remaining, 1);
  		bio = r1_bio->bios[r1_bio->read_disk];
  		md_sync_acct(bio->bi_bdev, nr_sectors);
  		generic_make_request(bio);

  	}
  	return nr_sectors;
  }
fd01b88c7   NeilBrown   md: remove typede...
2366
  /*
   * Report the usable size of the array in sectors: the caller-supplied
   * 'sectors' when it is non-zero, otherwise mddev->dev_sectors.
   * 'raid_disks' is not used.
   */
  static sector_t raid1_size(struct mddev *mddev, sector_t sectors, int raid_disks)
  {
  	return sectors ? sectors : mddev->dev_sectors;
  }
e80963604   NeilBrown   md/raid1: typedef...
2373
  static struct r1conf *setup_conf(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2374
  {
e80963604   NeilBrown   md/raid1: typedef...
2375
  	struct r1conf *conf;
709ae4879   NeilBrown   md/raid1: add tak...
2376
  	int i;
0f6d02d58   NeilBrown   md: remove typede...
2377
  	struct mirror_info *disk;
3cb030020   NeilBrown   md: removing type...
2378
  	struct md_rdev *rdev;
709ae4879   NeilBrown   md/raid1: add tak...
2379
  	int err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2380

e80963604   NeilBrown   md/raid1: typedef...
2381
  	conf = kzalloc(sizeof(struct r1conf), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2382
  	if (!conf)
709ae4879   NeilBrown   md/raid1: add tak...
2383
  		goto abort;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2384

8f19ccb2f   NeilBrown   md/raid1: Allocat...
2385
2386
  	conf->mirrors = kzalloc(sizeof(struct mirror_info)
  				* mddev->raid_disks * 2,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2387
2388
  				 GFP_KERNEL);
  	if (!conf->mirrors)
709ae4879   NeilBrown   md/raid1: add tak...
2389
  		goto abort;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2390

ddaf22aba   NeilBrown   [PATCH] md: attem...
2391
2392
  	conf->tmppage = alloc_page(GFP_KERNEL);
  	if (!conf->tmppage)
709ae4879   NeilBrown   md/raid1: add tak...
2393
  		goto abort;
ddaf22aba   NeilBrown   [PATCH] md: attem...
2394

709ae4879   NeilBrown   md/raid1: add tak...
2395
  	conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2396
  	if (!conf->poolinfo)
709ae4879   NeilBrown   md/raid1: add tak...
2397
  		goto abort;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
2398
  	conf->poolinfo->raid_disks = mddev->raid_disks * 2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2399
2400
2401
2402
  	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
  					  r1bio_pool_free,
  					  conf->poolinfo);
  	if (!conf->r1bio_pool)
709ae4879   NeilBrown   md/raid1: add tak...
2403
  		goto abort;
ed9bfdf1a   NeilBrown   md: raid1/raid10:...
2404
  	conf->poolinfo->mddev = mddev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2405

c19d57980   NeilBrown   md/raid1: recogni...
2406
  	err = -EINVAL;
e7e72bf64   Neil Brown   Remove blkdev war...
2407
  	spin_lock_init(&conf->device_lock);
159ec1fc0   Cheng Renquan   md: use list_for_...
2408
  	list_for_each_entry(rdev, &mddev->disks, same_set) {
709ae4879   NeilBrown   md/raid1: add tak...
2409
  		int disk_idx = rdev->raid_disk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2410
2411
2412
  		if (disk_idx >= mddev->raid_disks
  		    || disk_idx < 0)
  			continue;
c19d57980   NeilBrown   md/raid1: recogni...
2413
2414
2415
2416
  		if (test_bit(Replacement, &rdev->flags))
  			disk = conf->mirrors + conf->raid_disks + disk_idx;
  		else
  			disk = conf->mirrors + disk_idx;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2417

c19d57980   NeilBrown   md/raid1: recogni...
2418
2419
  		if (disk->rdev)
  			goto abort;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2420
  		disk->rdev = rdev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2421
2422
  
  		disk->head_position = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2423
2424
2425
  	}
  	conf->raid_disks = mddev->raid_disks;
  	conf->mddev = mddev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2426
  	INIT_LIST_HEAD(&conf->retry_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2427
2428
  
  	spin_lock_init(&conf->resync_lock);
17999be4a   NeilBrown   [PATCH] md: impro...
2429
  	init_waitqueue_head(&conf->wait_barrier);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2430

191ea9b2c   NeilBrown   [PATCH] md: raid1...
2431
  	bio_list_init(&conf->pending_bio_list);
34db0cd60   NeilBrown   md: add proper wr...
2432
  	conf->pending_count = 0;
d890fa2b0   NeilBrown   md: Fix some bugs...
2433
  	conf->recovery_disabled = mddev->recovery_disabled - 1;
191ea9b2c   NeilBrown   [PATCH] md: raid1...
2434

c19d57980   NeilBrown   md/raid1: recogni...
2435
  	err = -EIO;
709ae4879   NeilBrown   md/raid1: add tak...
2436
  	conf->last_used = -1;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
2437
  	for (i = 0; i < conf->raid_disks * 2; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2438
2439
  
  		disk = conf->mirrors + i;
c19d57980   NeilBrown   md/raid1: recogni...
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
  		if (i < conf->raid_disks &&
  		    disk[conf->raid_disks].rdev) {
  			/* This slot has a replacement. */
  			if (!disk->rdev) {
  				/* No original, just make the replacement
  				 * a recovering spare
  				 */
  				disk->rdev =
  					disk[conf->raid_disks].rdev;
  				disk[conf->raid_disks].rdev = NULL;
  			} else if (!test_bit(In_sync, &disk->rdev->flags))
  				/* Original is not in_sync - bad */
  				goto abort;
  		}
5fd6c1dce   NeilBrown   [PATCH] md: allow...
2454
2455
  		if (!disk->rdev ||
  		    !test_bit(In_sync, &disk->rdev->flags)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2456
  			disk->head_position = 0;
918f02383   NeilBrown   md: make sure a r...
2457
2458
  			if (disk->rdev)
  				conf->fullsync = 1;
709ae4879   NeilBrown   md/raid1: add tak...
2459
2460
2461
2462
2463
2464
  		} else if (conf->last_used < 0)
  			/*
  			 * The first working device is used as a
  			 * starting point to read balancing.
  			 */
  			conf->last_used = i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2465
  	}
709ae4879   NeilBrown   md/raid1: add tak...
2466

709ae4879   NeilBrown   md/raid1: add tak...
2467
  	if (conf->last_used < 0) {
9dd1e2faf   NeilBrown   md/raid1: improve...
2468
2469
  		printk(KERN_ERR "md/raid1:%s: no operational mirrors
  ",
709ae4879   NeilBrown   md/raid1: add tak...
2470
2471
2472
2473
2474
2475
2476
  		       mdname(mddev));
  		goto abort;
  	}
  	err = -ENOMEM;
  	conf->thread = md_register_thread(raid1d, mddev, NULL);
  	if (!conf->thread) {
  		printk(KERN_ERR
9dd1e2faf   NeilBrown   md/raid1: improve...
2477
2478
  		       "md/raid1:%s: couldn't allocate thread
  ",
709ae4879   NeilBrown   md/raid1: add tak...
2479
2480
  		       mdname(mddev));
  		goto abort;
11ce99e62   NeilBrown   [PATCH] md: Remov...
2481
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2482

709ae4879   NeilBrown   md/raid1: add tak...
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
  	return conf;
  
   abort:
  	if (conf) {
  		if (conf->r1bio_pool)
  			mempool_destroy(conf->r1bio_pool);
  		kfree(conf->mirrors);
  		safe_put_page(conf->tmppage);
  		kfree(conf->poolinfo);
  		kfree(conf);
  	}
  	return ERR_PTR(err);
  }
fd01b88c7   NeilBrown   md: remove typede...
2496
  static int run(struct mddev *mddev)
709ae4879   NeilBrown   md/raid1: add tak...
2497
  {
e80963604   NeilBrown   md/raid1: typedef...
2498
  	struct r1conf *conf;
709ae4879   NeilBrown   md/raid1: add tak...
2499
  	int i;
3cb030020   NeilBrown   md: removing type...
2500
  	struct md_rdev *rdev;
709ae4879   NeilBrown   md/raid1: add tak...
2501
2502
  
  	if (mddev->level != 1) {
9dd1e2faf   NeilBrown   md/raid1: improve...
2503
2504
  		printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)
  ",
709ae4879   NeilBrown   md/raid1: add tak...
2505
2506
2507
2508
  		       mdname(mddev), mddev->level);
  		return -EIO;
  	}
  	if (mddev->reshape_position != MaxSector) {
9dd1e2faf   NeilBrown   md/raid1: improve...
2509
2510
  		printk(KERN_ERR "md/raid1:%s: reshape_position set but not supported
  ",
709ae4879   NeilBrown   md/raid1: add tak...
2511
2512
2513
  		       mdname(mddev));
  		return -EIO;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2514
  	/*
709ae4879   NeilBrown   md/raid1: add tak...
2515
2516
2517
  	 * copy the already verified devices into our private RAID1
  	 * bookkeeping area. [whatever we allocate in run(),
  	 * should be freed in stop()]
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2518
  	 */
709ae4879   NeilBrown   md/raid1: add tak...
2519
2520
2521
2522
  	if (mddev->private == NULL)
  		conf = setup_conf(mddev);
  	else
  		conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2523

709ae4879   NeilBrown   md/raid1: add tak...
2524
2525
  	if (IS_ERR(conf))
  		return PTR_ERR(conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2526

709ae4879   NeilBrown   md/raid1: add tak...
2527
  	list_for_each_entry(rdev, &mddev->disks, same_set) {
1ed7242e5   Jonathan Brassow   MD: raid1 changes...
2528
2529
  		if (!mddev->gendisk)
  			continue;
709ae4879   NeilBrown   md/raid1: add tak...
2530
2531
2532
  		disk_stack_limits(mddev->gendisk, rdev->bdev,
  				  rdev->data_offset << 9);
  		/* as we don't honour merge_bvec_fn, we must never risk
627a2d3c2   NeilBrown   md: deal with mer...
2533
2534
  		 * violating it, so limit ->max_segments to 1 lying within
  		 * a single page, as a one page request is never in violation.
709ae4879   NeilBrown   md/raid1: add tak...
2535
  		 */
627a2d3c2   NeilBrown   md: deal with mer...
2536
2537
2538
2539
2540
  		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
  			blk_queue_max_segments(mddev->queue, 1);
  			blk_queue_segment_boundary(mddev->queue,
  						   PAGE_CACHE_SIZE - 1);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2541
  	}
191ea9b2c   NeilBrown   [PATCH] md: raid1...
2542

709ae4879   NeilBrown   md/raid1: add tak...
2543
2544
2545
2546
2547
2548
2549
2550
2551
  	mddev->degraded = 0;
  	for (i=0; i < conf->raid_disks; i++)
  		if (conf->mirrors[i].rdev == NULL ||
  		    !test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
  		    test_bit(Faulty, &conf->mirrors[i].rdev->flags))
  			mddev->degraded++;
  
  	if (conf->raid_disks - mddev->degraded == 1)
  		mddev->recovery_cp = MaxSector;
8c6ac868b   Andre Noll   md: Push down rec...
2552
  	if (mddev->recovery_cp != MaxSector)
9dd1e2faf   NeilBrown   md/raid1: improve...
2553
  		printk(KERN_NOTICE "md/raid1:%s: not clean"
8c6ac868b   Andre Noll   md: Push down rec...
2554
2555
2556
  		       " -- starting background reconstruction
  ",
  		       mdname(mddev));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2557
  	printk(KERN_INFO 
9dd1e2faf   NeilBrown   md/raid1: improve...
2558
2559
  		"md/raid1:%s: active with %d out of %d mirrors
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2560
2561
  		mdname(mddev), mddev->raid_disks - mddev->degraded, 
  		mddev->raid_disks);
709ae4879   NeilBrown   md/raid1: add tak...
2562

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2563
2564
2565
  	/*
  	 * Ok, everything is just fine now
  	 */
709ae4879   NeilBrown   md/raid1: add tak...
2566
2567
2568
  	mddev->thread = conf->thread;
  	conf->thread = NULL;
  	mddev->private = conf;
1f403624b   Dan Williams   md: centralize ->...
2569
  	md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2570

1ed7242e5   Jonathan Brassow   MD: raid1 changes...
2571
2572
2573
2574
  	if (mddev->queue) {
  		mddev->queue->backing_dev_info.congested_fn = raid1_congested;
  		mddev->queue->backing_dev_info.congested_data = mddev;
  	}
a91a2785b   Martin K. Petersen   block: Require su...
2575
  	return md_integrity_register(mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2576
  }
fd01b88c7   NeilBrown   md: remove typede...
2577
  static int stop(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2578
  {
e80963604   NeilBrown   md/raid1: typedef...
2579
  	struct r1conf *conf = mddev->private;
4b6d287f6   NeilBrown   [PATCH] md: add w...
2580
  	struct bitmap *bitmap = mddev->bitmap;
4b6d287f6   NeilBrown   [PATCH] md: add w...
2581
2582
  
  	/* wait for behind writes to complete */
e555190d8   NeilBrown   md/raid1: delay r...
2583
  	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
9dd1e2faf   NeilBrown   md/raid1: improve...
2584
2585
2586
  		printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.
  ",
  		       mdname(mddev));
4b6d287f6   NeilBrown   [PATCH] md: add w...
2587
  		/* need to kick something here to make sure I/O goes? */
e555190d8   NeilBrown   md/raid1: delay r...
2588
2589
  		wait_event(bitmap->behind_wait,
  			   atomic_read(&bitmap->behind_writes) == 0);
4b6d287f6   NeilBrown   [PATCH] md: add w...
2590
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2591

409c57f38   NeilBrown   md: enable suspen...
2592
2593
  	raise_barrier(conf);
  	lower_barrier(conf);
01f96c0a9   NeilBrown   md: Avoid waking ...
2594
  	md_unregister_thread(&mddev->thread);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2595
2596
  	if (conf->r1bio_pool)
  		mempool_destroy(conf->r1bio_pool);
990a8baf5   Jesper Juhl   [PATCH] md: remov...
2597
2598
  	kfree(conf->mirrors);
  	kfree(conf->poolinfo);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2599
2600
2601
2602
  	kfree(conf);
  	mddev->private = NULL;
  	return 0;
  }
fd01b88c7   NeilBrown   md: remove typede...
2603
  /*
   * raid1_resize() - change the per-device size of the array to @sectors.
   *
   * Called when no resync is happening and there is enough space on all
   * devices.  Any newly added space has to be covered by resync.  If the
   * array is shrinking we should possibly wait until any io in the removed
   * space completes, but it hardly seems worth it.
   *
   * Returns 0 on success, or -EINVAL when the array size is still larger
   * than the requested size after md_set_array_sectors() has run.
   */
  static int raid1_resize(struct mddev *mddev, sector_t sectors)
  {
  	md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
  	/*
  	 * NOTE(review): this can only trigger if md_set_array_sectors() left
  	 * array_sectors untouched - confirm when it does so.
  	 */
  	if (raid1_size(mddev, sectors, 0) < mddev->array_sectors)
  		return -EINVAL;

  	set_capacity(mddev->gendisk, mddev->array_sectors);
  	revalidate_disk(mddev->gendisk);

  	if (mddev->dev_sectors < sectors &&
  	    mddev->dev_sectors < mddev->recovery_cp) {
  		/* growing: restart recovery from the old end of the devices */
  		mddev->recovery_cp = mddev->dev_sectors;
  		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
  	}

  	mddev->resync_max_sectors = sectors;
  	mddev->dev_sectors = sectors;

  	return 0;
  }
fd01b88c7   NeilBrown   md: remove typede...
2626
  static int raid1_reshape(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2627
2628
2629
2630
2631
2632
2633
2634
  {
  	/* We need to:
  	 * 1/ resize the r1bio_pool
  	 * 2/ resize conf->mirrors
  	 *
  	 * We allocate a new r1bio_pool if we can.
  	 * Then raise a device barrier and wait until all IO stops.
  	 * Then resize conf->mirrors and swap in the new r1bio pool.
6ea9c07c6   NeilBrown   [PATCH] md: cause...
2635
2636
2637
  	 *
  	 * At the same time, we "pack" the devices so that all the missing
  	 * devices have the higher raid_disk numbers.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2638
2639
2640
  	 */
  	mempool_t *newpool, *oldpool;
  	struct pool_info *newpoolinfo;
0f6d02d58   NeilBrown   md: remove typede...
2641
  	struct mirror_info *newmirrors;
e80963604   NeilBrown   md/raid1: typedef...
2642
  	struct r1conf *conf = mddev->private;
63c70c4f3   NeilBrown   [PATCH] md: Split...
2643
  	int cnt, raid_disks;
c04be0aa8   NeilBrown   [PATCH] md: Impro...
2644
  	unsigned long flags;
b5470dc5f   Dan Williams   md: resolve exter...
2645
  	int d, d2, err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2646

63c70c4f3   NeilBrown   [PATCH] md: Split...
2647
  	/* Cannot change chunk_size, layout, or level */
664e7c413   Andre Noll   md: Convert mddev...
2648
  	if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
63c70c4f3   NeilBrown   [PATCH] md: Split...
2649
2650
  	    mddev->layout != mddev->new_layout ||
  	    mddev->level != mddev->new_level) {
664e7c413   Andre Noll   md: Convert mddev...
2651
  		mddev->new_chunk_sectors = mddev->chunk_sectors;
63c70c4f3   NeilBrown   [PATCH] md: Split...
2652
2653
2654
2655
  		mddev->new_layout = mddev->layout;
  		mddev->new_level = mddev->level;
  		return -EINVAL;
  	}
b5470dc5f   Dan Williams   md: resolve exter...
2656
2657
2658
  	err = md_allow_write(mddev);
  	if (err)
  		return err;
2a2275d63   NeilBrown   [PATCH] md: fix p...
2659

63c70c4f3   NeilBrown   [PATCH] md: Split...
2660
  	raid_disks = mddev->raid_disks + mddev->delta_disks;
6ea9c07c6   NeilBrown   [PATCH] md: cause...
2661
2662
2663
2664
2665
2666
  	if (raid_disks < conf->raid_disks) {
  		cnt=0;
  		for (d= 0; d < conf->raid_disks; d++)
  			if (conf->mirrors[d].rdev)
  				cnt++;
  		if (cnt > raid_disks)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2667
  			return -EBUSY;
6ea9c07c6   NeilBrown   [PATCH] md: cause...
2668
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2669
2670
2671
2672
2673
  
  	newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL);
  	if (!newpoolinfo)
  		return -ENOMEM;
  	newpoolinfo->mddev = mddev;
8f19ccb2f   NeilBrown   md/raid1: Allocat...
2674
  	newpoolinfo->raid_disks = raid_disks * 2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2675
2676
2677
2678
2679
2680
2681
  
  	newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
  				 r1bio_pool_free, newpoolinfo);
  	if (!newpool) {
  		kfree(newpoolinfo);
  		return -ENOMEM;
  	}
8f19ccb2f   NeilBrown   md/raid1: Allocat...
2682
2683
  	newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
  			     GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2684
2685
2686
2687
2688
  	if (!newmirrors) {
  		kfree(newpoolinfo);
  		mempool_destroy(newpool);
  		return -ENOMEM;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2689

17999be4a   NeilBrown   [PATCH] md: impro...
2690
  	raise_barrier(conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2691
2692
2693
2694
  
  	/* ok, everything is stopped */
  	oldpool = conf->r1bio_pool;
  	conf->r1bio_pool = newpool;
6ea9c07c6   NeilBrown   [PATCH] md: cause...
2695

a88aa7865   NeilBrown   md: correctly upd...
2696
  	for (d = d2 = 0; d < conf->raid_disks; d++) {
3cb030020   NeilBrown   md: removing type...
2697
  		struct md_rdev *rdev = conf->mirrors[d].rdev;
a88aa7865   NeilBrown   md: correctly upd...
2698
  		if (rdev && rdev->raid_disk != d2) {
36fad858a   Namhyung Kim   md: introduce lin...
2699
  			sysfs_unlink_rdev(mddev, rdev);
a88aa7865   NeilBrown   md: correctly upd...
2700
  			rdev->raid_disk = d2;
36fad858a   Namhyung Kim   md: introduce lin...
2701
2702
  			sysfs_unlink_rdev(mddev, rdev);
  			if (sysfs_link_rdev(mddev, rdev))
a88aa7865   NeilBrown   md: correctly upd...
2703
  				printk(KERN_WARNING
36fad858a   Namhyung Kim   md: introduce lin...
2704
2705
2706
  				       "md/raid1:%s: cannot register rd%d
  ",
  				       mdname(mddev), rdev->raid_disk);
6ea9c07c6   NeilBrown   [PATCH] md: cause...
2707
  		}
a88aa7865   NeilBrown   md: correctly upd...
2708
2709
2710
  		if (rdev)
  			newmirrors[d2++].rdev = rdev;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2711
2712
2713
2714
  	kfree(conf->mirrors);
  	conf->mirrors = newmirrors;
  	kfree(conf->poolinfo);
  	conf->poolinfo = newpoolinfo;
c04be0aa8   NeilBrown   [PATCH] md: Impro...
2715
  	spin_lock_irqsave(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2716
  	mddev->degraded += (raid_disks - conf->raid_disks);
c04be0aa8   NeilBrown   [PATCH] md: Impro...
2717
  	spin_unlock_irqrestore(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2718
  	conf->raid_disks = mddev->raid_disks = raid_disks;
63c70c4f3   NeilBrown   [PATCH] md: Split...
2719
  	mddev->delta_disks = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2720

6ea9c07c6   NeilBrown   [PATCH] md: cause...
2721
  	conf->last_used = 0; /* just make sure it is in-range */
17999be4a   NeilBrown   [PATCH] md: impro...
2722
  	lower_barrier(conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2723
2724
2725
2726
2727
2728
2729
  
  	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
  	md_wakeup_thread(mddev->thread);
  
  	mempool_destroy(oldpool);
  	return 0;
  }
fd01b88c7   NeilBrown   md: remove typede...
2730
  static void raid1_quiesce(struct mddev *mddev, int state)
36fa30636   NeilBrown   [PATCH] md: all h...
2731
  {
e80963604   NeilBrown   md/raid1: typedef...
2732
  	struct r1conf *conf = mddev->private;
36fa30636   NeilBrown   [PATCH] md: all h...
2733
2734
  
  	switch(state) {
6eef4b21f   NeilBrown   md: add honouring...
2735
2736
2737
  	case 2: /* wake for suspend */
  		wake_up(&conf->wait_barrier);
  		break;
9e6603da9   NeilBrown   [PATCH] md: raid1...
2738
  	case 1:
17999be4a   NeilBrown   [PATCH] md: impro...
2739
  		raise_barrier(conf);
36fa30636   NeilBrown   [PATCH] md: all h...
2740
  		break;
9e6603da9   NeilBrown   [PATCH] md: raid1...
2741
  	case 0:
17999be4a   NeilBrown   [PATCH] md: impro...
2742
  		lower_barrier(conf);
36fa30636   NeilBrown   [PATCH] md: all h...
2743
2744
  		break;
  	}
36fa30636   NeilBrown   [PATCH] md: all h...
2745
  }
fd01b88c7   NeilBrown   md: remove typede...
2746
  static void *raid1_takeover(struct mddev *mddev)
709ae4879   NeilBrown   md/raid1: add tak...
2747
2748
2749
2750
2751
  {
  	/* raid1 can take over:
  	 *  raid5 with 2 devices, any layout or chunk size
  	 */
  	if (mddev->level == 5 && mddev->raid_disks == 2) {
e80963604   NeilBrown   md/raid1: typedef...
2752
  		struct r1conf *conf;
709ae4879   NeilBrown   md/raid1: add tak...
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
  		mddev->new_level = 1;
  		mddev->new_layout = 0;
  		mddev->new_chunk_sectors = 0;
  		conf = setup_conf(mddev);
  		if (!IS_ERR(conf))
  			conf->barrier = 1;
  		return conf;
  	}
  	return ERR_PTR(-EINVAL);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2763

84fc4b56d   NeilBrown   md: rename "mdk_p...
2764
  static struct md_personality raid1_personality =
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2765
2766
  {
  	.name		= "raid1",
2604b703b   NeilBrown   [PATCH] md: remov...
2767
  	.level		= 1,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
  	.owner		= THIS_MODULE,
  	.make_request	= make_request,
  	.run		= run,
  	.stop		= stop,
  	.status		= status,
  	.error_handler	= error,
  	.hot_add_disk	= raid1_add_disk,
  	.hot_remove_disk= raid1_remove_disk,
  	.spare_active	= raid1_spare_active,
  	.sync_request	= sync_request,
  	.resize		= raid1_resize,
80c3a6ce4   Dan Williams   md: add 'size' as...
2779
  	.size		= raid1_size,
63c70c4f3   NeilBrown   [PATCH] md: Split...
2780
  	.check_reshape	= raid1_reshape,
36fa30636   NeilBrown   [PATCH] md: all h...
2781
  	.quiesce	= raid1_quiesce,
709ae4879   NeilBrown   md/raid1: add tak...
2782
  	.takeover	= raid1_takeover,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2783
2784
2785
2786
  };
  
  /*
   * Module init: register the RAID1 personality and pass the result of
   * register_md_personality() straight back to the caller.
   */
  static int __init raid_init(void)
  {
  	int rc = register_md_personality(&raid1_personality);

  	return rc;
  }
  
  static void raid_exit(void)
  {
2604b703b   NeilBrown   [PATCH] md: remov...
2792
  	unregister_md_personality(&raid1_personality);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2793
2794
2795
2796
2797
  }
  
  /* Hook raid_init()/raid_exit() up as the module's init and exit handlers. */
  module_init(raid_init);
  module_exit(raid_exit);
  MODULE_LICENSE("GPL");
0efb9e619   NeilBrown   md: add MODULE_DE...
2798
  MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2799
  MODULE_ALIAS("md-personality-3"); /* RAID1 */
d9d166c2a   NeilBrown   [PATCH] md: allow...
2800
  MODULE_ALIAS("md-raid1");
2604b703b   NeilBrown   [PATCH] md: remov...
2801
  MODULE_ALIAS("md-level-1");
34db0cd60   NeilBrown   md: add proper wr...
2802
2803
  
  /* Expose max_queued_requests as an int module parameter: readable by all, writable by the owner. */
  module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);