Blame view

drivers/md/raid5.c 159 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
  /*
   * raid5.c : Multiple Devices driver for Linux
   *	   Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
   *	   Copyright (C) 1999, 2000 Ingo Molnar
16a53ecc3   NeilBrown   [PATCH] md: merge...
5
   *	   Copyright (C) 2002, 2003 H. Peter Anvin
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
6
   *
16a53ecc3   NeilBrown   [PATCH] md: merge...
7
8
9
   * RAID-4/5/6 management functions.
   * Thanks to Penguin Computing for making the RAID-6 development possible
   * by donating a test server!
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
12
13
14
15
16
17
18
19
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2, or (at your option)
   * any later version.
   *
   * You should have received a copy of the GNU General Public License
   * (for example /usr/src/linux/COPYING); if not, write to the Free
   * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   */
ae3c20ccf   NeilBrown   [PATCH] md: fix s...
20
21
22
23
24
25
26
27
28
  /*
   * BITMAP UNPLUGGING:
   *
   * The sequencing for updating the bitmap reliably is a little
   * subtle (and I got it wrong the first time) so it deserves some
   * explanation.
   *
   * We group bitmap updates into batches.  Each batch has a number.
   * We may write out several batches at once, but that isn't very important.
7c13edc87   NeilBrown   md: incorporate n...
29
30
   * conf->seq_write is the number of the last batch successfully written.
   * conf->seq_flush is the number of the last batch that was closed to
ae3c20ccf   NeilBrown   [PATCH] md: fix s...
31
32
33
   *    new additions.
   * When we discover that we will need to write to any block in a stripe
   * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
7c13edc87   NeilBrown   md: incorporate n...
34
   * the number of the batch it will be in. This is seq_flush+1.
ae3c20ccf   NeilBrown   [PATCH] md: fix s...
35
36
37
38
39
40
41
42
43
   * When we are ready to do a write, if that batch hasn't been written yet,
   *   we plug the array and queue the stripe for later.
   * When an unplug happens, we increment seq_flush, thus closing the current
   *   batch.
   * When we notice that seq_flush > seq_write, we write out all pending updates
   * to the bitmap, and advance seq_write to where seq_flush was.
   * This may occasionally write a bit out twice, but is sure never to
   * miss any bits.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44

bff61975b   NeilBrown   md: move lots of ...
45
  #include <linux/blkdev.h>
f67055780   NeilBrown   [PATCH] md: Check...
46
  #include <linux/kthread.h>
f701d589a   Dan Williams   md/raid6: move ra...
47
  #include <linux/raid/pq.h>
91c009248   Dan Williams   md: raid5_run_ops...
48
  #include <linux/async_tx.h>
056075c76   Paul Gortmaker   md: Add module.h ...
49
  #include <linux/module.h>
07a3b417d   Dan Williams   md/raid456: distr...
50
  #include <linux/async.h>
bff61975b   NeilBrown   md: move lots of ...
51
  #include <linux/seq_file.h>
36d1c6476   Dan Williams   md/raid6: move th...
52
  #include <linux/cpu.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
53
  #include <linux/slab.h>
8bda470e8   Christian Dietrich   md/raid: use prin...
54
  #include <linux/ratelimit.h>
43b2e5d86   NeilBrown   md: move md_k.h f...
55
  #include "md.h"
bff61975b   NeilBrown   md: move lots of ...
56
  #include "raid5.h"
54071b380   Trela Maciej   md:Add support fo...
57
  #include "raid0.h"
ef740c372   Christoph Hellwig   md: move headers ...
58
  #include "bitmap.h"
72626685d   NeilBrown   [PATCH] md: add w...
59

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60
61
62
63
64
65
66
67
68
  /*
   * Stripe cache
   */
  
  /* NOTE(review): presumably the default number of cached stripe_heads - confirm against users */
  #define NR_STRIPES		256
  /* A stripe unit covers one page on each device. */
  #define STRIPE_SIZE		PAGE_SIZE
  /* log2 of the number of 512-byte sectors in a stripe unit. */
  #define STRIPE_SHIFT		(PAGE_SHIFT - 9)
  /* Number of 512-byte sectors in a stripe unit. */
  #define STRIPE_SECTORS		(STRIPE_SIZE>>9)
  /* __release_stripe() wakes the md thread once preread_active_stripes drops below this. */
  #define	IO_THRESHOLD		1
8b3e6cdc5   Dan Williams   md: introduce get...
69
  /* NOTE(review): not used in this part of the file; meaning to be confirmed at its users */
  #define BYPASS_THRESHOLD	1
fccddba06   NeilBrown   [PATCH] md: tidy ...
70
  /* Number of hash buckets: as many hlist_heads as fit in one page. */
  #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
71
  /* Mask applied by stripe_hash(); assumes NR_HASH is a power of two - TODO confirm */
  #define HASH_MASK		(NR_HASH - 1)
d1688a6d5   NeilBrown   md/raid5: typedef...
72
  /*
   * Pick the hash bucket for the stripe that starts at @sect: the sector is
   * scaled down to a stripe-unit number and reduced with HASH_MASK.
   */
  static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
  {
  	int bucket = (sect >> STRIPE_SHIFT) & HASH_MASK;

  	return conf->stripe_hashtbl + bucket;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77
78
79
80
81
82
83
  
  /* bio's attached to a stripe+device for I/O are linked together in bi_sector
   * order without overlap.  There may be several bio's per stripe+device, and
   * a bio could span several devices.
   * When walking this list for a particular stripe+device, we must never proceed
   * beyond a bio that extends past this device, as the next bio might no longer
   * be valid.
db298e194   NeilBrown   md/raid5: convert...
84
   * This function is used to determine the 'next' bio in the list, given the sector
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
85
86
   * of the current stripe+device
   */
db298e194   NeilBrown   md/raid5: convert...
87
88
89
90
91
92
93
94
  static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
  {
  	int sectors = bio->bi_size >> 9;
  	if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
  		return bio->bi_next;
  	else
  		return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
95

960e739d9   Jens Axboe   block: raid fixup...
96
  /*
5b99c2ffa   Jens Axboe   block: make bi_ph...
97
98
   * We maintain a biased count of active stripes in the bottom 16 bits of
   * bi_phys_segments, and a count of processed stripes in the upper 16 bits
960e739d9   Jens Axboe   block: raid fixup...
99
100
101
   */
  static inline int raid5_bi_phys_segments(struct bio *bio)
  {
5b99c2ffa   Jens Axboe   block: make bi_ph...
102
  	return bio->bi_phys_segments & 0xffff;
960e739d9   Jens Axboe   block: raid fixup...
103
104
105
106
  }
  
  static inline int raid5_bi_hw_segments(struct bio *bio)
  {
5b99c2ffa   Jens Axboe   block: make bi_ph...
107
  	return (bio->bi_phys_segments >> 16) & 0xffff;
960e739d9   Jens Axboe   block: raid fixup...
108
109
110
111
112
113
114
115
116
117
118
119
120
  }
  
  /*
   * Decrement the packed bi_phys_segments word by one and return the resulting
   * low-16-bit (active stripe) counter.
   */
  static inline int raid5_dec_bi_phys_segments(struct bio *bio)
  {
  	bio->bi_phys_segments -= 1;

  	return raid5_bi_phys_segments(bio);
  }
  
  static inline int raid5_dec_bi_hw_segments(struct bio *bio)
  {
  	unsigned short val = raid5_bi_hw_segments(bio);
  
  	--val;
5b99c2ffa   Jens Axboe   block: make bi_ph...
121
  	bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
960e739d9   Jens Axboe   block: raid fixup...
122
123
124
125
126
  	return val;
  }
  
  static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
  {
9b2dc8b66   Namhyung Kim   md/raid5: fix rai...
127
  	bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
960e739d9   Jens Axboe   block: raid fixup...
128
  }
d0dabf7e5   NeilBrown   md/raid6: remove ...
129
130
131
  /*
   * Return the index of the first data disk in a raid6 stripe.
   * A ddf layout always starts from device 0.  An md layout starts just after
   * the Q block, wrapping to device 0 when Q sits on the last device.
   */
  static inline int raid6_d0(struct stripe_head *sh)
  {
  	if (sh->ddf_layout || sh->qd_idx == sh->disks - 1)
  		return 0;

  	return sh->qd_idx + 1;
  }
16a53ecc3   NeilBrown   [PATCH] md: merge...
141
142
143
144
145
  /* Return the disk index after @disk, wrapping to 0 at @raid_disks. */
  static inline int raid6_next_disk(int disk, int raid_disks)
  {
  	int next = disk + 1;

  	if (next < raid_disks)
  		return next;

  	return 0;
  }
a44568564   Dan Williams   raid5: refactor h...
146

d0dabf7e5   NeilBrown   md/raid6: remove ...
147
148
149
150
151
  /* When walking through the disks in a raid5, starting at raid6_d0,
   * We need to map each disk to a 'slot', where the data disks are slot
   * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
   * is raid_disks-1.  This help does that mapping.
   */
67cc2b816   NeilBrown   md/raid5: finish ...
152
153
  static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
  			     int *count, int syndrome_disks)
d0dabf7e5   NeilBrown   md/raid6: remove ...
154
  {
6629542e7   Dan Williams   md/raid6: kill a ...
155
  	int slot = *count;
67cc2b816   NeilBrown   md/raid5: finish ...
156

e4424fee1   NeilBrown   md: fix problems ...
157
  	if (sh->ddf_layout)
6629542e7   Dan Williams   md/raid6: kill a ...
158
  		(*count)++;
d0dabf7e5   NeilBrown   md/raid6: remove ...
159
  	if (idx == sh->pd_idx)
67cc2b816   NeilBrown   md/raid5: finish ...
160
  		return syndrome_disks;
d0dabf7e5   NeilBrown   md/raid6: remove ...
161
  	if (idx == sh->qd_idx)
67cc2b816   NeilBrown   md/raid5: finish ...
162
  		return syndrome_disks + 1;
e4424fee1   NeilBrown   md: fix problems ...
163
  	if (!sh->ddf_layout)
6629542e7   Dan Williams   md/raid6: kill a ...
164
  		(*count)++;
d0dabf7e5   NeilBrown   md/raid6: remove ...
165
166
  	return slot;
  }
a44568564   Dan Williams   raid5: refactor h...
167
168
169
170
  /*
   * Complete every bio on the bi_next-linked chain starting at @return_bi.
   * Each bio is unlinked and has bi_size zeroed before bio_endio(bi, 0) is
   * called on it; the successor is fetched first because the bio must not be
   * touched after it has been ended.
   */
  static void return_io(struct bio *return_bi)
  {
  	struct bio *next;

  	for (; return_bi; return_bi = next) {
  		next = return_bi->bi_next;

  		return_bi->bi_next = NULL;
  		return_bi->bi_size = 0;
  		bio_endio(return_bi, 0);
  	}
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
179
  static void print_raid5_conf (struct r5conf *conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
180

600aa1099   Dan Williams   md: replace STRIP...
181
182
183
184
185
186
  /*
   * Return 1 if the stripe has a check or reconstruct state set, or a biofill
   * or compute operation flagged as running; 0 otherwise.
   */
  static int stripe_operations_active(struct stripe_head *sh)
  {
  	if (sh->check_state || sh->reconstruct_state)
  		return 1;
  	if (test_bit(STRIPE_BIOFILL_RUN, &sh->state))
  		return 1;
  	if (test_bit(STRIPE_COMPUTE_RUN, &sh->state))
  		return 1;

  	return 0;
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
187
  static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
188
189
  {
  	if (atomic_dec_and_test(&sh->count)) {
78bafebd4   Eric Sesterhenn   BUG_ON() Conversi...
190
191
  		BUG_ON(!list_empty(&sh->lru));
  		BUG_ON(atomic_read(&conf->active_stripes)==0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
192
  		if (test_bit(STRIPE_HANDLE, &sh->state)) {
482c08349   NeilBrown   md - remove old p...
193
  			if (test_bit(STRIPE_DELAYED, &sh->state))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194
  				list_add_tail(&sh->lru, &conf->delayed_list);
482c08349   NeilBrown   md - remove old p...
195
196
  			else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
  				   sh->bm_seq - conf->seq_write > 0)
72626685d   NeilBrown   [PATCH] md: add w...
197
  				list_add_tail(&sh->lru, &conf->bitmap_list);
482c08349   NeilBrown   md - remove old p...
198
  			else {
72626685d   NeilBrown   [PATCH] md: add w...
199
  				clear_bit(STRIPE_BIT_DELAY, &sh->state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
200
  				list_add_tail(&sh->lru, &conf->handle_list);
72626685d   NeilBrown   [PATCH] md: add w...
201
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
  			md_wakeup_thread(conf->mddev->thread);
  		} else {
600aa1099   Dan Williams   md: replace STRIP...
204
  			BUG_ON(stripe_operations_active(sh));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
205
206
207
208
209
  			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
  				atomic_dec(&conf->preread_active_stripes);
  				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
  					md_wakeup_thread(conf->mddev->thread);
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
210
  			atomic_dec(&conf->active_stripes);
ccfcc3c10   NeilBrown   [PATCH] md: Core ...
211
212
  			if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
  				list_add_tail(&sh->lru, &conf->inactive_list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
213
  				wake_up(&conf->wait_for_stripe);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
214
215
  				if (conf->retry_read_aligned)
  					md_wakeup_thread(conf->mddev->thread);
ccfcc3c10   NeilBrown   [PATCH] md: Core ...
216
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
217
218
219
  		}
  	}
  }
d0dabf7e5   NeilBrown   md/raid6: remove ...
220

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221
222
  static void release_stripe(struct stripe_head *sh)
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
223
  	struct r5conf *conf = sh->raid_conf;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
  	unsigned long flags;
16a53ecc3   NeilBrown   [PATCH] md: merge...
225

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
226
227
228
229
  	spin_lock_irqsave(&conf->device_lock, flags);
  	__release_stripe(conf, sh);
  	spin_unlock_irqrestore(&conf->device_lock, flags);
  }
fccddba06   NeilBrown   [PATCH] md: tidy ...
230
  static inline void remove_hash(struct stripe_head *sh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
231
  {
45b4233ca   Dan Williams   raid5: replace cu...
232
233
234
  	pr_debug("remove_hash(), stripe %llu
  ",
  		(unsigned long long)sh->sector);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235

fccddba06   NeilBrown   [PATCH] md: tidy ...
236
  	hlist_del_init(&sh->hash);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
237
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
238
  static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
239
  {
fccddba06   NeilBrown   [PATCH] md: tidy ...
240
  	struct hlist_head *hp = stripe_hash(conf, sh->sector);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241

45b4233ca   Dan Williams   raid5: replace cu...
242
243
244
  	pr_debug("insert_hash(), stripe %llu
  ",
  		(unsigned long long)sh->sector);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
245

fccddba06   NeilBrown   [PATCH] md: tidy ...
246
  	hlist_add_head(&sh->hash, hp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
247
248
249
250
  }
  
  
  /* find an idle stripe, make sure it is unhashed, and return it. */
d1688a6d5   NeilBrown   md/raid5: typedef...
251
  static struct stripe_head *get_free_stripe(struct r5conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252
253
254
  {
  	struct stripe_head *sh = NULL;
  	struct list_head *first;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
255
256
257
258
259
260
261
262
263
264
  	if (list_empty(&conf->inactive_list))
  		goto out;
  	first = conf->inactive_list.next;
  	sh = list_entry(first, struct stripe_head, lru);
  	list_del_init(first);
  	remove_hash(sh);
  	atomic_inc(&conf->active_stripes);
  out:
  	return sh;
  }
e4e11e385   NeilBrown   md/raid5: avoid o...
265
  static void shrink_buffers(struct stripe_head *sh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
266
267
268
  {
  	struct page *p;
  	int i;
e4e11e385   NeilBrown   md/raid5: avoid o...
269
  	int num = sh->raid_conf->pool_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270

e4e11e385   NeilBrown   md/raid5: avoid o...
271
  	for (i = 0; i < num ; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
272
273
274
275
  		p = sh->dev[i].page;
  		if (!p)
  			continue;
  		sh->dev[i].page = NULL;
2d1f3b5d1   NeilBrown   [PATCH] md: clean...
276
  		put_page(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277
278
  	}
  }
e4e11e385   NeilBrown   md/raid5: avoid o...
279
  static int grow_buffers(struct stripe_head *sh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
  {
  	int i;
e4e11e385   NeilBrown   md/raid5: avoid o...
282
  	int num = sh->raid_conf->pool_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
283

e4e11e385   NeilBrown   md/raid5: avoid o...
284
  	for (i = 0; i < num; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
285
286
287
288
289
290
291
292
293
  		struct page *page;
  
  		if (!(page = alloc_page(GFP_KERNEL))) {
  			return 1;
  		}
  		sh->dev[i].page = page;
  	}
  	return 0;
  }
784052ecc   NeilBrown   md/raid5: prepare...
294
  static void raid5_build_block(struct stripe_head *sh, int i, int previous);
d1688a6d5   NeilBrown   md/raid5: typedef...
295
  static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
911d4ee85   NeilBrown   md/raid5: simplif...
296
  			    struct stripe_head *sh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297

b5663ba40   NeilBrown   md/raid5: simplif...
298
  /*
   * (Re)initialise an idle stripe_head so that it describes the stripe at
   * @sector and hash it under that sector.
   *
   * @sh:       stripe to set up; must have a zero reference count, must not
   *            have STRIPE_HANDLE set and must have no operations active
   * @sector:   sector the stripe will represent
   * @previous: non-zero to use the previous geometry (previous_raid_disks and
   *            generation - 1), zero for the current one
   */
  static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
  {
  	struct r5conf *conf = sh->raid_conf;
  	int i;

  	BUG_ON(atomic_read(&sh->count) != 0);
  	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
  	BUG_ON(stripe_operations_active(sh));

  	/* note: this logs the sector @sh held before being re-initialised */
  	pr_debug("init_stripe called, stripe %llu\n",
  		(unsigned long long)sh->sector);

  	remove_hash(sh);

  	sh->generation = conf->generation - previous;
  	sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
  	sh->sector = sector;
  	stripe_set_idx(sector, conf, previous, sh);
  	sh->state = 0;

  	for (i = sh->disks; i--; ) {
  		struct r5dev *dev = &sh->dev[i];

  		/* an idle stripe should have no bios attached and no locked device */
  		if (dev->toread || dev->read || dev->towrite || dev->written ||
  		    test_bit(R5_LOCKED, &dev->flags)) {
  			printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
  			       (unsigned long long)sh->sector, i, dev->toread,
  			       dev->read, dev->towrite, dev->written,
  			       test_bit(R5_LOCKED, &dev->flags));
  			WARN_ON(1);
  		}
  		dev->flags = 0;
  		raid5_build_block(sh, i, previous);
  	}
  	insert_hash(conf, sh);
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
335
  /*
   * Look up a cached stripe by @sector and @generation in the bucket chosen by
   * stripe_hash().  Returns the matching stripe_head, or NULL if none is
   * hashed.  No reference is taken here.
   */
  static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
  					 short generation)
  {
  	struct stripe_head *sh;
  	struct hlist_node *hn;

  	pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
  	hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
  		if (sh->sector == sector && sh->generation == generation)
  			return sh;
  	pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
  	return NULL;
  }
674806d62   NeilBrown   md/raid5: More ca...
350
351
352
353
354
355
356
357
358
359
360
361
362
  /*
   * Need to check if array has failed when deciding whether to:
   *  - start an array
   *  - remove non-faulty devices
   *  - add a spare
   *  - allow a reshape
   * This determination is simple when no reshape is happening.
   * However if there is a reshape, we need to carefully check
   * both the before and after sections.
   * This is because some failed devices may only affect one
   * of the two sections, and some non-in_sync devices may
   * be insync in the section most affected by failed devices.
   */
d1688a6d5   NeilBrown   md/raid5: typedef...
363
  static int has_failed(struct r5conf *conf)
674806d62   NeilBrown   md/raid5: More ca...
364
365
366
367
368
369
370
371
372
  {
  	int degraded;
  	int i;
  	if (conf->mddev->reshape_position == MaxSector)
  		return conf->mddev->degraded > conf->max_degraded;
  
  	rcu_read_lock();
  	degraded = 0;
  	for (i = 0; i < conf->previous_raid_disks; i++) {
3cb030020   NeilBrown   md: removing type...
373
  		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
674806d62   NeilBrown   md/raid5: More ca...
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
  		if (!rdev || test_bit(Faulty, &rdev->flags))
  			degraded++;
  		else if (test_bit(In_sync, &rdev->flags))
  			;
  		else
  			/* not in-sync or faulty.
  			 * If the reshape increases the number of devices,
  			 * this is being recovered by the reshape, so
  			 * this 'previous' section is not in_sync.
  			 * If the number of devices is being reduced however,
  			 * the device can only be part of the array if
  			 * we are reverting a reshape, so this section will
  			 * be in-sync.
  			 */
  			if (conf->raid_disks >= conf->previous_raid_disks)
  				degraded++;
  	}
  	rcu_read_unlock();
  	if (degraded > conf->max_degraded)
  		return 1;
  	rcu_read_lock();
  	degraded = 0;
  	for (i = 0; i < conf->raid_disks; i++) {
3cb030020   NeilBrown   md: removing type...
397
  		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
674806d62   NeilBrown   md/raid5: More ca...
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
  		if (!rdev || test_bit(Faulty, &rdev->flags))
  			degraded++;
  		else if (test_bit(In_sync, &rdev->flags))
  			;
  		else
  			/* not in-sync or faulty.
  			 * If reshape increases the number of devices, this
  			 * section has already been recovered, else it
  			 * almost certainly hasn't.
  			 */
  			if (conf->raid_disks <= conf->previous_raid_disks)
  				degraded++;
  	}
  	rcu_read_unlock();
  	if (degraded > conf->max_degraded)
  		return 1;
  	return 0;
  }
b5663ba40   NeilBrown   md/raid5: simplif...
416
  static struct stripe_head *
d1688a6d5   NeilBrown   md/raid5: typedef...
417
  get_active_stripe(struct r5conf *conf, sector_t sector,
a8c906ca3   NeilBrown   md/raid5 - avoid ...
418
  		  int previous, int noblock, int noquiesce)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419
420
  {
  	struct stripe_head *sh;
45b4233ca   Dan Williams   raid5: replace cu...
421
422
  	pr_debug("get_stripe, sector %llu
  ", (unsigned long long)sector);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
423
424
425
426
  
  	spin_lock_irq(&conf->device_lock);
  
  	do {
72626685d   NeilBrown   [PATCH] md: add w...
427
  		wait_event_lock_irq(conf->wait_for_stripe,
a8c906ca3   NeilBrown   md/raid5 - avoid ...
428
  				    conf->quiesce == 0 || noquiesce,
72626685d   NeilBrown   [PATCH] md: add w...
429
  				    conf->device_lock, /* nothing */);
86b42c713   NeilBrown   md/raid5: clearly...
430
  		sh = __find_stripe(conf, sector, conf->generation - previous);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
431
432
433
434
435
436
437
438
439
  		if (!sh) {
  			if (!conf->inactive_blocked)
  				sh = get_free_stripe(conf);
  			if (noblock && sh == NULL)
  				break;
  			if (!sh) {
  				conf->inactive_blocked = 1;
  				wait_event_lock_irq(conf->wait_for_stripe,
  						    !list_empty(&conf->inactive_list) &&
5036805be   NeilBrown   [PATCH] md: use c...
440
441
  						    (atomic_read(&conf->active_stripes)
  						     < (conf->max_nr_stripes *3/4)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
443
  						     || !conf->inactive_blocked),
  						    conf->device_lock,
7c13edc87   NeilBrown   md: incorporate n...
444
  						    );
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
445
446
  				conf->inactive_blocked = 0;
  			} else
b5663ba40   NeilBrown   md/raid5: simplif...
447
  				init_stripe(sh, sector, previous);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
448
449
  		} else {
  			if (atomic_read(&sh->count)) {
ab69ae12c   NeilBrown   md/raid5: be more...
450
451
  				BUG_ON(!list_empty(&sh->lru)
  				    && !test_bit(STRIPE_EXPANDING, &sh->state));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
453
454
  			} else {
  				if (!test_bit(STRIPE_HANDLE, &sh->state))
  					atomic_inc(&conf->active_stripes);
ff4e8d9a9   NeilBrown   [PATCH] md: fix r...
455
456
  				if (list_empty(&sh->lru) &&
  				    !test_bit(STRIPE_EXPANDING, &sh->state))
16a53ecc3   NeilBrown   [PATCH] md: merge...
457
458
  					BUG();
  				list_del_init(&sh->lru);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
459
460
461
462
463
464
465
466
467
468
  			}
  		}
  	} while (sh == NULL);
  
  	if (sh)
  		atomic_inc(&sh->count);
  
  	spin_unlock_irq(&conf->device_lock);
  	return sh;
  }
6712ecf8f   NeilBrown   Drop 'size' argum...
469
470
471
472
  static void
  raid5_end_read_request(struct bio *bi, int error);
  static void
  raid5_end_write_request(struct bio *bi, int error);
91c009248   Dan Williams   md: raid5_run_ops...
473

c4e5ac0a2   Dan Williams   md: use stripe_he...
474
  static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
91c009248   Dan Williams   md: raid5_run_ops...
475
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
476
  	struct r5conf *conf = sh->raid_conf;
91c009248   Dan Williams   md: raid5_run_ops...
477
478
479
480
481
482
483
  	int i, disks = sh->disks;
  
  	might_sleep();
  
  	for (i = disks; i--; ) {
  		int rw;
  		struct bio *bi;
3cb030020   NeilBrown   md: removing type...
484
  		struct md_rdev *rdev;
e9c7469bb   Tejun Heo   md: implment REQ_...
485
486
487
488
489
490
  		if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
  			if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
  				rw = WRITE_FUA;
  			else
  				rw = WRITE;
  		} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
91c009248   Dan Williams   md: raid5_run_ops...
491
492
493
494
495
496
497
  			rw = READ;
  		else
  			continue;
  
  		bi = &sh->dev[i].req;
  
  		bi->bi_rw = rw;
b062962ed   Namhyung Kim   md/raid5: fix FUA...
498
  		if (rw & WRITE)
91c009248   Dan Williams   md: raid5_run_ops...
499
500
501
502
503
504
505
506
507
508
509
  			bi->bi_end_io = raid5_end_write_request;
  		else
  			bi->bi_end_io = raid5_end_read_request;
  
  		rcu_read_lock();
  		rdev = rcu_dereference(conf->disks[i].rdev);
  		if (rdev && test_bit(Faulty, &rdev->flags))
  			rdev = NULL;
  		if (rdev)
  			atomic_inc(&rdev->nr_pending);
  		rcu_read_unlock();
73e92e51b   NeilBrown   md/raid5. Don't ...
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
  		/* We have already checked bad blocks for reads.  Now
  		 * need to check for writes.
  		 */
  		while ((rw & WRITE) && rdev &&
  		       test_bit(WriteErrorSeen, &rdev->flags)) {
  			sector_t first_bad;
  			int bad_sectors;
  			int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
  					      &first_bad, &bad_sectors);
  			if (!bad)
  				break;
  
  			if (bad < 0) {
  				set_bit(BlockedBadBlocks, &rdev->flags);
  				if (!conf->mddev->external &&
  				    conf->mddev->flags) {
  					/* It is very unlikely, but we might
  					 * still need to write out the
  					 * bad block log - better give it
  					 * a chance*/
  					md_check_recovery(conf->mddev);
  				}
  				md_wait_for_blocked_rdev(rdev, conf->mddev);
  			} else {
  				/* Acknowledged bad block - skip the write */
  				rdev_dec_pending(rdev, conf->mddev);
  				rdev = NULL;
  			}
  		}
91c009248   Dan Williams   md: raid5_run_ops...
539
  		if (rdev) {
c4e5ac0a2   Dan Williams   md: use stripe_he...
540
  			if (s->syncing || s->expanding || s->expanded)
91c009248   Dan Williams   md: raid5_run_ops...
541
  				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
2b7497f0e   Dan Williams   md: kill STRIPE_O...
542
  			set_bit(STRIPE_IO_STARTED, &sh->state);
91c009248   Dan Williams   md: raid5_run_ops...
543
544
545
  			bi->bi_bdev = rdev->bdev;
  			pr_debug("%s: for %llu schedule op %ld on disc %d
  ",
e46b272b6   Harvey Harrison   md: replace remai...
546
  				__func__, (unsigned long long)sh->sector,
91c009248   Dan Williams   md: raid5_run_ops...
547
548
549
550
551
552
553
554
555
556
557
558
  				bi->bi_rw, i);
  			atomic_inc(&sh->count);
  			bi->bi_sector = sh->sector + rdev->data_offset;
  			bi->bi_flags = 1 << BIO_UPTODATE;
  			bi->bi_vcnt = 1;
  			bi->bi_max_vecs = 1;
  			bi->bi_idx = 0;
  			bi->bi_io_vec = &sh->dev[i].vec;
  			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
  			bi->bi_io_vec[0].bv_offset = 0;
  			bi->bi_size = STRIPE_SIZE;
  			bi->bi_next = NULL;
91c009248   Dan Williams   md: raid5_run_ops...
559
560
  			generic_make_request(bi);
  		} else {
b062962ed   Namhyung Kim   md/raid5: fix FUA...
561
  			if (rw & WRITE)
91c009248   Dan Williams   md: raid5_run_ops...
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
  				set_bit(STRIPE_DEGRADED, &sh->state);
  			pr_debug("skip op %ld on disc %d for sector %llu
  ",
  				bi->bi_rw, i, (unsigned long long)sh->sector);
  			clear_bit(R5_LOCKED, &sh->dev[i].flags);
  			set_bit(STRIPE_HANDLE, &sh->state);
  		}
  	}
  }
  
  /*
   * Copy the part of @bio that overlaps the stripe unit starting at @sector
   * between the bio's pages and the stripe cache @page, one bio segment at a
   * time, using async_memcpy().
   *
   * @frombio: non-zero copies bio -> @page (and requests ASYNC_TX_FENCE),
   *           zero copies @page -> bio
   * @tx:      descriptor the first copy depends on; each copy is chained to
   *           the one before it
   *
   * Returns the descriptor of the last copy submitted, or @tx unchanged if
   * nothing was copied.
   */
  static struct dma_async_tx_descriptor *
  async_copy_data(int frombio, struct bio *bio, struct page *page,
  	sector_t sector, struct dma_async_tx_descriptor *tx)
  {
  	struct async_submit_ctl submit;
  	enum async_tx_flags flags = 0;
  	struct bio_vec *bvl;
  	int page_offset;
  	int i;

  	/* byte offset of the bio's start relative to the page; may be negative */
  	if (bio->bi_sector >= sector)
  		page_offset = (signed)(bio->bi_sector - sector) * 512;
  	else
  		page_offset = (signed)(sector - bio->bi_sector) * -512;

  	if (frombio)
  		flags |= ASYNC_TX_FENCE;
  	init_async_submit(&submit, flags, tx, NULL, NULL, NULL);

  	bio_for_each_segment(bvl, bio, i) {
  		int seg_len = bvl->bv_len;
  		int seg_skip = 0;
  		int copy_len;

  		/* segment starts before the page: skip its leading bytes */
  		if (page_offset < 0) {
  			seg_skip = -page_offset;
  			page_offset += seg_skip;
  			seg_len -= seg_skip;
  		}

  		/* clip the copy at the end of the page */
  		if (seg_len > 0 && page_offset + seg_len > STRIPE_SIZE)
  			copy_len = STRIPE_SIZE - page_offset;
  		else
  			copy_len = seg_len;

  		if (copy_len > 0) {
  			struct page *seg_page = bvl->bv_page;

  			seg_skip += bvl->bv_offset;
  			if (frombio)
  				tx = async_memcpy(page, seg_page, page_offset,
  						  seg_skip, copy_len, &submit);
  			else
  				tx = async_memcpy(seg_page, page, seg_skip,
  						  page_offset, copy_len, &submit);
  		}
  		/* chain the operations */
  		submit.depend_tx = tx;

  		if (copy_len < seg_len) /* hit end of page */
  			break;
  		page_offset += seg_len;
  	}

  	return tx;
  }
  
  static void ops_complete_biofill(void *stripe_head_ref)
  {
  	struct stripe_head *sh = stripe_head_ref;
  	struct bio *return_bi = NULL;
d1688a6d5   NeilBrown   md/raid5: typedef...
631
  	struct r5conf *conf = sh->raid_conf;
e4d84909d   Dan Williams   raid5: fix 2 bugs...
632
  	int i;
91c009248   Dan Williams   md: raid5_run_ops...
633

e46b272b6   Harvey Harrison   md: replace remai...
634
635
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
636
637
638
  		(unsigned long long)sh->sector);
  
  	/* clear completed biofills */
83de75cc9   Dan Williams   md: replace STRIP...
639
  	spin_lock_irq(&conf->device_lock);
91c009248   Dan Williams   md: raid5_run_ops...
640
641
  	for (i = sh->disks; i--; ) {
  		struct r5dev *dev = &sh->dev[i];
91c009248   Dan Williams   md: raid5_run_ops...
642
643
  
  		/* acknowledge completion of a biofill operation */
e4d84909d   Dan Williams   raid5: fix 2 bugs...
644
645
  		/* and check if we need to reply to a read request,
  		 * new R5_Wantfill requests are held off until
83de75cc9   Dan Williams   md: replace STRIP...
646
  		 * !STRIPE_BIOFILL_RUN
e4d84909d   Dan Williams   raid5: fix 2 bugs...
647
648
  		 */
  		if (test_and_clear_bit(R5_Wantfill, &dev->flags)) {
91c009248   Dan Williams   md: raid5_run_ops...
649
  			struct bio *rbi, *rbi2;
91c009248   Dan Williams   md: raid5_run_ops...
650

91c009248   Dan Williams   md: raid5_run_ops...
651
652
653
654
655
656
  			BUG_ON(!dev->read);
  			rbi = dev->read;
  			dev->read = NULL;
  			while (rbi && rbi->bi_sector <
  				dev->sector + STRIPE_SECTORS) {
  				rbi2 = r5_next_bio(rbi, dev->sector);
960e739d9   Jens Axboe   block: raid fixup...
657
  				if (!raid5_dec_bi_phys_segments(rbi)) {
91c009248   Dan Williams   md: raid5_run_ops...
658
659
660
  					rbi->bi_next = return_bi;
  					return_bi = rbi;
  				}
91c009248   Dan Williams   md: raid5_run_ops...
661
662
663
664
  				rbi = rbi2;
  			}
  		}
  	}
83de75cc9   Dan Williams   md: replace STRIP...
665
666
  	spin_unlock_irq(&conf->device_lock);
  	clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
91c009248   Dan Williams   md: raid5_run_ops...
667
668
  
  	return_io(return_bi);
e4d84909d   Dan Williams   raid5: fix 2 bugs...
669
  	set_bit(STRIPE_HANDLE, &sh->state);
91c009248   Dan Williams   md: raid5_run_ops...
670
671
672
673
674
675
  	release_stripe(sh);
  }
  
  static void ops_run_biofill(struct stripe_head *sh)
  {
  	struct dma_async_tx_descriptor *tx = NULL;
d1688a6d5   NeilBrown   md/raid5: typedef...
676
  	struct r5conf *conf = sh->raid_conf;
a08abd8ca   Dan Williams   async_tx: structi...
677
  	struct async_submit_ctl submit;
91c009248   Dan Williams   md: raid5_run_ops...
678
  	int i;
e46b272b6   Harvey Harrison   md: replace remai...
679
680
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
  		(unsigned long long)sh->sector);
  
  	for (i = sh->disks; i--; ) {
  		struct r5dev *dev = &sh->dev[i];
  		if (test_bit(R5_Wantfill, &dev->flags)) {
  			struct bio *rbi;
  			spin_lock_irq(&conf->device_lock);
  			dev->read = rbi = dev->toread;
  			dev->toread = NULL;
  			spin_unlock_irq(&conf->device_lock);
  			while (rbi && rbi->bi_sector <
  				dev->sector + STRIPE_SECTORS) {
  				tx = async_copy_data(0, rbi, dev->page,
  					dev->sector, tx);
  				rbi = r5_next_bio(rbi, dev->sector);
  			}
  		}
  	}
  
  	atomic_inc(&sh->count);
a08abd8ca   Dan Williams   async_tx: structi...
701
702
  	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
  	async_trigger_callback(&submit);
91c009248   Dan Williams   md: raid5_run_ops...
703
  }
4e7d2c0ae   Dan Williams   md/raid5: factor ...
704
  static void mark_target_uptodate(struct stripe_head *sh, int target)
91c009248   Dan Williams   md: raid5_run_ops...
705
  {
4e7d2c0ae   Dan Williams   md/raid5: factor ...
706
  	struct r5dev *tgt;
91c009248   Dan Williams   md: raid5_run_ops...
707

4e7d2c0ae   Dan Williams   md/raid5: factor ...
708
709
  	if (target < 0)
  		return;
91c009248   Dan Williams   md: raid5_run_ops...
710

4e7d2c0ae   Dan Williams   md/raid5: factor ...
711
  	tgt = &sh->dev[target];
91c009248   Dan Williams   md: raid5_run_ops...
712
713
714
  	set_bit(R5_UPTODATE, &tgt->flags);
  	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
  	clear_bit(R5_Wantcompute, &tgt->flags);
4e7d2c0ae   Dan Williams   md/raid5: factor ...
715
  }
ac6b53b6e   Dan Williams   md/raid6: asynchr...
716
  static void ops_complete_compute(void *stripe_head_ref)
91c009248   Dan Williams   md: raid5_run_ops...
717
718
  {
  	struct stripe_head *sh = stripe_head_ref;
91c009248   Dan Williams   md: raid5_run_ops...
719

e46b272b6   Harvey Harrison   md: replace remai...
720
721
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
722
  		(unsigned long long)sh->sector);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
723
  	/* mark the computed target(s) as uptodate */
4e7d2c0ae   Dan Williams   md/raid5: factor ...
724
  	mark_target_uptodate(sh, sh->ops.target);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
725
  	mark_target_uptodate(sh, sh->ops.target2);
4e7d2c0ae   Dan Williams   md/raid5: factor ...
726

ecc65c9b3   Dan Williams   md: replace STRIP...
727
728
729
  	clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
  	if (sh->check_state == check_state_compute_run)
  		sh->check_state = check_state_compute_result;
91c009248   Dan Williams   md: raid5_run_ops...
730
731
732
  	set_bit(STRIPE_HANDLE, &sh->state);
  	release_stripe(sh);
  }
d6f38f31f   Dan Williams   md/raid5,6: add p...
733
734
735
736
737
738
739
740
741
  /* return a pointer to the address conversion region of the scribble buffer */
  static addr_conv_t *to_addr_conv(struct stripe_head *sh,
  				 struct raid5_percpu *percpu)
  {
  	return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
  }
  
  static struct dma_async_tx_descriptor *
  ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
91c009248   Dan Williams   md: raid5_run_ops...
742
  {
91c009248   Dan Williams   md: raid5_run_ops...
743
  	int disks = sh->disks;
d6f38f31f   Dan Williams   md/raid5,6: add p...
744
  	struct page **xor_srcs = percpu->scribble;
91c009248   Dan Williams   md: raid5_run_ops...
745
746
747
748
749
  	int target = sh->ops.target;
  	struct r5dev *tgt = &sh->dev[target];
  	struct page *xor_dest = tgt->page;
  	int count = 0;
  	struct dma_async_tx_descriptor *tx;
a08abd8ca   Dan Williams   async_tx: structi...
750
  	struct async_submit_ctl submit;
91c009248   Dan Williams   md: raid5_run_ops...
751
752
753
754
  	int i;
  
  	pr_debug("%s: stripe %llu block: %d
  ",
e46b272b6   Harvey Harrison   md: replace remai...
755
  		__func__, (unsigned long long)sh->sector, target);
91c009248   Dan Williams   md: raid5_run_ops...
756
757
758
759
760
761
762
  	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
  
  	for (i = disks; i--; )
  		if (i != target)
  			xor_srcs[count++] = sh->dev[i].page;
  
  	atomic_inc(&sh->count);
0403e3827   Dan Williams   dmaengine: add fe...
763
  	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
ac6b53b6e   Dan Williams   md/raid6: asynchr...
764
  			  ops_complete_compute, sh, to_addr_conv(sh, percpu));
91c009248   Dan Williams   md: raid5_run_ops...
765
  	if (unlikely(count == 1))
a08abd8ca   Dan Williams   async_tx: structi...
766
  		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
91c009248   Dan Williams   md: raid5_run_ops...
767
  	else
a08abd8ca   Dan Williams   async_tx: structi...
768
  		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
91c009248   Dan Williams   md: raid5_run_ops...
769

91c009248   Dan Williams   md: raid5_run_ops...
770
771
  	return tx;
  }
ac6b53b6e   Dan Williams   md/raid6: asynchr...
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
  /* set_syndrome_sources - populate source buffers for gen_syndrome
   * @srcs - (struct page *) array of size sh->disks
   * @sh - stripe_head to parse
   *
   * Populates srcs in proper layout order for the stripe and returns the
   * 'count' of sources to be used in a call to async_gen_syndrome.  The P
   * destination buffer is recorded in srcs[count] and the Q destination
   * is recorded in srcs[count+1]].
   */
  static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
  {
  	int disks = sh->disks;
  	int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
  	int d0_idx = raid6_d0(sh);
  	int count;
  	int i;
  
  	for (i = 0; i < disks; i++)
5dd33c9a4   NeilBrown   md/async: don't p...
790
  		srcs[i] = NULL;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
791
792
793
794
795
796
797
798
799
  
  	count = 0;
  	i = d0_idx;
  	do {
  		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
  
  		srcs[slot] = sh->dev[i].page;
  		i = raid6_next_disk(i, disks);
  	} while (i != d0_idx);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
800

e4424fee1   NeilBrown   md: fix problems ...
801
  	return syndrome_disks;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
  }
  
  static struct dma_async_tx_descriptor *
  ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
  {
  	int disks = sh->disks;
  	struct page **blocks = percpu->scribble;
  	int target;
  	int qd_idx = sh->qd_idx;
  	struct dma_async_tx_descriptor *tx;
  	struct async_submit_ctl submit;
  	struct r5dev *tgt;
  	struct page *dest;
  	int i;
  	int count;
  
  	if (sh->ops.target < 0)
  		target = sh->ops.target2;
  	else if (sh->ops.target2 < 0)
  		target = sh->ops.target;
91c009248   Dan Williams   md: raid5_run_ops...
822
  	else
ac6b53b6e   Dan Williams   md/raid6: asynchr...
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
  		/* we should only have one valid target */
  		BUG();
  	BUG_ON(target < 0);
  	pr_debug("%s: stripe %llu block: %d
  ",
  		__func__, (unsigned long long)sh->sector, target);
  
  	tgt = &sh->dev[target];
  	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
  	dest = tgt->page;
  
  	atomic_inc(&sh->count);
  
  	if (target == qd_idx) {
  		count = set_syndrome_sources(blocks, sh);
  		blocks[count] = NULL; /* regenerating p is not necessary */
  		BUG_ON(blocks[count+1] != dest); /* q should already be set */
0403e3827   Dan Williams   dmaengine: add fe...
840
841
  		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
  				  ops_complete_compute, sh,
ac6b53b6e   Dan Williams   md/raid6: asynchr...
842
843
844
845
846
847
848
849
850
851
  				  to_addr_conv(sh, percpu));
  		tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
  	} else {
  		/* Compute any data- or p-drive using XOR */
  		count = 0;
  		for (i = disks; i-- ; ) {
  			if (i == target || i == qd_idx)
  				continue;
  			blocks[count++] = sh->dev[i].page;
  		}
0403e3827   Dan Williams   dmaengine: add fe...
852
853
  		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
  				  NULL, ops_complete_compute, sh,
ac6b53b6e   Dan Williams   md/raid6: asynchr...
854
855
856
  				  to_addr_conv(sh, percpu));
  		tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
  	}
91c009248   Dan Williams   md: raid5_run_ops...
857

91c009248   Dan Williams   md: raid5_run_ops...
858
859
  	return tx;
  }
ac6b53b6e   Dan Williams   md/raid6: asynchr...
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
  static struct dma_async_tx_descriptor *
  ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
  {
  	int i, count, disks = sh->disks;
  	int syndrome_disks = sh->ddf_layout ? disks : disks-2;
  	int d0_idx = raid6_d0(sh);
  	int faila = -1, failb = -1;
  	int target = sh->ops.target;
  	int target2 = sh->ops.target2;
  	struct r5dev *tgt = &sh->dev[target];
  	struct r5dev *tgt2 = &sh->dev[target2];
  	struct dma_async_tx_descriptor *tx;
  	struct page **blocks = percpu->scribble;
  	struct async_submit_ctl submit;
  
  	pr_debug("%s: stripe %llu block1: %d block2: %d
  ",
  		 __func__, (unsigned long long)sh->sector, target, target2);
  	BUG_ON(target < 0 || target2 < 0);
  	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
  	BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
6c910a78e   Dan Williams   md/raid6: cleanup...
881
  	/* we need to open-code set_syndrome_sources to handle the
ac6b53b6e   Dan Williams   md/raid6: asynchr...
882
883
884
  	 * slot number conversion for 'faila' and 'failb'
  	 */
  	for (i = 0; i < disks ; i++)
5dd33c9a4   NeilBrown   md/async: don't p...
885
  		blocks[i] = NULL;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
886
887
888
889
890
891
892
893
894
895
896
897
898
  	count = 0;
  	i = d0_idx;
  	do {
  		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
  
  		blocks[slot] = sh->dev[i].page;
  
  		if (i == target)
  			faila = slot;
  		if (i == target2)
  			failb = slot;
  		i = raid6_next_disk(i, disks);
  	} while (i != d0_idx);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
899
900
901
902
903
904
905
906
907
908
909
910
911
912
  
  	BUG_ON(faila == failb);
  	if (failb < faila)
  		swap(faila, failb);
  	pr_debug("%s: stripe: %llu faila: %d failb: %d
  ",
  		 __func__, (unsigned long long)sh->sector, faila, failb);
  
  	atomic_inc(&sh->count);
  
  	if (failb == syndrome_disks+1) {
  		/* Q disk is one of the missing disks */
  		if (faila == syndrome_disks) {
  			/* Missing P+Q, just recompute */
0403e3827   Dan Williams   dmaengine: add fe...
913
914
915
  			init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
  					  ops_complete_compute, sh,
  					  to_addr_conv(sh, percpu));
e4424fee1   NeilBrown   md: fix problems ...
916
  			return async_gen_syndrome(blocks, 0, syndrome_disks+2,
ac6b53b6e   Dan Williams   md/raid6: asynchr...
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
  						  STRIPE_SIZE, &submit);
  		} else {
  			struct page *dest;
  			int data_target;
  			int qd_idx = sh->qd_idx;
  
  			/* Missing D+Q: recompute D from P, then recompute Q */
  			if (target == qd_idx)
  				data_target = target2;
  			else
  				data_target = target;
  
  			count = 0;
  			for (i = disks; i-- ; ) {
  				if (i == data_target || i == qd_idx)
  					continue;
  				blocks[count++] = sh->dev[i].page;
  			}
  			dest = sh->dev[data_target].page;
0403e3827   Dan Williams   dmaengine: add fe...
936
937
938
939
  			init_async_submit(&submit,
  					  ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
  					  NULL, NULL, NULL,
  					  to_addr_conv(sh, percpu));
ac6b53b6e   Dan Williams   md/raid6: asynchr...
940
941
942
943
  			tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
  				       &submit);
  
  			count = set_syndrome_sources(blocks, sh);
0403e3827   Dan Williams   dmaengine: add fe...
944
945
946
  			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
  					  ops_complete_compute, sh,
  					  to_addr_conv(sh, percpu));
ac6b53b6e   Dan Williams   md/raid6: asynchr...
947
948
949
  			return async_gen_syndrome(blocks, 0, count+2,
  						  STRIPE_SIZE, &submit);
  		}
ac6b53b6e   Dan Williams   md/raid6: asynchr...
950
  	} else {
6c910a78e   Dan Williams   md/raid6: cleanup...
951
952
953
954
955
956
957
958
959
960
961
962
963
964
  		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
  				  ops_complete_compute, sh,
  				  to_addr_conv(sh, percpu));
  		if (failb == syndrome_disks) {
  			/* We're missing D+P. */
  			return async_raid6_datap_recov(syndrome_disks+2,
  						       STRIPE_SIZE, faila,
  						       blocks, &submit);
  		} else {
  			/* We're missing D+D. */
  			return async_raid6_2data_recov(syndrome_disks+2,
  						       STRIPE_SIZE, faila, failb,
  						       blocks, &submit);
  		}
ac6b53b6e   Dan Williams   md/raid6: asynchr...
965
966
  	}
  }
91c009248   Dan Williams   md: raid5_run_ops...
967
968
969
  static void ops_complete_prexor(void *stripe_head_ref)
  {
  	struct stripe_head *sh = stripe_head_ref;
e46b272b6   Harvey Harrison   md: replace remai...
970
971
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
972
  		(unsigned long long)sh->sector);
91c009248   Dan Williams   md: raid5_run_ops...
973
974
975
  }
  
  static struct dma_async_tx_descriptor *
d6f38f31f   Dan Williams   md/raid5,6: add p...
976
977
  ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
  	       struct dma_async_tx_descriptor *tx)
91c009248   Dan Williams   md: raid5_run_ops...
978
  {
91c009248   Dan Williams   md: raid5_run_ops...
979
  	int disks = sh->disks;
d6f38f31f   Dan Williams   md/raid5,6: add p...
980
  	struct page **xor_srcs = percpu->scribble;
91c009248   Dan Williams   md: raid5_run_ops...
981
  	int count = 0, pd_idx = sh->pd_idx, i;
a08abd8ca   Dan Williams   async_tx: structi...
982
  	struct async_submit_ctl submit;
91c009248   Dan Williams   md: raid5_run_ops...
983
984
985
  
  	/* existing parity data subtracted */
  	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
e46b272b6   Harvey Harrison   md: replace remai...
986
987
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
988
989
990
991
992
  		(unsigned long long)sh->sector);
  
  	for (i = disks; i--; ) {
  		struct r5dev *dev = &sh->dev[i];
  		/* Only process blocks that are known to be uptodate */
d8ee0728b   Dan Williams   md: replace R5_Wa...
993
  		if (test_bit(R5_Wantdrain, &dev->flags))
91c009248   Dan Williams   md: raid5_run_ops...
994
995
  			xor_srcs[count++] = dev->page;
  	}
0403e3827   Dan Williams   dmaengine: add fe...
996
  	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
d6f38f31f   Dan Williams   md/raid5,6: add p...
997
  			  ops_complete_prexor, sh, to_addr_conv(sh, percpu));
a08abd8ca   Dan Williams   async_tx: structi...
998
  	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
91c009248   Dan Williams   md: raid5_run_ops...
999
1000
1001
1002
1003
  
  	return tx;
  }
  
  static struct dma_async_tx_descriptor *
d8ee0728b   Dan Williams   md: replace R5_Wa...
1004
  ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
91c009248   Dan Williams   md: raid5_run_ops...
1005
1006
  {
  	int disks = sh->disks;
d8ee0728b   Dan Williams   md: replace R5_Wa...
1007
  	int i;
91c009248   Dan Williams   md: raid5_run_ops...
1008

e46b272b6   Harvey Harrison   md: replace remai...
1009
1010
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
1011
1012
1013
1014
1015
  		(unsigned long long)sh->sector);
  
  	for (i = disks; i--; ) {
  		struct r5dev *dev = &sh->dev[i];
  		struct bio *chosen;
91c009248   Dan Williams   md: raid5_run_ops...
1016

d8ee0728b   Dan Williams   md: replace R5_Wa...
1017
  		if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
91c009248   Dan Williams   md: raid5_run_ops...
1018
  			struct bio *wbi;
cbe47ec55   NeilBrown   md/raid5: Protect...
1019
  			spin_lock_irq(&sh->raid_conf->device_lock);
91c009248   Dan Williams   md: raid5_run_ops...
1020
1021
1022
1023
  			chosen = dev->towrite;
  			dev->towrite = NULL;
  			BUG_ON(dev->written);
  			wbi = dev->written = chosen;
cbe47ec55   NeilBrown   md/raid5: Protect...
1024
  			spin_unlock_irq(&sh->raid_conf->device_lock);
91c009248   Dan Williams   md: raid5_run_ops...
1025
1026
1027
  
  			while (wbi && wbi->bi_sector <
  				dev->sector + STRIPE_SECTORS) {
e9c7469bb   Tejun Heo   md: implment REQ_...
1028
1029
  				if (wbi->bi_rw & REQ_FUA)
  					set_bit(R5_WantFUA, &dev->flags);
91c009248   Dan Williams   md: raid5_run_ops...
1030
1031
1032
1033
1034
1035
1036
1037
1038
  				tx = async_copy_data(1, wbi, dev->page,
  					dev->sector, tx);
  				wbi = r5_next_bio(wbi, dev->sector);
  			}
  		}
  	}
  
  	return tx;
  }
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1039
  static void ops_complete_reconstruct(void *stripe_head_ref)
91c009248   Dan Williams   md: raid5_run_ops...
1040
1041
  {
  	struct stripe_head *sh = stripe_head_ref;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1042
1043
1044
1045
  	int disks = sh->disks;
  	int pd_idx = sh->pd_idx;
  	int qd_idx = sh->qd_idx;
  	int i;
e9c7469bb   Tejun Heo   md: implment REQ_...
1046
  	bool fua = false;
91c009248   Dan Williams   md: raid5_run_ops...
1047

e46b272b6   Harvey Harrison   md: replace remai...
1048
1049
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
1050
  		(unsigned long long)sh->sector);
e9c7469bb   Tejun Heo   md: implment REQ_...
1051
1052
  	for (i = disks; i--; )
  		fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
91c009248   Dan Williams   md: raid5_run_ops...
1053
1054
  	for (i = disks; i--; ) {
  		struct r5dev *dev = &sh->dev[i];
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1055

e9c7469bb   Tejun Heo   md: implment REQ_...
1056
  		if (dev->written || i == pd_idx || i == qd_idx) {
91c009248   Dan Williams   md: raid5_run_ops...
1057
  			set_bit(R5_UPTODATE, &dev->flags);
e9c7469bb   Tejun Heo   md: implment REQ_...
1058
1059
1060
  			if (fua)
  				set_bit(R5_WantFUA, &dev->flags);
  		}
91c009248   Dan Williams   md: raid5_run_ops...
1061
  	}
d8ee0728b   Dan Williams   md: replace R5_Wa...
1062
1063
1064
1065
1066
1067
1068
1069
  	if (sh->reconstruct_state == reconstruct_state_drain_run)
  		sh->reconstruct_state = reconstruct_state_drain_result;
  	else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run)
  		sh->reconstruct_state = reconstruct_state_prexor_drain_result;
  	else {
  		BUG_ON(sh->reconstruct_state != reconstruct_state_run);
  		sh->reconstruct_state = reconstruct_state_result;
  	}
91c009248   Dan Williams   md: raid5_run_ops...
1070
1071
1072
1073
1074
1075
  
  	set_bit(STRIPE_HANDLE, &sh->state);
  	release_stripe(sh);
  }
  
  static void
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1076
1077
  ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
  		     struct dma_async_tx_descriptor *tx)
91c009248   Dan Williams   md: raid5_run_ops...
1078
  {
91c009248   Dan Williams   md: raid5_run_ops...
1079
  	int disks = sh->disks;
d6f38f31f   Dan Williams   md/raid5,6: add p...
1080
  	struct page **xor_srcs = percpu->scribble;
a08abd8ca   Dan Williams   async_tx: structi...
1081
  	struct async_submit_ctl submit;
91c009248   Dan Williams   md: raid5_run_ops...
1082
1083
  	int count = 0, pd_idx = sh->pd_idx, i;
  	struct page *xor_dest;
d8ee0728b   Dan Williams   md: replace R5_Wa...
1084
  	int prexor = 0;
91c009248   Dan Williams   md: raid5_run_ops...
1085
  	unsigned long flags;
91c009248   Dan Williams   md: raid5_run_ops...
1086

e46b272b6   Harvey Harrison   md: replace remai...
1087
1088
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
1089
1090
1091
1092
1093
  		(unsigned long long)sh->sector);
  
  	/* check if prexor is active which means only process blocks
  	 * that are part of a read-modify-write (written)
  	 */
d8ee0728b   Dan Williams   md: replace R5_Wa...
1094
1095
  	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
  		prexor = 1;
91c009248   Dan Williams   md: raid5_run_ops...
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
  		xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  			if (dev->written)
  				xor_srcs[count++] = dev->page;
  		}
  	} else {
  		xor_dest = sh->dev[pd_idx].page;
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  			if (i != pd_idx)
  				xor_srcs[count++] = dev->page;
  		}
  	}
91c009248   Dan Williams   md: raid5_run_ops...
1110
1111
1112
1113
1114
  	/* 1/ if we prexor'd then the dest is reused as a source
  	 * 2/ if we did not prexor then we are redoing the parity
  	 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
  	 * for the synchronous xor case
  	 */
88ba2aa58   Dan Williams   async_tx: kill AS...
1115
  	flags = ASYNC_TX_ACK |
91c009248   Dan Williams   md: raid5_run_ops...
1116
1117
1118
  		(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
  
  	atomic_inc(&sh->count);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1119
  	init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
d6f38f31f   Dan Williams   md/raid5,6: add p...
1120
  			  to_addr_conv(sh, percpu));
a08abd8ca   Dan Williams   async_tx: structi...
1121
1122
1123
1124
  	if (unlikely(count == 1))
  		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
  	else
  		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
91c009248   Dan Williams   md: raid5_run_ops...
1125
  }
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
  static void
  ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
  		     struct dma_async_tx_descriptor *tx)
  {
  	struct async_submit_ctl submit;
  	struct page **blocks = percpu->scribble;
  	int count;
  
  	pr_debug("%s: stripe %llu
  ", __func__, (unsigned long long)sh->sector);
  
  	count = set_syndrome_sources(blocks, sh);
  
  	atomic_inc(&sh->count);
  
  	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
  			  sh, to_addr_conv(sh, percpu));
  	async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
91c009248   Dan Williams   md: raid5_run_ops...
1144
1145
1146
1147
1148
  }
  
  static void ops_complete_check(void *stripe_head_ref)
  {
  	struct stripe_head *sh = stripe_head_ref;
91c009248   Dan Williams   md: raid5_run_ops...
1149

e46b272b6   Harvey Harrison   md: replace remai...
1150
1151
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
1152
  		(unsigned long long)sh->sector);
ecc65c9b3   Dan Williams   md: replace STRIP...
1153
  	sh->check_state = check_state_check_result;
91c009248   Dan Williams   md: raid5_run_ops...
1154
1155
1156
  	set_bit(STRIPE_HANDLE, &sh->state);
  	release_stripe(sh);
  }
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1157
  static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
91c009248   Dan Williams   md: raid5_run_ops...
1158
  {
91c009248   Dan Williams   md: raid5_run_ops...
1159
  	int disks = sh->disks;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1160
1161
1162
  	int pd_idx = sh->pd_idx;
  	int qd_idx = sh->qd_idx;
  	struct page *xor_dest;
d6f38f31f   Dan Williams   md/raid5,6: add p...
1163
  	struct page **xor_srcs = percpu->scribble;
91c009248   Dan Williams   md: raid5_run_ops...
1164
  	struct dma_async_tx_descriptor *tx;
a08abd8ca   Dan Williams   async_tx: structi...
1165
  	struct async_submit_ctl submit;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1166
1167
  	int count;
  	int i;
91c009248   Dan Williams   md: raid5_run_ops...
1168

e46b272b6   Harvey Harrison   md: replace remai...
1169
1170
  	pr_debug("%s: stripe %llu
  ", __func__,
91c009248   Dan Williams   md: raid5_run_ops...
1171
  		(unsigned long long)sh->sector);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1172
1173
1174
  	count = 0;
  	xor_dest = sh->dev[pd_idx].page;
  	xor_srcs[count++] = xor_dest;
91c009248   Dan Williams   md: raid5_run_ops...
1175
  	for (i = disks; i--; ) {
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1176
1177
1178
  		if (i == pd_idx || i == qd_idx)
  			continue;
  		xor_srcs[count++] = sh->dev[i].page;
91c009248   Dan Williams   md: raid5_run_ops...
1179
  	}
d6f38f31f   Dan Williams   md/raid5,6: add p...
1180
1181
  	init_async_submit(&submit, 0, NULL, NULL, NULL,
  			  to_addr_conv(sh, percpu));
099f53cb5   Dan Williams   async_tx: rename ...
1182
  	tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
a08abd8ca   Dan Williams   async_tx: structi...
1183
  			   &sh->ops.zero_sum_result, &submit);
91c009248   Dan Williams   md: raid5_run_ops...
1184

91c009248   Dan Williams   md: raid5_run_ops...
1185
  	atomic_inc(&sh->count);
a08abd8ca   Dan Williams   async_tx: structi...
1186
1187
  	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
  	tx = async_trigger_callback(&submit);
91c009248   Dan Williams   md: raid5_run_ops...
1188
  }
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
  static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
  {
  	struct page **srcs = percpu->scribble;
  	struct async_submit_ctl submit;
  	int count;
  
  	pr_debug("%s: stripe %llu checkp: %d
  ", __func__,
  		(unsigned long long)sh->sector, checkp);
  
  	count = set_syndrome_sources(srcs, sh);
  	if (!checkp)
  		srcs[count] = NULL;
91c009248   Dan Williams   md: raid5_run_ops...
1202

91c009248   Dan Williams   md: raid5_run_ops...
1203
  	atomic_inc(&sh->count);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1204
1205
1206
1207
  	init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
  			  sh, to_addr_conv(sh, percpu));
  	async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
  			   &sh->ops.zero_sum_result, percpu->spare_page, &submit);
91c009248   Dan Williams   md: raid5_run_ops...
1208
  }
417b8d4ac   Dan Williams   md/raid456: downl...
1209
  static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
91c009248   Dan Williams   md: raid5_run_ops...
1210
1211
1212
  {
  	int overlap_clear = 0, i, disks = sh->disks;
  	struct dma_async_tx_descriptor *tx = NULL;
d1688a6d5   NeilBrown   md/raid5: typedef...
1213
  	struct r5conf *conf = sh->raid_conf;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1214
  	int level = conf->level;
d6f38f31f   Dan Williams   md/raid5,6: add p...
1215
1216
  	struct raid5_percpu *percpu;
  	unsigned long cpu;
91c009248   Dan Williams   md: raid5_run_ops...
1217

d6f38f31f   Dan Williams   md/raid5,6: add p...
1218
1219
  	cpu = get_cpu();
  	percpu = per_cpu_ptr(conf->percpu, cpu);
83de75cc9   Dan Williams   md: replace STRIP...
1220
  	if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
91c009248   Dan Williams   md: raid5_run_ops...
1221
1222
1223
  		ops_run_biofill(sh);
  		overlap_clear++;
  	}
7b3a871ed   Dan Williams   md: handle operat...
1224
  	if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
  		if (level < 6)
  			tx = ops_run_compute5(sh, percpu);
  		else {
  			if (sh->ops.target2 < 0 || sh->ops.target < 0)
  				tx = ops_run_compute6_1(sh, percpu);
  			else
  				tx = ops_run_compute6_2(sh, percpu);
  		}
  		/* terminate the chain if reconstruct is not set to be run */
  		if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
7b3a871ed   Dan Williams   md: handle operat...
1235
1236
  			async_tx_ack(tx);
  	}
91c009248   Dan Williams   md: raid5_run_ops...
1237

600aa1099   Dan Williams   md: replace STRIP...
1238
  	if (test_bit(STRIPE_OP_PREXOR, &ops_request))
d6f38f31f   Dan Williams   md/raid5,6: add p...
1239
  		tx = ops_run_prexor(sh, percpu, tx);
91c009248   Dan Williams   md: raid5_run_ops...
1240

600aa1099   Dan Williams   md: replace STRIP...
1241
  	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
d8ee0728b   Dan Williams   md: replace R5_Wa...
1242
  		tx = ops_run_biodrain(sh, tx);
91c009248   Dan Williams   md: raid5_run_ops...
1243
1244
  		overlap_clear++;
  	}
ac6b53b6e   Dan Williams   md/raid6: asynchr...
1245
1246
1247
1248
1249
1250
  	if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
  		if (level < 6)
  			ops_run_reconstruct5(sh, percpu, tx);
  		else
  			ops_run_reconstruct6(sh, percpu, tx);
  	}
91c009248   Dan Williams   md: raid5_run_ops...
1251

ac6b53b6e   Dan Williams   md/raid6: asynchr...
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
  	if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
  		if (sh->check_state == check_state_run)
  			ops_run_check_p(sh, percpu);
  		else if (sh->check_state == check_state_run_q)
  			ops_run_check_pq(sh, percpu, 0);
  		else if (sh->check_state == check_state_run_pq)
  			ops_run_check_pq(sh, percpu, 1);
  		else
  			BUG();
  	}
91c009248   Dan Williams   md: raid5_run_ops...
1262

91c009248   Dan Williams   md: raid5_run_ops...
1263
1264
1265
1266
1267
1268
  	if (overlap_clear)
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  			if (test_and_clear_bit(R5_Overlap, &dev->flags))
  				wake_up(&sh->raid_conf->wait_for_overlap);
  		}
d6f38f31f   Dan Williams   md/raid5,6: add p...
1269
  	put_cpu();
91c009248   Dan Williams   md: raid5_run_ops...
1270
  }
417b8d4ac   Dan Williams   md/raid456: downl...
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
  #ifdef CONFIG_MULTICORE_RAID456
  /*
   * async_run_ops - deferred half of raid_run_ops()
   * @param: the stripe_head whose staged request should be executed
   * @cookie: async cookie supplied by the async framework; not used here
   *
   * Scheduled by raid_run_ops() through async_schedule().  Takes a private
   * copy of the staged request, releases the staging slot so another
   * request can be queued, then performs the operations.
   */
  static void async_run_ops(void *param, async_cookie_t cookie)
  {
  	struct stripe_head *sh = param;
  	/* copy the request before the slot is released below */
  	unsigned long ops_request = sh->ops.request;
  
  	/* staging slot is free again: let a waiter in raid_run_ops() proceed */
  	clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state);
  	wake_up(&sh->ops.wait_for_ops);
  
  	__raid_run_ops(sh, ops_request);
  	/* presumably balances the atomic_inc(&sh->count) done in
  	 * raid_run_ops() - release_stripe() is defined elsewhere, confirm
  	 */
  	release_stripe(sh);
  }
  
  /*
   * raid_run_ops - stage @ops_request on @sh and run it asynchronously
   * @sh: stripe to operate on
   * @ops_request: bitmask of STRIPE_OP_* operations to perform
   *
   * CONFIG_MULTICORE_RAID456 variant: the request is stored in
   * sh->ops.request and executed later by async_run_ops().  Blocks until
   * any previously staged request on this stripe has been picked up.
   */
  static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
  {
  	/* since handle_stripe can be called outside of raid5d context
  	 * we need to ensure sh->ops.request is de-staged before another
  	 * request arrives
  	 */
  	wait_event(sh->ops.wait_for_ops,
  		   !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state));
  	/* we now own the staging slot (pending bit was clear and is now set) */
  	sh->ops.request = ops_request;
  
  	/* extra count on the stripe for the async work; async_run_ops()
  	 * calls release_stripe() when it is done
  	 */
  	atomic_inc(&sh->count);
  	async_schedule(async_run_ops, sh);
  }
  #else
  #define raid_run_ops __raid_run_ops
  #endif
d1688a6d5   NeilBrown   md/raid5: typedef...
1300
  static int grow_one_stripe(struct r5conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1301
1302
  {
  	struct stripe_head *sh;
6ce328462   Namhyung Kim   md/raid5: use kme...
1303
  	sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
3f294f4fb   NeilBrown   [PATCH] md: add k...
1304
1305
  	if (!sh)
  		return 0;
6ce328462   Namhyung Kim   md/raid5: use kme...
1306

3f294f4fb   NeilBrown   [PATCH] md: add k...
1307
  	sh->raid_conf = conf;
417b8d4ac   Dan Williams   md/raid456: downl...
1308
1309
1310
  	#ifdef CONFIG_MULTICORE_RAID456
  	init_waitqueue_head(&sh->ops.wait_for_ops);
  	#endif
3f294f4fb   NeilBrown   [PATCH] md: add k...
1311

e4e11e385   NeilBrown   md/raid5: avoid o...
1312
1313
  	if (grow_buffers(sh)) {
  		shrink_buffers(sh);
3f294f4fb   NeilBrown   [PATCH] md: add k...
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
  		kmem_cache_free(conf->slab_cache, sh);
  		return 0;
  	}
  	/* we just created an active stripe so... */
  	atomic_set(&sh->count, 1);
  	atomic_inc(&conf->active_stripes);
  	INIT_LIST_HEAD(&sh->lru);
  	release_stripe(sh);
  	return 1;
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
1324
  static int grow_stripes(struct r5conf *conf, int num)
3f294f4fb   NeilBrown   [PATCH] md: add k...
1325
  {
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
1326
  	struct kmem_cache *sc;
5e5e3e78e   NeilBrown   md: Fix handling ...
1327
  	int devs = max(conf->raid_disks, conf->previous_raid_disks);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1328

f4be6b43f   NeilBrown   md/raid5: ensure ...
1329
1330
1331
1332
1333
1334
1335
  	if (conf->mddev->gendisk)
  		sprintf(conf->cache_name[0],
  			"raid%d-%s", conf->level, mdname(conf->mddev));
  	else
  		sprintf(conf->cache_name[0],
  			"raid%d-%p", conf->level, conf->mddev);
  	sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1336
1337
  	conf->active_name = 0;
  	sc = kmem_cache_create(conf->cache_name[conf->active_name],
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1338
  			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
20c2df83d   Paul Mundt   mm: Remove slab d...
1339
  			       0, 0, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1340
1341
1342
  	if (!sc)
  		return 1;
  	conf->slab_cache = sc;
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1343
  	conf->pool_size = devs;
16a53ecc3   NeilBrown   [PATCH] md: merge...
1344
  	while (num--)
3f294f4fb   NeilBrown   [PATCH] md: add k...
1345
  		if (!grow_one_stripe(conf))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1346
  			return 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1347
1348
  	return 0;
  }
292695531   NeilBrown   [PATCH] md: Final...
1349

d6f38f31f   Dan Williams   md/raid5,6: add p...
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
  /**
   * scribble_len - return the required size of the scribble region
   * @num: total number of disks in the array
   *
   * The region holds, for each of (@num + 2) slots:
   * 1/ a struct page pointer, and
   * 2/ an addr_conv_t used to convert that entry to its corresponding dma
   *    (dma_map_page()) or page (page_address()) address.
   *
   * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
   * calculate over all devices (not just the data blocks), using zeros in place
   * of the P and Q blocks.
   */
  static size_t scribble_len(int num)
  {
  	return (sizeof(struct page *) + sizeof(addr_conv_t)) * (num + 2);
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
1371
  static int resize_stripes(struct r5conf *conf, int newsize)
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
  {
  	/* Make all the stripes able to hold 'newsize' devices.
  	 * New slots in each stripe get 'page' set to a new page.
  	 *
  	 * This happens in stages:
  	 * 1/ create a new kmem_cache and allocate the required number of
  	 *    stripe_heads.
  	 * 2/ gather all the old stripe_heads and tranfer the pages across
  	 *    to the new stripe_heads.  This will have the side effect of
  	 *    freezing the array as once all stripe_heads have been collected,
  	 *    no IO will be possible.  Old stripe heads are freed once their
  	 *    pages have been transferred over, and the old kmem_cache is
  	 *    freed when all stripes are done.
  	 * 3/ reallocate conf->disks to be suitable bigger.  If this fails,
  	 *    we simple return a failre status - no need to clean anything up.
  	 * 4/ allocate new pages for the new slots in the new stripe_heads.
  	 *    If this fails, we don't bother trying the shrink the
  	 *    stripe_heads down again, we just leave them as they are.
  	 *    As each stripe_head is processed the new one is released into
  	 *    active service.
  	 *
  	 * Once step2 is started, we cannot afford to wait for a write,
  	 * so we use GFP_NOIO allocations.
  	 */
  	struct stripe_head *osh, *nsh;
  	LIST_HEAD(newstripes);
  	struct disk_info *ndisks;
d6f38f31f   Dan Williams   md/raid5,6: add p...
1399
  	unsigned long cpu;
b5470dc5f   Dan Williams   md: resolve exter...
1400
  	int err;
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
1401
  	struct kmem_cache *sc;
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1402
1403
1404
1405
  	int i;
  
  	if (newsize <= conf->pool_size)
  		return 0; /* never bother to shrink */
b5470dc5f   Dan Williams   md: resolve exter...
1406
1407
1408
  	err = md_allow_write(conf->mddev);
  	if (err)
  		return err;
2a2275d63   NeilBrown   [PATCH] md: fix p...
1409

ad01c9e37   NeilBrown   [PATCH] md: Allow...
1410
1411
1412
  	/* Step 1 */
  	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
  			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
20c2df83d   Paul Mundt   mm: Remove slab d...
1413
  			       0, 0, NULL);
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1414
1415
1416
1417
  	if (!sc)
  		return -ENOMEM;
  
  	for (i = conf->max_nr_stripes; i; i--) {
6ce328462   Namhyung Kim   md/raid5: use kme...
1418
  		nsh = kmem_cache_zalloc(sc, GFP_KERNEL);
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1419
1420
  		if (!nsh)
  			break;
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1421
  		nsh->raid_conf = conf;
417b8d4ac   Dan Williams   md/raid456: downl...
1422
1423
1424
  		#ifdef CONFIG_MULTICORE_RAID456
  		init_waitqueue_head(&nsh->ops.wait_for_ops);
  		#endif
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
  
  		list_add(&nsh->lru, &newstripes);
  	}
  	if (i) {
  		/* didn't get enough, give up */
  		while (!list_empty(&newstripes)) {
  			nsh = list_entry(newstripes.next, struct stripe_head, lru);
  			list_del(&nsh->lru);
  			kmem_cache_free(sc, nsh);
  		}
  		kmem_cache_destroy(sc);
  		return -ENOMEM;
  	}
  	/* Step 2 - Must use GFP_NOIO now.
  	 * OK, we have enough stripes, start collecting inactive
  	 * stripes and copying them over
  	 */
  	list_for_each_entry(nsh, &newstripes, lru) {
  		spin_lock_irq(&conf->device_lock);
  		wait_event_lock_irq(conf->wait_for_stripe,
  				    !list_empty(&conf->inactive_list),
  				    conf->device_lock,
482c08349   NeilBrown   md - remove old p...
1447
  				    );
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
  		osh = get_free_stripe(conf);
  		spin_unlock_irq(&conf->device_lock);
  		atomic_set(&nsh->count, 1);
  		for(i=0; i<conf->pool_size; i++)
  			nsh->dev[i].page = osh->dev[i].page;
  		for( ; i<newsize; i++)
  			nsh->dev[i].page = NULL;
  		kmem_cache_free(conf->slab_cache, osh);
  	}
  	kmem_cache_destroy(conf->slab_cache);
  
  	/* Step 3.
  	 * At this point, we are holding all the stripes so the array
  	 * is completely stalled, so now is a good time to resize
d6f38f31f   Dan Williams   md/raid5,6: add p...
1462
  	 * conf->disks and the scribble region
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1463
1464
1465
1466
1467
1468
1469
1470
1471
  	 */
  	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
  	if (ndisks) {
  		for (i=0; i<conf->raid_disks; i++)
  			ndisks[i] = conf->disks[i];
  		kfree(conf->disks);
  		conf->disks = ndisks;
  	} else
  		err = -ENOMEM;
d6f38f31f   Dan Williams   md/raid5,6: add p...
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
  	get_online_cpus();
  	conf->scribble_len = scribble_len(newsize);
  	for_each_present_cpu(cpu) {
  		struct raid5_percpu *percpu;
  		void *scribble;
  
  		percpu = per_cpu_ptr(conf->percpu, cpu);
  		scribble = kmalloc(conf->scribble_len, GFP_NOIO);
  
  		if (scribble) {
  			kfree(percpu->scribble);
  			percpu->scribble = scribble;
  		} else {
  			err = -ENOMEM;
  			break;
  		}
  	}
  	put_online_cpus();
ad01c9e37   NeilBrown   [PATCH] md: Allow...
1490
1491
1492
1493
  	/* Step 4, return new stripes to service */
  	while(!list_empty(&newstripes)) {
  		nsh = list_entry(newstripes.next, struct stripe_head, lru);
  		list_del_init(&nsh->lru);
d6f38f31f   Dan Williams   md/raid5,6: add p...
1494

ad01c9e37   NeilBrown   [PATCH] md: Allow...
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
  		for (i=conf->raid_disks; i < newsize; i++)
  			if (nsh->dev[i].page == NULL) {
  				struct page *p = alloc_page(GFP_NOIO);
  				nsh->dev[i].page = p;
  				if (!p)
  					err = -ENOMEM;
  			}
  		release_stripe(nsh);
  	}
  	/* critical section pass, GFP_NOIO no longer needed */
  
  	conf->slab_cache = sc;
  	conf->active_name = 1-conf->active_name;
  	conf->pool_size = newsize;
  	return err;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1511

d1688a6d5   NeilBrown   md/raid5: typedef...
1512
  static int drop_one_stripe(struct r5conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1513
1514
  {
  	struct stripe_head *sh;
3f294f4fb   NeilBrown   [PATCH] md: add k...
1515
1516
1517
1518
1519
  	spin_lock_irq(&conf->device_lock);
  	sh = get_free_stripe(conf);
  	spin_unlock_irq(&conf->device_lock);
  	if (!sh)
  		return 0;
78bafebd4   Eric Sesterhenn   BUG_ON() Conversi...
1520
  	BUG_ON(atomic_read(&sh->count));
e4e11e385   NeilBrown   md/raid5: avoid o...
1521
  	shrink_buffers(sh);
3f294f4fb   NeilBrown   [PATCH] md: add k...
1522
1523
1524
1525
  	kmem_cache_free(conf->slab_cache, sh);
  	atomic_dec(&conf->active_stripes);
  	return 1;
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
1526
  static void shrink_stripes(struct r5conf *conf)
3f294f4fb   NeilBrown   [PATCH] md: add k...
1527
1528
1529
  {
  	while (drop_one_stripe(conf))
  		;
29fc7e3e7   NeilBrown   [PATCH] md: Assor...
1530
1531
  	if (conf->slab_cache)
  		kmem_cache_destroy(conf->slab_cache);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1532
1533
  	conf->slab_cache = NULL;
  }
6712ecf8f   NeilBrown   Drop 'size' argum...
1534
  static void raid5_end_read_request(struct bio * bi, int error)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1535
  {
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1536
  	struct stripe_head *sh = bi->bi_private;
d1688a6d5   NeilBrown   md/raid5: typedef...
1537
  	struct r5conf *conf = sh->raid_conf;
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
1538
  	int disks = sh->disks, i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1539
  	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
d69504325   NeilBrown   [PATCH] md: inclu...
1540
  	char b[BDEVNAME_SIZE];
3cb030020   NeilBrown   md: removing type...
1541
  	struct md_rdev *rdev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1542

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1543
1544
1545
1546
  
  	for (i=0 ; i<disks; i++)
  		if (bi == &sh->dev[i].req)
  			break;
45b4233ca   Dan Williams   raid5: replace cu...
1547
1548
1549
  	pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.
  ",
  		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1550
1551
1552
  		uptodate);
  	if (i == disks) {
  		BUG();
6712ecf8f   NeilBrown   Drop 'size' argum...
1553
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1554
1555
1556
  	}
  
  	if (uptodate) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1557
  		set_bit(R5_UPTODATE, &sh->dev[i].flags);
4e5314b56   NeilBrown   [PATCH] md: bette...
1558
  		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
d69504325   NeilBrown   [PATCH] md: inclu...
1559
  			rdev = conf->disks[i].rdev;
8bda470e8   Christian Dietrich   md/raid: use prin...
1560
1561
1562
1563
1564
1565
1566
1567
1568
  			printk_ratelimited(
  				KERN_INFO
  				"md/raid:%s: read error corrected"
  				" (%lu sectors at %llu on %s)
  ",
  				mdname(conf->mddev), STRIPE_SECTORS,
  				(unsigned long long)(sh->sector
  						     + rdev->data_offset),
  				bdevname(rdev->bdev, b));
ddd5115fe   Namhyung Kim   md/raid5: move rd...
1569
  			atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
4e5314b56   NeilBrown   [PATCH] md: bette...
1570
1571
1572
  			clear_bit(R5_ReadError, &sh->dev[i].flags);
  			clear_bit(R5_ReWrite, &sh->dev[i].flags);
  		}
ba22dcbf1   NeilBrown   [PATCH] md: impro...
1573
1574
  		if (atomic_read(&conf->disks[i].rdev->read_errors))
  			atomic_set(&conf->disks[i].rdev->read_errors, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1575
  	} else {
d69504325   NeilBrown   [PATCH] md: inclu...
1576
  		const char *bdn = bdevname(conf->disks[i].rdev->bdev, b);
ba22dcbf1   NeilBrown   [PATCH] md: impro...
1577
  		int retry = 0;
d69504325   NeilBrown   [PATCH] md: inclu...
1578
  		rdev = conf->disks[i].rdev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1579
  		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
d69504325   NeilBrown   [PATCH] md: inclu...
1580
  		atomic_inc(&rdev->read_errors);
7b0bb5368   Gabriele A. Trombetti   md/raid6: Fix rai...
1581
  		if (conf->mddev->degraded >= conf->max_degraded)
8bda470e8   Christian Dietrich   md/raid: use prin...
1582
1583
1584
1585
1586
1587
1588
1589
1590
  			printk_ratelimited(
  				KERN_WARNING
  				"md/raid:%s: read error not correctable "
  				"(sector %llu on %s).
  ",
  				mdname(conf->mddev),
  				(unsigned long long)(sh->sector
  						     + rdev->data_offset),
  				bdn);
ba22dcbf1   NeilBrown   [PATCH] md: impro...
1591
  		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
4e5314b56   NeilBrown   [PATCH] md: bette...
1592
  			/* Oh, no!!! */
8bda470e8   Christian Dietrich   md/raid: use prin...
1593
1594
1595
1596
1597
1598
1599
1600
1601
  			printk_ratelimited(
  				KERN_WARNING
  				"md/raid:%s: read error NOT corrected!! "
  				"(sector %llu on %s).
  ",
  				mdname(conf->mddev),
  				(unsigned long long)(sh->sector
  						     + rdev->data_offset),
  				bdn);
d69504325   NeilBrown   [PATCH] md: inclu...
1602
  		else if (atomic_read(&rdev->read_errors)
ba22dcbf1   NeilBrown   [PATCH] md: impro...
1603
  			 > conf->max_nr_stripes)
14f8d26b8   NeilBrown   [PATCH] md: small...
1604
  			printk(KERN_WARNING
0c55e0225   NeilBrown   md/raid5: improve...
1605
1606
  			       "md/raid:%s: Too many read errors, failing device %s.
  ",
d69504325   NeilBrown   [PATCH] md: inclu...
1607
  			       mdname(conf->mddev), bdn);
ba22dcbf1   NeilBrown   [PATCH] md: impro...
1608
1609
1610
1611
1612
  		else
  			retry = 1;
  		if (retry)
  			set_bit(R5_ReadError, &sh->dev[i].flags);
  		else {
4e5314b56   NeilBrown   [PATCH] md: bette...
1613
1614
  			clear_bit(R5_ReadError, &sh->dev[i].flags);
  			clear_bit(R5_ReWrite, &sh->dev[i].flags);
d69504325   NeilBrown   [PATCH] md: inclu...
1615
  			md_error(conf->mddev, rdev);
ba22dcbf1   NeilBrown   [PATCH] md: impro...
1616
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1617
1618
  	}
  	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1619
1620
1621
  	clear_bit(R5_LOCKED, &sh->dev[i].flags);
  	set_bit(STRIPE_HANDLE, &sh->state);
  	release_stripe(sh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1622
  }
d710e1381   NeilBrown   md: remove space ...
1623
  static void raid5_end_write_request(struct bio *bi, int error)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1624
  {
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1625
  	struct stripe_head *sh = bi->bi_private;
d1688a6d5   NeilBrown   md/raid5: typedef...
1626
  	struct r5conf *conf = sh->raid_conf;
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
1627
  	int disks = sh->disks, i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1628
  	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
b84db560e   NeilBrown   md/raid5: Clear b...
1629
1630
  	sector_t first_bad;
  	int bad_sectors;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1631

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1632
1633
1634
  	for (i=0 ; i<disks; i++)
  		if (bi == &sh->dev[i].req)
  			break;
45b4233ca   Dan Williams   raid5: replace cu...
1635
1636
  	pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1637
1638
1639
1640
  		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
  		uptodate);
  	if (i == disks) {
  		BUG();
6712ecf8f   NeilBrown   Drop 'size' argum...
1641
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1642
  	}
bc2607f39   NeilBrown   md/raid5: write e...
1643
1644
1645
  	if (!uptodate) {
  		set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
  		set_bit(R5_WriteError, &sh->dev[i].flags);
b84db560e   NeilBrown   md/raid5: Clear b...
1646
1647
1648
  	} else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
  			       &first_bad, &bad_sectors))
  		set_bit(R5_MadeGood, &sh->dev[i].flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1649
1650
1651
1652
1653
  
  	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
  	
  	clear_bit(R5_LOCKED, &sh->dev[i].flags);
  	set_bit(STRIPE_HANDLE, &sh->state);
c04be0aa8   NeilBrown   [PATCH] md: Impro...
1654
  	release_stripe(sh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1655
  }
784052ecc   NeilBrown   md/raid5: prepare...
1656
  static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1657
  	
784052ecc   NeilBrown   md/raid5: prepare...
1658
  static void raid5_build_block(struct stripe_head *sh, int i, int previous)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
  {
  	struct r5dev *dev = &sh->dev[i];
  
  	bio_init(&dev->req);
  	dev->req.bi_io_vec = &dev->vec;
  	dev->req.bi_vcnt++;
  	dev->req.bi_max_vecs++;
  	dev->vec.bv_page = dev->page;
  	dev->vec.bv_len = STRIPE_SIZE;
  	dev->vec.bv_offset = 0;
  
  	dev->req.bi_sector = sh->sector;
  	dev->req.bi_private = sh;
  
  	dev->flags = 0;
784052ecc   NeilBrown   md/raid5: prepare...
1674
  	dev->sector = compute_blocknr(sh, i, previous);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1675
  }
fd01b88c7   NeilBrown   md: remove typede...
1676
  static void error(struct mddev *mddev, struct md_rdev *rdev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1677
1678
  {
  	char b[BDEVNAME_SIZE];
d1688a6d5   NeilBrown   md/raid5: typedef...
1679
  	struct r5conf *conf = mddev->private;
0c55e0225   NeilBrown   md/raid5: improve...
1680
1681
  	pr_debug("raid456: error called
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1682

6f8d0c77c   NeilBrown   md: make error_ha...
1683
1684
1685
1686
1687
1688
1689
1690
1691
  	if (test_and_clear_bit(In_sync, &rdev->flags)) {
  		unsigned long flags;
  		spin_lock_irqsave(&conf->device_lock, flags);
  		mddev->degraded++;
  		spin_unlock_irqrestore(&conf->device_lock, flags);
  		/*
  		 * if recovery was running, make sure it aborts.
  		 */
  		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1692
  	}
de393cdea   NeilBrown   md: make it easie...
1693
  	set_bit(Blocked, &rdev->flags);
6f8d0c77c   NeilBrown   md: make error_ha...
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
  	set_bit(Faulty, &rdev->flags);
  	set_bit(MD_CHANGE_DEVS, &mddev->flags);
  	printk(KERN_ALERT
  	       "md/raid:%s: Disk failure on %s, disabling device.
  "
  	       "md/raid:%s: Operation continuing on %d devices.
  ",
  	       mdname(mddev),
  	       bdevname(rdev->bdev, b),
  	       mdname(mddev),
  	       conf->raid_disks - mddev->degraded);
16a53ecc3   NeilBrown   [PATCH] md: merge...
1705
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1706
1707
1708
1709
1710
  
  /*
   * Input: a 'big' sector number,
   * Output: index of the data and parity disk, and the sector # in them.
   */
d1688a6d5   NeilBrown   md/raid5: typedef...
1711
  static sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
911d4ee85   NeilBrown   md/raid5: simplif...
1712
1713
  				     int previous, int *dd_idx,
  				     struct stripe_head *sh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1714
  {
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1715
  	sector_t stripe, stripe2;
35f2a5911   NeilBrown   md/raid5: allow f...
1716
  	sector_t chunk_number;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1717
  	unsigned int chunk_offset;
911d4ee85   NeilBrown   md/raid5: simplif...
1718
  	int pd_idx, qd_idx;
67cc2b816   NeilBrown   md/raid5: finish ...
1719
  	int ddf_layout = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1720
  	sector_t new_sector;
e183eaedd   NeilBrown   md/raid5: prepare...
1721
1722
  	int algorithm = previous ? conf->prev_algo
  				 : conf->algorithm;
09c9e5fa1   Andre Noll   md: convert conf-...
1723
1724
  	int sectors_per_chunk = previous ? conf->prev_chunk_sectors
  					 : conf->chunk_sectors;
112bf8970   NeilBrown   md/raid5: change ...
1725
1726
1727
  	int raid_disks = previous ? conf->previous_raid_disks
  				  : conf->raid_disks;
  	int data_disks = raid_disks - conf->max_degraded;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1728
1729
1730
1731
1732
1733
1734
1735
  
  	/* First compute the information on this sector */
  
  	/*
  	 * Compute the chunk number and the sector offset inside the chunk
  	 */
  	chunk_offset = sector_div(r_sector, sectors_per_chunk);
  	chunk_number = r_sector;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1736
1737
1738
1739
  
  	/*
  	 * Compute the stripe number
  	 */
35f2a5911   NeilBrown   md/raid5: allow f...
1740
1741
  	stripe = chunk_number;
  	*dd_idx = sector_div(stripe, data_disks);
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1742
  	stripe2 = stripe;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1743
1744
1745
  	/*
  	 * Select the parity disk based on the user selected algorithm.
  	 */
84789554e   NeilBrown   md/raid5: move mo...
1746
  	pd_idx = qd_idx = -1;
16a53ecc3   NeilBrown   [PATCH] md: merge...
1747
1748
  	switch(conf->level) {
  	case 4:
911d4ee85   NeilBrown   md/raid5: simplif...
1749
  		pd_idx = data_disks;
16a53ecc3   NeilBrown   [PATCH] md: merge...
1750
1751
  		break;
  	case 5:
e183eaedd   NeilBrown   md/raid5: prepare...
1752
  		switch (algorithm) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1753
  		case ALGORITHM_LEFT_ASYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1754
  			pd_idx = data_disks - sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1755
  			if (*dd_idx >= pd_idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1756
1757
1758
  				(*dd_idx)++;
  			break;
  		case ALGORITHM_RIGHT_ASYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1759
  			pd_idx = sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1760
  			if (*dd_idx >= pd_idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1761
1762
1763
  				(*dd_idx)++;
  			break;
  		case ALGORITHM_LEFT_SYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1764
  			pd_idx = data_disks - sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1765
  			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1766
1767
  			break;
  		case ALGORITHM_RIGHT_SYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1768
  			pd_idx = sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1769
  			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1770
  			break;
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1771
1772
1773
1774
1775
1776
1777
  		case ALGORITHM_PARITY_0:
  			pd_idx = 0;
  			(*dd_idx)++;
  			break;
  		case ALGORITHM_PARITY_N:
  			pd_idx = data_disks;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1778
  		default:
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1779
  			BUG();
16a53ecc3   NeilBrown   [PATCH] md: merge...
1780
1781
1782
  		}
  		break;
  	case 6:
e183eaedd   NeilBrown   md/raid5: prepare...
1783
  		switch (algorithm) {
16a53ecc3   NeilBrown   [PATCH] md: merge...
1784
  		case ALGORITHM_LEFT_ASYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1785
  			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1786
1787
  			qd_idx = pd_idx + 1;
  			if (pd_idx == raid_disks-1) {
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1788
  				(*dd_idx)++;	/* Q D D D P */
911d4ee85   NeilBrown   md/raid5: simplif...
1789
1790
  				qd_idx = 0;
  			} else if (*dd_idx >= pd_idx)
16a53ecc3   NeilBrown   [PATCH] md: merge...
1791
1792
1793
  				(*dd_idx) += 2; /* D D P Q D */
  			break;
  		case ALGORITHM_RIGHT_ASYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1794
  			pd_idx = sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1795
1796
  			qd_idx = pd_idx + 1;
  			if (pd_idx == raid_disks-1) {
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1797
  				(*dd_idx)++;	/* Q D D D P */
911d4ee85   NeilBrown   md/raid5: simplif...
1798
1799
  				qd_idx = 0;
  			} else if (*dd_idx >= pd_idx)
16a53ecc3   NeilBrown   [PATCH] md: merge...
1800
1801
1802
  				(*dd_idx) += 2; /* D D P Q D */
  			break;
  		case ALGORITHM_LEFT_SYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1803
  			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1804
1805
  			qd_idx = (pd_idx + 1) % raid_disks;
  			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
16a53ecc3   NeilBrown   [PATCH] md: merge...
1806
1807
  			break;
  		case ALGORITHM_RIGHT_SYMMETRIC:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1808
  			pd_idx = sector_div(stripe2, raid_disks);
911d4ee85   NeilBrown   md/raid5: simplif...
1809
1810
  			qd_idx = (pd_idx + 1) % raid_disks;
  			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
16a53ecc3   NeilBrown   [PATCH] md: merge...
1811
  			break;
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
  
  		case ALGORITHM_PARITY_0:
  			pd_idx = 0;
  			qd_idx = 1;
  			(*dd_idx) += 2;
  			break;
  		case ALGORITHM_PARITY_N:
  			pd_idx = data_disks;
  			qd_idx = data_disks + 1;
  			break;
  
  		case ALGORITHM_ROTATING_ZERO_RESTART:
  			/* Exactly the same as RIGHT_ASYMMETRIC, but or
  			 * of blocks for computing Q is different.
  			 */
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1827
  			pd_idx = sector_div(stripe2, raid_disks);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1828
1829
1830
1831
1832
1833
  			qd_idx = pd_idx + 1;
  			if (pd_idx == raid_disks-1) {
  				(*dd_idx)++;	/* Q D D D P */
  				qd_idx = 0;
  			} else if (*dd_idx >= pd_idx)
  				(*dd_idx) += 2; /* D D P Q D */
67cc2b816   NeilBrown   md/raid5: finish ...
1834
  			ddf_layout = 1;
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1835
1836
1837
1838
1839
1840
1841
  			break;
  
  		case ALGORITHM_ROTATING_N_RESTART:
  			/* Same a left_asymmetric, by first stripe is
  			 * D D D P Q  rather than
  			 * Q D D D P
  			 */
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1842
1843
  			stripe2 += 1;
  			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1844
1845
1846
1847
1848
1849
  			qd_idx = pd_idx + 1;
  			if (pd_idx == raid_disks-1) {
  				(*dd_idx)++;	/* Q D D D P */
  				qd_idx = 0;
  			} else if (*dd_idx >= pd_idx)
  				(*dd_idx) += 2; /* D D P Q D */
67cc2b816   NeilBrown   md/raid5: finish ...
1850
  			ddf_layout = 1;
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1851
1852
1853
1854
  			break;
  
  		case ALGORITHM_ROTATING_N_CONTINUE:
  			/* Same as left_symmetric but Q is before P */
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1855
  			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1856
1857
  			qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
  			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
67cc2b816   NeilBrown   md/raid5: finish ...
1858
  			ddf_layout = 1;
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1859
1860
1861
1862
  			break;
  
  		case ALGORITHM_LEFT_ASYMMETRIC_6:
  			/* RAID5 left_asymmetric, with Q on last device */
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1863
  			pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1864
1865
1866
1867
1868
1869
  			if (*dd_idx >= pd_idx)
  				(*dd_idx)++;
  			qd_idx = raid_disks - 1;
  			break;
  
  		case ALGORITHM_RIGHT_ASYMMETRIC_6:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1870
  			pd_idx = sector_div(stripe2, raid_disks-1);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1871
1872
1873
1874
1875
1876
  			if (*dd_idx >= pd_idx)
  				(*dd_idx)++;
  			qd_idx = raid_disks - 1;
  			break;
  
  		case ALGORITHM_LEFT_SYMMETRIC_6:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1877
  			pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1878
1879
1880
1881
1882
  			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
  			qd_idx = raid_disks - 1;
  			break;
  
  		case ALGORITHM_RIGHT_SYMMETRIC_6:
6e3b96ed6   NeilBrown   md/raid5: fix pre...
1883
  			pd_idx = sector_div(stripe2, raid_disks-1);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1884
1885
1886
1887
1888
1889
1890
1891
1892
  			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
  			qd_idx = raid_disks - 1;
  			break;
  
  		case ALGORITHM_PARITY_0_6:
  			pd_idx = 0;
  			(*dd_idx)++;
  			qd_idx = raid_disks - 1;
  			break;
16a53ecc3   NeilBrown   [PATCH] md: merge...
1893
  		default:
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1894
  			BUG();
16a53ecc3   NeilBrown   [PATCH] md: merge...
1895
1896
  		}
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1897
  	}
911d4ee85   NeilBrown   md/raid5: simplif...
1898
1899
1900
  	if (sh) {
  		sh->pd_idx = pd_idx;
  		sh->qd_idx = qd_idx;
67cc2b816   NeilBrown   md/raid5: finish ...
1901
  		sh->ddf_layout = ddf_layout;
911d4ee85   NeilBrown   md/raid5: simplif...
1902
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1903
1904
1905
1906
1907
1908
  	/*
  	 * Finally, compute the new sector number
  	 */
  	new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset;
  	return new_sector;
  }
784052ecc   NeilBrown   md/raid5: prepare...
1909
  static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1910
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
1911
  	struct r5conf *conf = sh->raid_conf;
b875e531f   NeilBrown   [PATCH] md: fix i...
1912
1913
  	int raid_disks = sh->disks;
  	int data_disks = raid_disks - conf->max_degraded;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1914
  	sector_t new_sector = sh->sector, check;
09c9e5fa1   Andre Noll   md: convert conf-...
1915
1916
  	int sectors_per_chunk = previous ? conf->prev_chunk_sectors
  					 : conf->chunk_sectors;
e183eaedd   NeilBrown   md/raid5: prepare...
1917
1918
  	int algorithm = previous ? conf->prev_algo
  				 : conf->algorithm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1919
1920
  	sector_t stripe;
  	int chunk_offset;
35f2a5911   NeilBrown   md/raid5: allow f...
1921
1922
  	sector_t chunk_number;
  	int dummy1, dd_idx = i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1923
  	sector_t r_sector;
911d4ee85   NeilBrown   md/raid5: simplif...
1924
  	struct stripe_head sh2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1925

16a53ecc3   NeilBrown   [PATCH] md: merge...
1926

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1927
1928
  	chunk_offset = sector_div(new_sector, sectors_per_chunk);
  	stripe = new_sector;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1929

16a53ecc3   NeilBrown   [PATCH] md: merge...
1930
1931
1932
1933
1934
  	if (i == sh->pd_idx)
  		return 0;
  	switch(conf->level) {
  	case 4: break;
  	case 5:
e183eaedd   NeilBrown   md/raid5: prepare...
1935
  		switch (algorithm) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
  		case ALGORITHM_LEFT_ASYMMETRIC:
  		case ALGORITHM_RIGHT_ASYMMETRIC:
  			if (i > sh->pd_idx)
  				i--;
  			break;
  		case ALGORITHM_LEFT_SYMMETRIC:
  		case ALGORITHM_RIGHT_SYMMETRIC:
  			if (i < sh->pd_idx)
  				i += raid_disks;
  			i -= (sh->pd_idx + 1);
  			break;
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1947
1948
1949
1950
1951
  		case ALGORITHM_PARITY_0:
  			i -= 1;
  			break;
  		case ALGORITHM_PARITY_N:
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1952
  		default:
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1953
  			BUG();
16a53ecc3   NeilBrown   [PATCH] md: merge...
1954
1955
1956
  		}
  		break;
  	case 6:
d0dabf7e5   NeilBrown   md/raid6: remove ...
1957
  		if (i == sh->qd_idx)
16a53ecc3   NeilBrown   [PATCH] md: merge...
1958
  			return 0; /* It is the Q disk */
e183eaedd   NeilBrown   md/raid5: prepare...
1959
  		switch (algorithm) {
16a53ecc3   NeilBrown   [PATCH] md: merge...
1960
1961
  		case ALGORITHM_LEFT_ASYMMETRIC:
  		case ALGORITHM_RIGHT_ASYMMETRIC:
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1962
1963
1964
1965
  		case ALGORITHM_ROTATING_ZERO_RESTART:
  		case ALGORITHM_ROTATING_N_RESTART:
  			if (sh->pd_idx == raid_disks-1)
  				i--;	/* Q D D D P */
16a53ecc3   NeilBrown   [PATCH] md: merge...
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
  			else if (i > sh->pd_idx)
  				i -= 2; /* D D P Q D */
  			break;
  		case ALGORITHM_LEFT_SYMMETRIC:
  		case ALGORITHM_RIGHT_SYMMETRIC:
  			if (sh->pd_idx == raid_disks-1)
  				i--; /* Q D D D P */
  			else {
  				/* D D P Q D */
  				if (i < sh->pd_idx)
  					i += raid_disks;
  				i -= (sh->pd_idx + 2);
  			}
  			break;
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1980
1981
1982
1983
1984
1985
  		case ALGORITHM_PARITY_0:
  			i -= 2;
  			break;
  		case ALGORITHM_PARITY_N:
  			break;
  		case ALGORITHM_ROTATING_N_CONTINUE:
e4424fee1   NeilBrown   md: fix problems ...
1986
  			/* Like left_symmetric, but P is before Q */
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1987
1988
  			if (sh->pd_idx == 0)
  				i--;	/* P D D D Q */
e4424fee1   NeilBrown   md: fix problems ...
1989
1990
1991
1992
1993
1994
  			else {
  				/* D D Q P D */
  				if (i < sh->pd_idx)
  					i += raid_disks;
  				i -= (sh->pd_idx + 1);
  			}
99c0fb5f9   NeilBrown   md/raid5: Add sup...
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
  			break;
  		case ALGORITHM_LEFT_ASYMMETRIC_6:
  		case ALGORITHM_RIGHT_ASYMMETRIC_6:
  			if (i > sh->pd_idx)
  				i--;
  			break;
  		case ALGORITHM_LEFT_SYMMETRIC_6:
  		case ALGORITHM_RIGHT_SYMMETRIC_6:
  			if (i < sh->pd_idx)
  				i += data_disks + 1;
  			i -= (sh->pd_idx + 1);
  			break;
  		case ALGORITHM_PARITY_0_6:
  			i -= 1;
  			break;
16a53ecc3   NeilBrown   [PATCH] md: merge...
2010
  		default:
99c0fb5f9   NeilBrown   md/raid5: Add sup...
2011
  			BUG();
16a53ecc3   NeilBrown   [PATCH] md: merge...
2012
2013
  		}
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2014
2015
2016
  	}
  
  	chunk_number = stripe * data_disks + i;
35f2a5911   NeilBrown   md/raid5: allow f...
2017
  	r_sector = chunk_number * sectors_per_chunk + chunk_offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2018

112bf8970   NeilBrown   md/raid5: change ...
2019
  	check = raid5_compute_sector(conf, r_sector,
784052ecc   NeilBrown   md/raid5: prepare...
2020
  				     previous, &dummy1, &sh2);
911d4ee85   NeilBrown   md/raid5: simplif...
2021
2022
  	if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
  		|| sh2.qd_idx != sh->qd_idx) {
0c55e0225   NeilBrown   md/raid5: improve...
2023
2024
2025
  		printk(KERN_ERR "md/raid:%s: compute_blocknr: map not correct
  ",
  		       mdname(conf->mddev));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2026
2027
2028
2029
  		return 0;
  	}
  	return r_sector;
  }
600aa1099   Dan Williams   md: replace STRIP...
2030
  static void
c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2031
  schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
600aa1099   Dan Williams   md: replace STRIP...
2032
  			 int rcw, int expand)
e33129d84   Dan Williams   md: handle_stripe...
2033
2034
  {
  	int i, pd_idx = sh->pd_idx, disks = sh->disks;
d1688a6d5   NeilBrown   md/raid5: typedef...
2035
  	struct r5conf *conf = sh->raid_conf;
c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2036
  	int level = conf->level;
e33129d84   Dan Williams   md: handle_stripe...
2037
2038
2039
2040
2041
2042
2043
  
  	if (rcw) {
  		/* if we are not expanding this is a proper write request, and
  		 * there will be bios with new data to be drained into the
  		 * stripe cache
  		 */
  		if (!expand) {
600aa1099   Dan Williams   md: replace STRIP...
2044
2045
2046
2047
  			sh->reconstruct_state = reconstruct_state_drain_run;
  			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
  		} else
  			sh->reconstruct_state = reconstruct_state_run;
16a53ecc3   NeilBrown   [PATCH] md: merge...
2048

ac6b53b6e   Dan Williams   md/raid6: asynchr...
2049
  		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
e33129d84   Dan Williams   md: handle_stripe...
2050
2051
2052
2053
2054
2055
  
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  
  			if (dev->towrite) {
  				set_bit(R5_LOCKED, &dev->flags);
d8ee0728b   Dan Williams   md: replace R5_Wa...
2056
  				set_bit(R5_Wantdrain, &dev->flags);
e33129d84   Dan Williams   md: handle_stripe...
2057
2058
  				if (!expand)
  					clear_bit(R5_UPTODATE, &dev->flags);
600aa1099   Dan Williams   md: replace STRIP...
2059
  				s->locked++;
e33129d84   Dan Williams   md: handle_stripe...
2060
2061
  			}
  		}
c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2062
  		if (s->locked + conf->max_degraded == disks)
8b3e6cdc5   Dan Williams   md: introduce get...
2063
  			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2064
  				atomic_inc(&conf->pending_full_writes);
e33129d84   Dan Williams   md: handle_stripe...
2065
  	} else {
c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2066
  		BUG_ON(level == 6);
e33129d84   Dan Williams   md: handle_stripe...
2067
2068
  		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
  			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
d8ee0728b   Dan Williams   md: replace R5_Wa...
2069
  		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
600aa1099   Dan Williams   md: replace STRIP...
2070
2071
  		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
  		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
ac6b53b6e   Dan Williams   md/raid6: asynchr...
2072
  		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
e33129d84   Dan Williams   md: handle_stripe...
2073
2074
2075
2076
2077
  
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  			if (i == pd_idx)
  				continue;
e33129d84   Dan Williams   md: handle_stripe...
2078
2079
  			if (dev->towrite &&
  			    (test_bit(R5_UPTODATE, &dev->flags) ||
d8ee0728b   Dan Williams   md: replace R5_Wa...
2080
2081
  			     test_bit(R5_Wantcompute, &dev->flags))) {
  				set_bit(R5_Wantdrain, &dev->flags);
e33129d84   Dan Williams   md: handle_stripe...
2082
2083
  				set_bit(R5_LOCKED, &dev->flags);
  				clear_bit(R5_UPTODATE, &dev->flags);
600aa1099   Dan Williams   md: replace STRIP...
2084
  				s->locked++;
e33129d84   Dan Williams   md: handle_stripe...
2085
2086
2087
  			}
  		}
  	}
c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2088
  	/* keep the parity disk(s) locked while asynchronous operations
e33129d84   Dan Williams   md: handle_stripe...
2089
2090
2091
2092
  	 * are in flight
  	 */
  	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
  	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
600aa1099   Dan Williams   md: replace STRIP...
2093
  	s->locked++;
e33129d84   Dan Williams   md: handle_stripe...
2094

c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2095
2096
2097
2098
2099
2100
2101
2102
  	if (level == 6) {
  		int qd_idx = sh->qd_idx;
  		struct r5dev *dev = &sh->dev[qd_idx];
  
  		set_bit(R5_LOCKED, &dev->flags);
  		clear_bit(R5_UPTODATE, &dev->flags);
  		s->locked++;
  	}
600aa1099   Dan Williams   md: replace STRIP...
2103
2104
  	pr_debug("%s: stripe %llu locked: %d ops_request: %lx
  ",
e46b272b6   Harvey Harrison   md: replace remai...
2105
  		__func__, (unsigned long long)sh->sector,
600aa1099   Dan Williams   md: replace STRIP...
2106
  		s->locked, s->ops_request);
e33129d84   Dan Williams   md: handle_stripe...
2107
  }
16a53ecc3   NeilBrown   [PATCH] md: merge...
2108

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2109
2110
  /*
   * Each stripe/dev can have one or more bion attached.
16a53ecc3   NeilBrown   [PATCH] md: merge...
2111
   * toread/towrite point to the first in a chain.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2112
2113
2114
2115
2116
   * The bi_next chain must be in order.
   */
  static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
  {
  	struct bio **bip;
d1688a6d5   NeilBrown   md/raid5: typedef...
2117
  	struct r5conf *conf = sh->raid_conf;
72626685d   NeilBrown   [PATCH] md: add w...
2118
  	int firstwrite=0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2119

cbe47ec55   NeilBrown   md/raid5: Protect...
2120
2121
  	pr_debug("adding bi b#%llu to stripe s#%llu
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2122
2123
  		(unsigned long long)bi->bi_sector,
  		(unsigned long long)sh->sector);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2124
  	spin_lock_irq(&conf->device_lock);
72626685d   NeilBrown   [PATCH] md: add w...
2125
  	if (forwrite) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2126
  		bip = &sh->dev[dd_idx].towrite;
72626685d   NeilBrown   [PATCH] md: add w...
2127
2128
2129
  		if (*bip == NULL && sh->dev[dd_idx].written == NULL)
  			firstwrite = 1;
  	} else
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2130
2131
2132
2133
2134
2135
2136
2137
  		bip = &sh->dev[dd_idx].toread;
  	while (*bip && (*bip)->bi_sector < bi->bi_sector) {
  		if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
  			goto overlap;
  		bip = & (*bip)->bi_next;
  	}
  	if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
  		goto overlap;
78bafebd4   Eric Sesterhenn   BUG_ON() Conversi...
2138
  	BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2139
2140
2141
  	if (*bip)
  		bi->bi_next = *bip;
  	*bip = bi;
960e739d9   Jens Axboe   block: raid fixup...
2142
  	bi->bi_phys_segments++;
72626685d   NeilBrown   [PATCH] md: add w...
2143

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
  	if (forwrite) {
  		/* check if page is covered */
  		sector_t sector = sh->dev[dd_idx].sector;
  		for (bi=sh->dev[dd_idx].towrite;
  		     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
  			     bi && bi->bi_sector <= sector;
  		     bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
  			if (bi->bi_sector + (bi->bi_size>>9) >= sector)
  				sector = bi->bi_sector + (bi->bi_size>>9);
  		}
  		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
  			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
  	}
cbe47ec55   NeilBrown   md/raid5: Protect...
2157
  	spin_unlock_irq(&conf->device_lock);
cbe47ec55   NeilBrown   md/raid5: Protect...
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
  
  	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.
  ",
  		(unsigned long long)(*bip)->bi_sector,
  		(unsigned long long)sh->sector, dd_idx);
  
  	if (conf->mddev->bitmap && firstwrite) {
  		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
  				  STRIPE_SECTORS, 0);
  		sh->bm_seq = conf->seq_flush+1;
  		set_bit(STRIPE_BIT_DELAY, &sh->state);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2170
2171
2172
2173
2174
  	return 1;
  
   overlap:
  	set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
  	spin_unlock_irq(&conf->device_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2175
2176
  	return 0;
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
2177
  static void end_reshape(struct r5conf *conf);
292695531   NeilBrown   [PATCH] md: Final...
2178

d1688a6d5   NeilBrown   md/raid5: typedef...
2179
  /*
   * stripe_set_idx - have raid5_compute_sector() fill in the indices of @sh.
   *
   * @stripe:   device sector; split into a chunk row and an in-chunk offset
   * @conf:     array configuration
   * @previous: non-zero to use the previous chunk size and disk count
   * @sh:       stripe_head handed to raid5_compute_sector()
   *
   * The array sector of the first data chunk in the row (at the same
   * in-chunk offset) is computed and passed to raid5_compute_sector();
   * the data-disk index it reports is discarded.
   */
  static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
  			    struct stripe_head *sh)
  {
  	int chunk_sectors;
  	int ndisks;
  	int offset_in_chunk;
  	int ignored_dd_idx;
  
  	if (previous)
  		chunk_sectors = conf->prev_chunk_sectors;
  	else
  		chunk_sectors = conf->chunk_sectors;
  
  	/* sector_div() leaves the quotient in 'stripe' */
  	offset_in_chunk = sector_div(stripe, chunk_sectors);
  
  	if (previous)
  		ndisks = conf->previous_raid_disks;
  	else
  		ndisks = conf->raid_disks;
  
  	raid5_compute_sector(conf,
  			     stripe * (ndisks - conf->max_degraded)
  			     * chunk_sectors + offset_in_chunk,
  			     previous,
  			     &ignored_dd_idx, sh);
  }
a44568564   Dan Williams   raid5: refactor h...
2194
  static void
d1688a6d5   NeilBrown   md/raid5: typedef...
2195
  handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
a44568564   Dan Williams   raid5: refactor h...
2196
2197
2198
2199
2200
2201
2202
2203
2204
  				struct stripe_head_state *s, int disks,
  				struct bio **return_bi)
  {
  	int i;
  	for (i = disks; i--; ) {
  		struct bio *bi;
  		int bitmap_end = 0;
  
  		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
3cb030020   NeilBrown   md: removing type...
2205
  			struct md_rdev *rdev;
a44568564   Dan Williams   raid5: refactor h...
2206
2207
2208
  			rcu_read_lock();
  			rdev = rcu_dereference(conf->disks[i].rdev);
  			if (rdev && test_bit(In_sync, &rdev->flags))
7f0da59bd   NeilBrown   md/raid5: use bad...
2209
2210
2211
  				atomic_inc(&rdev->nr_pending);
  			else
  				rdev = NULL;
a44568564   Dan Williams   raid5: refactor h...
2212
  			rcu_read_unlock();
7f0da59bd   NeilBrown   md/raid5: use bad...
2213
2214
2215
2216
2217
2218
2219
2220
  			if (rdev) {
  				if (!rdev_set_badblocks(
  					    rdev,
  					    sh->sector,
  					    STRIPE_SECTORS, 0))
  					md_error(conf->mddev, rdev);
  				rdev_dec_pending(rdev, conf->mddev);
  			}
a44568564   Dan Williams   raid5: refactor h...
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
  		}
  		spin_lock_irq(&conf->device_lock);
  		/* fail all writes first */
  		bi = sh->dev[i].towrite;
  		sh->dev[i].towrite = NULL;
  		if (bi) {
  			s->to_write--;
  			bitmap_end = 1;
  		}
  
  		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
  			wake_up(&conf->wait_for_overlap);
  
  		while (bi && bi->bi_sector <
  			sh->dev[i].sector + STRIPE_SECTORS) {
  			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
  			clear_bit(BIO_UPTODATE, &bi->bi_flags);
960e739d9   Jens Axboe   block: raid fixup...
2238
  			if (!raid5_dec_bi_phys_segments(bi)) {
a44568564   Dan Williams   raid5: refactor h...
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
  				md_write_end(conf->mddev);
  				bi->bi_next = *return_bi;
  				*return_bi = bi;
  			}
  			bi = nextbi;
  		}
  		/* and fail all 'written' */
  		bi = sh->dev[i].written;
  		sh->dev[i].written = NULL;
  		if (bi) bitmap_end = 1;
  		while (bi && bi->bi_sector <
  		       sh->dev[i].sector + STRIPE_SECTORS) {
  			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
  			clear_bit(BIO_UPTODATE, &bi->bi_flags);
960e739d9   Jens Axboe   block: raid fixup...
2253
  			if (!raid5_dec_bi_phys_segments(bi)) {
a44568564   Dan Williams   raid5: refactor h...
2254
2255
2256
2257
2258
2259
  				md_write_end(conf->mddev);
  				bi->bi_next = *return_bi;
  				*return_bi = bi;
  			}
  			bi = bi2;
  		}
b5e98d65d   Dan Williams   md: handle_stripe...
2260
2261
2262
2263
2264
2265
  		/* fail any reads if this device is non-operational and
  		 * the data has not reached the cache yet.
  		 */
  		if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
  		    (!test_bit(R5_Insync, &sh->dev[i].flags) ||
  		      test_bit(R5_ReadError, &sh->dev[i].flags))) {
a44568564   Dan Williams   raid5: refactor h...
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
  			bi = sh->dev[i].toread;
  			sh->dev[i].toread = NULL;
  			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
  				wake_up(&conf->wait_for_overlap);
  			if (bi) s->to_read--;
  			while (bi && bi->bi_sector <
  			       sh->dev[i].sector + STRIPE_SECTORS) {
  				struct bio *nextbi =
  					r5_next_bio(bi, sh->dev[i].sector);
  				clear_bit(BIO_UPTODATE, &bi->bi_flags);
960e739d9   Jens Axboe   block: raid fixup...
2276
  				if (!raid5_dec_bi_phys_segments(bi)) {
a44568564   Dan Williams   raid5: refactor h...
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
  					bi->bi_next = *return_bi;
  					*return_bi = bi;
  				}
  				bi = nextbi;
  			}
  		}
  		spin_unlock_irq(&conf->device_lock);
  		if (bitmap_end)
  			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
  					STRIPE_SECTORS, 0, 0);
8cfa7b0f6   NeilBrown   md/raid5: Avoid B...
2287
2288
2289
2290
  		/* If we were in the middle of a write the parity block might
  		 * still be locked - so just clear all R5_LOCKED flags
  		 */
  		clear_bit(R5_LOCKED, &sh->dev[i].flags);
a44568564   Dan Williams   raid5: refactor h...
2291
  	}
8b3e6cdc5   Dan Williams   md: introduce get...
2292
2293
2294
  	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
  		if (atomic_dec_and_test(&conf->pending_full_writes))
  			md_wakeup_thread(conf->mddev->thread);
a44568564   Dan Williams   raid5: refactor h...
2295
  }
7f0da59bd   NeilBrown   md/raid5: use bad...
2296
  static void
d1688a6d5   NeilBrown   md/raid5: typedef...
2297
  handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
7f0da59bd   NeilBrown   md/raid5: use bad...
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
  		   struct stripe_head_state *s)
  {
  	int abort = 0;
  	int i;
  
  	md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
  	clear_bit(STRIPE_SYNCING, &sh->state);
  	s->syncing = 0;
  	/* There is nothing more to do for sync/check/repair.
  	 * For recover we need to record a bad block on all
  	 * non-sync devices, or abort the recovery
  	 */
  	if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
  		return;
  	/* During recovery devices cannot be removed, so locking and
  	 * refcounting of rdevs is not needed
  	 */
  	for (i = 0; i < conf->raid_disks; i++) {
3cb030020   NeilBrown   md: removing type...
2316
  		struct md_rdev *rdev = conf->disks[i].rdev;
7f0da59bd   NeilBrown   md/raid5: use bad...
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
  		if (!rdev
  		    || test_bit(Faulty, &rdev->flags)
  		    || test_bit(In_sync, &rdev->flags))
  			continue;
  		if (!rdev_set_badblocks(rdev, sh->sector,
  					STRIPE_SECTORS, 0))
  			abort = 1;
  	}
  	if (abort) {
  		conf->recovery_disabled = conf->mddev->recovery_disabled;
  		set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
  	}
  }
93b3dbce6   NeilBrown   md/raid5: unite f...
2330
  /* fetch_block - checks the given member device to see if its data needs
1fe797e67   Dan Williams   md: rationalize r...
2331
2332
2333
   * to be read or computed to satisfy a request.
   *
   * Returns 1 when no more member devices need to be checked, otherwise returns
93b3dbce6   NeilBrown   md/raid5: unite f...
2334
   * 0 to tell the loop in handle_stripe_fill to continue
f38e12199   Dan Williams   md: handle_stripe...
2335
   */
93b3dbce6   NeilBrown   md/raid5: unite f...
2336
2337
  static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
  		       int disk_idx, int disks)
a44568564   Dan Williams   raid5: refactor h...
2338
  {
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2339
  	struct r5dev *dev = &sh->dev[disk_idx];
f2b3b44de   NeilBrown   md/raid5: unify s...
2340
2341
  	struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
  				  &sh->dev[s->failed_num[1]] };
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2342

93b3dbce6   NeilBrown   md/raid5: unite f...
2343
  	/* is the data in this block needed, and can we get it? */
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2344
2345
2346
2347
2348
  	if (!test_bit(R5_LOCKED, &dev->flags) &&
  	    !test_bit(R5_UPTODATE, &dev->flags) &&
  	    (dev->toread ||
  	     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
  	     s->syncing || s->expanding ||
5d35e09ca   NeilBrown   md/raid5: rearran...
2349
2350
  	     (s->failed >= 1 && fdev[0]->toread) ||
  	     (s->failed >= 2 && fdev[1]->toread) ||
93b3dbce6   NeilBrown   md/raid5: unite f...
2351
2352
2353
  	     (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
  	      !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
  	     (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2354
2355
2356
2357
2358
2359
  		/* we would like to get this block, possibly by computing it,
  		 * otherwise read it if the backing disk is insync
  		 */
  		BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
  		BUG_ON(test_bit(R5_Wantread, &dev->flags));
  		if ((s->uptodate == disks - 1) &&
f2b3b44de   NeilBrown   md/raid5: unify s...
2360
2361
  		    (s->failed && (disk_idx == s->failed_num[0] ||
  				   disk_idx == s->failed_num[1]))) {
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2362
2363
  			/* have disk failed, and we're requested to fetch it;
  			 * do compute it
a44568564   Dan Williams   raid5: refactor h...
2364
  			 */
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2365
2366
2367
2368
2369
2370
2371
2372
2373
  			pr_debug("Computing stripe %llu block %d
  ",
  			       (unsigned long long)sh->sector, disk_idx);
  			set_bit(STRIPE_COMPUTE_RUN, &sh->state);
  			set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
  			set_bit(R5_Wantcompute, &dev->flags);
  			sh->ops.target = disk_idx;
  			sh->ops.target2 = -1; /* no 2nd target */
  			s->req_compute = 1;
93b3dbce6   NeilBrown   md/raid5: unite f...
2374
2375
2376
2377
2378
2379
  			/* Careful: from this point on 'uptodate' is in the eye
  			 * of raid_run_ops which services 'compute' operations
  			 * before writes. R5_Wantcompute flags a block that will
  			 * be R5_UPTODATE by the time it is needed for a
  			 * subsequent operation.
  			 */
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
  			s->uptodate++;
  			return 1;
  		} else if (s->uptodate == disks-2 && s->failed >= 2) {
  			/* Computing 2-failure is *very* expensive; only
  			 * do it if failed >= 2
  			 */
  			int other;
  			for (other = disks; other--; ) {
  				if (other == disk_idx)
  					continue;
  				if (!test_bit(R5_UPTODATE,
  				      &sh->dev[other].flags))
  					break;
a44568564   Dan Williams   raid5: refactor h...
2393
  			}
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
  			BUG_ON(other < 0);
  			pr_debug("Computing stripe %llu blocks %d,%d
  ",
  			       (unsigned long long)sh->sector,
  			       disk_idx, other);
  			set_bit(STRIPE_COMPUTE_RUN, &sh->state);
  			set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
  			set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
  			set_bit(R5_Wantcompute, &sh->dev[other].flags);
  			sh->ops.target = disk_idx;
  			sh->ops.target2 = other;
  			s->uptodate += 2;
  			s->req_compute = 1;
  			return 1;
  		} else if (test_bit(R5_Insync, &dev->flags)) {
  			set_bit(R5_LOCKED, &dev->flags);
  			set_bit(R5_Wantread, &dev->flags);
  			s->locked++;
  			pr_debug("Reading block %d (sync=%d)
  ",
  				disk_idx, s->syncing);
a44568564   Dan Williams   raid5: refactor h...
2415
2416
  		}
  	}
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2417
2418
2419
2420
2421
  
  	return 0;
  }
  
  /**
93b3dbce6   NeilBrown   md/raid5: unite f...
2422
   * handle_stripe_fill - read or compute data to satisfy pending requests.
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2423
   */
93b3dbce6   NeilBrown   md/raid5: unite f...
2424
2425
2426
  static void handle_stripe_fill(struct stripe_head *sh,
  			       struct stripe_head_state *s,
  			       int disks)
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
  {
  	int i;
  
  	/* look for blocks to read/compute, skip this if a compute
  	 * is already in flight, or if the stripe contents are in the
  	 * midst of changing due to a write
  	 */
  	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
  	    !sh->reconstruct_state)
  		for (i = disks; i--; )
93b3dbce6   NeilBrown   md/raid5: unite f...
2437
  			if (fetch_block(sh, s, i, disks))
5599becca   Yuri Tikhonov   md/raid6: asynchr...
2438
  				break;
a44568564   Dan Williams   raid5: refactor h...
2439
2440
  	set_bit(STRIPE_HANDLE, &sh->state);
  }
1fe797e67   Dan Williams   md: rationalize r...
2441
  /* handle_stripe_clean_event
a44568564   Dan Williams   raid5: refactor h...
2442
2443
2444
2445
   * any written block on an uptodate or failed drive can be returned.
   * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
   * never LOCKED, so we don't need to test 'failed' directly.
   */
d1688a6d5   NeilBrown   md/raid5: typedef...
2446
  static void handle_stripe_clean_event(struct r5conf *conf,
a44568564   Dan Williams   raid5: refactor h...
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
  	struct stripe_head *sh, int disks, struct bio **return_bi)
  {
  	int i;
  	struct r5dev *dev;
  
  	for (i = disks; i--; )
  		if (sh->dev[i].written) {
  			dev = &sh->dev[i];
  			if (!test_bit(R5_LOCKED, &dev->flags) &&
  				test_bit(R5_UPTODATE, &dev->flags)) {
  				/* We can return any write requests */
  				struct bio *wbi, *wbi2;
  				int bitmap_end = 0;
45b4233ca   Dan Williams   raid5: replace cu...
2460
2461
  				pr_debug("Return write for disc %d
  ", i);
a44568564   Dan Williams   raid5: refactor h...
2462
2463
2464
2465
2466
2467
  				spin_lock_irq(&conf->device_lock);
  				wbi = dev->written;
  				dev->written = NULL;
  				while (wbi && wbi->bi_sector <
  					dev->sector + STRIPE_SECTORS) {
  					wbi2 = r5_next_bio(wbi, dev->sector);
960e739d9   Jens Axboe   block: raid fixup...
2468
  					if (!raid5_dec_bi_phys_segments(wbi)) {
a44568564   Dan Williams   raid5: refactor h...
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
  						md_write_end(conf->mddev);
  						wbi->bi_next = *return_bi;
  						*return_bi = wbi;
  					}
  					wbi = wbi2;
  				}
  				if (dev->towrite == NULL)
  					bitmap_end = 1;
  				spin_unlock_irq(&conf->device_lock);
  				if (bitmap_end)
  					bitmap_endwrite(conf->mddev->bitmap,
  							sh->sector,
  							STRIPE_SECTORS,
  					 !test_bit(STRIPE_DEGRADED, &sh->state),
  							0);
  			}
  		}
8b3e6cdc5   Dan Williams   md: introduce get...
2486
2487
2488
2489
  
  	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
  		if (atomic_dec_and_test(&conf->pending_full_writes))
  			md_wakeup_thread(conf->mddev->thread);
a44568564   Dan Williams   raid5: refactor h...
2490
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
2491
  static void handle_stripe_dirtying(struct r5conf *conf,
c8ac1803f   NeilBrown   md/raid5: unite h...
2492
2493
2494
  				   struct stripe_head *sh,
  				   struct stripe_head_state *s,
  				   int disks)
a44568564   Dan Williams   raid5: refactor h...
2495
2496
  {
  	int rmw = 0, rcw = 0, i;
c8ac1803f   NeilBrown   md/raid5: unite h...
2497
2498
2499
2500
2501
2502
2503
  	if (conf->max_degraded == 2) {
  		/* RAID6 requires 'rcw' in current implementation
  		 * Calculate the real rcw later - for now fake it
  		 * look like rcw is cheaper
  		 */
  		rcw = 1; rmw = 2;
  	} else for (i = disks; i--; ) {
a44568564   Dan Williams   raid5: refactor h...
2504
2505
2506
2507
  		/* would I have to read this buffer for read_modify_write */
  		struct r5dev *dev = &sh->dev[i];
  		if ((dev->towrite || i == sh->pd_idx) &&
  		    !test_bit(R5_LOCKED, &dev->flags) &&
f38e12199   Dan Williams   md: handle_stripe...
2508
2509
  		    !(test_bit(R5_UPTODATE, &dev->flags) ||
  		      test_bit(R5_Wantcompute, &dev->flags))) {
a44568564   Dan Williams   raid5: refactor h...
2510
2511
2512
2513
2514
2515
2516
2517
  			if (test_bit(R5_Insync, &dev->flags))
  				rmw++;
  			else
  				rmw += 2*disks;  /* cannot read it */
  		}
  		/* Would I have to read this buffer for reconstruct_write */
  		if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
  		    !test_bit(R5_LOCKED, &dev->flags) &&
f38e12199   Dan Williams   md: handle_stripe...
2518
2519
2520
  		    !(test_bit(R5_UPTODATE, &dev->flags) ||
  		    test_bit(R5_Wantcompute, &dev->flags))) {
  			if (test_bit(R5_Insync, &dev->flags)) rcw++;
a44568564   Dan Williams   raid5: refactor h...
2521
2522
2523
2524
  			else
  				rcw += 2*disks;
  		}
  	}
45b4233ca   Dan Williams   raid5: replace cu...
2525
2526
  	pr_debug("for sector %llu, rmw=%d rcw=%d
  ",
a44568564   Dan Williams   raid5: refactor h...
2527
2528
2529
2530
2531
2532
2533
2534
  		(unsigned long long)sh->sector, rmw, rcw);
  	set_bit(STRIPE_HANDLE, &sh->state);
  	if (rmw < rcw && rmw > 0)
  		/* prefer read-modify-write, but need to get some data */
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  			if ((dev->towrite || i == sh->pd_idx) &&
  			    !test_bit(R5_LOCKED, &dev->flags) &&
f38e12199   Dan Williams   md: handle_stripe...
2535
2536
  			    !(test_bit(R5_UPTODATE, &dev->flags) ||
  			    test_bit(R5_Wantcompute, &dev->flags)) &&
a44568564   Dan Williams   raid5: refactor h...
2537
2538
2539
  			    test_bit(R5_Insync, &dev->flags)) {
  				if (
  				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
45b4233ca   Dan Williams   raid5: replace cu...
2540
  					pr_debug("Read_old block "
a44568564   Dan Williams   raid5: refactor h...
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
  						"%d for r-m-w
  ", i);
  					set_bit(R5_LOCKED, &dev->flags);
  					set_bit(R5_Wantread, &dev->flags);
  					s->locked++;
  				} else {
  					set_bit(STRIPE_DELAYED, &sh->state);
  					set_bit(STRIPE_HANDLE, &sh->state);
  				}
  			}
  		}
c8ac1803f   NeilBrown   md/raid5: unite h...
2552
  	if (rcw <= rmw && rcw > 0) {
a44568564   Dan Williams   raid5: refactor h...
2553
  		/* want reconstruct write, but need to get some data */
c8ac1803f   NeilBrown   md/raid5: unite h...
2554
  		rcw = 0;
a44568564   Dan Williams   raid5: refactor h...
2555
2556
2557
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  			if (!test_bit(R5_OVERWRITE, &dev->flags) &&
c8ac1803f   NeilBrown   md/raid5: unite h...
2558
  			    i != sh->pd_idx && i != sh->qd_idx &&
a44568564   Dan Williams   raid5: refactor h...
2559
  			    !test_bit(R5_LOCKED, &dev->flags) &&
f38e12199   Dan Williams   md: handle_stripe...
2560
  			    !(test_bit(R5_UPTODATE, &dev->flags) ||
c8ac1803f   NeilBrown   md/raid5: unite h...
2561
2562
2563
2564
  			      test_bit(R5_Wantcompute, &dev->flags))) {
  				rcw++;
  				if (!test_bit(R5_Insync, &dev->flags))
  					continue; /* it's a failed drive */
a44568564   Dan Williams   raid5: refactor h...
2565
2566
  				if (
  				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
45b4233ca   Dan Williams   raid5: replace cu...
2567
  					pr_debug("Read_old block "
a44568564   Dan Williams   raid5: refactor h...
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
  						"%d for Reconstruct
  ", i);
  					set_bit(R5_LOCKED, &dev->flags);
  					set_bit(R5_Wantread, &dev->flags);
  					s->locked++;
  				} else {
  					set_bit(STRIPE_DELAYED, &sh->state);
  					set_bit(STRIPE_HANDLE, &sh->state);
  				}
  			}
  		}
c8ac1803f   NeilBrown   md/raid5: unite h...
2579
  	}
a44568564   Dan Williams   raid5: refactor h...
2580
2581
2582
  	/* now if nothing is locked, and if we have enough data,
  	 * we can start a write request
  	 */
f38e12199   Dan Williams   md: handle_stripe...
2583
2584
  	/* since handle_stripe can be called at any time we need to handle the
  	 * case where a compute block operation has been submitted and then a
ac6b53b6e   Dan Williams   md/raid6: asynchr...
2585
2586
  	 * subsequent call wants to start a write request.  raid_run_ops only
  	 * handles the case where compute block and reconstruct are requested
f38e12199   Dan Williams   md: handle_stripe...
2587
2588
2589
  	 * simultaneously.  If this is not the case then new writes need to be
  	 * held off until the compute completes.
  	 */
976ea8d47   Dan Williams   md: replace STRIP...
2590
2591
2592
  	if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
  	    (s->locked == 0 && (rcw == 0 || rmw == 0) &&
  	    !test_bit(STRIPE_BIT_DELAY, &sh->state)))
c0f7bddbe   Yuri Tikhonov   md/raid5,6: commo...
2593
  		schedule_reconstruction(sh, s, rcw == 0, 0);
a44568564   Dan Williams   raid5: refactor h...
2594
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
2595
  static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
a44568564   Dan Williams   raid5: refactor h...
2596
2597
  				struct stripe_head_state *s, int disks)
  {
ecc65c9b3   Dan Williams   md: replace STRIP...
2598
  	struct r5dev *dev = NULL;
bd2ab6703   Dan Williams   md: close a livel...
2599

a44568564   Dan Williams   raid5: refactor h...
2600
  	set_bit(STRIPE_HANDLE, &sh->state);
e89f89629   Dan Williams   md: handle_stripe...
2601

ecc65c9b3   Dan Williams   md: replace STRIP...
2602
2603
2604
  	switch (sh->check_state) {
  	case check_state_idle:
  		/* start a new check operation if there are no failures */
bd2ab6703   Dan Williams   md: close a livel...
2605
  		if (s->failed == 0) {
bd2ab6703   Dan Williams   md: close a livel...
2606
  			BUG_ON(s->uptodate != disks);
ecc65c9b3   Dan Williams   md: replace STRIP...
2607
2608
  			sh->check_state = check_state_run;
  			set_bit(STRIPE_OP_CHECK, &s->ops_request);
bd2ab6703   Dan Williams   md: close a livel...
2609
  			clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
bd2ab6703   Dan Williams   md: close a livel...
2610
  			s->uptodate--;
ecc65c9b3   Dan Williams   md: replace STRIP...
2611
  			break;
bd2ab6703   Dan Williams   md: close a livel...
2612
  		}
f2b3b44de   NeilBrown   md/raid5: unify s...
2613
  		dev = &sh->dev[s->failed_num[0]];
ecc65c9b3   Dan Williams   md: replace STRIP...
2614
2615
2616
2617
2618
2619
2620
2621
2622
  		/* fall through */
  	case check_state_compute_result:
  		sh->check_state = check_state_idle;
  		if (!dev)
  			dev = &sh->dev[sh->pd_idx];
  
  		/* check that a write has not made the stripe insync */
  		if (test_bit(STRIPE_INSYNC, &sh->state))
  			break;
c8894419a   Dan Williams   md: fix raid5 're...
2623

a44568564   Dan Williams   raid5: refactor h...
2624
  		/* either failed parity check, or recovery is happening */
a44568564   Dan Williams   raid5: refactor h...
2625
2626
2627
2628
  		BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
  		BUG_ON(s->uptodate != disks);
  
  		set_bit(R5_LOCKED, &dev->flags);
ecc65c9b3   Dan Williams   md: replace STRIP...
2629
  		s->locked++;
a44568564   Dan Williams   raid5: refactor h...
2630
  		set_bit(R5_Wantwrite, &dev->flags);
830ea0167   Dan Williams   md: handle_stripe...
2631

a44568564   Dan Williams   raid5: refactor h...
2632
  		clear_bit(STRIPE_DEGRADED, &sh->state);
a44568564   Dan Williams   raid5: refactor h...
2633
  		set_bit(STRIPE_INSYNC, &sh->state);
ecc65c9b3   Dan Williams   md: replace STRIP...
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
  		break;
  	case check_state_run:
  		break; /* we will be called again upon completion */
  	case check_state_check_result:
  		sh->check_state = check_state_idle;
  
  		/* if a failure occurred during the check operation, leave
  		 * STRIPE_INSYNC not set and let the stripe be handled again
  		 */
  		if (s->failed)
  			break;
  
  		/* handle a successful check operation, if parity is correct
  		 * we are done.  Otherwise update the mismatch count and repair
  		 * parity if !MD_RECOVERY_CHECK
  		 */
ad283ea4a   Dan Williams   async_tx: add sum...
2650
  		if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
ecc65c9b3   Dan Williams   md: replace STRIP...
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
  			/* parity is correct (on disc,
  			 * not in buffer any more)
  			 */
  			set_bit(STRIPE_INSYNC, &sh->state);
  		else {
  			conf->mddev->resync_mismatches += STRIPE_SECTORS;
  			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
  				/* don't try to repair!! */
  				set_bit(STRIPE_INSYNC, &sh->state);
  			else {
  				sh->check_state = check_state_compute_run;
976ea8d47   Dan Williams   md: replace STRIP...
2662
  				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
ecc65c9b3   Dan Williams   md: replace STRIP...
2663
2664
2665
2666
  				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
  				set_bit(R5_Wantcompute,
  					&sh->dev[sh->pd_idx].flags);
  				sh->ops.target = sh->pd_idx;
ac6b53b6e   Dan Williams   md/raid6: asynchr...
2667
  				sh->ops.target2 = -1;
ecc65c9b3   Dan Williams   md: replace STRIP...
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
  				s->uptodate++;
  			}
  		}
  		break;
  	case check_state_compute_run:
  		break;
  	default:
  		printk(KERN_ERR "%s: unknown check_state: %d sector: %llu
  ",
  		       __func__, sh->check_state,
  		       (unsigned long long) sh->sector);
  		BUG();
a44568564   Dan Williams   raid5: refactor h...
2680
2681
  	}
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
2682
  static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
36d1c6476   Dan Williams   md/raid6: move th...
2683
  				  struct stripe_head_state *s,
f2b3b44de   NeilBrown   md/raid5: unify s...
2684
  				  int disks)
a44568564   Dan Williams   raid5: refactor h...
2685
  {
a44568564   Dan Williams   raid5: refactor h...
2686
  	int pd_idx = sh->pd_idx;
34e04e87f   NeilBrown   md/raid5: drop qd...
2687
  	int qd_idx = sh->qd_idx;
d82dfee0a   Dan Williams   md/raid6: asynchr...
2688
  	struct r5dev *dev;
a44568564   Dan Williams   raid5: refactor h...
2689
2690
2691
2692
  
  	set_bit(STRIPE_HANDLE, &sh->state);
  
  	BUG_ON(s->failed > 2);
d82dfee0a   Dan Williams   md/raid6: asynchr...
2693

a44568564   Dan Williams   raid5: refactor h...
2694
2695
2696
2697
2698
  	/* Want to check and possibly repair P and Q.
  	 * However there could be one 'failed' device, in which
  	 * case we can only check one of them, possibly using the
  	 * other to generate missing data
  	 */
d82dfee0a   Dan Williams   md/raid6: asynchr...
2699
2700
2701
  	switch (sh->check_state) {
  	case check_state_idle:
  		/* start a new check operation if there are < 2 failures */
f2b3b44de   NeilBrown   md/raid5: unify s...
2702
  		if (s->failed == s->q_failed) {
d82dfee0a   Dan Williams   md/raid6: asynchr...
2703
  			/* The only possible failed device holds Q, so it
a44568564   Dan Williams   raid5: refactor h...
2704
2705
2706
  			 * makes sense to check P (If anything else were failed,
  			 * we would have used P to recreate it).
  			 */
d82dfee0a   Dan Williams   md/raid6: asynchr...
2707
  			sh->check_state = check_state_run;
a44568564   Dan Williams   raid5: refactor h...
2708
  		}
f2b3b44de   NeilBrown   md/raid5: unify s...
2709
  		if (!s->q_failed && s->failed < 2) {
d82dfee0a   Dan Williams   md/raid6: asynchr...
2710
  			/* Q is not failed, and we didn't use it to generate
a44568564   Dan Williams   raid5: refactor h...
2711
2712
  			 * anything, so it makes sense to check it
  			 */
d82dfee0a   Dan Williams   md/raid6: asynchr...
2713
2714
2715
2716
  			if (sh->check_state == check_state_run)
  				sh->check_state = check_state_run_pq;
  			else
  				sh->check_state = check_state_run_q;
a44568564   Dan Williams   raid5: refactor h...
2717
  		}
a44568564   Dan Williams   raid5: refactor h...
2718

d82dfee0a   Dan Williams   md/raid6: asynchr...
2719
2720
  		/* discard potentially stale zero_sum_result */
  		sh->ops.zero_sum_result = 0;
a44568564   Dan Williams   raid5: refactor h...
2721

d82dfee0a   Dan Williams   md/raid6: asynchr...
2722
2723
2724
2725
  		if (sh->check_state == check_state_run) {
  			/* async_xor_zero_sum destroys the contents of P */
  			clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
  			s->uptodate--;
a44568564   Dan Williams   raid5: refactor h...
2726
  		}
d82dfee0a   Dan Williams   md/raid6: asynchr...
2727
2728
2729
2730
2731
2732
2733
  		if (sh->check_state >= check_state_run &&
  		    sh->check_state <= check_state_run_pq) {
  			/* async_syndrome_zero_sum preserves P and Q, so
  			 * no need to mark them !uptodate here
  			 */
  			set_bit(STRIPE_OP_CHECK, &s->ops_request);
  			break;
a44568564   Dan Williams   raid5: refactor h...
2734
  		}
d82dfee0a   Dan Williams   md/raid6: asynchr...
2735
2736
2737
2738
2739
  		/* we have 2-disk failure */
  		BUG_ON(s->failed != 2);
  		/* fall through */
  	case check_state_compute_result:
  		sh->check_state = check_state_idle;
a44568564   Dan Williams   raid5: refactor h...
2740

d82dfee0a   Dan Williams   md/raid6: asynchr...
2741
2742
2743
  		/* check that a write has not made the stripe insync */
  		if (test_bit(STRIPE_INSYNC, &sh->state))
  			break;
a44568564   Dan Williams   raid5: refactor h...
2744
2745
  
  		/* now write out any block on a failed drive,
d82dfee0a   Dan Williams   md/raid6: asynchr...
2746
  		 * or P or Q if they were recomputed
a44568564   Dan Williams   raid5: refactor h...
2747
  		 */
d82dfee0a   Dan Williams   md/raid6: asynchr...
2748
  		BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
a44568564   Dan Williams   raid5: refactor h...
2749
  		if (s->failed == 2) {
f2b3b44de   NeilBrown   md/raid5: unify s...
2750
  			dev = &sh->dev[s->failed_num[1]];
a44568564   Dan Williams   raid5: refactor h...
2751
2752
2753
2754
2755
  			s->locked++;
  			set_bit(R5_LOCKED, &dev->flags);
  			set_bit(R5_Wantwrite, &dev->flags);
  		}
  		if (s->failed >= 1) {
f2b3b44de   NeilBrown   md/raid5: unify s...
2756
  			dev = &sh->dev[s->failed_num[0]];
a44568564   Dan Williams   raid5: refactor h...
2757
2758
2759
2760
  			s->locked++;
  			set_bit(R5_LOCKED, &dev->flags);
  			set_bit(R5_Wantwrite, &dev->flags);
  		}
d82dfee0a   Dan Williams   md/raid6: asynchr...
2761
  		if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
a44568564   Dan Williams   raid5: refactor h...
2762
2763
2764
2765
2766
  			dev = &sh->dev[pd_idx];
  			s->locked++;
  			set_bit(R5_LOCKED, &dev->flags);
  			set_bit(R5_Wantwrite, &dev->flags);
  		}
d82dfee0a   Dan Williams   md/raid6: asynchr...
2767
  		if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
a44568564   Dan Williams   raid5: refactor h...
2768
2769
2770
2771
2772
2773
2774
2775
  			dev = &sh->dev[qd_idx];
  			s->locked++;
  			set_bit(R5_LOCKED, &dev->flags);
  			set_bit(R5_Wantwrite, &dev->flags);
  		}
  		clear_bit(STRIPE_DEGRADED, &sh->state);
  
  		set_bit(STRIPE_INSYNC, &sh->state);
d82dfee0a   Dan Williams   md/raid6: asynchr...
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
  		break;
  	case check_state_run:
  	case check_state_run_q:
  	case check_state_run_pq:
  		break; /* we will be called again upon completion */
  	case check_state_check_result:
  		sh->check_state = check_state_idle;
  
  		/* handle a successful check operation, if parity is correct
  		 * we are done.  Otherwise update the mismatch count and repair
  		 * parity if !MD_RECOVERY_CHECK
  		 */
  		if (sh->ops.zero_sum_result == 0) {
  			/* both parities are correct */
  			if (!s->failed)
  				set_bit(STRIPE_INSYNC, &sh->state);
  			else {
  				/* in contrast to the raid5 case we can validate
  				 * parity, but still have a failure to write
  				 * back
  				 */
  				sh->check_state = check_state_compute_result;
  				/* Returning at this point means that we may go
  				 * off and bring p and/or q uptodate again so
  				 * we make sure to check zero_sum_result again
  				 * to verify if p or q need writeback
  				 */
  			}
  		} else {
  			conf->mddev->resync_mismatches += STRIPE_SECTORS;
  			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
  				/* don't try to repair!! */
  				set_bit(STRIPE_INSYNC, &sh->state);
  			else {
  				int *target = &sh->ops.target;
  
  				sh->ops.target = -1;
  				sh->ops.target2 = -1;
  				sh->check_state = check_state_compute_run;
  				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
  				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
  				if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
  					set_bit(R5_Wantcompute,
  						&sh->dev[pd_idx].flags);
  					*target = pd_idx;
  					target = &sh->ops.target2;
  					s->uptodate++;
  				}
  				if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
  					set_bit(R5_Wantcompute,
  						&sh->dev[qd_idx].flags);
  					*target = qd_idx;
  					s->uptodate++;
  				}
  			}
  		}
  		break;
  	case check_state_compute_run:
  		break;
  	default:
  		printk(KERN_ERR "%s: unknown check_state: %d sector: %llu
  ",
  		       __func__, sh->check_state,
  		       (unsigned long long) sh->sector);
  		BUG();
a44568564   Dan Williams   raid5: refactor h...
2841
2842
  	}
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
2843
  static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
a44568564   Dan Williams   raid5: refactor h...
2844
2845
2846
2847
2848
2849
  {
  	int i;
  
  	/* We have read all the blocks in this stripe and now we need to
  	 * copy some of them into a target stripe for expand.
  	 */
f0a50d375   Dan Williams   md: handle_stripe...
2850
  	struct dma_async_tx_descriptor *tx = NULL;
a44568564   Dan Williams   raid5: refactor h...
2851
2852
  	clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
  	for (i = 0; i < sh->disks; i++)
34e04e87f   NeilBrown   md/raid5: drop qd...
2853
  		if (i != sh->pd_idx && i != sh->qd_idx) {
911d4ee85   NeilBrown   md/raid5: simplif...
2854
  			int dd_idx, j;
a44568564   Dan Williams   raid5: refactor h...
2855
  			struct stripe_head *sh2;
a08abd8ca   Dan Williams   async_tx: structi...
2856
  			struct async_submit_ctl submit;
a44568564   Dan Williams   raid5: refactor h...
2857

784052ecc   NeilBrown   md/raid5: prepare...
2858
  			sector_t bn = compute_blocknr(sh, i, 1);
911d4ee85   NeilBrown   md/raid5: simplif...
2859
2860
  			sector_t s = raid5_compute_sector(conf, bn, 0,
  							  &dd_idx, NULL);
a8c906ca3   NeilBrown   md/raid5 - avoid ...
2861
  			sh2 = get_active_stripe(conf, s, 0, 1, 1);
a44568564   Dan Williams   raid5: refactor h...
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
  			if (sh2 == NULL)
  				/* so far only the early blocks of this stripe
  				 * have been requested.  When later blocks
  				 * get requested, we will try again
  				 */
  				continue;
  			if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
  			   test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
  				/* must have already done this block */
  				release_stripe(sh2);
  				continue;
  			}
f0a50d375   Dan Williams   md: handle_stripe...
2874
2875
  
  			/* place all the copies on one channel */
a08abd8ca   Dan Williams   async_tx: structi...
2876
  			init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
f0a50d375   Dan Williams   md: handle_stripe...
2877
  			tx = async_memcpy(sh2->dev[dd_idx].page,
88ba2aa58   Dan Williams   async_tx: kill AS...
2878
  					  sh->dev[i].page, 0, 0, STRIPE_SIZE,
a08abd8ca   Dan Williams   async_tx: structi...
2879
  					  &submit);
f0a50d375   Dan Williams   md: handle_stripe...
2880

a44568564   Dan Williams   raid5: refactor h...
2881
2882
2883
2884
  			set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
  			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
  			for (j = 0; j < conf->raid_disks; j++)
  				if (j != sh2->pd_idx &&
86c374ba9   NeilBrown   md/raid5: Remove ...
2885
  				    j != sh2->qd_idx &&
a44568564   Dan Williams   raid5: refactor h...
2886
2887
2888
2889
2890
2891
2892
  				    !test_bit(R5_Expanded, &sh2->dev[j].flags))
  					break;
  			if (j == conf->raid_disks) {
  				set_bit(STRIPE_EXPAND_READY, &sh2->state);
  				set_bit(STRIPE_HANDLE, &sh2->state);
  			}
  			release_stripe(sh2);
f0a50d375   Dan Williams   md: handle_stripe...
2893

a44568564   Dan Williams   raid5: refactor h...
2894
  		}
a2e085518   NeilBrown   md: fix some bugs...
2895
2896
2897
2898
2899
  	/* done submitting copies, wait for them to complete */
  	if (tx) {
  		async_tx_ack(tx);
  		dma_wait_for_async_tx(tx);
  	}
a44568564   Dan Williams   raid5: refactor h...
2900
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2901

6bfe0b499   Dan Williams   md: support block...
2902

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
  /*
   * handle_stripe - do things to a stripe.
   *
   * We lock the stripe and then examine the state of various bits
   * to see what needs to be done.
   * Possible results:
   *    return some read requests which now have data
   *    return some write requests which are safely on disc
   *    schedule a read on some buffers
   *    schedule a write of some buffers
   *    return confirmation of parity correctness
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2915
2916
2917
2918
   * buffers are taken off read_list or write_list, and bh_cache buffers
   * get BH_Lock set before the stripe lock is released.
   *
   */
a44568564   Dan Williams   raid5: refactor h...
2919

acfe726bd   NeilBrown   md/raid5: finalis...
2920
  static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2921
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
2922
  	struct r5conf *conf = sh->raid_conf;
f416885ef   NeilBrown   [PATCH] md: add s...
2923
  	int disks = sh->disks;
474af965f   NeilBrown   md/raid5: move so...
2924
2925
  	struct r5dev *dev;
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2926

acfe726bd   NeilBrown   md/raid5: finalis...
2927
2928
2929
2930
2931
2932
2933
  	memset(s, 0, sizeof(*s));
  
  	s->syncing = test_bit(STRIPE_SYNCING, &sh->state);
  	s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
  	s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
  	s->failed_num[0] = -1;
  	s->failed_num[1] = -1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2934

acfe726bd   NeilBrown   md/raid5: finalis...
2935
  	/* Now to look around and see what can be done */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2936
  	rcu_read_lock();
c4c1663be   NeilBrown   md/raid5: replace...
2937
  	spin_lock_irq(&conf->device_lock);
16a53ecc3   NeilBrown   [PATCH] md: merge...
2938
  	for (i=disks; i--; ) {
3cb030020   NeilBrown   md: removing type...
2939
  		struct md_rdev *rdev;
31c176ecd   NeilBrown   md/raid5: avoid r...
2940
2941
2942
  		sector_t first_bad;
  		int bad_sectors;
  		int is_bad = 0;
acfe726bd   NeilBrown   md/raid5: finalis...
2943

16a53ecc3   NeilBrown   [PATCH] md: merge...
2944
  		dev = &sh->dev[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2945

45b4233ca   Dan Williams   raid5: replace cu...
2946
2947
  		pr_debug("check %d: state 0x%lx read %p write %p written %p
  ",
16a53ecc3   NeilBrown   [PATCH] md: merge...
2948
  			i, dev->flags, dev->toread, dev->towrite, dev->written);
6c0069c0a   Yuri Tikhonov   md/raid6: asynchr...
2949
2950
2951
2952
2953
2954
2955
2956
  		/* maybe we can reply to a read
  		 *
  		 * new wantfill requests are only permitted while
  		 * ops_complete_biofill is guaranteed to be inactive
  		 */
  		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
  		    !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
  			set_bit(R5_Wantfill, &dev->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2957

16a53ecc3   NeilBrown   [PATCH] md: merge...
2958
  		/* now count some things */
cc94015a9   NeilBrown   md/raid5: move st...
2959
2960
2961
2962
  		if (test_bit(R5_LOCKED, &dev->flags))
  			s->locked++;
  		if (test_bit(R5_UPTODATE, &dev->flags))
  			s->uptodate++;
2d6e4ecc8   Dan Williams   md/raid6: elimina...
2963
  		if (test_bit(R5_Wantcompute, &dev->flags)) {
cc94015a9   NeilBrown   md/raid5: move st...
2964
2965
  			s->compute++;
  			BUG_ON(s->compute > 2);
2d6e4ecc8   Dan Williams   md/raid6: elimina...
2966
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2967

acfe726bd   NeilBrown   md/raid5: finalis...
2968
  		if (test_bit(R5_Wantfill, &dev->flags))
cc94015a9   NeilBrown   md/raid5: move st...
2969
  			s->to_fill++;
acfe726bd   NeilBrown   md/raid5: finalis...
2970
  		else if (dev->toread)
cc94015a9   NeilBrown   md/raid5: move st...
2971
  			s->to_read++;
16a53ecc3   NeilBrown   [PATCH] md: merge...
2972
  		if (dev->towrite) {
cc94015a9   NeilBrown   md/raid5: move st...
2973
  			s->to_write++;
16a53ecc3   NeilBrown   [PATCH] md: merge...
2974
  			if (!test_bit(R5_OVERWRITE, &dev->flags))
cc94015a9   NeilBrown   md/raid5: move st...
2975
  				s->non_overwrite++;
16a53ecc3   NeilBrown   [PATCH] md: merge...
2976
  		}
a44568564   Dan Williams   raid5: refactor h...
2977
  		if (dev->written)
cc94015a9   NeilBrown   md/raid5: move st...
2978
  			s->written++;
16a53ecc3   NeilBrown   [PATCH] md: merge...
2979
  		rdev = rcu_dereference(conf->disks[i].rdev);
9283d8c5a   NeilBrown   md/raid5: never w...
2980
2981
  		if (rdev && test_bit(Faulty, &rdev->flags))
  			rdev = NULL;
31c176ecd   NeilBrown   md/raid5: avoid r...
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
  		if (rdev) {
  			is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
  					     &first_bad, &bad_sectors);
  			if (s->blocked_rdev == NULL
  			    && (test_bit(Blocked, &rdev->flags)
  				|| is_bad < 0)) {
  				if (is_bad < 0)
  					set_bit(BlockedBadBlocks,
  						&rdev->flags);
  				s->blocked_rdev = rdev;
  				atomic_inc(&rdev->nr_pending);
  			}
6bfe0b499   Dan Williams   md: support block...
2994
  		}
415e72d03   NeilBrown   md/raid5: Allow r...
2995
2996
2997
  		clear_bit(R5_Insync, &dev->flags);
  		if (!rdev)
  			/* Not in-sync */;
31c176ecd   NeilBrown   md/raid5: avoid r...
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
  		else if (is_bad) {
  			/* also not in-sync */
  			if (!test_bit(WriteErrorSeen, &rdev->flags)) {
  				/* treat as in-sync, but with a read error
  				 * which we can now try to correct
  				 */
  				set_bit(R5_Insync, &dev->flags);
  				set_bit(R5_ReadError, &dev->flags);
  			}
  		} else if (test_bit(In_sync, &rdev->flags))
415e72d03   NeilBrown   md/raid5: Allow r...
3008
  			set_bit(R5_Insync, &dev->flags);
30d7a4836   NeilBrown   md/raid5: ensure ...
3009
  		else if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
415e72d03   NeilBrown   md/raid5: Allow r...
3010
  			/* in sync if before recovery_offset */
30d7a4836   NeilBrown   md/raid5: ensure ...
3011
3012
3013
3014
3015
3016
3017
3018
  			set_bit(R5_Insync, &dev->flags);
  		else if (test_bit(R5_UPTODATE, &dev->flags) &&
  			 test_bit(R5_Expanded, &dev->flags))
  			/* If we've reshaped into here, we assume it is Insync.
  			 * We will shortly update recovery_offset to make
  			 * it official.
  			 */
  			set_bit(R5_Insync, &dev->flags);
5d8c71f9e   Adam Kwolek   md: raid5 crash d...
3019
  		if (rdev && test_bit(R5_WriteError, &dev->flags)) {
bc2607f39   NeilBrown   md/raid5: write e...
3020
3021
3022
3023
3024
3025
3026
  			clear_bit(R5_Insync, &dev->flags);
  			if (!test_bit(Faulty, &rdev->flags)) {
  				s->handle_bad_blocks = 1;
  				atomic_inc(&rdev->nr_pending);
  			} else
  				clear_bit(R5_WriteError, &dev->flags);
  		}
5d8c71f9e   Adam Kwolek   md: raid5 crash d...
3027
  		if (rdev && test_bit(R5_MadeGood, &dev->flags)) {
b84db560e   NeilBrown   md/raid5: Clear b...
3028
3029
3030
3031
3032
3033
  			if (!test_bit(Faulty, &rdev->flags)) {
  				s->handle_bad_blocks = 1;
  				atomic_inc(&rdev->nr_pending);
  			} else
  				clear_bit(R5_MadeGood, &dev->flags);
  		}
415e72d03   NeilBrown   md/raid5: Allow r...
3034
  		if (!test_bit(R5_Insync, &dev->flags)) {
16a53ecc3   NeilBrown   [PATCH] md: merge...
3035
3036
3037
  			/* The ReadError flag will just be confusing now */
  			clear_bit(R5_ReadError, &dev->flags);
  			clear_bit(R5_ReWrite, &dev->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3038
  		}
415e72d03   NeilBrown   md/raid5: Allow r...
3039
3040
3041
  		if (test_bit(R5_ReadError, &dev->flags))
  			clear_bit(R5_Insync, &dev->flags);
  		if (!test_bit(R5_Insync, &dev->flags)) {
cc94015a9   NeilBrown   md/raid5: move st...
3042
3043
3044
  			if (s->failed < 2)
  				s->failed_num[s->failed] = i;
  			s->failed++;
415e72d03   NeilBrown   md/raid5: Allow r...
3045
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3046
  	}
c4c1663be   NeilBrown   md/raid5: replace...
3047
  	spin_unlock_irq(&conf->device_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3048
  	rcu_read_unlock();
cc94015a9   NeilBrown   md/raid5: move st...
3049
3050
3051
3052
3053
  }
  
  static void handle_stripe(struct stripe_head *sh)
  {
  	struct stripe_head_state s;
d1688a6d5   NeilBrown   md/raid5: typedef...
3054
  	struct r5conf *conf = sh->raid_conf;
3687c0618   NeilBrown   md/raid5: Move co...
3055
  	int i;
84789554e   NeilBrown   md/raid5: move mo...
3056
3057
  	int prexor;
  	int disks = sh->disks;
474af965f   NeilBrown   md/raid5: move so...
3058
  	struct r5dev *pdev, *qdev;
cc94015a9   NeilBrown   md/raid5: move st...
3059
3060
  
  	clear_bit(STRIPE_HANDLE, &sh->state);
257a4b42a   Dan Williams   md/raid5: STRIPE_...
3061
  	if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) {
cc94015a9   NeilBrown   md/raid5: move st...
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
  		/* already being handled, ensure it gets handled
  		 * again when current action finishes */
  		set_bit(STRIPE_HANDLE, &sh->state);
  		return;
  	}
  
  	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
  		set_bit(STRIPE_SYNCING, &sh->state);
  		clear_bit(STRIPE_INSYNC, &sh->state);
  	}
  	clear_bit(STRIPE_DELAYED, &sh->state);
  
  	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
  		"pd_idx=%d, qd_idx=%d
  , check:%d, reconstruct:%d
  ",
  	       (unsigned long long)sh->sector, sh->state,
  	       atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
  	       sh->check_state, sh->reconstruct_state);
3687c0618   NeilBrown   md/raid5: Move co...
3081

acfe726bd   NeilBrown   md/raid5: finalis...
3082
  	analyse_stripe(sh, &s);
c5a310006   NeilBrown   md/raid5: move mo...
3083

bc2607f39   NeilBrown   md/raid5: write e...
3084
3085
3086
3087
  	if (s.handle_bad_blocks) {
  		set_bit(STRIPE_HANDLE, &sh->state);
  		goto finish;
  	}
474af965f   NeilBrown   md/raid5: move so...
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
  	if (unlikely(s.blocked_rdev)) {
  		if (s.syncing || s.expanding || s.expanded ||
  		    s.to_write || s.written) {
  			set_bit(STRIPE_HANDLE, &sh->state);
  			goto finish;
  		}
  		/* There is nothing for the blocked_rdev to block */
  		rdev_dec_pending(s.blocked_rdev, conf->mddev);
  		s.blocked_rdev = NULL;
  	}
  
  	if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
  		set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
  		set_bit(STRIPE_BIOFILL_RUN, &sh->state);
  	}
  
  	pr_debug("locked=%d uptodate=%d to_read=%d"
  	       " to_write=%d failed=%d failed_num=%d,%d
  ",
  	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
  	       s.failed_num[0], s.failed_num[1]);
  	/* check if the array has lost more than max_degraded devices and,
  	 * if so, some requests might need to be failed.
  	 */
9a3f530f3   NeilBrown   md/raid5: abort a...
3112
3113
3114
3115
3116
3117
3118
3119
  	if (s.failed > conf->max_degraded) {
  		sh->check_state = 0;
  		sh->reconstruct_state = 0;
  		if (s.to_read+s.to_write+s.written)
  			handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
  		if (s.syncing)
  			handle_failed_sync(conf, sh, &s);
  	}
474af965f   NeilBrown   md/raid5: move so...
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
  
  	/*
  	 * might be able to return some write requests if the parity blocks
  	 * are safe, or on a failed drive
  	 */
  	pdev = &sh->dev[sh->pd_idx];
  	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
  		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
  	qdev = &sh->dev[sh->qd_idx];
  	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
  		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
  		|| conf->level < 6;
  
  	if (s.written &&
  	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
  			     && !test_bit(R5_LOCKED, &pdev->flags)
  			     && test_bit(R5_UPTODATE, &pdev->flags)))) &&
  	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
  			     && !test_bit(R5_LOCKED, &qdev->flags)
  			     && test_bit(R5_UPTODATE, &qdev->flags)))))
  		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
  
  	/* Now we might consider reading some blocks, either to check/generate
  	 * parity, or to satisfy requests
  	 * or to load a block that is being partially written.
  	 */
  	if (s.to_read || s.non_overwrite
  	    || (conf->level == 6 && s.to_write && s.failed)
  	    || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
  		handle_stripe_fill(sh, &s, disks);
84789554e   NeilBrown   md/raid5: move mo...
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
  	/* Now we check to see if any write operations have recently
  	 * completed
  	 */
  	prexor = 0;
  	if (sh->reconstruct_state == reconstruct_state_prexor_drain_result)
  		prexor = 1;
  	if (sh->reconstruct_state == reconstruct_state_drain_result ||
  	    sh->reconstruct_state == reconstruct_state_prexor_drain_result) {
  		sh->reconstruct_state = reconstruct_state_idle;
  
  		/* All the 'written' buffers and the parity block are ready to
  		 * be written back to disk
  		 */
  		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
  		BUG_ON(sh->qd_idx >= 0 &&
  		       !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags));
  		for (i = disks; i--; ) {
  			struct r5dev *dev = &sh->dev[i];
  			if (test_bit(R5_LOCKED, &dev->flags) &&
  				(i == sh->pd_idx || i == sh->qd_idx ||
  				 dev->written)) {
  				pr_debug("Writing block %d
  ", i);
  				set_bit(R5_Wantwrite, &dev->flags);
  				if (prexor)
  					continue;
  				if (!test_bit(R5_Insync, &dev->flags) ||
  				    ((i == sh->pd_idx || i == sh->qd_idx)  &&
  				     s.failed == 0))
  					set_bit(STRIPE_INSYNC, &sh->state);
  			}
  		}
  		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  			s.dec_preread_active = 1;
  	}
  
  	/* Now to consider new write requests and what else, if anything
  	 * should be read.  We do not handle new writes when:
  	 * 1/ A 'write' operation (copy+xor) is already in flight.
  	 * 2/ A 'check' operation is in flight, as it may clobber the parity
  	 *    block.
  	 */
  	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
  		handle_stripe_dirtying(conf, sh, &s, disks);
  
  	/* maybe we need to check and possibly fix the parity for this stripe
  	 * Any reads will already have been scheduled, so we just see if enough
  	 * data is available.  The parity check is held off while parity
  	 * dependent operations are in flight.
  	 */
  	if (sh->check_state ||
  	    (s.syncing && s.locked == 0 &&
  	     !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
  	     !test_bit(STRIPE_INSYNC, &sh->state))) {
  		if (conf->level == 6)
  			handle_parity_checks6(conf, sh, &s, disks);
  		else
  			handle_parity_checks5(conf, sh, &s, disks);
  	}
c5a310006   NeilBrown   md/raid5: move mo...
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
  
  	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
  		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
  		clear_bit(STRIPE_SYNCING, &sh->state);
  	}
  
  	/* If the failed drives are just a ReadError, then we might need
  	 * to progress the repair/check process
  	 */
  	if (s.failed <= conf->max_degraded && !conf->mddev->ro)
  		for (i = 0; i < s.failed; i++) {
  			struct r5dev *dev = &sh->dev[s.failed_num[i]];
  			if (test_bit(R5_ReadError, &dev->flags)
  			    && !test_bit(R5_LOCKED, &dev->flags)
  			    && test_bit(R5_UPTODATE, &dev->flags)
  				) {
  				if (!test_bit(R5_ReWrite, &dev->flags)) {
  					set_bit(R5_Wantwrite, &dev->flags);
  					set_bit(R5_ReWrite, &dev->flags);
  					set_bit(R5_LOCKED, &dev->flags);
  					s.locked++;
  				} else {
  					/* let's read it back */
  					set_bit(R5_Wantread, &dev->flags);
  					set_bit(R5_LOCKED, &dev->flags);
  					s.locked++;
  				}
  			}
  		}
3687c0618   NeilBrown   md/raid5: Move co...
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
  	/* Finish reconstruct operations initiated by the expansion process */
  	if (sh->reconstruct_state == reconstruct_state_result) {
  		struct stripe_head *sh_src
  			= get_active_stripe(conf, sh->sector, 1, 1, 1);
  		if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
  			/* sh cannot be written until sh_src has been read.
  			 * so arrange for sh to be delayed a little
  			 */
  			set_bit(STRIPE_DELAYED, &sh->state);
  			set_bit(STRIPE_HANDLE, &sh->state);
  			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
  					      &sh_src->state))
  				atomic_inc(&conf->preread_active_stripes);
  			release_stripe(sh_src);
  			goto finish;
  		}
  		if (sh_src)
  			release_stripe(sh_src);
  
  		sh->reconstruct_state = reconstruct_state_idle;
  		clear_bit(STRIPE_EXPANDING, &sh->state);
  		for (i = conf->raid_disks; i--; ) {
  			set_bit(R5_Wantwrite, &sh->dev[i].flags);
  			set_bit(R5_LOCKED, &sh->dev[i].flags);
  			s.locked++;
  		}
  	}
f416885ef   NeilBrown   [PATCH] md: add s...
3265

3687c0618   NeilBrown   md/raid5: Move co...
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
  	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
  	    !sh->reconstruct_state) {
  		/* Need to write out all blocks after computing parity */
  		sh->disks = conf->raid_disks;
  		stripe_set_idx(sh->sector, conf, 0, sh);
  		schedule_reconstruction(sh, &s, 1, 1);
  	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
  		clear_bit(STRIPE_EXPAND_READY, &sh->state);
  		atomic_dec(&conf->reshape_stripes);
  		wake_up(&conf->wait_for_overlap);
  		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
  	}
  
  	if (s.expanding && s.locked == 0 &&
  	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
  		handle_stripe_expansion(conf, sh);
16a53ecc3   NeilBrown   [PATCH] md: merge...
3282

3687c0618   NeilBrown   md/raid5: Move co...
3283
  finish:
6bfe0b499   Dan Williams   md: support block...
3284
  	/* wait for this device to become unblocked */
43220aa0f   NeilBrown   md/raid5: fix a h...
3285
  	if (conf->mddev->external && unlikely(s.blocked_rdev))
c5709ef6a   NeilBrown   md/raid5: add so...
3286
  		md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
6bfe0b499   Dan Williams   md: support block...
3287

bc2607f39   NeilBrown   md/raid5: write e...
3288
3289
  	if (s.handle_bad_blocks)
  		for (i = disks; i--; ) {
3cb030020   NeilBrown   md: removing type...
3290
  			struct md_rdev *rdev;
bc2607f39   NeilBrown   md/raid5: write e...
3291
3292
3293
3294
3295
3296
3297
3298
3299
  			struct r5dev *dev = &sh->dev[i];
  			if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
  				/* We own a safe reference to the rdev */
  				rdev = conf->disks[i].rdev;
  				if (!rdev_set_badblocks(rdev, sh->sector,
  							STRIPE_SECTORS, 0))
  					md_error(conf->mddev, rdev);
  				rdev_dec_pending(rdev, conf->mddev);
  			}
b84db560e   NeilBrown   md/raid5: Clear b...
3300
3301
3302
3303
3304
3305
  			if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
  				rdev = conf->disks[i].rdev;
  				rdev_clear_badblocks(rdev, sh->sector,
  						     STRIPE_SECTORS);
  				rdev_dec_pending(rdev, conf->mddev);
  			}
bc2607f39   NeilBrown   md/raid5: write e...
3306
  		}
6c0069c0a   Yuri Tikhonov   md/raid6: asynchr...
3307
3308
  	if (s.ops_request)
  		raid_run_ops(sh, s.ops_request);
f0e43bcde   Dan Williams   md: unify raid5/6...
3309
  	ops_run_io(sh, &s);
16a53ecc3   NeilBrown   [PATCH] md: merge...
3310

c5709ef6a   NeilBrown   md/raid5: add so...
3311
  	if (s.dec_preread_active) {
729a18663   NeilBrown   md/raid5: don't c...
3312
  		/* We delay this until after ops_run_io so that if make_request
e9c7469bb   Tejun Heo   md: implment REQ_...
3313
  		 * is waiting on a flush, it won't continue until the writes
729a18663   NeilBrown   md/raid5: don't c...
3314
3315
3316
3317
3318
3319
3320
  		 * have actually been submitted.
  		 */
  		atomic_dec(&conf->preread_active_stripes);
  		if (atomic_read(&conf->preread_active_stripes) <
  		    IO_THRESHOLD)
  			md_wakeup_thread(conf->mddev->thread);
  	}
c5709ef6a   NeilBrown   md/raid5: add so...
3321
  	return_io(s.return_bi);
16a53ecc3   NeilBrown   [PATCH] md: merge...
3322

257a4b42a   Dan Williams   md/raid5: STRIPE_...
3323
  	clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
16a53ecc3   NeilBrown   [PATCH] md: merge...
3324
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
3325
  static void raid5_activate_delayed(struct r5conf *conf)
16a53ecc3   NeilBrown   [PATCH] md: merge...
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
  {
  	if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
  		while (!list_empty(&conf->delayed_list)) {
  			struct list_head *l = conf->delayed_list.next;
  			struct stripe_head *sh;
  			sh = list_entry(l, struct stripe_head, lru);
  			list_del_init(l);
  			clear_bit(STRIPE_DELAYED, &sh->state);
  			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  				atomic_inc(&conf->preread_active_stripes);
8b3e6cdc5   Dan Williams   md: introduce get...
3336
  			list_add_tail(&sh->lru, &conf->hold_list);
16a53ecc3   NeilBrown   [PATCH] md: merge...
3337
  		}
482c08349   NeilBrown   md - remove old p...
3338
  	}
16a53ecc3   NeilBrown   [PATCH] md: merge...
3339
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
3340
  static void activate_bit_delay(struct r5conf *conf)
16a53ecc3   NeilBrown   [PATCH] md: merge...
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
  {
  	/* device_lock is held */
  	struct list_head head;
  	list_add(&head, &conf->bitmap_list);
  	list_del_init(&conf->bitmap_list);
  	while (!list_empty(&head)) {
  		struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
  		list_del_init(&sh->lru);
  		atomic_inc(&sh->count);
  		__release_stripe(conf, sh);
  	}
  }
fd01b88c7   NeilBrown   md: remove typede...
3353
  int md_raid5_congested(struct mddev *mddev, int bits)
f022b2fdd   NeilBrown   [PATCH] md: add a...
3354
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
3355
  	struct r5conf *conf = mddev->private;
f022b2fdd   NeilBrown   [PATCH] md: add a...
3356
3357
3358
3359
  
  	/* No difference between reads and writes.  Just check
  	 * how busy the stripe_cache is
  	 */
3fa841d7e   NeilBrown   md: report device...
3360

f022b2fdd   NeilBrown   [PATCH] md: add a...
3361
3362
3363
3364
3365
3366
3367
3368
3369
  	if (conf->inactive_blocked)
  		return 1;
  	if (conf->quiesce)
  		return 1;
  	if (list_empty_careful(&conf->inactive_list))
  		return 1;
  
  	return 0;
  }
11d8a6e37   NeilBrown   md/raid5: export ...
3370
3371
3372
3373
  EXPORT_SYMBOL_GPL(md_raid5_congested);
  
  /*
   * raid5_congested - congestion callback taking an opaque mddev pointer
   * @data: the struct mddev, passed as void *
   * @bits: forwarded unchanged to both congestion checks
   *
   * Returns 1 if either mddev_congested() or md_raid5_congested() reports
   * congestion, 0 otherwise.  The raid5 check is skipped when the generic
   * md check already reports congestion.
   */
  static int raid5_congested(void *data, int bits)
  {
  	struct mddev *mddev = data;

  	if (mddev_congested(mddev, bits))
  		return 1;

  	return md_raid5_congested(mddev, bits) ? 1 : 0;
  }
f022b2fdd   NeilBrown   [PATCH] md: add a...
3379

23032a0eb   Raz Ben-Jehuda(caro)   [PATCH] md: defin...
3380
3381
3382
  /* We want read requests to align with chunks where possible,
   * but write requests don't need to.
   */
cc371e66e   Alasdair G Kergon   Add bvec_merge_da...
3383
3384
3385
  static int raid5_mergeable_bvec(struct request_queue *q,
  				struct bvec_merge_data *bvm,
  				struct bio_vec *biovec)
23032a0eb   Raz Ben-Jehuda(caro)   [PATCH] md: defin...
3386
  {
fd01b88c7   NeilBrown   md: remove typede...
3387
  	struct mddev *mddev = q->queuedata;
cc371e66e   Alasdair G Kergon   Add bvec_merge_da...
3388
  	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
23032a0eb   Raz Ben-Jehuda(caro)   [PATCH] md: defin...
3389
  	int max;
9d8f03636   Andre Noll   md: Make mddev->c...
3390
  	unsigned int chunk_sectors = mddev->chunk_sectors;
cc371e66e   Alasdair G Kergon   Add bvec_merge_da...
3391
  	unsigned int bio_sectors = bvm->bi_size >> 9;
23032a0eb   Raz Ben-Jehuda(caro)   [PATCH] md: defin...
3392

cc371e66e   Alasdair G Kergon   Add bvec_merge_da...
3393
  	if ((bvm->bi_rw & 1) == WRITE)
23032a0eb   Raz Ben-Jehuda(caro)   [PATCH] md: defin...
3394
  		return biovec->bv_len; /* always allow writes to be mergeable */
664e7c413   Andre Noll   md: Convert mddev...
3395
3396
  	if (mddev->new_chunk_sectors < mddev->chunk_sectors)
  		chunk_sectors = mddev->new_chunk_sectors;
23032a0eb   Raz Ben-Jehuda(caro)   [PATCH] md: defin...
3397
3398
3399
3400
3401
3402
3403
  	max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
  	if (max < 0) max = 0;
  	if (max <= biovec->bv_len && bio_sectors == 0)
  		return biovec->bv_len;
  	else
  		return max;
  }
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3404

fd01b88c7   NeilBrown   md: remove typede...
3405
  static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3406
3407
  {
  	sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
9d8f03636   Andre Noll   md: Make mddev->c...
3408
  	unsigned int chunk_sectors = mddev->chunk_sectors;
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3409
  	unsigned int bio_sectors = bio->bi_size >> 9;
664e7c413   Andre Noll   md: Convert mddev...
3410
3411
  	if (mddev->new_chunk_sectors < mddev->chunk_sectors)
  		chunk_sectors = mddev->new_chunk_sectors;
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3412
3413
3414
3415
3416
  	return  chunk_sectors >=
  		((sector & (chunk_sectors - 1)) + bio_sectors);
  }
  
  /*
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3417
3418
3419
   *  add bio to the retry LIFO  ( in O(1) ... we are in interrupt )
   *  later sampled by raid5d.
   */
d1688a6d5   NeilBrown   md/raid5: typedef...
3420
  static void add_bio_to_retry(struct bio *bi,struct r5conf *conf)
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
  {
  	unsigned long flags;
  
  	spin_lock_irqsave(&conf->device_lock, flags);
  
  	bi->bi_next = conf->retry_read_aligned_list;
  	conf->retry_read_aligned_list = bi;
  
  	spin_unlock_irqrestore(&conf->device_lock, flags);
  	md_wakeup_thread(conf->mddev->thread);
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
3432
  static struct bio *remove_bio_from_retry(struct r5conf *conf)
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
  {
  	struct bio *bi;
  
  	bi = conf->retry_read_aligned;
  	if (bi) {
  		conf->retry_read_aligned = NULL;
  		return bi;
  	}
  	bi = conf->retry_read_aligned_list;
  	if(bi) {
387bb1737   Neil Brown   [PATCH] md: fix v...
3443
  		conf->retry_read_aligned_list = bi->bi_next;
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3444
  		bi->bi_next = NULL;
960e739d9   Jens Axboe   block: raid fixup...
3445
3446
3447
3448
  		/*
  		 * this sets the active strip count to 1 and the processed
  		 * strip count to zero (upper 8 bits)
  		 */
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3449
  		bi->bi_phys_segments = 1; /* biased count of active stripes */
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3450
3451
3452
3453
3454
3455
3456
  	}
  
  	return bi;
  }
  
  
  /*
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3457
3458
3459
3460
3461
   *  The "raid5_align_endio" should check if the read succeeded and if it
   *  did, call bio_endio on the original bio (having bio_put the new bio
   *  first).
   *  If the read failed..
   */
6712ecf8f   NeilBrown   Drop 'size' argum...
3462
  static void raid5_align_endio(struct bio *bi, int error)
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3463
3464
  {
  	struct bio* raid_bi  = bi->bi_private;
fd01b88c7   NeilBrown   md: remove typede...
3465
  	struct mddev *mddev;
d1688a6d5   NeilBrown   md/raid5: typedef...
3466
  	struct r5conf *conf;
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3467
  	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
3cb030020   NeilBrown   md: removing type...
3468
  	struct md_rdev *rdev;
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3469

f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3470
  	bio_put(bi);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3471

46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3472
3473
  	rdev = (void*)raid_bi->bi_next;
  	raid_bi->bi_next = NULL;
2b7f22284   NeilBrown   md/raid5: small t...
3474
3475
  	mddev = rdev->mddev;
  	conf = mddev->private;
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3476
3477
3478
3479
  
  	rdev_dec_pending(rdev, conf->mddev);
  
  	if (!error && uptodate) {
6712ecf8f   NeilBrown   Drop 'size' argum...
3480
  		bio_endio(raid_bi, 0);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3481
3482
  		if (atomic_dec_and_test(&conf->active_aligned_reads))
  			wake_up(&conf->wait_for_stripe);
6712ecf8f   NeilBrown   Drop 'size' argum...
3483
  		return;
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3484
  	}
45b4233ca   Dan Williams   raid5: replace cu...
3485
3486
  	pr_debug("raid5_align_endio : io error...handing IO for a retry
  ");
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3487
3488
  
  	add_bio_to_retry(raid_bi, conf);
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3489
  }
387bb1737   Neil Brown   [PATCH] md: fix v...
3490
3491
  static int bio_fits_rdev(struct bio *bi)
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
3492
  	struct request_queue *q = bdev_get_queue(bi->bi_bdev);
387bb1737   Neil Brown   [PATCH] md: fix v...
3493

ae03bf639   Martin K. Petersen   block: Use access...
3494
  	if ((bi->bi_size>>9) > queue_max_sectors(q))
387bb1737   Neil Brown   [PATCH] md: fix v...
3495
3496
  		return 0;
  	blk_recount_segments(q, bi);
8a78362c4   Martin K. Petersen   block: Consolidat...
3497
  	if (bi->bi_phys_segments > queue_max_segments(q))
387bb1737   Neil Brown   [PATCH] md: fix v...
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
  		return 0;
  
  	if (q->merge_bvec_fn)
  		/* it's too hard to apply the merge_bvec_fn at this stage,
  		 * just just give up
  		 */
  		return 0;
  
  	return 1;
  }
fd01b88c7   NeilBrown   md: remove typede...
3508
  static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3509
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
3510
  	struct r5conf *conf = mddev->private;
8553fe7ec   NeilBrown   md/raid5: remove ...
3511
  	int dd_idx;
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3512
  	struct bio* align_bi;
3cb030020   NeilBrown   md: removing type...
3513
  	struct md_rdev *rdev;
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3514
3515
  
  	if (!in_chunk_boundary(mddev, raid_bio)) {
45b4233ca   Dan Williams   raid5: replace cu...
3516
3517
  		pr_debug("chunk_aligned_read : non aligned
  ");
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3518
3519
3520
  		return 0;
  	}
  	/*
a167f6632   NeilBrown   md: use separate ...
3521
  	 * use bio_clone_mddev to make a copy of the bio
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3522
  	 */
a167f6632   NeilBrown   md: use separate ...
3523
  	align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev);
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
  	if (!align_bi)
  		return 0;
  	/*
  	 *   set bi_end_io to a new function, and set bi_private to the
  	 *     original bio.
  	 */
  	align_bi->bi_end_io  = raid5_align_endio;
  	align_bi->bi_private = raid_bio;
  	/*
  	 *	compute position
  	 */
112bf8970   NeilBrown   md/raid5: change ...
3535
3536
  	align_bi->bi_sector =  raid5_compute_sector(conf, raid_bio->bi_sector,
  						    0,
911d4ee85   NeilBrown   md/raid5: simplif...
3537
  						    &dd_idx, NULL);
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3538
3539
3540
3541
  
  	rcu_read_lock();
  	rdev = rcu_dereference(conf->disks[dd_idx].rdev);
  	if (rdev && test_bit(In_sync, &rdev->flags)) {
31c176ecd   NeilBrown   md/raid5: avoid r...
3542
3543
  		sector_t first_bad;
  		int bad_sectors;
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3544
3545
  		atomic_inc(&rdev->nr_pending);
  		rcu_read_unlock();
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3546
3547
3548
3549
  		raid_bio->bi_next = (void*)rdev;
  		align_bi->bi_bdev =  rdev->bdev;
  		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
  		align_bi->bi_sector += rdev->data_offset;
31c176ecd   NeilBrown   md/raid5: avoid r...
3550
3551
3552
3553
  		if (!bio_fits_rdev(align_bi) ||
  		    is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
  				&first_bad, &bad_sectors)) {
  			/* too big in some way, or has a known bad block */
387bb1737   Neil Brown   [PATCH] md: fix v...
3554
3555
3556
3557
  			bio_put(align_bi);
  			rdev_dec_pending(rdev, mddev);
  			return 0;
  		}
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3558
3559
3560
3561
3562
3563
  		spin_lock_irq(&conf->device_lock);
  		wait_event_lock_irq(conf->wait_for_stripe,
  				    conf->quiesce == 0,
  				    conf->device_lock, /* nothing */);
  		atomic_inc(&conf->active_aligned_reads);
  		spin_unlock_irq(&conf->device_lock);
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3564
3565
3566
3567
  		generic_make_request(align_bi);
  		return 1;
  	} else {
  		rcu_read_unlock();
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
3568
  		bio_put(align_bi);
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3569
3570
3571
  		return 0;
  	}
  }
8b3e6cdc5   Dan Williams   md: introduce get...
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
  /* __get_priority_stripe - get the next stripe to process
   *
   * Full stripe writes are allowed to pass preread active stripes up until
   * the bypass_threshold is exceeded.  In general the bypass_count
   * increments when the handle_list is handled before the hold_list; however, it
   * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a
   * stripe with in flight i/o.  The bypass_count will be reset when the
   * head of the hold_list has changed, i.e. the head was promoted to the
   * handle_list.
   */
d1688a6d5   NeilBrown   md/raid5: typedef...
3582
  static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
8b3e6cdc5   Dan Williams   md: introduce get...
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
  {
  	struct stripe_head *sh;
  
  	pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d
  ",
  		  __func__,
  		  list_empty(&conf->handle_list) ? "empty" : "busy",
  		  list_empty(&conf->hold_list) ? "empty" : "busy",
  		  atomic_read(&conf->pending_full_writes), conf->bypass_count);
  
  	if (!list_empty(&conf->handle_list)) {
  		sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
  
  		if (list_empty(&conf->hold_list))
  			conf->bypass_count = 0;
  		else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) {
  			if (conf->hold_list.next == conf->last_hold)
  				conf->bypass_count++;
  			else {
  				conf->last_hold = conf->hold_list.next;
  				conf->bypass_count -= conf->bypass_threshold;
  				if (conf->bypass_count < 0)
  					conf->bypass_count = 0;
  			}
  		}
  	} else if (!list_empty(&conf->hold_list) &&
  		   ((conf->bypass_threshold &&
  		     conf->bypass_count > conf->bypass_threshold) ||
  		    atomic_read(&conf->pending_full_writes) == 0)) {
  		sh = list_entry(conf->hold_list.next,
  				typeof(*sh), lru);
  		conf->bypass_count -= conf->bypass_threshold;
  		if (conf->bypass_count < 0)
  			conf->bypass_count = 0;
  	} else
  		return NULL;
  
  	list_del_init(&sh->lru);
  	atomic_inc(&sh->count);
  	BUG_ON(atomic_read(&sh->count) != 1);
  	return sh;
  }
f679623f5   Raz Ben-Jehuda(caro)   [PATCH] md: handl...
3625

b4fdcb02f   Linus Torvalds   Merge branch 'for...
3626
  static void make_request(struct mddev *mddev, struct bio * bi)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3627
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
3628
  	struct r5conf *conf = mddev->private;
911d4ee85   NeilBrown   md/raid5: simplif...
3629
  	int dd_idx;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3630
3631
3632
  	sector_t new_sector;
  	sector_t logical_sector, last_sector;
  	struct stripe_head *sh;
a362357b6   Jens Axboe   [BLOCK] Unify the...
3633
  	const int rw = bio_data_dir(bi);
490773268   NeilBrown   md: move io accou...
3634
  	int remaining;
7c13edc87   NeilBrown   md: incorporate n...
3635
  	int plugged;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3636

e9c7469bb   Tejun Heo   md: implment REQ_...
3637
3638
  	if (unlikely(bi->bi_rw & REQ_FLUSH)) {
  		md_flush_request(mddev, bi);
5a7bbad27   Christoph Hellwig   block: remove sup...
3639
  		return;
e5dcdd80a   NeilBrown   [PATCH] md: fail ...
3640
  	}
3d310eb7b   NeilBrown   [PATCH] md: fix d...
3641
  	md_write_start(mddev, bi);
06d91a5fe   NeilBrown   [PATCH] md: impro...
3642

802ba064c   NeilBrown   [PATCH] md: Don't...
3643
  	if (rw == READ &&
524886151   Raz Ben-Jehuda(caro)   [PATCH] md: enabl...
3644
  	     mddev->reshape_position == MaxSector &&
21a52c6d0   NeilBrown   md: pass mddev to...
3645
  	     chunk_aligned_read(mddev,bi))
5a7bbad27   Christoph Hellwig   block: remove sup...
3646
  		return;
524886151   Raz Ben-Jehuda(caro)   [PATCH] md: enabl...
3647

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3648
3649
3650
3651
  	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
  	last_sector = bi->bi_sector + (bi->bi_size>>9);
  	bi->bi_next = NULL;
  	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
06d91a5fe   NeilBrown   [PATCH] md: impro...
3652

7c13edc87   NeilBrown   md: incorporate n...
3653
  	plugged = mddev_check_plugged(mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3654
3655
  	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
  		DEFINE_WAIT(w);
16a53ecc3   NeilBrown   [PATCH] md: merge...
3656
  		int disks, data_disks;
b5663ba40   NeilBrown   md/raid5: simplif...
3657
  		int previous;
b578d55fd   NeilBrown   [PATCH] md: Only ...
3658

7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3659
  	retry:
b5663ba40   NeilBrown   md/raid5: simplif...
3660
  		previous = 0;
b0f9ec047   NeilBrown   md/raid5: minor c...
3661
  		disks = conf->raid_disks;
b578d55fd   NeilBrown   [PATCH] md: Only ...
3662
  		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
b0f9ec047   NeilBrown   md/raid5: minor c...
3663
  		if (unlikely(conf->reshape_progress != MaxSector)) {
fef9c61fd   NeilBrown   md/raid5: change ...
3664
  			/* spinlock is needed as reshape_progress may be
df8e7f763   NeilBrown   [PATCH] md: Impro...
3665
3666
  			 * 64bit on a 32bit platform, and so it might be
  			 * possible to see a half-updated value
aeb878b09   Jesper Juhl   md, raid5: Fix sp...
3667
  			 * Of course reshape_progress could change after
df8e7f763   NeilBrown   [PATCH] md: Impro...
3668
3669
3670
3671
  			 * the lock is dropped, so once we get a reference
  			 * to the stripe that we think it is, we will have
  			 * to check again.
  			 */
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3672
  			spin_lock_irq(&conf->device_lock);
fef9c61fd   NeilBrown   md/raid5: change ...
3673
3674
3675
  			if (mddev->delta_disks < 0
  			    ? logical_sector < conf->reshape_progress
  			    : logical_sector >= conf->reshape_progress) {
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3676
  				disks = conf->previous_raid_disks;
b5663ba40   NeilBrown   md/raid5: simplif...
3677
3678
  				previous = 1;
  			} else {
fef9c61fd   NeilBrown   md/raid5: change ...
3679
3680
3681
  				if (mddev->delta_disks < 0
  				    ? logical_sector < conf->reshape_safe
  				    : logical_sector >= conf->reshape_safe) {
b578d55fd   NeilBrown   [PATCH] md: Only ...
3682
3683
3684
3685
3686
  					spin_unlock_irq(&conf->device_lock);
  					schedule();
  					goto retry;
  				}
  			}
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3687
3688
  			spin_unlock_irq(&conf->device_lock);
  		}
16a53ecc3   NeilBrown   [PATCH] md: merge...
3689
  		data_disks = disks - conf->max_degraded;
112bf8970   NeilBrown   md/raid5: change ...
3690
3691
  		new_sector = raid5_compute_sector(conf, logical_sector,
  						  previous,
911d4ee85   NeilBrown   md/raid5: simplif...
3692
  						  &dd_idx, NULL);
0c55e0225   NeilBrown   md/raid5: improve...
3693
3694
  		pr_debug("raid456: make_request, sector %llu logical %llu
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3695
3696
  			(unsigned long long)new_sector, 
  			(unsigned long long)logical_sector);
b5663ba40   NeilBrown   md/raid5: simplif...
3697
  		sh = get_active_stripe(conf, new_sector, previous,
a8c906ca3   NeilBrown   md/raid5 - avoid ...
3698
  				       (bi->bi_rw&RWA_MASK), 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3699
  		if (sh) {
b0f9ec047   NeilBrown   md/raid5: minor c...
3700
  			if (unlikely(previous)) {
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3701
  				/* expansion might have moved on while waiting for a
df8e7f763   NeilBrown   [PATCH] md: Impro...
3702
3703
3704
3705
3706
3707
  				 * stripe, so we must do the range check again.
  				 * Expansion could still move past after this
  				 * test, but as we are holding a reference to
  				 * 'sh', we know that if that happens,
  				 *  STRIPE_EXPANDING will get set and the expansion
  				 * won't proceed until we finish with the stripe.
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3708
3709
3710
  				 */
  				int must_retry = 0;
  				spin_lock_irq(&conf->device_lock);
b0f9ec047   NeilBrown   md/raid5: minor c...
3711
3712
3713
  				if (mddev->delta_disks < 0
  				    ? logical_sector >= conf->reshape_progress
  				    : logical_sector < conf->reshape_progress)
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3714
3715
3716
3717
3718
  					/* mismatch, need to try again */
  					must_retry = 1;
  				spin_unlock_irq(&conf->device_lock);
  				if (must_retry) {
  					release_stripe(sh);
7a3ab9089   Dan Williams   md/raid5: add mis...
3719
  					schedule();
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3720
3721
3722
  					goto retry;
  				}
  			}
e62e58a5f   NeilBrown   md: use interrupt...
3723

ffd96e35c   Namhyung Kim   md/raid5: get rid...
3724
  			if (rw == WRITE &&
a5c308d4d   NeilBrown   md/raid5: suspend...
3725
  			    logical_sector >= mddev->suspend_lo &&
e464eafdb   NeilBrown   [PATCH] md: Suppo...
3726
3727
  			    logical_sector < mddev->suspend_hi) {
  				release_stripe(sh);
e62e58a5f   NeilBrown   md: use interrupt...
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
  				/* As the suspend_* range is controlled by
  				 * userspace, we want an interruptible
  				 * wait.
  				 */
  				flush_signals(current);
  				prepare_to_wait(&conf->wait_for_overlap,
  						&w, TASK_INTERRUPTIBLE);
  				if (logical_sector >= mddev->suspend_lo &&
  				    logical_sector < mddev->suspend_hi)
  					schedule();
e464eafdb   NeilBrown   [PATCH] md: Suppo...
3738
3739
  				goto retry;
  			}
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3740
3741
  
  			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
ffd96e35c   Namhyung Kim   md/raid5: get rid...
3742
  			    !add_stripe_bio(sh, bi, dd_idx, rw)) {
7ecaa1e6a   NeilBrown   [PATCH] md: Infra...
3743
3744
  				/* Stripe is busy expanding or
  				 * add failed due to overlap.  Flush everything
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3745
3746
  				 * and wait a while
  				 */
482c08349   NeilBrown   md - remove old p...
3747
  				md_wakeup_thread(mddev->thread);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3748
3749
3750
3751
3752
  				release_stripe(sh);
  				schedule();
  				goto retry;
  			}
  			finish_wait(&conf->wait_for_overlap, &w);
6ed3003c1   NeilBrown   md: fix an occasi...
3753
3754
  			set_bit(STRIPE_HANDLE, &sh->state);
  			clear_bit(STRIPE_DELAYED, &sh->state);
e9c7469bb   Tejun Heo   md: implment REQ_...
3755
  			if ((bi->bi_rw & REQ_SYNC) &&
729a18663   NeilBrown   md/raid5: don't c...
3756
3757
  			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
  				atomic_inc(&conf->preread_active_stripes);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3758
  			release_stripe(sh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3759
3760
3761
3762
3763
3764
3765
3766
  		} else {
  			/* cannot get stripe for read-ahead, just give-up */
  			clear_bit(BIO_UPTODATE, &bi->bi_flags);
  			finish_wait(&conf->wait_for_overlap, &w);
  			break;
  		}
  			
  	}
7c13edc87   NeilBrown   md: incorporate n...
3767
3768
  	if (!plugged)
  		md_wakeup_thread(mddev->thread);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3769
  	spin_lock_irq(&conf->device_lock);
960e739d9   Jens Axboe   block: raid fixup...
3770
  	remaining = raid5_dec_bi_phys_segments(bi);
f6344757a   NeilBrown   [PATCH] md: Remov...
3771
3772
  	spin_unlock_irq(&conf->device_lock);
  	if (remaining == 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3773

16a53ecc3   NeilBrown   [PATCH] md: merge...
3774
  		if ( rw == WRITE )
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3775
  			md_write_end(mddev);
6712ecf8f   NeilBrown   Drop 'size' argum...
3776

0e13fe23a   Neil Brown   use bio_endio ins...
3777
  		bio_endio(bi, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3778
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3779
  }
fd01b88c7   NeilBrown   md: remove typede...
3780
  /* Forward declaration: reshape_request() below calls raid5_size() ahead of its definition. */
  static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks);
b522adcde   Dan Williams   md: 'array_size' ...
3781

fd01b88c7   NeilBrown   md: remove typede...
3782
  static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
  {
  	/*
  	 * Perform one step of a reshape.  Reshape differs enough from
  	 * recovery/resync that it gets its own code path.
  	 *
  	 * Each call collects one chunk's worth of destination stripes and
  	 * marks them STRIPE_EXPANDING, then locates the matching source
  	 * stripes and flags them STRIPE_EXPAND_SOURCE so they get handled.
  	 * Per the original note here, handle_stripe copies the data into
  	 * the destination stripe as the source reads complete.
  	 *
  	 * @mddev:     array being reshaped
  	 * @sector_nr: current per-device position as seen by the sync loop
  	 * @skipped:   set to 1 when the call only fast-forwards a restart
  	 *
  	 * Returns the number of sectors advanced: either the restart
  	 * offset (with *skipped set) or one reshape step.
  	 */
  	struct r5conf *conf = mddev->private;
  	struct stripe_head *sh;
  	struct list_head dest_stripes;
  	sector_t src_first, src_last;
  	sector_t writepos, readpos, safepos;
  	sector_t stripe_addr;
  	int old_data_disks = conf->previous_raid_disks - conf->max_degraded;
  	int new_data_disks = conf->raid_disks - conf->max_degraded;
  	int step;		/* per-device sectors handled by one call */
  	int need_checkpoint;
  	int dd_idx;
  	int off;

  	if (sector_nr == 0) {
  		/* Resuming part-way through: jump over what is already done */
  		if (mddev->delta_disks < 0) {
  			if (conf->reshape_progress < raid5_size(mddev, 0, 0))
  				sector_nr = raid5_size(mddev, 0, 0)
  					- conf->reshape_progress;
  		} else if (conf->reshape_progress > 0) {
  			sector_nr = conf->reshape_progress;
  		}
  		sector_div(sector_nr, new_data_disks);
  		if (sector_nr != 0) {
  			mddev->curr_resync_completed = sector_nr;
  			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
  			*skipped = 1;
  			return sector_nr;
  		}
  	}

  	/*
  	 * A whole chunk has to be processed per call; when the old and
  	 * new chunk sizes differ, use the larger one.
  	 */
  	step = (mddev->new_chunk_sectors > mddev->chunk_sectors)
  		? mddev->new_chunk_sectors
  		: mddev->chunk_sectors;

  	/*
  	 * Convert the array-wide progress markers into per-device
  	 * addresses, then widen them by one step in the direction of
  	 * travel:
  	 *   writepos - most advanced device address we might write
  	 *   readpos  - least advanced device address we might read
  	 *   safepos  - least address the metadata records as reshaped
  	 *
  	 * If readpos is behind writepos a crash cannot be made safe here
  	 * (userspace must keep a backup), so there is no rush to update
  	 * metadata.  Otherwise, if safepos is behind writepos, metadata
  	 * must be written first so safepos catches up with readpos.
  	 */
  	writepos = conf->reshape_progress;
  	sector_div(writepos, new_data_disks);
  	readpos = conf->reshape_progress;
  	sector_div(readpos, old_data_disks);
  	safepos = conf->reshape_safe;
  	sector_div(safepos, old_data_disks);

  	if (mddev->delta_disks < 0) {
  		writepos -= min_t(sector_t, step, writepos);
  		readpos += step;
  		safepos += step;
  		need_checkpoint = (safepos > writepos && readpos < writepos);
  	} else {
  		writepos += step;
  		readpos -= min_t(sector_t, step, readpos);
  		safepos -= min_t(sector_t, step, safepos);
  		need_checkpoint = (safepos < writepos && readpos > writepos);
  	}

  	/* Regardless of position, checkpoint at least every 10 seconds */
  	if (need_checkpoint ||
  	    time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
  		/* No further progress until the superblock is updated */
  		wait_event(conf->wait_for_overlap,
  			   atomic_read(&conf->reshape_stripes) == 0);
  		mddev->reshape_position = conf->reshape_progress;
  		mddev->curr_resync_completed = sector_nr;
  		conf->reshape_checkpoint = jiffies;
  		set_bit(MD_CHANGE_DEVS, &mddev->flags);
  		md_wakeup_thread(mddev->thread);
  		wait_event(mddev->sb_wait, mddev->flags == 0 ||
  			   kthread_should_stop());
  		spin_lock_irq(&conf->device_lock);
  		conf->reshape_safe = mddev->reshape_position;
  		spin_unlock_irq(&conf->device_lock);
  		wake_up(&conf->wait_for_overlap);
  		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
  	}

  	/* Pick the destination address and sanity-check it against sector_nr */
  	if (mddev->delta_disks < 0) {
  		BUG_ON(conf->reshape_progress == 0);
  		stripe_addr = writepos;
  		BUG_ON((mddev->dev_sectors & ~((sector_t)step - 1))
  		       - step - stripe_addr
  		       != sector_nr);
  	} else {
  		BUG_ON(writepos != sector_nr + step);
  		stripe_addr = sector_nr;
  	}

  	INIT_LIST_HEAD(&dest_stripes);
  	for (off = 0; off < step; off += STRIPE_SECTORS) {
  		int all_beyond_old_end = 1;
  		int d;

  		sh = get_active_stripe(conf, stripe_addr + off, 0, 0, 1);
  		set_bit(STRIPE_EXPANDING, &sh->state);
  		atomic_inc(&conf->reshape_stripes);

  		/*
  		 * Data blocks that map beyond the end of the old array
  		 * have no source; zero them and mark them done.
  		 */
  		d = sh->disks;
  		while (d--) {
  			sector_t blk;

  			if (d == sh->pd_idx ||
  			    (conf->level == 6 && d == sh->qd_idx))
  				continue;
  			blk = compute_blocknr(sh, d, 0);
  			if (blk < raid5_size(mddev, 0, 0)) {
  				all_beyond_old_end = 0;
  				continue;
  			}
  			memset(page_address(sh->dev[d].page), 0, STRIPE_SIZE);
  			set_bit(R5_Expanded, &sh->dev[d].flags);
  			set_bit(R5_UPTODATE, &sh->dev[d].flags);
  		}
  		if (all_beyond_old_end) {
  			set_bit(STRIPE_EXPAND_READY, &sh->state);
  			set_bit(STRIPE_HANDLE, &sh->state);
  		}
  		list_add(&sh->lru, &dest_stripes);
  	}

  	spin_lock_irq(&conf->device_lock);
  	if (mddev->delta_disks < 0) {
  		conf->reshape_progress -= step * new_data_disks;
  	} else {
  		conf->reshape_progress += step * new_data_disks;
  	}
  	spin_unlock_irq(&conf->device_lock);

  	/*
  	 * Destination stripes are set up; now request the source stripes.
  	 * Their range comes from mapping the first and last block of the
  	 * destination range through the previous geometry.
  	 */
  	src_first = raid5_compute_sector(conf, stripe_addr * (new_data_disks),
  					 1, &dd_idx, NULL);
  	src_last = raid5_compute_sector(conf,
  					((stripe_addr + step)
  					 * new_data_disks - 1),
  					1, &dd_idx, NULL);
  	if (src_last >= mddev->dev_sectors)
  		src_last = mddev->dev_sectors - 1;

  	for (; src_first <= src_last; src_first += STRIPE_SECTORS) {
  		sh = get_active_stripe(conf, src_first, 1, 0, 1);
  		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
  		set_bit(STRIPE_HANDLE, &sh->state);
  		release_stripe(sh);
  	}

  	/* Sources are marked, so the destination stripes can be let go */
  	while (!list_empty(&dest_stripes)) {
  		sh = list_entry(dest_stripes.next, struct stripe_head, lru);
  		list_del_init(&sh->lru);
  		release_stripe(sh);
  	}

  	/*
  	 * If this step brings us to the resync_max pause point, the
  	 * superblock has to be written out before returning.
  	 */
  	sector_nr += step;
  	if ((sector_nr - mddev->curr_resync_completed) * 2
  	    >= mddev->resync_max - mddev->curr_resync_completed) {
  		/* No further progress until the superblock is updated */
  		wait_event(conf->wait_for_overlap,
  			   atomic_read(&conf->reshape_stripes) == 0);
  		mddev->reshape_position = conf->reshape_progress;
  		mddev->curr_resync_completed = sector_nr;
  		conf->reshape_checkpoint = jiffies;
  		set_bit(MD_CHANGE_DEVS, &mddev->flags);
  		md_wakeup_thread(mddev->thread);
  		wait_event(mddev->sb_wait,
  			   !test_bit(MD_CHANGE_DEVS, &mddev->flags)
  			   || kthread_should_stop());
  		spin_lock_irq(&conf->device_lock);
  		conf->reshape_safe = mddev->reshape_position;
  		spin_unlock_irq(&conf->device_lock);
  		wake_up(&conf->wait_for_overlap);
  		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
  	}
  	return step;
  }
  
  /* FIXME go_faster isn't used */
fd01b88c7   NeilBrown   md: remove typede...
3998
  static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster)
52c03291a   NeilBrown   [PATCH] md: split...
3999
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4000
  	struct r5conf *conf = mddev->private;
52c03291a   NeilBrown   [PATCH] md: split...
4001
  	struct stripe_head *sh;
58c0fed40   Andre Noll   md: Make mddev->s...
4002
  	sector_t max_sector = mddev->dev_sectors;
57dab0bdf   NeilBrown   md: use sector_t ...
4003
  	sector_t sync_blocks;
16a53ecc3   NeilBrown   [PATCH] md: merge...
4004
4005
  	int still_degraded = 0;
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4006

72626685d   NeilBrown   [PATCH] md: add w...
4007
  	if (sector_nr >= max_sector) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4008
  		/* just being told to finish up .. nothing much to do */
cea9c2280   NeilBrown   md: add explicit ...
4009

292695531   NeilBrown   [PATCH] md: Final...
4010
4011
4012
4013
  		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
  			end_reshape(conf);
  			return 0;
  		}
72626685d   NeilBrown   [PATCH] md: add w...
4014
4015
4016
4017
  
  		if (mddev->curr_resync < max_sector) /* aborted */
  			bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
  					&sync_blocks, 1);
16a53ecc3   NeilBrown   [PATCH] md: merge...
4018
  		else /* completed sync */
72626685d   NeilBrown   [PATCH] md: add w...
4019
4020
  			conf->fullsync = 0;
  		bitmap_close_sync(mddev->bitmap);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4021
4022
  		return 0;
  	}
ccfcc3c10   NeilBrown   [PATCH] md: Core ...
4023

64bd660b5   NeilBrown   md: allow raid5_q...
4024
4025
  	/* Allow raid5_quiesce to complete */
  	wait_event(conf->wait_for_overlap, conf->quiesce != 2);
52c03291a   NeilBrown   [PATCH] md: split...
4026
4027
  	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
  		return reshape_request(mddev, sector_nr, skipped);
f67055780   NeilBrown   [PATCH] md: Check...
4028

c62072777   NeilBrown   md: allow a maxim...
4029
4030
4031
4032
4033
  	/* No need to check resync_max as we never do more than one
  	 * stripe, and as resync_max will always be on a chunk boundary,
  	 * if the check in md_do_sync didn't fire, there is no chance
  	 * of overstepping resync_max here
  	 */
16a53ecc3   NeilBrown   [PATCH] md: merge...
4034
  	/* if there is too many failed drives and we are trying
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4035
4036
4037
  	 * to resync, then assert that we are finished, because there is
  	 * nothing we can do.
  	 */
3285edf15   NeilBrown   [PATCH] md: Fix b...
4038
  	if (mddev->degraded >= conf->max_degraded &&
16a53ecc3   NeilBrown   [PATCH] md: merge...
4039
  	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
58c0fed40   Andre Noll   md: Make mddev->s...
4040
  		sector_t rv = mddev->dev_sectors - sector_nr;
57afd89f9   NeilBrown   [PATCH] md: impro...
4041
  		*skipped = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4042
4043
  		return rv;
  	}
72626685d   NeilBrown   [PATCH] md: add w...
4044
  	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
3855ad9f3   NeilBrown   [PATCH] md: make ...
4045
  	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
72626685d   NeilBrown   [PATCH] md: add w...
4046
4047
4048
4049
4050
4051
  	    !conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
  		/* we can skip this block, and probably more */
  		sync_blocks /= STRIPE_SECTORS;
  		*skipped = 1;
  		return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4052

b47490c9b   NeilBrown   md: Update md bit...
4053
4054
  
  	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
a8c906ca3   NeilBrown   md/raid5 - avoid ...
4055
  	sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4056
  	if (sh == NULL) {
a8c906ca3   NeilBrown   md/raid5 - avoid ...
4057
  		sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4058
  		/* make sure we don't swamp the stripe cache if someone else
16a53ecc3   NeilBrown   [PATCH] md: merge...
4059
  		 * is trying to get access
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4060
  		 */
66c006a55   Nishanth Aravamudan   [PATCH] drivers/m...
4061
  		schedule_timeout_uninterruptible(1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4062
  	}
16a53ecc3   NeilBrown   [PATCH] md: merge...
4063
4064
4065
4066
  	/* Need to check if array will still be degraded after recovery/resync
  	 * We don't need to check the 'failed' flag as when that gets set,
  	 * recovery aborts.
  	 */
f001a70cd   NeilBrown   md/raid5: use con...
4067
  	for (i = 0; i < conf->raid_disks; i++)
16a53ecc3   NeilBrown   [PATCH] md: merge...
4068
4069
4070
4071
  		if (conf->disks[i].rdev == NULL)
  			still_degraded = 1;
  
  	bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
83206d66b   NeilBrown   md/raid5: Remove ...
4072
  	set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4073

1442577bf   NeilBrown   Revert "md: do no...
4074
  	handle_stripe(sh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4075
4076
4077
4078
  	release_stripe(sh);
  
  	return STRIPE_SECTORS;
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
4079
  static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
  {
  	/* We may not be able to submit a whole bio at once as there
  	 * may not be enough stripe_heads available.
  	 * We cannot pre-allocate enough stripe_heads as we may need
  	 * more than exist in the cache (if we allow ever large chunks).
  	 * So we do one stripe head at a time and record in
  	 * ->bi_hw_segments how many have been done.
  	 *
  	 * We *know* that this entire raid_bio is in one chunk, so
  	 * it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
  	 */
  	struct stripe_head *sh;
911d4ee85   NeilBrown   md/raid5: simplif...
4092
  	int dd_idx;
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4093
4094
4095
4096
4097
4098
  	sector_t sector, logical_sector, last_sector;
  	int scnt = 0;
  	int remaining;
  	int handled = 0;
  
  	logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
112bf8970   NeilBrown   md/raid5: change ...
4099
  	sector = raid5_compute_sector(conf, logical_sector,
911d4ee85   NeilBrown   md/raid5: simplif...
4100
  				      0, &dd_idx, NULL);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4101
4102
4103
  	last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
  
  	for (; logical_sector < last_sector;
387bb1737   Neil Brown   [PATCH] md: fix v...
4104
4105
4106
  	     logical_sector += STRIPE_SECTORS,
  		     sector += STRIPE_SECTORS,
  		     scnt++) {
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4107

960e739d9   Jens Axboe   block: raid fixup...
4108
  		if (scnt < raid5_bi_hw_segments(raid_bio))
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4109
4110
  			/* already done this stripe */
  			continue;
a8c906ca3   NeilBrown   md/raid5 - avoid ...
4111
  		sh = get_active_stripe(conf, sector, 0, 1, 0);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4112
4113
4114
  
  		if (!sh) {
  			/* failed to get a stripe - must wait */
960e739d9   Jens Axboe   block: raid fixup...
4115
  			raid5_set_bi_hw_segments(raid_bio, scnt);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4116
4117
4118
4119
4120
  			conf->retry_read_aligned = raid_bio;
  			return handled;
  		}
  
  		set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
387bb1737   Neil Brown   [PATCH] md: fix v...
4121
4122
  		if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
  			release_stripe(sh);
960e739d9   Jens Axboe   block: raid fixup...
4123
  			raid5_set_bi_hw_segments(raid_bio, scnt);
387bb1737   Neil Brown   [PATCH] md: fix v...
4124
4125
4126
  			conf->retry_read_aligned = raid_bio;
  			return handled;
  		}
36d1c6476   Dan Williams   md/raid6: move th...
4127
  		handle_stripe(sh);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4128
4129
4130
4131
  		release_stripe(sh);
  		handled++;
  	}
  	spin_lock_irq(&conf->device_lock);
960e739d9   Jens Axboe   block: raid fixup...
4132
  	remaining = raid5_dec_bi_phys_segments(raid_bio);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4133
  	spin_unlock_irq(&conf->device_lock);
0e13fe23a   Neil Brown   use bio_endio ins...
4134
4135
  	if (remaining == 0)
  		bio_endio(raid_bio, 0);
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4136
4137
4138
4139
  	if (atomic_dec_and_test(&conf->active_aligned_reads))
  		wake_up(&conf->wait_for_stripe);
  	return handled;
  }
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4140

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4141
4142
4143
4144
4145
4146
4147
  /*
   * This is our raid5 kernel thread.
   *
   * We scan the hash table for stripes which can be handled now.
   * During the scan, completed stripes are saved for us by the interrupt
   * handler, so that they will not have to wait for our next wakeup.
   */
fd01b88c7   NeilBrown   md: remove typede...
4148
  static void raid5d(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4149
4150
  {
  	struct stripe_head *sh;
d1688a6d5   NeilBrown   md/raid5: typedef...
4151
  	struct r5conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4152
  	int handled;
e1dfa0a29   NeilBrown   md: use new plugg...
4153
  	struct blk_plug plug;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4154

45b4233ca   Dan Williams   raid5: replace cu...
4155
4156
  	pr_debug("+++ raid5d active
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4157
4158
  
  	md_check_recovery(mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4159

e1dfa0a29   NeilBrown   md: use new plugg...
4160
  	blk_start_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4161
4162
4163
  	handled = 0;
  	spin_lock_irq(&conf->device_lock);
  	while (1) {
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4164
  		struct bio *bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4165

7c13edc87   NeilBrown   md: incorporate n...
4166
4167
4168
4169
  		if (atomic_read(&mddev->plug_cnt) == 0 &&
  		    !list_empty(&conf->bitmap_list)) {
  			/* Now is a good time to flush some bitmap updates */
  			conf->seq_flush++;
700e432d8   NeilBrown   [PATCH] md: fix l...
4170
  			spin_unlock_irq(&conf->device_lock);
72626685d   NeilBrown   [PATCH] md: add w...
4171
  			bitmap_unplug(mddev->bitmap);
700e432d8   NeilBrown   [PATCH] md: fix l...
4172
  			spin_lock_irq(&conf->device_lock);
7c13edc87   NeilBrown   md: incorporate n...
4173
  			conf->seq_write = conf->seq_flush;
72626685d   NeilBrown   [PATCH] md: add w...
4174
4175
  			activate_bit_delay(conf);
  		}
7c13edc87   NeilBrown   md: incorporate n...
4176
4177
  		if (atomic_read(&mddev->plug_cnt) == 0)
  			raid5_activate_delayed(conf);
72626685d   NeilBrown   [PATCH] md: add w...
4178

46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
4179
4180
4181
4182
4183
4184
4185
4186
4187
  		while ((bio = remove_bio_from_retry(conf))) {
  			int ok;
  			spin_unlock_irq(&conf->device_lock);
  			ok = retry_aligned_read(conf, bio);
  			spin_lock_irq(&conf->device_lock);
  			if (!ok)
  				break;
  			handled++;
  		}
8b3e6cdc5   Dan Williams   md: introduce get...
4188
  		sh = __get_priority_stripe(conf);
c9f21aaff   Dan Williams   md: move async_tx...
4189
  		if (!sh)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4190
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4191
4192
4193
  		spin_unlock_irq(&conf->device_lock);
  		
  		handled++;
417b8d4ac   Dan Williams   md/raid456: downl...
4194
4195
4196
  		handle_stripe(sh);
  		release_stripe(sh);
  		cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4197

de393cdea   NeilBrown   md: make it easie...
4198
4199
  		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
  			md_check_recovery(mddev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4200
4201
  		spin_lock_irq(&conf->device_lock);
  	}
45b4233ca   Dan Williams   raid5: replace cu...
4202
4203
  	pr_debug("%d stripes handled
  ", handled);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4204
4205
  
  	spin_unlock_irq(&conf->device_lock);
c9f21aaff   Dan Williams   md: move async_tx...
4206
  	async_tx_issue_pending_all();
e1dfa0a29   NeilBrown   md: use new plugg...
4207
  	blk_finish_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4208

45b4233ca   Dan Williams   raid5: replace cu...
4209
4210
  	pr_debug("--- raid5d inactive
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4211
  }
3f294f4fb   NeilBrown   [PATCH] md: add k...
4212
  static ssize_t
fd01b88c7   NeilBrown   md: remove typede...
4213
  raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
3f294f4fb   NeilBrown   [PATCH] md: add k...
4214
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4215
  	struct r5conf *conf = mddev->private;
96de1e663   NeilBrown   [PATCH] md: fix s...
4216
4217
4218
4219
4220
  	if (conf)
  		return sprintf(page, "%d
  ", conf->max_nr_stripes);
  	else
  		return 0;
3f294f4fb   NeilBrown   [PATCH] md: add k...
4221
  }
c41d4ac40   NeilBrown   md/raid5: factor ...
4222
  int
fd01b88c7   NeilBrown   md: remove typede...
4223
  raid5_set_cache_size(struct mddev *mddev, int size)
3f294f4fb   NeilBrown   [PATCH] md: add k...
4224
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4225
  	struct r5conf *conf = mddev->private;
b5470dc5f   Dan Williams   md: resolve exter...
4226
  	int err;
c41d4ac40   NeilBrown   md/raid5: factor ...
4227
  	if (size <= 16 || size > 32768)
3f294f4fb   NeilBrown   [PATCH] md: add k...
4228
  		return -EINVAL;
c41d4ac40   NeilBrown   md/raid5: factor ...
4229
  	while (size < conf->max_nr_stripes) {
3f294f4fb   NeilBrown   [PATCH] md: add k...
4230
4231
4232
4233
4234
  		if (drop_one_stripe(conf))
  			conf->max_nr_stripes--;
  		else
  			break;
  	}
b5470dc5f   Dan Williams   md: resolve exter...
4235
4236
4237
  	err = md_allow_write(mddev);
  	if (err)
  		return err;
c41d4ac40   NeilBrown   md/raid5: factor ...
4238
  	while (size > conf->max_nr_stripes) {
3f294f4fb   NeilBrown   [PATCH] md: add k...
4239
4240
4241
4242
  		if (grow_one_stripe(conf))
  			conf->max_nr_stripes++;
  		else break;
  	}
c41d4ac40   NeilBrown   md/raid5: factor ...
4243
4244
4245
4246
4247
  	return 0;
  }
  EXPORT_SYMBOL(raid5_set_cache_size);
  
  static ssize_t
fd01b88c7   NeilBrown   md: remove typede...
4248
  raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len)
c41d4ac40   NeilBrown   md/raid5: factor ...
4249
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4250
  	struct r5conf *conf = mddev->private;
c41d4ac40   NeilBrown   md/raid5: factor ...
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
  	unsigned long new;
  	int err;
  
  	if (len >= PAGE_SIZE)
  		return -EINVAL;
  	if (!conf)
  		return -ENODEV;
  
  	if (strict_strtoul(page, 10, &new))
  		return -EINVAL;
  	err = raid5_set_cache_size(mddev, new);
  	if (err)
  		return err;
3f294f4fb   NeilBrown   [PATCH] md: add k...
4264
4265
  	return len;
  }
007583c92   NeilBrown   [PATCH] md: chang...
4266

96de1e663   NeilBrown   [PATCH] md: fix s...
4267
4268
4269
4270
  /*
   * sysfs entry "stripe_cache_size": world-readable, writable by the
   * owner, backed by the show/store handlers named below.
   */
  static struct md_sysfs_entry
  raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
  				raid5_show_stripe_cache_size,
  				raid5_store_stripe_cache_size);
3f294f4fb   NeilBrown   [PATCH] md: add k...
4271
4272
  
  static ssize_t
fd01b88c7   NeilBrown   md: remove typede...
4273
  raid5_show_preread_threshold(struct mddev *mddev, char *page)
8b3e6cdc5   Dan Williams   md: introduce get...
4274
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4275
  	struct r5conf *conf = mddev->private;
8b3e6cdc5   Dan Williams   md: introduce get...
4276
4277
4278
4279
4280
4281
4282
4283
  	if (conf)
  		return sprintf(page, "%d
  ", conf->bypass_threshold);
  	else
  		return 0;
  }
  
  static ssize_t
fd01b88c7   NeilBrown   md: remove typede...
4284
  raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
8b3e6cdc5   Dan Williams   md: introduce get...
4285
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4286
  	struct r5conf *conf = mddev->private;
4ef197d87   Dan Williams   md: raid5.c conve...
4287
  	unsigned long new;
8b3e6cdc5   Dan Williams   md: introduce get...
4288
4289
4290
4291
  	if (len >= PAGE_SIZE)
  		return -EINVAL;
  	if (!conf)
  		return -ENODEV;
4ef197d87   Dan Williams   md: raid5.c conve...
4292
  	if (strict_strtoul(page, 10, &new))
8b3e6cdc5   Dan Williams   md: introduce get...
4293
  		return -EINVAL;
4ef197d87   Dan Williams   md: raid5.c conve...
4294
  	if (new > conf->max_nr_stripes)
8b3e6cdc5   Dan Williams   md: introduce get...
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
  		return -EINVAL;
  	conf->bypass_threshold = new;
  	return len;
  }
  
  /*
   * sysfs entry "preread_bypass_threshold": world-readable, writable by
   * the owner, backed by the show/store handlers named below.
   */
  static struct md_sysfs_entry
  raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
  					S_IRUGO | S_IWUSR,
  					raid5_show_preread_threshold,
  					raid5_store_preread_threshold);
  
  static ssize_t
fd01b88c7   NeilBrown   md: remove typede...
4307
  stripe_cache_active_show(struct mddev *mddev, char *page)
3f294f4fb   NeilBrown   [PATCH] md: add k...
4308
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4309
  	struct r5conf *conf = mddev->private;
96de1e663   NeilBrown   [PATCH] md: fix s...
4310
4311
4312
4313
4314
  	if (conf)
  		return sprintf(page, "%d
  ", atomic_read(&conf->active_stripes));
  	else
  		return 0;
3f294f4fb   NeilBrown   [PATCH] md: add k...
4315
  }
96de1e663   NeilBrown   [PATCH] md: fix s...
4316
4317
  /*
   * Read-only sysfs entry "stripe_cache_active".  __ATTR_RO derives the
   * handler name from the attribute name, i.e. stripe_cache_active_show.
   */
  static struct md_sysfs_entry
  raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
3f294f4fb   NeilBrown   [PATCH] md: add k...
4318

007583c92   NeilBrown   [PATCH] md: chang...
4319
  /*
   * NULL-terminated list of the raid5 sysfs attributes defined in this
   * file: stripe_cache_size, stripe_cache_active and
   * preread_bypass_threshold.  Referenced by raid5_attrs_group.
   */
  static struct attribute *raid5_attrs[] =  {
3f294f4fb   NeilBrown   [PATCH] md: add k...
4320
4321
  	&raid5_stripecache_size.attr,
  	&raid5_stripecache_active.attr,
8b3e6cdc5   Dan Williams   md: introduce get...
4322
  	&raid5_preread_bypass_threshold.attr,
3f294f4fb   NeilBrown   [PATCH] md: add k...
4323
4324
  	NULL,
  };
007583c92   NeilBrown   [PATCH] md: chang...
4325
4326
4327
  /*
   * Attribute group wrapping raid5_attrs; no group name is set.  run()
   * registers it on the array's kobject with sysfs_create_group().
   */
  static struct attribute_group raid5_attrs_group = {
  	.name = NULL,
  	.attrs = raid5_attrs,
3f294f4fb   NeilBrown   [PATCH] md: add k...
4328
  };
80c3a6ce4   Dan Williams   md: add 'size' as...
4329
  /*
   * Compute the usable array size in sectors.
   *
   * @sectors:    per-device size; 0 means "use mddev->dev_sectors".
   * @raid_disks: number of member devices; 0 means "use the smaller of
   *              the current and previous disk counts".
   *
   * The per-device size is rounded down to a multiple of both the
   * current and the new chunk size, then multiplied by the number of
   * devices left after subtracting conf->max_degraded.
   */
  static sector_t
  raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
  {
  	struct r5conf *conf = mddev->private;
  	sector_t cur_chunk_mask, new_chunk_mask;
  	int data_disks;
  
  	if (!sectors)
  		sectors = mddev->dev_sectors;
  	if (!raid_disks) {
  		/* size is defined by the smallest of previous and new size */
  		raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
  	}
  
  	cur_chunk_mask = (sector_t)mddev->chunk_sectors - 1;
  	new_chunk_mask = (sector_t)mddev->new_chunk_sectors - 1;
  	sectors &= ~cur_chunk_mask;
  	sectors &= ~new_chunk_mask;
  
  	data_disks = raid_disks - conf->max_degraded;
  	return sectors * data_disks;
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
4344
  static void raid5_free_percpu(struct r5conf *conf)
36d1c6476   Dan Williams   md/raid6: move th...
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
  {
  	struct raid5_percpu *percpu;
  	unsigned long cpu;
  
  	if (!conf->percpu)
  		return;
  
  	get_online_cpus();
  	for_each_possible_cpu(cpu) {
  		percpu = per_cpu_ptr(conf->percpu, cpu);
  		safe_put_page(percpu->spare_page);
d6f38f31f   Dan Williams   md/raid5,6: add p...
4356
  		kfree(percpu->scribble);
36d1c6476   Dan Williams   md/raid6: move th...
4357
4358
4359
4360
4361
4362
4363
4364
  	}
  #ifdef CONFIG_HOTPLUG_CPU
  	unregister_cpu_notifier(&conf->cpu_notify);
  #endif
  	put_online_cpus();
  
  	free_percpu(conf->percpu);
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
4365
  static void free_conf(struct r5conf *conf)
95fc17aac   Dan Williams   md/raid6: release...
4366
4367
  {
  	shrink_stripes(conf);
36d1c6476   Dan Williams   md/raid6: move th...
4368
  	raid5_free_percpu(conf);
95fc17aac   Dan Williams   md/raid6: release...
4369
4370
4371
4372
  	kfree(conf->disks);
  	kfree(conf->stripe_hashtbl);
  	kfree(conf);
  }
36d1c6476   Dan Williams   md/raid6: move th...
4373
4374
4375
4376
  #ifdef CONFIG_HOTPLUG_CPU
  /*
   * CPU hotplug callback that keeps the per-cpu scratch resources in
   * step with CPUs coming and going.
   *
   * CPU_UP_PREPARE[_FROZEN]: allocate the scribble buffer, and for
   *   level 6 the spare page, if the CPU does not have them yet.  On
   *   allocation failure whatever was obtained is released and -ENOMEM
   *   is reported through notifier_from_errno().
   * CPU_DEAD[_FROZEN]: release both resources.
   *
   * All other actions are ignored.  Returns NOTIFY_OK unless an
   * allocation failed.
   */
  static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
  			      void *hcpu)
  {
  	struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify);
  	long cpu = (long)hcpu;
  	struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
  
  	switch (action) {
  	case CPU_UP_PREPARE:
  	case CPU_UP_PREPARE_FROZEN:
  		if (conf->level == 6 && !percpu->spare_page)
  			percpu->spare_page = alloc_page(GFP_KERNEL);
  		if (!percpu->scribble)
  			percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
  
  		if (!percpu->scribble ||
  		    (conf->level == 6 && !percpu->spare_page)) {
  			safe_put_page(percpu->spare_page);
  			kfree(percpu->scribble);
  			/*
  			 * Clear the pointers: raid5_free_percpu() walks
  			 * every possible CPU and would otherwise release
  			 * these a second time, and a later CPU_UP_PREPARE
  			 * would mistake the stale values for live buffers.
  			 */
  			percpu->spare_page = NULL;
  			percpu->scribble = NULL;
  			pr_err("%s: failed memory allocation for cpu%ld\n",
  			       __func__, cpu);
  			return notifier_from_errno(-ENOMEM);
  		}
  		break;
  	case CPU_DEAD:
  	case CPU_DEAD_FROZEN:
  		safe_put_page(percpu->spare_page);
  		kfree(percpu->scribble);
  		percpu->spare_page = NULL;
  		percpu->scribble = NULL;
  		break;
  	default:
  		break;
  	}
  	return NOTIFY_OK;
  }
  #endif
d1688a6d5   NeilBrown   md/raid5: typedef...
4412
  static int raid5_alloc_percpu(struct r5conf *conf)
36d1c6476   Dan Williams   md/raid6: move th...
4413
4414
4415
  {
  	unsigned long cpu;
  	struct page *spare_page;
a29d8b8e2   Tejun Heo   percpu: add __per...
4416
  	struct raid5_percpu __percpu *allcpus;
d6f38f31f   Dan Williams   md/raid5,6: add p...
4417
  	void *scribble;
36d1c6476   Dan Williams   md/raid6: move th...
4418
  	int err;
36d1c6476   Dan Williams   md/raid6: move th...
4419
4420
4421
4422
4423
4424
4425
4426
  	allcpus = alloc_percpu(struct raid5_percpu);
  	if (!allcpus)
  		return -ENOMEM;
  	conf->percpu = allcpus;
  
  	get_online_cpus();
  	err = 0;
  	for_each_present_cpu(cpu) {
d6f38f31f   Dan Williams   md/raid5,6: add p...
4427
4428
4429
4430
4431
4432
4433
4434
  		if (conf->level == 6) {
  			spare_page = alloc_page(GFP_KERNEL);
  			if (!spare_page) {
  				err = -ENOMEM;
  				break;
  			}
  			per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
  		}
5e5e3e78e   NeilBrown   md: Fix handling ...
4435
  		scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
d6f38f31f   Dan Williams   md/raid5,6: add p...
4436
  		if (!scribble) {
36d1c6476   Dan Williams   md/raid6: move th...
4437
4438
4439
  			err = -ENOMEM;
  			break;
  		}
d6f38f31f   Dan Williams   md/raid5,6: add p...
4440
  		per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
36d1c6476   Dan Williams   md/raid6: move th...
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
  	}
  #ifdef CONFIG_HOTPLUG_CPU
  	conf->cpu_notify.notifier_call = raid456_cpu_notify;
  	conf->cpu_notify.priority = 0;
  	if (err == 0)
  		err = register_cpu_notifier(&conf->cpu_notify);
  #endif
  	put_online_cpus();
  
  	return err;
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
4452
  static struct r5conf *setup_conf(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4453
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4454
  	struct r5conf *conf;
5e5e3e78e   NeilBrown   md: Fix handling ...
4455
  	int raid_disk, memory, max_disks;
3cb030020   NeilBrown   md: removing type...
4456
  	struct md_rdev *rdev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4457
  	struct disk_info *disk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4458

91adb5647   NeilBrown   md/raid5: refacto...
4459
4460
4461
  	if (mddev->new_level != 5
  	    && mddev->new_level != 4
  	    && mddev->new_level != 6) {
0c55e0225   NeilBrown   md/raid5: improve...
4462
4463
  		printk(KERN_ERR "md/raid:%s: raid level not set to 4/5/6 (%d)
  ",
91adb5647   NeilBrown   md/raid5: refacto...
4464
4465
  		       mdname(mddev), mddev->new_level);
  		return ERR_PTR(-EIO);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4466
  	}
91adb5647   NeilBrown   md/raid5: refacto...
4467
4468
4469
4470
  	if ((mddev->new_level == 5
  	     && !algorithm_valid_raid5(mddev->new_layout)) ||
  	    (mddev->new_level == 6
  	     && !algorithm_valid_raid6(mddev->new_layout))) {
0c55e0225   NeilBrown   md/raid5: improve...
4471
4472
  		printk(KERN_ERR "md/raid:%s: layout %d not supported
  ",
91adb5647   NeilBrown   md/raid5: refacto...
4473
4474
  		       mdname(mddev), mddev->new_layout);
  		return ERR_PTR(-EIO);
99c0fb5f9   NeilBrown   md/raid5: Add sup...
4475
  	}
91adb5647   NeilBrown   md/raid5: refacto...
4476
  	if (mddev->new_level == 6 && mddev->raid_disks < 4) {
0c55e0225   NeilBrown   md/raid5: improve...
4477
4478
  		printk(KERN_ERR "md/raid:%s: not enough configured devices (%d, minimum 4)
  ",
91adb5647   NeilBrown   md/raid5: refacto...
4479
4480
  		       mdname(mddev), mddev->raid_disks);
  		return ERR_PTR(-EINVAL);
4bbf3771c   NeilBrown   md: Relax minimum...
4481
  	}
664e7c413   Andre Noll   md: Convert mddev...
4482
4483
4484
  	if (!mddev->new_chunk_sectors ||
  	    (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
  	    !is_power_of_2(mddev->new_chunk_sectors)) {
0c55e0225   NeilBrown   md/raid5: improve...
4485
4486
4487
  		printk(KERN_ERR "md/raid:%s: invalid chunk size %d
  ",
  		       mdname(mddev), mddev->new_chunk_sectors << 9);
91adb5647   NeilBrown   md/raid5: refacto...
4488
  		return ERR_PTR(-EINVAL);
f67055780   NeilBrown   [PATCH] md: Check...
4489
  	}
d1688a6d5   NeilBrown   md/raid5: typedef...
4490
  	conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
91adb5647   NeilBrown   md/raid5: refacto...
4491
  	if (conf == NULL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4492
  		goto abort;
f5efd45ae   Dan Williams   md/raid5: initial...
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
  	spin_lock_init(&conf->device_lock);
  	init_waitqueue_head(&conf->wait_for_stripe);
  	init_waitqueue_head(&conf->wait_for_overlap);
  	INIT_LIST_HEAD(&conf->handle_list);
  	INIT_LIST_HEAD(&conf->hold_list);
  	INIT_LIST_HEAD(&conf->delayed_list);
  	INIT_LIST_HEAD(&conf->bitmap_list);
  	INIT_LIST_HEAD(&conf->inactive_list);
  	atomic_set(&conf->active_stripes, 0);
  	atomic_set(&conf->preread_active_stripes, 0);
  	atomic_set(&conf->active_aligned_reads, 0);
  	conf->bypass_threshold = BYPASS_THRESHOLD;
d890fa2b0   NeilBrown   md: Fix some bugs...
4505
  	conf->recovery_disabled = mddev->recovery_disabled - 1;
91adb5647   NeilBrown   md/raid5: refacto...
4506
4507
4508
4509
4510
  
  	conf->raid_disks = mddev->raid_disks;
  	if (mddev->reshape_position == MaxSector)
  		conf->previous_raid_disks = mddev->raid_disks;
  	else
f67055780   NeilBrown   [PATCH] md: Check...
4511
  		conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
5e5e3e78e   NeilBrown   md: Fix handling ...
4512
4513
  	max_disks = max(conf->raid_disks, conf->previous_raid_disks);
  	conf->scribble_len = scribble_len(max_disks);
f67055780   NeilBrown   [PATCH] md: Check...
4514

5e5e3e78e   NeilBrown   md: Fix handling ...
4515
  	conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
b55e6bfcd   NeilBrown   [PATCH] md: Split...
4516
4517
4518
  			      GFP_KERNEL);
  	if (!conf->disks)
  		goto abort;
9ffae0cf3   NeilBrown   [PATCH] md: conve...
4519

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4520
  	conf->mddev = mddev;
fccddba06   NeilBrown   [PATCH] md: tidy ...
4521
  	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4522
  		goto abort;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4523

36d1c6476   Dan Williams   md/raid6: move th...
4524
4525
4526
  	conf->level = mddev->new_level;
  	if (raid5_alloc_percpu(conf) != 0)
  		goto abort;
0c55e0225   NeilBrown   md/raid5: improve...
4527
4528
  	pr_debug("raid456: run(%s) called.
  ", mdname(mddev));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4529

159ec1fc0   Cheng Renquan   md: use list_for_...
4530
  	list_for_each_entry(rdev, &mddev->disks, same_set) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4531
  		raid_disk = rdev->raid_disk;
5e5e3e78e   NeilBrown   md: Fix handling ...
4532
  		if (raid_disk >= max_disks
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4533
4534
4535
4536
4537
  		    || raid_disk < 0)
  			continue;
  		disk = conf->disks + raid_disk;
  
  		disk->rdev = rdev;
b2d444d7a   NeilBrown   [PATCH] md: conve...
4538
  		if (test_bit(In_sync, &rdev->flags)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4539
  			char b[BDEVNAME_SIZE];
0c55e0225   NeilBrown   md/raid5: improve...
4540
4541
4542
4543
  			printk(KERN_INFO "md/raid:%s: device %s operational as raid"
  			       " disk %d
  ",
  			       mdname(mddev), bdevname(rdev->bdev, b), raid_disk);
d6b212f4b   Jonathan Brassow   MD: raid5 do not ...
4544
  		} else if (rdev->saved_raid_disk != raid_disk)
8c2e870a6   Neil Brown   Ensure interrupte...
4545
4546
  			/* Cannot rely on bitmap to complete recovery */
  			conf->fullsync = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4547
  	}
09c9e5fa1   Andre Noll   md: convert conf-...
4548
  	conf->chunk_sectors = mddev->new_chunk_sectors;
91adb5647   NeilBrown   md/raid5: refacto...
4549
  	conf->level = mddev->new_level;
16a53ecc3   NeilBrown   [PATCH] md: merge...
4550
4551
4552
4553
  	if (conf->level == 6)
  		conf->max_degraded = 2;
  	else
  		conf->max_degraded = 1;
91adb5647   NeilBrown   md/raid5: refacto...
4554
  	conf->algorithm = mddev->new_layout;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4555
  	conf->max_nr_stripes = NR_STRIPES;
fef9c61fd   NeilBrown   md/raid5: change ...
4556
  	conf->reshape_progress = mddev->reshape_position;
e183eaedd   NeilBrown   md/raid5: prepare...
4557
  	if (conf->reshape_progress != MaxSector) {
09c9e5fa1   Andre Noll   md: convert conf-...
4558
  		conf->prev_chunk_sectors = mddev->chunk_sectors;
e183eaedd   NeilBrown   md/raid5: prepare...
4559
4560
  		conf->prev_algo = mddev->layout;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4561

91adb5647   NeilBrown   md/raid5: refacto...
4562
  	memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
5e5e3e78e   NeilBrown   md: Fix handling ...
4563
  		 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
91adb5647   NeilBrown   md/raid5: refacto...
4564
4565
  	if (grow_stripes(conf, conf->max_nr_stripes)) {
  		printk(KERN_ERR
0c55e0225   NeilBrown   md/raid5: improve...
4566
4567
4568
  		       "md/raid:%s: couldn't allocate %dkB for buffers
  ",
  		       mdname(mddev), memory);
91adb5647   NeilBrown   md/raid5: refacto...
4569
4570
  		goto abort;
  	} else
0c55e0225   NeilBrown   md/raid5: improve...
4571
4572
4573
  		printk(KERN_INFO "md/raid:%s: allocated %dkB
  ",
  		       mdname(mddev), memory);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4574

0da3c6194   NeilBrown   md: Improve name ...
4575
  	conf->thread = md_register_thread(raid5d, mddev, NULL);
91adb5647   NeilBrown   md/raid5: refacto...
4576
4577
  	if (!conf->thread) {
  		printk(KERN_ERR
0c55e0225   NeilBrown   md/raid5: improve...
4578
4579
  		       "md/raid:%s: couldn't allocate thread.
  ",
91adb5647   NeilBrown   md/raid5: refacto...
4580
  		       mdname(mddev));
16a53ecc3   NeilBrown   [PATCH] md: merge...
4581
4582
  		goto abort;
  	}
91adb5647   NeilBrown   md/raid5: refacto...
4583
4584
4585
4586
4587
  
  	return conf;
  
   abort:
  	if (conf) {
95fc17aac   Dan Williams   md/raid6: release...
4588
  		free_conf(conf);
91adb5647   NeilBrown   md/raid5: refacto...
4589
4590
4591
4592
  		return ERR_PTR(-EIO);
  	} else
  		return ERR_PTR(-ENOMEM);
  }
c148ffdcd   NeilBrown   md/raid5: Allow d...
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
  
  /*
   * Report whether device slot @raid_disk holds nothing but parity
   * under layout @algo.
   *
   * @raid_disks is the total number of devices and @max_degraded the
   * number of parity devices assumed by the PARITY_0 / PARITY_N
   * layouts.  Returns 1 if the slot is parity-only, 0 otherwise
   * (including for every layout not listed here).
   */
  static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
  {
  	int last = raid_disks - 1;
  
  	switch (algo) {
  	case ALGORITHM_PARITY_0:
  		/* parity occupies the first max_degraded slots */
  		return raid_disk < max_degraded;
  	case ALGORITHM_PARITY_N:
  		/* parity occupies the last max_degraded slots */
  		return raid_disk >= raid_disks - max_degraded;
  	case ALGORITHM_PARITY_0_6:
  		return raid_disk == 0 || raid_disk == last;
  	case ALGORITHM_LEFT_ASYMMETRIC_6:
  	case ALGORITHM_RIGHT_ASYMMETRIC_6:
  	case ALGORITHM_LEFT_SYMMETRIC_6:
  	case ALGORITHM_RIGHT_SYMMETRIC_6:
  		return raid_disk == last;
  	default:
  		return 0;
  	}
  }
fd01b88c7   NeilBrown   md: remove typede...
4619
  static int run(struct mddev *mddev)
91adb5647   NeilBrown   md/raid5: refacto...
4620
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4621
  	struct r5conf *conf;
9f7c22200   NeilBrown   md/raid5: export ...
4622
  	int working_disks = 0;
c148ffdcd   NeilBrown   md/raid5: Allow d...
4623
  	int dirty_parity_disks = 0;
3cb030020   NeilBrown   md: removing type...
4624
  	struct md_rdev *rdev;
c148ffdcd   NeilBrown   md/raid5: Allow d...
4625
  	sector_t reshape_offset = 0;
91adb5647   NeilBrown   md/raid5: refacto...
4626

8c6ac868b   Andre Noll   md: Push down rec...
4627
  	if (mddev->recovery_cp != MaxSector)
0c55e0225   NeilBrown   md/raid5: improve...
4628
  		printk(KERN_NOTICE "md/raid:%s: not clean"
8c6ac868b   Andre Noll   md: Push down rec...
4629
4630
4631
  		       " -- starting background reconstruction
  ",
  		       mdname(mddev));
91adb5647   NeilBrown   md/raid5: refacto...
4632
4633
4634
4635
4636
4637
4638
4639
  	if (mddev->reshape_position != MaxSector) {
  		/* Check that we can continue the reshape.
  		 * Currently only disks can change, it must
  		 * increase, and we must be past the point where
  		 * a stripe over-writes itself
  		 */
  		sector_t here_new, here_old;
  		int old_disks;
18b003349   Andre Noll   md: raid5 run(): ...
4640
  		int max_degraded = (mddev->level == 6 ? 2 : 1);
91adb5647   NeilBrown   md/raid5: refacto...
4641

88ce4930e   NeilBrown   md/raid5: allow l...
4642
  		if (mddev->new_level != mddev->level) {
0c55e0225   NeilBrown   md/raid5: improve...
4643
  			printk(KERN_ERR "md/raid:%s: unsupported reshape "
91adb5647   NeilBrown   md/raid5: refacto...
4644
4645
4646
4647
4648
  			       "required - aborting.
  ",
  			       mdname(mddev));
  			return -EINVAL;
  		}
91adb5647   NeilBrown   md/raid5: refacto...
4649
4650
4651
4652
4653
4654
  		old_disks = mddev->raid_disks - mddev->delta_disks;
  		/* reshape_position must be on a new-stripe boundary, and one
  		 * further up in new geometry must map after here in old
  		 * geometry.
  		 */
  		here_new = mddev->reshape_position;
664e7c413   Andre Noll   md: Convert mddev...
4655
  		if (sector_div(here_new, mddev->new_chunk_sectors *
91adb5647   NeilBrown   md/raid5: refacto...
4656
  			       (mddev->raid_disks - max_degraded))) {
0c55e0225   NeilBrown   md/raid5: improve...
4657
4658
4659
  			printk(KERN_ERR "md/raid:%s: reshape_position not "
  			       "on a stripe boundary
  ", mdname(mddev));
91adb5647   NeilBrown   md/raid5: refacto...
4660
4661
  			return -EINVAL;
  		}
c148ffdcd   NeilBrown   md/raid5: Allow d...
4662
  		reshape_offset = here_new * mddev->new_chunk_sectors;
91adb5647   NeilBrown   md/raid5: refacto...
4663
4664
  		/* here_new is the stripe we will write to */
  		here_old = mddev->reshape_position;
9d8f03636   Andre Noll   md: Make mddev->c...
4665
  		sector_div(here_old, mddev->chunk_sectors *
91adb5647   NeilBrown   md/raid5: refacto...
4666
4667
4668
  			   (old_disks-max_degraded));
  		/* here_old is the first stripe that we might need to read
  		 * from */
67ac6011d   NeilBrown   md/raid5: allow n...
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
  		if (mddev->delta_disks == 0) {
  			/* We cannot be sure it is safe to start an in-place
  			 * reshape.  It is only safe if user-space is monitoring
  			 * and taking constant backups.
  			 * mdadm always starts a situation like this in
  			 * readonly mode so it can take control before
  			 * allowing any writes.  So just check for that.
  			 */
  			if ((here_new * mddev->new_chunk_sectors != 
  			     here_old * mddev->chunk_sectors) ||
  			    mddev->ro == 0) {
0c55e0225   NeilBrown   md/raid5: improve...
4680
4681
4682
4683
  				printk(KERN_ERR "md/raid:%s: in-place reshape must be started"
  				       " in read-only mode - aborting
  ",
  				       mdname(mddev));
67ac6011d   NeilBrown   md/raid5: allow n...
4684
4685
4686
4687
4688
4689
4690
  				return -EINVAL;
  			}
  		} else if (mddev->delta_disks < 0
  		    ? (here_new * mddev->new_chunk_sectors <=
  		       here_old * mddev->chunk_sectors)
  		    : (here_new * mddev->new_chunk_sectors >=
  		       here_old * mddev->chunk_sectors)) {
91adb5647   NeilBrown   md/raid5: refacto...
4691
  			/* Reading from the same stripe as writing to - bad */
0c55e0225   NeilBrown   md/raid5: improve...
4692
4693
4694
4695
  			printk(KERN_ERR "md/raid:%s: reshape_position too early for "
  			       "auto-recovery - aborting.
  ",
  			       mdname(mddev));
91adb5647   NeilBrown   md/raid5: refacto...
4696
4697
  			return -EINVAL;
  		}
0c55e0225   NeilBrown   md/raid5: improve...
4698
4699
4700
  		printk(KERN_INFO "md/raid:%s: reshape will continue
  ",
  		       mdname(mddev));
91adb5647   NeilBrown   md/raid5: refacto...
4701
4702
4703
4704
  		/* OK, we should be able to continue; */
  	} else {
  		BUG_ON(mddev->level != mddev->new_level);
  		BUG_ON(mddev->layout != mddev->new_layout);
664e7c413   Andre Noll   md: Convert mddev...
4705
  		BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
91adb5647   NeilBrown   md/raid5: refacto...
4706
  		BUG_ON(mddev->delta_disks != 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4707
  	}
91adb5647   NeilBrown   md/raid5: refacto...
4708

245f46c2c   NeilBrown   md: add ->takeove...
4709
4710
4711
4712
  	if (mddev->private == NULL)
  		conf = setup_conf(mddev);
  	else
  		conf = mddev->private;
91adb5647   NeilBrown   md/raid5: refacto...
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
  	if (IS_ERR(conf))
  		return PTR_ERR(conf);
  
  	mddev->thread = conf->thread;
  	conf->thread = NULL;
  	mddev->private = conf;
  
  	/*
  	 * 0 for a fully functional array, 1 or 2 for a degraded array.
  	 */
c148ffdcd   NeilBrown   md/raid5: Allow d...
4723
4724
4725
  	list_for_each_entry(rdev, &mddev->disks, same_set) {
  		if (rdev->raid_disk < 0)
  			continue;
2f1158824   NeilBrown   md/raid5: add a m...
4726
  		if (test_bit(In_sync, &rdev->flags)) {
91adb5647   NeilBrown   md/raid5: refacto...
4727
  			working_disks++;
2f1158824   NeilBrown   md/raid5: add a m...
4728
4729
  			continue;
  		}
c148ffdcd   NeilBrown   md/raid5: Allow d...
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
  		/* This disc is not fully in-sync.  However if it
  		 * just stored parity (beyond the recovery_offset),
  		 * then we don't need to be concerned about the
  		 * array being dirty.
  		 * When reshape goes 'backwards', we never have
  		 * partially completed devices, so we only need
  		 * to worry about reshape going forwards.
  		 */
  		/* Hack because v0.91 doesn't store recovery_offset properly. */
  		if (mddev->major_version == 0 &&
  		    mddev->minor_version > 90)
  			rdev->recovery_offset = reshape_offset;
  			
c148ffdcd   NeilBrown   md/raid5: Allow d...
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
  		if (rdev->recovery_offset < reshape_offset) {
  			/* We need to check old and new layout */
  			if (!only_parity(rdev->raid_disk,
  					 conf->algorithm,
  					 conf->raid_disks,
  					 conf->max_degraded))
  				continue;
  		}
  		if (!only_parity(rdev->raid_disk,
  				 conf->prev_algo,
  				 conf->previous_raid_disks,
  				 conf->max_degraded))
  			continue;
  		dirty_parity_disks++;
  	}
91adb5647   NeilBrown   md/raid5: refacto...
4758

5e5e3e78e   NeilBrown   md: Fix handling ...
4759
4760
  	mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
  			   - working_disks);
91adb5647   NeilBrown   md/raid5: refacto...
4761

674806d62   NeilBrown   md/raid5: More ca...
4762
  	if (has_failed(conf)) {
0c55e0225   NeilBrown   md/raid5: improve...
4763
  		printk(KERN_ERR "md/raid:%s: not enough operational devices"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4764
4765
  			" (%d/%d failed)
  ",
02c2de8cc   NeilBrown   [PATCH] md: remov...
4766
  			mdname(mddev), mddev->degraded, conf->raid_disks);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4767
4768
  		goto abort;
  	}
91adb5647   NeilBrown   md/raid5: refacto...
4769
  	/* device size must be a multiple of chunk size */
9d8f03636   Andre Noll   md: Make mddev->c...
4770
  	mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
91adb5647   NeilBrown   md/raid5: refacto...
4771
  	mddev->resync_max_sectors = mddev->dev_sectors;
c148ffdcd   NeilBrown   md/raid5: Allow d...
4772
  	if (mddev->degraded > dirty_parity_disks &&
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4773
  	    mddev->recovery_cp != MaxSector) {
6ff8d8ec0   NeilBrown   [PATCH] md: allow...
4774
4775
  		if (mddev->ok_start_degraded)
  			printk(KERN_WARNING
0c55e0225   NeilBrown   md/raid5: improve...
4776
4777
4778
  			       "md/raid:%s: starting dirty degraded array"
  			       " - data corruption possible.
  ",
6ff8d8ec0   NeilBrown   [PATCH] md: allow...
4779
4780
4781
  			       mdname(mddev));
  		else {
  			printk(KERN_ERR
0c55e0225   NeilBrown   md/raid5: improve...
4782
4783
  			       "md/raid:%s: cannot start dirty degraded array.
  ",
6ff8d8ec0   NeilBrown   [PATCH] md: allow...
4784
4785
4786
  			       mdname(mddev));
  			goto abort;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4787
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4788
  	if (mddev->degraded == 0)
0c55e0225   NeilBrown   md/raid5: improve...
4789
4790
4791
  		printk(KERN_INFO "md/raid:%s: raid level %d active with %d out of %d"
  		       " devices, algorithm %d
  ", mdname(mddev), conf->level,
e183eaedd   NeilBrown   md/raid5: prepare...
4792
4793
  		       mddev->raid_disks-mddev->degraded, mddev->raid_disks,
  		       mddev->new_layout);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4794
  	else
0c55e0225   NeilBrown   md/raid5: improve...
4795
4796
4797
4798
4799
4800
  		printk(KERN_ALERT "md/raid:%s: raid level %d active with %d"
  		       " out of %d devices, algorithm %d
  ",
  		       mdname(mddev), conf->level,
  		       mddev->raid_disks - mddev->degraded,
  		       mddev->raid_disks, mddev->new_layout);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4801
4802
  
  	print_raid5_conf(conf);
fef9c61fd   NeilBrown   md/raid5: change ...
4803
  	if (conf->reshape_progress != MaxSector) {
fef9c61fd   NeilBrown   md/raid5: change ...
4804
  		conf->reshape_safe = conf->reshape_progress;
f67055780   NeilBrown   [PATCH] md: Check...
4805
4806
4807
4808
4809
4810
  		atomic_set(&conf->reshape_stripes, 0);
  		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
  		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
  		set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
  		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
  		mddev->sync_thread = md_register_thread(md_do_sync, mddev,
0da3c6194   NeilBrown   md: Improve name ...
4811
  							"reshape");
f67055780   NeilBrown   [PATCH] md: Check...
4812
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4813
4814
  
  	/* Ok, everything is just fine now */
a64c876fd   NeilBrown   md: manage redund...
4815
4816
  	if (mddev->to_remove == &raid5_attrs_group)
  		mddev->to_remove = NULL;
00bcb4ac7   NeilBrown   md: reduce depend...
4817
4818
  	else if (mddev->kobj.sd &&
  	    sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
5e55e2f5f   NeilBrown   [PATCH] md: conve...
4819
  		printk(KERN_WARNING
4a5add499   NeilBrown   raid5: Don't set ...
4820
4821
  		       "raid5: failed to create sysfs attributes for %s
  ",
5e55e2f5f   NeilBrown   [PATCH] md: conve...
4822
  		       mdname(mddev));
4a5add499   NeilBrown   raid5: Don't set ...
4823
  	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
7a5febe9f   NeilBrown   [PATCH] md: set t...
4824

4a5add499   NeilBrown   raid5: Don't set ...
4825
  	if (mddev->queue) {
9f7c22200   NeilBrown   md/raid5: export ...
4826
  		int chunk_size;
4a5add499   NeilBrown   raid5: Don't set ...
4827
4828
4829
4830
4831
4832
4833
4834
4835
  		/* read-ahead size must cover two whole stripes, which
  		 * is 2 * (datadisks) * chunksize where 'n' is the
  		 * number of raid devices
  		 */
  		int data_disks = conf->previous_raid_disks - conf->max_degraded;
  		int stripe = data_disks *
  			((mddev->chunk_sectors << 9) / PAGE_SIZE);
  		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
  			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
91adb5647   NeilBrown   md/raid5: refacto...
4836

4a5add499   NeilBrown   raid5: Don't set ...
4837
  		blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
f022b2fdd   NeilBrown   [PATCH] md: add a...
4838

11d8a6e37   NeilBrown   md/raid5: export ...
4839
4840
  		mddev->queue->backing_dev_info.congested_data = mddev;
  		mddev->queue->backing_dev_info.congested_fn = raid5_congested;
7a5febe9f   NeilBrown   [PATCH] md: set t...
4841

9f7c22200   NeilBrown   md/raid5: export ...
4842
4843
4844
4845
  		chunk_size = mddev->chunk_sectors << 9;
  		blk_queue_io_min(mddev->queue, chunk_size);
  		blk_queue_io_opt(mddev->queue, chunk_size *
  				 (conf->raid_disks - conf->max_degraded));
8f6c2e4b3   Martin K. Petersen   md: Use new topol...
4846

9f7c22200   NeilBrown   md/raid5: export ...
4847
4848
4849
4850
  		list_for_each_entry(rdev, &mddev->disks, same_set)
  			disk_stack_limits(mddev->gendisk, rdev->bdev,
  					  rdev->data_offset << 9);
  	}
23032a0eb   Raz Ben-Jehuda(caro)   [PATCH] md: defin...
4851

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4852
4853
  	return 0;
  abort:
01f96c0a9   NeilBrown   md: Avoid waking ...
4854
  	md_unregister_thread(&mddev->thread);
e4f869d9d   NeilBrown   md/raid5: remove ...
4855
4856
  	print_raid5_conf(conf);
  	free_conf(conf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4857
  	mddev->private = NULL;
0c55e0225   NeilBrown   md/raid5: improve...
4858
4859
  	printk(KERN_ALERT "md/raid:%s: failed to run raid set.
  ", mdname(mddev));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4860
4861
  	return -EIO;
  }
fd01b88c7   NeilBrown   md: remove typede...
4862
  static int stop(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4863
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4864
  	struct r5conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4865

01f96c0a9   NeilBrown   md: Avoid waking ...
4866
  	md_unregister_thread(&mddev->thread);
11d8a6e37   NeilBrown   md/raid5: export ...
4867
4868
  	if (mddev->queue)
  		mddev->queue->backing_dev_info.congested_fn = NULL;
95fc17aac   Dan Williams   md/raid6: release...
4869
  	free_conf(conf);
a64c876fd   NeilBrown   md: manage redund...
4870
4871
  	mddev->private = NULL;
  	mddev->to_remove = &raid5_attrs_group;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4872
4873
  	return 0;
  }
fd01b88c7   NeilBrown   md: remove typede...
4874
  static void status(struct seq_file *seq, struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4875
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4876
  	struct r5conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4877
  	int i;
9d8f03636   Andre Noll   md: Make mddev->c...
4878
4879
  	seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
  		mddev->chunk_sectors / 2, mddev->layout);
02c2de8cc   NeilBrown   [PATCH] md: remov...
4880
  	seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4881
4882
4883
  	for (i = 0; i < conf->raid_disks; i++)
  		seq_printf (seq, "%s",
  			       conf->disks[i].rdev &&
b2d444d7a   NeilBrown   [PATCH] md: conve...
4884
  			       test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4885
  	seq_printf (seq, "]");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4886
  }
d1688a6d5   NeilBrown   md/raid5: typedef...
4887
  static void print_raid5_conf (struct r5conf *conf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4888
4889
4890
  {
  	int i;
  	struct disk_info *tmp;
0c55e0225   NeilBrown   md/raid5: improve...
4891
4892
  	printk(KERN_DEBUG "RAID conf printout:
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4893
4894
4895
4896
4897
  	if (!conf) {
  		printk("(conf==NULL)
  ");
  		return;
  	}
0c55e0225   NeilBrown   md/raid5: improve...
4898
4899
4900
4901
  	printk(KERN_DEBUG " --- level:%d rd:%d wd:%d
  ", conf->level,
  	       conf->raid_disks,
  	       conf->raid_disks - conf->mddev->degraded);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4902
4903
4904
4905
4906
  
  	for (i = 0; i < conf->raid_disks; i++) {
  		char b[BDEVNAME_SIZE];
  		tmp = conf->disks + i;
  		if (tmp->rdev)
0c55e0225   NeilBrown   md/raid5: improve...
4907
4908
4909
4910
  			printk(KERN_DEBUG " disk %d, o:%d, dev:%s
  ",
  			       i, !test_bit(Faulty, &tmp->rdev->flags),
  			       bdevname(tmp->rdev->bdev, b));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4911
4912
  	}
  }
fd01b88c7   NeilBrown   md: remove typede...
4913
  static int raid5_spare_active(struct mddev *mddev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4914
4915
  {
  	int i;
d1688a6d5   NeilBrown   md/raid5: typedef...
4916
  	struct r5conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4917
  	struct disk_info *tmp;
6b9656205   NeilBrown   md: provide appro...
4918
4919
  	int count = 0;
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4920
4921
4922
4923
  
  	for (i = 0; i < conf->raid_disks; i++) {
  		tmp = conf->disks + i;
  		if (tmp->rdev
70fffd0bf   NeilBrown   md: Don't update ...
4924
  		    && tmp->rdev->recovery_offset == MaxSector
b2d444d7a   NeilBrown   [PATCH] md: conve...
4925
  		    && !test_bit(Faulty, &tmp->rdev->flags)
c04be0aa8   NeilBrown   [PATCH] md: Impro...
4926
  		    && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
6b9656205   NeilBrown   md: provide appro...
4927
  			count++;
43c73ca43   Jonathan Brassow   md/raid5: use sys...
4928
  			sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4929
4930
  		}
  	}
6b9656205   NeilBrown   md: provide appro...
4931
4932
4933
  	spin_lock_irqsave(&conf->device_lock, flags);
  	mddev->degraded -= count;
  	spin_unlock_irqrestore(&conf->device_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4934
  	print_raid5_conf(conf);
6b9656205   NeilBrown   md: provide appro...
4935
  	return count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4936
  }
fd01b88c7   NeilBrown   md: remove typede...
4937
  static int raid5_remove_disk(struct mddev *mddev, int number)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4938
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4939
  	struct r5conf *conf = mddev->private;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4940
  	int err = 0;
3cb030020   NeilBrown   md: removing type...
4941
  	struct md_rdev *rdev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4942
4943
4944
4945
4946
  	struct disk_info *p = conf->disks + number;
  
  	print_raid5_conf(conf);
  	rdev = p->rdev;
  	if (rdev) {
ec32a2bd3   NeilBrown   md: allow number ...
4947
4948
4949
  		if (number >= conf->raid_disks &&
  		    conf->reshape_progress == MaxSector)
  			clear_bit(In_sync, &rdev->flags);
b2d444d7a   NeilBrown   [PATCH] md: conve...
4950
  		if (test_bit(In_sync, &rdev->flags) ||
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4951
4952
4953
4954
  		    atomic_read(&rdev->nr_pending)) {
  			err = -EBUSY;
  			goto abort;
  		}
dfc706450   NeilBrown   md: restart recov...
4955
4956
4957
4958
  		/* Only remove non-faulty devices if recovery
  		 * isn't possible.
  		 */
  		if (!test_bit(Faulty, &rdev->flags) &&
7f0da59bd   NeilBrown   md/raid5: use bad...
4959
  		    mddev->recovery_disabled != conf->recovery_disabled &&
674806d62   NeilBrown   md/raid5: More ca...
4960
  		    !has_failed(conf) &&
ec32a2bd3   NeilBrown   md: allow number ...
4961
  		    number < conf->raid_disks) {
dfc706450   NeilBrown   md: restart recov...
4962
4963
4964
  			err = -EBUSY;
  			goto abort;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4965
  		p->rdev = NULL;
fbd568a3e   Paul E. McKenney   [PATCH] Change sy...
4966
  		synchronize_rcu();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
  		if (atomic_read(&rdev->nr_pending)) {
  			/* lost the race, try later */
  			err = -EBUSY;
  			p->rdev = rdev;
  		}
  	}
  abort:
  
  	print_raid5_conf(conf);
  	return err;
  }
fd01b88c7   NeilBrown   md: remove typede...
4978
  static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4979
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
4980
  	struct r5conf *conf = mddev->private;
199050ea1   Neil Brown   rationalise retur...
4981
  	int err = -EEXIST;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4982
4983
  	int disk;
  	struct disk_info *p;
6c2fce2ef   Neil Brown   Support adding a ...
4984
4985
  	int first = 0;
  	int last = conf->raid_disks - 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4986

7f0da59bd   NeilBrown   md/raid5: use bad...
4987
4988
  	if (mddev->recovery_disabled == conf->recovery_disabled)
  		return -EBUSY;
674806d62   NeilBrown   md/raid5: More ca...
4989
  	if (has_failed(conf))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4990
  		/* no point adding a device */
199050ea1   Neil Brown   rationalise retur...
4991
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4992

6c2fce2ef   Neil Brown   Support adding a ...
4993
4994
  	if (rdev->raid_disk >= 0)
  		first = last = rdev->raid_disk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4995
4996
  
  	/*
16a53ecc3   NeilBrown   [PATCH] md: merge...
4997
4998
  	 * find the disk ... but prefer rdev->saved_raid_disk
  	 * if possible.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4999
  	 */
16a53ecc3   NeilBrown   [PATCH] md: merge...
5000
  	if (rdev->saved_raid_disk >= 0 &&
6c2fce2ef   Neil Brown   Support adding a ...
5001
  	    rdev->saved_raid_disk >= first &&
16a53ecc3   NeilBrown   [PATCH] md: merge...
5002
5003
5004
  	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
  		disk = rdev->saved_raid_disk;
  	else
6c2fce2ef   Neil Brown   Support adding a ...
5005
5006
  		disk = first;
  	for ( ; disk <= last ; disk++)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5007
  		if ((p=conf->disks + disk)->rdev == NULL) {
b2d444d7a   NeilBrown   [PATCH] md: conve...
5008
  			clear_bit(In_sync, &rdev->flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5009
  			rdev->raid_disk = disk;
199050ea1   Neil Brown   rationalise retur...
5010
  			err = 0;
72626685d   NeilBrown   [PATCH] md: add w...
5011
5012
  			if (rdev->saved_raid_disk != disk)
  				conf->fullsync = 1;
d6065f7bf   Suzanne Wood   [PATCH] md: provi...
5013
  			rcu_assign_pointer(p->rdev, rdev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5014
5015
5016
  			break;
  		}
  	print_raid5_conf(conf);
199050ea1   Neil Brown   rationalise retur...
5017
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5018
  }
fd01b88c7   NeilBrown   md: remove typede...
5019
  /*
   * Change the per-device size used by the array to @sectors, rounded
   * down to a multiple of the chunk size.
   *
   * Per the original note: the caller has established that no resync is
   * happening and that every device has enough space.  When shrinking we
   * could wait for I/O in the removed region to finish, but that is not
   * considered worth the effort.
   *
   * Returns 0 on success, or -EINVAL when the array size in effect after
   * md_set_array_sectors() still exceeds what the new device size
   * provides.
   */
  static int raid5_resize(struct mddev *mddev, sector_t sectors)
  {
  	sector_t chunk_mask = ~((sector_t)mddev->chunk_sectors - 1);

  	sectors &= chunk_mask;

  	md_set_array_sectors(mddev,
  			     raid5_size(mddev, sectors, mddev->raid_disks));
  	if (mddev->array_sectors >
  	    raid5_size(mddev, sectors, mddev->raid_disks))
  		return -EINVAL;

  	set_capacity(mddev->gendisk, mddev->array_sectors);
  	revalidate_disk(mddev->gendisk);

  	/* Growing: make sure resync covers the newly exposed space by
  	 * pulling the checkpoint back to the old end of the devices.
  	 */
  	if (sectors > mddev->dev_sectors &&
  	    mddev->recovery_cp > mddev->dev_sectors) {
  		mddev->recovery_cp = mddev->dev_sectors;
  		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
  	}

  	mddev->dev_sectors = sectors;
  	mddev->resync_max_sectors = sectors;

  	return 0;
  }
fd01b88c7   NeilBrown   md: remove typede...
5045
  static int check_stripe_cache(struct mddev *mddev)
01ee22b49   NeilBrown   md: raid5: check ...
5046
5047
5048
5049
5050
5051
5052
5053
5054
  {
  	/* Can only proceed if there are plenty of stripe_heads.
  	 * We need a minimum of one full stripe,, and for sensible progress
  	 * it is best to have about 4 times that.
  	 * If we require 4 times, then the default 256 4K stripe_heads will
  	 * allow for chunk sizes up to 256K, which is probably OK.
  	 * If the chunk size is greater, user-space should request more
  	 * stripe_heads first.
  	 */
d1688a6d5   NeilBrown   md/raid5: typedef...
5055
  	struct r5conf *conf = mddev->private;
01ee22b49   NeilBrown   md: raid5: check ...
5056
5057
5058
5059
  	if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
  	    > conf->max_nr_stripes ||
  	    ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
  	    > conf->max_nr_stripes) {
0c55e0225   NeilBrown   md/raid5: improve...
5060
5061
5062
  		printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes.  Needed %lu
  ",
  		       mdname(mddev),
01ee22b49   NeilBrown   md: raid5: check ...
5063
5064
5065
5066
5067
5068
  		       ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
  			/ STRIPE_SIZE)*4);
  		return 0;
  	}
  	return 1;
  }
fd01b88c7   NeilBrown   md: remove typede...
5069
  static int check_reshape(struct mddev *mddev)
292695531   NeilBrown   [PATCH] md: Final...
5070
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
5071
  	struct r5conf *conf = mddev->private;
292695531   NeilBrown   [PATCH] md: Final...
5072

88ce4930e   NeilBrown   md/raid5: allow l...
5073
5074
  	if (mddev->delta_disks == 0 &&
  	    mddev->new_layout == mddev->layout &&
664e7c413   Andre Noll   md: Convert mddev...
5075
  	    mddev->new_chunk_sectors == mddev->chunk_sectors)
50ac168a6   NeilBrown   md: merge reconfi...
5076
  		return 0; /* nothing to do */
dba034eef   NeilBrown   Fail safely when ...
5077
5078
5079
  	if (mddev->bitmap)
  		/* Cannot grow a bitmap yet */
  		return -EBUSY;
674806d62   NeilBrown   md/raid5: More ca...
5080
  	if (has_failed(conf))
ec32a2bd3   NeilBrown   md: allow number ...
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
  		return -EINVAL;
  	if (mddev->delta_disks < 0) {
  		/* We might be able to shrink, but the devices must
  		 * be made bigger first.
  		 * For raid6, 4 is the minimum size.
  		 * Otherwise 2 is the minimum
  		 */
  		int min = 2;
  		if (mddev->level == 6)
  			min = 4;
  		if (mddev->raid_disks + mddev->delta_disks < min)
  			return -EINVAL;
  	}
292695531   NeilBrown   [PATCH] md: Final...
5094

01ee22b49   NeilBrown   md: raid5: check ...
5095
  	if (!check_stripe_cache(mddev))
292695531   NeilBrown   [PATCH] md: Final...
5096
  		return -ENOSPC;
292695531   NeilBrown   [PATCH] md: Final...
5097

ec32a2bd3   NeilBrown   md: allow number ...
5098
  	return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
63c70c4f3   NeilBrown   [PATCH] md: Split...
5099
  }
fd01b88c7   NeilBrown   md: remove typede...
5100
  static int raid5_start_reshape(struct mddev *mddev)
63c70c4f3   NeilBrown   [PATCH] md: Split...
5101
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
5102
  	struct r5conf *conf = mddev->private;
3cb030020   NeilBrown   md: removing type...
5103
  	struct md_rdev *rdev;
63c70c4f3   NeilBrown   [PATCH] md: Split...
5104
  	int spares = 0;
c04be0aa8   NeilBrown   [PATCH] md: Impro...
5105
  	unsigned long flags;
63c70c4f3   NeilBrown   [PATCH] md: Split...
5106

f416885ef   NeilBrown   [PATCH] md: add s...
5107
  	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
63c70c4f3   NeilBrown   [PATCH] md: Split...
5108
  		return -EBUSY;
01ee22b49   NeilBrown   md: raid5: check ...
5109
5110
  	if (!check_stripe_cache(mddev))
  		return -ENOSPC;
159ec1fc0   Cheng Renquan   md: use list_for_...
5111
  	list_for_each_entry(rdev, &mddev->disks, same_set)
469518a34   NeilBrown   md: fix the test ...
5112
5113
  		if (!test_bit(In_sync, &rdev->flags)
  		    && !test_bit(Faulty, &rdev->flags))
292695531   NeilBrown   [PATCH] md: Final...
5114
  			spares++;
63c70c4f3   NeilBrown   [PATCH] md: Split...
5115

f416885ef   NeilBrown   [PATCH] md: add s...
5116
  	if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
292695531   NeilBrown   [PATCH] md: Final...
5117
5118
5119
5120
  		/* Not enough devices even to make a degraded array
  		 * of that size
  		 */
  		return -EINVAL;
ec32a2bd3   NeilBrown   md: allow number ...
5121
5122
5123
5124
5125
5126
  	/* Refuse to reduce size of the array.  Any reductions in
  	 * array size must be through explicit setting of array_size
  	 * attribute.
  	 */
  	if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
  	    < mddev->array_sectors) {
0c55e0225   NeilBrown   md/raid5: improve...
5127
  		printk(KERN_ERR "md/raid:%s: array size must be reduced "
ec32a2bd3   NeilBrown   md: allow number ...
5128
5129
5130
5131
  		       "before number of disks
  ", mdname(mddev));
  		return -EINVAL;
  	}
f67055780   NeilBrown   [PATCH] md: Check...
5132
  	atomic_set(&conf->reshape_stripes, 0);
292695531   NeilBrown   [PATCH] md: Final...
5133
5134
  	spin_lock_irq(&conf->device_lock);
  	conf->previous_raid_disks = conf->raid_disks;
63c70c4f3   NeilBrown   [PATCH] md: Split...
5135
  	conf->raid_disks += mddev->delta_disks;
09c9e5fa1   Andre Noll   md: convert conf-...
5136
5137
  	conf->prev_chunk_sectors = conf->chunk_sectors;
  	conf->chunk_sectors = mddev->new_chunk_sectors;
88ce4930e   NeilBrown   md/raid5: allow l...
5138
5139
  	conf->prev_algo = conf->algorithm;
  	conf->algorithm = mddev->new_layout;
fef9c61fd   NeilBrown   md/raid5: change ...
5140
5141
5142
5143
5144
  	if (mddev->delta_disks < 0)
  		conf->reshape_progress = raid5_size(mddev, 0, 0);
  	else
  		conf->reshape_progress = 0;
  	conf->reshape_safe = conf->reshape_progress;
86b42c713   NeilBrown   md/raid5: clearly...
5145
  	conf->generation++;
292695531   NeilBrown   [PATCH] md: Final...
5146
5147
5148
5149
  	spin_unlock_irq(&conf->device_lock);
  
  	/* Add some new drives, as many as will fit.
  	 * We know there are enough to make the newly sized array work.
3424bf6a7   NeilBrown   md/raid5: don't i...
5150
5151
5152
5153
  	 * Don't add devices if we are reducing the number of
  	 * devices in the array.  This is because it is not possible
  	 * to correctly record the "partially reconstructed" state of
  	 * such devices during the reshape and confusion could result.
292695531   NeilBrown   [PATCH] md: Final...
5154
  	 */
87a8dec91   NeilBrown   md: simplify some...
5155
5156
5157
5158
5159
5160
  	if (mddev->delta_disks >= 0) {
  		int added_devices = 0;
  		list_for_each_entry(rdev, &mddev->disks, same_set)
  			if (rdev->raid_disk < 0 &&
  			    !test_bit(Faulty, &rdev->flags)) {
  				if (raid5_add_disk(mddev, rdev) == 0) {
87a8dec91   NeilBrown   md: simplify some...
5161
5162
5163
5164
5165
5166
  					if (rdev->raid_disk
  					    >= conf->previous_raid_disks) {
  						set_bit(In_sync, &rdev->flags);
  						added_devices++;
  					} else
  						rdev->recovery_offset = 0;
36fad858a   Namhyung Kim   md: introduce lin...
5167
5168
  
  					if (sysfs_link_rdev(mddev, rdev))
87a8dec91   NeilBrown   md: simplify some...
5169
  						/* Failure here is OK */;
50da08409   NeilBrown   md: don't abort c...
5170
  				}
87a8dec91   NeilBrown   md: simplify some...
5171
5172
5173
5174
5175
5176
  			} else if (rdev->raid_disk >= conf->previous_raid_disks
  				   && !test_bit(Faulty, &rdev->flags)) {
  				/* This is a spare that was manually added */
  				set_bit(In_sync, &rdev->flags);
  				added_devices++;
  			}
292695531   NeilBrown   [PATCH] md: Final...
5177

87a8dec91   NeilBrown   md: simplify some...
5178
5179
5180
5181
  		/* When a reshape changes the number of devices,
  		 * ->degraded is measured against the larger of the
  		 * pre and post number of devices.
  		 */
ec32a2bd3   NeilBrown   md: allow number ...
5182
  		spin_lock_irqsave(&conf->device_lock, flags);
9eb07c259   NeilBrown   md: fix 'degraded...
5183
  		mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
ec32a2bd3   NeilBrown   md: allow number ...
5184
5185
5186
  			- added_devices;
  		spin_unlock_irqrestore(&conf->device_lock, flags);
  	}
63c70c4f3   NeilBrown   [PATCH] md: Split...
5187
  	mddev->raid_disks = conf->raid_disks;
e516402c0   NeilBrown   md/raid5: set res...
5188
  	mddev->reshape_position = conf->reshape_progress;
850b2b420   NeilBrown   [PATCH] md: repla...
5189
  	set_bit(MD_CHANGE_DEVS, &mddev->flags);
f67055780   NeilBrown   [PATCH] md: Check...
5190

292695531   NeilBrown   [PATCH] md: Final...
5191
5192
5193
5194
5195
  	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
  	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
  	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
  	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
  	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
0da3c6194   NeilBrown   md: Improve name ...
5196
  						"reshape");
292695531   NeilBrown   [PATCH] md: Final...
5197
5198
5199
5200
  	if (!mddev->sync_thread) {
  		mddev->recovery = 0;
  		spin_lock_irq(&conf->device_lock);
  		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
fef9c61fd   NeilBrown   md/raid5: change ...
5201
  		conf->reshape_progress = MaxSector;
292695531   NeilBrown   [PATCH] md: Final...
5202
5203
5204
  		spin_unlock_irq(&conf->device_lock);
  		return -EAGAIN;
  	}
c8f517c44   NeilBrown   md/raid5 revise r...
5205
  	conf->reshape_checkpoint = jiffies;
292695531   NeilBrown   [PATCH] md: Final...
5206
5207
5208
5209
  	md_wakeup_thread(mddev->sync_thread);
  	md_new_event(mddev);
  	return 0;
  }
292695531   NeilBrown   [PATCH] md: Final...
5210

ec32a2bd3   NeilBrown   md: allow number ...
5211
5212
5213
  /* This is called from the reshape thread and should make any
   * changes needed in 'conf'
   */
d1688a6d5   NeilBrown   md/raid5: typedef...
5214
  static void end_reshape(struct r5conf *conf)
292695531   NeilBrown   [PATCH] md: Final...
5215
  {
292695531   NeilBrown   [PATCH] md: Final...
5216

f67055780   NeilBrown   [PATCH] md: Check...
5217
  	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
f67055780   NeilBrown   [PATCH] md: Check...
5218

f67055780   NeilBrown   [PATCH] md: Check...
5219
  		spin_lock_irq(&conf->device_lock);
cea9c2280   NeilBrown   md: add explicit ...
5220
  		conf->previous_raid_disks = conf->raid_disks;
fef9c61fd   NeilBrown   md/raid5: change ...
5221
  		conf->reshape_progress = MaxSector;
f67055780   NeilBrown   [PATCH] md: Check...
5222
  		spin_unlock_irq(&conf->device_lock);
b0f9ec047   NeilBrown   md/raid5: minor c...
5223
  		wake_up(&conf->wait_for_overlap);
16a53ecc3   NeilBrown   [PATCH] md: merge...
5224
5225
5226
5227
  
  		/* read-ahead size must cover two whole stripes, which is
  		 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
  		 */
4a5add499   NeilBrown   raid5: Don't set ...
5228
  		if (conf->mddev->queue) {
cea9c2280   NeilBrown   md: add explicit ...
5229
  			int data_disks = conf->raid_disks - conf->max_degraded;
09c9e5fa1   Andre Noll   md: convert conf-...
5230
  			int stripe = data_disks * ((conf->chunk_sectors << 9)
cea9c2280   NeilBrown   md: add explicit ...
5231
  						   / PAGE_SIZE);
16a53ecc3   NeilBrown   [PATCH] md: merge...
5232
5233
5234
  			if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
  				conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
  		}
292695531   NeilBrown   [PATCH] md: Final...
5235
  	}
292695531   NeilBrown   [PATCH] md: Final...
5236
  }
ec32a2bd3   NeilBrown   md: allow number ...
5237
5238
5239
  /* This is called from the raid5d thread with mddev_lock held.
   * It makes config changes to the device.
   */
fd01b88c7   NeilBrown   md: remove typede...
5240
  static void raid5_finish_reshape(struct mddev *mddev)
cea9c2280   NeilBrown   md: add explicit ...
5241
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
5242
  	struct r5conf *conf = mddev->private;
cea9c2280   NeilBrown   md: add explicit ...
5243
5244
  
  	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
ec32a2bd3   NeilBrown   md: allow number ...
5245
5246
5247
  		if (mddev->delta_disks > 0) {
  			md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
  			set_capacity(mddev->gendisk, mddev->array_sectors);
449aad3e2   NeilBrown   md: Use revalidat...
5248
  			revalidate_disk(mddev->gendisk);
ec32a2bd3   NeilBrown   md: allow number ...
5249
5250
  		} else {
  			int d;
ec32a2bd3   NeilBrown   md: allow number ...
5251
5252
5253
5254
5255
5256
5257
5258
  			mddev->degraded = conf->raid_disks;
  			for (d = 0; d < conf->raid_disks ; d++)
  				if (conf->disks[d].rdev &&
  				    test_bit(In_sync,
  					     &conf->disks[d].rdev->flags))
  					mddev->degraded--;
  			for (d = conf->raid_disks ;
  			     d < conf->raid_disks - mddev->delta_disks;
1a67dde0a   NeilBrown   md/raid5: Properl...
5259
  			     d++) {
3cb030020   NeilBrown   md: removing type...
5260
  				struct md_rdev *rdev = conf->disks[d].rdev;
1a67dde0a   NeilBrown   md/raid5: Properl...
5261
  				if (rdev && raid5_remove_disk(mddev, d) == 0) {
36fad858a   Namhyung Kim   md: introduce lin...
5262
  					sysfs_unlink_rdev(mddev, rdev);
1a67dde0a   NeilBrown   md/raid5: Properl...
5263
5264
5265
  					rdev->raid_disk = -1;
  				}
  			}
cea9c2280   NeilBrown   md: add explicit ...
5266
  		}
88ce4930e   NeilBrown   md/raid5: allow l...
5267
  		mddev->layout = conf->algorithm;
09c9e5fa1   Andre Noll   md: convert conf-...
5268
  		mddev->chunk_sectors = conf->chunk_sectors;
ec32a2bd3   NeilBrown   md: allow number ...
5269
5270
  		mddev->reshape_position = MaxSector;
  		mddev->delta_disks = 0;
cea9c2280   NeilBrown   md: add explicit ...
5271
5272
  	}
  }
fd01b88c7   NeilBrown   md: remove typede...
5273
  static void raid5_quiesce(struct mddev *mddev, int state)
72626685d   NeilBrown   [PATCH] md: add w...
5274
  {
d1688a6d5   NeilBrown   md/raid5: typedef...
5275
  	struct r5conf *conf = mddev->private;
72626685d   NeilBrown   [PATCH] md: add w...
5276
5277
  
  	switch(state) {
e464eafdb   NeilBrown   [PATCH] md: Suppo...
5278
5279
5280
  	case 2: /* resume for a suspend */
  		wake_up(&conf->wait_for_overlap);
  		break;
72626685d   NeilBrown   [PATCH] md: add w...
5281
5282
  	case 1: /* stop all writes */
  		spin_lock_irq(&conf->device_lock);
64bd660b5   NeilBrown   md: allow raid5_q...
5283
5284
5285
5286
  		/* '2' tells resync/reshape to pause so that all
  		 * active stripes can drain
  		 */
  		conf->quiesce = 2;
72626685d   NeilBrown   [PATCH] md: add w...
5287
  		wait_event_lock_irq(conf->wait_for_stripe,
46031f9a3   Raz Ben-Jehuda(caro)   [PATCH] md: allow...
5288
5289
  				    atomic_read(&conf->active_stripes) == 0 &&
  				    atomic_read(&conf->active_aligned_reads) == 0,
72626685d   NeilBrown   [PATCH] md: add w...
5290
  				    conf->device_lock, /* nothing */);
64bd660b5   NeilBrown   md: allow raid5_q...
5291
  		conf->quiesce = 1;
72626685d   NeilBrown   [PATCH] md: add w...
5292
  		spin_unlock_irq(&conf->device_lock);
64bd660b5   NeilBrown   md: allow raid5_q...
5293
5294
  		/* allow reshape to continue */
  		wake_up(&conf->wait_for_overlap);
72626685d   NeilBrown   [PATCH] md: add w...
5295
5296
5297
5298
5299
5300
  		break;
  
  	case 0: /* re-enable writes */
  		spin_lock_irq(&conf->device_lock);
  		conf->quiesce = 0;
  		wake_up(&conf->wait_for_stripe);
e464eafdb   NeilBrown   [PATCH] md: Suppo...
5301
  		wake_up(&conf->wait_for_overlap);
72626685d   NeilBrown   [PATCH] md: add w...
5302
5303
5304
  		spin_unlock_irq(&conf->device_lock);
  		break;
  	}
72626685d   NeilBrown   [PATCH] md: add w...
5305
  }
b15c2e57f   NeilBrown   [PATCH] md: move ...
5306

d562b0c43   NeilBrown   md: add ->takeove...
5307

fd01b88c7   NeilBrown   md: remove typede...
5308
  static void *raid45_takeover_raid0(struct mddev *mddev, int level)
54071b380   Trela Maciej   md:Add support fo...
5309
  {
e373ab109   NeilBrown   md/raid0: typedef...
5310
  	struct r0conf *raid0_conf = mddev->private;
d76c8420c   Randy Dunlap   raid5: fix build ...
5311
  	sector_t sectors;
54071b380   Trela Maciej   md:Add support fo...
5312

f1b29bcae   Dan Williams   md/raid4: permit ...
5313
  	/* for raid0 takeover only one zone is supported */
e373ab109   NeilBrown   md/raid0: typedef...
5314
  	if (raid0_conf->nr_strip_zones > 1) {
0c55e0225   NeilBrown   md/raid5: improve...
5315
5316
5317
  		printk(KERN_ERR "md/raid:%s: cannot takeover raid0 with more than one zone.
  ",
  		       mdname(mddev));
f1b29bcae   Dan Williams   md/raid4: permit ...
5318
5319
  		return ERR_PTR(-EINVAL);
  	}
e373ab109   NeilBrown   md/raid0: typedef...
5320
5321
  	sectors = raid0_conf->strip_zone[0].zone_end;
  	sector_div(sectors, raid0_conf->strip_zone[0].nb_dev);
3b71bd933   NeilBrown   md: Fix dev_secto...
5322
  	mddev->dev_sectors = sectors;
f1b29bcae   Dan Williams   md/raid4: permit ...
5323
  	mddev->new_level = level;
54071b380   Trela Maciej   md:Add support fo...
5324
5325
5326
5327
5328
5329
5330
5331
5332
  	mddev->new_layout = ALGORITHM_PARITY_N;
  	mddev->new_chunk_sectors = mddev->chunk_sectors;
  	mddev->raid_disks += 1;
  	mddev->delta_disks = 1;
  	/* make sure it will be not marked as dirty */
  	mddev->recovery_cp = MaxSector;
  
  	return setup_conf(mddev);
  }
fd01b88c7   NeilBrown   md: remove typede...
5333
  static void *raid5_takeover_raid1(struct mddev *mddev)
d562b0c43   NeilBrown   md: add ->takeove...
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
  {
  	int chunksect;
  
  	if (mddev->raid_disks != 2 ||
  	    mddev->degraded > 1)
  		return ERR_PTR(-EINVAL);
  
  	/* Should check if there are write-behind devices? */
  
  	chunksect = 64*2; /* 64K by default */
  
  	/* The array must be an exact multiple of chunksize */
  	while (chunksect && (mddev->array_sectors & (chunksect-1)))
  		chunksect >>= 1;
  
  	if ((chunksect<<9) < STRIPE_SIZE)
  		/* array size does not allow a suitable chunk size */
  		return ERR_PTR(-EINVAL);
  
  	mddev->new_level = 5;
  	mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
664e7c413   Andre Noll   md: Convert mddev...
5355
  	mddev->new_chunk_sectors = chunksect;
d562b0c43   NeilBrown   md: add ->takeove...
5356
5357
5358
  
  	return setup_conf(mddev);
  }
fd01b88c7   NeilBrown   md: remove typede...
5359
  static void *raid5_takeover_raid6(struct mddev *mddev)
fc9739c6d   NeilBrown   md: add takeover ...
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
  {
  	int new_layout;
  
  	switch (mddev->layout) {
  	case ALGORITHM_LEFT_ASYMMETRIC_6:
  		new_layout = ALGORITHM_LEFT_ASYMMETRIC;
  		break;
  	case ALGORITHM_RIGHT_ASYMMETRIC_6:
  		new_layout = ALGORITHM_RIGHT_ASYMMETRIC;
  		break;
  	case ALGORITHM_LEFT_SYMMETRIC_6:
  		new_layout = ALGORITHM_LEFT_SYMMETRIC;
  		break;
  	case ALGORITHM_RIGHT_SYMMETRIC_6:
  		new_layout = ALGORITHM_RIGHT_SYMMETRIC;
  		break;
  	case ALGORITHM_PARITY_0_6:
  		new_layout = ALGORITHM_PARITY_0;
  		break;
  	case ALGORITHM_PARITY_N:
  		new_layout = ALGORITHM_PARITY_N;
  		break;
  	default:
  		return ERR_PTR(-EINVAL);
  	}
  	mddev->new_level = 5;
  	mddev->new_layout = new_layout;
  	mddev->delta_disks = -1;
  	mddev->raid_disks -= 1;
  	return setup_conf(mddev);
  }
d562b0c43   NeilBrown   md: add ->takeove...
5391

fd01b88c7   NeilBrown   md: remove typede...
5392
  static int raid5_check_reshape(struct mddev *mddev)
b35460352   NeilBrown   md/raid5: allow l...
5393
  {
88ce4930e   NeilBrown   md/raid5: allow l...
5394
5395
5396
5397
  	/* For a 2-drive array, the layout and chunk size can be changed
  	 * immediately as not restriping is needed.
  	 * For larger arrays we record the new value - after validation
  	 * to be used by a reshape pass.
b35460352   NeilBrown   md/raid5: allow l...
5398
  	 */
d1688a6d5   NeilBrown   md/raid5: typedef...
5399
  	struct r5conf *conf = mddev->private;
597a711b6   NeilBrown   md: remove unnece...
5400
  	int new_chunk = mddev->new_chunk_sectors;
b35460352   NeilBrown   md/raid5: allow l...
5401

597a711b6   NeilBrown   md: remove unnece...
5402
  	if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout))
b35460352   NeilBrown   md/raid5: allow l...
5403
5404
  		return -EINVAL;
  	if (new_chunk > 0) {
0ba459d26   Andre Noll   md/raid5: Use is_...
5405
  		if (!is_power_of_2(new_chunk))
b35460352   NeilBrown   md/raid5: allow l...
5406
  			return -EINVAL;
597a711b6   NeilBrown   md: remove unnece...
5407
  		if (new_chunk < (PAGE_SIZE>>9))
b35460352   NeilBrown   md/raid5: allow l...
5408
  			return -EINVAL;
597a711b6   NeilBrown   md: remove unnece...
5409
  		if (mddev->array_sectors & (new_chunk-1))
b35460352   NeilBrown   md/raid5: allow l...
5410
5411
5412
5413
5414
  			/* not factor of array size */
  			return -EINVAL;
  	}
  
  	/* They look valid */
88ce4930e   NeilBrown   md/raid5: allow l...
5415
  	if (mddev->raid_disks == 2) {
597a711b6   NeilBrown   md: remove unnece...
5416
5417
5418
5419
  		/* can make the change immediately */
  		if (mddev->new_layout >= 0) {
  			conf->algorithm = mddev->new_layout;
  			mddev->layout = mddev->new_layout;
88ce4930e   NeilBrown   md/raid5: allow l...
5420
5421
  		}
  		if (new_chunk > 0) {
597a711b6   NeilBrown   md: remove unnece...
5422
5423
  			conf->chunk_sectors = new_chunk ;
  			mddev->chunk_sectors = new_chunk;
88ce4930e   NeilBrown   md/raid5: allow l...
5424
5425
5426
  		}
  		set_bit(MD_CHANGE_DEVS, &mddev->flags);
  		md_wakeup_thread(mddev->thread);
b35460352   NeilBrown   md/raid5: allow l...
5427
  	}
50ac168a6   NeilBrown   md: merge reconfi...
5428
  	return check_reshape(mddev);
88ce4930e   NeilBrown   md/raid5: allow l...
5429
  }
fd01b88c7   NeilBrown   md: remove typede...
5430
  static int raid6_check_reshape(struct mddev *mddev)
88ce4930e   NeilBrown   md/raid5: allow l...
5431
  {
597a711b6   NeilBrown   md: remove unnece...
5432
  	int new_chunk = mddev->new_chunk_sectors;
50ac168a6   NeilBrown   md: merge reconfi...
5433

597a711b6   NeilBrown   md: remove unnece...
5434
  	if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout))
88ce4930e   NeilBrown   md/raid5: allow l...
5435
  		return -EINVAL;
b35460352   NeilBrown   md/raid5: allow l...
5436
  	if (new_chunk > 0) {
0ba459d26   Andre Noll   md/raid5: Use is_...
5437
  		if (!is_power_of_2(new_chunk))
88ce4930e   NeilBrown   md/raid5: allow l...
5438
  			return -EINVAL;
597a711b6   NeilBrown   md: remove unnece...
5439
  		if (new_chunk < (PAGE_SIZE >> 9))
88ce4930e   NeilBrown   md/raid5: allow l...
5440
  			return -EINVAL;
597a711b6   NeilBrown   md: remove unnece...
5441
  		if (mddev->array_sectors & (new_chunk-1))
88ce4930e   NeilBrown   md/raid5: allow l...
5442
5443
  			/* not factor of array size */
  			return -EINVAL;
b35460352   NeilBrown   md/raid5: allow l...
5444
  	}
88ce4930e   NeilBrown   md/raid5: allow l...
5445
5446
  
  	/* They look valid */
50ac168a6   NeilBrown   md: merge reconfi...
5447
  	return check_reshape(mddev);
b35460352   NeilBrown   md/raid5: allow l...
5448
  }
fd01b88c7   NeilBrown   md: remove typede...
5449
  static void *raid5_takeover(struct mddev *mddev)
d562b0c43   NeilBrown   md: add ->takeove...
5450
5451
  {
  	/* raid5 can take over:
f1b29bcae   Dan Williams   md/raid4: permit ...
5452
  	 *  raid0 - if there is only one strip zone - make it a raid4 layout
d562b0c43   NeilBrown   md: add ->takeove...
5453
5454
5455
  	 *  raid1 - if there are two drives.  We need to know the chunk size
  	 *  raid4 - trivial - just use a raid4 layout.
  	 *  raid6 - Providing it is a *_6 layout
d562b0c43   NeilBrown   md: add ->takeove...
5456
  	 */
f1b29bcae   Dan Williams   md/raid4: permit ...
5457
5458
  	if (mddev->level == 0)
  		return raid45_takeover_raid0(mddev, 5);
d562b0c43   NeilBrown   md: add ->takeove...
5459
5460
  	if (mddev->level == 1)
  		return raid5_takeover_raid1(mddev);
e9d4758f6   NeilBrown   md: add takeover ...
5461
5462
5463
5464
5465
  	if (mddev->level == 4) {
  		mddev->new_layout = ALGORITHM_PARITY_N;
  		mddev->new_level = 5;
  		return setup_conf(mddev);
  	}
fc9739c6d   NeilBrown   md: add takeover ...
5466
5467
  	if (mddev->level == 6)
  		return raid5_takeover_raid6(mddev);
d562b0c43   NeilBrown   md: add ->takeove...
5468
5469
5470
  
  	return ERR_PTR(-EINVAL);
  }
fd01b88c7   NeilBrown   md: remove typede...
5471
  static void *raid4_takeover(struct mddev *mddev)
a78d38a1a   NeilBrown   md: add support f...
5472
  {
f1b29bcae   Dan Williams   md/raid4: permit ...
5473
5474
5475
  	/* raid4 can take over:
  	 *  raid0 - if there is only one strip zone
  	 *  raid5 - if layout is right
a78d38a1a   NeilBrown   md: add support f...
5476
  	 */
f1b29bcae   Dan Williams   md/raid4: permit ...
5477
5478
  	if (mddev->level == 0)
  		return raid45_takeover_raid0(mddev, 4);
a78d38a1a   NeilBrown   md: add support f...
5479
5480
5481
5482
5483
5484
5485
5486
  	if (mddev->level == 5 &&
  	    mddev->layout == ALGORITHM_PARITY_N) {
  		mddev->new_layout = 0;
  		mddev->new_level = 4;
  		return setup_conf(mddev);
  	}
  	return ERR_PTR(-EINVAL);
  }
d562b0c43   NeilBrown   md: add ->takeove...
5487

84fc4b56d   NeilBrown   md: rename "mdk_p...
5488
  static struct md_personality raid5_personality;
245f46c2c   NeilBrown   md: add ->takeove...
5489

fd01b88c7   NeilBrown   md: remove typede...
5490
  static void *raid6_takeover(struct mddev *mddev)
245f46c2c   NeilBrown   md: add ->takeove...
5491
5492
5493
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
  {
  	/* Currently can only take over a raid5.  We map the
  	 * personality to an equivalent raid6 personality
  	 * with the Q block at the end.
  	 */
  	int new_layout;
  
  	if (mddev->pers != &raid5_personality)
  		return ERR_PTR(-EINVAL);
  	if (mddev->degraded > 1)
  		return ERR_PTR(-EINVAL);
  	if (mddev->raid_disks > 253)
  		return ERR_PTR(-EINVAL);
  	if (mddev->raid_disks < 3)
  		return ERR_PTR(-EINVAL);
  
  	switch (mddev->layout) {
  	case ALGORITHM_LEFT_ASYMMETRIC:
  		new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
  		break;
  	case ALGORITHM_RIGHT_ASYMMETRIC:
  		new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
  		break;
  	case ALGORITHM_LEFT_SYMMETRIC:
  		new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
  		break;
  	case ALGORITHM_RIGHT_SYMMETRIC:
  		new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
  		break;
  	case ALGORITHM_PARITY_0:
  		new_layout = ALGORITHM_PARITY_0_6;
  		break;
  	case ALGORITHM_PARITY_N:
  		new_layout = ALGORITHM_PARITY_N;
  		break;
  	default:
  		return ERR_PTR(-EINVAL);
  	}
  	mddev->new_level = 6;
  	mddev->new_layout = new_layout;
  	mddev->delta_disks = 1;
  	mddev->raid_disks += 1;
  	return setup_conf(mddev);
  }
84fc4b56d   NeilBrown   md: rename "mdk_p...
5535
  static struct md_personality raid6_personality =
16a53ecc3   NeilBrown   [PATCH] md: merge...
5536
5537
5538
5539
5540
5541
5542
5543
5544
5545
5546
5547
5548
5549
  {
  	.name		= "raid6",
  	.level		= 6,
  	.owner		= THIS_MODULE,
  	.make_request	= make_request,
  	.run		= run,
  	.stop		= stop,
  	.status		= status,
  	.error_handler	= error,
  	.hot_add_disk	= raid5_add_disk,
  	.hot_remove_disk= raid5_remove_disk,
  	.spare_active	= raid5_spare_active,
  	.sync_request	= sync_request,
  	.resize		= raid5_resize,
80c3a6ce4   Dan Williams   md: add 'size' as...
5550
  	.size		= raid5_size,
50ac168a6   NeilBrown   md: merge reconfi...
5551
  	.check_reshape	= raid6_check_reshape,
f416885ef   NeilBrown   [PATCH] md: add s...
5552
  	.start_reshape  = raid5_start_reshape,
cea9c2280   NeilBrown   md: add explicit ...
5553
  	.finish_reshape = raid5_finish_reshape,
16a53ecc3   NeilBrown   [PATCH] md: merge...
5554
  	.quiesce	= raid5_quiesce,
245f46c2c   NeilBrown   md: add ->takeove...
5555
  	.takeover	= raid6_takeover,
16a53ecc3   NeilBrown   [PATCH] md: merge...
5556
  };
84fc4b56d   NeilBrown   md: rename "mdk_p...
5557
  static struct md_personality raid5_personality =
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5558
5559
  {
  	.name		= "raid5",
2604b703b   NeilBrown   [PATCH] md: remov...
5560
  	.level		= 5,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5561
5562
5563
5564
5565
5566
5567
5568
5569
5570
5571
  	.owner		= THIS_MODULE,
  	.make_request	= make_request,
  	.run		= run,
  	.stop		= stop,
  	.status		= status,
  	.error_handler	= error,
  	.hot_add_disk	= raid5_add_disk,
  	.hot_remove_disk= raid5_remove_disk,
  	.spare_active	= raid5_spare_active,
  	.sync_request	= sync_request,
  	.resize		= raid5_resize,
80c3a6ce4   Dan Williams   md: add 'size' as...
5572
  	.size		= raid5_size,
63c70c4f3   NeilBrown   [PATCH] md: Split...
5573
5574
  	.check_reshape	= raid5_check_reshape,
  	.start_reshape  = raid5_start_reshape,
cea9c2280   NeilBrown   md: add explicit ...
5575
  	.finish_reshape = raid5_finish_reshape,
72626685d   NeilBrown   [PATCH] md: add w...
5576
  	.quiesce	= raid5_quiesce,
d562b0c43   NeilBrown   md: add ->takeove...
5577
  	.takeover	= raid5_takeover,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5578
  };
84fc4b56d   NeilBrown   md: rename "mdk_p...
5579
  static struct md_personality raid4_personality =
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5580
  {
2604b703b   NeilBrown   [PATCH] md: remov...
5581
5582
5583
5584
5585
5586
5587
5588
5589
5590
5591
5592
5593
  	.name		= "raid4",
  	.level		= 4,
  	.owner		= THIS_MODULE,
  	.make_request	= make_request,
  	.run		= run,
  	.stop		= stop,
  	.status		= status,
  	.error_handler	= error,
  	.hot_add_disk	= raid5_add_disk,
  	.hot_remove_disk= raid5_remove_disk,
  	.spare_active	= raid5_spare_active,
  	.sync_request	= sync_request,
  	.resize		= raid5_resize,
80c3a6ce4   Dan Williams   md: add 'size' as...
5594
  	.size		= raid5_size,
3d37890ba   NeilBrown   [PATCH] md: allow...
5595
5596
  	.check_reshape	= raid5_check_reshape,
  	.start_reshape  = raid5_start_reshape,
cea9c2280   NeilBrown   md: add explicit ...
5597
  	.finish_reshape = raid5_finish_reshape,
2604b703b   NeilBrown   [PATCH] md: remov...
5598
  	.quiesce	= raid5_quiesce,
a78d38a1a   NeilBrown   md: add support f...
5599
  	.takeover	= raid4_takeover,
2604b703b   NeilBrown   [PATCH] md: remov...
5600
5601
5602
5603
  };
  
  /*
   * Module entry point: register the raid6, raid5 and raid4 personalities,
   * in that order.  The result of each registration is not inspected and
   * 0 is always returned.
   */
  static int __init raid5_init(void)
  {
  	struct md_personality *const personalities[] = {
  		&raid6_personality,
  		&raid5_personality,
  		&raid4_personality,
  	};
  	unsigned int i;
  
  	for (i = 0; i < sizeof(personalities) / sizeof(personalities[0]); i++)
  		register_md_personality(personalities[i]);
  
  	return 0;
  }
2604b703b   NeilBrown   [PATCH] md: remov...
5609
  static void raid5_exit(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5610
  {
16a53ecc3   NeilBrown   [PATCH] md: merge...
5611
  	unregister_md_personality(&raid6_personality);
2604b703b   NeilBrown   [PATCH] md: remov...
5612
5613
  	unregister_md_personality(&raid5_personality);
  	unregister_md_personality(&raid4_personality);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5614
5615
5616
5617
5618
  }
  
  module_init(raid5_init);
  module_exit(raid5_exit);
  
  MODULE_LICENSE("GPL");
  MODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD");
  
  /* Aliases for the RAID5 personality */
  MODULE_ALIAS("md-personality-4"); /* RAID5 */
  MODULE_ALIAS("md-raid5");
  MODULE_ALIAS("md-level-5");
  
  /* Aliases for the RAID4 personality */
  MODULE_ALIAS("md-raid4");
  MODULE_ALIAS("md-level-4");
  
  /* Aliases for the RAID6 personality */
  MODULE_ALIAS("md-personality-8"); /* RAID6 */
  MODULE_ALIAS("md-raid6");
  MODULE_ALIAS("md-level-6");
  
  /* This used to be two separate modules, they were: */
  MODULE_ALIAS("raid5");
  MODULE_ALIAS("raid6");