Blame view

fs/exofs/ore.c 29 KB
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
1
2
  /*
   * Copyright (C) 2005, 2006
27d2e1491   Boaz Harrosh   exofs: Remove IBM...
3
   * Avishay Traeger (avishay@gmail.com)
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
   * Copyright (C) 2008, 2009
   * Boaz Harrosh <bharrosh@panasas.com>
   *
   * This file is part of exofs.
   *
   * exofs is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation.  Since it is based on ext2, and the only
   * valid version of GPL for the Linux kernel is version 2, the only valid
   * version of GPL for exofs is version 2.
   *
   * exofs is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with exofs; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   */
5a0e3ad6a   Tejun Heo   include cleanup: ...
24
  #include <linux/slab.h>
143cb494c   Paul Gortmaker   fs: add module.h ...
25
  #include <linux/module.h>
5d952b839   Boaz Harrosh   exofs: RAID0 support
26
  #include <asm/div64.h>
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
27
  #include <linux/lcm.h>
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
28

a1fec1dbb   Boaz Harrosh   ore: RAID5 read
29
  #include "ore_raid.h"
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
30

cf283ade0   Boaz Harrosh   ore: Make ore its...
31
32
33
  MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
  MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
  MODULE_LICENSE("GPL");
5a51c0c7e   Boaz Harrosh   ore/exofs: Define...
34
35
36
37
38
39
40
41
42
  /* ore_verify_layout does a couple of things:
   * 1. Given a minimum number of needed parameters fixes up the rest of the
   *    members to be operatonals for the ore. The needed parameters are those
   *    that are defined by the pnfs-objects layout STD.
   * 2. Check to see if the current ore code actually supports these parameters
   *    for example stripe_unit must be a multple of the system PAGE_SIZE,
   *    and etc...
   * 3. Cache some havily used calculations that will be needed by users.
   */
5a51c0c7e   Boaz Harrosh   ore/exofs: Define...
43
44
45
46
47
48
  enum { BIO_MAX_PAGES_KMALLOC =
  		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),};
  
  int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
  {
  	u64 stripe_length;
44231e686   Boaz Harrosh   ore: Enable RAID5...
49
50
51
52
53
54
55
56
57
58
59
60
  	switch (layout->raid_algorithm) {
  	case PNFS_OSD_RAID_0:
  		layout->parity = 0;
  		break;
  	case PNFS_OSD_RAID_5:
  		layout->parity = 1;
  		break;
  	case PNFS_OSD_RAID_PQ:
  	case PNFS_OSD_RAID_4:
  	default:
  		ORE_ERR("Only RAID_0/5 for now
  ");
5a51c0c7e   Boaz Harrosh   ore/exofs: Define...
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
  		return -EINVAL;
  	}
  	if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
  		ORE_ERR("Stripe Unit(0x%llx)"
  			  " must be Multples of PAGE_SIZE(0x%lx)
  ",
  			  _LLU(layout->stripe_unit), PAGE_SIZE);
  		return -EINVAL;
  	}
  	if (layout->group_width) {
  		if (!layout->group_depth) {
  			ORE_ERR("group_depth == 0 && group_width != 0
  ");
  			return -EINVAL;
  		}
  		if (total_comps < (layout->group_width * layout->mirrors_p1)) {
  			ORE_ERR("Data Map wrong, "
  				"numdevs=%d < group_width=%d * mirrors=%d
  ",
  				total_comps, layout->group_width,
  				layout->mirrors_p1);
  			return -EINVAL;
  		}
  		layout->group_count = total_comps / layout->mirrors_p1 /
  						layout->group_width;
  	} else {
  		if (layout->group_depth) {
  			printk(KERN_NOTICE "Warning: group_depth ignored "
  				"group_width == 0 && group_depth == %lld
  ",
  				_LLU(layout->group_depth));
  		}
  		layout->group_width = total_comps / layout->mirrors_p1;
  		layout->group_depth = -1;
  		layout->group_count = 1;
  	}
  
  	stripe_length = (u64)layout->group_width * layout->stripe_unit;
  	if (stripe_length >= (1ULL << 32)) {
  		ORE_ERR("Stripe_length(0x%llx) >= 32bit is not supported
  ",
  			_LLU(stripe_length));
  		return -EINVAL;
  	}
  
  	layout->max_io_length =
  		(BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
  							layout->group_width;
769ba8d92   Boaz Harrosh   ore: RAID5 Write
109
110
111
112
113
114
115
116
  	if (layout->parity) {
  		unsigned stripe_length =
  				(layout->group_width - layout->parity) *
  				layout->stripe_unit;
  
  		layout->max_io_length /= stripe_length;
  		layout->max_io_length *= stripe_length;
  	}
5a51c0c7e   Boaz Harrosh   ore/exofs: Define...
117
118
119
  	return 0;
  }
  EXPORT_SYMBOL(ore_verify_layout);
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
120
  static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
121
  {
5bf696dad   Boaz Harrosh   exofs: Rename str...
122
  	return ios->oc->comps[index & ios->oc->single_comp].cred;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
123
  }
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
124
  static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
125
  {
5bf696dad   Boaz Harrosh   exofs: Rename str...
126
  	return &ios->oc->comps[index & ios->oc->single_comp].obj;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
127
  }
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
128
  static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
129
  {
3bd985685   Boaz Harrosh   ore: Support for ...
130
131
132
133
  	ORE_DBGMSG2("oc->first_dev=%d oc->numdevs=%d i=%d oc->ods=%p
  ",
  		    ios->oc->first_dev, ios->oc->numdevs, index,
  		    ios->oc->ods);
d866d875f   Boaz Harrosh   ore/exofs: Change...
134
  	return ore_comp_dev(ios->oc, index);
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
135
  }
769ba8d92   Boaz Harrosh   ore: RAID5 Write
136
  int  _ore_get_io_state(struct ore_layout *layout,
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
137
138
139
  			struct ore_components *oc, unsigned numdevs,
  			unsigned sgs_per_dev, unsigned num_par_pages,
  			struct ore_io_state **pios)
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
140
  {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
141
  	struct ore_io_state *ios;
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
  	struct page **pages;
  	struct osd_sg_entry *sgilist;
  	struct __alloc_all_io_state {
  		struct ore_io_state ios;
  		struct ore_per_dev_state per_dev[numdevs];
  		union {
  			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
  			struct page *pages[num_par_pages];
  		};
  	} *_aios;
  
  	if (likely(sizeof(*_aios) <= PAGE_SIZE)) {
  		_aios = kzalloc(sizeof(*_aios), GFP_KERNEL);
  		if (unlikely(!_aios)) {
  			ORE_DBGMSG("Failed kzalloc bytes=%zd
  ",
  				   sizeof(*_aios));
  			*pios = NULL;
  			return -ENOMEM;
  		}
  		pages = num_par_pages ? _aios->pages : NULL;
  		sgilist = sgs_per_dev ? _aios->sglist : NULL;
  		ios = &_aios->ios;
  	} else {
  		struct __alloc_small_io_state {
  			struct ore_io_state ios;
  			struct ore_per_dev_state per_dev[numdevs];
  		} *_aio_small;
  		union __extra_part {
  			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
  			struct page *pages[num_par_pages];
  		} *extra_part;
  
  		_aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL);
  		if (unlikely(!_aio_small)) {
  			ORE_DBGMSG("Failed alloc first part bytes=%zd
  ",
  				   sizeof(*_aio_small));
  			*pios = NULL;
  			return -ENOMEM;
  		}
  		extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL);
  		if (unlikely(!extra_part)) {
  			ORE_DBGMSG("Failed alloc second part bytes=%zd
  ",
  				   sizeof(*extra_part));
  			kfree(_aio_small);
  			*pios = NULL;
  			return -ENOMEM;
  		}
06886a5a3   Boaz Harrosh   exofs: Move all o...
192

a1fec1dbb   Boaz Harrosh   ore: RAID5 read
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
  		pages = num_par_pages ? extra_part->pages : NULL;
  		sgilist = sgs_per_dev ? extra_part->sglist : NULL;
  		/* In this case the per_dev[0].sgilist holds the pointer to
  		 * be freed
  		 */
  		ios = &_aio_small->ios;
  		ios->extra_part_alloc = true;
  	}
  
  	if (pages) {
  		ios->parity_pages = pages;
  		ios->max_par_pages = num_par_pages;
  	}
  	if (sgilist) {
  		unsigned d;
  
  		for (d = 0; d < numdevs; ++d) {
  			ios->per_dev[d].sglist = sgilist;
  			sgilist += sgs_per_dev;
  		}
  		ios->sgs_per_dev = sgs_per_dev;
06886a5a3   Boaz Harrosh   exofs: Move all o...
214
  	}
45d3abcb1   Boaz Harrosh   exofs: Move layou...
215
  	ios->layout = layout;
5bf696dad   Boaz Harrosh   exofs: Rename str...
216
  	ios->oc = oc;
b916c5cd4   Boaz Harrosh   ore: Only IO one ...
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
  	*pios = ios;
  	return 0;
  }
  
  /* Allocate an io_state for only a single group of devices
   *
   * Users that go on to call ore_read/write() must use this version because
   * it allocates the extra resources needed for striping and raid.
   * The ore might decide to IO fewer than @length bytes due to alignment
   * and constraints, as follows:
   * - The IO cannot cross a group boundary.
   * - In raid5/6 the end of the IO must align at the end of a stripe, e.g.
   *   (@offset + @length) % stripe_size == 0, or the complete range is within
   *   a single stripe.
   * - Memory conditions only permitted a shorter IO. (A user can use
   *   @length=~0 and check the returned ios->length for max_io_size.)
   *
   * The caller must check the returned ios->length (and/or ios->nr_pages) and
   * re-issue the pages that fall outside of ios->length.
   *
   * Returns 0 on success, or the error returned by _ore_get_io_state().
   */
  int  ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
  		      bool is_reading, u64 offset, u64 length,
  		      struct ore_io_state **pios)
  {
  	unsigned numdevs = layout->group_width * layout->mirrors_p1;
  	unsigned sgs_per_dev = 0;
  	unsigned max_par_pages = 0;
  	struct ore_io_state *ios;
  	int ret;
  
  	if (layout->parity && length) {
  		unsigned data_devs = layout->group_width - layout->parity;
  		unsigned stripe_size = layout->stripe_unit * data_devs;
  		unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
  		u32 remainder;
  		u64 num_stripes;
  		u64 num_raid_units;
  
  		/* Number of stripes touched by @length, rounded up */
  		num_stripes = div_u64_rem(length, stripe_size, &remainder);
  		if (remainder)
  			++num_stripes;
  
  		num_raid_units =  num_stripes * layout->parity;
  
  		if (!is_reading) {
  			/* For Writes add parity pages array.
  			 * NOTE(review): the sizeof() factor makes this larger
  			 * than a plain page count - confirm it is intended.
  			 */
  			max_par_pages = num_raid_units * pages_in_unit *
  						sizeof(struct page *);
  		} else {
  			/* For reads add per_dev sglist array */
  			/* TODO: Raid 6 we need twice more. Actually:
  			*         num_stripes / LCMdP(W,P);
  			*         if (W%P != 0) num_stripes *= parity;
  			*/
  
  			/* first/last seg is split */
  			num_raid_units += layout->group_width;
  			sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
  		}
  	}
  
  	ret = _ore_get_io_state(layout, oc, numdevs, sgs_per_dev, max_par_pages,
  				pios);
  	if (unlikely(ret))
  		return ret;
  
  	ios = *pios;
  	ios->reading = is_reading;
  	ios->offset = offset;
  
  	/* A zero @length request carries no striping information */
  	if (!length)
  		return 0;
  
  	ore_calc_stripe_info(layout, offset, length, &ios->si);
  	ios->length = ios->si.length;
  	ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
  	if (layout->parity)
  		_ore_post_alloc_raid_stuff(ios);
  
  	return 0;
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
295
  EXPORT_SYMBOL(ore_get_rw_state);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
296

b916c5cd4   Boaz Harrosh   ore: Only IO one ...
297
298
299
300
301
302
303
  /* Allocate an io_state for all the devices in the comps array
   *
   * This version of io_state allocation is used mostly by create/remove
   * and trunc where we currently need all the devices. The only wastful
   * bit is the read/write_attributes with no IO. Those sites should
   * be converted to use ore_get_rw_state() with length=0
   */
5bf696dad   Boaz Harrosh   exofs: Rename str...
304
  int  ore_get_io_state(struct ore_layout *layout, struct ore_components *oc,
b916c5cd4   Boaz Harrosh   ore: Only IO one ...
305
  		      struct ore_io_state **pios)
e1042ba09   Boaz Harrosh   exofs: Add offset...
306
  {
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
307
  	return _ore_get_io_state(layout, oc, oc->numdevs, 0, 0, pios);
e1042ba09   Boaz Harrosh   exofs: Add offset...
308
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
309
  EXPORT_SYMBOL(ore_get_io_state);
e1042ba09   Boaz Harrosh   exofs: Add offset...
310

8ff660ab8   Boaz Harrosh   exofs: Rename rai...
311
  void ore_put_io_state(struct ore_io_state *ios)
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
312
  {
06886a5a3   Boaz Harrosh   exofs: Move all o...
313
314
  	if (ios) {
  		unsigned i;
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
315

06886a5a3   Boaz Harrosh   exofs: Move all o...
316
  		for (i = 0; i < ios->numdevs; i++) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
317
  			struct ore_per_dev_state *per_dev = &ios->per_dev[i];
06886a5a3   Boaz Harrosh   exofs: Move all o...
318
319
320
321
322
323
  
  			if (per_dev->or)
  				osd_end_request(per_dev->or);
  			if (per_dev->bio)
  				bio_put(per_dev->bio);
  		}
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
324
  		_ore_free_raid_stuff(ios);
06886a5a3   Boaz Harrosh   exofs: Move all o...
325
  		kfree(ios);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
326
  	}
06886a5a3   Boaz Harrosh   exofs: Move all o...
327
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
328
  EXPORT_SYMBOL(ore_put_io_state);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
329

8ff660ab8   Boaz Harrosh   exofs: Rename rai...
330
  /* Done-callback installed by ore_io_execute() for synchronous IO: @p is the
   * completion the submitter is waiting on. @ios is not used.
   */
  static void _sync_done(struct ore_io_state *ios, void *p)
  {
  	complete((struct completion *)p);
  }
  
  static void _last_io(struct kref *kref)
  {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
339
340
  	struct ore_io_state *ios = container_of(
  					kref, struct ore_io_state, kref);
06886a5a3   Boaz Harrosh   exofs: Move all o...
341
342
343
344
345
346
  
  	ios->done(ios, ios->private);
  }
  
  static void _done_io(struct osd_request *or, void *p)
  {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
347
  	struct ore_io_state *ios = p;
06886a5a3   Boaz Harrosh   exofs: Move all o...
348
349
350
  
  	kref_put(&ios->kref, _last_io);
  }
769ba8d92   Boaz Harrosh   ore: RAID5 Write
351
  int ore_io_execute(struct ore_io_state *ios)
06886a5a3   Boaz Harrosh   exofs: Move all o...
352
353
354
355
356
357
358
359
360
361
362
363
364
365
  {
  	DECLARE_COMPLETION_ONSTACK(wait);
  	bool sync = (ios->done == NULL);
  	int i, ret;
  
  	if (sync) {
  		ios->done = _sync_done;
  		ios->private = &wait;
  	}
  
  	for (i = 0; i < ios->numdevs; i++) {
  		struct osd_request *or = ios->per_dev[i].or;
  		if (unlikely(!or))
  			continue;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
366
  		ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
06886a5a3   Boaz Harrosh   exofs: Move all o...
367
  		if (unlikely(ret)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
368
369
  			ORE_DBGMSG("Failed to osd_finalize_request() => %d
  ",
06886a5a3   Boaz Harrosh   exofs: Move all o...
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
  				     ret);
  			return ret;
  		}
  	}
  
  	kref_init(&ios->kref);
  
  	for (i = 0; i < ios->numdevs; i++) {
  		struct osd_request *or = ios->per_dev[i].or;
  		if (unlikely(!or))
  			continue;
  
  		kref_get(&ios->kref);
  		osd_execute_request_async(or, _done_io, ios);
  	}
  
  	kref_put(&ios->kref, _last_io);
  	ret = 0;
  
  	if (sync) {
  		wait_for_completion(&wait);
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
391
  		ret = ore_check_io(ios, NULL);
06886a5a3   Boaz Harrosh   exofs: Move all o...
392
  	}
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
393
394
  	return ret;
  }
22ddc5563   Boaz Harrosh   exofs: Recover in...
395
396
397
398
399
400
401
402
403
404
405
406
407
408
  /* Zero the memory described by every segment of @bio. A segment whose
   * length is exactly PAGE_SIZE has its page cleared with clear_highpage();
   * any other segment is zeroed over its offset/length range only.
   */
  static void _clear_bio(struct bio *bio)
  {
  	struct bio_vec *seg;
  	unsigned idx;
  
  	__bio_for_each_segment(seg, bio, idx, 0) {
  		unsigned seg_len = seg->bv_len;
  
  		if (unlikely(seg_len != PAGE_SIZE))
  			zero_user(seg->bv_page, seg->bv_offset, seg_len);
  		else
  			clear_highpage(seg->bv_page);
  	}
  }
4b46c9f5c   Boaz Harrosh   ore/exofs: Change...
409
  /* Inspect the sense data of every per-device request of @ios.
   *
   * @ios:          io_state whose requests have completed.
   * @on_dev_error: optional callback, invoked once for every device whose
   *                request failed and could not be recovered here.
   *
   * A request that fails with OSD_ERR_PRI_CLEAR_PAGES and owns a bio is
   * treated as recovered: its bio is zeroed and the error is dropped.
   *
   * Returns 0 if no unrecovered error was found, otherwise the Linux error
   * code of the failure with the highest osd error priority (on a tie the
   * later device wins).
   */
  int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
  {
  	enum osd_err_priority acumulated_osd_err = 0;
  	int acumulated_lin_err = 0;
  	int i;
  
  	for (i = 0; i < ios->numdevs; i++) {
  		struct osd_sense_info osi;
  		struct ore_per_dev_state *per_dev = &ios->per_dev[i];
  		struct osd_request *or = per_dev->or;
  		int ret;
  
  		if (unlikely(!or))
  			continue;
  
  		ret = osd_req_decode_sense(or, &osi);
  		if (likely(!ret))
  			continue;
  
  		/* A request may exist without a bio (ore_put_io_state() tests
  		 * the two independently). Without a bio there is nothing to
  		 * clear, so the error is reported to the caller instead of
  		 * dereferencing a NULL bio.
  		 */
  		if ((OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) &&
  		    per_dev->bio) {
  			/* start read offset passed endof file */
  			_clear_bio(per_dev->bio);
  			ORE_DBGMSG("start read offset passed end of file "
  				"offset=0x%llx, length=0x%llx\n",
  				_LLU(per_dev->offset),
  				_LLU(per_dev->length));
  
  			continue; /* we recovered */
  		}
  
  		if (on_dev_error) {
  			u64 residual = ios->reading ?
  					or->in.residual : or->out.residual;
  			u64 offset = (ios->offset + ios->length) - residual;
  			unsigned dev = per_dev->dev - ios->oc->first_dev;
  			struct ore_dev *od = ios->oc->ods[dev];
  
  			on_dev_error(ios, od, dev, osi.osd_err_pri,
  				     offset, residual);
  		}
  
  		/* Remember the most severe error seen so far */
  		if (osi.osd_err_pri >= acumulated_osd_err) {
  			acumulated_osd_err = osi.osd_err_pri;
  			acumulated_lin_err = ret;
  		}
  	}
  
  	return acumulated_lin_err;
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
455
  EXPORT_SYMBOL(ore_check_io);
06886a5a3   Boaz Harrosh   exofs: Move all o...
456

b367e78bd   Boaz Harrosh   exofs: Prepare fo...
457
458
459
  /*
   * L - logical offset into the file
   *
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
460
461
   * D - number of Data devices
   *	D = group_width - parity
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
462
   *
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
463
464
   * U - The number of bytes in a stripe within a group
   *	U =  stripe_unit * D
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
465
   *
50a76fd3c   Boaz Harrosh   exofs: groups sup...
466
467
   * T - The number of bytes striped within a group of component objects
   *     (before advancing to the next group)
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
468
   *	T = U * group_depth
50a76fd3c   Boaz Harrosh   exofs: groups sup...
469
470
471
   *
   * S - The number of bytes striped across all component objects
   *     before the pattern repeats
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
472
   *	S = T * group_count
50a76fd3c   Boaz Harrosh   exofs: groups sup...
473
   *
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
474
   * M - The "major" (i.e., across all components) cycle number
50a76fd3c   Boaz Harrosh   exofs: groups sup...
475
476
   *	M = L / S
   *
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
477
   * G - Counts the groups from the beginning of the major cycle
50a76fd3c   Boaz Harrosh   exofs: groups sup...
478
479
480
   *	G = (L - (M * S)) / T	[or (L % S) / T]
   *
   * H - The byte offset within the group
50a76fd3c   Boaz Harrosh   exofs: groups sup...
481
482
483
   *	H = (L - (M * S)) % T	[or (L % S) % T]
   *
   * N - The "minor" (i.e., across the group) stripe number
50a76fd3c   Boaz Harrosh   exofs: groups sup...
484
   *	N = H / U
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
485
486
487
   *
   * C - The component index corresponding to L
   *
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
488
489
   *	C = (H - (N * U)) / stripe_unit + G * D
   *	[or (L % U) / stripe_unit + G * D]
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
490
491
   *
   * O - The component offset corresponding to L
50a76fd3c   Boaz Harrosh   exofs: groups sup...
492
   *	O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
   *
   * LCMdP – Parity cycle: Lowest Common Multiple of group_width, parity
   *          divide by parity
   *	LCMdP = lcm(group_width, parity) / parity
   *
   * R - The parity Rotation stripe
   *     (Note parity cycle always starts at a group's boundary)
   *	R = N % LCMdP
   *
   * I = the first parity device index
   *	I = (group_width + group_width - R*parity - parity) % group_width
   *
   * Craid - The component index Rotated
   *	Craid = (group_width + C - R*parity) % group_width
   *      (We add the group_width to avoid negative numbers modulo math)
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
508
   */
611d7a5dc   Boaz Harrosh   ore: Make ore_cal...
509
  /* Translate a logical file range into striping coordinates.
   *
   * @layout:      the striping layout to use.
   * @file_offset: logical offset into the file (L in the legend above).
   * @length:      number of bytes the caller wants to IO.
   * @si:          output; device, parity device, object offset, offset in
   *               the stripe unit, and the length that can be done without
   *               leaving the current group (capped at @length).
   *
   * The single-letter locals follow the legend in the comment above.
   */
  void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
  			  u64 length, struct ore_striping_info *si)
  {
  	u32	stripe_unit = layout->stripe_unit;
  	u32	group_width = layout->group_width;
  	u64	group_depth = layout->group_depth;
  	u32	parity      = layout->parity;
  
  	u32	D = group_width - parity;
  	u32	U = D * stripe_unit;
  	u64	T = U * group_depth;
  	u64	S = T * layout->group_count;
  	u64	M = div64_u64(file_offset, S);
  
  	/*
  	G = (L - (M * S)) / T
  	H = (L - (M * S)) % T
  	*/
  	u64	LmodS = file_offset - M * S;
  	u32	G = div64_u64(LmodS, T);
  	u64	H = LmodS - G * T;
  
  	u32	N = div_u64(H, U);
  
  	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
  	u32	C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
  
  	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
  
  	/* N and stripe_unit are both u32: widen before multiplying so the
  	 * product does not wrap once the offset inside a component object
  	 * reaches 4GiB.
  	 */
  	si->obj_offset = si->unit_off + ((u64)N * stripe_unit) +
  				  (M * group_depth * stripe_unit);
  
  	if (parity) {
  		u32 LCMdP = lcm(group_width, parity) / parity;
  		/* R     = N % LCMdP; */
  		u32 RxP   = (N % LCMdP) * parity;
  		u32 first_dev = C - C % group_width;
  
  		si->par_dev = (group_width + group_width - parity - RxP) %
  			      group_width + first_dev;
  		si->dev = (group_width + C - RxP) % group_width + first_dev;
  		si->bytes_in_stripe = U;
  		/* Same widening as above: N * U must not wrap at 32 bits */
  		si->first_stripe_start = M * S + G * T + (u64)N * U;
  	} else {
  		/* Make the math correct see _prepare_one_group */
  		si->par_dev = group_width;
  		si->dev = C;
  	}
  
  	si->dev *= layout->mirrors_p1;
  	si->par_dev *= layout->mirrors_p1;
  	si->offset = file_offset;
  	/* Bytes left in this group, capped at what the caller asked for */
  	si->length = T - H;
  	if (si->length > length)
  		si->length = length;
  	si->M = M;
  }
611d7a5dc   Boaz Harrosh   ore: Make ore_cal...
565
  EXPORT_SYMBOL(ore_calc_stripe_info);
5d952b839   Boaz Harrosh   exofs: RAID0 support
566

a1fec1dbb   Boaz Harrosh   ore: RAID5 read
567
568
569
  int _ore_add_stripe_unit(struct ore_io_state *ios,  unsigned *cur_pg,
  			 unsigned pgbase, struct page **pages,
  			 struct ore_per_dev_state *per_dev, int cur_len)
5d952b839   Boaz Harrosh   exofs: RAID0 support
570
  {
86093aaff   Boaz Harrosh   exofs: convert io...
571
  	unsigned pg = *cur_pg;
5d952b839   Boaz Harrosh   exofs: RAID0 support
572
  	struct request_queue *q =
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
573
  			osd_request_queue(_ios_od(ios, per_dev->dev));
bbf9a31bb   Boaz Harrosh   ore: Support for ...
574
575
  	unsigned len = cur_len;
  	int ret;
5d952b839   Boaz Harrosh   exofs: RAID0 support
576
577
578
579
  
  	if (per_dev->bio == NULL) {
  		unsigned pages_in_stripe = ios->layout->group_width *
  					(ios->layout->stripe_unit / PAGE_SIZE);
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
580
581
582
583
584
  		unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
  					(ios->layout->group_width -
  					 ios->layout->parity);
  		unsigned bio_size = (nr_pages + pages_in_stripe) /
  					ios->layout->group_width;
5d952b839   Boaz Harrosh   exofs: RAID0 support
585
586
587
  
  		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
  		if (unlikely(!per_dev->bio)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
588
589
  			ORE_DBGMSG("Failed to allocate BIO size=%u
  ",
5d952b839   Boaz Harrosh   exofs: RAID0 support
590
  				     bio_size);
bbf9a31bb   Boaz Harrosh   ore: Support for ...
591
592
  			ret = -ENOMEM;
  			goto out;
5d952b839   Boaz Harrosh   exofs: RAID0 support
593
594
595
596
  		}
  	}
  
  	while (cur_len > 0) {
86093aaff   Boaz Harrosh   exofs: convert io...
597
598
  		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
  		unsigned added_len;
5d952b839   Boaz Harrosh   exofs: RAID0 support
599

86093aaff   Boaz Harrosh   exofs: convert io...
600
  		cur_len -= pglen;
5d952b839   Boaz Harrosh   exofs: RAID0 support
601

a1fec1dbb   Boaz Harrosh   ore: RAID5 read
602
  		added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
86093aaff   Boaz Harrosh   exofs: convert io...
603
  					    pglen, pgbase);
bbf9a31bb   Boaz Harrosh   ore: Support for ...
604
  		if (unlikely(pglen != added_len)) {
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
605
606
607
  			ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u
  ",
  				   per_dev->bio->bi_vcnt);
bbf9a31bb   Boaz Harrosh   ore: Support for ...
608
609
610
  			ret = -ENOMEM;
  			goto out;
  		}
769ba8d92   Boaz Harrosh   ore: RAID5 Write
611
  		_add_stripe_page(ios->sp2d, &ios->si, pages[pg]);
86093aaff   Boaz Harrosh   exofs: convert io...
612
613
  		pgbase = 0;
  		++pg;
5d952b839   Boaz Harrosh   exofs: RAID0 support
614
615
  	}
  	BUG_ON(cur_len);
bbf9a31bb   Boaz Harrosh   ore: Support for ...
616
  	per_dev->length += len;
86093aaff   Boaz Harrosh   exofs: convert io...
617
  	*cur_pg = pg;
bbf9a31bb   Boaz Harrosh   ore: Support for ...
618
619
620
621
622
623
624
  	ret = 0;
  out:	/* we fail the complete unit on an error eg don't advance
  	 * per_dev->length and cur_pg. This means that we might have a bigger
  	 * bio than the CDB requested length (per_dev->length). That's fine
  	 * only the oposite is fatal.
  	 */
  	return ret;
5d952b839   Boaz Harrosh   exofs: RAID0 support
625
  }
982607540   Boaz Harrosh   ore: cleanup: Emb...
626
  static int _prepare_for_striping(struct ore_io_state *ios)
5d952b839   Boaz Harrosh   exofs: RAID0 support
627
  {
982607540   Boaz Harrosh   ore: cleanup: Emb...
628
  	struct ore_striping_info *si = &ios->si;
5d952b839   Boaz Harrosh   exofs: RAID0 support
629
  	unsigned stripe_unit = ios->layout->stripe_unit;
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
630
  	unsigned mirrors_p1 = ios->layout->mirrors_p1;
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
631
632
  	unsigned group_width = ios->layout->group_width;
  	unsigned devs_in_group = group_width * mirrors_p1;
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
633
  	unsigned dev = si->dev;
50a76fd3c   Boaz Harrosh   exofs: groups sup...
634
  	unsigned first_dev = dev - (dev % devs_in_group);
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
635
  	unsigned dev_order;
50a76fd3c   Boaz Harrosh   exofs: groups sup...
636
  	unsigned cur_pg = ios->pages_consumed;
982607540   Boaz Harrosh   ore: cleanup: Emb...
637
  	u64 length = ios->length;
86093aaff   Boaz Harrosh   exofs: convert io...
638
  	int ret = 0;
5d952b839   Boaz Harrosh   exofs: RAID0 support
639

982607540   Boaz Harrosh   ore: cleanup: Emb...
640
  	if (!ios->pages) {
982607540   Boaz Harrosh   ore: cleanup: Emb...
641
642
643
  		ios->numdevs = ios->layout->mirrors_p1;
  		return 0;
  	}
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
644
645
646
647
  	BUG_ON(length > si->length);
  
  	dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev);
  	si->cur_comp = dev_order;
769ba8d92   Boaz Harrosh   ore: RAID5 Write
648
  	si->cur_pg = si->unit_off / PAGE_SIZE;
982607540   Boaz Harrosh   ore: cleanup: Emb...
649

5d952b839   Boaz Harrosh   exofs: RAID0 support
650
  	while (length) {
b916c5cd4   Boaz Harrosh   ore: Only IO one ...
651
652
  		unsigned comp = dev - first_dev;
  		struct ore_per_dev_state *per_dev = &ios->per_dev[comp];
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
653
  		unsigned cur_len, page_off = 0;
5d952b839   Boaz Harrosh   exofs: RAID0 support
654
655
  
  		if (!per_dev->length) {
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
656
  			per_dev->dev = dev;
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
657
658
  			if (dev == si->dev) {
  				WARN_ON(dev == si->par_dev);
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
659
660
661
662
  				per_dev->offset = si->obj_offset;
  				cur_len = stripe_unit - si->unit_off;
  				page_off = si->unit_off & ~PAGE_MASK;
  				BUG_ON(page_off && (page_off != ios->pgbase));
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
663
664
665
666
667
668
669
670
  			} else {
  				if (si->cur_comp > dev_order)
  					per_dev->offset =
  						si->obj_offset - si->unit_off;
  				else /* si->cur_comp < dev_order */
  					per_dev->offset =
  						si->obj_offset + stripe_unit -
  								   si->unit_off;
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
671
672
  				cur_len = stripe_unit;
  			}
5d952b839   Boaz Harrosh   exofs: RAID0 support
673
  		} else {
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
674
  			cur_len = stripe_unit;
5d952b839   Boaz Harrosh   exofs: RAID0 support
675
  		}
b367e78bd   Boaz Harrosh   exofs: Prepare fo...
676
677
  		if (cur_len >= length)
  			cur_len = length;
5d952b839   Boaz Harrosh   exofs: RAID0 support
678

a1fec1dbb   Boaz Harrosh   ore: RAID5 read
679
680
  		ret = _ore_add_stripe_unit(ios, &cur_pg, page_off, ios->pages,
  					   per_dev, cur_len);
5d952b839   Boaz Harrosh   exofs: RAID0 support
681
682
  		if (unlikely(ret))
  			goto out;
6e31609b1   Boaz Harrosh   exofs: Remove use...
683
684
  		dev += mirrors_p1;
  		dev = (dev % devs_in_group) + first_dev;
5d952b839   Boaz Harrosh   exofs: RAID0 support
685
686
  
  		length -= cur_len;
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
687
688
  
  		si->cur_comp = (si->cur_comp + 1) % group_width;
769ba8d92   Boaz Harrosh   ore: RAID5 Write
689
690
  		if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) {
  			if (!length && ios->sp2d) {
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
691
692
693
694
  				/* If we are writing and this is the very last
  				 * stripe. then operate on parity dev.
  				 */
  				dev = si->par_dev;
769ba8d92   Boaz Harrosh   ore: RAID5 Write
695
696
  			}
  			if (ios->sp2d)
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
  				/* In writes cur_len just means if it's the
  				 * last one. See _ore_add_parity_unit.
  				 */
  				cur_len = length;
  			per_dev = &ios->per_dev[dev - first_dev];
  			if (!per_dev->length) {
  				/* Only/always the parity unit of the first
  				 * stripe will be empty. So this is a chance to
  				 * initialize the per_dev info.
  				 */
  				per_dev->dev = dev;
  				per_dev->offset = si->obj_offset - si->unit_off;
  			}
  
  			ret = _ore_add_parity_unit(ios, si, per_dev, cur_len);
  			if (unlikely(ret))
  					goto out;
  
  			/* Rotate next par_dev backwards with wraping */
  			si->par_dev = (devs_in_group + si->par_dev -
  				       ios->layout->parity * mirrors_p1) %
  				      devs_in_group + first_dev;
  			/* Next stripe, start fresh */
  			si->cur_comp = 0;
769ba8d92   Boaz Harrosh   ore: RAID5 Write
721
  			si->cur_pg = 0;
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
722
  		}
5d952b839   Boaz Harrosh   exofs: RAID0 support
723
724
  	}
  out:
b916c5cd4   Boaz Harrosh   ore: Only IO one ...
725
  	ios->numdevs = devs_in_group;
50a76fd3c   Boaz Harrosh   exofs: groups sup...
726
  	ios->pages_consumed = cur_pg;
bbf9a31bb   Boaz Harrosh   ore: Support for ...
727
728
729
730
731
732
733
  	if (unlikely(ret)) {
  		if (length == ios->length)
  			return ret;
  		else
  			ios->length -= length;
  	}
  	return 0;
5d952b839   Boaz Harrosh   exofs: RAID0 support
734
  }
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
735
  int ore_create(struct ore_io_state *ios)
06886a5a3   Boaz Harrosh   exofs: Move all o...
736
737
  {
  	int i, ret;
5bf696dad   Boaz Harrosh   exofs: Rename str...
738
  	for (i = 0; i < ios->oc->numdevs; i++) {
06886a5a3   Boaz Harrosh   exofs: Move all o...
739
  		struct osd_request *or;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
740
  		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
06886a5a3   Boaz Harrosh   exofs: Move all o...
741
  		if (unlikely(!or)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
742
743
  			ORE_ERR("%s: osd_start_request failed
  ", __func__);
06886a5a3   Boaz Harrosh   exofs: Move all o...
744
745
746
747
748
  			ret = -ENOMEM;
  			goto out;
  		}
  		ios->per_dev[i].or = or;
  		ios->numdevs++;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
749
  		osd_req_create_object(or, _ios_obj(ios, i));
06886a5a3   Boaz Harrosh   exofs: Move all o...
750
  	}
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
751
  	ret = ore_io_execute(ios);
06886a5a3   Boaz Harrosh   exofs: Move all o...
752
753
754
755
  
  out:
  	return ret;
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
756
  EXPORT_SYMBOL(ore_create);
06886a5a3   Boaz Harrosh   exofs: Move all o...
757

8ff660ab8   Boaz Harrosh   exofs: Rename rai...
758
  int ore_remove(struct ore_io_state *ios)
06886a5a3   Boaz Harrosh   exofs: Move all o...
759
760
  {
  	int i, ret;
5bf696dad   Boaz Harrosh   exofs: Rename str...
761
  	for (i = 0; i < ios->oc->numdevs; i++) {
06886a5a3   Boaz Harrosh   exofs: Move all o...
762
  		struct osd_request *or;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
763
  		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
06886a5a3   Boaz Harrosh   exofs: Move all o...
764
  		if (unlikely(!or)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
765
766
  			ORE_ERR("%s: osd_start_request failed
  ", __func__);
06886a5a3   Boaz Harrosh   exofs: Move all o...
767
768
769
770
771
  			ret = -ENOMEM;
  			goto out;
  		}
  		ios->per_dev[i].or = or;
  		ios->numdevs++;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
772
  		osd_req_remove_object(or, _ios_obj(ios, i));
06886a5a3   Boaz Harrosh   exofs: Move all o...
773
  	}
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
774
  	ret = ore_io_execute(ios);
06886a5a3   Boaz Harrosh   exofs: Move all o...
775
776
777
778
  
  out:
  	return ret;
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
779
  EXPORT_SYMBOL(ore_remove);
06886a5a3   Boaz Harrosh   exofs: Move all o...
780

8ff660ab8   Boaz Harrosh   exofs: Rename rai...
781
  static int _write_mirror(struct ore_io_state *ios, int cur_comp)
06886a5a3   Boaz Harrosh   exofs: Move all o...
782
  {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
783
  	struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
5d952b839   Boaz Harrosh   exofs: RAID0 support
784
785
786
  	unsigned dev = ios->per_dev[cur_comp].dev;
  	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
  	int ret = 0;
06886a5a3   Boaz Harrosh   exofs: Move all o...
787

50a76fd3c   Boaz Harrosh   exofs: groups sup...
788
789
  	if (ios->pages && !master_dev->length)
  		return 0; /* Just an empty slot */
5d952b839   Boaz Harrosh   exofs: RAID0 support
790
  	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
791
  		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
06886a5a3   Boaz Harrosh   exofs: Move all o...
792
  		struct osd_request *or;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
793
  		or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
06886a5a3   Boaz Harrosh   exofs: Move all o...
794
  		if (unlikely(!or)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
795
796
  			ORE_ERR("%s: osd_start_request failed
  ", __func__);
06886a5a3   Boaz Harrosh   exofs: Move all o...
797
798
799
  			ret = -ENOMEM;
  			goto out;
  		}
5d952b839   Boaz Harrosh   exofs: RAID0 support
800
  		per_dev->or = or;
06886a5a3   Boaz Harrosh   exofs: Move all o...
801

86093aaff   Boaz Harrosh   exofs: convert io...
802
  		if (ios->pages) {
06886a5a3   Boaz Harrosh   exofs: Move all o...
803
  			struct bio *bio;
5d952b839   Boaz Harrosh   exofs: RAID0 support
804
  			if (per_dev != master_dev) {
04dc1e88a   Boaz Harrosh   exofs: Multi-devi...
805
  				bio = bio_kmalloc(GFP_KERNEL,
5d952b839   Boaz Harrosh   exofs: RAID0 support
806
  						  master_dev->bio->bi_max_vecs);
04dc1e88a   Boaz Harrosh   exofs: Multi-devi...
807
  				if (unlikely(!bio)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
808
  					ORE_DBGMSG(
426d31071   Paul Bolle   fix printk typo '...
809
810
  					      "Failed to allocate BIO size=%u
  ",
5d952b839   Boaz Harrosh   exofs: RAID0 support
811
  					      master_dev->bio->bi_max_vecs);
04dc1e88a   Boaz Harrosh   exofs: Multi-devi...
812
813
814
  					ret = -ENOMEM;
  					goto out;
  				}
5d952b839   Boaz Harrosh   exofs: RAID0 support
815
  				__bio_clone(bio, master_dev->bio);
04dc1e88a   Boaz Harrosh   exofs: Multi-devi...
816
817
  				bio->bi_bdev = NULL;
  				bio->bi_next = NULL;
6851a5e5c   Boaz Harrosh   ore: Remove check...
818
  				per_dev->offset = master_dev->offset;
5d952b839   Boaz Harrosh   exofs: RAID0 support
819
820
821
  				per_dev->length = master_dev->length;
  				per_dev->bio =  bio;
  				per_dev->dev = dev;
04dc1e88a   Boaz Harrosh   exofs: Multi-devi...
822
  			} else {
5d952b839   Boaz Harrosh   exofs: RAID0 support
823
824
  				bio = master_dev->bio;
  				/* FIXME: bio_set_dir() */
7b6d91dae   Christoph Hellwig   block: unify flag...
825
  				bio->bi_rw |= REQ_WRITE;
04dc1e88a   Boaz Harrosh   exofs: Multi-devi...
826
  			}
06886a5a3   Boaz Harrosh   exofs: Move all o...
827

9e9db4564   Boaz Harrosh   exofs: ios: Move ...
828
829
  			osd_req_write(or, _ios_obj(ios, dev), per_dev->offset,
  				      bio, per_dev->length);
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
830
  			ORE_DBGMSG("write(0x%llx) offset=0x%llx "
34ce4e7c2   Boaz Harrosh   exofs: debug prin...
831
832
  				      "length=0x%llx dev=%d
  ",
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
833
834
  				     _LLU(_ios_obj(ios, dev)->id),
  				     _LLU(per_dev->offset),
5d952b839   Boaz Harrosh   exofs: RAID0 support
835
  				     _LLU(per_dev->length), dev);
06886a5a3   Boaz Harrosh   exofs: Move all o...
836
  		} else if (ios->kern_buff) {
6851a5e5c   Boaz Harrosh   ore: Remove check...
837
838
839
840
841
842
843
844
845
  			per_dev->offset = ios->si.obj_offset;
  			per_dev->dev = ios->si.dev + dev;
  
  			/* no cross device without page array */
  			BUG_ON((ios->layout->group_width > 1) &&
  			       (ios->si.unit_off + ios->length >
  				ios->layout->stripe_unit));
  
  			ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev),
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
846
847
  						 per_dev->offset,
  						 ios->kern_buff, ios->length);
5d952b839   Boaz Harrosh   exofs: RAID0 support
848
849
  			if (unlikely(ret))
  				goto out;
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
850
  			ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
34ce4e7c2   Boaz Harrosh   exofs: debug prin...
851
852
  				      "length=0x%llx dev=%d
  ",
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
853
854
  				     _LLU(_ios_obj(ios, dev)->id),
  				     _LLU(per_dev->offset),
6851a5e5c   Boaz Harrosh   ore: Remove check...
855
  				     _LLU(ios->length), per_dev->dev);
06886a5a3   Boaz Harrosh   exofs: Move all o...
856
  		} else {
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
857
  			osd_req_set_attributes(or, _ios_obj(ios, dev));
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
858
859
  			ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d
  ",
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
860
861
  				     _LLU(_ios_obj(ios, dev)->id),
  				     ios->out_attr_len, dev);
06886a5a3   Boaz Harrosh   exofs: Move all o...
862
863
864
865
866
867
868
869
870
  		}
  
  		if (ios->out_attr)
  			osd_req_add_set_attr_list(or, ios->out_attr,
  						  ios->out_attr_len);
  
  		if (ios->in_attr)
  			osd_req_add_get_attr_list(or, ios->in_attr,
  						  ios->in_attr_len);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
871
  	}
06886a5a3   Boaz Harrosh   exofs: Move all o...
872
873
874
875
  
  out:
  	return ret;
  }
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
876
  int ore_write(struct ore_io_state *ios)
5d952b839   Boaz Harrosh   exofs: RAID0 support
877
878
879
  {
  	int i;
  	int ret;
769ba8d92   Boaz Harrosh   ore: RAID5 Write
880
881
882
883
884
885
886
  	if (unlikely(ios->sp2d && !ios->r4w)) {
  		/* A library is attempting a RAID-write without providing
  		 * a pages lock interface.
  		 */
  		WARN_ON_ONCE(1);
  		return -ENOTSUPP;
  	}
5d952b839   Boaz Harrosh   exofs: RAID0 support
887
888
889
890
891
  	ret = _prepare_for_striping(ios);
  	if (unlikely(ret))
  		return ret;
  
  	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
892
  		ret = _write_mirror(ios, i);
5d952b839   Boaz Harrosh   exofs: RAID0 support
893
894
895
  		if (unlikely(ret))
  			return ret;
  	}
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
896
  	ret = ore_io_execute(ios);
5d952b839   Boaz Harrosh   exofs: RAID0 support
897
898
  	return ret;
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
899
  EXPORT_SYMBOL(ore_write);
5d952b839   Boaz Harrosh   exofs: RAID0 support
900

769ba8d92   Boaz Harrosh   ore: RAID5 Write
901
  int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
06886a5a3   Boaz Harrosh   exofs: Move all o...
902
  {
46f4d973f   Boaz Harrosh   exofs: unindent e...
903
  	struct osd_request *or;
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
904
  	struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
905
906
  	struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
  	unsigned first_dev = (unsigned)obj->id;
06886a5a3   Boaz Harrosh   exofs: Move all o...
907

50a76fd3c   Boaz Harrosh   exofs: groups sup...
908
909
  	if (ios->pages && !per_dev->length)
  		return 0; /* Just an empty slot */
5d952b839   Boaz Harrosh   exofs: RAID0 support
910
  	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
911
  	or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
46f4d973f   Boaz Harrosh   exofs: unindent e...
912
  	if (unlikely(!or)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
913
914
  		ORE_ERR("%s: osd_start_request failed
  ", __func__);
46f4d973f   Boaz Harrosh   exofs: unindent e...
915
916
917
  		return -ENOMEM;
  	}
  	per_dev->or = or;
46f4d973f   Boaz Harrosh   exofs: unindent e...
918

86093aaff   Boaz Harrosh   exofs: convert io...
919
  	if (ios->pages) {
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
920
921
922
923
924
925
926
927
928
929
930
931
932
  		if (per_dev->cur_sg) {
  			/* finalize the last sg_entry */
  			_ore_add_sg_seg(per_dev, 0, false);
  			if (unlikely(!per_dev->cur_sg))
  				return 0; /* Skip parity only device */
  
  			osd_req_read_sg(or, obj, per_dev->bio,
  					per_dev->sglist, per_dev->cur_sg);
  		} else {
  			/* The no raid case */
  			osd_req_read(or, obj, per_dev->offset,
  				     per_dev->bio, per_dev->length);
  		}
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
933
  		ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
934
935
  			     " dev=%d sg_len=%d
  ", _LLU(obj->id),
5d952b839   Boaz Harrosh   exofs: RAID0 support
936
  			     _LLU(per_dev->offset), _LLU(per_dev->length),
a1fec1dbb   Boaz Harrosh   ore: RAID5 read
937
  			     first_dev, per_dev->cur_sg);
46f4d973f   Boaz Harrosh   exofs: unindent e...
938
  	} else {
6851a5e5c   Boaz Harrosh   ore: Remove check...
939
  		BUG_ON(ios->kern_buff);
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
940
  		osd_req_get_attributes(or, obj);
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
941
942
  		ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d
  ",
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
943
944
  			      _LLU(obj->id),
  			      ios->in_attr_len, first_dev);
46f4d973f   Boaz Harrosh   exofs: unindent e...
945
  	}
46f4d973f   Boaz Harrosh   exofs: unindent e...
946
947
  	if (ios->out_attr)
  		osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
948

46f4d973f   Boaz Harrosh   exofs: unindent e...
949
950
  	if (ios->in_attr)
  		osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
951

5d952b839   Boaz Harrosh   exofs: RAID0 support
952
953
  	return 0;
  }
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
954
  int ore_read(struct ore_io_state *ios)
5d952b839   Boaz Harrosh   exofs: RAID0 support
955
956
957
958
959
960
961
962
963
  {
  	int i;
  	int ret;
  
  	ret = _prepare_for_striping(ios);
  	if (unlikely(ret))
  		return ret;
  
  	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
769ba8d92   Boaz Harrosh   ore: RAID5 Write
964
  		ret = _ore_read_mirror(ios, i);
5d952b839   Boaz Harrosh   exofs: RAID0 support
965
966
967
  		if (unlikely(ret))
  			return ret;
  	}
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
968
  	ret = ore_io_execute(ios);
5d952b839   Boaz Harrosh   exofs: RAID0 support
969
  	return ret;
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
970
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
971
  EXPORT_SYMBOL(ore_read);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
972

8ff660ab8   Boaz Harrosh   exofs: Rename rai...
973
  int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
974
975
976
977
978
979
980
  {
  	struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
  	void *iter = NULL;
  	int nelem;
  
  	do {
  		nelem = 1;
06886a5a3   Boaz Harrosh   exofs: Move all o...
981
982
  		osd_req_decode_get_attr_list(ios->per_dev[0].or,
  					     &cur_attr, &nelem, &iter);
b14f8ab28   Boaz Harrosh   exofs: Kbuild, He...
983
984
985
986
987
988
989
990
991
992
  		if ((cur_attr.attr_page == attr->attr_page) &&
  		    (cur_attr.attr_id == attr->attr_id)) {
  			attr->len = cur_attr.len;
  			attr->val_ptr = cur_attr.val_ptr;
  			return 0;
  		}
  	} while (iter);
  
  	return -EIO;
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
993
  EXPORT_SYMBOL(extract_attr_from_ios);
06886a5a3   Boaz Harrosh   exofs: Move all o...
994

8ff660ab8   Boaz Harrosh   exofs: Rename rai...
995
  static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
5d952b839   Boaz Harrosh   exofs: RAID0 support
996
997
998
999
1000
  			     struct osd_attr *attr)
  {
  	int last_comp = cur_comp + ios->layout->mirrors_p1;
  
  	for (; cur_comp < last_comp; ++cur_comp) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
1001
  		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
5d952b839   Boaz Harrosh   exofs: RAID0 support
1002
  		struct osd_request *or;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
1003
  		or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
5d952b839   Boaz Harrosh   exofs: RAID0 support
1004
  		if (unlikely(!or)) {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
1005
1006
  			ORE_ERR("%s: osd_start_request failed
  ", __func__);
5d952b839   Boaz Harrosh   exofs: RAID0 support
1007
1008
1009
  			return -ENOMEM;
  		}
  		per_dev->or = or;
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
1010
  		osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
5d952b839   Boaz Harrosh   exofs: RAID0 support
1011
1012
1013
1014
1015
  		osd_req_add_set_attr_list(or, attr, 1);
  	}
  
  	return 0;
  }
16f75bb35   Boaz Harrosh   exofs: Fix trunca...
1016
  struct _trunc_info {
eb507bc18   Boaz Harrosh   ore: Make ore_str...
1017
  	struct ore_striping_info si;
16f75bb35   Boaz Harrosh   exofs: Fix trunca...
1018
1019
1020
1021
1022
  	u64 prev_group_obj_off;
  	u64 next_group_obj_off;
  
  	unsigned first_group_dev;
  	unsigned nex_group_dev;
16f75bb35   Boaz Harrosh   exofs: Fix trunca...
1023
  };
1958c7c28   H Hartley Sweeten   exofs/ore.c: loca...
1024
1025
  /*
   * _calc_trunk_info - fill @ti with what ore_truncate() needs to pick a new
   * object size for each device when the file is cut at @file_offset.
   *
   * ti->si is computed by ore_calc_stripe_info(); the remaining fields are
   * derived from ti->si.M, ti->si.dev and the layout's stripe_unit and
   * group_width.
   */
  static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
  			     struct _trunc_info *ti)
  {
  	unsigned stripe_unit = layout->stripe_unit;

  	ore_calc_stripe_info(layout, file_offset, 0, &ti->si);

  	ti->prev_group_obj_off = ti->si.M * stripe_unit;
  	/* One stripe_unit less than the previous groups, never below 0 */
  	ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;

  	ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
  	ti->nex_group_dev = ti->first_group_dev + layout->group_width;
  }
5bf696dad   Boaz Harrosh   exofs: Rename str...
1036
  int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
9e9db4564   Boaz Harrosh   exofs: ios: Move ...
1037
  		   u64 size)
06886a5a3   Boaz Harrosh   exofs: Move all o...
1038
  {
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
1039
  	struct ore_io_state *ios;
5d952b839   Boaz Harrosh   exofs: RAID0 support
1040
1041
1042
1043
  	struct exofs_trunc_attr {
  		struct osd_attr attr;
  		__be64 newsize;
  	} *size_attrs;
16f75bb35   Boaz Harrosh   exofs: Fix trunca...
1044
  	struct _trunc_info ti;
06886a5a3   Boaz Harrosh   exofs: Move all o...
1045
  	int i, ret;
5bf696dad   Boaz Harrosh   exofs: Rename str...
1046
  	ret = ore_get_io_state(layout, oc, &ios);
5d952b839   Boaz Harrosh   exofs: RAID0 support
1047
1048
  	if (unlikely(ret))
  		return ret;
16f75bb35   Boaz Harrosh   exofs: Fix trunca...
1049
  	_calc_trunk_info(ios->layout, size, &ti);
b916c5cd4   Boaz Harrosh   ore: Only IO one ...
1050
  	size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs),
5d952b839   Boaz Harrosh   exofs: RAID0 support
1051
1052
1053
1054
1055
  			     GFP_KERNEL);
  	if (unlikely(!size_attrs)) {
  		ret = -ENOMEM;
  		goto out;
  	}
06886a5a3   Boaz Harrosh   exofs: Move all o...
1056

5bf696dad   Boaz Harrosh   exofs: Rename str...
1057
  	ios->numdevs = ios->oc->numdevs;
06886a5a3   Boaz Harrosh   exofs: Move all o...
1058

b916c5cd4   Boaz Harrosh   ore: Only IO one ...
1059
  	for (i = 0; i < ios->numdevs; ++i) {
5d952b839   Boaz Harrosh   exofs: RAID0 support
1060
1061
  		struct exofs_trunc_attr *size_attr = &size_attrs[i];
  		u64 obj_size;
06886a5a3   Boaz Harrosh   exofs: Move all o...
1062

16f75bb35   Boaz Harrosh   exofs: Fix trunca...
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
  		if (i < ti.first_group_dev)
  			obj_size = ti.prev_group_obj_off;
  		else if (i >= ti.nex_group_dev)
  			obj_size = ti.next_group_obj_off;
  		else if (i < ti.si.dev) /* dev within this group */
  			obj_size = ti.si.obj_offset +
  				      ios->layout->stripe_unit - ti.si.unit_off;
  		else if (i == ti.si.dev)
  			obj_size = ti.si.obj_offset;
  		else /* i > ti.dev */
  			obj_size = ti.si.obj_offset - ti.si.unit_off;
06886a5a3   Boaz Harrosh   exofs: Move all o...
1074

5d952b839   Boaz Harrosh   exofs: RAID0 support
1075
1076
1077
  		size_attr->newsize = cpu_to_be64(obj_size);
  		size_attr->attr = g_attr_logical_length;
  		size_attr->attr.val_ptr = &size_attr->newsize;
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
1078
1079
  		ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d
  ",
5bf696dad   Boaz Harrosh   exofs: Rename str...
1080
  			     _LLU(oc->comps->obj.id), _LLU(obj_size), i);
5d952b839   Boaz Harrosh   exofs: RAID0 support
1081
1082
1083
1084
  		ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
  					&size_attr->attr);
  		if (unlikely(ret))
  			goto out;
06886a5a3   Boaz Harrosh   exofs: Move all o...
1085
  	}
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
1086
  	ret = ore_io_execute(ios);
06886a5a3   Boaz Harrosh   exofs: Move all o...
1087
1088
  
  out:
5d952b839   Boaz Harrosh   exofs: RAID0 support
1089
  	kfree(size_attrs);
8ff660ab8   Boaz Harrosh   exofs: Rename rai...
1090
  	ore_put_io_state(ios);
06886a5a3   Boaz Harrosh   exofs: Move all o...
1091
1092
  	return ret;
  }
cf283ade0   Boaz Harrosh   ore: Make ore its...
1093
  EXPORT_SYMBOL(ore_truncate);
85e44df47   Boaz Harrosh   exofs: Move exofs...
1094
1095
1096
  
  /* Attribute descriptor for an object's logical length: page
   * OSD_APAGE_OBJECT_INFORMATION, id OSD_ATTR_OI_LOGICAL_LENGTH, 8 bytes.
   * ore_truncate() copies it and points val_ptr at the new size.
   */
  const struct osd_attr g_attr_logical_length = ATTR_DEF(
  	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
  EXPORT_SYMBOL(g_attr_logical_length);