Commit 50a76fd3c352ed2740eba01512efcfceee0703be
1 parent
b367e78bd1
Exists in
master
and in
4 other branches
exofs: groups support
* _calc_stripe_info() changes to accommodate for grouping
  calculations. Returns additional information

* old _prepare_pages() becomes _prepare_one_group()
  which stores pages belonging to one device group.

* New _prepare_for_striping iterates on all groups calling
  _prepare_one_group().

* Enable mounting of groups data_maps (group_width != 0)

[QUESTION]
what is faster A or B;
A.  x += stride;
    x = x % width + first_x;

B   x += stride
    if (x < last_x)
        x = first_x;

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Showing 3 changed files with 141 additions and 37 deletions Side-by-side Diff
fs/exofs/exofs.h
... | ... | @@ -63,6 +63,8 @@ |
63 | 63 | unsigned mirrors_p1; |
64 | 64 | |
65 | 65 | unsigned group_width; |
66 | + u64 group_depth; | |
67 | + unsigned group_count; | |
66 | 68 | |
67 | 69 | enum exofs_inode_layout_gen_functions lay_func; |
68 | 70 | |
... | ... | @@ -132,6 +134,7 @@ |
132 | 134 | struct page **pages; |
133 | 135 | unsigned nr_pages; |
134 | 136 | unsigned pgbase; |
137 | + unsigned pages_consumed; | |
135 | 138 | |
136 | 139 | /* Attributes */ |
137 | 140 | unsigned in_attr_len; |
fs/exofs/ios.c
... | ... | @@ -262,25 +262,50 @@ |
262 | 262 | /* |
263 | 263 | * L - logical offset into the file |
264 | 264 | * |
265 | - * U - The number of bytes in a full stripe | |
265 | + * U - The number of bytes in a stripe within a group | |
266 | 266 | * |
267 | 267 | * U = stripe_unit * group_width |
268 | 268 | * |
269 | - * N - The stripe number | |
269 | + * T - The number of bytes striped within a group of component objects | |
270 | + * (before advancing to the next group) | |
270 | 271 | * |
271 | - * N = L / U | |
272 | + * T = stripe_unit * group_width * group_depth | |
272 | 273 | * |
274 | + * S - The number of bytes striped across all component objects | |
275 | + * before the pattern repeats | |
276 | + * | |
277 | + * S = stripe_unit * group_width * group_depth * group_count | |
278 | + * | |
279 | + * M - The "major" (i.e., across all components) stripe number | |
280 | + * | |
281 | + * M = L / S | |
282 | + * | |
283 | + * G - Counts the groups from the beginning of the major stripe | |
284 | + * | |
285 | + * G = (L - (M * S)) / T [or (L % S) / T] | |
286 | + * | |
287 | + * H - The byte offset within the group | |
288 | + * | |
289 | + * H = (L - (M * S)) % T [or (L % S) % T] | |
290 | + * | |
291 | + * N - The "minor" (i.e., across the group) stripe number | |
292 | + * | |
293 | + * N = H / U | |
294 | + * | |
273 | 295 | * C - The component index coresponding to L |
274 | 296 | * |
275 | - * C = (L - (N*U)) / stripe_unit | |
297 | + * C = (H - (N * U)) / stripe_unit + G * group_width | |
298 | + * [or (L % U) / stripe_unit + G * group_width] | |
276 | 299 | * |
277 | 300 | * O - The component offset coresponding to L |
278 | 301 | * |
279 | - * (N*stripe_unit)+(L%stripe_unit) | |
302 | + * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit | |
280 | 303 | */ |
281 | - | |
282 | 304 | struct _striping_info { |
283 | 305 | u64 obj_offset; |
306 | + u64 group_length; | |
307 | + u64 total_group_length; | |
308 | + u64 Major; | |
284 | 309 | unsigned dev; |
285 | 310 | unsigned unit_off; |
286 | 311 | }; |
287 | 312 | |
288 | 313 | |
289 | 314 | |
290 | 315 | |
... | ... | @@ -290,15 +315,35 @@ |
290 | 315 | { |
291 | 316 | u32 stripe_unit = ios->layout->stripe_unit; |
292 | 317 | u32 group_width = ios->layout->group_width; |
318 | + u64 group_depth = ios->layout->group_depth; | |
319 | + | |
293 | 320 | u32 U = stripe_unit * group_width; |
321 | + u64 T = U * group_depth; | |
322 | + u64 S = T * ios->layout->group_count; | |
323 | + u64 M = div64_u64(file_offset, S); | |
294 | 324 | |
295 | - u32 LmodU; | |
296 | - u64 N = div_u64_rem(file_offset, U, &LmodU); | |
325 | + /* | |
326 | + G = (L - (M * S)) / T | |
327 | + H = (L - (M * S)) % T | |
328 | + */ | |
329 | + u64 LmodS = file_offset - M * S; | |
330 | + u32 G = div64_u64(LmodS, T); | |
331 | + u64 H = LmodS - G * T; | |
297 | 332 | |
298 | - si->unit_off = LmodU % stripe_unit; | |
299 | - si->obj_offset = N * stripe_unit + si->unit_off; | |
300 | - si->dev = LmodU / stripe_unit; | |
333 | + u32 N = div_u64(H, U); | |
334 | + | |
335 | + /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | |
336 | + si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | |
301 | 337 | si->dev *= ios->layout->mirrors_p1; |
338 | + | |
339 | + div_u64_rem(file_offset, stripe_unit, &si->unit_off); | |
340 | + | |
341 | + si->obj_offset = si->unit_off + (N * stripe_unit) + | |
342 | + (M * group_depth * stripe_unit); | |
343 | + | |
344 | + si->group_length = T - H; | |
345 | + si->total_group_length = T; | |
346 | + si->Major = M; | |
302 | 347 | } |
303 | 348 | |
304 | 349 | static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, |
305 | 350 | |
306 | 351 | |
307 | 352 | |
... | ... | @@ -345,16 +390,17 @@ |
345 | 390 | return 0; |
346 | 391 | } |
347 | 392 | |
348 | -static int _prepare_pages(struct exofs_io_state *ios, | |
349 | - struct _striping_info *si) | |
393 | +static int _prepare_one_group(struct exofs_io_state *ios, u64 length, | |
394 | + struct _striping_info *si, unsigned first_comp) | |
350 | 395 | { |
351 | - u64 length = ios->length; | |
352 | 396 | unsigned stripe_unit = ios->layout->stripe_unit; |
353 | 397 | unsigned mirrors_p1 = ios->layout->mirrors_p1; |
398 | + unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | |
354 | 399 | unsigned dev = si->dev; |
355 | - unsigned comp = 0; | |
356 | - unsigned stripes = 0; | |
357 | - unsigned cur_pg = 0; | |
400 | + unsigned first_dev = dev - (dev % devs_in_group); | |
401 | + unsigned comp = first_comp + (dev - first_dev); | |
402 | + unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | |
403 | + unsigned cur_pg = ios->pages_consumed; | |
358 | 404 | int ret = 0; |
359 | 405 | |
360 | 406 | while (length) { |
361 | 407 | |
... | ... | @@ -377,10 +423,11 @@ |
377 | 423 | cur_len = stripe_unit; |
378 | 424 | } |
379 | 425 | |
380 | - stripes++; | |
426 | + if (max_comp < comp) | |
427 | + max_comp = comp; | |
381 | 428 | |
382 | 429 | dev += mirrors_p1; |
383 | - dev %= ios->layout->s_numdevs; | |
430 | + dev = (dev % devs_in_group) + first_dev; | |
384 | 431 | } else { |
385 | 432 | cur_len = stripe_unit; |
386 | 433 | } |
387 | 434 | |
388 | 435 | |
389 | 436 | |
... | ... | @@ -393,18 +440,24 @@ |
393 | 440 | goto out; |
394 | 441 | |
395 | 442 | comp += mirrors_p1; |
396 | - comp %= ios->layout->s_numdevs; | |
443 | + comp = (comp % devs_in_group) + first_comp; | |
397 | 444 | |
398 | 445 | length -= cur_len; |
399 | 446 | } |
400 | 447 | out: |
401 | - ios->numdevs = stripes * mirrors_p1; | |
448 | + ios->numdevs = max_comp + mirrors_p1; | |
449 | + ios->pages_consumed = cur_pg; | |
402 | 450 | return ret; |
403 | 451 | } |
404 | 452 | |
405 | 453 | static int _prepare_for_striping(struct exofs_io_state *ios) |
406 | 454 | { |
455 | + u64 length = ios->length; | |
407 | 456 | struct _striping_info si; |
457 | + unsigned devs_in_group = ios->layout->group_width * | |
458 | + ios->layout->mirrors_p1; | |
459 | + unsigned first_comp = 0; | |
460 | + int ret = 0; | |
408 | 461 | |
409 | 462 | _calc_stripe_info(ios, ios->offset, &si); |
410 | 463 | |
... | ... | @@ -424,7 +477,31 @@ |
424 | 477 | return 0; |
425 | 478 | } |
426 | 479 | |
427 | - return _prepare_pages(ios, &si); | |
480 | + while (length) { | |
481 | + if (length < si.group_length) | |
482 | + si.group_length = length; | |
483 | + | |
484 | + ret = _prepare_one_group(ios, si.group_length, &si, first_comp); | |
485 | + if (unlikely(ret)) | |
486 | + goto out; | |
487 | + | |
488 | + length -= si.group_length; | |
489 | + | |
490 | + si.group_length = si.total_group_length; | |
491 | + si.unit_off = 0; | |
492 | + ++si.Major; | |
493 | + si.obj_offset = si.Major * ios->layout->stripe_unit * | |
494 | + ios->layout->group_depth; | |
495 | + | |
496 | + si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group; | |
497 | + si.dev %= ios->layout->s_numdevs; | |
498 | + | |
499 | + first_comp += devs_in_group; | |
500 | + first_comp %= ios->layout->s_numdevs; | |
501 | + } | |
502 | + | |
503 | +out: | |
504 | + return ret; | |
428 | 505 | } |
429 | 506 | |
430 | 507 | int exofs_sbi_create(struct exofs_io_state *ios) |
... | ... | @@ -482,6 +559,9 @@ |
482 | 559 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; |
483 | 560 | int ret = 0; |
484 | 561 | |
562 | + if (ios->pages && !master_dev->length) | |
563 | + return 0; /* Just an empty slot */ | |
564 | + | |
485 | 565 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
486 | 566 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
487 | 567 | struct osd_request *or; |
... | ... | @@ -579,6 +659,9 @@ |
579 | 659 | struct osd_request *or; |
580 | 660 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
581 | 661 | unsigned first_dev = (unsigned)ios->obj.id; |
662 | + | |
663 | + if (ios->pages && !per_dev->length) | |
664 | + return 0; /* Just an empty slot */ | |
582 | 665 | |
583 | 666 | first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; |
584 | 667 | or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); |
fs/exofs/super.c
... | ... | @@ -323,11 +323,7 @@ |
323 | 323 | sbi->data_map.odm_raid_algorithm = |
324 | 324 | le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); |
325 | 325 | |
326 | -/* FIXME: Only raid0 !group_width/depth for now. if not so, do not mount */ | |
327 | - if (sbi->data_map.odm_group_width || sbi->data_map.odm_group_depth) { | |
328 | - EXOFS_ERR("Group width/depth not supported\n"); | |
329 | - return -EINVAL; | |
330 | - } | |
326 | +/* FIXME: Only raid0 for now. if not so, do not mount */ | |
331 | 327 | if (sbi->data_map.odm_num_comps != numdevs) { |
332 | 328 | EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n", |
333 | 329 | sbi->data_map.odm_num_comps, numdevs); |
... | ... | @@ -343,14 +339,6 @@ |
343 | 339 | return -EINVAL; |
344 | 340 | } |
345 | 341 | |
346 | - stripe_length = sbi->data_map.odm_stripe_unit * | |
347 | - (numdevs / (sbi->data_map.odm_mirror_cnt + 1)); | |
348 | - if (stripe_length >= (1ULL << 32)) { | |
349 | - EXOFS_ERR("Total Stripe length(0x%llx)" | |
350 | - " >= 32bit is not supported\n", _LLU(stripe_length)); | |
351 | - return -EINVAL; | |
352 | - } | |
353 | - | |
354 | 342 | if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) { |
355 | 343 | EXOFS_ERR("Stripe Unit(0x%llx)" |
356 | 344 | " must be Multples of PAGE_SIZE(0x%lx)\n", |
357 | 345 | |
358 | 346 | |
... | ... | @@ -360,9 +348,37 @@ |
360 | 348 | |
361 | 349 | sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit; |
362 | 350 | sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1; |
363 | - sbi->layout.group_width = sbi->data_map.odm_num_comps / | |
351 | + | |
352 | + if (sbi->data_map.odm_group_width) { | |
353 | + sbi->layout.group_width = sbi->data_map.odm_group_width; | |
354 | + sbi->layout.group_depth = sbi->data_map.odm_group_depth; | |
355 | + if (!sbi->layout.group_depth) { | |
356 | + EXOFS_ERR("group_depth == 0 && group_width != 0\n"); | |
357 | + return -EINVAL; | |
358 | + } | |
359 | + sbi->layout.group_count = sbi->data_map.odm_num_comps / | |
360 | + sbi->layout.mirrors_p1 / | |
361 | + sbi->data_map.odm_group_width; | |
362 | + } else { | |
363 | + if (sbi->data_map.odm_group_depth) { | |
364 | + printk(KERN_NOTICE "Warning: group_depth ignored " | |
365 | + "group_width == 0 && group_depth == %d\n", | |
366 | + sbi->data_map.odm_group_depth); | |
367 | + sbi->data_map.odm_group_depth = 0; | |
368 | + } | |
369 | + sbi->layout.group_width = sbi->data_map.odm_num_comps / | |
364 | 370 | sbi->layout.mirrors_p1; |
371 | + sbi->layout.group_depth = -1; | |
372 | + sbi->layout.group_count = 1; | |
373 | + } | |
365 | 374 | |
375 | + stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit; | |
376 | + if (stripe_length >= (1ULL << 32)) { | |
377 | + EXOFS_ERR("Total Stripe length(0x%llx)" | |
378 | + " >= 32bit is not supported\n", _LLU(stripe_length)); | |
379 | + return -EINVAL; | |
380 | + } | |
381 | + | |
366 | 382 | return 0; |
367 | 383 | } |
368 | 384 | |
... | ... | @@ -540,6 +556,8 @@ |
540 | 556 | sbi->layout.stripe_unit = PAGE_SIZE; |
541 | 557 | sbi->layout.mirrors_p1 = 1; |
542 | 558 | sbi->layout.group_width = 1; |
559 | + sbi->layout.group_depth = -1; | |
560 | + sbi->layout.group_count = 1; | |
543 | 561 | sbi->layout.s_ods[0] = od; |
544 | 562 | sbi->layout.s_numdevs = 1; |
545 | 563 | sbi->layout.s_pid = opts->pid; |