Commit 50a76fd3c352ed2740eba01512efcfceee0703be

Authored by Boaz Harrosh
1 parent b367e78bd1

exofs: groups support

* _calc_stripe_info() changes to accommodate grouping
  calculations. It now also returns group_length,
  total_group_length and Major in struct _striping_info.

* old _prepare_pages() becomes _prepare_one_group()
  which stores pages belonging to one device group.

* New _prepare_for_striping iterates on all groups calling
  _prepare_one_group().

* Enable mounting of groups data_maps (group_width != 0)

[QUESTION]
Which is faster, A or B?
A.	x += stride;
	x = x % width + first_x;

B	x += stride
	if (x >= last_x)
		x = first_x;

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>

Showing 3 changed files with 141 additions and 37 deletions Side-by-side Diff

... ... @@ -63,6 +63,8 @@
63 63 unsigned mirrors_p1;
64 64  
65 65 unsigned group_width;
  66 + u64 group_depth;
  67 + unsigned group_count;
66 68  
67 69 enum exofs_inode_layout_gen_functions lay_func;
68 70  
... ... @@ -132,6 +134,7 @@
132 134 struct page **pages;
133 135 unsigned nr_pages;
134 136 unsigned pgbase;
  137 + unsigned pages_consumed;
135 138  
136 139 /* Attributes */
137 140 unsigned in_attr_len;
... ... @@ -262,25 +262,50 @@
262 262 /*
263 263 * L - logical offset into the file
264 264 *
265   - * U - The number of bytes in a full stripe
  265 + * U - The number of bytes in a stripe within a group
266 266 *
267 267 * U = stripe_unit * group_width
268 268 *
269   - * N - The stripe number
  269 + * T - The number of bytes striped within a group of component objects
  270 + * (before advancing to the next group)
270 271 *
271   - * N = L / U
  272 + * T = stripe_unit * group_width * group_depth
272 273 *
  274 + * S - The number of bytes striped across all component objects
  275 + * before the pattern repeats
  276 + *
  277 + * S = stripe_unit * group_width * group_depth * group_count
  278 + *
  279 + * M - The "major" (i.e., across all components) stripe number
  280 + *
  281 + * M = L / S
  282 + *
  283 + * G - Counts the groups from the beginning of the major stripe
  284 + *
  285 + * G = (L - (M * S)) / T [or (L % S) / T]
  286 + *
  287 + * H - The byte offset within the group
  288 + *
  289 + * H = (L - (M * S)) % T [or (L % S) % T]
  290 + *
  291 + * N - The "minor" (i.e., across the group) stripe number
  292 + *
  293 + * N = H / U
  294 + *
273 295 * C - The component index corresponding to L
274 296 *
275   - * C = (L - (N*U)) / stripe_unit
  297 + * C = (H - (N * U)) / stripe_unit + G * group_width
  298 + * [or (L % U) / stripe_unit + G * group_width]
276 299 *
277 300 * O - The component offset corresponding to L
278 301 *
279   - * (N*stripe_unit)+(L%stripe_unit)
  302 + * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
280 303 */
281   -
282 304 struct _striping_info {
283 305 u64 obj_offset;
  306 + u64 group_length;
  307 + u64 total_group_length;
  308 + u64 Major;
284 309 unsigned dev;
285 310 unsigned unit_off;
286 311 };
287 312  
288 313  
289 314  
290 315  
... ... @@ -290,15 +315,35 @@
290 315 {
291 316 u32 stripe_unit = ios->layout->stripe_unit;
292 317 u32 group_width = ios->layout->group_width;
  318 + u64 group_depth = ios->layout->group_depth;
  319 +
293 320 u32 U = stripe_unit * group_width;
  321 + u64 T = U * group_depth;
  322 + u64 S = T * ios->layout->group_count;
  323 + u64 M = div64_u64(file_offset, S);
294 324  
295   - u32 LmodU;
296   - u64 N = div_u64_rem(file_offset, U, &LmodU);
  325 + /*
  326 + G = (L - (M * S)) / T
  327 + H = (L - (M * S)) % T
  328 + */
  329 + u64 LmodS = file_offset - M * S;
  330 + u32 G = div64_u64(LmodS, T);
  331 + u64 H = LmodS - G * T;
297 332  
298   - si->unit_off = LmodU % stripe_unit;
299   - si->obj_offset = N * stripe_unit + si->unit_off;
300   - si->dev = LmodU / stripe_unit;
  333 + u32 N = div_u64(H, U);
  334 +
  335 + /* "H - (N * U)" is just "H % U" so it's bound to u32 */
  336 + si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
301 337 si->dev *= ios->layout->mirrors_p1;
  338 +
  339 + div_u64_rem(file_offset, stripe_unit, &si->unit_off);
  340 +
  341 + si->obj_offset = si->unit_off + (N * stripe_unit) +
  342 + (M * group_depth * stripe_unit);
  343 +
  344 + si->group_length = T - H;
  345 + si->total_group_length = T;
  346 + si->Major = M;
302 347 }
303 348  
304 349 static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
305 350  
306 351  
307 352  
... ... @@ -345,16 +390,17 @@
345 390 return 0;
346 391 }
347 392  
348   -static int _prepare_pages(struct exofs_io_state *ios,
349   - struct _striping_info *si)
  393 +static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
  394 + struct _striping_info *si, unsigned first_comp)
350 395 {
351   - u64 length = ios->length;
352 396 unsigned stripe_unit = ios->layout->stripe_unit;
353 397 unsigned mirrors_p1 = ios->layout->mirrors_p1;
  398 + unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
354 399 unsigned dev = si->dev;
355   - unsigned comp = 0;
356   - unsigned stripes = 0;
357   - unsigned cur_pg = 0;
  400 + unsigned first_dev = dev - (dev % devs_in_group);
  401 + unsigned comp = first_comp + (dev - first_dev);
  402 + unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
  403 + unsigned cur_pg = ios->pages_consumed;
358 404 int ret = 0;
359 405  
360 406 while (length) {
361 407  
... ... @@ -377,10 +423,11 @@
377 423 cur_len = stripe_unit;
378 424 }
379 425  
380   - stripes++;
  426 + if (max_comp < comp)
  427 + max_comp = comp;
381 428  
382 429 dev += mirrors_p1;
383   - dev %= ios->layout->s_numdevs;
  430 + dev = (dev % devs_in_group) + first_dev;
384 431 } else {
385 432 cur_len = stripe_unit;
386 433 }
387 434  
388 435  
389 436  
... ... @@ -393,18 +440,24 @@
393 440 goto out;
394 441  
395 442 comp += mirrors_p1;
396   - comp %= ios->layout->s_numdevs;
  443 + comp = (comp % devs_in_group) + first_comp;
397 444  
398 445 length -= cur_len;
399 446 }
400 447 out:
401   - ios->numdevs = stripes * mirrors_p1;
  448 + ios->numdevs = max_comp + mirrors_p1;
  449 + ios->pages_consumed = cur_pg;
402 450 return ret;
403 451 }
404 452  
405 453 static int _prepare_for_striping(struct exofs_io_state *ios)
406 454 {
  455 + u64 length = ios->length;
407 456 struct _striping_info si;
  457 + unsigned devs_in_group = ios->layout->group_width *
  458 + ios->layout->mirrors_p1;
  459 + unsigned first_comp = 0;
  460 + int ret = 0;
408 461  
409 462 _calc_stripe_info(ios, ios->offset, &si);
410 463  
... ... @@ -424,7 +477,31 @@
424 477 return 0;
425 478 }
426 479  
427   - return _prepare_pages(ios, &si);
  480 + while (length) {
  481 + if (length < si.group_length)
  482 + si.group_length = length;
  483 +
  484 + ret = _prepare_one_group(ios, si.group_length, &si, first_comp);
  485 + if (unlikely(ret))
  486 + goto out;
  487 +
  488 + length -= si.group_length;
  489 +
  490 + si.group_length = si.total_group_length;
  491 + si.unit_off = 0;
  492 + ++si.Major;
  493 + si.obj_offset = si.Major * ios->layout->stripe_unit *
  494 + ios->layout->group_depth;
  495 +
  496 + si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
  497 + si.dev %= ios->layout->s_numdevs;
  498 +
  499 + first_comp += devs_in_group;
  500 + first_comp %= ios->layout->s_numdevs;
  501 + }
  502 +
  503 +out:
  504 + return ret;
428 505 }
429 506  
430 507 int exofs_sbi_create(struct exofs_io_state *ios)
... ... @@ -482,6 +559,9 @@
482 559 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
483 560 int ret = 0;
484 561  
  562 + if (ios->pages && !master_dev->length)
  563 + return 0; /* Just an empty slot */
  564 +
485 565 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
486 566 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
487 567 struct osd_request *or;
... ... @@ -579,6 +659,9 @@
579 659 struct osd_request *or;
580 660 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
581 661 unsigned first_dev = (unsigned)ios->obj.id;
  662 +
  663 + if (ios->pages && !per_dev->length)
  664 + return 0; /* Just an empty slot */
582 665  
583 666 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
584 667 or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
... ... @@ -323,11 +323,7 @@
323 323 sbi->data_map.odm_raid_algorithm =
324 324 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
325 325  
326   -/* FIXME: Only raid0 !group_width/depth for now. if not so, do not mount */
327   - if (sbi->data_map.odm_group_width || sbi->data_map.odm_group_depth) {
328   - EXOFS_ERR("Group width/depth not supported\n");
329   - return -EINVAL;
330   - }
  326 +/* FIXME: Only raid0 for now. if not so, do not mount */
331 327 if (sbi->data_map.odm_num_comps != numdevs) {
332 328 EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
333 329 sbi->data_map.odm_num_comps, numdevs);
... ... @@ -343,14 +339,6 @@
343 339 return -EINVAL;
344 340 }
345 341  
346   - stripe_length = sbi->data_map.odm_stripe_unit *
347   - (numdevs / (sbi->data_map.odm_mirror_cnt + 1));
348   - if (stripe_length >= (1ULL << 32)) {
349   - EXOFS_ERR("Total Stripe length(0x%llx)"
350   - " >= 32bit is not supported\n", _LLU(stripe_length));
351   - return -EINVAL;
352   - }
353   -
354 342 if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
355 343 EXOFS_ERR("Stripe Unit(0x%llx)"
356 344 " must be Multples of PAGE_SIZE(0x%lx)\n",
357 345  
358 346  
... ... @@ -360,9 +348,37 @@
360 348  
361 349 sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
362 350 sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
363   - sbi->layout.group_width = sbi->data_map.odm_num_comps /
  351 +
  352 + if (sbi->data_map.odm_group_width) {
  353 + sbi->layout.group_width = sbi->data_map.odm_group_width;
  354 + sbi->layout.group_depth = sbi->data_map.odm_group_depth;
  355 + if (!sbi->layout.group_depth) {
  356 + EXOFS_ERR("group_depth == 0 && group_width != 0\n");
  357 + return -EINVAL;
  358 + }
  359 + sbi->layout.group_count = sbi->data_map.odm_num_comps /
  360 + sbi->layout.mirrors_p1 /
  361 + sbi->data_map.odm_group_width;
  362 + } else {
  363 + if (sbi->data_map.odm_group_depth) {
  364 + printk(KERN_NOTICE "Warning: group_depth ignored "
  365 + "group_width == 0 && group_depth == %d\n",
  366 + sbi->data_map.odm_group_depth);
  367 + sbi->data_map.odm_group_depth = 0;
  368 + }
  369 + sbi->layout.group_width = sbi->data_map.odm_num_comps /
364 370 sbi->layout.mirrors_p1;
  371 + sbi->layout.group_depth = -1;
  372 + sbi->layout.group_count = 1;
  373 + }
365 374  
  375 + stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
  376 + if (stripe_length >= (1ULL << 32)) {
  377 + EXOFS_ERR("Total Stripe length(0x%llx)"
  378 + " >= 32bit is not supported\n", _LLU(stripe_length));
  379 + return -EINVAL;
  380 + }
  381 +
366 382 return 0;
367 383 }
368 384  
... ... @@ -540,6 +556,8 @@
540 556 sbi->layout.stripe_unit = PAGE_SIZE;
541 557 sbi->layout.mirrors_p1 = 1;
542 558 sbi->layout.group_width = 1;
  559 + sbi->layout.group_depth = -1;
  560 + sbi->layout.group_count = 1;
543 561 sbi->layout.s_ods[0] = od;
544 562 sbi->layout.s_numdevs = 1;
545 563 sbi->layout.s_pid = opts->pid;