Commit 81744ee44ab2845c16ffd7d6f762f7b4a49a4750
Committed by
Jens Axboe
1 parent
2f7a2d89a8
Exists in
master
and in
7 other branches
block: Fix incorrect alignment offset reporting and update documentation
queue_sector_alignment_offset returned the wrong value, which caused partitions to report an incorrect alignment_offset. Since the offset alignment calculation is needed in several places, it has been split into a separate helper function. The topology stacking function has been updated accordingly. Furthermore, comments have been added to clarify how the stacking function works.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Showing 2 changed files with 42 additions and 13 deletions Side-by-side Diff
block/blk-settings.c
... | ... | @@ -505,20 +505,30 @@ |
505 | 505 | |
506 | 506 | /** |
507 | 507 | * blk_stack_limits - adjust queue_limits for stacked devices |
508 | - * @t: the stacking driver limits (top) | |
509 | - * @b: the underlying queue limits (bottom) | |
508 | + * @t: the stacking driver limits (top device) | |
509 | + * @b: the underlying queue limits (bottom, component device) | |
510 | 510 | * @offset: offset to beginning of data within component device |
511 | 511 | * |
512 | 512 | * Description: |
513 | - * Merges two queue_limit structs. Returns 0 if alignment didn't | |
514 | - * change. Returns -1 if adding the bottom device caused | |
515 | - * misalignment. | |
513 | + * This function is used by stacking drivers like MD and DM to ensure | |
514 | + * that all component devices have compatible block sizes and | |
515 | + * alignments. The stacking driver must provide a queue_limits | |
516 | + * struct (top) and then iteratively call the stacking function for | |
517 | + * all component (bottom) devices. The stacking function will | |
518 | + * attempt to combine the values and ensure proper alignment. | |
519 | + * | |
520 | + * Returns 0 if the top and bottom queue_limits are compatible. The | |
521 | + * top device's block sizes and alignment offsets may be adjusted to | |
522 | + * ensure alignment with the bottom device. If no compatible sizes | |
523 | + * and alignments exist, -1 is returned and the resulting top | |
524 | + * queue_limits will have the misaligned flag set to indicate that | |
525 | + * the alignment_offset is undefined. | |
516 | 526 | */ |
517 | 527 | int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, |
518 | 528 | sector_t offset) |
519 | 529 | { |
520 | 530 | sector_t alignment; |
521 | - unsigned int top, bottom, granularity; | |
531 | + unsigned int top, bottom; | |
522 | 532 | |
523 | 533 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); |
524 | 534 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); |
525 | 535 | |
526 | 536 | |
527 | 537 | |
... | ... | @@ -536,15 +546,18 @@ |
536 | 546 | t->max_segment_size = min_not_zero(t->max_segment_size, |
537 | 547 | b->max_segment_size); |
538 | 548 | |
539 | - granularity = max(b->physical_block_size, b->io_min); | |
540 | - alignment = b->alignment_offset - (offset & (granularity - 1)); | |
549 | + alignment = queue_limit_alignment_offset(b, offset); | |
541 | 550 | |
551 | + /* Bottom device has different alignment. Check that it is | |
552 | + * compatible with the current top alignment. | |
553 | + */ | |
542 | 554 | if (t->alignment_offset != alignment) { |
543 | 555 | |
544 | 556 | top = max(t->physical_block_size, t->io_min) |
545 | 557 | + t->alignment_offset; |
546 | - bottom = granularity + alignment; | |
558 | + bottom = max(b->physical_block_size, b->io_min) + alignment; | |
547 | 559 | |
560 | + /* Verify that top and bottom intervals line up */ | |
548 | 561 | if (max(top, bottom) & (min(top, bottom) - 1)) |
549 | 562 | t->misaligned = 1; |
550 | 563 | } |
551 | 564 | |
552 | 565 | |
553 | 566 | |
554 | 567 | |
555 | 568 | |
556 | 569 | |
... | ... | @@ -561,32 +574,39 @@ |
561 | 574 | t->no_cluster |= b->no_cluster; |
562 | 575 | t->discard_zeroes_data &= b->discard_zeroes_data; |
563 | 576 | |
577 | + /* Physical block size a multiple of the logical block size? */ | |
564 | 578 | if (t->physical_block_size & (t->logical_block_size - 1)) { |
565 | 579 | t->physical_block_size = t->logical_block_size; |
566 | 580 | t->misaligned = 1; |
567 | 581 | } |
568 | 582 | |
583 | + /* Minimum I/O a multiple of the physical block size? */ | |
569 | 584 | if (t->io_min & (t->physical_block_size - 1)) { |
570 | 585 | t->io_min = t->physical_block_size; |
571 | 586 | t->misaligned = 1; |
572 | 587 | } |
573 | 588 | |
589 | + /* Optimal I/O a multiple of the physical block size? */ | |
574 | 590 | if (t->io_opt & (t->physical_block_size - 1)) { |
575 | 591 | t->io_opt = 0; |
576 | 592 | t->misaligned = 1; |
577 | 593 | } |
578 | 594 | |
595 | + /* Find lowest common alignment_offset */ | |
579 | 596 | t->alignment_offset = lcm(t->alignment_offset, alignment) |
580 | 597 | & (max(t->physical_block_size, t->io_min) - 1); |
581 | 598 | |
599 | + /* Verify that new alignment_offset is on a logical block boundary */ | |
582 | 600 | if (t->alignment_offset & (t->logical_block_size - 1)) |
583 | 601 | t->misaligned = 1; |
584 | 602 | |
585 | 603 | /* Discard alignment and granularity */ |
586 | 604 | if (b->discard_granularity) { |
605 | + unsigned int granularity = b->discard_granularity; | |
606 | + offset &= granularity - 1; | |
587 | 607 | |
588 | - alignment = b->discard_alignment - | |
589 | - (offset & (b->discard_granularity - 1)); | |
608 | + alignment = (granularity + b->discard_alignment - offset) | |
609 | + & (granularity - 1); | |
590 | 610 | |
591 | 611 | if (t->discard_granularity != 0 && |
592 | 612 | t->discard_alignment != alignment) { |
... | ... | @@ -598,6 +618,8 @@ |
598 | 618 | t->discard_misaligned = 1; |
599 | 619 | } |
600 | 620 | |
621 | + t->max_discard_sectors = min_not_zero(t->max_discard_sectors, | |
622 | + b->max_discard_sectors); | |
601 | 623 | t->discard_granularity = max(t->discard_granularity, |
602 | 624 | b->discard_granularity); |
603 | 625 | t->discard_alignment = lcm(t->discard_alignment, alignment) & |
include/linux/blkdev.h
... | ... | @@ -1116,11 +1116,18 @@ |
1116 | 1116 | return q->limits.alignment_offset; |
1117 | 1117 | } |
1118 | 1118 | |
1119 | +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) | |
1120 | +{ | |
1121 | + unsigned int granularity = max(lim->physical_block_size, lim->io_min); | |
1122 | + | |
1123 | + offset &= granularity - 1; | |
1124 | + return (granularity + lim->alignment_offset - offset) & (granularity - 1); | |
1125 | +} | |
1126 | + | |
1119 | 1127 | static inline int queue_sector_alignment_offset(struct request_queue *q, |
1120 | 1128 | sector_t sector) |
1121 | 1129 | { |
1122 | - return ((sector << 9) - q->limits.alignment_offset) | |
1123 | - & (q->limits.io_min - 1); | |
1130 | + return queue_limit_alignment_offset(&q->limits, sector << 9); | |
1124 | 1131 | } |
1125 | 1132 | |
1126 | 1133 | static inline int bdev_alignment_offset(struct block_device *bdev) |