Commit 55f2b8bdb0c7387eb2dc645b9ecbe5d0faa6b54e
Committed by: Alasdair G Kergon
1 parent: 33d07c0dfa
dm thin: support for non power of 2 pool blocksize
Non power of 2 blocksize support is needed to properly align thinp IO
on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).

Use sector_div to support non power of 2 blocksize for the pool's data
device.  This provides comparable performance to the power of 2 math
that was performed until now (as tested on modern x86_64 hardware).

The kernel currently assumes that limits->discard_granularity is a power
of two, so the thin target only enables discard support if the block
size is a power of two.

Eliminate the pool structure's 'block_shift', 'offset_mask' and
remaining 4 byte holes.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
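For readers skimming the diff below: sector_div() divides a 64-bit sector_t in place and returns the remainder, so a single call yields both the thin block number (the quotient, used by get_bio_block()) and the offset within that block (the remainder, used by remap()). The following is a minimal userspace sketch of the arithmetic the patch switches to; sector_div_sketch() and the sample values are illustrative stand-ins, not the kernel helper itself:

#include <stdint.h>
#include <stdio.h>

/*
 * Userspace stand-in for the kernel's sector_div(): divides the
 * sector count in place and returns the remainder.
 */
static uint32_t sector_div_sketch(uint64_t *sector, uint32_t divisor)
{
        uint32_t remainder = (uint32_t)(*sector % divisor);

        *sector /= divisor;
        return remainder;
}

int main(void)
{
        uint64_t bi_sector = 123456789;         /* hypothetical bio start sector */
        uint32_t sectors_per_block = 2560;      /* non power of 2: e.g. a RAID6 10+2
                                                 * array with 128KiB chunks has a
                                                 * 1280KiB (2560-sector) stripe */

        uint64_t block_nr = bi_sector;
        uint32_t offset = sector_div_sketch(&block_nr, sectors_per_block);

        /*
         * The old math only worked when sectors_per_block was a power of 2:
         *   block_nr = bi_sector >> block_shift;
         *   offset   = bi_sector & offset_mask;
         */
        printf("sector %llu -> block %llu, offset %u\n",
               (unsigned long long)bi_sector,
               (unsigned long long)block_nr, offset);
        return 0;
}

With a runtime divisor the shift/mask shortcut no longer applies, which is why the commit message notes that the division-based math measured comparably on modern x86_64 hardware.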
Showing 1 changed file with 37 additions and 22 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -510,10 +510,8 @@
 	struct block_device *md_dev;
 	struct dm_pool_metadata *pmd;
 
-	uint32_t sectors_per_block;
-	unsigned block_shift;
-	dm_block_t offset_mask;
 	dm_block_t low_water_blocks;
+	uint32_t sectors_per_block;
 
 	struct pool_features pf;
 	unsigned low_water_triggered:1;	/* A dm event has been sent */
@@ -526,8 +524,8 @@
 	struct work_struct worker;
 	struct delayed_work waker;
 
-	unsigned ref_count;
 	unsigned long last_commit_jiffies;
+	unsigned ref_count;
 
 	spinlock_t lock;
 	struct bio_list deferred_bios;
@@ -679,16 +677,21 @@
 
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
-	return bio->bi_sector >> tc->pool->block_shift;
+	sector_t block_nr = bio->bi_sector;
+
+	(void) sector_div(block_nr, tc->pool->sectors_per_block);
+
+	return block_nr;
 }
 
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 {
 	struct pool *pool = tc->pool;
+	sector_t bi_sector = bio->bi_sector;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
-	bio->bi_sector = (block << pool->block_shift) +
-		(bio->bi_sector & pool->offset_mask);
+	bio->bi_sector = (block * pool->sectors_per_block) +
+		sector_div(bi_sector, pool->sectors_per_block);
 }
 
 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -933,9 +936,10 @@
  */
 static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return !(bio->bi_sector & pool->offset_mask) &&
-		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+	sector_t bi_sector = bio->bi_sector;
 
+	return !sector_div(bi_sector, pool->sectors_per_block) &&
+		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
 }
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1239,8 +1243,8 @@
 			 * part of the discard that is in a subsequent
 			 * block.
 			 */
-			sector_t offset = bio->bi_sector - (block << pool->block_shift);
-			unsigned remaining = (pool->sectors_per_block - offset) << 9;
+			sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+			unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
 			bio->bi_size = min(bio->bi_size, remaining);
 
 			cell_release_singleton(cell, bio);
@@ -1722,8 +1726,6 @@
 
 	pool->pmd = pmd;
 	pool->sectors_per_block = block_size;
-	pool->block_shift = ffs(block_size) - 1;
-	pool->offset_mask = block_size - 1;
 	pool->low_water_blocks = 0;
 	pool_features_init(&pool->pf);
 	pool->prison = prison_create(PRISON_CELLS);
@@ -1971,7 +1973,7 @@
 	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
-	    !is_power_of_2(block_size)) {
+	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
 		ti->error = "Invalid block size";
 		r = -EINVAL;
 		goto out;
@@ -2018,6 +2020,15 @@
 		goto out_flags_changed;
 	}
 
+	/*
+	 * The block layer requires discard_granularity to be a power of 2.
+	 */
+	if (pf.discard_enabled && !is_power_of_2(block_size)) {
+		ti->error = "Discard support must be disabled when the block size is not a power of 2";
+		r = -EINVAL;
+		goto out_flags_changed;
+	}
+
 	pt->pool = pool;
 	pt->ti = ti;
 	pt->metadata_dev = metadata_dev;
@@ -2097,7 +2108,8 @@
 	int r;
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
-	dm_block_t data_size, sb_data_size;
+	sector_t data_size = ti->len;
+	dm_block_t sb_data_size;
 
 	/*
 	 * Take control of the pool object.
@@ -2106,7 +2118,8 @@
 	if (r)
 		return r;
 
-	data_size = ti->len >> pool->block_shift;
+	(void) sector_div(data_size, pool->sectors_per_block);
+
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
 		DMERR("failed to retrieve data device size");
@@ -2115,7 +2128,7 @@
 
 	if (data_size < sb_data_size) {
 		DMERR("pool target too small, is %llu blocks (expected %llu)",
-		      data_size, sb_data_size);
+		      (unsigned long long)data_size, sb_data_size);
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
@@ -2764,19 +2777,21 @@
 static int thin_iterate_devices(struct dm_target *ti,
 				iterate_devices_callout_fn fn, void *data)
 {
-	dm_block_t blocks;
+	sector_t blocks;
 	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
 
 	/*
 	 * We can't call dm_pool_get_data_dev_size() since that blocks.  So
 	 * we follow a more convoluted path through to the pool's target.
 	 */
-	if (!tc->pool->ti)
+	if (!pool->ti)
 		return 0;	/* nothing is bound */
 
-	blocks = tc->pool->ti->len >> tc->pool->block_shift;
+	blocks = pool->ti->len;
+	(void) sector_div(blocks, pool->sectors_per_block);
 	if (blocks)
-		return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
 
 	return 0;
 }
@@ -2793,7 +2808,7 @@
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
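One subtlety in the constructor change above: dropping !is_power_of_2() does not leave block_size unvalidated. The expression block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1) is the standard x % m == x & (m - 1) trick, valid because the minimum block size is itself a power of 2, so the new rule is "any multiple of the minimum" rather than "any power of 2". A small standalone sketch follows; the 128-sector constant is an assumption standing in for the driver's 64KiB minimum:

#include <stdint.h>
#include <stdio.h>

/* Assumed value: 64KiB expressed in 512-byte sectors. */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS 128u

int main(void)
{
        /* x & (m - 1) equals x % m only when m is a power of 2. */
        uint32_t candidates[] = { 128, 192, 2560, 4096, 1000 };
        size_t i;

        for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
                uint32_t bs = candidates[i];

                printf("block_size %4u sectors: %s\n", bs,
                       (bs & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) ?
                       "rejected (not a multiple of the minimum)" :
                       "accepted");
        }
        return 0;
}

So a 2560-sector (1280KiB) block size, the RAID6 10+2 example from the commit message, now passes validation even though it is not a power of 2, while discard support for such pools is refused by the separate pf.discard_enabled check.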