Commit 55f2b8bdb0c7387eb2dc645b9ecbe5d0faa6b54e

Authored by Mike Snitzer
Committed by Alasdair G Kergon
1 parent 33d07c0dfa

dm thin: support for non power of 2 pool blocksize

Non-power-of-2 blocksize support is needed to properly align thinp IO
on storage that has non-power-of-2 optimal IO sizes (e.g. RAID6 10+2).
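
For instance (an illustrative configuration, not taken from the commit
itself): a 10+2 RAID6 array with hypothetical 64KiB chunks has a 640KiB
full stripe, i.e. an optimal IO size of 1280 sectors, and 1280 = 2^8 * 5
is not a power of 2.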

Use sector_div to support a non-power-of-2 blocksize for the pool's
data device.  This performs comparably to the power-of-2 shift-and-mask
math used until now (as tested on modern x86_64 hardware).
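
As a minimal userspace sketch of the sector_div pattern the diff below
adopts (illustrative only: the in-kernel sector_div is a macro that
divides its first argument in place and returns the 32-bit remainder;
it is modelled here with a pointer):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;

    /* Userspace model of the kernel's sector_div(): divide a 64-bit
     * sector count in place and return the 32-bit remainder. */
    static uint32_t sector_div(sector_t *n, uint32_t base)
    {
        uint32_t rem = (uint32_t)(*n % base);
        *n /= base;
        return rem;
    }

    int main(void)
    {
        uint32_t sectors_per_block = 1280;  /* 640KiB: not a power of 2 */
        sector_t block_nr = 123456789;      /* a bio's starting sector */
        uint32_t offset = sector_div(&block_nr, sectors_per_block);

        /* block_nr is now the thin block; offset, the sector within it */
        printf("block %llu, offset %u\n",
               (unsigned long long)block_nr, offset);
        return 0;
    }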

The kernel currently assumes that limits->discard_granularity is a power
of two, so the thin target only enables discard support if the block
size is a power of two.
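
The underlying reason (a generic illustration, not the block layer's
actual code) is that alignment arithmetic of this kind is done with
masking, and x & (g - 1) only equals x % g when g is a power of 2:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t x = 1536;

        /* For a power-of-2 granularity, masking == modulo ... */
        assert((x & (1024 - 1)) == x % 1024);
        /* ... but for 1280 (= 2^8 * 5) the mask gives a wrong answer */
        assert((x & (1280 - 1)) != x % 1280);
        return 0;
    }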

Eliminate the pool structure's 'block_shift' and 'offset_mask' fields,
along with its remaining 4-byte holes.
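
The holes are alignment padding inserted by the compiler; tools such as
pahole expose them.  A generic sketch of the reordering idea (field
names and sizes are illustrative, assuming x86_64):

    #include <stdint.h>
    #include <stdio.h>

    struct with_holes {        /* mirrors the old layout's problem */
        uint32_t a;            /* 4 bytes, then a 4-byte hole ...  */
        uint64_t b;            /* ... so b gets 8-byte alignment   */
        uint32_t c;            /* 4 bytes, plus 4 bytes of tail pad */
    };                         /* sizeof == 24 on x86_64 */

    struct without_holes {     /* mirrors the reordering fix */
        uint64_t b;            /* widest member first */
        uint32_t a;            /* the two 4-byte members now ... */
        uint32_t c;            /* ... share a single 8-byte slot */
    };                         /* sizeof == 16 on x86_64 */

    int main(void)
    {
        printf("%zu vs %zu\n", sizeof(struct with_holes),
               sizeof(struct without_holes));
        return 0;
    }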

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Showing 1 changed file with 37 additions and 22 deletions

drivers/md/dm-thin.c
... ... @@ -510,10 +510,8 @@
510 510 struct block_device *md_dev;
511 511 struct dm_pool_metadata *pmd;
512 512  
513   - uint32_t sectors_per_block;
514   - unsigned block_shift;
515   - dm_block_t offset_mask;
516 513 dm_block_t low_water_blocks;
  514 + uint32_t sectors_per_block;
517 515  
518 516 struct pool_features pf;
519 517 unsigned low_water_triggered:1; /* A dm event has been sent */
520 518  
... ... @@ -526,8 +524,8 @@
526 524 struct work_struct worker;
527 525 struct delayed_work waker;
528 526  
529   - unsigned ref_count;
530 527 unsigned long last_commit_jiffies;
  528 + unsigned ref_count;
531 529  
532 530 spinlock_t lock;
533 531 struct bio_list deferred_bios;
534 532  
535 533  
... ... @@ -679,16 +677,21 @@
679 677  
680 678 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
681 679 {
682   - return bio->bi_sector >> tc->pool->block_shift;
  680 + sector_t block_nr = bio->bi_sector;
  681 +
  682 + (void) sector_div(block_nr, tc->pool->sectors_per_block);
  683 +
  684 + return block_nr;
683 685 }
684 686  
685 687 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
686 688 {
687 689 struct pool *pool = tc->pool;
  690 + sector_t bi_sector = bio->bi_sector;
688 691  
689 692 bio->bi_bdev = tc->pool_dev->bdev;
690   - bio->bi_sector = (block << pool->block_shift) +
691   - (bio->bi_sector & pool->offset_mask);
  693 + bio->bi_sector = (block * pool->sectors_per_block) +
  694 + sector_div(bi_sector, pool->sectors_per_block);
692 695 }
693 696  
694 697 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
695 698  
... ... @@ -933,9 +936,10 @@
933 936 */
934 937 static int io_overlaps_block(struct pool *pool, struct bio *bio)
935 938 {
936   - return !(bio->bi_sector & pool->offset_mask) &&
937   - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
  939 + sector_t bi_sector = bio->bi_sector;
938 940  
  941 + return !sector_div(bi_sector, pool->sectors_per_block) &&
  942 + (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
939 943 }
940 944  
941 945 static int io_overwrites_block(struct pool *pool, struct bio *bio)
... ... @@ -1239,8 +1243,8 @@
1239 1243 * part of the discard that is in a subsequent
1240 1244 * block.
1241 1245 */
1242   - sector_t offset = bio->bi_sector - (block << pool->block_shift);
1243   - unsigned remaining = (pool->sectors_per_block - offset) << 9;
  1246 + sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
  1247 + unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
1244 1248 bio->bi_size = min(bio->bi_size, remaining);
1245 1249  
1246 1250 cell_release_singleton(cell, bio);
... ... @@ -1722,8 +1726,6 @@
1722 1726  
1723 1727 pool->pmd = pmd;
1724 1728 pool->sectors_per_block = block_size;
1725   - pool->block_shift = ffs(block_size) - 1;
1726   - pool->offset_mask = block_size - 1;
1727 1729 pool->low_water_blocks = 0;
1728 1730 pool_features_init(&pool->pf);
1729 1731 pool->prison = prison_create(PRISON_CELLS);
... ... @@ -1971,7 +1973,7 @@
1971 1973 if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
1972 1974 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
1973 1975 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
1974   - !is_power_of_2(block_size)) {
  1976 + block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
1975 1977 ti->error = "Invalid block size";
1976 1978 r = -EINVAL;
1977 1979 goto out;
... ... @@ -2018,6 +2020,15 @@
2018 2020 goto out_flags_changed;
2019 2021 }
2020 2022  
  2023 + /*
  2024 + * The block layer requires discard_granularity to be a power of 2.
  2025 + */
  2026 + if (pf.discard_enabled && !is_power_of_2(block_size)) {
  2027 + ti->error = "Discard support must be disabled when the block size is not a power of 2";
  2028 + r = -EINVAL;
  2029 + goto out_flags_changed;
  2030 + }
  2031 +
2021 2032 pt->pool = pool;
2022 2033 pt->ti = ti;
2023 2034 pt->metadata_dev = metadata_dev;
... ... @@ -2097,7 +2108,8 @@
2097 2108 int r;
2098 2109 struct pool_c *pt = ti->private;
2099 2110 struct pool *pool = pt->pool;
2100   - dm_block_t data_size, sb_data_size;
  2111 + sector_t data_size = ti->len;
  2112 + dm_block_t sb_data_size;
2101 2113  
2102 2114 /*
2103 2115 * Take control of the pool object.
... ... @@ -2106,7 +2118,8 @@
2106 2118 if (r)
2107 2119 return r;
2108 2120  
2109   - data_size = ti->len >> pool->block_shift;
  2121 + (void) sector_div(data_size, pool->sectors_per_block);
  2122 +
2110 2123 r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
2111 2124 if (r) {
2112 2125 DMERR("failed to retrieve data device size");
... ... @@ -2115,7 +2128,7 @@
2115 2128  
2116 2129 if (data_size < sb_data_size) {
2117 2130 DMERR("pool target too small, is %llu blocks (expected %llu)",
2118   - data_size, sb_data_size);
  2131 + (unsigned long long)data_size, sb_data_size);
2119 2132 return -EINVAL;
2120 2133  
2121 2134 } else if (data_size > sb_data_size) {
2122 2135  
2123 2136  
2124 2137  
2125 2138  
... ... @@ -2764,19 +2777,21 @@
2764 2777 static int thin_iterate_devices(struct dm_target *ti,
2765 2778 iterate_devices_callout_fn fn, void *data)
2766 2779 {
2767   - dm_block_t blocks;
  2780 + sector_t blocks;
2768 2781 struct thin_c *tc = ti->private;
  2782 + struct pool *pool = tc->pool;
2769 2783  
2770 2784 /*
2771 2785 * We can't call dm_pool_get_data_dev_size() since that blocks. So
2772 2786 * we follow a more convoluted path through to the pool's target.
2773 2787 */
2774   - if (!tc->pool->ti)
  2788 + if (!pool->ti)
2775 2789 return 0; /* nothing is bound */
2776 2790  
2777   - blocks = tc->pool->ti->len >> tc->pool->block_shift;
  2791 + blocks = pool->ti->len;
  2792 + (void) sector_div(blocks, pool->sectors_per_block);
2778 2793 if (blocks)
2779   - return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
  2794 + return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
2780 2795  
2781 2796 return 0;
2782 2797 }
... ... @@ -2793,7 +2808,7 @@
2793 2808  
2794 2809 static struct target_type thin_target = {
2795 2810 .name = "thin",
2796   - .version = {1, 1, 0},
  2811 + .version = {1, 2, 0},
2797 2812 .module = THIS_MODULE,
2798 2813 .ctr = thin_ctr,
2799 2814 .dtr = thin_dtr,