Commit d5b9dd04bd74b774b8e8d93ced7a0d15ad403fa9

Authored by Mikulas Patocka
Committed by Alasdair G Kergon
Parent: 0864901254

dm: ignore merge_bvec for snapshots when safe

Add a new flag DMF_MERGE_IS_OPTIONAL to struct mapped_device to indicate
whether the device can accept bios larger than the size its merge
function returns.  When set, use this to send large bios to snapshots
which can split them if necessary.  Snapshot I/O may be significantly
fragmented and this approach seems to improve performance.
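
Condensed from the dm.c hunk below, the mechanism is a single per-device
flag that __bind() recomputes each time a new table is bound:

	#define DMF_MERGE_IS_OPTIONAL 6

	/* In __bind(), under md->map_lock, once
	 * dm_table_merge_is_optional() has examined the new
	 * table's targets: */
	if (merge_is_optional)
		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
	else
		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);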

Before the patch, dm_set_device_limits restricted bio size to page size
if the underlying device had a merge function and the target didn't
provide a merge function.  After the patch, dm_set_device_limits
restricts bio size to page size if the underlying device has a merge
function, doesn't have the DMF_MERGE_IS_OPTIONAL flag set and the
target doesn't provide a merge function.
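
The resulting check, taken verbatim from the dm-table.c hunk below;
dm_queue_merge_is_compulsory() returns 0 for a dm device whose
DMF_MERGE_IS_OPTIONAL flag is set, so such devices escape the cap:

	if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
		blk_limits_max_hw_sectors(limits,
					  (unsigned int) (PAGE_SIZE >> 9));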

The snapshot target can't provide a merge function because when the merge
function is called, it is impossible to determine where the bio will be
remapped.  Previously this led us to impose a 4k limit, which we can
now remove if the snapshot store is located on a device without a merge
function.  Together with another patch for optimizing full chunk writes,
it improves performance from 29MB/s to 40MB/s when writing to the
filesystem on the snapshot store.
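
As a hypothetical sketch (not part of this patch), the merge hook a
snapshot target would have to supply looks like the following, and it
cannot be implemented correctly because the bio's final destination
(origin or COW store) is only known at map time:

	static int snapshot_merge_fn(struct dm_target *ti,
				     struct bvec_merge_data *bvm,
				     struct bio_vec *biovec, int max_size)
	{
		/* bvm->bi_sector has not been remapped yet, so the
		 * underlying queue's merge_bvec_fn cannot be
		 * consulted meaningfully here. */
		return max_size;
	}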

If the snapshot store is placed on a non-dm device with a merge function
(such as md-raid), device mapper still limits all bios to page size.
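
This follows directly from dm_queue_merge_is_compulsory() in the diff
below, annotated here for the md-raid case:

	if (!q->merge_bvec_fn)			/* md-raid defines one */
		return 0;

	if (q->make_request_fn == dm_request) {	/* false: the queue is md's */
		dev_md = q->queuedata;
		if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
			return 0;
	}

	return 1;	/* compulsory: bios stay capped at page size */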

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Showing 3 changed files with 64 additions and 2 deletions.

drivers/md/dm-table.c
@@ -540,8 +540,7 @@
	 * If not we'll force DM to use PAGE_SIZE or
	 * smaller I/O, just to be safe.
	 */
-
-	if (q->merge_bvec_fn && !ti->type->merge)
+	if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
		blk_limits_max_hw_sectors(limits,
					  (unsigned int) (PAGE_SIZE >> 9));
	return 0;

drivers/md/dm.c
@@ -111,6 +111,7 @@
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_MERGE_IS_OPTIONAL 6

 /*
  * Work processed by per-device workqueue.
@@ -1993,6 +1994,59 @@
 }

 /*
+ * Return 1 if the queue has a compulsory merge_bvec_fn function.
+ *
+ * If this function returns 0, then the device is either a non-dm
+ * device without a merge_bvec_fn, or it is a dm device that is
+ * able to split any bios it receives that are too big.
+ */
+int dm_queue_merge_is_compulsory(struct request_queue *q)
+{
+	struct mapped_device *dev_md;
+
+	if (!q->merge_bvec_fn)
+		return 0;
+
+	if (q->make_request_fn == dm_request) {
+		dev_md = q->queuedata;
+		if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int dm_device_merge_is_compulsory(struct dm_target *ti,
+					 struct dm_dev *dev, sector_t start,
+					 sector_t len, void *data)
+{
+	struct block_device *bdev = dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	return dm_queue_merge_is_compulsory(q);
+}
+
+/*
+ * Return 1 if it is acceptable to ignore merge_bvec_fn based
+ * on the properties of the underlying devices.
+ */
+static int dm_table_merge_is_optional(struct dm_table *table)
+{
+	unsigned i = 0;
+	struct dm_target *ti;
+
+	while (i < dm_table_get_num_targets(table)) {
+		ti = dm_table_get_target(table, i++);
+
+		if (ti->type->iterate_devices &&
+		    ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
  * Returns old map, which caller must destroy.
  */
 static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
@@ -2002,6 +2056,7 @@
 	struct request_queue *q = md->queue;
 	sector_t size;
 	unsigned long flags;
+	int merge_is_optional;

 	size = dm_table_get_size(t);

@@ -2027,10 +2082,16 @@

 	__bind_mempools(md, t);

+	merge_is_optional = dm_table_merge_is_optional(t);
+
 	write_lock_irqsave(&md->map_lock, flags);
 	old_map = md->map;
 	md->map = t;
 	dm_table_set_restrictions(t, q, limits);
+	if (merge_is_optional)
+		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
+	else
+		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
 	write_unlock_irqrestore(&md->map_lock, flags);

 	return old_map;

drivers/md/dm.h
@@ -66,6 +66,8 @@
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);

+int dm_queue_merge_is_compulsory(struct request_queue *q);
+
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, unsigned type);