Commit 2dd9c257fbc243aa76ee6db0bb8371f9f74fad2d

Authored by Joe Thornber
Committed by Alasdair G Kergon
1 parent c4a69ecdb4

dm thin: support read only external snapshot origins

Support the use of an external _read only_ device as an origin for a thin
device.

Any read to an unprovisioned area of the thin device will be passed
through to the origin.  Writes trigger allocation of new blocks as
usual.

One possible use case for this would be VM hosts that want to run
guests on thinly-provisioned volumes but have the base image on another
device (possibly shared between many VMs).
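
As a concrete sketch (assuming a pool already set up at
/dev/mapper/pool and a read-only base image at /dev/image, matching the
examples in the documentation below):

  # Create a thin device; the origin is not mentioned at this stage.
  dmsetup message /dev/mapper/pool 0 "create_thin 0"

  # Activate it, appending the external origin as an extra parameter.
  dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"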

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Showing 2 changed files with 109 additions and 15 deletions

Documentation/device-mapper/thin-provisioning.txt
@@ -169,6 +169,38 @@
 
 dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
 
+External snapshots
+------------------
+
+You can use an external _read only_ device as an origin for a
+thinly-provisioned volume. Any read to an unprovisioned area of the
+thin device will be passed through to the origin. Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+  This is the same as creating a thin device.
+  You don't mention the origin at this stage.
+
+  dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device
+
+  Append an extra parameter to the thin target specifying the origin:
+
+  dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+  N.B. All descendants (internal snapshots) of this snapshot require the
+  same extra origin parameter.
+
 Deactivation
 ------------
 
@@ -254,7 +286,7 @@
 
 i) Constructor
 
-    thin <pool dev> <dev id>
+    thin <pool dev> <dev id> [<external origin dev>]
 
     pool dev:
	the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
@@ -262,6 +294,11 @@
     dev id:
	the internal device identifier of the device to be
	activated.
+
+    external origin dev:
+	an optional block device outside the pool to be treated as a
+	read-only snapshot origin: reads to unprovisioned areas of the
+	thin target will be mapped to this device.
 
 The pool doesn't store any size against the thin devices. If you
 load a thin target that is smaller than you've been using previously,
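
To make the N.B. in the documentation above concrete, a short sketch
(assuming the create_snap pool message documented elsewhere in
thin-provisioning.txt, the same devices as in the examples above, and
snap2 as an arbitrary name for the new mapping):

  # Take an internal snapshot of the external-origin device (id 0),
  # giving it device id 1.
  dmsetup message /dev/mapper/pool 0 "create_snap 1 0"

  # The descendant requires the same extra origin parameter.
  dmsetup create snap2 --table "0 2097152 thin /dev/mapper/pool 1 /dev/image"
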
drivers/md/dm-thin.c
@@ -549,6 +549,7 @@
  */
 struct thin_c {
	struct dm_dev *pool_dev;
+	struct dm_dev *origin_dev;
	dm_thin_id dev_id;
 
	struct pool *pool;
@@ -666,14 +667,16 @@
		(bio->bi_sector & pool->offset_mask);
 }
 
-static void remap_and_issue(struct thin_c *tc, struct bio *bio,
-			    dm_block_t block)
+static void remap_to_origin(struct thin_c *tc, struct bio *bio)
 {
+	bio->bi_bdev = tc->origin_dev->bdev;
+}
+
+static void issue(struct thin_c *tc, struct bio *bio)
+{
	struct pool *pool = tc->pool;
	unsigned long flags;
 
-	remap(tc, bio, block);
-
	/*
	 * Batch together any FUA/FLUSH bios we find and then issue
	 * a single commit for them in process_deferred_bios().
@@ -686,6 +689,19 @@
	generic_make_request(bio);
 }
 
+static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
+{
+	remap_to_origin(tc, bio);
+	issue(tc, bio);
+}
+
+static void remap_and_issue(struct thin_c *tc, struct bio *bio,
+			    dm_block_t block)
+{
+	remap(tc, bio, block);
+	issue(tc, bio);
+}
+
 /*
  * wake_worker() is used when new work is queued and when pool_resume is
  * ready to continue deferred IO processing.
@@ -932,7 +948,8 @@
 }
 
 static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
-			  dm_block_t data_origin, dm_block_t data_dest,
+			  struct dm_dev *origin, dm_block_t data_origin,
+			  dm_block_t data_dest,
			  struct cell *cell, struct bio *bio)
 {
	int r;
@@ -964,7 +981,7 @@
	} else {
		struct dm_io_region from, to;
 
-		from.bdev = tc->pool_dev->bdev;
+		from.bdev = origin->bdev;
		from.sector = data_origin * pool->sectors_per_block;
		from.count = pool->sectors_per_block;
 
@@ -982,6 +999,22 @@
	}
 }
 
+static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
+				   dm_block_t data_origin, dm_block_t data_dest,
+				   struct cell *cell, struct bio *bio)
+{
+	schedule_copy(tc, virt_block, tc->pool_dev,
+		      data_origin, data_dest, cell, bio);
+}
+
+static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
+				   dm_block_t data_dest,
+				   struct cell *cell, struct bio *bio)
+{
+	schedule_copy(tc, virt_block, tc->origin_dev,
+		      virt_block, data_dest, cell, bio);
+}
+
 static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
			  dm_block_t data_block, struct cell *cell,
			  struct bio *bio)
@@ -1128,8 +1161,8 @@
	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
-		schedule_copy(tc, block, lookup_result->block,
-			      data_block, cell, bio);
+		schedule_internal_copy(tc, block, lookup_result->block,
+				       data_block, cell, bio);
		break;
 
	case -ENOSPC:
@@ -1203,7 +1236,10 @@
	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
-		schedule_zero(tc, block, data_block, cell, bio);
+		if (tc->origin_dev)
+			schedule_external_copy(tc, block, data_block, cell, bio);
+		else
+			schedule_zero(tc, block, data_block, cell, bio);
		break;
 
	case -ENOSPC:
@@ -1254,7 +1290,11 @@
		break;
 
	case -ENODATA:
-		provision_block(tc, bio, block, cell);
+		if (bio_data_dir(bio) == READ && tc->origin_dev) {
+			cell_release_singleton(cell, bio);
+			remap_to_origin_and_issue(tc, bio);
+		} else
+			provision_block(tc, bio, block, cell);
		break;
 
	default:
@@ -2237,6 +2277,8 @@
	__pool_dec(tc->pool);
	dm_pool_close_thin_device(tc->td);
	dm_put_device(ti, tc->pool_dev);
+	if (tc->origin_dev)
+		dm_put_device(ti, tc->origin_dev);
	kfree(tc);
 
	mutex_unlock(&dm_thin_pool_table.mutex);
@@ -2245,21 +2287,22 @@
 /*
  * Thin target parameters:
  *
- * <pool_dev> <dev_id>
+ * <pool_dev> <dev_id> [origin_dev]
  *
  * pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
  * dev_id: the internal device identifier
+ * origin_dev: a device external to the pool that should act as the origin
  */
 static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 {
	int r;
	struct thin_c *tc;
-	struct dm_dev *pool_dev;
+	struct dm_dev *pool_dev, *origin_dev;
	struct mapped_device *pool_md;
 
	mutex_lock(&dm_thin_pool_table.mutex);
 
-	if (argc != 2) {
+	if (argc != 2 && argc != 3) {
		ti->error = "Invalid argument count";
		r = -EINVAL;
		goto out_unlock;
@@ -2272,6 +2315,15 @@
		goto out_unlock;
	}
 
+	if (argc == 3) {
+		r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
+		if (r) {
+			ti->error = "Error opening origin device";
+			goto bad_origin_dev;
+		}
+		tc->origin_dev = origin_dev;
+	}
+
	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev);
	if (r) {
		ti->error = "Error opening pool device";
@@ -2324,6 +2376,9 @@
 bad_common:
	dm_put_device(ti, tc->pool_dev);
 bad_pool_dev:
+	if (tc->origin_dev)
+		dm_put_device(ti, tc->origin_dev);
+bad_origin_dev:
	kfree(tc);
 out_unlock:
	mutex_unlock(&dm_thin_pool_table.mutex);
@@ -2382,6 +2437,8 @@
		DMEMIT("%s %lu",
		       format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
		       (unsigned long) tc->dev_id);
+		if (tc->origin_dev)
+			DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
		break;
	}
 }
@@ -2419,7 +2476,7 @@
 
 static struct target_type thin_target = {
	.name = "thin",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
	.module = THIS_MODULE,
	.ctr = thin_ctr,
	.dtr = thin_dtr,
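
With the status change above, the external origin is reported back when
the table is read out. A sketch of the expected output (assuming the
snap device from the documentation examples; the major:minor numbers
are illustrative and depend on the system):

  # dmsetup table snap
  0 2097152 thin 253:0 0 7:0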