Commit 2dd9c257fbc243aa76ee6db0bb8371f9f74fad2d
Committed by Alasdair G Kergon.
1 parent: c4a69ecdb4.
Exists in master and in 20 other branches.
dm thin: support read only external snapshot origins
Support the use of an external _read only_ device as an origin for a thin device. Any read to an unprovisioned area of the thin device will be passed through to the origin. Writes trigger allocation of new blocks as usual. One possible use case for this would be VM hosts that want to run guests on thinly-provisioned volumes but have the base image on another device (possibly shared between many VMs). Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Showing 2 changed files with 109 additions and 15 deletions (side-by-side diff).
Documentation/device-mapper/thin-provisioning.txt
... | ... | @@ -169,6 +169,38 @@ |
169 | 169 | |
170 | 170 | dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1" |
171 | 171 | |
172 | +External snapshots | |
173 | +------------------ | |
174 | + | |
175 | +You can use an external _read only_ device as an origin for a | |
176 | +thinly-provisioned volume. Any read to an unprovisioned area of the | |
177 | +thin device will be passed through to the origin. Writes trigger | |
178 | +the allocation of new blocks as usual. | |
179 | + | |
180 | +One use case for this is VM hosts that want to run guests on | |
181 | +thinly-provisioned volumes but have the base image on another device | |
182 | +(possibly shared between many VMs). | |
183 | + | |
184 | +You must not write to the origin device if you use this technique! | |
185 | +Of course, you may write to the thin device and take internal snapshots | |
186 | +of the thin volume. | |
187 | + | |
188 | +i) Creating a snapshot of an external device | |
189 | + | |
190 | + This is the same as creating a thin device. | |
191 | + You don't mention the origin at this stage. | |
192 | + | |
193 | + dmsetup message /dev/mapper/pool 0 "create_thin 0" | |
194 | + | |
195 | +ii) Using a snapshot of an external device. | |
196 | + | |
197 | + Append an extra parameter to the thin target specifying the origin: | |
198 | + | |
199 | + dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image" | |
200 | + | |
201 | + N.B. All descendants (internal snapshots) of this snapshot require the | |
202 | + same extra origin parameter. | |
203 | + | |
172 | 204 | Deactivation |
173 | 205 | ------------ |
174 | 206 | |
... | ... | @@ -254,7 +286,7 @@ |
254 | 286 | |
255 | 287 | i) Constructor |
256 | 288 | |
257 | - thin <pool dev> <dev id> | |
289 | + thin <pool dev> <dev id> [<external origin dev>] | |
258 | 290 | |
259 | 291 | pool dev: |
260 | 292 | the thin-pool device, e.g. /dev/mapper/my_pool or 253:0 |
... | ... | @@ -262,6 +294,11 @@ |
262 | 294 | dev id: |
263 | 295 | the internal device identifier of the device to be |
264 | 296 | activated. |
297 | + | |
298 | + external origin dev: | |
299 | + an optional block device outside the pool to be treated as a | |
300 | + read-only snapshot origin: reads to unprovisioned areas of the | |
301 | + thin target will be mapped to this device. | |
265 | 302 | |
266 | 303 | The pool doesn't store any size against the thin devices. If you |
267 | 304 | load a thin target that is smaller than you've been using previously, |
drivers/md/dm-thin.c
... | ... | @@ -549,6 +549,7 @@ |
549 | 549 | */ |
550 | 550 | struct thin_c { |
551 | 551 | struct dm_dev *pool_dev; |
552 | + struct dm_dev *origin_dev; | |
552 | 553 | dm_thin_id dev_id; |
553 | 554 | |
554 | 555 | struct pool *pool; |
555 | 556 | |
556 | 557 | |
... | ... | @@ -666,14 +667,16 @@ |
666 | 667 | (bio->bi_sector & pool->offset_mask); |
667 | 668 | } |
668 | 669 | |
669 | -static void remap_and_issue(struct thin_c *tc, struct bio *bio, | |
670 | - dm_block_t block) | |
670 | +static void remap_to_origin(struct thin_c *tc, struct bio *bio) | |
671 | 671 | { |
672 | + bio->bi_bdev = tc->origin_dev->bdev; | |
673 | +} | |
674 | + | |
675 | +static void issue(struct thin_c *tc, struct bio *bio) | |
676 | +{ | |
672 | 677 | struct pool *pool = tc->pool; |
673 | 678 | unsigned long flags; |
674 | 679 | |
675 | - remap(tc, bio, block); | |
676 | - | |
677 | 680 | /* |
678 | 681 | * Batch together any FUA/FLUSH bios we find and then issue |
679 | 682 | * a single commit for them in process_deferred_bios(). |
... | ... | @@ -686,6 +689,19 @@ |
686 | 689 | generic_make_request(bio); |
687 | 690 | } |
688 | 691 | |
692 | +static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) | |
693 | +{ | |
694 | + remap_to_origin(tc, bio); | |
695 | + issue(tc, bio); | |
696 | +} | |
697 | + | |
698 | +static void remap_and_issue(struct thin_c *tc, struct bio *bio, | |
699 | + dm_block_t block) | |
700 | +{ | |
701 | + remap(tc, bio, block); | |
702 | + issue(tc, bio); | |
703 | +} | |
704 | + | |
689 | 705 | /* |
690 | 706 | * wake_worker() is used when new work is queued and when pool_resume is |
691 | 707 | * ready to continue deferred IO processing. |
... | ... | @@ -932,7 +948,8 @@ |
932 | 948 | } |
933 | 949 | |
934 | 950 | static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, |
935 | - dm_block_t data_origin, dm_block_t data_dest, | |
951 | + struct dm_dev *origin, dm_block_t data_origin, | |
952 | + dm_block_t data_dest, | |
936 | 953 | struct cell *cell, struct bio *bio) |
937 | 954 | { |
938 | 955 | int r; |
... | ... | @@ -964,7 +981,7 @@ |
964 | 981 | } else { |
965 | 982 | struct dm_io_region from, to; |
966 | 983 | |
967 | - from.bdev = tc->pool_dev->bdev; | |
984 | + from.bdev = origin->bdev; | |
968 | 985 | from.sector = data_origin * pool->sectors_per_block; |
969 | 986 | from.count = pool->sectors_per_block; |
970 | 987 | |
... | ... | @@ -982,6 +999,22 @@ |
982 | 999 | } |
983 | 1000 | } |
984 | 1001 | |
1002 | +static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, | |
1003 | + dm_block_t data_origin, dm_block_t data_dest, | |
1004 | + struct cell *cell, struct bio *bio) | |
1005 | +{ | |
1006 | + schedule_copy(tc, virt_block, tc->pool_dev, | |
1007 | + data_origin, data_dest, cell, bio); | |
1008 | +} | |
1009 | + | |
1010 | +static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, | |
1011 | + dm_block_t data_dest, | |
1012 | + struct cell *cell, struct bio *bio) | |
1013 | +{ | |
1014 | + schedule_copy(tc, virt_block, tc->origin_dev, | |
1015 | + virt_block, data_dest, cell, bio); | |
1016 | +} | |
1017 | + | |
985 | 1018 | static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, |
986 | 1019 | dm_block_t data_block, struct cell *cell, |
987 | 1020 | struct bio *bio) |
... | ... | @@ -1128,8 +1161,8 @@ |
1128 | 1161 | r = alloc_data_block(tc, &data_block); |
1129 | 1162 | switch (r) { |
1130 | 1163 | case 0: |
1131 | - schedule_copy(tc, block, lookup_result->block, | |
1132 | - data_block, cell, bio); | |
1164 | + schedule_internal_copy(tc, block, lookup_result->block, | |
1165 | + data_block, cell, bio); | |
1133 | 1166 | break; |
1134 | 1167 | |
1135 | 1168 | case -ENOSPC: |
... | ... | @@ -1203,7 +1236,10 @@ |
1203 | 1236 | r = alloc_data_block(tc, &data_block); |
1204 | 1237 | switch (r) { |
1205 | 1238 | case 0: |
1206 | - schedule_zero(tc, block, data_block, cell, bio); | |
1239 | + if (tc->origin_dev) | |
1240 | + schedule_external_copy(tc, block, data_block, cell, bio); | |
1241 | + else | |
1242 | + schedule_zero(tc, block, data_block, cell, bio); | |
1207 | 1243 | break; |
1208 | 1244 | |
1209 | 1245 | case -ENOSPC: |
... | ... | @@ -1254,7 +1290,11 @@ |
1254 | 1290 | break; |
1255 | 1291 | |
1256 | 1292 | case -ENODATA: |
1257 | - provision_block(tc, bio, block, cell); | |
1293 | + if (bio_data_dir(bio) == READ && tc->origin_dev) { | |
1294 | + cell_release_singleton(cell, bio); | |
1295 | + remap_to_origin_and_issue(tc, bio); | |
1296 | + } else | |
1297 | + provision_block(tc, bio, block, cell); | |
1258 | 1298 | break; |
1259 | 1299 | |
1260 | 1300 | default: |
... | ... | @@ -2237,6 +2277,8 @@ |
2237 | 2277 | __pool_dec(tc->pool); |
2238 | 2278 | dm_pool_close_thin_device(tc->td); |
2239 | 2279 | dm_put_device(ti, tc->pool_dev); |
2280 | + if (tc->origin_dev) | |
2281 | + dm_put_device(ti, tc->origin_dev); | |
2240 | 2282 | kfree(tc); |
2241 | 2283 | |
2242 | 2284 | mutex_unlock(&dm_thin_pool_table.mutex); |
2243 | 2285 | |
2244 | 2286 | |
2245 | 2287 | |
... | ... | @@ -2245,21 +2287,22 @@ |
2245 | 2287 | /* |
2246 | 2288 | * Thin target parameters: |
2247 | 2289 | * |
2248 | - * <pool_dev> <dev_id> | |
2290 | + * <pool_dev> <dev_id> [origin_dev] | |
2249 | 2291 | * |
2250 | 2292 | * pool_dev: the path to the pool (eg, /dev/mapper/my_pool) |
2251 | 2293 | * dev_id: the internal device identifier |
2294 | + * origin_dev: a device external to the pool that should act as the origin | |
2252 | 2295 | */ |
2253 | 2296 | static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) |
2254 | 2297 | { |
2255 | 2298 | int r; |
2256 | 2299 | struct thin_c *tc; |
2257 | - struct dm_dev *pool_dev; | |
2300 | + struct dm_dev *pool_dev, *origin_dev; | |
2258 | 2301 | struct mapped_device *pool_md; |
2259 | 2302 | |
2260 | 2303 | mutex_lock(&dm_thin_pool_table.mutex); |
2261 | 2304 | |
2262 | - if (argc != 2) { | |
2305 | + if (argc != 2 && argc != 3) { | |
2263 | 2306 | ti->error = "Invalid argument count"; |
2264 | 2307 | r = -EINVAL; |
2265 | 2308 | goto out_unlock; |
... | ... | @@ -2272,6 +2315,15 @@ |
2272 | 2315 | goto out_unlock; |
2273 | 2316 | } |
2274 | 2317 | |
2318 | + if (argc == 3) { | |
2319 | + r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); | |
2320 | + if (r) { | |
2321 | + ti->error = "Error opening origin device"; | |
2322 | + goto bad_origin_dev; | |
2323 | + } | |
2324 | + tc->origin_dev = origin_dev; | |
2325 | + } | |
2326 | + | |
2275 | 2327 | r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev); |
2276 | 2328 | if (r) { |
2277 | 2329 | ti->error = "Error opening pool device"; |
... | ... | @@ -2324,6 +2376,9 @@ |
2324 | 2376 | bad_common: |
2325 | 2377 | dm_put_device(ti, tc->pool_dev); |
2326 | 2378 | bad_pool_dev: |
2379 | + if (tc->origin_dev) | |
2380 | + dm_put_device(ti, tc->origin_dev); | |
2381 | +bad_origin_dev: | |
2327 | 2382 | kfree(tc); |
2328 | 2383 | out_unlock: |
2329 | 2384 | mutex_unlock(&dm_thin_pool_table.mutex); |
... | ... | @@ -2382,6 +2437,8 @@ |
2382 | 2437 | DMEMIT("%s %lu", |
2383 | 2438 | format_dev_t(buf, tc->pool_dev->bdev->bd_dev), |
2384 | 2439 | (unsigned long) tc->dev_id); |
2440 | + if (tc->origin_dev) | |
2441 | + DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev)); | |
2385 | 2442 | break; |
2386 | 2443 | } |
2387 | 2444 | } |
... | ... | @@ -2419,7 +2476,7 @@ |
2419 | 2476 | |
2420 | 2477 | static struct target_type thin_target = { |
2421 | 2478 | .name = "thin", |
2422 | - .version = {1, 0, 0}, | |
2479 | + .version = {1, 1, 0}, | |
2423 | 2480 | .module = THIS_MODULE, |
2424 | 2481 | .ctr = thin_ctr, |
2425 | 2482 | .dtr = thin_dtr, |