Commit bfc835b5716fd86b568d4f9b15be04c4f361082b
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
Merge tag 'dm-3.19-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer: "Two stable fixes for dm-cache and one 3.19 DM core fix: - fix potential for dm-cache metadata corruption via stale metadata buffers being used when switching an inactive cache table to active; this could occur due to each table having its own bufio client rather than sharing the client between tables. - fix dm-cache target to properly account for discard IO while suspending otherwise IO quiescing could complete prematurely. - fix DM core's handling of multiple internal suspends by maintaining an 'internal_suspend_count' and only resuming the device when this count drops to zero" * tag 'dm-3.19-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm: fix handling of multiple internal suspends dm cache: fix problematic dual use of a single migration count variable dm cache: share cache-metadata object across inactive and active DM tables
Showing 3 changed files Side-by-side Diff
drivers/md/dm-cache-metadata.c
... | ... | @@ -94,6 +94,9 @@ |
94 | 94 | } __packed; |
95 | 95 | |
96 | 96 | struct dm_cache_metadata { |
97 | + atomic_t ref_count; | |
98 | + struct list_head list; | |
99 | + | |
97 | 100 | struct block_device *bdev; |
98 | 101 | struct dm_block_manager *bm; |
99 | 102 | struct dm_space_map *metadata_sm; |
... | ... | @@ -669,10 +672,10 @@ |
669 | 672 | |
670 | 673 | /*----------------------------------------------------------------*/ |
671 | 674 | |
672 | -struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, | |
673 | - sector_t data_block_size, | |
674 | - bool may_format_device, | |
675 | - size_t policy_hint_size) | |
675 | +static struct dm_cache_metadata *metadata_open(struct block_device *bdev, | |
676 | + sector_t data_block_size, | |
677 | + bool may_format_device, | |
678 | + size_t policy_hint_size) | |
676 | 679 | { |
677 | 680 | int r; |
678 | 681 | struct dm_cache_metadata *cmd; |
... | ... | @@ -683,6 +686,7 @@ |
683 | 686 | return NULL; |
684 | 687 | } |
685 | 688 | |
689 | + atomic_set(&cmd->ref_count, 1); | |
686 | 690 | init_rwsem(&cmd->root_lock); |
687 | 691 | cmd->bdev = bdev; |
688 | 692 | cmd->data_block_size = data_block_size; |
689 | 693 | |
... | ... | @@ -705,10 +709,95 @@ |
705 | 709 | return cmd; |
706 | 710 | } |
707 | 711 | |
712 | +/* | |
713 | + * We keep a little list of ref counted metadata objects to prevent two | |
714 | + * different target instances creating separate bufio instances. This is | |
715 | + * an issue if a table is reloaded before the suspend. | |
716 | + */ | |
717 | +static DEFINE_MUTEX(table_lock); | |
718 | +static LIST_HEAD(table); | |
719 | + | |
720 | +static struct dm_cache_metadata *lookup(struct block_device *bdev) | |
721 | +{ | |
722 | + struct dm_cache_metadata *cmd; | |
723 | + | |
724 | + list_for_each_entry(cmd, &table, list) | |
725 | + if (cmd->bdev == bdev) { | |
726 | + atomic_inc(&cmd->ref_count); | |
727 | + return cmd; | |
728 | + } | |
729 | + | |
730 | + return NULL; | |
731 | +} | |
732 | + | |
733 | +static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, | |
734 | + sector_t data_block_size, | |
735 | + bool may_format_device, | |
736 | + size_t policy_hint_size) | |
737 | +{ | |
738 | + struct dm_cache_metadata *cmd, *cmd2; | |
739 | + | |
740 | + mutex_lock(&table_lock); | |
741 | + cmd = lookup(bdev); | |
742 | + mutex_unlock(&table_lock); | |
743 | + | |
744 | + if (cmd) | |
745 | + return cmd; | |
746 | + | |
747 | + cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); | |
748 | + if (cmd) { | |
749 | + mutex_lock(&table_lock); | |
750 | + cmd2 = lookup(bdev); | |
751 | + if (cmd2) { | |
752 | + mutex_unlock(&table_lock); | |
753 | + __destroy_persistent_data_objects(cmd); | |
754 | + kfree(cmd); | |
755 | + return cmd2; | |
756 | + } | |
757 | + list_add(&cmd->list, &table); | |
758 | + mutex_unlock(&table_lock); | |
759 | + } | |
760 | + | |
761 | + return cmd; | |
762 | +} | |
763 | + | |
764 | +static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) | |
765 | +{ | |
766 | + if (cmd->data_block_size != data_block_size) { | |
767 | + DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", | |
768 | + (unsigned long long) data_block_size, | |
769 | + (unsigned long long) cmd->data_block_size); | |
770 | + return false; | |
771 | + } | |
772 | + | |
773 | + return true; | |
774 | +} | |
775 | + | |
776 | +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, | |
777 | + sector_t data_block_size, | |
778 | + bool may_format_device, | |
779 | + size_t policy_hint_size) | |
780 | +{ | |
781 | + struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, | |
782 | + may_format_device, policy_hint_size); | |
783 | + if (cmd && !same_params(cmd, data_block_size)) { | |
784 | + dm_cache_metadata_close(cmd); | |
785 | + return NULL; | |
786 | + } | |
787 | + | |
788 | + return cmd; | |
789 | +} | |
790 | + | |
708 | 791 | void dm_cache_metadata_close(struct dm_cache_metadata *cmd) |
709 | 792 | { |
710 | - __destroy_persistent_data_objects(cmd); | |
711 | - kfree(cmd); | |
793 | + if (atomic_dec_and_test(&cmd->ref_count)) { | |
794 | + mutex_lock(&table_lock); | |
795 | + list_del(&cmd->list); | |
796 | + mutex_unlock(&table_lock); | |
797 | + | |
798 | + __destroy_persistent_data_objects(cmd); | |
799 | + kfree(cmd); | |
800 | + } | |
712 | 801 | } |
713 | 802 | |
714 | 803 | /* |
drivers/md/dm-cache-target.c
... | ... | @@ -221,8 +221,14 @@ |
221 | 221 | struct list_head need_commit_migrations; |
222 | 222 | sector_t migration_threshold; |
223 | 223 | wait_queue_head_t migration_wait; |
224 | - atomic_t nr_migrations; | |
224 | + atomic_t nr_allocated_migrations; | |
225 | 225 | |
226 | + /* | |
227 | + * The number of in flight migrations that are performing | |
228 | + * background io. eg, promotion, writeback. | |
229 | + */ | |
230 | + atomic_t nr_io_migrations; | |
231 | + | |
226 | 232 | wait_queue_head_t quiescing_wait; |
227 | 233 | atomic_t quiescing; |
228 | 234 | atomic_t quiescing_ack; |
... | ... | @@ -258,7 +264,6 @@ |
258 | 264 | struct dm_deferred_set *all_io_ds; |
259 | 265 | |
260 | 266 | mempool_t *migration_pool; |
261 | - struct dm_cache_migration *next_migration; | |
262 | 267 | |
263 | 268 | struct dm_cache_policy *policy; |
264 | 269 | unsigned policy_nr_args; |
265 | 270 | |
... | ... | @@ -350,10 +355,31 @@ |
350 | 355 | dm_bio_prison_free_cell(cache->prison, cell); |
351 | 356 | } |
352 | 357 | |
358 | +static struct dm_cache_migration *alloc_migration(struct cache *cache) | |
359 | +{ | |
360 | + struct dm_cache_migration *mg; | |
361 | + | |
362 | + mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); | |
363 | + if (mg) { | |
364 | + mg->cache = cache; | |
365 | + atomic_inc(&mg->cache->nr_allocated_migrations); | |
366 | + } | |
367 | + | |
368 | + return mg; | |
369 | +} | |
370 | + | |
371 | +static void free_migration(struct dm_cache_migration *mg) | |
372 | +{ | |
373 | + if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations)) | |
374 | + wake_up(&mg->cache->migration_wait); | |
375 | + | |
376 | + mempool_free(mg, mg->cache->migration_pool); | |
377 | +} | |
378 | + | |
353 | 379 | static int prealloc_data_structs(struct cache *cache, struct prealloc *p) |
354 | 380 | { |
355 | 381 | if (!p->mg) { |
356 | - p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); | |
382 | + p->mg = alloc_migration(cache); | |
357 | 383 | if (!p->mg) |
358 | 384 | return -ENOMEM; |
359 | 385 | } |
... | ... | @@ -382,7 +408,7 @@ |
382 | 408 | free_prison_cell(cache, p->cell1); |
383 | 409 | |
384 | 410 | if (p->mg) |
385 | - mempool_free(p->mg, cache->migration_pool); | |
411 | + free_migration(p->mg); | |
386 | 412 | } |
387 | 413 | |
388 | 414 | static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) |
389 | 415 | |
390 | 416 | |
391 | 417 | |
392 | 418 | |
... | ... | @@ -854,26 +880,16 @@ |
854 | 880 | * Migration covers moving data from the origin device to the cache, or |
855 | 881 | * vice versa. |
856 | 882 | *--------------------------------------------------------------*/ |
857 | -static void free_migration(struct dm_cache_migration *mg) | |
883 | +static void inc_io_migrations(struct cache *cache) | |
858 | 884 | { |
859 | - mempool_free(mg, mg->cache->migration_pool); | |
885 | + atomic_inc(&cache->nr_io_migrations); | |
860 | 886 | } |
861 | 887 | |
862 | -static void inc_nr_migrations(struct cache *cache) | |
888 | +static void dec_io_migrations(struct cache *cache) | |
863 | 889 | { |
864 | - atomic_inc(&cache->nr_migrations); | |
890 | + atomic_dec(&cache->nr_io_migrations); | |
865 | 891 | } |
866 | 892 | |
867 | -static void dec_nr_migrations(struct cache *cache) | |
868 | -{ | |
869 | - atomic_dec(&cache->nr_migrations); | |
870 | - | |
871 | - /* | |
872 | - * Wake the worker in case we're suspending the target. | |
873 | - */ | |
874 | - wake_up(&cache->migration_wait); | |
875 | -} | |
876 | - | |
877 | 893 | static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, |
878 | 894 | bool holder) |
879 | 895 | { |
880 | 896 | |
881 | 897 | |
... | ... | @@ -894,11 +910,10 @@ |
894 | 910 | wake_worker(cache); |
895 | 911 | } |
896 | 912 | |
897 | -static void cleanup_migration(struct dm_cache_migration *mg) | |
913 | +static void free_io_migration(struct dm_cache_migration *mg) | |
898 | 914 | { |
899 | - struct cache *cache = mg->cache; | |
915 | + dec_io_migrations(mg->cache); | |
900 | 916 | free_migration(mg); |
901 | - dec_nr_migrations(cache); | |
902 | 917 | } |
903 | 918 | |
904 | 919 | static void migration_failure(struct dm_cache_migration *mg) |
... | ... | @@ -923,7 +938,7 @@ |
923 | 938 | cell_defer(cache, mg->new_ocell, true); |
924 | 939 | } |
925 | 940 | |
926 | - cleanup_migration(mg); | |
941 | + free_io_migration(mg); | |
927 | 942 | } |
928 | 943 | |
929 | 944 | static void migration_success_pre_commit(struct dm_cache_migration *mg) |
... | ... | @@ -934,7 +949,7 @@ |
934 | 949 | if (mg->writeback) { |
935 | 950 | clear_dirty(cache, mg->old_oblock, mg->cblock); |
936 | 951 | cell_defer(cache, mg->old_ocell, false); |
937 | - cleanup_migration(mg); | |
952 | + free_io_migration(mg); | |
938 | 953 | return; |
939 | 954 | |
940 | 955 | } else if (mg->demote) { |
941 | 956 | |
... | ... | @@ -944,14 +959,14 @@ |
944 | 959 | mg->old_oblock); |
945 | 960 | if (mg->promote) |
946 | 961 | cell_defer(cache, mg->new_ocell, true); |
947 | - cleanup_migration(mg); | |
962 | + free_io_migration(mg); | |
948 | 963 | return; |
949 | 964 | } |
950 | 965 | } else { |
951 | 966 | if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { |
952 | 967 | DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); |
953 | 968 | policy_remove_mapping(cache->policy, mg->new_oblock); |
954 | - cleanup_migration(mg); | |
969 | + free_io_migration(mg); | |
955 | 970 | return; |
956 | 971 | } |
957 | 972 | } |
... | ... | @@ -984,7 +999,7 @@ |
984 | 999 | } else { |
985 | 1000 | if (mg->invalidate) |
986 | 1001 | policy_remove_mapping(cache->policy, mg->old_oblock); |
987 | - cleanup_migration(mg); | |
1002 | + free_io_migration(mg); | |
988 | 1003 | } |
989 | 1004 | |
990 | 1005 | } else { |
... | ... | @@ -999,7 +1014,7 @@ |
999 | 1014 | bio_endio(mg->new_ocell->holder, 0); |
1000 | 1015 | cell_defer(cache, mg->new_ocell, false); |
1001 | 1016 | } |
1002 | - cleanup_migration(mg); | |
1017 | + free_io_migration(mg); | |
1003 | 1018 | } |
1004 | 1019 | } |
1005 | 1020 | |
... | ... | @@ -1251,7 +1266,7 @@ |
1251 | 1266 | mg->new_ocell = cell; |
1252 | 1267 | mg->start_jiffies = jiffies; |
1253 | 1268 | |
1254 | - inc_nr_migrations(cache); | |
1269 | + inc_io_migrations(cache); | |
1255 | 1270 | quiesce_migration(mg); |
1256 | 1271 | } |
1257 | 1272 | |
... | ... | @@ -1275,7 +1290,7 @@ |
1275 | 1290 | mg->new_ocell = NULL; |
1276 | 1291 | mg->start_jiffies = jiffies; |
1277 | 1292 | |
1278 | - inc_nr_migrations(cache); | |
1293 | + inc_io_migrations(cache); | |
1279 | 1294 | quiesce_migration(mg); |
1280 | 1295 | } |
1281 | 1296 | |
... | ... | @@ -1302,7 +1317,7 @@ |
1302 | 1317 | mg->new_ocell = new_ocell; |
1303 | 1318 | mg->start_jiffies = jiffies; |
1304 | 1319 | |
1305 | - inc_nr_migrations(cache); | |
1320 | + inc_io_migrations(cache); | |
1306 | 1321 | quiesce_migration(mg); |
1307 | 1322 | } |
1308 | 1323 | |
... | ... | @@ -1330,7 +1345,7 @@ |
1330 | 1345 | mg->new_ocell = NULL; |
1331 | 1346 | mg->start_jiffies = jiffies; |
1332 | 1347 | |
1333 | - inc_nr_migrations(cache); | |
1348 | + inc_io_migrations(cache); | |
1334 | 1349 | quiesce_migration(mg); |
1335 | 1350 | } |
1336 | 1351 | |
... | ... | @@ -1412,7 +1427,7 @@ |
1412 | 1427 | |
1413 | 1428 | static bool spare_migration_bandwidth(struct cache *cache) |
1414 | 1429 | { |
1415 | - sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * | |
1430 | + sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * | |
1416 | 1431 | cache->sectors_per_block; |
1417 | 1432 | return current_volume < cache->migration_threshold; |
1418 | 1433 | } |
... | ... | @@ -1764,7 +1779,7 @@ |
1764 | 1779 | |
1765 | 1780 | static void wait_for_migrations(struct cache *cache) |
1766 | 1781 | { |
1767 | - wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); | |
1782 | + wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations)); | |
1768 | 1783 | } |
1769 | 1784 | |
1770 | 1785 | static void stop_worker(struct cache *cache) |
... | ... | @@ -1876,9 +1891,6 @@ |
1876 | 1891 | { |
1877 | 1892 | unsigned i; |
1878 | 1893 | |
1879 | - if (cache->next_migration) | |
1880 | - mempool_free(cache->next_migration, cache->migration_pool); | |
1881 | - | |
1882 | 1894 | if (cache->migration_pool) |
1883 | 1895 | mempool_destroy(cache->migration_pool); |
1884 | 1896 | |
... | ... | @@ -2424,7 +2436,8 @@ |
2424 | 2436 | INIT_LIST_HEAD(&cache->quiesced_migrations); |
2425 | 2437 | INIT_LIST_HEAD(&cache->completed_migrations); |
2426 | 2438 | INIT_LIST_HEAD(&cache->need_commit_migrations); |
2427 | - atomic_set(&cache->nr_migrations, 0); | |
2439 | + atomic_set(&cache->nr_allocated_migrations, 0); | |
2440 | + atomic_set(&cache->nr_io_migrations, 0); | |
2428 | 2441 | init_waitqueue_head(&cache->migration_wait); |
2429 | 2442 | |
2430 | 2443 | init_waitqueue_head(&cache->quiescing_wait); |
... | ... | @@ -2486,8 +2499,6 @@ |
2486 | 2499 | *error = "Error creating cache's migration mempool"; |
2487 | 2500 | goto bad; |
2488 | 2501 | } |
2489 | - | |
2490 | - cache->next_migration = NULL; | |
2491 | 2502 | |
2492 | 2503 | cache->need_tick_bio = true; |
2493 | 2504 | cache->sized = false; |
drivers/md/dm.c
... | ... | @@ -206,6 +206,9 @@ |
206 | 206 | /* zero-length flush that will be cloned and submitted to targets */ |
207 | 207 | struct bio flush_bio; |
208 | 208 | |
209 | + /* the number of internal suspends */ | |
210 | + unsigned internal_suspend_count; | |
211 | + | |
209 | 212 | struct dm_stats stats; |
210 | 213 | }; |
211 | 214 | |
... | ... | @@ -2928,7 +2931,7 @@ |
2928 | 2931 | { |
2929 | 2932 | struct dm_table *map = NULL; |
2930 | 2933 | |
2931 | - if (dm_suspended_internally_md(md)) | |
2934 | + if (md->internal_suspend_count++) | |
2932 | 2935 | return; /* nested internal suspend */ |
2933 | 2936 | |
2934 | 2937 | if (dm_suspended_md(md)) { |
... | ... | @@ -2953,7 +2956,9 @@ |
2953 | 2956 | |
2954 | 2957 | static void __dm_internal_resume(struct mapped_device *md) |
2955 | 2958 | { |
2956 | - if (!dm_suspended_internally_md(md)) | |
2959 | + BUG_ON(!md->internal_suspend_count); | |
2960 | + | |
2961 | + if (--md->internal_suspend_count) | |
2957 | 2962 | return; /* resume from nested internal suspend */ |
2958 | 2963 | |
2959 | 2964 | if (dm_suspended_md(md)) |