Commit bfc835b5716fd86b568d4f9b15be04c4f361082b

Authored by Linus Torvalds

Merge tag 'dm-3.19-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:
 "Two stable fixes for dm-cache and one 3.19 DM core fix:

   - fix potential for dm-cache metadata corruption via stale metadata
     buffers being used when switching an inactive cache table to
     active; this could occur due to each table having its own bufio
     client rather than sharing the client between tables.

   - fix dm-cache target to properly account for discard IO while
     suspending; otherwise IO quiescing could complete prematurely.

   - fix DM core's handling of multiple internal suspends by maintaining
     an 'internal_suspend_count' and only resuming the device when this
     count drops to zero"

* tag 'dm-3.19-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: fix handling of multiple internal suspends
  dm cache: fix problematic dual use of a single migration count variable
  dm cache: share cache-metadata object across inactive and active DM tables

Showing 3 changed files

drivers/md/dm-cache-metadata.c
@@ -94,6 +94,9 @@
 } __packed;
 
 struct dm_cache_metadata {
+	atomic_t ref_count;
+	struct list_head list;
+
 	struct block_device *bdev;
 	struct dm_block_manager *bm;
 	struct dm_space_map *metadata_sm;
@@ -669,10 +672,10 @@
 
 /*----------------------------------------------------------------*/
 
-struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
-						 sector_t data_block_size,
-						 bool may_format_device,
-						 size_t policy_hint_size)
+static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
+					       sector_t data_block_size,
+					       bool may_format_device,
+					       size_t policy_hint_size)
 {
 	int r;
 	struct dm_cache_metadata *cmd;
@@ -683,6 +686,7 @@
 		return NULL;
 	}
 
+	atomic_set(&cmd->ref_count, 1);
 	init_rwsem(&cmd->root_lock);
 	cmd->bdev = bdev;
 	cmd->data_block_size = data_block_size;
@@ -705,10 +709,95 @@
 	return cmd;
 }
 
+/*
+ * We keep a little list of ref counted metadata objects to prevent two
+ * different target instances creating separate bufio instances.  This is
+ * an issue if a table is reloaded before the suspend.
+ */
+static DEFINE_MUTEX(table_lock);
+static LIST_HEAD(table);
+
+static struct dm_cache_metadata *lookup(struct block_device *bdev)
+{
+	struct dm_cache_metadata *cmd;
+
+	list_for_each_entry(cmd, &table, list)
+		if (cmd->bdev == bdev) {
+			atomic_inc(&cmd->ref_count);
+			return cmd;
+		}
+
+	return NULL;
+}
+
+static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
+						sector_t data_block_size,
+						bool may_format_device,
+						size_t policy_hint_size)
+{
+	struct dm_cache_metadata *cmd, *cmd2;
+
+	mutex_lock(&table_lock);
+	cmd = lookup(bdev);
+	mutex_unlock(&table_lock);
+
+	if (cmd)
+		return cmd;
+
+	cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
+	if (cmd) {
+		mutex_lock(&table_lock);
+		cmd2 = lookup(bdev);
+		if (cmd2) {
+			mutex_unlock(&table_lock);
+			__destroy_persistent_data_objects(cmd);
+			kfree(cmd);
+			return cmd2;
+		}
+		list_add(&cmd->list, &table);
+		mutex_unlock(&table_lock);
+	}
+
+	return cmd;
+}
+
+static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
+{
+	if (cmd->data_block_size != data_block_size) {
+		DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
+		      (unsigned long long) data_block_size,
+		      (unsigned long long) cmd->data_block_size);
+		return false;
+	}
+
+	return true;
+}
+
+struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+						 sector_t data_block_size,
+						 bool may_format_device,
+						 size_t policy_hint_size)
+{
+	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
+						       may_format_device, policy_hint_size);
+	if (cmd && !same_params(cmd, data_block_size)) {
+		dm_cache_metadata_close(cmd);
+		return NULL;
+	}
+
+	return cmd;
+}
+
 void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
 {
-	__destroy_persistent_data_objects(cmd);
-	kfree(cmd);
+	if (atomic_dec_and_test(&cmd->ref_count)) {
+		mutex_lock(&table_lock);
+		list_del(&cmd->list);
+		mutex_unlock(&table_lock);
+
+		__destroy_persistent_data_objects(cmd);
+		kfree(cmd);
+	}
 }
 
 /*
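For readers unfamiliar with the pattern, here is a minimal standalone sketch (userspace C, not kernel code) of what the dm-cache-metadata change does: keep a mutex-protected list of ref-counted per-device objects so that a second open of the same block device returns the existing object instead of creating a second bufio client. All names here (md_object, md_open, md_close) are invented for illustration; like the real lookup_or_open(), creation runs outside the lock, so the list is re-checked before insertion and the loser of a race discards its freshly created object.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for struct dm_cache_metadata: one object per backing device. */
struct md_object {
	char dev[64];            /* key; the kernel code keys on the block_device */
	int ref_count;           /* the kernel code uses atomic_t */
	struct md_object *next;  /* the kernel code uses a struct list_head */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct md_object *table;

/* Must be called with table_lock held; takes a reference on a hit. */
static struct md_object *lookup(const char *dev)
{
	struct md_object *o;

	for (o = table; o; o = o->next)
		if (strcmp(o->dev, dev) == 0) {
			o->ref_count++;
			return o;
		}

	return NULL;
}

static struct md_object *md_open(const char *dev)
{
	struct md_object *o, *o2;

	pthread_mutex_lock(&table_lock);
	o = lookup(dev);
	pthread_mutex_unlock(&table_lock);
	if (o)
		return o;

	/* Creation is slow in the real code (it opens on-disk metadata),
	 * so it runs unlocked and the list is re-checked afterwards. */
	o = calloc(1, sizeof(*o));
	if (!o)
		return NULL;
	snprintf(o->dev, sizeof(o->dev), "%s", dev);
	o->ref_count = 1;

	pthread_mutex_lock(&table_lock);
	o2 = lookup(dev);
	if (o2) {
		pthread_mutex_unlock(&table_lock);
		free(o);             /* lost the race; reuse the existing object */
		return o2;
	}
	o->next = table;
	table = o;
	pthread_mutex_unlock(&table_lock);

	return o;
}

static void md_close(struct md_object *o)
{
	pthread_mutex_lock(&table_lock);
	if (--o->ref_count == 0) {
		struct md_object **p;

		for (p = &table; *p; p = &(*p)->next)
			if (*p == o) {
				*p = o->next;    /* unlink, then free */
				break;
			}
		free(o);
	}
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	struct md_object *active = md_open("/dev/sdb");  /* active table */
	struct md_object *reload = md_open("/dev/sdb");  /* table reloaded before suspend */

	printf("shared object: %s, ref_count: %d\n",
	       active == reload ? "yes" : "no", active->ref_count);

	md_close(reload);
	md_close(active);    /* last reference tears the object down */
	return 0;
}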
drivers/md/dm-cache-target.c
@@ -221,8 +221,14 @@
 	struct list_head need_commit_migrations;
 	sector_t migration_threshold;
 	wait_queue_head_t migration_wait;
-	atomic_t nr_migrations;
+	atomic_t nr_allocated_migrations;
 
+	/*
+	 * The number of in flight migrations that are performing
+	 * background io. eg, promotion, writeback.
+	 */
+	atomic_t nr_io_migrations;
+
 	wait_queue_head_t quiescing_wait;
 	atomic_t quiescing;
 	atomic_t quiescing_ack;
@@ -258,7 +264,6 @@
 	struct dm_deferred_set *all_io_ds;
 
 	mempool_t *migration_pool;
-	struct dm_cache_migration *next_migration;
 
 	struct dm_cache_policy *policy;
 	unsigned policy_nr_args;
@@ -350,10 +355,31 @@
 	dm_bio_prison_free_cell(cache->prison, cell);
 }
 
+static struct dm_cache_migration *alloc_migration(struct cache *cache)
+{
+	struct dm_cache_migration *mg;
+
+	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+	if (mg) {
+		mg->cache = cache;
+		atomic_inc(&mg->cache->nr_allocated_migrations);
+	}
+
+	return mg;
+}
+
+static void free_migration(struct dm_cache_migration *mg)
+{
+	if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
+		wake_up(&mg->cache->migration_wait);
+
+	mempool_free(mg, mg->cache->migration_pool);
+}
+
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
 {
 	if (!p->mg) {
-		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+		p->mg = alloc_migration(cache);
 		if (!p->mg)
 			return -ENOMEM;
 	}
@@ -382,7 +408,7 @@
 	free_prison_cell(cache, p->cell1);
 
 	if (p->mg)
-		mempool_free(p->mg, cache->migration_pool);
+		free_migration(p->mg);
 }
 
 static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
@@ -854,26 +880,16 @@
  * Migration covers moving data from the origin device to the cache, or
  * vice versa.
  *--------------------------------------------------------------*/
-static void free_migration(struct dm_cache_migration *mg)
+static void inc_io_migrations(struct cache *cache)
 {
-	mempool_free(mg, mg->cache->migration_pool);
+	atomic_inc(&cache->nr_io_migrations);
 }
 
-static void inc_nr_migrations(struct cache *cache)
+static void dec_io_migrations(struct cache *cache)
 {
-	atomic_inc(&cache->nr_migrations);
+	atomic_dec(&cache->nr_io_migrations);
 }
 
-static void dec_nr_migrations(struct cache *cache)
-{
-	atomic_dec(&cache->nr_migrations);
-
-	/*
-	 * Wake the worker in case we're suspending the target.
-	 */
-	wake_up(&cache->migration_wait);
-}
-
 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
 			 bool holder)
 {
@@ -894,11 +910,10 @@
 	wake_worker(cache);
 }
 
-static void cleanup_migration(struct dm_cache_migration *mg)
+static void free_io_migration(struct dm_cache_migration *mg)
 {
-	struct cache *cache = mg->cache;
+	dec_io_migrations(mg->cache);
 	free_migration(mg);
-	dec_nr_migrations(cache);
 }
 
 static void migration_failure(struct dm_cache_migration *mg)
@@ -923,7 +938,7 @@
 		cell_defer(cache, mg->new_ocell, true);
 	}
 
-	cleanup_migration(mg);
+	free_io_migration(mg);
 }
 
 static void migration_success_pre_commit(struct dm_cache_migration *mg)
@@ -934,7 +949,7 @@
 	if (mg->writeback) {
 		clear_dirty(cache, mg->old_oblock, mg->cblock);
 		cell_defer(cache, mg->old_ocell, false);
-		cleanup_migration(mg);
+		free_io_migration(mg);
 		return;
 
 	} else if (mg->demote) {
@@ -944,14 +959,14 @@
 					     mg->old_oblock);
 			if (mg->promote)
 				cell_defer(cache, mg->new_ocell, true);
-			cleanup_migration(mg);
+			free_io_migration(mg);
 			return;
 		}
 	} else {
 		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
 			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
 			policy_remove_mapping(cache->policy, mg->new_oblock);
-			cleanup_migration(mg);
+			free_io_migration(mg);
 			return;
 		}
 	}
@@ -984,7 +999,7 @@
 		} else {
 			if (mg->invalidate)
 				policy_remove_mapping(cache->policy, mg->old_oblock);
-			cleanup_migration(mg);
+			free_io_migration(mg);
 		}
 
 	} else {
@@ -999,7 +1014,7 @@
 			bio_endio(mg->new_ocell->holder, 0);
 			cell_defer(cache, mg->new_ocell, false);
 		}
-		cleanup_migration(mg);
+		free_io_migration(mg);
 	}
 }
 
@@ -1251,7 +1266,7 @@
 	mg->new_ocell = cell;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1275,7 +1290,7 @@
 	mg->new_ocell = NULL;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1302,7 +1317,7 @@
 	mg->new_ocell = new_ocell;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1330,7 +1345,7 @@
 	mg->new_ocell = NULL;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1412,7 +1427,7 @@
 
 static bool spare_migration_bandwidth(struct cache *cache)
 {
-	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
+	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 	return current_volume < cache->migration_threshold;
 }
@@ -1764,7 +1779,7 @@
 
 static void wait_for_migrations(struct cache *cache)
 {
-	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
+	wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
 }
 
 static void stop_worker(struct cache *cache)
@@ -1876,9 +1891,6 @@
 {
 	unsigned i;
 
-	if (cache->next_migration)
-		mempool_free(cache->next_migration, cache->migration_pool);
-
 	if (cache->migration_pool)
 		mempool_destroy(cache->migration_pool);
 
@@ -2424,7 +2436,8 @@
 	INIT_LIST_HEAD(&cache->quiesced_migrations);
 	INIT_LIST_HEAD(&cache->completed_migrations);
 	INIT_LIST_HEAD(&cache->need_commit_migrations);
-	atomic_set(&cache->nr_migrations, 0);
+	atomic_set(&cache->nr_allocated_migrations, 0);
+	atomic_set(&cache->nr_io_migrations, 0);
 	init_waitqueue_head(&cache->migration_wait);
 
 	init_waitqueue_head(&cache->quiescing_wait);
@@ -2486,8 +2499,6 @@
 		*error = "Error creating cache's migration mempool";
 		goto bad;
 	}
-
-	cache->next_migration = NULL;
 
 	cache->need_tick_bio = true;
 	cache->sized = false;
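The dm-cache-target change above splits the old nr_migrations counter in two: nr_allocated_migrations counts every live migration structure and is what suspend waits on, while nr_io_migrations counts only migrations doing background IO and is what the bandwidth throttle reads. The sketch below is a minimal userspace model of that split (plain C, invented names and illustrative values, not kernel code); it shows why a migration that never performs background IO, such as one servicing a discard per the pull message, still holds off quiescing.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Every live migration structure, whatever it is doing. Suspend waits on this. */
static atomic_int nr_allocated_migrations;
/* Only migrations performing background IO (promotion, writeback). Used for throttling. */
static atomic_int nr_io_migrations;

static const int sectors_per_block = 8;        /* illustrative values */
static const int migration_threshold = 2048;

static void alloc_migration(void)   { atomic_fetch_add(&nr_allocated_migrations, 1); }
static void free_migration(void)    { atomic_fetch_sub(&nr_allocated_migrations, 1); }
static void inc_io_migrations(void) { atomic_fetch_add(&nr_io_migrations, 1); }
static void dec_io_migrations(void) { atomic_fetch_sub(&nr_io_migrations, 1); }

/* Throttling only cares about migrations that are moving data in the background. */
static bool spare_migration_bandwidth(void)
{
	int volume = (atomic_load(&nr_io_migrations) + 1) * sectors_per_block;
	return volume < migration_threshold;
}

/* Quiescing must wait for *all* allocated migrations, not just the IO ones. */
static bool quiesced(void)
{
	return atomic_load(&nr_allocated_migrations) == 0;
}

int main(void)
{
	alloc_migration();      /* a migration with no background IO (e.g. a discard) */
	alloc_migration();      /* a promotion... */
	inc_io_migrations();    /* ...which does perform background IO */

	printf("spare bandwidth: %d, quiesced: %d\n",
	       spare_migration_bandwidth(), quiesced());

	dec_io_migrations();    /* promotion completes */
	free_migration();
	printf("quiesced with the no-IO migration still live: %d\n", quiesced());

	free_migration();       /* the last migration completes */
	printf("quiesced: %d\n", quiesced());
	return 0;
}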
drivers/md/dm.c
@@ -206,6 +206,9 @@
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
 
+	/* the number of internal suspends */
+	unsigned internal_suspend_count;
+
 	struct dm_stats stats;
 };
 
@@ -2928,7 +2931,7 @@
 {
 	struct dm_table *map = NULL;
 
-	if (dm_suspended_internally_md(md))
+	if (md->internal_suspend_count++)
 		return; /* nested internal suspend */
 
 	if (dm_suspended_md(md)) {
@@ -2953,7 +2956,9 @@
 
 static void __dm_internal_resume(struct mapped_device *md)
 {
-	if (!dm_suspended_internally_md(md))
+	BUG_ON(!md->internal_suspend_count);
+
+	if (--md->internal_suspend_count)
 		return; /* resume from nested internal suspend */
 
 	if (dm_suspended_md(md))
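To make the DM core change concrete, here is a minimal userspace model (not kernel code; the struct and helper names are invented) of the internal_suspend_count logic in the dm.c hunks above: only the first internal suspend actually quiesces the device, and only the resume that brings the count back to zero actually resumes it.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct md_model {
	unsigned internal_suspend_count;
	bool suspended;
};

static void internal_suspend(struct md_model *md)
{
	if (md->internal_suspend_count++)
		return;                  /* nested internal suspend: just count it */

	md->suspended = true;            /* first suspend does the real work */
	printf("device quiesced\n");
}

static void internal_resume(struct md_model *md)
{
	assert(md->internal_suspend_count);   /* mirrors the BUG_ON() */

	if (--md->internal_suspend_count)
		return;                  /* resume from nested internal suspend */

	md->suspended = false;           /* count hit zero: really resume */
	printf("device resumed\n");
}

int main(void)
{
	struct md_model md = { 0, false };

	internal_suspend(&md);           /* first internal suspend */
	internal_suspend(&md);           /* a second, nested internal suspend */

	internal_resume(&md);            /* count 2 -> 1: must stay suspended */
	printf("still suspended: %s\n", md.suspended ? "yes" : "no");

	internal_resume(&md);            /* count 1 -> 0: device resumes */
	return 0;
}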