Commit c140e1c4e23bdaf0a5c00b6a8b6d18f259d39a00
Parent: 760fe67e53
Exists in master and in 13 other branches
dm thin: use per thin device deferred bio lists
The thin-pool previously had only a single deferred_bios list, which collected bios for all thin devices in the pool. Split this per-pool deferred_bios list into a per-thin deferred_bio_list; doing so enables increased parallelism when processing deferred bios.

And now that each thin device has its own deferred_bio_list, all bios in the list can be sorted by logical sector. As a side-effect, the requeue code in the error-handling path is also cleaner.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
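The sorting mentioned above is enabled by this change but not implemented by it. As an illustration only, a deliberately naive way to drain a per-thin list in ascending sector order is sketched below; pop_lowest_sector_bio is a hypothetical helper that does not appear in the patch, and it assumes bio->bi_iter.bi_sector holds the logical sector (kernels with immutable biovecs):

static struct bio *pop_lowest_sector_bio(struct bio_list *bios)
{
	struct bio_list rest;
	struct bio *bio, *lowest;

	bio_list_init(&rest);

	lowest = bio_list_pop(bios);
	if (!lowest)
		return NULL;

	/*
	 * Selection pass: keep the bio with the smallest logical
	 * sector, park everything else on a temporary list.  O(n)
	 * per call, O(n^2) for a full drain: fine for a sketch,
	 * too slow for the real worker.
	 */
	while ((bio = bio_list_pop(bios))) {
		if (bio->bi_iter.bi_sector < lowest->bi_iter.bi_sector) {
			bio_list_add(&rest, lowest);
			lowest = bio;
		} else
			bio_list_add(&rest, bio);
	}

	/* Put the remainder back for the next call. */
	bio_list_merge(bios, &rest);

	return lowest;
}

A real implementation would keep the list ordered as bios are deferred, or use a tree, rather than rescanning. The sketch only shows why per-device lists matter here: sorting a pool-wide list would interleave sectors from unrelated devices.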
Showing 1 changed file with 104 additions and 61 deletions
drivers/md/dm-thin.c
@@ -12,6 +12,7 @@
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -178,13 +179,11 @@
 	unsigned ref_count;
 
 	spinlock_t lock;
-	struct bio_list deferred_bios;
 	struct bio_list deferred_flush_bios;
 	struct list_head prepared_mappings;
 	struct list_head prepared_discards;
+	struct list_head active_thins;
 
-	struct bio_list retry_on_resume_list;
-
 	struct dm_deferred_set *shared_read_ds;
 	struct dm_deferred_set *all_io_ds;
 
@@ -220,6 +219,7 @@
  * Target context for a thin.
  */
 struct thin_c {
+	struct list_head list;
 	struct dm_dev *pool_dev;
 	struct dm_dev *origin_dev;
 	dm_thin_id dev_id;
@@ -227,6 +227,9 @@
 	struct pool *pool;
 	struct dm_thin_device *td;
 	bool requeue_mode:1;
+	spinlock_t lock;
+	struct bio_list deferred_bio_list;
+	struct bio_list retry_on_resume_list;
 };
 
 /*----------------------------------------------------------------*/
@@ -287,9 +290,9 @@
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -378,30 +381,22 @@
 
 	bio_list_init(&bios);
 
-	spin_lock_irqsave(&tc->pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
 	bio_list_merge(&bios, master);
 	bio_list_init(master);
-	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
-	while ((bio = bio_list_pop(&bios))) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-
-		if (h->tc == tc)
-			bio_endio(bio, DM_ENDIO_REQUEUE);
-		else
-			bio_list_add(master, bio);
-	}
+	while ((bio = bio_list_pop(&bios)))
+		bio_endio(bio, DM_ENDIO_REQUEUE);
 }
 
 static void requeue_io(struct thin_c *tc)
 {
-	struct pool *pool = tc->pool;
-
-	requeue_bio_list(tc, &pool->deferred_bios);
-	requeue_bio_list(tc, &pool->retry_on_resume_list);
+	requeue_bio_list(tc, &tc->deferred_bio_list);
+	requeue_bio_list(tc, &tc->retry_on_resume_list);
 }
 
-static void error_retry_list(struct pool *pool)
+static void error_thin_retry_list(struct thin_c *tc)
 {
 	struct bio *bio;
 	unsigned long flags;
 	struct bio_list bios;
@@ -409,15 +404,25 @@
 
 	bio_list_init(&bios);
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_merge(&bios, &pool->retry_on_resume_list);
-	bio_list_init(&pool->retry_on_resume_list);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_merge(&bios, &tc->retry_on_resume_list);
+	bio_list_init(&tc->retry_on_resume_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	while ((bio = bio_list_pop(&bios)))
 		bio_io_error(bio);
 }
 
+static void error_retry_list(struct pool *pool)
+{
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+		error_thin_retry_list(tc);
+	rcu_read_unlock();
+}
+
 /*
  * This section of code contains the logic for processing a thin device's IO.
  * Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -608,9 +613,9 @@
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	cell_release(pool, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	cell_release(pool, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -623,9 +628,9 @@
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	cell_release_no_holder(pool, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -1001,12 +1006,11 @@
 {
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
 	struct thin_c *tc = h->tc;
-	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_add(&pool->retry_on_resume_list, bio);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_add(&tc->retry_on_resume_list, bio);
+	spin_unlock_irqrestore(&tc->lock, flags);
 }
 
 static bool should_error_unserviceable_bio(struct pool *pool)
@@ -1363,38 +1367,36 @@
 		jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
 }
 
-static void process_deferred_bios(struct pool *pool)
+static void process_thin_deferred_bios(struct thin_c *tc)
 {
+	struct pool *pool = tc->pool;
 	unsigned long flags;
 	struct bio *bio;
 	struct bio_list bios;
 
+	if (tc->requeue_mode) {
+		requeue_bio_list(tc, &tc->deferred_bio_list);
+		return;
+	}
+
 	bio_list_init(&bios);
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_merge(&bios, &pool->deferred_bios);
-	bio_list_init(&pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_merge(&bios, &tc->deferred_bio_list);
+	bio_list_init(&tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	while ((bio = bio_list_pop(&bios))) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-		struct thin_c *tc = h->tc;
-
-		if (tc->requeue_mode) {
-			bio_endio(bio, DM_ENDIO_REQUEUE);
-			continue;
-		}
-
 		/*
 		 * If we've got no free new_mapping structs, and processing
 		 * this bio might require one, we pause until there are some
 		 * prepared mappings to process.
 		 */
 		if (ensure_next_mapping(pool)) {
-			spin_lock_irqsave(&pool->lock, flags);
-			bio_list_add(&pool->deferred_bios, bio);
-			bio_list_merge(&pool->deferred_bios, &bios);
-			spin_unlock_irqrestore(&pool->lock, flags);
+			spin_lock_irqsave(&tc->lock, flags);
+			bio_list_add(&tc->deferred_bio_list, bio);
+			bio_list_merge(&tc->deferred_bio_list, &bios);
+			spin_unlock_irqrestore(&tc->lock, flags);
 			break;
 		}
 
@@ -1403,7 +1405,20 @@
 		else
 			pool->process_bio(tc, bio);
 	}
+}
 
+static void process_deferred_bios(struct pool *pool)
+{
+	unsigned long flags;
+	struct bio *bio;
+	struct bio_list bios;
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+		process_thin_deferred_bios(tc);
+	rcu_read_unlock();
+
 	/*
 	 * If there are any deferred flush bios, we must commit
 	 * the metadata before issuing them.
@@ -1634,9 +1649,9 @@
 	unsigned long flags;
 	struct pool *pool = tc->pool;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_add(&pool->deferred_bios, bio);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_add(&tc->deferred_bio_list, bio);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -1767,10 +1782,19 @@
 	return bdi_congested(&q->backing_dev_info, bdi_bits);
 }
 
-static void __requeue_bios(struct pool *pool)
+static void requeue_bios(struct pool *pool)
 {
-	bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
-	bio_list_init(&pool->retry_on_resume_list);
+	unsigned long flags;
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list) {
+		spin_lock_irqsave(&tc->lock, flags);
+		bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
+		bio_list_init(&tc->retry_on_resume_list);
+		spin_unlock_irqrestore(&tc->lock, flags);
+	}
+	rcu_read_unlock();
 }
 
 /*----------------------------------------------------------------
@@ -1951,12 +1975,11 @@
 	INIT_WORK(&pool->worker, do_worker);
 	INIT_DELAYED_WORK(&pool->waker, do_waker);
 	spin_lock_init(&pool->lock);
-	bio_list_init(&pool->deferred_bios);
 	bio_list_init(&pool->deferred_flush_bios);
 	INIT_LIST_HEAD(&pool->prepared_mappings);
 	INIT_LIST_HEAD(&pool->prepared_discards);
+	INIT_LIST_HEAD(&pool->active_thins);
 	pool->low_water_triggered = false;
-	bio_list_init(&pool->retry_on_resume_list);
 
 	pool->shared_read_ds = dm_deferred_set_create();
 	if (!pool->shared_read_ds) {
@@ -2501,8 +2524,8 @@
 
 	spin_lock_irqsave(&pool->lock, flags);
 	pool->low_water_triggered = false;
-	__requeue_bios(pool);
 	spin_unlock_irqrestore(&pool->lock, flags);
+	requeue_bios(pool);
 
 	do_waker(&pool->waker.work);
 }
@@ -2962,7 +2985,13 @@
 static void thin_dtr(struct dm_target *ti)
 {
 	struct thin_c *tc = ti->private;
+	unsigned long flags;
 
+	spin_lock_irqsave(&tc->pool->lock, flags);
+	list_del_rcu(&tc->list);
+	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	synchronize_rcu();
+
 	mutex_lock(&dm_thin_pool_table.mutex);
 
 	__pool_dec(tc->pool);
@@ -3008,6 +3037,9 @@
 		r = -ENOMEM;
 		goto out_unlock;
 	}
+	spin_lock_init(&tc->lock);
+	bio_list_init(&tc->deferred_bio_list);
+	bio_list_init(&tc->retry_on_resume_list);
 
 	if (argc == 3) {
 		r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
@@ -3078,6 +3110,17 @@
 	dm_put(pool_md);
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
+
+	spin_lock(&tc->pool->lock);
+	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
+	spin_unlock(&tc->pool->lock);
+	/*
+	 * This synchronize_rcu() call is needed here otherwise we risk a
+	 * wake_worker() call finding no bios to process (because the newly
+	 * added tc isn't yet visible).  So this reduces latency since we
+	 * aren't then dependent on the periodic commit to wake_worker().
+	 */
+	synchronize_rcu();
 
 	return 0;
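For reference, the constructor, destructor, and worker hunks above combine into the usual rculist pattern: writers modify pool->active_thins under pool->lock and then call synchronize_rcu(), while readers traverse the list under rcu_read_lock() alone. A minimal sketch of that pattern, using abbreviated hypothetical function names and assuming the struct pool and struct thin_c definitions from this file:

#include <linux/rculist.h>
#include <linux/spinlock.h>

/* Writer side: attach/detach a thin device, serialized by pool->lock. */
static void pool_attach_thin(struct pool *pool, struct thin_c *tc)
{
	spin_lock(&pool->lock);
	list_add_tail_rcu(&tc->list, &pool->active_thins);
	spin_unlock(&pool->lock);
	synchronize_rcu();	/* per the patch comment: don't let a wake_worker() miss the new tc */
}

static void pool_detach_thin(struct pool *pool, struct thin_c *tc)
{
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	list_del_rcu(&tc->list);
	spin_unlock_irqrestore(&pool->lock, flags);
	synchronize_rcu();	/* no reader may still hold tc after this */
}

/* Reader side: the worker walks active_thins without taking pool->lock. */
static void pool_for_each_thin(struct pool *pool, void (*fn)(struct thin_c *))
{
	struct thin_c *tc;

	rcu_read_lock();
	list_for_each_entry_rcu(tc, &pool->active_thins, list)
		fn(tc);
	rcu_read_unlock();
}

The payoff is that the worker can iterate all active thin devices without serializing on the pool lock, which is what allows each device's deferred bios to be processed independently.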