Commit c140e1c4e23bdaf0a5c00b6a8b6d18f259d39a00

Authored by Mike Snitzer
1 parent 760fe67e53

dm thin: use per thin device deferred bio lists

The thin-pool previously had only a single deferred_bios list that
collected bios for all thin devices in the pool.  Split this per-pool
deferred_bios list out into a per-thin deferred_bio_list -- doing so
enables increased parallelism when processing deferred bios.  And now
that each thin device has its own deferred_bio_list, all bios in the
list can be sorted by logical sector.  The requeue code in the error
handling path is also cleaner as a side-effect.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
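
The sorting mentioned in the message is enabled by this commit rather
than implemented by it: only once each thin device has a private list
does sorting by sector make sense, since the old pool-wide list
interleaved sectors from many devices. As a reading aid, here is a
minimal sketch of what such a sort could look like -- a simple
insertion sort over a drained bio_list, illustrative only and not the
implementation the series ultimately adopted (field names assume the
3.14+ block layer's bio->bi_iter):

	/* Illustrative sketch: order a drained per-thin bio list by
	 * logical sector so the data device sees mostly-sequential IO. */
	static void sort_deferred_bios(struct bio_list *bios)
	{
		struct bio_list sorted;
		struct bio *bio;

		bio_list_init(&sorted);

		/* Insertion sort: adequate for illustration; real code
		 * would want something cheaper for long lists. */
		while ((bio = bio_list_pop(bios))) {
			struct bio **p = &sorted.head;

			while (*p &&
			       (*p)->bi_iter.bi_sector <= bio->bi_iter.bi_sector)
				p = &(*p)->bi_next;

			bio->bi_next = *p;
			*p = bio;
			if (!bio->bi_next)
				sorted.tail = bio;
		}

		*bios = sorted;
	}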

Showing 1 changed file with 104 additions and 61 deletions

drivers/md/dm-thin.c
... ... @@ -12,6 +12,7 @@
12 12 #include <linux/dm-io.h>
13 13 #include <linux/dm-kcopyd.h>
14 14 #include <linux/list.h>
  15 +#include <linux/rculist.h>
15 16 #include <linux/init.h>
16 17 #include <linux/module.h>
17 18 #include <linux/slab.h>
18 19  
19 20  
... ... @@ -178,13 +179,11 @@
178 179 unsigned ref_count;
179 180  
180 181 spinlock_t lock;
181   - struct bio_list deferred_bios;
182 182 struct bio_list deferred_flush_bios;
183 183 struct list_head prepared_mappings;
184 184 struct list_head prepared_discards;
  185 + struct list_head active_thins;
185 186  
186   - struct bio_list retry_on_resume_list;
187   -
188 187 struct dm_deferred_set *shared_read_ds;
189 188 struct dm_deferred_set *all_io_ds;
190 189  
... ... @@ -220,6 +219,7 @@
220 219 * Target context for a thin.
221 220 */
222 221 struct thin_c {
  222 + struct list_head list;
223 223 struct dm_dev *pool_dev;
224 224 struct dm_dev *origin_dev;
225 225 dm_thin_id dev_id;
... ... @@ -227,6 +227,9 @@
227 227 struct pool *pool;
228 228 struct dm_thin_device *td;
229 229 bool requeue_mode:1;
  230 + spinlock_t lock;
  231 + struct bio_list deferred_bio_list;
  232 + struct bio_list retry_on_resume_list;
230 233 };
231 234  
232 235 /*----------------------------------------------------------------*/
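
Taken together, the two struct hunks above leave the state split like
this (a condensed view; fields abbreviated, comments added for this
note only):

	struct pool {				/* pool-global state */
		spinlock_t lock;		/* guards pool-wide fields */
		struct bio_list deferred_flush_bios;
		struct list_head prepared_mappings;
		struct list_head prepared_discards;
		struct list_head active_thins;	/* RCU-protected list of thin_c */
		...
	};

	struct thin_c {				/* one per active thin device */
		struct list_head list;		/* entry in pool->active_thins */
		spinlock_t lock;		/* guards the two lists below */
		struct bio_list deferred_bio_list;
		struct bio_list retry_on_resume_list;
		...
	};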
... ... @@ -287,9 +290,9 @@
287 290 struct pool *pool = tc->pool;
288 291 unsigned long flags;
289 292  
290   - spin_lock_irqsave(&pool->lock, flags);
291   - dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios);
292   - spin_unlock_irqrestore(&pool->lock, flags);
  293 + spin_lock_irqsave(&tc->lock, flags);
  294 + dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list);
  295 + spin_unlock_irqrestore(&tc->lock, flags);
293 296  
294 297 wake_worker(pool);
295 298 }
296 299  
297 300  
298 301  
299 302  
... ... @@ -378,30 +381,22 @@
378 381  
379 382 bio_list_init(&bios);
380 383  
381   - spin_lock_irqsave(&tc->pool->lock, flags);
  384 + spin_lock_irqsave(&tc->lock, flags);
382 385 bio_list_merge(&bios, master);
383 386 bio_list_init(master);
384   - spin_unlock_irqrestore(&tc->pool->lock, flags);
  387 + spin_unlock_irqrestore(&tc->lock, flags);
385 388  
386   - while ((bio = bio_list_pop(&bios))) {
387   - struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
388   -
389   - if (h->tc == tc)
390   - bio_endio(bio, DM_ENDIO_REQUEUE);
391   - else
392   - bio_list_add(master, bio);
393   - }
  389 + while ((bio = bio_list_pop(&bios)))
  390 + bio_endio(bio, DM_ENDIO_REQUEUE);
394 391 }
395 392  
396 393 static void requeue_io(struct thin_c *tc)
397 394 {
398   - struct pool *pool = tc->pool;
399   -
400   - requeue_bio_list(tc, &pool->deferred_bios);
401   - requeue_bio_list(tc, &pool->retry_on_resume_list);
  395 + requeue_bio_list(tc, &tc->deferred_bio_list);
  396 + requeue_bio_list(tc, &tc->retry_on_resume_list);
402 397 }
403 398  
404   -static void error_retry_list(struct pool *pool)
  399 +static void error_thin_retry_list(struct thin_c *tc)
405 400 {
406 401 struct bio *bio;
407 402 unsigned long flags;
408 403  
... ... @@ -409,15 +404,25 @@
409 404  
410 405 bio_list_init(&bios);
411 406  
412   - spin_lock_irqsave(&pool->lock, flags);
413   - bio_list_merge(&bios, &pool->retry_on_resume_list);
414   - bio_list_init(&pool->retry_on_resume_list);
415   - spin_unlock_irqrestore(&pool->lock, flags);
  407 + spin_lock_irqsave(&tc->lock, flags);
  408 + bio_list_merge(&bios, &tc->retry_on_resume_list);
  409 + bio_list_init(&tc->retry_on_resume_list);
  410 + spin_unlock_irqrestore(&tc->lock, flags);
416 411  
417 412 while ((bio = bio_list_pop(&bios)))
418 413 bio_io_error(bio);
419 414 }
420 415  
  416 +static void error_retry_list(struct pool *pool)
  417 +{
  418 + struct thin_c *tc;
  419 +
  420 + rcu_read_lock();
  421 + list_for_each_entry_rcu(tc, &pool->active_thins, list)
  422 + error_thin_retry_list(tc);
  423 + rcu_read_unlock();
  424 +}
  425 +
421 426 /*
422 427 * This section of code contains the logic for processing a thin device's IO.
423 428 * Much of the code depends on pool object resources (lists, workqueues, etc)
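
The rcu_read_lock()-protected walk in error_retry_list() pairs with
the thin_ctr()/thin_dtr() hunks further down. Pulled together purely
as a reading aid (the calls are verbatim from this patch), the
contract over pool->active_thins is:

	/* Reader side (worker context): may run concurrently with
	 * list mutation; must not sleep inside the read section. */
	rcu_read_lock();
	list_for_each_entry_rcu(tc, &pool->active_thins, list)
		error_thin_retry_list(tc);
	rcu_read_unlock();

	/* Writer side (ctr/dtr): mutate under pool->lock so writers
	 * serialize with each other, then wait out any readers that
	 * might still hold a reference to tc before it is freed. */
	spin_lock_irqsave(&pool->lock, flags);
	list_del_rcu(&tc->list);
	spin_unlock_irqrestore(&pool->lock, flags);
	synchronize_rcu();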
... ... @@ -608,9 +613,9 @@
608 613 struct pool *pool = tc->pool;
609 614 unsigned long flags;
610 615  
611   - spin_lock_irqsave(&pool->lock, flags);
612   - cell_release(pool, cell, &pool->deferred_bios);
613   - spin_unlock_irqrestore(&tc->pool->lock, flags);
  616 + spin_lock_irqsave(&tc->lock, flags);
  617 + cell_release(pool, cell, &tc->deferred_bio_list);
  618 + spin_unlock_irqrestore(&tc->lock, flags);
614 619  
615 620 wake_worker(pool);
616 621 }
... ... @@ -623,9 +628,9 @@
623 628 struct pool *pool = tc->pool;
624 629 unsigned long flags;
625 630  
626   - spin_lock_irqsave(&pool->lock, flags);
627   - cell_release_no_holder(pool, cell, &pool->deferred_bios);
628   - spin_unlock_irqrestore(&pool->lock, flags);
  631 + spin_lock_irqsave(&tc->lock, flags);
  632 + cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
  633 + spin_unlock_irqrestore(&tc->lock, flags);
629 634  
630 635 wake_worker(pool);
631 636 }
632 637  
... ... @@ -1001,12 +1006,11 @@
1001 1006 {
1002 1007 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1003 1008 struct thin_c *tc = h->tc;
1004   - struct pool *pool = tc->pool;
1005 1009 unsigned long flags;
1006 1010  
1007   - spin_lock_irqsave(&pool->lock, flags);
1008   - bio_list_add(&pool->retry_on_resume_list, bio);
1009   - spin_unlock_irqrestore(&pool->lock, flags);
  1011 + spin_lock_irqsave(&tc->lock, flags);
  1012 + bio_list_add(&tc->retry_on_resume_list, bio);
  1013 + spin_unlock_irqrestore(&tc->lock, flags);
1010 1014 }
1011 1015  
1012 1016 static bool should_error_unserviceable_bio(struct pool *pool)
1013 1017  
1014 1018  
1015 1019  
1016 1020  
1017 1021  
... ... @@ -1363,38 +1367,36 @@
1363 1367 jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
1364 1368 }
1365 1369  
1366   -static void process_deferred_bios(struct pool *pool)
  1370 +static void process_thin_deferred_bios(struct thin_c *tc)
1367 1371 {
  1372 + struct pool *pool = tc->pool;
1368 1373 unsigned long flags;
1369 1374 struct bio *bio;
1370 1375 struct bio_list bios;
1371 1376  
  1377 + if (tc->requeue_mode) {
  1378 + requeue_bio_list(tc, &tc->deferred_bio_list);
  1379 + return;
  1380 + }
  1381 +
1372 1382 bio_list_init(&bios);
1373 1383  
1374   - spin_lock_irqsave(&pool->lock, flags);
1375   - bio_list_merge(&bios, &pool->deferred_bios);
1376   - bio_list_init(&pool->deferred_bios);
1377   - spin_unlock_irqrestore(&pool->lock, flags);
  1384 + spin_lock_irqsave(&tc->lock, flags);
  1385 + bio_list_merge(&bios, &tc->deferred_bio_list);
  1386 + bio_list_init(&tc->deferred_bio_list);
  1387 + spin_unlock_irqrestore(&tc->lock, flags);
1378 1388  
1379 1389 while ((bio = bio_list_pop(&bios))) {
1380   - struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1381   - struct thin_c *tc = h->tc;
1382   -
1383   - if (tc->requeue_mode) {
1384   - bio_endio(bio, DM_ENDIO_REQUEUE);
1385   - continue;
1386   - }
1387   -
1388 1390 /*
1389 1391 * If we've got no free new_mapping structs, and processing
1390 1392 * this bio might require one, we pause until there are some
1391 1393 * prepared mappings to process.
1392 1394 */
1393 1395 if (ensure_next_mapping(pool)) {
1394   - spin_lock_irqsave(&pool->lock, flags);
1395   - bio_list_add(&pool->deferred_bios, bio);
1396   - bio_list_merge(&pool->deferred_bios, &bios);
1397   - spin_unlock_irqrestore(&pool->lock, flags);
  1396 + spin_lock_irqsave(&tc->lock, flags);
  1397 + bio_list_add(&tc->deferred_bio_list, bio);
  1398 + bio_list_merge(&tc->deferred_bio_list, &bios);
  1399 + spin_unlock_irqrestore(&tc->lock, flags);
1398 1400 break;
1399 1401 }
1400 1402  
1401 1403  
... ... @@ -1403,7 +1405,20 @@
1403 1405 else
1404 1406 pool->process_bio(tc, bio);
1405 1407 }
  1408 +}
1406 1409  
  1410 +static void process_deferred_bios(struct pool *pool)
  1411 +{
  1412 + unsigned long flags;
  1413 + struct bio *bio;
  1414 + struct bio_list bios;
  1415 + struct thin_c *tc;
  1416 +
  1417 + rcu_read_lock();
  1418 + list_for_each_entry_rcu(tc, &pool->active_thins, list)
  1419 + process_thin_deferred_bios(tc);
  1420 + rcu_read_unlock();
  1421 +
1407 1422 /*
1408 1423 * If there are any deferred flush bios, we must commit
1409 1424 * the metadata before issuing them.
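
process_thin_deferred_bios() opens with the drain idiom this patch
uses throughout: steal the whole list in one short critical section,
then work on a private copy with no lock held. Factored out here for
illustration only -- the patch itself open-codes it at each site:

	/* Sketch: move everything on tc->deferred_bio_list onto the
	 * caller's private 'bios' list in O(1) time under the lock
	 * (bio_list_merge() just splices list heads). */
	static void drain_deferred_bios(struct thin_c *tc, struct bio_list *bios)
	{
		unsigned long flags;

		bio_list_init(bios);

		spin_lock_irqsave(&tc->lock, flags);
		bio_list_merge(bios, &tc->deferred_bio_list);
		bio_list_init(&tc->deferred_bio_list);
		spin_unlock_irqrestore(&tc->lock, flags);
	}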
... ... @@ -1634,9 +1649,9 @@
1634 1649 unsigned long flags;
1635 1650 struct pool *pool = tc->pool;
1636 1651  
1637   - spin_lock_irqsave(&pool->lock, flags);
1638   - bio_list_add(&pool->deferred_bios, bio);
1639   - spin_unlock_irqrestore(&pool->lock, flags);
  1652 + spin_lock_irqsave(&tc->lock, flags);
  1653 + bio_list_add(&tc->deferred_bio_list, bio);
  1654 + spin_unlock_irqrestore(&tc->lock, flags);
1640 1655  
1641 1656 wake_worker(pool);
1642 1657 }
1643 1658  
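
thin_defer_bio() is the producer half of the per-device queue whose
consumer is process_thin_deferred_bios() above; the pairing is the
classic lock/enqueue/unlock/kick pattern (condensed from the hunks in
this patch):

	/* Producer (map and endio paths): */
	spin_lock_irqsave(&tc->lock, flags);
	bio_list_add(&tc->deferred_bio_list, bio);
	spin_unlock_irqrestore(&tc->lock, flags);
	wake_worker(pool);	/* the worker is still pool-wide */

	/* Consumer (worker): drain under tc->lock as shown above,
	 * then issue the bios with no spinlock held. */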
... ... @@ -1767,10 +1782,19 @@
1767 1782 return bdi_congested(&q->backing_dev_info, bdi_bits);
1768 1783 }
1769 1784  
1770   -static void __requeue_bios(struct pool *pool)
  1785 +static void requeue_bios(struct pool *pool)
1771 1786 {
1772   - bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
1773   - bio_list_init(&pool->retry_on_resume_list);
  1787 + unsigned long flags;
  1788 + struct thin_c *tc;
  1789 +
  1790 + rcu_read_lock();
  1791 + list_for_each_entry_rcu(tc, &pool->active_thins, list) {
  1792 + spin_lock_irqsave(&tc->lock, flags);
  1793 + bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
  1794 + bio_list_init(&tc->retry_on_resume_list);
  1795 + spin_unlock_irqrestore(&tc->lock, flags);
  1796 + }
  1797 + rcu_read_unlock();
1774 1798 }
1775 1799  
1776 1800 /*----------------------------------------------------------------
1777 1801  
1778 1802  
... ... @@ -1951,12 +1975,11 @@
1951 1975 INIT_WORK(&pool->worker, do_worker);
1952 1976 INIT_DELAYED_WORK(&pool->waker, do_waker);
1953 1977 spin_lock_init(&pool->lock);
1954   - bio_list_init(&pool->deferred_bios);
1955 1978 bio_list_init(&pool->deferred_flush_bios);
1956 1979 INIT_LIST_HEAD(&pool->prepared_mappings);
1957 1980 INIT_LIST_HEAD(&pool->prepared_discards);
  1981 + INIT_LIST_HEAD(&pool->active_thins);
1958 1982 pool->low_water_triggered = false;
1959   - bio_list_init(&pool->retry_on_resume_list);
1960 1983  
1961 1984 pool->shared_read_ds = dm_deferred_set_create();
1962 1985 if (!pool->shared_read_ds) {
1963 1986  
... ... @@ -2501,8 +2524,8 @@
2501 2524  
2502 2525 spin_lock_irqsave(&pool->lock, flags);
2503 2526 pool->low_water_triggered = false;
2504   - __requeue_bios(pool);
2505 2527 spin_unlock_irqrestore(&pool->lock, flags);
  2528 + requeue_bios(pool);
2506 2529  
2507 2530 do_waker(&pool->waker.work);
2508 2531 }
2509 2532  
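
Note that the requeue call moves outside the pool->lock critical
section: requeue_bios() now takes rcu_read_lock() and each tc->lock
itself, so pool->lock is left guarding only low_water_triggered here,
and the two locks are never held together on this path (a reading
aid, not patch text):

	spin_lock_irqsave(&pool->lock, flags);	/* pool-global flag only */
	pool->low_water_triggered = false;
	spin_unlock_irqrestore(&pool->lock, flags);

	requeue_bios(pool);	/* internally: rcu_read_lock(), then
				 * tc->lock per device -- no nesting
				 * inside pool->lock */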
... ... @@ -2962,7 +2985,13 @@
2962 2985 static void thin_dtr(struct dm_target *ti)
2963 2986 {
2964 2987 struct thin_c *tc = ti->private;
  2988 + unsigned long flags;
2965 2989  
  2990 + spin_lock_irqsave(&tc->pool->lock, flags);
  2991 + list_del_rcu(&tc->list);
  2992 + spin_unlock_irqrestore(&tc->pool->lock, flags);
  2993 + synchronize_rcu();
  2994 +
2966 2995 mutex_lock(&dm_thin_pool_table.mutex);
2967 2996  
2968 2997 __pool_dec(tc->pool);
... ... @@ -3008,6 +3037,9 @@
3008 3037 r = -ENOMEM;
3009 3038 goto out_unlock;
3010 3039 }
  3040 + spin_lock_init(&tc->lock);
  3041 + bio_list_init(&tc->deferred_bio_list);
  3042 + bio_list_init(&tc->retry_on_resume_list);
3011 3043  
3012 3044 if (argc == 3) {
3013 3045 r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
... ... @@ -3078,6 +3110,17 @@
3078 3110 dm_put(pool_md);
3079 3111  
3080 3112 mutex_unlock(&dm_thin_pool_table.mutex);
  3113 +
  3114 + spin_lock(&tc->pool->lock);
  3115 + list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
  3116 + spin_unlock(&tc->pool->lock);
  3117 + /*
  3118 + * This synchronize_rcu() call is needed here otherwise we risk a
  3119 + * wake_worker() call finding no bios to process (because the newly
  3120 + * added tc isn't yet visible). So this reduces latency since we
  3121 + * aren't then dependent on the periodic commit to wake_worker().
  3122 + */
  3123 + synchronize_rcu();
3081 3124  
3082 3125 return 0;
3083 3126
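
The comment above is terse; the window it describes, expanded as an
editorial note (this interleaving is one reading of the comment, not
patch text):

	/* Without the synchronize_rcu():
	 *
	 *   thin_ctr()                          pool worker
	 *   ----------                          -----------
	 *   list_add_tail_rcu(&tc->list, ...)
	 *   ...ctr returns; a bio is mapped...
	 *   thin_defer_bio():
	 *     bio_list_add(&tc->deferred_bio_list, bio)
	 *     wake_worker(pool)
	 *                                        process_deferred_bios()
	 *                                          list_for_each_entry_rcu(...)
	 *                                            -- may not observe tc yet
	 *
	 * The bio would then sit until the periodic waker fires;
	 * waiting in synchronize_rcu() closes that window.
	 */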