Commit 85ad643b7e7e52d37620fb272a9fd577a8095647

Authored by Joe Thornber
Committed by Mike Snitzer
1 parent 8d07e8a5f5

dm thin: add timeout to stop out-of-data-space mode holding IO forever

If the pool runs out of data space, dm-thin can be configured to
either error IOs that would trigger provisioning, or hold those IOs
until the pool is resized.  Unfortunately, holding IOs until the pool is
resized can result in a cascade of tasks hitting the hung_task_timeout,
which may render the system unavailable.

Add a fixed timeout so IOs can only be held for a maximum of 60 seconds;
after that the pool degrades to read-only mode and starts erroring IO.
If LVM is going to resize a thin-pool that is out of data space, it needs
to be prompt about it.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org # 3.14+

Showing 1 changed file with 21 additions and 0 deletions Side-by-side Diff

drivers/md/dm-thin.c
... ... @@ -27,6 +27,7 @@
27 27 #define MAPPING_POOL_SIZE 1024
28 28 #define PRISON_CELLS 1024
29 29 #define COMMIT_PERIOD HZ
  30 +#define NO_SPACE_TIMEOUT (HZ * 60) /* hold IO for at most 60 seconds when out of data space */
30 31  
31 32 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
32 33 "A percentage of time allocated for copy on write");
... ... @@ -175,6 +176,7 @@
175 176 struct workqueue_struct *wq;
176 177 struct work_struct worker;
177 178 struct delayed_work waker;
  179 + struct delayed_work no_space_timeout;
178 180  
179 181 unsigned long last_commit_jiffies;
180 182 unsigned ref_count;
... ... @@ -1590,6 +1592,20 @@
1590 1592 queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
1591 1593 }
1592 1594  
  1595 +/*
  1596 + * We're holding onto IO to allow userland time to react.  When the timeout
  1597 + * fires, a pool that is still in PM_OUT_OF_DATA_SPACE mode (not resized back
  1598 + * to PM_WRITE) and not set to error_if_no_space degrades to PM_READ_ONLY.
  1599 + */
  1600 +static void do_no_space_timeout(struct work_struct *ws)
  1601 +{
  1602 + struct pool *pool = container_of(to_delayed_work(ws), struct pool,
  1603 + no_space_timeout);
  1604 +
  1605 + if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
  1606 + set_pool_mode(pool, PM_READ_ONLY);
  1607 +}
  1608 +
  1608 +
1593 1609 /*----------------------------------------------------------------*/
1594 1610  
1595 1611 struct noflush_work {
... ... @@ -1715,6 +1731,9 @@
1715 1731 pool->process_discard = process_discard;
1716 1732 pool->process_prepared_mapping = process_prepared_mapping;
1717 1733 pool->process_prepared_discard = process_prepared_discard_passdown;
  1734 +
  1735 + if (!pool->pf.error_if_no_space)
  1736 + queue_delayed_work(pool->wq, &pool->no_space_timeout, NO_SPACE_TIMEOUT);
1718 1737 break;
1719 1738  
1720 1739 case PM_WRITE:
... ... @@ -2100,6 +2119,7 @@
2100 2119  
2101 2120 INIT_WORK(&pool->worker, do_worker);
2102 2121 INIT_DELAYED_WORK(&pool->waker, do_waker);
  2122 + INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
2103 2123 spin_lock_init(&pool->lock);
2104 2124 bio_list_init(&pool->deferred_flush_bios);
2105 2125 INIT_LIST_HEAD(&pool->prepared_mappings);
... ... @@ -2662,6 +2682,7 @@
2662 2682 struct pool *pool = pt->pool;
2663 2683  
2664 2684 cancel_delayed_work(&pool->waker);
  2685 + cancel_delayed_work(&pool->no_space_timeout);
2665 2686 flush_workqueue(pool->wq);
2666 2687 (void) commit(pool);
2667 2688 }