Commit f2dbd76a0a994bc1d5a3d0e7c844cc373832e86c

Authored by Tejun Heo
Committed by Jens Axboe
1 parent 1238033c79

block, cfq: replace current_io_context() with create_io_context()

When called under queue_lock, current_io_context() triggers a lockdep
warning if it hits the allocation path.  This is because io_context
installation is protected by task_lock, which is not IRQ safe, so it
triggers an irq-unsafe-lock -> irq -> irq-safe-lock -> irq-unsafe-lock
deadlock warning.
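
To make the complaint concrete, here is a minimal sketch of the
offending lock nesting, assuming a request_queue *q and the pre-patch
current_io_context(); it is illustrative only, not code from the tree:

  spin_lock_irq(q->queue_lock);                  /* IRQ-safe lock held */
  ioc = current_io_context(GFP_ATOMIC, q->node); /* may hit allocation path */
  /*
   * The allocation path ends in task_lock(current), an IRQ-unsafe
   * spinlock, so lockdep records "IRQ-safe queue_lock -> IRQ-unsafe
   * task_lock".  A task holding task_lock with IRQs enabled can be
   * interrupted by an IRQ that needs queue_lock, closing the
   * irq-unsafe-lock -> irq -> irq-safe-lock -> irq-unsafe-lock cycle.
   */
  spin_unlock_irq(q->queue_lock);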

Given this restriction, an accessor and creator rolled into one doesn't
work well.  Drop current_io_context() and let users access
task->io_context directly inside queue_lock, combined with explicit
creation using create_io_context().
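
A minimal sketch of the resulting caller-side pattern under queue_lock;
the helper name example_get_ioc is made up, while create_io_context()
and task->io_context are the interfaces this patch settles on:

  static struct io_context *example_get_ioc(struct request_queue *q,
                                            gfp_t gfp_mask)
  {
          struct io_context *ioc = current->io_context; /* plain access under queue_lock */

          if (!ioc) {
                  /* creation needs IRQ-unsafe task_lock, so drop queue_lock first */
                  spin_unlock_irq(q->queue_lock);
                  create_io_context(current, gfp_mask, q->node);
                  spin_lock_irq(q->queue_lock);
                  /* caller must re-test anything it checked under queue_lock */
                  ioc = current->io_context; /* still NULL if allocation failed */
          }
          return ioc;
  }

get_request() below follows exactly this shape, with a retry of its
congestion checks after reacquiring queue_lock.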

Future ioc updates will further consolidate ioc access and the create
interface will be unexported.

While at it, relocate ioc internal interface declarations in blk.h and
add section comments before and after.

This patch does not introduce any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Showing 4 changed files with 71 additions and 54 deletions

block/blk-core.c
... ... @@ -771,9 +771,12 @@
771 771 {
772 772 struct request *rq = NULL;
773 773 struct request_list *rl = &q->rq;
774   - struct io_context *ioc = NULL;
  774 + struct io_context *ioc;
775 775 const bool is_sync = rw_is_sync(rw_flags) != 0;
  776 + bool retried = false;
776 777 int may_queue;
  778 +retry:
  779 + ioc = current->io_context;
777 780  
778 781 if (unlikely(blk_queue_dead(q)))
779 782 return NULL;
780 783  
... ... @@ -784,8 +787,21 @@
784 787  
785 788 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
786 789 if (rl->count[is_sync]+1 >= q->nr_requests) {
787   - ioc = current_io_context(GFP_ATOMIC, q->node);
788 790 /*
  791 + * We want ioc to record batching state. If it's
  792 + * not already there, creating a new one requires
  793 + * dropping queue_lock, which in turn requires
  794 + * retesting conditions to avoid queue hang.
  795 + */
  796 + if (!ioc && !retried) {
  797 + spin_unlock_irq(q->queue_lock);
  798 + create_io_context(current, gfp_mask, q->node);
  799 + spin_lock_irq(q->queue_lock);
  800 + retried = true;
  801 + goto retry;
  802 + }
  803 +
  804 + /*
789 805 * The queue will fill after this allocation, so set
790 806 * it as full, and mark this process as "batching".
791 807 * This process will be allowed to complete a batch of
... ... @@ -892,7 +908,6 @@
892 908 rq = get_request(q, rw_flags, bio, GFP_NOIO);
893 909 while (!rq) {
894 910 DEFINE_WAIT(wait);
895   - struct io_context *ioc;
896 911 struct request_list *rl = &q->rq;
897 912  
898 913 if (unlikely(blk_queue_dead(q)))
... ... @@ -912,8 +927,8 @@
912 927 * up to a big batch of them for a small period time.
913 928 * See ioc_batching, ioc_set_batching
914 929 */
915   - ioc = current_io_context(GFP_NOIO, q->node);
916   - ioc_set_batching(q, ioc);
  930 + create_io_context(current, GFP_NOIO, q->node);
  931 + ioc_set_batching(q, current->io_context);
917 932  
918 933 spin_lock_irq(q->queue_lock);
919 934 finish_wait(&rl->wait[is_sync], &wait);
block/blk-ioc.c
... ... @@ -205,16 +205,15 @@
205 205 put_io_context(ioc, NULL);
206 206 }
207 207  
208   -static struct io_context *create_task_io_context(struct task_struct *task,
209   - gfp_t gfp_flags, int node,
210   - bool take_ref)
  208 +void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
  209 + int node)
211 210 {
212 211 struct io_context *ioc;
213 212  
214 213 ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
215 214 node);
216 215 if (unlikely(!ioc))
217   - return NULL;
  216 + return;
218 217  
219 218 /* initialize */
220 219 atomic_long_set(&ioc->refcount, 1);
... ... @@ -226,44 +225,15 @@
226 225  
227 226 /* try to install, somebody might already have beaten us to it */
228 227 task_lock(task);
229   -
230   - if (!task->io_context && !(task->flags & PF_EXITING)) {
  228 + if (!task->io_context && !(task->flags & PF_EXITING))
231 229 task->io_context = ioc;
232   - } else {
  230 + else
233 231 kmem_cache_free(iocontext_cachep, ioc);
234   - ioc = task->io_context;
235   - }
236   -
237   - if (ioc && take_ref)
238   - get_io_context(ioc);
239   -
240 232 task_unlock(task);
241   - return ioc;
242 233 }
  234 +EXPORT_SYMBOL(create_io_context_slowpath);
243 235  
244 236 /**
245   - * current_io_context - get io_context of %current
246   - * @gfp_flags: allocation flags, used if allocation is necessary
247   - * @node: allocation node, used if allocation is necessary
248   - *
249   - * Return io_context of %current. If it doesn't exist, it is created with
250   - * @gfp_flags and @node. The returned io_context does NOT have its
251   - * reference count incremented. Because io_context is exited only on task
252   - * exit, %current can be sure that the returned io_context is valid and
253   - * alive as long as it is executing.
254   - */
255   -struct io_context *current_io_context(gfp_t gfp_flags, int node)
256   -{
257   - might_sleep_if(gfp_flags & __GFP_WAIT);
258   -
259   - if (current->io_context)
260   - return current->io_context;
261   -
262   - return create_task_io_context(current, gfp_flags, node, false);
263   -}
264   -EXPORT_SYMBOL(current_io_context);
265   -
266   -/**
267 237 * get_task_io_context - get io_context of a task
268 238 * @task: task of interest
269 239 * @gfp_flags: allocation flags, used if allocation is necessary
... ... @@ -274,7 +244,7 @@
274 244 * incremented.
275 245 *
276 246 * This function always goes through task_lock() and it's better to use
277   - * current_io_context() + get_io_context() for %current.
  247 + * %current->io_context + get_io_context() for %current.
278 248 */
279 249 struct io_context *get_task_io_context(struct task_struct *task,
280 250 gfp_t gfp_flags, int node)
... ... @@ -283,16 +253,18 @@
283 253  
284 254 might_sleep_if(gfp_flags & __GFP_WAIT);
285 255  
286   - task_lock(task);
287   - ioc = task->io_context;
288   - if (likely(ioc)) {
289   - get_io_context(ioc);
  256 + do {
  257 + task_lock(task);
  258 + ioc = task->io_context;
  259 + if (likely(ioc)) {
  260 + get_io_context(ioc);
  261 + task_unlock(task);
  262 + return ioc;
  263 + }
290 264 task_unlock(task);
291   - return ioc;
292   - }
293   - task_unlock(task);
  265 + } while (create_io_context(task, gfp_flags, node));
294 266  
295   - return create_task_io_context(task, gfp_flags, node, true);
  267 + return NULL;
296 268 }
297 269 EXPORT_SYMBOL(get_task_io_context);
298 270  
block/blk.h
... ... @@ -127,9 +127,6 @@
127 127 }
128 128 #endif
129 129  
130   -void get_io_context(struct io_context *ioc);
131   -struct io_context *current_io_context(gfp_t gfp_flags, int node);
132   -
133 130 int ll_back_merge_fn(struct request_queue *q, struct request *req,
134 131 struct bio *bio);
135 132 int ll_front_merge_fn(struct request_queue *q, struct request *req,
... ... @@ -198,6 +195,39 @@
198 195 (rq->cmd_flags & REQ_DISCARD));
199 196 }
200 197  
  198 +/*
  199 + * Internal io_context interface
  200 + */
  201 +void get_io_context(struct io_context *ioc);
  202 +
  203 +void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
  204 + int node);
  205 +
  206 +/**
  207 + * create_io_context - try to create task->io_context
  208 + * @task: target task
  209 + * @gfp_mask: allocation mask
  210 + * @node: allocation node
  211 + *
  212 + * If @task->io_context is %NULL, allocate a new io_context and install it.
  213 + * Returns the current @task->io_context which may be %NULL if allocation
  214 + * failed.
  215 + *
  216 + * Note that this function can't be called with IRQ disabled because
  217 + * task_lock which protects @task->io_context is IRQ-unsafe.
  218 + */
  219 +static inline struct io_context *create_io_context(struct task_struct *task,
  220 + gfp_t gfp_mask, int node)
  221 +{
  222 + WARN_ON_ONCE(irqs_disabled());
  223 + if (unlikely(!task->io_context))
  224 + create_io_context_slowpath(task, gfp_mask, node);
  225 + return task->io_context;
  226 +}
  227 +
  228 +/*
  229 + * Internal throttling interface
  230 + */
201 231 #ifdef CONFIG_BLK_DEV_THROTTLING
202 232 extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
203 233 extern void blk_throtl_drain(struct request_queue *q);
block/cfq-iosched.c
... ... @@ -3012,7 +3012,7 @@
3012 3012 might_sleep_if(gfp_mask & __GFP_WAIT);
3013 3013  
3014 3014 /* allocate stuff */
3015   - ioc = current_io_context(gfp_mask, q->node);
  3015 + ioc = create_io_context(current, gfp_mask, q->node);
3016 3016 if (!ioc)
3017 3017 goto out;
3018 3018