Commit dee11c2364f51cac53df17d742a0c69097e29a4e

Authored by Ken Chen
Committed by Linus Torvalds
1 parent 3e8219806c

[PATCH] aio: fix buggy put_ioctx call in aio_complete - v2

An AIO bug was reported that sleeping function is being called in softirq
context:

BUG: warning at kernel/mutex.c:132/__mutex_lock_common()
Call Trace:
     [<a000000100577b00>] __mutex_lock_slowpath+0x640/0x6c0
     [<a000000100577ba0>] mutex_lock+0x20/0x40
     [<a0000001000a25b0>] flush_workqueue+0xb0/0x1a0
     [<a00000010018c0c0>] __put_ioctx+0xc0/0x240
     [<a00000010018d470>] aio_complete+0x2f0/0x420
     [<a00000010019cc80>] finished_one_bio+0x200/0x2a0
     [<a00000010019d1c0>] dio_bio_complete+0x1c0/0x200
     [<a00000010019d260>] dio_bio_end_aio+0x60/0x80
     [<a00000010014acd0>] bio_endio+0x110/0x1c0
     [<a0000001002770e0>] __end_that_request_first+0x180/0xba0
     [<a000000100277b90>] end_that_request_chunk+0x30/0x60
     [<a0000002073c0c70>] scsi_end_request+0x50/0x300 [scsi_mod]
     [<a0000002073c1240>] scsi_io_completion+0x200/0x8a0 [scsi_mod]
     [<a0000002074729b0>] sd_rw_intr+0x330/0x860 [sd_mod]
     [<a0000002073b3ac0>] scsi_finish_command+0x100/0x1c0 [scsi_mod]
     [<a0000002073c2910>] scsi_softirq_done+0x230/0x300 [scsi_mod]
     [<a000000100277d20>] blk_done_softirq+0x160/0x1c0
     [<a000000100083e00>] __do_softirq+0x200/0x240
     [<a000000100083eb0>] do_softirq+0x70/0xc0

See report: http://marc.theaimsgroup.com/?l=linux-kernel&m=116599593200888&w=2

flush_workqueue() is not allowed to be called in the softirq context.
However, aio_complete(), called from an I/O interrupt, can potentially call
put_ioctx with the last ref count on the ioctx, triggering the bug.  It is
simply incorrect to perform ioctx freeing from aio_complete.

The bug is triggerable from a race between io_destroy() and aio_complete().
A possible scenario:

cpu0                               cpu1
io_destroy                         aio_complete
  wait_for_all_aios {                __aio_put_req
     ...                                 ctx->reqs_active--;
     if (!ctx->reqs_active)
        return;
  }
  ...
  put_ioctx(ioctx)

                                     put_ioctx(ctx);
                                        __put_ioctx
                                          bam! Bug trigger!

The real problem is that the condition check of ctx->reqs_active in
wait_for_all_aios() is incorrect: access to reqs_active is not properly
protected by the spin lock.

This patch adds that protective spin lock, and at the same time removes
all duplicate ref counting for each kiocb as reqs_active is already used
as a ref count for each active ioctx.  This also ensures that buggy call
to flush_workqueue() in softirq context is eliminated.

Signed-off-by: "Ken Chen" <kenchen@google.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: <stable@kernel.org>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 9 additions and 11 deletions Side-by-side Diff

... ... @@ -298,17 +298,23 @@
298 298 struct task_struct *tsk = current;
299 299 DECLARE_WAITQUEUE(wait, tsk);
300 300  
  301 + spin_lock_irq(&ctx->ctx_lock);
301 302 if (!ctx->reqs_active)
302   - return;
  303 + goto out;
303 304  
304 305 add_wait_queue(&ctx->wait, &wait);
305 306 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
306 307 while (ctx->reqs_active) {
  308 + spin_unlock_irq(&ctx->ctx_lock);
307 309 schedule();
308 310 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  311 + spin_lock_irq(&ctx->ctx_lock);
309 312 }
310 313 __set_task_state(tsk, TASK_RUNNING);
311 314 remove_wait_queue(&ctx->wait, &wait);
  315 +
  316 +out:
  317 + spin_unlock_irq(&ctx->ctx_lock);
312 318 }
313 319  
314 320 /* wait_on_sync_kiocb:
... ... @@ -424,7 +430,6 @@
424 430 ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
425 431 if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
426 432 list_add(&req->ki_list, &ctx->active_reqs);
427   - get_ioctx(ctx);
428 433 ctx->reqs_active++;
429 434 okay = 1;
430 435 }
... ... @@ -536,8 +541,6 @@
536 541 spin_lock_irq(&ctx->ctx_lock);
537 542 ret = __aio_put_req(ctx, req);
538 543 spin_unlock_irq(&ctx->ctx_lock);
539   - if (ret)
540   - put_ioctx(ctx);
541 544 return ret;
542 545 }
543 546  
... ... @@ -779,8 +782,7 @@
779 782 */
780 783 iocb->ki_users++; /* grab extra reference */
781 784 aio_run_iocb(iocb);
782   - if (__aio_put_req(ctx, iocb)) /* drop extra ref */
783   - put_ioctx(ctx);
  785 + __aio_put_req(ctx, iocb);
784 786 }
785 787 if (!list_empty(&ctx->run_list))
786 788 return 1;
787 789  
... ... @@ -997,14 +999,10 @@
997 999 /* everything turned out well, dispose of the aiocb. */
998 1000 ret = __aio_put_req(ctx, iocb);
999 1001  
1000   - spin_unlock_irqrestore(&ctx->ctx_lock, flags);
1001   -
1002 1002 if (waitqueue_active(&ctx->wait))
1003 1003 wake_up(&ctx->wait);
1004 1004  
1005   - if (ret)
1006   - put_ioctx(ctx);
1007   -
  1005 + spin_unlock_irqrestore(&ctx->ctx_lock, flags);
1008 1006 return ret;
1009 1007 }
1010 1008