Commit 87c3a86e1c220121d0ced59d1a71e78ed9abc6dd

Authored by Davide Libenzi
Committed by Linus Torvalds
1 parent d0115552cd

eventfd: remove fput() call from possible IRQ context

Remove a source of fput() calls from inside IRQ context.  Like Eric, I
wasn't able to reproduce an fput() call from IRQ context, but Jeff said he was
able to, with the attached test program.  Regardless of that, the bug is
conceptually there, so we might be better off fixing it.  This patch adds, for
->ki_eventfd, an optimization similar to the one we already do for ->ki_filp.
Playing with ->f_count directly is not pretty in general, but the alternative
here would be to add a brand new delayed fput() infrastructure, which I'm not
sure is worth it.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 27 additions and 10 deletions Side-by-side Diff

... ... @@ -443,7 +443,7 @@
443 443 req->private = NULL;
444 444 req->ki_iovec = NULL;
445 445 INIT_LIST_HEAD(&req->ki_run_list);
446   - req->ki_eventfd = ERR_PTR(-EINVAL);
  446 + req->ki_eventfd = NULL;
447 447  
448 448 /* Check if the completion queue has enough free space to
449 449 * accept an event from this io.
... ... @@ -485,8 +485,6 @@
485 485 {
486 486 assert_spin_locked(&ctx->ctx_lock);
487 487  
488   - if (!IS_ERR(req->ki_eventfd))
489   - fput(req->ki_eventfd);
490 488 if (req->ki_dtor)
491 489 req->ki_dtor(req);
492 490 if (req->ki_iovec != &req->ki_inline_vec)
... ... @@ -508,8 +506,11 @@
508 506 list_del(&req->ki_list);
509 507 spin_unlock_irq(&fput_lock);
510 508  
511   - /* Complete the fput */
512   - __fput(req->ki_filp);
  509 + /* Complete the fput(s) */
  510 + if (req->ki_filp != NULL)
  511 + __fput(req->ki_filp);
  512 + if (req->ki_eventfd != NULL)
  513 + __fput(req->ki_eventfd);
513 514  
514 515 /* Link the iocb into the context's free list */
515 516 spin_lock_irq(&ctx->ctx_lock);
516 517  
... ... @@ -527,12 +528,14 @@
527 528 */
528 529 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
529 530 {
  531 + int schedule_putreq = 0;
  532 +
530 533 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
531 534 req, atomic_long_read(&req->ki_filp->f_count));
532 535  
533 536 assert_spin_locked(&ctx->ctx_lock);
534 537  
535   - req->ki_users --;
  538 + req->ki_users--;
536 539 BUG_ON(req->ki_users < 0);
537 540 if (likely(req->ki_users))
538 541 return 0;
539 542  
... ... @@ -540,10 +543,23 @@
540 543 req->ki_cancel = NULL;
541 544 req->ki_retry = NULL;
542 545  
543   - /* Must be done under the lock to serialise against cancellation.
544   - * Call this aio_fput as it duplicates fput via the fput_work.
  546 + /*
  547 + * Try to optimize the aio and eventfd file* puts, by avoiding to
  548 + * schedule work in case it is not __fput() time. In normal cases,
  549 + * we would not be holding the last reference to the file*, so
  550 + * this function will be executed w/out any aio kthread wakeup.
545 551 */
546   - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
  552 + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
  553 + schedule_putreq++;
  554 + else
  555 + req->ki_filp = NULL;
  556 + if (req->ki_eventfd != NULL) {
  557 + if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
  558 + schedule_putreq++;
  559 + else
  560 + req->ki_eventfd = NULL;
  561 + }
  562 + if (unlikely(schedule_putreq)) {
547 563 get_ioctx(ctx);
548 564 spin_lock(&fput_lock);
549 565 list_add(&req->ki_list, &fput_head);
... ... @@ -1009,7 +1025,7 @@
1009 1025 * eventfd. The eventfd_signal() function is safe to be called
1010 1026 * from IRQ context.
1011 1027 */
1012   - if (!IS_ERR(iocb->ki_eventfd))
  1028 + if (iocb->ki_eventfd != NULL)
1013 1029 eventfd_signal(iocb->ki_eventfd, 1);
1014 1030  
1015 1031 put_rq:
... ... @@ -1608,6 +1624,7 @@
1608 1624 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
1609 1625 if (IS_ERR(req->ki_eventfd)) {
1610 1626 ret = PTR_ERR(req->ki_eventfd);
  1627 + req->ki_eventfd = NULL;
1611 1628 goto out_put_req;
1612 1629 }
1613 1630 }