Commit c1ef57a3a3f5e69e98baf89055b423da62791c13

Authored by Linus Torvalds

Merge tag 'io_uring-5.6-2020-02-05' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "Some later fixes for io_uring:

   - Small cleanup series from Pavel

   - Belt and suspenders build time check of sqe size and layout
     (Stefan)

   - Addition of ->show_fdinfo() on request of Jann Horn, to aid in
     understanding mapped personalities

   - eventfd recursion/deadlock fix, for both io_uring and aio

   - Fixup for send/recv handling

   - Fixup for double deferral of read/write request

   - Fix for potential double completion event for close request

   - Adjust fadvise advice async/inline behavior

   - Fix for shutdown hang with SQPOLL thread

   - Fix for potential use-after-free of fixed file table"

* tag 'io_uring-5.6-2020-02-05' of git://git.kernel.dk/linux-block:
  io_uring: cleanup fixed file data table references
  io_uring: spin for sq thread to idle on shutdown
  aio: prevent potential eventfd recursion on poll
  io_uring: put the flag changing code in the same spot
  io_uring: iterate req cache backwards
  io_uring: punt even fadvise() WILLNEED to async context
  io_uring: fix sporadic double CQE entry for close
  io_uring: remove extra ->file check
  io_uring: don't map read/write iovec potentially twice
  io_uring: use the proper helpers for io_send/recv
  io_uring: prevent potential eventfd recursion on poll
  eventfd: track eventfd_signal() recursion depth
  io_uring: add BUILD_BUG_ON() to assert the layout of struct io_uring_sqe
  io_uring: add ->show_fdinfo() for the io_uring file descriptor

4 changed files: fs/aio.c, fs/eventfd.c, fs/io_uring.c, include/linux/eventfd.h

fs/aio.c
... ... @@ -1610,6 +1610,14 @@
1610 1610 return 0;
1611 1611 }
1612 1612  
  1613 +static void aio_poll_put_work(struct work_struct *work)
  1614 +{
  1615 + struct poll_iocb *req = container_of(work, struct poll_iocb, work);
  1616 + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
  1617 +
  1618 + iocb_put(iocb);
  1619 +}
  1620 +
1613 1621 static void aio_poll_complete_work(struct work_struct *work)
1614 1622 {
1615 1623 struct poll_iocb *req = container_of(work, struct poll_iocb, work);
... ... @@ -1674,6 +1682,8 @@
1674 1682 list_del_init(&req->wait.entry);
1675 1683  
1676 1684 if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
  1685 + struct kioctx *ctx = iocb->ki_ctx;
  1686 +
1677 1687 /*
1678 1688 * Try to complete the iocb inline if we can. Use
1679 1689 * irqsave/irqrestore because not all filesystems (e.g. fuse)
... ... @@ -1683,8 +1693,14 @@
1683 1693 list_del(&iocb->ki_list);
1684 1694 iocb->ki_res.res = mangle_poll(mask);
1685 1695 req->done = true;
1686   - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
1687   - iocb_put(iocb);
  1696 + if (iocb->ki_eventfd && eventfd_signal_count()) {
  1697 + iocb = NULL;
  1698 + INIT_WORK(&req->work, aio_poll_put_work);
  1699 + schedule_work(&req->work);
  1700 + }
  1701 + spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  1702 + if (iocb)
  1703 + iocb_put(iocb);
1688 1704 } else {
1689 1705 schedule_work(&req->work);
1690 1706 }
fs/eventfd.c
... ... @@ -24,6 +24,8 @@
24 24 #include <linux/seq_file.h>
25 25 #include <linux/idr.h>
26 26  
  27 +DEFINE_PER_CPU(int, eventfd_wake_count);
  28 +
27 29 static DEFINE_IDA(eventfd_ida);
28 30  
29 31 struct eventfd_ctx {
... ... @@ -60,12 +62,25 @@
60 62 {
61 63 unsigned long flags;
62 64  
  65 + /*
  66 + * Deadlock or stack overflow issues can happen if we recurse here
  67 + * through waitqueue wakeup handlers. If the caller uses potentially
  68 + * nested waitqueues with custom wakeup handlers, then it should
  69 + * check eventfd_signal_count() before calling this function. If
  70 + * it returns true, the eventfd_signal() call should be deferred to a
  71 + * safe context.
  72 + */
  73 + if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
  74 + return 0;
  75 +
63 76 spin_lock_irqsave(&ctx->wqh.lock, flags);
  77 + this_cpu_inc(eventfd_wake_count);
64 78 if (ULLONG_MAX - ctx->count < n)
65 79 n = ULLONG_MAX - ctx->count;
66 80 ctx->count += n;
67 81 if (waitqueue_active(&ctx->wqh))
68 82 wake_up_locked_poll(&ctx->wqh, EPOLLIN);
  83 + this_cpu_dec(eventfd_wake_count);
69 84 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
70 85  
71 86 return n;
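
The per-CPU eventfd_wake_count counter above means that any code which can end up calling eventfd_signal() from inside a waitqueue wakeup or completion handler should check eventfd_signal_count() first and, if it returns true, defer the signal to a safe context. That is exactly what the fs/aio.c hunk above and the io_uring poll hunk further down do. A minimal kernel-style sketch of the pattern; the names my_completion, my_complete and my_signal_work are illustrative only, not part of this commit:

    #include <linux/kernel.h>
    #include <linux/eventfd.h>
    #include <linux/workqueue.h>

    struct my_completion {
            struct eventfd_ctx      *evfd;
            struct work_struct      signal_work;
    };

    /* Runs from the system workqueue, where no wakeup handler is on the stack. */
    static void my_signal_work(struct work_struct *work)
    {
            struct my_completion *c = container_of(work, struct my_completion,
                                                   signal_work);

            eventfd_signal(c->evfd, 1);
    }

    static void my_complete(struct my_completion *c)
    {
            if (eventfd_signal_count()) {
                    /* Already inside eventfd_signal() on this CPU: defer. */
                    INIT_WORK(&c->signal_work, my_signal_work);
                    schedule_work(&c->signal_work);
            } else {
                    eventfd_signal(c->evfd, 1);
            }
    }
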
fs/io_uring.c
... ... @@ -585,8 +585,7 @@
585 585 * io_kiocb alloc cache
586 586 */
587 587 void *reqs[IO_IOPOLL_BATCH];
588   - unsigned int free_reqs;
589   - unsigned int cur_req;
  588 + unsigned int free_reqs;
590 589  
591 590 /*
592 591 * File reference cache
... ... @@ -754,6 +753,7 @@
754 753 struct io_uring_files_update *ip,
755 754 unsigned nr_args);
756 755 static int io_grab_files(struct io_kiocb *req);
  756 +static void io_ring_file_ref_flush(struct fixed_file_data *data);
757 757  
758 758 static struct kmem_cache *req_cachep;
759 759  
... ... @@ -1020,21 +1020,28 @@
1020 1020  
1021 1021 static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
1022 1022 {
  1023 + if (!ctx->cq_ev_fd)
  1024 + return false;
1023 1025 if (!ctx->eventfd_async)
1024 1026 return true;
1025 1027 return io_wq_current_is_worker() || in_interrupt();
1026 1028 }
1027 1029  
1028   -static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
  1030 +static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev)
1029 1031 {
1030 1032 if (waitqueue_active(&ctx->wait))
1031 1033 wake_up(&ctx->wait);
1032 1034 if (waitqueue_active(&ctx->sqo_wait))
1033 1035 wake_up(&ctx->sqo_wait);
1034   - if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx))
  1036 + if (trigger_ev)
1035 1037 eventfd_signal(ctx->cq_ev_fd, 1);
1036 1038 }
1037 1039  
  1040 +static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
  1041 +{
  1042 + __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx));
  1043 +}
  1044 +
1038 1045 /* Returns true if there are no backlogged entries after the flush */
1039 1046 static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
1040 1047 {
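
For reference, the ctx->eventfd_async check above is driven from userspace: registering the eventfd with IORING_REGISTER_EVENTFD_ASYNC (added in the same 5.6 cycle) makes the kernel signal it only for completions posted from async context (io-wq workers or interrupt), while plain IORING_REGISTER_EVENTFD signals for every completion. A rough userspace sketch, assuming UAPI headers recent enough to define both opcodes:

    #include <stdbool.h>
    #include <unistd.h>
    #include <sys/eventfd.h>
    #include <sys/syscall.h>
    #include <linux/io_uring.h>

    /* Create an eventfd and attach it to the ring; returns the eventfd or -1. */
    static int register_ring_eventfd(int ring_fd, bool async_only)
    {
            unsigned int op = async_only ? IORING_REGISTER_EVENTFD_ASYNC
                                         : IORING_REGISTER_EVENTFD;
            int efd = eventfd(0, EFD_CLOEXEC);

            if (efd < 0)
                    return -1;
            if (syscall(__NR_io_uring_register, ring_fd, op, &efd, 1) < 0) {
                    close(efd);
                    return -1;
            }
            return efd;
    }
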
... ... @@ -1183,12 +1190,10 @@
1183 1190 ret = 1;
1184 1191 }
1185 1192 state->free_reqs = ret - 1;
1186   - state->cur_req = 1;
1187   - req = state->reqs[0];
  1193 + req = state->reqs[ret - 1];
1188 1194 } else {
1189   - req = state->reqs[state->cur_req];
1190 1195 state->free_reqs--;
1191   - state->cur_req++;
  1196 + req = state->reqs[state->free_reqs];
1192 1197 }
1193 1198  
1194 1199 got_it:
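
The change above drops cur_req and treats reqs[] as a plain LIFO stack indexed by free_reqs alone; the matching hunk further down lets io_submit_state_end() bulk-free whatever is left straight from state->reqs. A simplified userspace model of the indexing (not the kernel's actual structures):

    #include <stddef.h>

    #define BATCH_SIZE 8

    struct submit_state {
            void *reqs[BATCH_SIZE];
            unsigned int free_reqs;  /* cached entries live in reqs[0..free_reqs-1] */
    };

    /* Hand out the topmost cached entry, or NULL once the cache is empty. */
    static void *state_get_req(struct submit_state *state)
    {
            if (!state->free_reqs)
                    return NULL;  /* the real code bulk-allocates a fresh batch here */
            state->free_reqs--;
            return state->reqs[state->free_reqs];
    }
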
... ... @@ -1855,9 +1860,6 @@
1855 1860 unsigned ioprio;
1856 1861 int ret;
1857 1862  
1858   - if (!req->file)
1859   - return -EBADF;
1860   -
1861 1863 if (S_ISREG(file_inode(req->file)->i_mode))
1862 1864 req->flags |= REQ_F_ISREG;
1863 1865  
... ... @@ -1866,8 +1868,11 @@
1866 1868 req->flags |= REQ_F_CUR_POS;
1867 1869 kiocb->ki_pos = req->file->f_pos;
1868 1870 }
1869   - kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
1870 1871 kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
  1872 + kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
  1873 + ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
  1874 + if (unlikely(ret))
  1875 + return ret;
1871 1876  
1872 1877 ioprio = READ_ONCE(sqe->ioprio);
1873 1878 if (ioprio) {
... ... @@ -1879,10 +1884,6 @@
1879 1884 } else
1880 1885 kiocb->ki_ioprio = get_current_ioprio();
1881 1886  
1882   - ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
1883   - if (unlikely(ret))
1884   - return ret;
1885   -
1886 1887 /* don't allow async punt if RWF_NOWAIT was requested */
1887 1888 if ((kiocb->ki_flags & IOCB_NOWAIT) ||
1888 1889 (req->file->f_flags & O_NONBLOCK))
... ... @@ -2164,10 +2165,12 @@
2164 2165 {
2165 2166 if (!io_op_defs[req->opcode].async_ctx)
2166 2167 return 0;
2167   - if (!req->io && io_alloc_async_ctx(req))
2168   - return -ENOMEM;
  2168 + if (!req->io) {
  2169 + if (io_alloc_async_ctx(req))
  2170 + return -ENOMEM;
2169 2171  
2170   - io_req_map_rw(req, io_size, iovec, fast_iov, iter);
  2172 + io_req_map_rw(req, io_size, iovec, fast_iov, iter);
  2173 + }
2171 2174 req->work.func = io_rw_async;
2172 2175 return 0;
2173 2176 }
... ... @@ -2724,9 +2727,16 @@
2724 2727 struct io_fadvise *fa = &req->fadvise;
2725 2728 int ret;
2726 2729  
2727   - /* DONTNEED may block, others _should_ not */
2728   - if (fa->advice == POSIX_FADV_DONTNEED && force_nonblock)
2729   - return -EAGAIN;
  2730 + if (force_nonblock) {
  2731 + switch (fa->advice) {
  2732 + case POSIX_FADV_NORMAL:
  2733 + case POSIX_FADV_RANDOM:
  2734 + case POSIX_FADV_SEQUENTIAL:
  2735 + break;
  2736 + default:
  2737 + return -EAGAIN;
  2738 + }
  2739 + }
2730 2740  
2731 2741 ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
2732 2742 if (ret < 0)
2733 2743  
2734 2744  
... ... @@ -2837,16 +2847,13 @@
2837 2847 int ret;
2838 2848  
2839 2849 ret = filp_close(req->close.put_file, req->work.files);
2840   - if (ret < 0) {
  2850 + if (ret < 0)
2841 2851 req_set_fail_links(req);
2842   - }
2843 2852 io_cqring_add_event(req, ret);
2844 2853 }
2845 2854  
2846 2855 fput(req->close.put_file);
2847 2856  
2848   - /* we bypassed the re-issue, drop the submission reference */
2849   - io_put_req(req);
2850 2857 io_put_req_find_next(req, &nxt);
2851 2858 if (nxt)
2852 2859 io_wq_assign_next(workptr, nxt);
... ... @@ -2888,7 +2895,13 @@
2888 2895  
2889 2896 eagain:
2890 2897 req->work.func = io_close_finish;
2891   - return -EAGAIN;
  2898 + /*
  2899 + * Do manual async queue here to avoid grabbing files - we don't
  2900 + * need the files, and it'll cause io_close_finish() to close
  2901 + * the file again and cause a double CQE entry for this request
  2902 + */
  2903 + io_queue_async_work(req);
  2904 + return 0;
2892 2905 }
2893 2906  
2894 2907 static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
... ... @@ -3083,7 +3096,8 @@
3083 3096 else if (force_nonblock)
3084 3097 flags |= MSG_DONTWAIT;
3085 3098  
3086   - ret = __sys_sendmsg_sock(sock, &msg, flags);
  3099 + msg.msg_flags = flags;
  3100 + ret = sock_sendmsg(sock, &msg);
3087 3101 if (force_nonblock && ret == -EAGAIN)
3088 3102 return -EAGAIN;
3089 3103 if (ret == -ERESTARTSYS)
... ... @@ -3109,6 +3123,7 @@
3109 3123  
3110 3124 sr->msg_flags = READ_ONCE(sqe->msg_flags);
3111 3125 sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
  3126 + sr->len = READ_ONCE(sqe->len);
3112 3127  
3113 3128 if (!io || req->opcode == IORING_OP_RECV)
3114 3129 return 0;
... ... @@ -3227,7 +3242,7 @@
3227 3242 else if (force_nonblock)
3228 3243 flags |= MSG_DONTWAIT;
3229 3244  
3230   - ret = __sys_recvmsg_sock(sock, &msg, NULL, NULL, flags);
  3245 + ret = sock_recvmsg(sock, &msg, flags);
3231 3246 if (force_nonblock && ret == -EAGAIN)
3232 3247 return -EAGAIN;
3233 3248 if (ret == -ERESTARTSYS)
... ... @@ -3561,6 +3576,14 @@
3561 3576 __io_poll_flush(req->ctx, nodes);
3562 3577 }
3563 3578  
  3579 +static void io_poll_trigger_evfd(struct io_wq_work **workptr)
  3580 +{
  3581 + struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
  3582 +
  3583 + eventfd_signal(req->ctx->cq_ev_fd, 1);
  3584 + io_put_req(req);
  3585 +}
  3586 +
3564 3587 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
3565 3588 void *key)
3566 3589 {
... ... @@ -3586,14 +3609,22 @@
3586 3609  
3587 3610 if (llist_empty(&ctx->poll_llist) &&
3588 3611 spin_trylock_irqsave(&ctx->completion_lock, flags)) {
  3612 + bool trigger_ev;
  3613 +
3589 3614 hash_del(&req->hash_node);
3590 3615 io_poll_complete(req, mask, 0);
3591   - req->flags |= REQ_F_COMP_LOCKED;
3592   - io_put_req(req);
3593   - spin_unlock_irqrestore(&ctx->completion_lock, flags);
3594 3616  
3595   - io_cqring_ev_posted(ctx);
3596   - req = NULL;
  3617 + trigger_ev = io_should_trigger_evfd(ctx);
  3618 + if (trigger_ev && eventfd_signal_count()) {
  3619 + trigger_ev = false;
  3620 + req->work.func = io_poll_trigger_evfd;
  3621 + } else {
  3622 + req->flags |= REQ_F_COMP_LOCKED;
  3623 + io_put_req(req);
  3624 + req = NULL;
  3625 + }
  3626 + spin_unlock_irqrestore(&ctx->completion_lock, flags);
  3627 + __io_cqring_ev_posted(ctx, trigger_ev);
3597 3628 } else {
3598 3629 req->result = mask;
3599 3630 req->llist_node.next = NULL;
... ... @@ -4815,8 +4846,7 @@
4815 4846 blk_finish_plug(&state->plug);
4816 4847 io_file_put(state);
4817 4848 if (state->free_reqs)
4818   - kmem_cache_free_bulk(req_cachep, state->free_reqs,
4819   - &state->reqs[state->cur_req]);
  4849 + kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
4820 4850 }
4821 4851  
4822 4852 /*
... ... @@ -5041,7 +5071,8 @@
5041 5071 * reap events and wake us up.
5042 5072 */
5043 5073 if (inflight ||
5044   - (!time_after(jiffies, timeout) && ret != -EBUSY)) {
  5074 + (!time_after(jiffies, timeout) && ret != -EBUSY &&
  5075 + !percpu_ref_is_dying(&ctx->refs))) {
5045 5076 cond_resched();
5046 5077 continue;
5047 5078 }
... ... @@ -5231,15 +5262,10 @@
5231 5262 if (!data)
5232 5263 return -ENXIO;
5233 5264  
5234   - /* protect against inflight atomic switch, which drops the ref */
5235   - percpu_ref_get(&data->refs);
5236   - /* wait for existing switches */
5237   - flush_work(&data->ref_work);
5238 5265 percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
5239   - wait_for_completion(&data->done);
5240   - percpu_ref_put(&data->refs);
5241   - /* flush potential new switch */
5242 5266 flush_work(&data->ref_work);
  5267 + wait_for_completion(&data->done);
  5268 + io_ring_file_ref_flush(data);
5243 5269 percpu_ref_exit(&data->refs);
5244 5270  
5245 5271 __io_sqe_files_unregister(ctx);
... ... @@ -5477,14 +5503,11 @@
5477 5503 struct completion *done;
5478 5504 };
5479 5505  
5480   -static void io_ring_file_ref_switch(struct work_struct *work)
  5506 +static void io_ring_file_ref_flush(struct fixed_file_data *data)
5481 5507 {
5482 5508 struct io_file_put *pfile, *tmp;
5483   - struct fixed_file_data *data;
5484 5509 struct llist_node *node;
5485 5510  
5486   - data = container_of(work, struct fixed_file_data, ref_work);
5487   -
5488 5511 while ((node = llist_del_all(&data->put_llist)) != NULL) {
5489 5512 llist_for_each_entry_safe(pfile, tmp, node, llist) {
5490 5513 io_ring_file_put(data->ctx, pfile->file);
... ... @@ -5494,7 +5517,14 @@
5494 5517 kfree(pfile);
5495 5518 }
5496 5519 }
  5520 +}
5497 5521  
  5522 +static void io_ring_file_ref_switch(struct work_struct *work)
  5523 +{
  5524 + struct fixed_file_data *data;
  5525 +
  5526 + data = container_of(work, struct fixed_file_data, ref_work);
  5527 + io_ring_file_ref_flush(data);
5498 5528 percpu_ref_get(&data->refs);
5499 5529 percpu_ref_switch_to_percpu(&data->refs);
5500 5530 }
... ... @@ -5505,8 +5535,14 @@
5505 5535  
5506 5536 data = container_of(ref, struct fixed_file_data, refs);
5507 5537  
5508   - /* we can't safely switch from inside this context, punt to wq */
5509   - queue_work(system_wq, &data->ref_work);
  5538 + /*
  5539 + * We can't safely switch from inside this context, punt to wq. If
  5540 + * the table ref is going away, the table is being unregistered.
  5541 + * Don't queue up the async work for that case, the caller will
  5542 + * handle it.
  5543 + */
  5544 + if (!percpu_ref_is_dying(&data->refs))
  5545 + queue_work(system_wq, &data->ref_work);
5510 5546 }
5511 5547  
5512 5548 static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
... ... @@ -6295,6 +6331,16 @@
6295 6331 percpu_ref_kill(&ctx->refs);
6296 6332 mutex_unlock(&ctx->uring_lock);
6297 6333  
  6334 + /*
  6335 + * Wait for sq thread to idle, if we have one. It won't spin on new
  6336 + * work after we've killed the ctx ref above. This is important to do
  6337 + * before we cancel existing commands, as the thread could otherwise
  6338 + * be queueing new work post that. If that's work we need to cancel,
  6339 + * it could cause shutdown to hang.
  6340 + */
  6341 + while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait))
  6342 + cpu_relax();
  6343 +
6298 6344 io_kill_timeouts(ctx);
6299 6345 io_poll_remove_all(ctx);
6300 6346  
... ... @@ -6501,6 +6547,80 @@
6501 6547 return submitted ? submitted : ret;
6502 6548 }
6503 6549  
  6550 +static int io_uring_show_cred(int id, void *p, void *data)
  6551 +{
  6552 + const struct cred *cred = p;
  6553 + struct seq_file *m = data;
  6554 + struct user_namespace *uns = seq_user_ns(m);
  6555 + struct group_info *gi;
  6556 + kernel_cap_t cap;
  6557 + unsigned __capi;
  6558 + int g;
  6559 +
  6560 + seq_printf(m, "%5d\n", id);
  6561 + seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
  6562 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
  6563 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
  6564 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
  6565 + seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
  6566 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
  6567 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
  6568 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
  6569 + seq_puts(m, "\n\tGroups:\t");
  6570 + gi = cred->group_info;
  6571 + for (g = 0; g < gi->ngroups; g++) {
  6572 + seq_put_decimal_ull(m, g ? " " : "",
  6573 + from_kgid_munged(uns, gi->gid[g]));
  6574 + }
  6575 + seq_puts(m, "\n\tCapEff:\t");
  6576 + cap = cred->cap_effective;
  6577 + CAP_FOR_EACH_U32(__capi)
  6578 + seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
  6579 + seq_putc(m, '\n');
  6580 + return 0;
  6581 +}
  6582 +
  6583 +static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
  6584 +{
  6585 + int i;
  6586 +
  6587 + mutex_lock(&ctx->uring_lock);
  6588 + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
  6589 + for (i = 0; i < ctx->nr_user_files; i++) {
  6590 + struct fixed_file_table *table;
  6591 + struct file *f;
  6592 +
  6593 + table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
  6594 + f = table->files[i & IORING_FILE_TABLE_MASK];
  6595 + if (f)
  6596 + seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
  6597 + else
  6598 + seq_printf(m, "%5u: <none>\n", i);
  6599 + }
  6600 + seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
  6601 + for (i = 0; i < ctx->nr_user_bufs; i++) {
  6602 + struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
  6603 +
  6604 + seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
  6605 + (unsigned int) buf->len);
  6606 + }
  6607 + if (!idr_is_empty(&ctx->personality_idr)) {
  6608 + seq_printf(m, "Personalities:\n");
  6609 + idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
  6610 + }
  6611 + mutex_unlock(&ctx->uring_lock);
  6612 +}
  6613 +
  6614 +static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
  6615 +{
  6616 + struct io_ring_ctx *ctx = f->private_data;
  6617 +
  6618 + if (percpu_ref_tryget(&ctx->refs)) {
  6619 + __io_uring_show_fdinfo(ctx, m);
  6620 + percpu_ref_put(&ctx->refs);
  6621 + }
  6622 +}
  6623 +
6504 6624 static const struct file_operations io_uring_fops = {
6505 6625 .release = io_uring_release,
6506 6626 .flush = io_uring_flush,
... ... @@ -6511,6 +6631,7 @@
6511 6631 #endif
6512 6632 .poll = io_uring_poll,
6513 6633 .fasync = io_uring_fasync,
  6634 + .show_fdinfo = io_uring_show_fdinfo,
6514 6635 };
6515 6636  
6516 6637 static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
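
With .show_fdinfo wired up, the new state dump is readable through procfs. A small userspace sketch that prints it for a ring fd returned by io_uring_setup() (error handling trimmed):

    #include <stdio.h>

    static void dump_ring_fdinfo(int ring_fd)
    {
            char path[64], line[256];
            FILE *f;

            snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", ring_fd);
            f = fopen(path, "r");
            if (!f)
                    return;
            /* Includes the UserFiles/UserBufs/Personalities lines added above. */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
    }
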
... ... @@ -6963,6 +7084,39 @@
6963 7084  
6964 7085 static int __init io_uring_init(void)
6965 7086 {
  7087 +#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
  7088 + BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
  7089 + BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
  7090 +} while (0)
  7091 +
  7092 +#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
  7093 + __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
  7094 + BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
  7095 + BUILD_BUG_SQE_ELEM(0, __u8, opcode);
  7096 + BUILD_BUG_SQE_ELEM(1, __u8, flags);
  7097 + BUILD_BUG_SQE_ELEM(2, __u16, ioprio);
  7098 + BUILD_BUG_SQE_ELEM(4, __s32, fd);
  7099 + BUILD_BUG_SQE_ELEM(8, __u64, off);
  7100 + BUILD_BUG_SQE_ELEM(8, __u64, addr2);
  7101 + BUILD_BUG_SQE_ELEM(16, __u64, addr);
  7102 + BUILD_BUG_SQE_ELEM(24, __u32, len);
  7103 + BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
  7104 + BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
  7105 + BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
  7106 + BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags);
  7107 + BUILD_BUG_SQE_ELEM(28, __u16, poll_events);
  7108 + BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags);
  7109 + BUILD_BUG_SQE_ELEM(28, __u32, msg_flags);
  7110 + BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags);
  7111 + BUILD_BUG_SQE_ELEM(28, __u32, accept_flags);
  7112 + BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags);
  7113 + BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
  7114 + BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
  7115 + BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
  7116 + BUILD_BUG_SQE_ELEM(32, __u64, user_data);
  7117 + BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
  7118 + BUILD_BUG_SQE_ELEM(42, __u16, personality);
  7119 +
6966 7120 BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
6967 7121 req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
6968 7122 return 0;
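
Each BUILD_BUG_SQE_ELEM() line above expands to an offset plus size check against struct io_uring_sqe, so any accidental reshuffling of the SQE layout now fails the kernel build. The same invariants can be spot-checked from userspace against the installed UAPI header, for example (illustrative only, C11 static_assert):

    #include <assert.h>
    #include <stddef.h>
    #include <linux/io_uring.h>

    static_assert(sizeof(struct io_uring_sqe) == 64, "sqe must stay 64 bytes");
    static_assert(offsetof(struct io_uring_sqe, opcode) == 0, "opcode at byte 0");
    static_assert(offsetof(struct io_uring_sqe, user_data) == 32, "user_data at byte 32");
    static_assert(offsetof(struct io_uring_sqe, buf_index) == 40, "buf_index at byte 40");
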
include/linux/eventfd.h
... ... @@ -12,6 +12,8 @@
12 12 #include <linux/fcntl.h>
13 13 #include <linux/wait.h>
14 14 #include <linux/err.h>
  15 +#include <linux/percpu-defs.h>
  16 +#include <linux/percpu.h>
15 17  
16 18 /*
17 19 * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
... ... @@ -40,6 +42,13 @@
40 42 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
41 43 __u64 *cnt);
42 44  
  45 +DECLARE_PER_CPU(int, eventfd_wake_count);
  46 +
  47 +static inline bool eventfd_signal_count(void)
  48 +{
  49 + return this_cpu_read(eventfd_wake_count);
  50 +}
  51 +
43 52 #else /* CONFIG_EVENTFD */
44 53  
45 54 /*
... ... @@ -66,6 +75,11 @@
66 75 wait_queue_entry_t *wait, __u64 *cnt)
67 76 {
68 77 return -ENOSYS;
  78 +}
  79 +
  80 +static inline bool eventfd_signal_count(void)
  81 +{
  82 + return false;
69 83 }
70 84  
71 85 #endif