Commit c1ef57a3a3f5e69e98baf89055b423da62791c13
Merge tag 'io_uring-5.6-2020-02-05' of git://git.kernel.dk/linux-block
Pull io_uring updates from Jens Axboe:
 "Some later fixes for io_uring:

   - Small cleanup series from Pavel

   - Belt and suspenders build time check of sqe size and layout (Stefan)

   - Addition of ->show_fdinfo() on request of Jann Horn, to aid in
     understanding mapped personalities

   - eventfd recursion/deadlock fix, for both io_uring and aio

   - Fixup for send/recv handling

   - Fixup for double deferral of read/write request

   - Fix for potential double completion event for close request

   - Adjust fadvise advice async/inline behavior

   - Fix for shutdown hang with SQPOLL thread

   - Fix for potential use-after-free of fixed file table"

* tag 'io_uring-5.6-2020-02-05' of git://git.kernel.dk/linux-block:
  io_uring: cleanup fixed file data table references
  io_uring: spin for sq thread to idle on shutdown
  aio: prevent potential eventfd recursion on poll
  io_uring: put the flag changing code in the same spot
  io_uring: iterate req cache backwards
  io_uring: punt even fadvise() WILLNEED to async context
  io_uring: fix sporadic double CQE entry for close
  io_uring: remove extra ->file check
  io_uring: don't map read/write iovec potentially twice
  io_uring: use the proper helpers for io_send/recv
  io_uring: prevent potential eventfd recursion on poll
  eventfd: track eventfd_signal() recursion depth
  io_uring: add BUILD_BUG_ON() to assert the layout of struct io_uring_sqe
  io_uring: add ->show_fdinfo() for the io_uring file descriptor
Showing 4 changed files
fs/aio.c
... | ... | @@ -1610,6 +1610,14 @@ |
1610 | 1610 | return 0; |
1611 | 1611 | } |
1612 | 1612 | |
1613 | +static void aio_poll_put_work(struct work_struct *work) | |
1614 | +{ | |
1615 | + struct poll_iocb *req = container_of(work, struct poll_iocb, work); | |
1616 | + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); | |
1617 | + | |
1618 | + iocb_put(iocb); | |
1619 | +} | |
1620 | + | |
1613 | 1621 | static void aio_poll_complete_work(struct work_struct *work) |
1614 | 1622 | { |
1615 | 1623 | struct poll_iocb *req = container_of(work, struct poll_iocb, work); |
... | ... | @@ -1674,6 +1682,8 @@ |
1674 | 1682 | list_del_init(&req->wait.entry); |
1675 | 1683 | |
1676 | 1684 | if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { |
1685 | + struct kioctx *ctx = iocb->ki_ctx; | |
1686 | + | |
1677 | 1687 | /* |
1678 | 1688 | * Try to complete the iocb inline if we can. Use |
1679 | 1689 | * irqsave/irqrestore because not all filesystems (e.g. fuse) |
... | ... | @@ -1683,8 +1693,14 @@ |
1683 | 1693 | list_del(&iocb->ki_list); |
1684 | 1694 | iocb->ki_res.res = mangle_poll(mask); |
1685 | 1695 | req->done = true; |
1686 | - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); | |
1687 | - iocb_put(iocb); | |
1696 | + if (iocb->ki_eventfd && eventfd_signal_count()) { | |
1697 | + iocb = NULL; | |
1698 | + INIT_WORK(&req->work, aio_poll_put_work); | |
1699 | + schedule_work(&req->work); | |
1700 | + } | |
1701 | + spin_unlock_irqrestore(&ctx->ctx_lock, flags); | |
1702 | + if (iocb) | |
1703 | + iocb_put(iocb); | |
1688 | 1704 | } else { |
1689 | 1705 | schedule_work(&req->work); |
1690 | 1706 | } |
fs/eventfd.c
... | ... | @@ -24,6 +24,8 @@ |
24 | 24 | #include <linux/seq_file.h> |
25 | 25 | #include <linux/idr.h> |
26 | 26 | |
27 | +DEFINE_PER_CPU(int, eventfd_wake_count); | |
28 | + | |
27 | 29 | static DEFINE_IDA(eventfd_ida); |
28 | 30 | |
29 | 31 | struct eventfd_ctx { |
30 | 32 | |
31 | 33 | |
... | ... | @@ -60,12 +62,25 @@ |
60 | 62 | { |
61 | 63 | unsigned long flags; |
62 | 64 | |
65 | + /* | |
66 | + * Deadlock or stack overflow issues can happen if we recurse here | |
67 | + * through waitqueue wakeup handlers. If the caller uses potentially | |
68 | + * nested waitqueues with custom wakeup handlers, then it should | |
69 | + * check eventfd_signal_count() before calling this function. If | |
70 | + * it returns true, the eventfd_signal() call should be deferred to a | |
71 | + * safe context. | |
72 | + */ | |
73 | + if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) | |
74 | + return 0; | |
75 | + | |
63 | 76 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
77 | + this_cpu_inc(eventfd_wake_count); | |
64 | 78 | if (ULLONG_MAX - ctx->count < n) |
65 | 79 | n = ULLONG_MAX - ctx->count; |
66 | 80 | ctx->count += n; |
67 | 81 | if (waitqueue_active(&ctx->wqh)) |
68 | 82 | wake_up_locked_poll(&ctx->wqh, EPOLLIN); |
83 | + this_cpu_dec(eventfd_wake_count); | |
69 | 84 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
70 | 85 | |
71 | 86 | return n; |
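The new per-CPU counter lets nested-wakeup users detect that they are already running inside an eventfd_signal() wakeup and must not signal again from that context. Below is a minimal kernel-style sketch of the deferral pattern the comment asks callers to follow, modeled on the aio_poll_wake() and io_poll_wake() changes in this series; struct my_completion, my_signal_work() and my_complete() are illustrative names only, not part of the patch.

#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

/* Illustrative completion object: the eventfd to signal plus a work item
 * used when signaling directly would recurse through the wakeup path. */
struct my_completion {
	struct eventfd_ctx *evfd;
	struct work_struct work;
};

static void my_signal_work(struct work_struct *work)
{
	struct my_completion *c = container_of(work, struct my_completion, work);

	/* Process context, no eventfd wakeup on the stack: safe to signal. */
	eventfd_signal(c->evfd, 1);
}

static void my_complete(struct my_completion *c)
{
	if (eventfd_signal_count()) {
		/* Already inside an eventfd wakeup: defer to a workqueue. */
		INIT_WORK(&c->work, my_signal_work);
		schedule_work(&c->work);
	} else {
		eventfd_signal(c->evfd, 1);
	}
}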
fs/io_uring.c
... | ... | @@ -585,8 +585,7 @@ |
585 | 585 | * io_kiocb alloc cache |
586 | 586 | */ |
587 | 587 | void *reqs[IO_IOPOLL_BATCH]; |
588 | - unsigned int free_reqs; | |
589 | - unsigned int cur_req; | |
588 | + unsigned int free_reqs; | |
590 | 589 | |
591 | 590 | /* |
592 | 591 | * File reference cache |
... | ... | @@ -754,6 +753,7 @@ |
754 | 753 | struct io_uring_files_update *ip, |
755 | 754 | unsigned nr_args); |
756 | 755 | static int io_grab_files(struct io_kiocb *req); |
756 | +static void io_ring_file_ref_flush(struct fixed_file_data *data); | |
757 | 757 | |
758 | 758 | static struct kmem_cache *req_cachep; |
759 | 759 | |
760 | 760 | |
761 | 761 | |
762 | 762 | |
... | ... | @@ -1020,21 +1020,28 @@ |
1020 | 1020 | |
1021 | 1021 | static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) |
1022 | 1022 | { |
1023 | + if (!ctx->cq_ev_fd) | |
1024 | + return false; | |
1023 | 1025 | if (!ctx->eventfd_async) |
1024 | 1026 | return true; |
1025 | 1027 | return io_wq_current_is_worker() || in_interrupt(); |
1026 | 1028 | } |
1027 | 1029 | |
1028 | -static void io_cqring_ev_posted(struct io_ring_ctx *ctx) | |
1030 | +static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev) | |
1029 | 1031 | { |
1030 | 1032 | if (waitqueue_active(&ctx->wait)) |
1031 | 1033 | wake_up(&ctx->wait); |
1032 | 1034 | if (waitqueue_active(&ctx->sqo_wait)) |
1033 | 1035 | wake_up(&ctx->sqo_wait); |
1034 | - if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx)) | |
1036 | + if (trigger_ev) | |
1035 | 1037 | eventfd_signal(ctx->cq_ev_fd, 1); |
1036 | 1038 | } |
1037 | 1039 | |
1040 | +static void io_cqring_ev_posted(struct io_ring_ctx *ctx) | |
1041 | +{ | |
1042 | + __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx)); | |
1043 | +} | |
1044 | + | |
1038 | 1045 | /* Returns true if there are no backlogged entries after the flush */ |
1039 | 1046 | static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) |
1040 | 1047 | { |
1041 | 1048 | |
1042 | 1049 | |
... | ... | @@ -1183,12 +1190,10 @@ |
1183 | 1190 | ret = 1; |
1184 | 1191 | } |
1185 | 1192 | state->free_reqs = ret - 1; |
1186 | - state->cur_req = 1; | |
1187 | - req = state->reqs[0]; | |
1193 | + req = state->reqs[ret - 1]; | |
1188 | 1194 | } else { |
1189 | - req = state->reqs[state->cur_req]; | |
1190 | 1195 | state->free_reqs--; |
1191 | - state->cur_req++; | |
1196 | + req = state->reqs[state->free_reqs]; | |
1192 | 1197 | } |
1193 | 1198 | |
1194 | 1199 | got_it: |
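With cur_req gone, free_reqs doubles as the stack index into the reqs[] array: allocation pops from the top, and the unused remainder of a bulk allocation can later be handed back starting at &reqs[0] (see the io_submit_state_end() hunk further down). A rough userspace analogue of that indexing scheme, purely for illustration:

#include <stdlib.h>

#define BATCH 8

/* Illustrative stand-in for io_submit_state's request cache. */
struct req_cache {
	void *reqs[BATCH];
	unsigned int free_reqs;	/* cached entry count == next pop index */
};

static void *cache_get(struct req_cache *c)
{
	if (!c->free_reqs)
		return NULL;			/* fall back to the allocator */
	return c->reqs[--c->free_reqs];		/* pop from the top */
}

static void cache_release_all(struct req_cache *c)
{
	unsigned int i;

	/* Mirrors kmem_cache_free_bulk(req_cachep, free_reqs, state->reqs):
	 * the unused entries always sit at the front of the array. */
	for (i = 0; i < c->free_reqs; i++)
		free(c->reqs[i]);
	c->free_reqs = 0;
}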
... | ... | @@ -1855,9 +1860,6 @@ |
1855 | 1860 | unsigned ioprio; |
1856 | 1861 | int ret; |
1857 | 1862 | |
1858 | - if (!req->file) | |
1859 | - return -EBADF; | |
1860 | - | |
1861 | 1863 | if (S_ISREG(file_inode(req->file)->i_mode)) |
1862 | 1864 | req->flags |= REQ_F_ISREG; |
1863 | 1865 | |
1864 | 1866 | |
... | ... | @@ -1866,8 +1868,11 @@ |
1866 | 1868 | req->flags |= REQ_F_CUR_POS; |
1867 | 1869 | kiocb->ki_pos = req->file->f_pos; |
1868 | 1870 | } |
1869 | - kiocb->ki_flags = iocb_flags(kiocb->ki_filp); | |
1870 | 1871 | kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); |
1872 | + kiocb->ki_flags = iocb_flags(kiocb->ki_filp); | |
1873 | + ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); | |
1874 | + if (unlikely(ret)) | |
1875 | + return ret; | |
1871 | 1876 | |
1872 | 1877 | ioprio = READ_ONCE(sqe->ioprio); |
1873 | 1878 | if (ioprio) { |
... | ... | @@ -1879,10 +1884,6 @@ |
1879 | 1884 | } else |
1880 | 1885 | kiocb->ki_ioprio = get_current_ioprio(); |
1881 | 1886 | |
1882 | - ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); | |
1883 | - if (unlikely(ret)) | |
1884 | - return ret; | |
1885 | - | |
1886 | 1887 | /* don't allow async punt if RWF_NOWAIT was requested */ |
1887 | 1888 | if ((kiocb->ki_flags & IOCB_NOWAIT) || |
1888 | 1889 | (req->file->f_flags & O_NONBLOCK)) |
1889 | 1890 | |
... | ... | @@ -2164,10 +2165,12 @@ |
2164 | 2165 | { |
2165 | 2166 | if (!io_op_defs[req->opcode].async_ctx) |
2166 | 2167 | return 0; |
2167 | - if (!req->io && io_alloc_async_ctx(req)) | |
2168 | - return -ENOMEM; | |
2168 | + if (!req->io) { | |
2169 | + if (io_alloc_async_ctx(req)) | |
2170 | + return -ENOMEM; | |
2169 | 2171 | |
2170 | - io_req_map_rw(req, io_size, iovec, fast_iov, iter); | |
2172 | + io_req_map_rw(req, io_size, iovec, fast_iov, iter); | |
2173 | + } | |
2171 | 2174 | req->work.func = io_rw_async; |
2172 | 2175 | return 0; |
2173 | 2176 | } |
... | ... | @@ -2724,9 +2727,16 @@ |
2724 | 2727 | struct io_fadvise *fa = &req->fadvise; |
2725 | 2728 | int ret; |
2726 | 2729 | |
2727 | - /* DONTNEED may block, others _should_ not */ | |
2728 | - if (fa->advice == POSIX_FADV_DONTNEED && force_nonblock) | |
2729 | - return -EAGAIN; | |
2730 | + if (force_nonblock) { | |
2731 | + switch (fa->advice) { | |
2732 | + case POSIX_FADV_NORMAL: | |
2733 | + case POSIX_FADV_RANDOM: | |
2734 | + case POSIX_FADV_SEQUENTIAL: | |
2735 | + break; | |
2736 | + default: | |
2737 | + return -EAGAIN; | |
2738 | + } | |
2739 | + } | |
2730 | 2740 | |
2731 | 2741 | ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); |
2732 | 2742 | if (ret < 0) |
2733 | 2743 | |
2734 | 2744 | |
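Seen from userspace, NORMAL/RANDOM/SEQUENTIAL advice still completes inline, while advice that may block (WILLNEED, DONTNEED, and the rest) is now always punted to the async worker; the completion result is the same either way, only the path differs. A hedged liburing sketch, assuming liburing's io_uring_prep_fadvise() helper is available:

#include <errno.h>
#include <fcntl.h>
#include <liburing.h>

/* Submit one IORING_OP_FADVISE and wait for its completion.  The kernel
 * now runs WILLNEED from a worker thread rather than inline. */
static int fadvise_willneed(struct io_uring *ring, int fd, off_t len)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	if (!sqe)
		return -EBUSY;
	io_uring_prep_fadvise(sqe, fd, 0, len, POSIX_FADV_WILLNEED);

	ret = io_uring_submit(ring);
	if (ret < 0)
		return ret;
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;
	ret = cqe->res;		/* 0 on success, -errno on failure */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}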
... | ... | @@ -2837,16 +2847,13 @@ |
2837 | 2847 | int ret; |
2838 | 2848 | |
2839 | 2849 | ret = filp_close(req->close.put_file, req->work.files); |
2840 | - if (ret < 0) { | |
2850 | + if (ret < 0) | |
2841 | 2851 | req_set_fail_links(req); |
2842 | - } | |
2843 | 2852 | io_cqring_add_event(req, ret); |
2844 | 2853 | } |
2845 | 2854 | |
2846 | 2855 | fput(req->close.put_file); |
2847 | 2856 | |
2848 | - /* we bypassed the re-issue, drop the submission reference */ | |
2849 | - io_put_req(req); | |
2850 | 2857 | io_put_req_find_next(req, &nxt); |
2851 | 2858 | if (nxt) |
2852 | 2859 | io_wq_assign_next(workptr, nxt); |
... | ... | @@ -2888,7 +2895,13 @@ |
2888 | 2895 | |
2889 | 2896 | eagain: |
2890 | 2897 | req->work.func = io_close_finish; |
2891 | - return -EAGAIN; | |
2898 | + /* | |
2899 | + * Do manual async queue here to avoid grabbing files - we don't | |
2900 | + * need the files, and it'll cause io_close_finish() to close | |
2901 | + * the file again and cause a double CQE entry for this request | |
2902 | + */ | |
2903 | + io_queue_async_work(req); | |
2904 | + return 0; | |
2892 | 2905 | } |
2893 | 2906 | |
2894 | 2907 | static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) |
... | ... | @@ -3083,7 +3096,8 @@ |
3083 | 3096 | else if (force_nonblock) |
3084 | 3097 | flags |= MSG_DONTWAIT; |
3085 | 3098 | |
3086 | - ret = __sys_sendmsg_sock(sock, &msg, flags); | |
3099 | + msg.msg_flags = flags; | |
3100 | + ret = sock_sendmsg(sock, &msg); | |
3087 | 3101 | if (force_nonblock && ret == -EAGAIN) |
3088 | 3102 | return -EAGAIN; |
3089 | 3103 | if (ret == -ERESTARTSYS) |
... | ... | @@ -3109,6 +3123,7 @@ |
3109 | 3123 | |
3110 | 3124 | sr->msg_flags = READ_ONCE(sqe->msg_flags); |
3111 | 3125 | sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); |
3126 | + sr->len = READ_ONCE(sqe->len); | |
3112 | 3127 | |
3113 | 3128 | if (!io || req->opcode == IORING_OP_RECV) |
3114 | 3129 | return 0; |
... | ... | @@ -3227,7 +3242,7 @@ |
3227 | 3242 | else if (force_nonblock) |
3228 | 3243 | flags |= MSG_DONTWAIT; |
3229 | 3244 | |
3230 | - ret = __sys_recvmsg_sock(sock, &msg, NULL, NULL, flags); | |
3245 | + ret = sock_recvmsg(sock, &msg, flags); | |
3231 | 3246 | if (force_nonblock && ret == -EAGAIN) |
3232 | 3247 | return -EAGAIN; |
3233 | 3248 | if (ret == -ERESTARTSYS) |
... | ... | @@ -3561,6 +3576,14 @@ |
3561 | 3576 | __io_poll_flush(req->ctx, nodes); |
3562 | 3577 | } |
3563 | 3578 | |
3579 | +static void io_poll_trigger_evfd(struct io_wq_work **workptr) | |
3580 | +{ | |
3581 | + struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); | |
3582 | + | |
3583 | + eventfd_signal(req->ctx->cq_ev_fd, 1); | |
3584 | + io_put_req(req); | |
3585 | +} | |
3586 | + | |
3564 | 3587 | static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, |
3565 | 3588 | void *key) |
3566 | 3589 | { |
3567 | 3590 | |
3568 | 3591 | |
... | ... | @@ -3586,14 +3609,22 @@ |
3586 | 3609 | |
3587 | 3610 | if (llist_empty(&ctx->poll_llist) && |
3588 | 3611 | spin_trylock_irqsave(&ctx->completion_lock, flags)) { |
3612 | + bool trigger_ev; | |
3613 | + | |
3589 | 3614 | hash_del(&req->hash_node); |
3590 | 3615 | io_poll_complete(req, mask, 0); |
3591 | - req->flags |= REQ_F_COMP_LOCKED; | |
3592 | - io_put_req(req); | |
3593 | - spin_unlock_irqrestore(&ctx->completion_lock, flags); | |
3594 | 3616 | |
3595 | - io_cqring_ev_posted(ctx); | |
3596 | - req = NULL; | |
3617 | + trigger_ev = io_should_trigger_evfd(ctx); | |
3618 | + if (trigger_ev && eventfd_signal_count()) { | |
3619 | + trigger_ev = false; | |
3620 | + req->work.func = io_poll_trigger_evfd; | |
3621 | + } else { | |
3622 | + req->flags |= REQ_F_COMP_LOCKED; | |
3623 | + io_put_req(req); | |
3624 | + req = NULL; | |
3625 | + } | |
3626 | + spin_unlock_irqrestore(&ctx->completion_lock, flags); | |
3627 | + __io_cqring_ev_posted(ctx, trigger_ev); | |
3597 | 3628 | } else { |
3598 | 3629 | req->result = mask; |
3599 | 3630 | req->llist_node.next = NULL; |
... | ... | @@ -4815,8 +4846,7 @@ |
4815 | 4846 | blk_finish_plug(&state->plug); |
4816 | 4847 | io_file_put(state); |
4817 | 4848 | if (state->free_reqs) |
4818 | - kmem_cache_free_bulk(req_cachep, state->free_reqs, | |
4819 | - &state->reqs[state->cur_req]); | |
4849 | + kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); | |
4820 | 4850 | } |
4821 | 4851 | |
4822 | 4852 | /* |
... | ... | @@ -5041,7 +5071,8 @@ |
5041 | 5071 | * reap events and wake us up. |
5042 | 5072 | */ |
5043 | 5073 | if (inflight || |
5044 | - (!time_after(jiffies, timeout) && ret != -EBUSY)) { | |
5074 | + (!time_after(jiffies, timeout) && ret != -EBUSY && | |
5075 | + !percpu_ref_is_dying(&ctx->refs))) { | |
5045 | 5076 | cond_resched(); |
5046 | 5077 | continue; |
5047 | 5078 | } |
5048 | 5079 | |
5049 | 5080 | |
... | ... | @@ -5231,15 +5262,10 @@ |
5231 | 5262 | if (!data) |
5232 | 5263 | return -ENXIO; |
5233 | 5264 | |
5234 | - /* protect against inflight atomic switch, which drops the ref */ | |
5235 | - percpu_ref_get(&data->refs); | |
5236 | - /* wait for existing switches */ | |
5237 | - flush_work(&data->ref_work); | |
5238 | 5265 | percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill); |
5239 | - wait_for_completion(&data->done); | |
5240 | - percpu_ref_put(&data->refs); | |
5241 | - /* flush potential new switch */ | |
5242 | 5266 | flush_work(&data->ref_work); |
5267 | + wait_for_completion(&data->done); | |
5268 | + io_ring_file_ref_flush(data); | |
5243 | 5269 | percpu_ref_exit(&data->refs); |
5244 | 5270 | |
5245 | 5271 | __io_sqe_files_unregister(ctx); |
5246 | 5272 | |
5247 | 5273 | |
... | ... | @@ -5477,14 +5503,11 @@ |
5477 | 5503 | struct completion *done; |
5478 | 5504 | }; |
5479 | 5505 | |
5480 | -static void io_ring_file_ref_switch(struct work_struct *work) | |
5506 | +static void io_ring_file_ref_flush(struct fixed_file_data *data) | |
5481 | 5507 | { |
5482 | 5508 | struct io_file_put *pfile, *tmp; |
5483 | - struct fixed_file_data *data; | |
5484 | 5509 | struct llist_node *node; |
5485 | 5510 | |
5486 | - data = container_of(work, struct fixed_file_data, ref_work); | |
5487 | - | |
5488 | 5511 | while ((node = llist_del_all(&data->put_llist)) != NULL) { |
5489 | 5512 | llist_for_each_entry_safe(pfile, tmp, node, llist) { |
5490 | 5513 | io_ring_file_put(data->ctx, pfile->file); |
5491 | 5514 | |
... | ... | @@ -5494,7 +5517,14 @@ |
5494 | 5517 | kfree(pfile); |
5495 | 5518 | } |
5496 | 5519 | } |
5520 | +} | |
5497 | 5521 | |
5522 | +static void io_ring_file_ref_switch(struct work_struct *work) | |
5523 | +{ | |
5524 | + struct fixed_file_data *data; | |
5525 | + | |
5526 | + data = container_of(work, struct fixed_file_data, ref_work); | |
5527 | + io_ring_file_ref_flush(data); | |
5498 | 5528 | percpu_ref_get(&data->refs); |
5499 | 5529 | percpu_ref_switch_to_percpu(&data->refs); |
5500 | 5530 | } |
... | ... | @@ -5505,8 +5535,14 @@ |
5505 | 5535 | |
5506 | 5536 | data = container_of(ref, struct fixed_file_data, refs); |
5507 | 5537 | |
5508 | - /* we can't safely switch from inside this context, punt to wq */ | |
5509 | - queue_work(system_wq, &data->ref_work); | |
5538 | + /* | |
5539 | + * We can't safely switch from inside this context, punt to wq. If | |
5540 | + * the table ref is going away, the table is being unregistered. | |
5541 | + * Don't queue up the async work for that case, the caller will | |
5542 | + * handle it. | |
5543 | + */ | |
5544 | + if (!percpu_ref_is_dying(&data->refs)) | |
5545 | + queue_work(system_wq, &data->ref_work); | |
5510 | 5546 | } |
5511 | 5547 | |
5512 | 5548 | static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, |
... | ... | @@ -6295,6 +6331,16 @@ |
6295 | 6331 | percpu_ref_kill(&ctx->refs); |
6296 | 6332 | mutex_unlock(&ctx->uring_lock); |
6297 | 6333 | |
6334 | + /* | |
6335 | + * Wait for sq thread to idle, if we have one. It won't spin on new | |
6336 | + * work after we've killed the ctx ref above. This is important to do | |
6337 | + * before we cancel existing commands, as the thread could otherwise | |
6338 | + * be queueing new work post that. If that's work we need to cancel, | |
6339 | + * it could cause shutdown to hang. | |
6340 | + */ | |
6341 | + while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait)) | |
6342 | + cpu_relax(); | |
6343 | + | |
6298 | 6344 | io_kill_timeouts(ctx); |
6299 | 6345 | io_poll_remove_all(ctx); |
6300 | 6346 | |
... | ... | @@ -6501,6 +6547,80 @@ |
6501 | 6547 | return submitted ? submitted : ret; |
6502 | 6548 | } |
6503 | 6549 | |
6550 | +static int io_uring_show_cred(int id, void *p, void *data) | |
6551 | +{ | |
6552 | + const struct cred *cred = p; | |
6553 | + struct seq_file *m = data; | |
6554 | + struct user_namespace *uns = seq_user_ns(m); | |
6555 | + struct group_info *gi; | |
6556 | + kernel_cap_t cap; | |
6557 | + unsigned __capi; | |
6558 | + int g; | |
6559 | + | |
6560 | + seq_printf(m, "%5d\n", id); | |
6561 | + seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); | |
6562 | + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); | |
6563 | + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); | |
6564 | + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); | |
6565 | + seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); | |
6566 | + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); | |
6567 | + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); | |
6568 | + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); | |
6569 | + seq_puts(m, "\n\tGroups:\t"); | |
6570 | + gi = cred->group_info; | |
6571 | + for (g = 0; g < gi->ngroups; g++) { | |
6572 | + seq_put_decimal_ull(m, g ? " " : "", | |
6573 | + from_kgid_munged(uns, gi->gid[g])); | |
6574 | + } | |
6575 | + seq_puts(m, "\n\tCapEff:\t"); | |
6576 | + cap = cred->cap_effective; | |
6577 | + CAP_FOR_EACH_U32(__capi) | |
6578 | + seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); | |
6579 | + seq_putc(m, '\n'); | |
6580 | + return 0; | |
6581 | +} | |
6582 | + | |
6583 | +static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) | |
6584 | +{ | |
6585 | + int i; | |
6586 | + | |
6587 | + mutex_lock(&ctx->uring_lock); | |
6588 | + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); | |
6589 | + for (i = 0; i < ctx->nr_user_files; i++) { | |
6590 | + struct fixed_file_table *table; | |
6591 | + struct file *f; | |
6592 | + | |
6593 | + table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT]; | |
6594 | + f = table->files[i & IORING_FILE_TABLE_MASK]; | |
6595 | + if (f) | |
6596 | + seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); | |
6597 | + else | |
6598 | + seq_printf(m, "%5u: <none>\n", i); | |
6599 | + } | |
6600 | + seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); | |
6601 | + for (i = 0; i < ctx->nr_user_bufs; i++) { | |
6602 | + struct io_mapped_ubuf *buf = &ctx->user_bufs[i]; | |
6603 | + | |
6604 | + seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, | |
6605 | + (unsigned int) buf->len); | |
6606 | + } | |
6607 | + if (!idr_is_empty(&ctx->personality_idr)) { | |
6608 | + seq_printf(m, "Personalities:\n"); | |
6609 | + idr_for_each(&ctx->personality_idr, io_uring_show_cred, m); | |
6610 | + } | |
6611 | + mutex_unlock(&ctx->uring_lock); | |
6612 | +} | |
6613 | + | |
6614 | +static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) | |
6615 | +{ | |
6616 | + struct io_ring_ctx *ctx = f->private_data; | |
6617 | + | |
6618 | + if (percpu_ref_tryget(&ctx->refs)) { | |
6619 | + __io_uring_show_fdinfo(ctx, m); | |
6620 | + percpu_ref_put(&ctx->refs); | |
6621 | + } | |
6622 | +} | |
6623 | + | |
6504 | 6624 | static const struct file_operations io_uring_fops = { |
6505 | 6625 | .release = io_uring_release, |
6506 | 6626 | .flush = io_uring_flush, |
... | ... | @@ -6511,6 +6631,7 @@ |
6511 | 6631 | #endif |
6512 | 6632 | .poll = io_uring_poll, |
6513 | 6633 | .fasync = io_uring_fasync, |
6634 | + .show_fdinfo = io_uring_show_fdinfo, | |
6514 | 6635 | }; |
6515 | 6636 | |
6516 | 6637 | static int io_allocate_scq_urings(struct io_ring_ctx *ctx, |
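With ->show_fdinfo() wired up, the ring's registered files, buffers and personalities become visible through procfs. A small userspace check, assuming an io_uring fd is already open; the field names match the __io_uring_show_fdinfo() output above:

#include <stdio.h>

/* Dump /proc/self/fdinfo/<fd> for an io_uring fd; with this patch the
 * output gains UserFiles:, UserBufs: and (if present) Personalities:. */
static int dump_uring_fdinfo(int ring_fd)
{
	char path[64], line[256];
	FILE *fp;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", ring_fd);
	fp = fopen(path, "r");
	if (!fp)
		return -1;
	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);
	fclose(fp);
	return 0;
}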
... | ... | @@ -6963,6 +7084,39 @@ |
6963 | 7084 | |
6964 | 7085 | static int __init io_uring_init(void) |
6965 | 7086 | { |
7087 | +#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ | |
7088 | + BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ | |
7089 | + BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ | |
7090 | +} while (0) | |
7091 | + | |
7092 | +#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ | |
7093 | + __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) | |
7094 | + BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); | |
7095 | + BUILD_BUG_SQE_ELEM(0, __u8, opcode); | |
7096 | + BUILD_BUG_SQE_ELEM(1, __u8, flags); | |
7097 | + BUILD_BUG_SQE_ELEM(2, __u16, ioprio); | |
7098 | + BUILD_BUG_SQE_ELEM(4, __s32, fd); | |
7099 | + BUILD_BUG_SQE_ELEM(8, __u64, off); | |
7100 | + BUILD_BUG_SQE_ELEM(8, __u64, addr2); | |
7101 | + BUILD_BUG_SQE_ELEM(16, __u64, addr); | |
7102 | + BUILD_BUG_SQE_ELEM(24, __u32, len); | |
7103 | + BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); | |
7104 | + BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); | |
7105 | + BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); | |
7106 | + BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); | |
7107 | + BUILD_BUG_SQE_ELEM(28, __u16, poll_events); | |
7108 | + BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); | |
7109 | + BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); | |
7110 | + BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); | |
7111 | + BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); | |
7112 | + BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); | |
7113 | + BUILD_BUG_SQE_ELEM(28, __u32, open_flags); | |
7114 | + BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); | |
7115 | + BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); | |
7116 | + BUILD_BUG_SQE_ELEM(32, __u64, user_data); | |
7117 | + BUILD_BUG_SQE_ELEM(40, __u16, buf_index); | |
7118 | + BUILD_BUG_SQE_ELEM(42, __u16, personality); | |
7119 | + | |
6966 | 7120 | BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); |
6967 | 7121 | req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC); |
6968 | 7122 | return 0; |
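The same layout guarantees can be checked against the uapi header from userspace; a minimal compile-time sketch using C11 _Static_assert, with the offsets taken from the BUILD_BUG_SQE_ELEM() lines above:

#include <stddef.h>
#include <linux/io_uring.h>

/* Userspace mirror of a few of the kernel's new BUILD_BUG_ON() checks on
 * the struct io_uring_sqe ABI; fails to compile if the layout changes. */
_Static_assert(sizeof(struct io_uring_sqe) == 64, "sqe is 64 bytes");
_Static_assert(offsetof(struct io_uring_sqe, opcode) == 0, "opcode");
_Static_assert(offsetof(struct io_uring_sqe, fd) == 4, "fd");
_Static_assert(offsetof(struct io_uring_sqe, addr) == 16, "addr");
_Static_assert(offsetof(struct io_uring_sqe, len) == 24, "len");
_Static_assert(offsetof(struct io_uring_sqe, user_data) == 32, "user_data");

int main(void)
{
	return 0;	/* nothing to do at run time; the asserts are the test */
}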
include/linux/eventfd.h
... | ... | @@ -12,6 +12,8 @@ |
12 | 12 | #include <linux/fcntl.h> |
13 | 13 | #include <linux/wait.h> |
14 | 14 | #include <linux/err.h> |
15 | +#include <linux/percpu-defs.h> | |
16 | +#include <linux/percpu.h> | |
15 | 17 | |
16 | 18 | /* |
17 | 19 | * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining |
... | ... | @@ -40,6 +42,13 @@ |
40 | 42 | int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, |
41 | 43 | __u64 *cnt); |
42 | 44 | |
45 | +DECLARE_PER_CPU(int, eventfd_wake_count); | |
46 | + | |
47 | +static inline bool eventfd_signal_count(void) | |
48 | +{ | |
49 | + return this_cpu_read(eventfd_wake_count); | |
50 | +} | |
51 | + | |
43 | 52 | #else /* CONFIG_EVENTFD */ |
44 | 53 | |
45 | 54 | /* |
... | ... | @@ -66,6 +75,11 @@ |
66 | 75 | wait_queue_entry_t *wait, __u64 *cnt) |
67 | 76 | { |
68 | 77 | return -ENOSYS; |
78 | +} | |
79 | + | |
80 | +static inline bool eventfd_signal_count(void) | |
81 | +{ | |
82 | + return false; | |
69 | 83 | } |
70 | 84 | |
71 | 85 | #endif |