Commit 58c85dc20a2c5ba0e53740c5dd1945e658da0e1c
Committed by: Linus Torvalds
1 parent: a1c8eae75e
Exists in: master and 20 other branches
aio: kill struct aio_ring_info
struct aio_ring_info was kind of odd: the only place it's used is where it's
embedded in struct kioctx, so there's no real need for it.

The next patch rearranges struct kioctx and puts various things on their own
cachelines; getting rid of struct aio_ring_info now makes that reordering a
bit clearer.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
Cc: Zach Brown <zab@redhat.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Asai Thambi S P <asamymuthupa@micron.com>
Cc: Selvan Mani <smani@micron.com>
Cc: Sam Bradshaw <sbradshaw@micron.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
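The cacheline trick the message refers to is visible in the struct kioctx hunk
below: wrapping related fields in an anonymous struct tagged
____cacheline_aligned pads each group out to its own cacheline, so the mutex
taken by event readers and the tail/completion_lock pair written on every
completion cannot false-share a line with each other or with the read-mostly
setup fields. Here is a minimal userspace sketch of that pattern, not the
kernel code: CACHELINE (64 bytes assumed), toy_cacheline_aligned, and struct
toy_ioctx are illustrative stand-ins for SMP_CACHE_BYTES,
____cacheline_aligned, and struct kioctx.

/*
 * Toy model of the cacheline-splitting pattern used in the new
 * struct kioctx.  All names and the 64-byte figure are assumptions
 * for illustration only.
 */
#include <stdio.h>
#include <stddef.h>

#define CACHELINE 64    /* assumed L1 cacheline size */
#define toy_cacheline_aligned __attribute__((aligned(CACHELINE)))

struct toy_ioctx {
        unsigned max_reqs;              /* read-mostly setup state */
        unsigned nr_events;

        struct {                        /* taken by event readers */
                int ring_lock;
        } toy_cacheline_aligned;

        struct {                        /* written on every completion */
                unsigned tail;
                int completion_lock;
        } toy_cacheline_aligned;
};

int main(void)
{
        /* each aligned group starts on its own 64-byte boundary */
        printf("ring_lock group at offset %zu\n",
               offsetof(struct toy_ioctx, ring_lock));
        printf("tail/completion group at offset %zu\n",
               offsetof(struct toy_ioctx, tail));
        return 0;
}

Compiled with gcc, this prints offsets 64 and 128: each group begins on its own
assumed cacheline boundary, while max_reqs and nr_events share the first line.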
Showing 1 changed file with 74 additions and 81 deletions
fs/aio.c
@@ -58,19 +58,7 @@
 };      /* 128 bytes + ring size */
 
 #define AIO_RING_PAGES  8
-struct aio_ring_info {
-        unsigned long           mmap_base;
-        unsigned long           mmap_size;
 
-        struct page             **ring_pages;
-        struct mutex            ring_lock;
-        long                    nr_pages;
-
-        unsigned                nr, tail;
-
-        struct page             *internal_pages[AIO_RING_PAGES];
-};
-
 struct kioctx {
         atomic_t                users;
         atomic_t                dead;
@@ -90,15 +78,31 @@
          * This is what userspace passed to io_setup(), it's not used for
          * anything but counting against the global max_reqs quota.
          *
-         * The real limit is ring->nr - 1, which will be larger (see
+         * The real limit is nr_events - 1, which will be larger (see
          * aio_setup_ring())
          */
         unsigned                max_reqs;
 
-        struct aio_ring_info    ring_info;
+        /* Size of ringbuffer, in units of struct io_event */
+        unsigned                nr_events;
 
-        spinlock_t              completion_lock;
+        unsigned long           mmap_base;
+        unsigned long           mmap_size;
 
+        struct page             **ring_pages;
+        long                    nr_pages;
+
+        struct {
+                struct mutex    ring_lock;
+        } ____cacheline_aligned;
+
+        struct {
+                unsigned        tail;
+                spinlock_t      completion_lock;
+        } ____cacheline_aligned;
+
+        struct page             *internal_pages[AIO_RING_PAGES];
+
         struct rcu_head         rcu_head;
         struct work_struct      rcu_work;
 };
@@ -129,26 +133,21 @@
 
 static void aio_free_ring(struct kioctx *ctx)
 {
-        struct aio_ring_info *info = &ctx->ring_info;
         long i;
 
-        for (i=0; i<info->nr_pages; i++)
-                put_page(info->ring_pages[i]);
+        for (i = 0; i < ctx->nr_pages; i++)
+                put_page(ctx->ring_pages[i]);
 
-        if (info->mmap_size) {
-                vm_munmap(info->mmap_base, info->mmap_size);
-        }
+        if (ctx->mmap_size)
+                vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
-        if (info->ring_pages && info->ring_pages != info->internal_pages)
-                kfree(info->ring_pages);
-        info->ring_pages = NULL;
-        info->nr = 0;
+        if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+                kfree(ctx->ring_pages);
 }
 
 static int aio_setup_ring(struct kioctx *ctx)
 {
         struct aio_ring *ring;
-        struct aio_ring_info *info = &ctx->ring_info;
         unsigned nr_events = ctx->max_reqs;
         struct mm_struct *mm = current->mm;
         unsigned long size, populate;
@@ -166,45 +165,44 @@
 
         nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
 
-        info->nr = 0;
-        info->ring_pages = info->internal_pages;
+        ctx->nr_events = 0;
+        ctx->ring_pages = ctx->internal_pages;
         if (nr_pages > AIO_RING_PAGES) {
-                info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
-                if (!info->ring_pages)
+                ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+                                          GFP_KERNEL);
+                if (!ctx->ring_pages)
                         return -ENOMEM;
         }
 
-        info->mmap_size = nr_pages * PAGE_SIZE;
-        pr_debug("attempting mmap of %lu bytes\n", info->mmap_size);
+        ctx->mmap_size = nr_pages * PAGE_SIZE;
+        pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
         down_write(&mm->mmap_sem);
-        info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
-                                        PROT_READ|PROT_WRITE,
-                                        MAP_ANONYMOUS|MAP_PRIVATE, 0,
-                                        &populate);
-        if (IS_ERR((void *)info->mmap_base)) {
+        ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size,
+                                       PROT_READ|PROT_WRITE,
+                                       MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate);
+        if (IS_ERR((void *)ctx->mmap_base)) {
                 up_write(&mm->mmap_sem);
-                info->mmap_size = 0;
+                ctx->mmap_size = 0;
                 aio_free_ring(ctx);
                 return -EAGAIN;
         }
 
-        pr_debug("mmap address: 0x%08lx\n", info->mmap_base);
-        info->nr_pages = get_user_pages(current, mm, info->mmap_base, nr_pages,
-                                        1, 0, info->ring_pages, NULL);
+        pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
+        ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
+                                       1, 0, ctx->ring_pages, NULL);
         up_write(&mm->mmap_sem);
 
-        if (unlikely(info->nr_pages != nr_pages)) {
+        if (unlikely(ctx->nr_pages != nr_pages)) {
                 aio_free_ring(ctx);
                 return -EAGAIN;
         }
         if (populate)
-                mm_populate(info->mmap_base, populate);
+                mm_populate(ctx->mmap_base, populate);
 
-        ctx->user_id = info->mmap_base;
+        ctx->user_id = ctx->mmap_base;
+        ctx->nr_events = nr_events;     /* trusted copy */
 
-        info->nr = nr_events;           /* trusted copy */
-
-        ring = kmap_atomic(info->ring_pages[0]);
+        ring = kmap_atomic(ctx->ring_pages[0]);
         ring->nr = nr_events;   /* user copy */
         ring->id = ctx->user_id;
         ring->head = ring->tail = 0;
@@ -213,7 +211,7 @@
         ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
         ring->header_length = sizeof(struct aio_ring);
         kunmap_atomic(ring);
-        flush_dcache_page(info->ring_pages[0]);
+        flush_dcache_page(ctx->ring_pages[0]);
 
         return 0;
 }
@@ -284,7 +282,6 @@
  */
 static void free_ioctx(struct kioctx *ctx)
 {
-        struct aio_ring_info *info = &ctx->ring_info;
         struct aio_ring *ring;
         struct io_event res;
         struct kiocb *req;
@@ -302,18 +299,18 @@
 
         spin_unlock_irq(&ctx->ctx_lock);
 
-        ring = kmap_atomic(info->ring_pages[0]);
+        ring = kmap_atomic(ctx->ring_pages[0]);
         head = ring->head;
         kunmap_atomic(ring);
 
         while (atomic_read(&ctx->reqs_active) > 0) {
-                wait_event(ctx->wait, head != info->tail);
+                wait_event(ctx->wait, head != ctx->tail);
 
-                avail = (head <= info->tail ? info->tail : info->nr) - head;
+                avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
 
                 atomic_sub(avail, &ctx->reqs_active);
                 head += avail;
-                head %= info->nr;
+                head %= ctx->nr_events;
         }
 
         WARN_ON(atomic_read(&ctx->reqs_active) < 0);
@@ -372,7 +369,7 @@
         atomic_set(&ctx->dead, 0);
         spin_lock_init(&ctx->ctx_lock);
         spin_lock_init(&ctx->completion_lock);
-        mutex_init(&ctx->ring_info.ring_lock);
+        mutex_init(&ctx->ring_lock);
         init_waitqueue_head(&ctx->wait);
 
         INIT_LIST_HEAD(&ctx->active_reqs);
@@ -396,7 +393,7 @@
         spin_unlock(&mm->ioctx_lock);
 
         pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
-                 ctx, ctx->user_id, mm, ctx->ring_info.nr);
+                 ctx, ctx->user_id, mm, ctx->nr_events);
         return ctx;
 
 out_cleanup:
@@ -491,7 +488,7 @@
                  * just set it to 0; aio_free_ring() is the only
                  * place that uses ->mmap_size, so it's safe.
                  */
-                ctx->ring_info.mmap_size = 0;
+                ctx->mmap_size = 0;
 
                 if (!atomic_xchg(&ctx->dead, 1)) {
                         hlist_del_rcu(&ctx->list);
 
@@ -514,10 +511,10 @@
 {
         struct kiocb *req;
 
-        if (atomic_read(&ctx->reqs_active) >= ctx->ring_info.nr)
+        if (atomic_read(&ctx->reqs_active) >= ctx->nr_events)
                 return NULL;
 
-        if (atomic_inc_return(&ctx->reqs_active) > ctx->ring_info.nr - 1)
+        if (atomic_inc_return(&ctx->reqs_active) > ctx->nr_events - 1)
                 goto out_put;
 
         req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
@@ -578,7 +575,6 @@
 void aio_complete(struct kiocb *iocb, long res, long res2)
 {
         struct kioctx   *ctx = iocb->ki_ctx;
-        struct aio_ring_info    *info;
         struct aio_ring *ring;
         struct io_event *ev_page, *event;
         unsigned long   flags;
@@ -599,8 +595,6 @@
                 return;
         }
 
-        info = &ctx->ring_info;
-
         /*
          * Take rcu_read_lock() in case the kioctx is being destroyed, as we
          * need to issue a wakeup after decrementing reqs_active.
@@ -633,13 +627,13 @@
          */
         spin_lock_irqsave(&ctx->completion_lock, flags);
 
-        tail = info->tail;
+        tail = ctx->tail;
         pos = tail + AIO_EVENTS_OFFSET;
 
-        if (++tail >= info->nr)
+        if (++tail >= ctx->nr_events)
                 tail = 0;
 
-        ev_page = kmap_atomic(info->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+        ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
         event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
         event->obj = (u64)(unsigned long)iocb->ki_obj.user;
@@ -648,7 +642,7 @@
         event->res2 = res2;
 
         kunmap_atomic(ev_page);
-        flush_dcache_page(info->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+        flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
         pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
                  ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
@@ -659,12 +653,12 @@
          */
         smp_wmb();      /* make event visible before updating tail */
 
-        info->tail = tail;
+        ctx->tail = tail;
 
-        ring = kmap_atomic(info->ring_pages[0]);
+        ring = kmap_atomic(ctx->ring_pages[0]);
         ring->tail = tail;
         kunmap_atomic(ring);
-        flush_dcache_page(info->ring_pages[0]);
+        flush_dcache_page(ctx->ring_pages[0]);
 
         spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
@@ -704,21 +698,20 @@
 static long aio_read_events_ring(struct kioctx *ctx,
                                  struct io_event __user *event, long nr)
 {
-        struct aio_ring_info *info = &ctx->ring_info;
         struct aio_ring *ring;
         unsigned head, pos;
         long ret = 0;
         int copy_ret;
 
-        mutex_lock(&info->ring_lock);
+        mutex_lock(&ctx->ring_lock);
 
-        ring = kmap_atomic(info->ring_pages[0]);
+        ring = kmap_atomic(ctx->ring_pages[0]);
         head = ring->head;
         kunmap_atomic(ring);
 
-        pr_debug("h%u t%u m%u\n", head, info->tail, info->nr);
+        pr_debug("h%u t%u m%u\n", head, ctx->tail, ctx->nr_events);
 
-        if (head == info->tail)
+        if (head == ctx->tail)
                 goto out;
 
         while (ret < nr) {
@@ -726,8 +719,8 @@
                 struct io_event *ev;
                 struct page *page;
 
-                avail = (head <= info->tail ? info->tail : info->nr) - head;
-                if (head == info->tail)
+                avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
+                if (head == ctx->tail)
                         break;
 
                 avail = min(avail, nr - ret);
@@ -735,7 +728,7 @@
                           ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
 
                 pos = head + AIO_EVENTS_OFFSET;
-                page = info->ring_pages[pos / AIO_EVENTS_PER_PAGE];
+                page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
                 pos %= AIO_EVENTS_PER_PAGE;
 
                 ev = kmap(page);
@@ -750,19 +743,19 @@
 
                 ret += avail;
                 head += avail;
-                head %= info->nr;
+                head %= ctx->nr_events;
         }
 
-        ring = kmap_atomic(info->ring_pages[0]);
+        ring = kmap_atomic(ctx->ring_pages[0]);
         ring->head = head;
         kunmap_atomic(ring);
-        flush_dcache_page(info->ring_pages[0]);
+        flush_dcache_page(ctx->ring_pages[0]);
 
-        pr_debug("%li h%u t%u\n", ret, head, info->tail);
+        pr_debug("%li h%u t%u\n", ret, head, ctx->tail);
 
         atomic_sub(ret, &ctx->reqs_active);
 out:
-        mutex_unlock(&info->ring_lock);
+        mutex_unlock(&ctx->ring_lock);
 
         return ret;
 }
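One subtlety worth noting: the expression avail = (head <= ctx->tail ?
ctx->tail : ctx->nr_events) - head, which appears in both free_ioctx() and
aio_read_events_ring() above, deliberately counts only the contiguous run of
events up to the wrap point of the ring; the surrounding loop then advances
head modulo ctx->nr_events and takes another pass for the wrapped remainder.
A standalone sketch of that arithmetic with hypothetical values (the function
name avail_contig is ours, not the kernel's):

#include <assert.h>

/* Contiguous events available in a [head, tail) ring of nr_events slots;
 * head == tail means empty, matching the precondition of the kernel loops. */
static unsigned avail_contig(unsigned head, unsigned tail, unsigned nr_events)
{
        return (head <= tail ? tail : nr_events) - head;
}

int main(void)
{
        assert(avail_contig(2, 5, 8) == 3);     /* no wrap: slots 2,3,4 */
        assert(avail_contig(6, 1, 8) == 2);     /* wrapped: slots 6,7 first... */
        assert(avail_contig(0, 1, 8) == 1);     /* ...then slot 0 on the next pass */
        assert(avail_contig(4, 4, 8) == 0);     /* empty ring */
        return 0;
}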