Blame view
fs/aio.c
38.5 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 8 9 10 |
/* * An async IO implementation for Linux * Written by Benjamin LaHaise <bcrl@kvack.org> * * Implements an efficient asynchronous io interface. * * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. * * See ../COPYING for licensing terms. */ |
caf4167aa aio: dprintk() ->... |
11 |
#define pr_fmt(fmt) "%s: " fmt, __func__ |
1da177e4c Linux-2.6.12-rc2 |
12 13 14 15 16 |
#include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/aio_abi.h> |
630d9c472 fs: reduce the us... |
17 |
#include <linux/export.h> |
1da177e4c Linux-2.6.12-rc2 |
18 |
#include <linux/syscalls.h> |
b9d128f10 block: move bdi/a... |
19 |
#include <linux/backing-dev.h> |
027445c37 [PATCH] Vectorize... |
20 |
#include <linux/uio.h> |
1da177e4c Linux-2.6.12-rc2 |
21 |
|
1da177e4c Linux-2.6.12-rc2 |
22 23 24 25 26 |
#include <linux/sched.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> |
3d2d827f5 mm: move use_mm/u... |
27 |
#include <linux/mmu_context.h> |
e1bdd5f27 aio: percpu reqs_... |
28 |
#include <linux/percpu.h> |
1da177e4c Linux-2.6.12-rc2 |
29 30 31 32 33 34 |
#include <linux/slab.h> #include <linux/timer.h> #include <linux/aio.h> #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/security.h> |
9c3060bed signal/timer/even... |
35 |
#include <linux/eventfd.h> |
cfb1e33ee aio: implement re... |
36 |
#include <linux/blkdev.h> |
9d85cba71 aio: fix the comp... |
37 |
#include <linux/compat.h> |
36bc08cc0 fs/aio: Add suppo... |
38 39 |
#include <linux/migrate.h> #include <linux/ramfs.h> |
723be6e39 aio: percpu ioctx... |
40 |
#include <linux/percpu-refcount.h> |
71ad7490c rework aio migrat... |
41 |
#include <linux/mount.h> |
1da177e4c Linux-2.6.12-rc2 |
42 43 44 |
#include <asm/kmap_types.h> #include <asm/uaccess.h> |
1da177e4c Linux-2.6.12-rc2 |
45 |
|
68d70d03f constify rw_verif... |
46 |
#include "internal.h" |
4e179bca6 aio: move private... |
47 48 49 50 51 52 |
#define AIO_RING_MAGIC 0xa10a10a1 #define AIO_RING_COMPAT_FEATURES 1 #define AIO_RING_INCOMPAT_FEATURES 0 struct aio_ring { unsigned id; /* kernel internal index number */ unsigned nr; /* number of io_events */ |
fa8a53c39 aio: v4 ensure ac... |
53 54 |
unsigned head; /* Written to by userland or under ring_lock * mutex by aio_read_events_ring(). */ |
4e179bca6 aio: move private... |
55 56 57 58 59 60 61 62 63 64 65 66 |
unsigned tail; unsigned magic; unsigned compat_features; unsigned incompat_features; unsigned header_length; /* size of aio_ring */ struct io_event io_events[0]; }; /* 128 bytes + ring size */ #define AIO_RING_PAGES 8 |
4e179bca6 aio: move private... |
67 |
|
db446a08c aio: convert the ... |
68 69 70 71 72 |
struct kioctx_table { struct rcu_head rcu; unsigned nr; struct kioctx *table[]; }; |
e1bdd5f27 aio: percpu reqs_... |
73 74 75 |
struct kioctx_cpu { unsigned reqs_available; }; |
4e179bca6 aio: move private... |
76 |
struct kioctx { |
723be6e39 aio: percpu ioctx... |
77 |
struct percpu_ref users; |
36f558890 aio: refcounting ... |
78 |
atomic_t dead; |
4e179bca6 aio: move private... |
79 |
|
e34ecee2a aio: Fix a trinit... |
80 |
struct percpu_ref reqs; |
4e179bca6 aio: move private... |
81 |
unsigned long user_id; |
4e179bca6 aio: move private... |
82 |
|
e1bdd5f27 aio: percpu reqs_... |
83 84 85 86 87 88 89 |
struct __percpu kioctx_cpu *cpu; /* * For percpu reqs_available, number of slots we move to/from global * counter at a time: */ unsigned req_batch; |
3e845ce01 aio: change reqs_... |
90 91 92 93 |
/* * This is what userspace passed to io_setup(), it's not used for * anything but counting against the global max_reqs quota. * |
58c85dc20 aio: kill struct ... |
94 |
* The real limit is nr_events - 1, which will be larger (see |
3e845ce01 aio: change reqs_... |
95 96 |
* aio_setup_ring()) */ |
4e179bca6 aio: move private... |
97 |
unsigned max_reqs; |
58c85dc20 aio: kill struct ... |
98 99 |
/* Size of ringbuffer, in units of struct io_event */ unsigned nr_events; |
4e179bca6 aio: move private... |
100 |
|
58c85dc20 aio: kill struct ... |
101 102 103 104 105 |
unsigned long mmap_base; unsigned long mmap_size; struct page **ring_pages; long nr_pages; |
723be6e39 aio: percpu ioctx... |
106 |
struct work_struct free_work; |
4e23bcaeb aio: give shared ... |
107 108 |
struct { |
34e83fc61 aio: reqs_active ... |
109 110 111 112 113 |
/* * This counts the number of available slots in the ringbuffer, * so we avoid overflowing it: it's decremented (if positive) * when allocating a kiocb and incremented when the resulting * io_event is pulled off the ringbuffer. |
e1bdd5f27 aio: percpu reqs_... |
114 115 |
* * We batch accesses to it with a percpu version. |
34e83fc61 aio: reqs_active ... |
116 117 |
*/ atomic_t reqs_available; |
4e23bcaeb aio: give shared ... |
118 119 120 121 122 123 |
} ____cacheline_aligned_in_smp; struct { spinlock_t ctx_lock; struct list_head active_reqs; /* used for cancellation */ } ____cacheline_aligned_in_smp; |
58c85dc20 aio: kill struct ... |
124 125 |
struct { struct mutex ring_lock; |
4e23bcaeb aio: give shared ... |
126 127 |
wait_queue_head_t wait; } ____cacheline_aligned_in_smp; |
58c85dc20 aio: kill struct ... |
128 129 130 131 |
struct { unsigned tail; spinlock_t completion_lock; |
4e23bcaeb aio: give shared ... |
132 |
} ____cacheline_aligned_in_smp; |
58c85dc20 aio: kill struct ... |
133 134 |
struct page *internal_pages[AIO_RING_PAGES]; |
36bc08cc0 fs/aio: Add suppo... |
135 |
struct file *aio_ring_file; |
db446a08c aio: convert the ... |
136 137 |
unsigned id; |
4e179bca6 aio: move private... |
138 |
}; |
1da177e4c Linux-2.6.12-rc2 |
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
unsigned long aio_nr;		/* current system wide number of aio requests */
unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
/*----end sysctl variables---*/

/* Slab caches for kiocbs and contexts, created in aio_setup(). */
static struct kmem_cache	*kiocb_cachep;
static struct kmem_cache	*kioctx_cachep;
|
71ad7490c rework aio migrat... |
147 148 149 150 151 152 153 154 155 156 157 |
static struct vfsmount *aio_mnt; static const struct file_operations aio_ring_fops; static const struct address_space_operations aio_ctx_aops; static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { struct qstr this = QSTR_INIT("[aio]", 5); struct file *file; struct path path; struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); |
7f62656be aio: checking for... |
158 159 |
if (IS_ERR(inode)) return ERR_CAST(inode); |
71ad7490c rework aio migrat... |
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
inode->i_mapping->a_ops = &aio_ctx_aops; inode->i_mapping->private_data = ctx; inode->i_size = PAGE_SIZE * nr_pages; path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); if (!path.dentry) { iput(inode); return ERR_PTR(-ENOMEM); } path.mnt = mntget(aio_mnt); d_instantiate(path.dentry, inode); file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); if (IS_ERR(file)) { path_put(&path); return file; } file->f_flags = O_RDWR; file->private_data = ctx; return file; } static struct dentry *aio_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { static const struct dentry_operations ops = { .d_dname = simple_dname, }; return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); } |
1da177e4c Linux-2.6.12-rc2 |
192 193 194 195 196 197 |
/* aio_setup * Creates the slab caches used by the aio routines, panic on * failure as this is done early during the boot sequence. */ static int __init aio_setup(void) { |
71ad7490c rework aio migrat... |
198 199 200 201 202 203 204 205 |
static struct file_system_type aio_fs = { .name = "aio", .mount = aio_mount, .kill_sb = kill_anon_super, }; aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); |
0a31bd5f2 KMEM_CACHE(): sim... |
206 207 |
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
1da177e4c Linux-2.6.12-rc2 |
208 |
|
caf4167aa aio: dprintk() ->... |
209 210 |
pr_debug("sizeof(struct page) = %zu ", sizeof(struct page)); |
1da177e4c Linux-2.6.12-rc2 |
211 212 213 |
return 0; } |
385773e04 aio.c: move EXPOR... |
214 |
__initcall(aio_setup); |
1da177e4c Linux-2.6.12-rc2 |
215 |
|
5e9ae2e5d aio: fix use-afte... |
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
static void put_aio_ring_file(struct kioctx *ctx) { struct file *aio_ring_file = ctx->aio_ring_file; if (aio_ring_file) { truncate_setsize(aio_ring_file->f_inode, 0); /* Prevent further access to the kioctx from migratepages */ spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock); aio_ring_file->f_inode->i_mapping->private_data = NULL; ctx->aio_ring_file = NULL; spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock); fput(aio_ring_file); } } |
1da177e4c Linux-2.6.12-rc2 |
231 232 |
static void aio_free_ring(struct kioctx *ctx) { |
36bc08cc0 fs/aio: Add suppo... |
233 |
int i; |
1da177e4c Linux-2.6.12-rc2 |
234 |
|
fa8a53c39 aio: v4 ensure ac... |
235 236 237 238 |
/* Disconnect the kiotx from the ring file. This prevents future * accesses to the kioctx from page migration. */ put_aio_ring_file(ctx); |
36bc08cc0 fs/aio: Add suppo... |
239 |
for (i = 0; i < ctx->nr_pages; i++) { |
8e321fefb aio/migratepages:... |
240 |
struct page *page; |
36bc08cc0 fs/aio: Add suppo... |
241 242 243 |
pr_debug("pid(%d) [%d] page->count=%d ", current->pid, i, page_count(ctx->ring_pages[i])); |
8e321fefb aio/migratepages:... |
244 245 246 247 248 |
page = ctx->ring_pages[i]; if (!page) continue; ctx->ring_pages[i] = NULL; put_page(page); |
36bc08cc0 fs/aio: Add suppo... |
249 |
} |
1da177e4c Linux-2.6.12-rc2 |
250 |
|
ddb8c45ba aio: nullify aio-... |
251 |
if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { |
58c85dc20 aio: kill struct ... |
252 |
kfree(ctx->ring_pages); |
ddb8c45ba aio: nullify aio-... |
253 254 |
ctx->ring_pages = NULL; } |
36bc08cc0 fs/aio: Add suppo... |
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 |
} static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) { vma->vm_ops = &generic_file_vm_ops; return 0; } static const struct file_operations aio_ring_fops = { .mmap = aio_ring_mmap, }; static int aio_set_page_dirty(struct page *page) { return 0; } |
#if IS_ENABLED(CONFIG_MIGRATION)
/*
 * Migrate one ring page from @old to @new.
 *
 * Locking: mapping->private_lock pins the kioctx against teardown;
 * ctx->ring_lock (trylock, so we can't deadlock against a context
 * being set up) keeps aio_read_events_ring() and partially built
 * contexts away; ctx->completion_lock covers the actual page copy so
 * no completion slips into the old page after it has been copied.
 */
static int aio_migratepage(struct address_space *mapping, struct page *new,
			struct page *old, enum migrate_mode mode)
{
	struct kioctx *ctx;
	unsigned long flags;
	pgoff_t idx;
	int rc = 0;

	/* mapping->private_lock here protects against the kioctx teardown.  */
	spin_lock(&mapping->private_lock);
	ctx = mapping->private_data;
	if (!ctx) {
		rc = -EINVAL;
		goto out;
	}

	/* The ring_lock mutex.  This prevents aio_read_events() from writing
	 * to the ring's head, and prevents page migration from mucking in
	 * a partially initialized kiotx.
	 */
	if (!mutex_trylock(&ctx->ring_lock)) {
		rc = -EAGAIN;
		goto out;
	}

	idx = old->index;
	if (idx < (pgoff_t)ctx->nr_pages) {
		/* Make sure the old page hasn't already been changed */
		if (ctx->ring_pages[idx] != old)
			rc = -EAGAIN;
	} else
		rc = -EINVAL;

	if (rc != 0)
		goto out_unlock;

	/* Writeback must be complete */
	BUG_ON(PageWriteback(old));
	get_page(new);

	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
	if (rc != MIGRATEPAGE_SUCCESS) {
		put_page(new);
		goto out_unlock;
	}

	/* Take completion_lock to prevent other writes to the ring buffer
	 * while the old page is copied to the new.  This prevents new
	 * events from being lost.
	 */
	spin_lock_irqsave(&ctx->completion_lock, flags);
	migrate_page_copy(new, old);
	BUG_ON(ctx->ring_pages[idx] != old);
	ctx->ring_pages[idx] = new;
	spin_unlock_irqrestore(&ctx->completion_lock, flags);

	/* The old page is no longer accessible. */
	put_page(old);

out_unlock:
	mutex_unlock(&ctx->ring_lock);
out:
	spin_unlock(&mapping->private_lock);
	return rc;
}
#endif
1da177e4c Linux-2.6.12-rc2 |
337 |
|
36bc08cc0 fs/aio: Add suppo... |
338 339 |
static const struct address_space_operations aio_ctx_aops = { .set_page_dirty = aio_set_page_dirty, |
0c45355fc aio: fix build wh... |
340 |
#if IS_ENABLED(CONFIG_MIGRATION) |
36bc08cc0 fs/aio: Add suppo... |
341 |
.migratepage = aio_migratepage, |
0c45355fc aio: fix build wh... |
342 |
#endif |
36bc08cc0 fs/aio: Add suppo... |
343 |
}; |
1da177e4c Linux-2.6.12-rc2 |
344 345 346 |
static int aio_setup_ring(struct kioctx *ctx) { struct aio_ring *ring; |
1da177e4c Linux-2.6.12-rc2 |
347 |
unsigned nr_events = ctx->max_reqs; |
41003a7bc aio: remove retry... |
348 |
struct mm_struct *mm = current->mm; |
3dc9acb67 aio: clean up and... |
349 |
unsigned long size, unused; |
1da177e4c Linux-2.6.12-rc2 |
350 |
int nr_pages; |
36bc08cc0 fs/aio: Add suppo... |
351 352 |
int i; struct file *file; |
1da177e4c Linux-2.6.12-rc2 |
353 354 355 356 357 358 |
/* Compensate for the ring buffer's head/tail overlap entry */ nr_events += 2; /* 1 is required, 2 for good luck */ size = sizeof(struct aio_ring); size += sizeof(struct io_event) * nr_events; |
1da177e4c Linux-2.6.12-rc2 |
359 |
|
36bc08cc0 fs/aio: Add suppo... |
360 |
nr_pages = PFN_UP(size); |
1da177e4c Linux-2.6.12-rc2 |
361 362 |
if (nr_pages < 0) return -EINVAL; |
71ad7490c rework aio migrat... |
363 |
file = aio_private_file(ctx, nr_pages); |
36bc08cc0 fs/aio: Add suppo... |
364 365 |
if (IS_ERR(file)) { ctx->aio_ring_file = NULL; |
fa8a53c39 aio: v4 ensure ac... |
366 |
return -ENOMEM; |
36bc08cc0 fs/aio: Add suppo... |
367 |
} |
3dc9acb67 aio: clean up and... |
368 369 370 371 372 373 374 375 376 377 378 379 380 |
ctx->aio_ring_file = file; nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); ctx->ring_pages = ctx->internal_pages; if (nr_pages > AIO_RING_PAGES) { ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!ctx->ring_pages) { put_aio_ring_file(ctx); return -ENOMEM; } } |
36bc08cc0 fs/aio: Add suppo... |
381 382 383 384 385 386 387 388 389 390 391 392 |
for (i = 0; i < nr_pages; i++) { struct page *page; page = find_or_create_page(file->f_inode->i_mapping, i, GFP_HIGHUSER | __GFP_ZERO); if (!page) break; pr_debug("pid(%d) page[%d]->count=%d ", current->pid, i, page_count(page)); SetPageUptodate(page); SetPageDirty(page); unlock_page(page); |
3dc9acb67 aio: clean up and... |
393 394 |
ctx->ring_pages[i] = page; |
36bc08cc0 fs/aio: Add suppo... |
395 |
} |
3dc9acb67 aio: clean up and... |
396 |
ctx->nr_pages = i; |
1da177e4c Linux-2.6.12-rc2 |
397 |
|
3dc9acb67 aio: clean up and... |
398 399 |
if (unlikely(i != nr_pages)) { aio_free_ring(ctx); |
fa8a53c39 aio: v4 ensure ac... |
400 |
return -ENOMEM; |
1da177e4c Linux-2.6.12-rc2 |
401 |
} |
58c85dc20 aio: kill struct ... |
402 403 404 |
ctx->mmap_size = nr_pages * PAGE_SIZE; pr_debug("attempting mmap of %lu bytes ", ctx->mmap_size); |
36bc08cc0 fs/aio: Add suppo... |
405 |
|
41003a7bc aio: remove retry... |
406 |
down_write(&mm->mmap_sem); |
36bc08cc0 fs/aio: Add suppo... |
407 408 |
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, PROT_READ | PROT_WRITE, |
3dc9acb67 aio: clean up and... |
409 410 |
MAP_SHARED, 0, &unused); up_write(&mm->mmap_sem); |
58c85dc20 aio: kill struct ... |
411 |
if (IS_ERR((void *)ctx->mmap_base)) { |
58c85dc20 aio: kill struct ... |
412 |
ctx->mmap_size = 0; |
1da177e4c Linux-2.6.12-rc2 |
413 |
aio_free_ring(ctx); |
fa8a53c39 aio: v4 ensure ac... |
414 |
return -ENOMEM; |
1da177e4c Linux-2.6.12-rc2 |
415 |
} |
58c85dc20 aio: kill struct ... |
416 417 |
pr_debug("mmap address: 0x%08lx ", ctx->mmap_base); |
d6c355c7d aio: fix race in ... |
418 |
|
58c85dc20 aio: kill struct ... |
419 420 |
ctx->user_id = ctx->mmap_base; ctx->nr_events = nr_events; /* trusted copy */ |
1da177e4c Linux-2.6.12-rc2 |
421 |
|
58c85dc20 aio: kill struct ... |
422 |
ring = kmap_atomic(ctx->ring_pages[0]); |
1da177e4c Linux-2.6.12-rc2 |
423 |
ring->nr = nr_events; /* user copy */ |
db446a08c aio: convert the ... |
424 |
ring->id = ~0U; |
1da177e4c Linux-2.6.12-rc2 |
425 426 427 428 429 |
ring->head = ring->tail = 0; ring->magic = AIO_RING_MAGIC; ring->compat_features = AIO_RING_COMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->header_length = sizeof(struct aio_ring); |
e8e3c3d66 fs: remove the se... |
430 |
kunmap_atomic(ring); |
58c85dc20 aio: kill struct ... |
431 |
flush_dcache_page(ctx->ring_pages[0]); |
1da177e4c Linux-2.6.12-rc2 |
432 433 434 |
return 0; } |
1da177e4c Linux-2.6.12-rc2 |
435 436 437 |
#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event)) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) |
0460fef2a aio: use cancella... |
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 |
void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { struct kioctx *ctx = req->ki_ctx; unsigned long flags; spin_lock_irqsave(&ctx->ctx_lock, flags); if (!req->ki_list.next) list_add(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; spin_unlock_irqrestore(&ctx->ctx_lock, flags); } EXPORT_SYMBOL(kiocb_set_cancel_fn); |
bec68faaf aio: io_cancel() ... |
453 |
static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) |
906b973cf aio: add kiocb_ca... |
454 |
{ |
0460fef2a aio: use cancella... |
455 |
kiocb_cancel_fn *old, *cancel; |
906b973cf aio: add kiocb_ca... |
456 |
|
0460fef2a aio: use cancella... |
457 458 459 460 461 462 463 464 |
/* * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it * actually has a cancel function, hence the cmpxchg() */ cancel = ACCESS_ONCE(kiocb->ki_cancel); do { if (!cancel || cancel == KIOCB_CANCELLED) |
57282d8fd aio: Kill ki_users |
465 |
return -EINVAL; |
906b973cf aio: add kiocb_ca... |
466 |
|
0460fef2a aio: use cancella... |
467 468 469 |
old = cancel; cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); } while (cancel != old); |
906b973cf aio: add kiocb_ca... |
470 |
|
57282d8fd aio: Kill ki_users |
471 |
return cancel(kiocb); |
906b973cf aio: add kiocb_ca... |
472 |
} |
e34ecee2a aio: Fix a trinit... |
473 |
static void free_ioctx(struct work_struct *work) |
36f558890 aio: refcounting ... |
474 |
{ |
e34ecee2a aio: Fix a trinit... |
475 |
struct kioctx *ctx = container_of(work, struct kioctx, free_work); |
e1bdd5f27 aio: percpu reqs_... |
476 |
|
e34ecee2a aio: Fix a trinit... |
477 478 |
pr_debug("freeing %p ", ctx); |
e1bdd5f27 aio: percpu reqs_... |
479 |
|
e34ecee2a aio: Fix a trinit... |
480 |
aio_free_ring(ctx); |
e1bdd5f27 aio: percpu reqs_... |
481 |
free_percpu(ctx->cpu); |
36f558890 aio: refcounting ... |
482 483 |
kmem_cache_free(kioctx_cachep, ctx); } |
e34ecee2a aio: Fix a trinit... |
484 485 486 487 488 489 490 |
static void free_ioctx_reqs(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, reqs); INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); } |
36f558890 aio: refcounting ... |
491 492 493 494 495 |
/* * When this function runs, the kioctx has been removed from the "hash table" * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ |
e34ecee2a aio: Fix a trinit... |
496 |
static void free_ioctx_users(struct percpu_ref *ref) |
36f558890 aio: refcounting ... |
497 |
{ |
e34ecee2a aio: Fix a trinit... |
498 |
struct kioctx *ctx = container_of(ref, struct kioctx, users); |
36f558890 aio: refcounting ... |
499 500 501 502 503 504 505 506 507 |
struct kiocb *req; spin_lock_irq(&ctx->ctx_lock); while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, struct kiocb, ki_list); list_del_init(&req->ki_list); |
bec68faaf aio: io_cancel() ... |
508 |
kiocb_cancel(ctx, req); |
36f558890 aio: refcounting ... |
509 510 511 |
} spin_unlock_irq(&ctx->ctx_lock); |
e34ecee2a aio: Fix a trinit... |
512 513 |
percpu_ref_kill(&ctx->reqs); percpu_ref_put(&ctx->reqs); |
36f558890 aio: refcounting ... |
514 |
} |
db446a08c aio: convert the ... |
515 516 517 518 519 520 521 |
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) { unsigned i, new_nr; struct kioctx_table *table, *old; struct aio_ring *ring; spin_lock(&mm->ioctx_lock); |
d9b2c8714 aio: rcu_read_loc... |
522 |
rcu_read_lock(); |
77d30b14d aio: fix rcu spar... |
523 |
table = rcu_dereference(mm->ioctx_table); |
db446a08c aio: convert the ... |
524 525 526 527 528 529 530 |
while (1) { if (table) for (i = 0; i < table->nr; i++) if (!table->table[i]) { ctx->id = i; table->table[i] = ctx; |
d9b2c8714 aio: rcu_read_loc... |
531 |
rcu_read_unlock(); |
db446a08c aio: convert the ... |
532 |
spin_unlock(&mm->ioctx_lock); |
fa8a53c39 aio: v4 ensure ac... |
533 534 535 536 |
/* While kioctx setup is in progress, * we are protected from page migration * changes ring_pages by ->ring_lock. */ |
db446a08c aio: convert the ... |
537 538 539 540 541 542 543 |
ring = kmap_atomic(ctx->ring_pages[0]); ring->id = ctx->id; kunmap_atomic(ring); return 0; } new_nr = (table ? table->nr : 1) * 4; |
d9b2c8714 aio: rcu_read_loc... |
544 |
rcu_read_unlock(); |
db446a08c aio: convert the ... |
545 546 547 548 549 550 551 552 553 554 |
spin_unlock(&mm->ioctx_lock); table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) * new_nr, GFP_KERNEL); if (!table) return -ENOMEM; table->nr = new_nr; spin_lock(&mm->ioctx_lock); |
d9b2c8714 aio: rcu_read_loc... |
555 |
rcu_read_lock(); |
77d30b14d aio: fix rcu spar... |
556 |
old = rcu_dereference(mm->ioctx_table); |
db446a08c aio: convert the ... |
557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 |
if (!old) { rcu_assign_pointer(mm->ioctx_table, table); } else if (table->nr > old->nr) { memcpy(table->table, old->table, old->nr * sizeof(struct kioctx *)); rcu_assign_pointer(mm->ioctx_table, table); kfree_rcu(old, rcu); } else { kfree(table); table = old; } } } |
e34ecee2a aio: Fix a trinit... |
572 573 574 575 576 577 578 579 580 |
static void aio_nr_sub(unsigned nr) { spin_lock(&aio_nr_lock); if (WARN_ON(aio_nr - nr > aio_nr)) aio_nr = 0; else aio_nr -= nr; spin_unlock(&aio_nr_lock); } |
1da177e4c Linux-2.6.12-rc2 |
581 582 583 584 585 |
/* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ static struct kioctx *ioctx_alloc(unsigned nr_events) { |
41003a7bc aio: remove retry... |
586 |
struct mm_struct *mm = current->mm; |
1da177e4c Linux-2.6.12-rc2 |
587 |
struct kioctx *ctx; |
e23754f88 aio: don't bother... |
588 |
int err = -ENOMEM; |
1da177e4c Linux-2.6.12-rc2 |
589 |
|
e1bdd5f27 aio: percpu reqs_... |
590 591 592 593 594 595 596 597 598 599 600 |
/* * We keep track of the number of available ringbuffer slots, to prevent * overflow (reqs_available), and we also use percpu counters for this. * * So since up to half the slots might be on other cpu's percpu counters * and unavailable, double nr_events so userspace sees what they * expected: additionally, we move req_batch slots to/from percpu * counters at a time, so make sure that isn't 0: */ nr_events = max(nr_events, num_possible_cpus() * 4); nr_events *= 2; |
1da177e4c Linux-2.6.12-rc2 |
601 602 603 604 605 606 607 |
/* Prevent overflows */ if ((nr_events > (0x10000000U / sizeof(struct io_event))) || (nr_events > (0x10000000U / sizeof(struct kiocb)))) { pr_debug("ENOMEM: nr_events too high "); return ERR_PTR(-EINVAL); } |
4cd81c3df aio: double aio_m... |
608 |
if (!nr_events || (unsigned long)nr_events > (aio_max_nr * 2UL)) |
1da177e4c Linux-2.6.12-rc2 |
609 |
return ERR_PTR(-EAGAIN); |
c37622296 [PATCH] Transform... |
610 |
ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); |
1da177e4c Linux-2.6.12-rc2 |
611 612 |
if (!ctx) return ERR_PTR(-ENOMEM); |
1da177e4c Linux-2.6.12-rc2 |
613 |
ctx->max_reqs = nr_events; |
1da177e4c Linux-2.6.12-rc2 |
614 |
|
1da177e4c Linux-2.6.12-rc2 |
615 |
spin_lock_init(&ctx->ctx_lock); |
0460fef2a aio: use cancella... |
616 |
spin_lock_init(&ctx->completion_lock); |
58c85dc20 aio: kill struct ... |
617 |
mutex_init(&ctx->ring_lock); |
fa8a53c39 aio: v4 ensure ac... |
618 619 620 |
/* Protect against page migration throughout kiotx setup by keeping * the ring_lock mutex held until setup is complete. */ mutex_lock(&ctx->ring_lock); |
1da177e4c Linux-2.6.12-rc2 |
621 622 623 |
init_waitqueue_head(&ctx->wait); INIT_LIST_HEAD(&ctx->active_reqs); |
1da177e4c Linux-2.6.12-rc2 |
624 |
|
fa8a53c39 aio: v4 ensure ac... |
625 626 627 628 629 |
if (percpu_ref_init(&ctx->users, free_ioctx_users)) goto err; if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) goto err; |
e1bdd5f27 aio: percpu reqs_... |
630 631 |
ctx->cpu = alloc_percpu(struct kioctx_cpu); if (!ctx->cpu) |
e34ecee2a aio: Fix a trinit... |
632 |
goto err; |
1da177e4c Linux-2.6.12-rc2 |
633 |
|
fa8a53c39 aio: v4 ensure ac... |
634 635 |
err = aio_setup_ring(ctx); if (err < 0) |
e34ecee2a aio: Fix a trinit... |
636 |
goto err; |
e1bdd5f27 aio: percpu reqs_... |
637 |
|
34e83fc61 aio: reqs_active ... |
638 |
atomic_set(&ctx->reqs_available, ctx->nr_events - 1); |
e1bdd5f27 aio: percpu reqs_... |
639 |
ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); |
6878ea72a aio: be defensive... |
640 641 |
if (ctx->req_batch < 1) ctx->req_batch = 1; |
34e83fc61 aio: reqs_active ... |
642 |
|
1da177e4c Linux-2.6.12-rc2 |
643 |
/* limit the number of system wide aios */ |
9fa1cb397 aio: aio_nr_lock ... |
644 |
spin_lock(&aio_nr_lock); |
4cd81c3df aio: double aio_m... |
645 |
if (aio_nr + nr_events > (aio_max_nr * 2UL) || |
2dd542b7a aio: aio_nr decre... |
646 |
aio_nr + nr_events < aio_nr) { |
9fa1cb397 aio: aio_nr_lock ... |
647 |
spin_unlock(&aio_nr_lock); |
e34ecee2a aio: Fix a trinit... |
648 |
err = -EAGAIN; |
d1b943271 aio: clean up aio... |
649 |
goto err_ctx; |
2dd542b7a aio: aio_nr decre... |
650 651 |
} aio_nr += ctx->max_reqs; |
9fa1cb397 aio: aio_nr_lock ... |
652 |
spin_unlock(&aio_nr_lock); |
1da177e4c Linux-2.6.12-rc2 |
653 |
|
1881686f8 aio: fix kioctx l... |
654 655 |
percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */ |
723be6e39 aio: percpu ioctx... |
656 |
|
da90382c2 aio: fix error ha... |
657 658 |
err = ioctx_add_table(ctx, mm); if (err) |
e34ecee2a aio: Fix a trinit... |
659 |
goto err_cleanup; |
da90382c2 aio: fix error ha... |
660 |
|
fa8a53c39 aio: v4 ensure ac... |
661 662 |
/* Release the ring_lock mutex now that all setup is complete. */ mutex_unlock(&ctx->ring_lock); |
caf4167aa aio: dprintk() ->... |
663 664 |
pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x ", |
58c85dc20 aio: kill struct ... |
665 |
ctx, ctx->user_id, mm, ctx->nr_events); |
1da177e4c Linux-2.6.12-rc2 |
666 |
return ctx; |
e34ecee2a aio: Fix a trinit... |
667 668 |
err_cleanup: aio_nr_sub(ctx->max_reqs); |
d1b943271 aio: clean up aio... |
669 670 |
err_ctx: aio_free_ring(ctx); |
e34ecee2a aio: Fix a trinit... |
671 |
err: |
fa8a53c39 aio: v4 ensure ac... |
672 |
mutex_unlock(&ctx->ring_lock); |
e1bdd5f27 aio: percpu reqs_... |
673 |
free_percpu(ctx->cpu); |
e34ecee2a aio: Fix a trinit... |
674 |
free_percpu(ctx->reqs.pcpu_count); |
723be6e39 aio: percpu ioctx... |
675 |
free_percpu(ctx->users.pcpu_count); |
1da177e4c Linux-2.6.12-rc2 |
676 |
kmem_cache_free(kioctx_cachep, ctx); |
caf4167aa aio: dprintk() ->... |
677 678 |
pr_debug("error allocating ioctx %d ", err); |
e23754f88 aio: don't bother... |
679 |
return ERR_PTR(err); |
1da177e4c Linux-2.6.12-rc2 |
680 |
} |
36f558890 aio: refcounting ... |
681 682 683 684 685 |
/* kill_ioctx * Cancels all outstanding aio requests on an aio context. Used * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ |
db446a08c aio: convert the ... |
686 |
static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) |
36f558890 aio: refcounting ... |
687 688 |
{ if (!atomic_xchg(&ctx->dead, 1)) { |
db446a08c aio: convert the ... |
689 690 691 |
struct kioctx_table *table; spin_lock(&mm->ioctx_lock); |
d9b2c8714 aio: rcu_read_loc... |
692 |
rcu_read_lock(); |
77d30b14d aio: fix rcu spar... |
693 |
table = rcu_dereference(mm->ioctx_table); |
db446a08c aio: convert the ... |
694 695 696 |
WARN_ON(ctx != table->table[ctx->id]); table->table[ctx->id] = NULL; |
d9b2c8714 aio: rcu_read_loc... |
697 |
rcu_read_unlock(); |
db446a08c aio: convert the ... |
698 |
spin_unlock(&mm->ioctx_lock); |
723be6e39 aio: percpu ioctx... |
699 700 |
/* percpu_ref_kill() will do the necessary call_rcu() */ wake_up_all(&ctx->wait); |
dee11c236 [PATCH] aio: fix ... |
701 |
|
36f558890 aio: refcounting ... |
702 |
/* |
4fcc712f5 aio: fix io_destr... |
703 704 705 706 707 |
* It'd be more correct to do this in free_ioctx(), after all * the outstanding kiocbs have finished - but by then io_destroy * has already returned, so io_setup() could potentially return * -EAGAIN with no ioctxs actually in use (as far as userspace * could tell). |
36f558890 aio: refcounting ... |
708 |
*/ |
e34ecee2a aio: Fix a trinit... |
709 |
aio_nr_sub(ctx->max_reqs); |
4fcc712f5 aio: fix io_destr... |
710 711 712 |
if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); |
723be6e39 aio: percpu ioctx... |
713 |
percpu_ref_kill(&ctx->users); |
36f558890 aio: refcounting ... |
714 |
} |
1da177e4c Linux-2.6.12-rc2 |
715 716 717 718 719 |
} /* wait_on_sync_kiocb: * Waits on the given sync kiocb to complete. */ |
57282d8fd aio: Kill ki_users |
720 |
ssize_t wait_on_sync_kiocb(struct kiocb *req) |
1da177e4c Linux-2.6.12-rc2 |
721 |
{ |
57282d8fd aio: Kill ki_users |
722 |
while (!req->ki_ctx) { |
1da177e4c Linux-2.6.12-rc2 |
723 |
set_current_state(TASK_UNINTERRUPTIBLE); |
57282d8fd aio: Kill ki_users |
724 |
if (req->ki_ctx) |
1da177e4c Linux-2.6.12-rc2 |
725 |
break; |
41d10da37 aio: account I/O ... |
726 |
io_schedule(); |
1da177e4c Linux-2.6.12-rc2 |
727 728 |
} __set_current_state(TASK_RUNNING); |
57282d8fd aio: Kill ki_users |
729 |
return req->ki_user_data; |
1da177e4c Linux-2.6.12-rc2 |
730 |
} |
385773e04 aio.c: move EXPOR... |
731 |
EXPORT_SYMBOL(wait_on_sync_kiocb); |
1da177e4c Linux-2.6.12-rc2 |
732 |
|
/*
 * exit_aio: called when the last user of mm goes away.  At this point, there is
 * no way for any new requests to be submitted or any of the io_* syscalls to be
 * called on the context.
 *
 * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
 * them.
 */
void exit_aio(struct mm_struct *mm)
{
	struct kioctx_table *table;
	struct kioctx *ctx;
	unsigned i = 0;

	while (1) {
		rcu_read_lock();
		table = rcu_dereference(mm->ioctx_table);

		/*
		 * Scan forward to the next live ctx.  Once the table is
		 * exhausted (or was never allocated), detach and free it,
		 * and we're done.
		 */
		do {
			if (!table || i >= table->nr) {
				rcu_read_unlock();
				rcu_assign_pointer(mm->ioctx_table, NULL);
				if (table)
					kfree(table);
				return;
			}

			ctx = table->table[i++];
		} while (!ctx);

		rcu_read_unlock();

		/*
		 * We don't need to bother with munmap() here -
		 * exit_mmap(mm) is coming and it'll unmap everything.
		 * Since aio_free_ring() uses non-zero ->mmap_size
		 * as indicator that it needs to unmap the area,
		 * just set it to 0; aio_free_ring() is the only
		 * place that uses ->mmap_size, so it's safe.
		 */
		ctx->mmap_size = 0;

		kill_ioctx(mm, ctx);
	}
}
/*
 * put_reqs_available: return @nr request slots to the context.
 *
 * Slots go to this CPU's cache first; whenever the per-cpu cache holds at
 * least two batches, whole batches are flushed back to the global
 * reqs_available counter so other CPUs can allocate them.
 */
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
	struct kioctx_cpu *kcpu;

	preempt_disable();
	kcpu = this_cpu_ptr(ctx->cpu);

	kcpu->reqs_available += nr;
	while (kcpu->reqs_available >= ctx->req_batch * 2) {
		kcpu->reqs_available -= ctx->req_batch;
		atomic_add(ctx->req_batch, &ctx->reqs_available);
	}

	preempt_enable();
}

/*
 * get_reqs_available: reserve one request slot for this context,
 * refilling the per-cpu cache with a whole batch from the global counter
 * when it runs dry.  Returns false when no slots are left (the caller
 * then fails the submission with -EAGAIN).
 */
static bool get_reqs_available(struct kioctx *ctx)
{
	struct kioctx_cpu *kcpu;
	bool ret = false;

	preempt_disable();
	kcpu = this_cpu_ptr(ctx->cpu);

	if (!kcpu->reqs_available) {
		int old, avail = atomic_read(&ctx->reqs_available);

		/* cmpxchg loop: atomically claim a full batch, or give up */
		do {
			if (avail < ctx->req_batch)
				goto out;

			old = avail;
			avail = atomic_cmpxchg(&ctx->reqs_available,
					       avail, avail - ctx->req_batch);
		} while (avail != old);

		kcpu->reqs_available += ctx->req_batch;
	}

	ret = true;
	kcpu->reqs_available--;
out:
	preempt_enable();
	return ret;
}
1da177e4c Linux-2.6.12-rc2 |
823 |
/* aio_get_req |
57282d8fd aio: Kill ki_users |
824 825 |
* Allocate a slot for an aio request. * Returns NULL if no requests are free. |
1da177e4c Linux-2.6.12-rc2 |
826 |
*/ |
a1c8eae75 aio: kill batch a... |
827 |
static inline struct kiocb *aio_get_req(struct kioctx *ctx) |
1da177e4c Linux-2.6.12-rc2 |
828 |
{ |
a1c8eae75 aio: kill batch a... |
829 |
struct kiocb *req; |
e1bdd5f27 aio: percpu reqs_... |
830 |
if (!get_reqs_available(ctx)) |
a1c8eae75 aio: kill batch a... |
831 |
return NULL; |
0460fef2a aio: use cancella... |
832 |
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); |
1da177e4c Linux-2.6.12-rc2 |
833 |
if (unlikely(!req)) |
a1c8eae75 aio: kill batch a... |
834 |
goto out_put; |
1da177e4c Linux-2.6.12-rc2 |
835 |
|
e34ecee2a aio: Fix a trinit... |
836 |
percpu_ref_get(&ctx->reqs); |
1da177e4c Linux-2.6.12-rc2 |
837 |
req->ki_ctx = ctx; |
080d676de aio: allocate kio... |
838 |
return req; |
a1c8eae75 aio: kill batch a... |
839 |
out_put: |
e1bdd5f27 aio: percpu reqs_... |
840 |
put_reqs_available(ctx, 1); |
a1c8eae75 aio: kill batch a... |
841 |
return NULL; |
1da177e4c Linux-2.6.12-rc2 |
842 |
} |
11599ebac aio: make aio_put... |
843 |
static void kiocb_free(struct kiocb *req) |
1da177e4c Linux-2.6.12-rc2 |
844 |
{ |
1d98ebfcc aio: do fget() af... |
845 846 |
if (req->ki_filp) fput(req->ki_filp); |
133890103 eventfd: revised ... |
847 848 |
if (req->ki_eventfd != NULL) eventfd_ctx_put(req->ki_eventfd); |
1da177e4c Linux-2.6.12-rc2 |
849 |
kmem_cache_free(kiocb_cachep, req); |
1da177e4c Linux-2.6.12-rc2 |
850 |
} |
/*
 * lookup_ioctx: translate a userspace aio_context_t (the userspace
 * address of the mmap'ed aio ring) into a kioctx, taking a reference on
 * ctx->users.  Returns NULL if the id is bogus or stale.
 */
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
	struct aio_ring __user *ring  = (void __user *)ctx_id;
	struct mm_struct *mm = current->mm;
	struct kioctx *ctx, *ret = NULL;
	struct kioctx_table *table;
	unsigned id;

	/* the ring's ->id field is this ctx's index into mm->ioctx_table */
	if (get_user(id, &ring->id))
		return NULL;

	rcu_read_lock();
	table = rcu_dereference(mm->ioctx_table);

	if (!table || id >= table->nr)
		goto out;

	ctx = table->table[id];
	/* verify the slot really maps back to this user address */
	if (ctx && ctx->user_id == ctx_id) {
		percpu_ref_get(&ctx->users);
		ret = ctx;
	}
out:
	rcu_read_unlock();
	return ret;
}
/* aio_complete
 *	Called when the io request on the given iocb is complete.
 */
void aio_complete(struct kiocb *iocb, long res, long res2)
{
	struct kioctx	*ctx = iocb->ki_ctx;
	struct aio_ring	*ring;
	struct io_event	*ev_page, *event;
	unsigned long	flags;
	unsigned tail, pos;

	/*
	 * Special case handling for sync iocbs:
	 *  - events go directly into the iocb for fast handling
	 *  - the sync task with the iocb in its stack holds the single iocb
	 *    ref, no other paths have a way to get another ref
	 *  - the sync task helpfully left a reference to itself in the iocb
	 */
	if (is_sync_kiocb(iocb)) {
		iocb->ki_user_data = res;
		/* publish the result before the "done" flag in ki_ctx */
		smp_wmb();
		iocb->ki_ctx = ERR_PTR(-EXDEV);
		wake_up_process(iocb->ki_obj.tsk);
		return;
	}

	/* still on the ctx's cancellation list? unlink under ctx_lock */
	if (iocb->ki_list.next) {
		unsigned long flags;

		spin_lock_irqsave(&ctx->ctx_lock, flags);
		list_del(&iocb->ki_list);
		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
	}

	/*
	 * Add a completion event to the ring buffer. Must be done holding
	 * ctx->completion_lock to prevent other code from messing with the tail
	 * pointer since we might be called from irq context.
	 */
	spin_lock_irqsave(&ctx->completion_lock, flags);

	tail = ctx->tail;
	pos = tail + AIO_EVENTS_OFFSET;

	if (++tail >= ctx->nr_events)
		tail = 0;

	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
	event = ev_page + pos % AIO_EVENTS_PER_PAGE;

	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
	event->data = iocb->ki_user_data;
	event->res = res;
	event->res2 = res2;

	kunmap_atomic(ev_page);
	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);

	pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
		 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
		 res, res2);

	/* after flagging the request as done, we
	 * must never even look at it again
	 */
	smp_wmb();	/* make event visible before updating tail */

	ctx->tail = tail;

	ring = kmap_atomic(ctx->ring_pages[0]);
	ring->tail = tail;
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);

	spin_unlock_irqrestore(&ctx->completion_lock, flags);

	pr_debug("added to ring %p at [%u]\n", iocb, tail);

	/*
	 * Check if the user asked us to deliver the result through an
	 * eventfd. The eventfd_signal() function is safe to be called
	 * from IRQ context.
	 */
	if (iocb->ki_eventfd != NULL)
		eventfd_signal(iocb->ki_eventfd, 1);

	/* everything turned out well, dispose of the aiocb. */
	kiocb_free(iocb);

	/*
	 * We have to order our ring_info tail store above and test
	 * of the wait list below outside the wait lock.  This is
	 * like in wake_up_bit() where clearing a bit has to be
	 * ordered with the unlocked test.
	 */
	smp_mb();

	if (waitqueue_active(&ctx->wait))
		wake_up(&ctx->wait);

	/* drop the request's ref on ctx->reqs, taken in aio_get_req() */
	percpu_ref_put(&ctx->reqs);
}
EXPORT_SYMBOL(aio_complete);
1da177e4c Linux-2.6.12-rc2 |
972 |
|
/* aio_read_events
 *	Pull an event off of the ioctx's event ring.  Returns the number of
 *	events fetched
 */
static long aio_read_events_ring(struct kioctx *ctx,
				 struct io_event __user *event, long nr)
{
	struct aio_ring *ring;
	unsigned head, tail, pos;
	long ret = 0;
	int copy_ret;

	mutex_lock(&ctx->ring_lock);

	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	head = ring->head;
	tail = ring->tail;
	kunmap_atomic(ring);

	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);

	if (head == tail)
		goto out;

	while (ret < nr) {
		long avail;
		struct io_event *ev;
		struct page *page;

		/*
		 * Copy at most up to the end of the ring (the ring wraps),
		 * then clamp to what the caller asked for and to the events
		 * remaining on the current ring page.
		 */
		avail = (head <= tail ?  tail : ctx->nr_events) - head;
		if (head == tail)
			break;

		avail = min(avail, nr - ret);
		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
			    ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));

		pos = head + AIO_EVENTS_OFFSET;
		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
		pos %= AIO_EVENTS_PER_PAGE;

		ev = kmap(page);
		copy_ret = copy_to_user(event + ret, ev + pos,
					sizeof(*ev) * avail);
		kunmap(page);

		if (unlikely(copy_ret)) {
			ret = -EFAULT;
			goto out;
		}

		ret += avail;
		head += avail;
		head %= ctx->nr_events;
	}

	/* publish the new head so completions can reuse the slots */
	ring = kmap_atomic(ctx->ring_pages[0]);
	ring->head = head;
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);

	pr_debug("%li  h%u t%u\n", ret, head, tail);

	/* consumed events free up submission slots */
	put_reqs_available(ctx, ret);
out:
	mutex_unlock(&ctx->ring_lock);

	return ret;
}
/*
 * aio_read_events: condition helper for read_events()'s wait loop.
 * Copies up to nr - *i events into @event, accumulating the running
 * total in *i.  Returns true when the wait should stop: an error
 * occurred, the context died, or at least min_nr events were gathered.
 */
static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
			    struct io_event __user *event, long *i)
{
	long ret = aio_read_events_ring(ctx, event + *i, nr - *i);

	if (ret > 0)
		*i += ret;

	/* context torn down while we waited: report -EINVAL */
	if (unlikely(atomic_read(&ctx->dead)))
		ret = -EINVAL;

	/* nothing gathered yet: surface ret (zero or an error) directly */
	if (!*i)
		*i = ret;

	return ret < 0 || *i >= min_nr;
}
/*
 * read_events: common implementation of io_getevents().  Waits until at
 * least min_nr events are available, or the optional relative @timeout
 * (copied from userspace) expires, or a signal arrives.  Returns the
 * number of events copied to @event, or a negative error.
 */
static long read_events(struct kioctx *ctx, long min_nr, long nr,
			struct io_event __user *event,
			struct timespec __user *timeout)
{
	ktime_t until = { .tv64 = KTIME_MAX };	/* default: wait forever */
	long ret = 0;

	if (timeout) {
		struct timespec	ts;

		if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
			return -EFAULT;

		until = timespec_to_ktime(ts);
	}

	/*
	 * Note that aio_read_events() is being called as the conditional - i.e.
	 * we're calling it after prepare_to_wait() has set task state to
	 * TASK_INTERRUPTIBLE.
	 *
	 * But aio_read_events() can block, and if it blocks it's going to flip
	 * the task state back to TASK_RUNNING.
	 *
	 * This should be ok, provided it doesn't flip the state back to
	 * TASK_RUNNING and return 0 too much - that causes us to spin. That
	 * will only happen if the mutex_lock() call blocks, and we then find
	 * the ringbuffer empty. So in practice we should be ok, but it's
	 * something to be aware of when touching this code.
	 */
	wait_event_interruptible_hrtimeout(ctx->wait,
			aio_read_events(ctx, min_nr, nr, event, &ret), until);

	/* interrupted before anything was read: -EINTR, not 0 */
	if (!ret && signal_pending(current))
		ret = -EINTR;

	return ret;
}
/* sys_io_setup:
 *	Create an aio_context capable of receiving at least nr_events.
 *	ctxp must not point to an aio_context that already exists, and
 *	must be initialized to 0 prior to the call.  On successful
 *	creation of the aio_context, *ctxp is filled in with the resulting 
 *	handle.  May fail with -EINVAL if *ctxp is not initialized,
 *	if the specified nr_events exceeds internal limits.  May fail 
 *	with -EAGAIN if the specified nr_events exceeds the user's limit 
 *	of available events.  May fail with -ENOMEM if insufficient kernel
 *	resources are available.  May fail with -EFAULT if an invalid
 *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
 *	implemented.
 */
SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
{
	struct kioctx *ioctx = NULL;
	unsigned long ctx;
	long ret;

	ret = get_user(ctx, ctxp);
	if (unlikely(ret))
		goto out;

	/* *ctxp must have been zero-initialized by the caller */
	ret = -EINVAL;
	if (unlikely(ctx || nr_events == 0)) {
		pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
		         ctx, nr_events);
		goto out;
	}

	ioctx = ioctx_alloc(nr_events);
	ret = PTR_ERR(ioctx);
	if (!IS_ERR(ioctx)) {
		ret = put_user(ioctx->user_id, ctxp);
		if (ret)
			/* couldn't report the handle: tear the ctx down */
			kill_ioctx(current->mm, ioctx);
		/* drop the creation reference; the table holds its own */
		percpu_ref_put(&ioctx->users);
	}

out:
	return ret;
}

/* sys_io_destroy:
 *	Destroy the aio_context specified.  May cancel any outstanding 
 *	AIOs and block on completion.  Will fail with -ENOSYS if not
 *	implemented.  May fail with -EINVAL if the context pointed to
 *	is invalid.
 */
SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
	struct kioctx *ioctx = lookup_ioctx(ctx);
	if (likely(NULL != ioctx)) {
		kill_ioctx(current->mm, ioctx);
		/* drop the reference taken by lookup_ioctx() */
		percpu_ref_put(&ioctx->users);
		return 0;
	}
	pr_debug("EINVAL: io_destroy: invalid context id\n");
	return -EINVAL;
}
/* signature shared by the ->aio_read and ->aio_write file operations */
typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
			    unsigned long, loff_t);

/*
 * aio_setup_vectored_rw: copy in and validate the userspace iovec array
 * for an IOCB_CMD_PREADV/PWRITEV request.  On entry ki_nbytes holds the
 * segment count; on success it is rewritten to the total byte count and
 * *nr_segs holds the segment count.  *iovec points at the caller's
 * inline single-element buffer and may be replaced with a kmalloc'ed
 * array (which the caller must free).
 */
static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
				     int rw, char __user *buf,
				     unsigned long *nr_segs,
				     struct iovec **iovec,
				     bool compat)
{
	ssize_t ret;

	*nr_segs = kiocb->ki_nbytes;

#ifdef CONFIG_COMPAT
	if (compat)
		ret = compat_rw_copy_check_uvector(rw,
				(struct compat_iovec __user *)buf,
				*nr_segs, 1, *iovec, iovec);
	else
#endif
		ret = rw_copy_check_uvector(rw,
				(struct iovec __user *)buf,
				*nr_segs, 1, *iovec, iovec);
	if (ret < 0)
		return ret;

	/* ki_nbytes now reflect bytes instead of segs */
	kiocb->ki_nbytes = ret;
	return 0;
}
/*
 * aio_setup_single_vector: build a one-element iovec for a plain
 * IOCB_CMD_PREAD/PWRITE request.  access_ok() is given !rw so a READ
 * request checks writability of the user buffer and vice versa.
 */
static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
				       int rw, char __user *buf,
				       unsigned long *nr_segs,
				       struct iovec *iovec)
{
	if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
		return -EFAULT;

	iovec->iov_base = buf;
	iovec->iov_len = kiocb->ki_nbytes;
	*nr_segs = 1;
	return 0;
}
/*
 * aio_setup_iocb:
 *	Performs the initial checks and aio retry method
 *	setup for the kiocb at the time of io submission.
 */
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
			    char __user *buf, bool compat)
{
	struct file *file = req->ki_filp;
	ssize_t ret;
	unsigned long nr_segs;
	int rw;
	fmode_t mode;
	aio_rw_op *rw_op;
	struct iovec inline_vec, *iovec = &inline_vec;

	switch (opcode) {
	case IOCB_CMD_PREAD:
	case IOCB_CMD_PREADV:
		mode	= FMODE_READ;
		rw	= READ;
		rw_op	= file->f_op->aio_read;
		goto rw_common;

	case IOCB_CMD_PWRITE:
	case IOCB_CMD_PWRITEV:
		mode	= FMODE_WRITE;
		rw	= WRITE;
		rw_op	= file->f_op->aio_write;
		goto rw_common;
rw_common:
		if (unlikely(!(file->f_mode & mode)))
			return -EBADF;

		if (!rw_op)
			return -EINVAL;

		/* vectored ops parse an iovec array; plain ops build one */
		ret = (opcode == IOCB_CMD_PREADV ||
		       opcode == IOCB_CMD_PWRITEV)
			? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
						&iovec, compat)
			: aio_setup_single_vector(req, rw, buf,
						  &nr_segs, iovec);
		if (ret)
			return ret;

		ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
		if (ret < 0) {
			if (iovec != &inline_vec)
				kfree(iovec);
			return ret;
		}

		/* rw_verify_area() may shrink the byte count */
		req->ki_nbytes = ret;

		/* XXX: move/kill - rw_verify_area()? */
		/* This matches the pread()/pwrite() logic */
		if (req->ki_pos < 0) {
			ret = -EINVAL;
			break;
		}

		if (rw == WRITE)
			file_start_write(file);

		ret = rw_op(req, iovec, nr_segs, req->ki_pos);

		if (rw == WRITE)
			file_end_write(file);
		break;

	case IOCB_CMD_FDSYNC:
		if (!file->f_op->aio_fsync)
			return -EINVAL;

		ret = file->f_op->aio_fsync(req, 1);
		break;

	case IOCB_CMD_FSYNC:
		if (!file->f_op->aio_fsync)
			return -EINVAL;

		ret = file->f_op->aio_fsync(req, 0);
		break;

	default:
		pr_debug("EINVAL: no operation provided\n");
		return -EINVAL;
	}

	if (iovec != &inline_vec)
		kfree(iovec);

	/* -EIOCBQUEUED means the op will complete asynchronously later */
	if (ret != -EIOCBQUEUED) {
		/*
		 * There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
			     ret == -ERESTARTNOHAND ||
			     ret == -ERESTART_RESTARTBLOCK))
			ret = -EINTR;
		aio_complete(req, ret, 0);
	}

	return 0;
}
/*
 * io_submit_one: validate one iocb and submit it.  @user_iocb is the
 * userspace pointer (reported back in the completion event's ->obj);
 * @iocb is the kernel copy of it.  Returns 0 on successful submission.
 */
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
			 struct iocb *iocb, bool compat)
{
	struct kiocb *req;
	ssize_t ret;

	/* enforce forwards compatibility on users */
	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
		pr_debug("EINVAL: reserve field set\n");
		return -EINVAL;
	}

	/* prevent overflows */
	if (unlikely(
	    (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
	    (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
	    ((ssize_t)iocb->aio_nbytes < 0)
	   )) {
		pr_debug("EINVAL: io_submit: overflow check\n");
		return -EINVAL;
	}

	req = aio_get_req(ctx);
	if (unlikely(!req))
		return -EAGAIN;

	req->ki_filp = fget(iocb->aio_fildes);
	if (unlikely(!req->ki_filp)) {
		ret = -EBADF;
		goto out_put_req;
	}

	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
		/*
		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
		 * instance of the file* now. The file descriptor must be
		 * an eventfd() fd, and will be signaled for each completed
		 * event using the eventfd_signal() function.
		 */
		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
		if (IS_ERR(req->ki_eventfd)) {
			ret = PTR_ERR(req->ki_eventfd);
			/* clear it so kiocb_free() won't put an ERR_PTR */
			req->ki_eventfd = NULL;
			goto out_put_req;
		}
	}

	/* ki_key is gone; write the fixed KIOCB_KEY back to userspace */
	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
	if (unlikely(ret)) {
		pr_debug("EFAULT: aio_key\n");
		goto out_put_req;
	}

	req->ki_obj.user = user_iocb;
	req->ki_user_data = iocb->aio_data;
	req->ki_pos = iocb->aio_offset;
	req->ki_nbytes = iocb->aio_nbytes;

	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
			   (char __user *)(unsigned long)iocb->aio_buf,
			   compat);
	if (ret)
		goto out_put_req;

	return 0;
out_put_req:
	/* undo aio_get_req(): the slot, the ctx->reqs ref, and the kiocb */
	put_reqs_available(ctx, 1);
	percpu_ref_put(&ctx->reqs);
	kiocb_free(req);
	return ret;
}
/*
 * do_io_submit: common implementation of io_submit() for native and
 * compat callers.  Submits up to @nr iocbs from the userspace pointer
 * array @iocbpp; returns the number submitted, or an error only when
 * the very first submission fails.
 */
long do_io_submit(aio_context_t ctx_id, long nr,
		  struct iocb __user *__user *iocbpp, bool compat)
{
	struct kioctx *ctx;
	long ret = 0;
	int i = 0;
	struct blk_plug plug;

	if (unlikely(nr < 0))
		return -EINVAL;

	/* clamp so the nr*sizeof(*iocbpp) below cannot overflow */
	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
		nr = LONG_MAX/sizeof(*iocbpp);

	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
		return -EFAULT;

	ctx = lookup_ioctx(ctx_id);
	if (unlikely(!ctx)) {
		pr_debug("EINVAL: invalid context id\n");
		return -EINVAL;
	}

	/* batch block-layer submissions issued by the loop below */
	blk_start_plug(&plug);

	/*
	 * AKPM: should this return a partial result if some of the IOs were
	 * successfully submitted?
	 */
	for (i=0; i<nr; i++) {
		struct iocb __user *user_iocb;
		struct iocb tmp;

		if (unlikely(__get_user(user_iocb, iocbpp + i))) {
			ret = -EFAULT;
			break;
		}

		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
			ret = -EFAULT;
			break;
		}

		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
		if (ret)
			break;
	}
	blk_finish_plug(&plug);

	percpu_ref_put(&ctx->users);
	/* report partial success; the error only if nothing was queued */
	return i ? i : ret;
}
/* sys_io_submit:
 *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
 *	the number of iocbs queued.  May return -EINVAL if the aio_context
 *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
 *	*iocbpp[0] is not properly initialized, if the operation specified
 *	is invalid for the file descriptor in the iocb.  May fail with
 *	-EFAULT if any of the data structures point to invalid data.  May
 *	fail with -EBADF if the file descriptor specified in the first
 *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
 *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
 *	fail with -ENOSYS if not implemented.
 */
SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
		struct iocb __user * __user *, iocbpp)
{
	/* native entry point: iocbs are native struct iocb, compat=0 */
	return do_io_submit(ctx_id, nr, iocbpp, 0);
}
1da177e4c Linux-2.6.12-rc2 |
1438 1439 |
/* lookup_kiocb * Finds a given iocb for cancellation. |
1da177e4c Linux-2.6.12-rc2 |
1440 |
*/ |
25ee7e383 [PATCH] fs/aio.c:... |
1441 1442 |
static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) |
1da177e4c Linux-2.6.12-rc2 |
1443 1444 |
{ struct list_head *pos; |
d00689af6 [PATCH] aio: repl... |
1445 1446 |
assert_spin_locked(&ctx->ctx_lock); |
8a6608907 aio: kill ki_key |
1447 1448 |
if (key != KIOCB_KEY) return NULL; |
1da177e4c Linux-2.6.12-rc2 |
1449 1450 1451 |
/* TODO: use a hash or array, this sucks. */ list_for_each(pos, &ctx->active_reqs) { struct kiocb *kiocb = list_kiocb(pos); |
8a6608907 aio: kill ki_key |
1452 |
if (kiocb->ki_obj.user == iocb) |
1da177e4c Linux-2.6.12-rc2 |
1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 |
return kiocb; } return NULL; } /* sys_io_cancel: * Attempts to cancel an iocb previously passed to io_submit. If * the operation is successfully cancelled, the resulting event is * copied into the memory pointed to by result without being placed * into the completion queue and 0 is returned. May fail with * -EFAULT if any of the data structures pointed to are invalid. * May fail with -EINVAL if aio_context specified by ctx_id is * invalid. May fail with -EAGAIN if the iocb specified was not * cancelled. Will fail with -ENOSYS if not implemented. */ |
SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
		struct io_event __user *, result)
{
	struct kioctx *ctx;
	struct kiocb *kiocb;
	u32 key;
	int ret;

	ret = get_user(key, &iocb->aio_key);
	if (unlikely(ret))
		return -EFAULT;

	ctx = lookup_ioctx(ctx_id);
	if (unlikely(!ctx))
		return -EINVAL;

	/* ctx_lock protects active_reqs for the lookup and the cancel */
	spin_lock_irq(&ctx->ctx_lock);

	kiocb = lookup_kiocb(ctx, iocb, key);
	if (kiocb)
		ret = kiocb_cancel(ctx, kiocb);
	else
		ret = -EINVAL;

	spin_unlock_irq(&ctx->ctx_lock);

	if (!ret) {
		/*
		 * The result argument is no longer used - the io_event is
		 * always delivered via the ring buffer. -EINPROGRESS indicates
		 * cancellation is progress:
		 */
		ret = -EINPROGRESS;
	}

	/* drop the reference taken by lookup_ioctx() */
	percpu_ref_put(&ctx->users);

	return ret;
}

/* io_getevents:
 *	Attempts to read at least min_nr events and up to nr events from
642b5123a aio: fix wrong su... |
1508 1509 1510 1511 1512 1513 1514 1515 |
* the completion queue for the aio_context specified by ctx_id. If * it succeeds, the number of read events is returned. May fail with * -EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is * out of range, if timeout is out of range. May fail with -EFAULT * if any of the memory specified is invalid. May return 0 or * < min_nr if the timeout specified by timeout has elapsed * before sufficient events are available, where timeout == NULL * specifies an infinite timeout. Note that the timeout pointed to by |
6900807c6 aio: fix io_getev... |
1516 |
* timeout is relative. Will fail with -ENOSYS if not implemented. |
1da177e4c Linux-2.6.12-rc2 |
1517 |
*/ |
002c8976e [CVE-2009-0029] S... |
1518 1519 1520 1521 1522 |
SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, long, min_nr, long, nr, struct io_event __user *, events, struct timespec __user *, timeout) |
1da177e4c Linux-2.6.12-rc2 |
1523 1524 1525 1526 1527 |
{ struct kioctx *ioctx = lookup_ioctx(ctx_id); long ret = -EINVAL; if (likely(ioctx)) { |
2e4102559 aio: remove unnec... |
1528 |
if (likely(min_nr <= nr && min_nr >= 0)) |
1da177e4c Linux-2.6.12-rc2 |
1529 |
ret = read_events(ioctx, min_nr, nr, events, timeout); |
723be6e39 aio: percpu ioctx... |
1530 |
percpu_ref_put(&ioctx->users); |
1da177e4c Linux-2.6.12-rc2 |
1531 |
} |
1da177e4c Linux-2.6.12-rc2 |
1532 1533 |
return ret; } |