Blame view

fs/aio.c 38.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   *	An async IO implementation for Linux
   *	Written by Benjamin LaHaise <bcrl@kvack.org>
   *
   *	Implements an efficient asynchronous io interface.
   *
   *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
   *
   *	See ../COPYING for licensing terms.
   */
caf4167aa   Kent Overstreet   aio: dprintk() ->...
11
  #define pr_fmt(fmt) "%s: " fmt, __func__
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/errno.h>
  #include <linux/time.h>
  #include <linux/aio_abi.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
17
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/syscalls.h>
b9d128f10   Jens Axboe   block: move bdi/a...
19
  #include <linux/backing-dev.h>
027445c37   Badari Pulavarty   [PATCH] Vectorize...
20
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
23
24
25
26
  #include <linux/sched.h>
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
3d2d827f5   Michael S. Tsirkin   mm: move use_mm/u...
27
  #include <linux/mmu_context.h>
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
28
  #include <linux/percpu.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
31
32
33
34
  #include <linux/slab.h>
  #include <linux/timer.h>
  #include <linux/aio.h>
  #include <linux/highmem.h>
  #include <linux/workqueue.h>
  #include <linux/security.h>
9c3060bed   Davide Libenzi   signal/timer/even...
35
  #include <linux/eventfd.h>
cfb1e33ee   Jeff Moyer   aio: implement re...
36
  #include <linux/blkdev.h>
9d85cba71   Jeff Moyer   aio: fix the comp...
37
  #include <linux/compat.h>
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
38
39
  #include <linux/migrate.h>
  #include <linux/ramfs.h>
723be6e39   Kent Overstreet   aio: percpu ioctx...
40
  #include <linux/percpu-refcount.h>
71ad7490c   Benjamin LaHaise   rework aio migrat...
41
  #include <linux/mount.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
  
  #include <asm/kmap_types.h>
  #include <asm/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45

68d70d03f   Al Viro   constify rw_verif...
46
  #include "internal.h"
4e179bca6   Kent Overstreet   aio: move private...
47
48
49
50
51
52
  /* Value stored in aio_ring.magic by aio_setup_ring(). */
  #define AIO_RING_MAGIC			0xa10a10a1
  /* Values stored in aio_ring.compat_features / aio_ring.incompat_features. */
  #define AIO_RING_COMPAT_FEATURES	1
  #define AIO_RING_INCOMPAT_FEATURES	0
  /*
   * Header found at the start of the first ring page (ring_pages[0]),
   * immediately followed by the io_event array.  Every field is initialised
   * by aio_setup_ring(); id is later overwritten by ioctx_add_table().
   */
  struct aio_ring {
  	unsigned	id;	/* kernel internal index number */
  	unsigned	nr;	/* number of io_events */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
53
54
  	unsigned	head;	/* Written to by userland or under ring_lock
  				 * mutex by aio_read_events_ring(). */
4e179bca6   Kent Overstreet   aio: move private...
55
56
57
58
59
60
61
62
63
64
65
66
  	unsigned	tail;
  
  	unsigned	magic;			/* AIO_RING_MAGIC */
  	unsigned	compat_features;	/* AIO_RING_COMPAT_FEATURES */
  	unsigned	incompat_features;	/* AIO_RING_INCOMPAT_FEATURES */
  	unsigned	header_length;	/* size of aio_ring */
  
  
  	struct io_event		io_events[0];
  }; /* header (eight unsigned fields) + nr io_events */
  
  /*
   * Size of kioctx.internal_pages[]; aio_setup_ring() only allocates a
   * separate ring_pages array when the ring needs more pages than this.
   */
  #define AIO_RING_PAGES	8
4e179bca6   Kent Overstreet   aio: move private...
67

db446a08c   Benjamin LaHaise   aio: convert the ...
68
69
70
71
72
  /*
   * Array of the kioctx pointers that belong to one mm, reached through
   * mm->ioctx_table.  ioctx_add_table() fills the first NULL slot and, when
   * none is left, installs a larger copy and releases the old table with
   * kfree_rcu().
   */
  struct kioctx_table {
  	struct rcu_head		rcu;		/* used by kfree_rcu() */
  	unsigned int		nr;		/* number of slots in table[] */
  	struct kioctx		*table[];	/* NULL marks a free slot */
  };
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
73
74
75
  /* Per-cpu part of a kioctx; allocated with alloc_percpu() in ioctx_alloc(). */
  struct kioctx_cpu {
  	/*
  	 * Ring slots currently held by this cpu.  Slots are moved to/from the
  	 * global counter kioctx.req_batch at a time.
  	 */
  	unsigned int		reqs_available;
  };
4e179bca6   Kent Overstreet   aio: move private...
76
  /*
   * State of one AIO context.  Allocated from kioctx_cachep and initialised
   * by ioctx_alloc(); torn down by free_ioctx().
   */
  struct kioctx {
723be6e39   Kent Overstreet   aio: percpu ioctx...
77
  	/* Release callback is free_ioctx_users(). */
  	struct percpu_ref	users;
36f558890   Kent Overstreet   aio: refcounting ...
78
  	atomic_t		dead;
4e179bca6   Kent Overstreet   aio: move private...
79

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
80
  	/* Release callback is free_ioctx_reqs(). */
  	struct percpu_ref	reqs;
4e179bca6   Kent Overstreet   aio: move private...
81
  	/* Set to mmap_base by aio_setup_ring(). */
  	unsigned long		user_id;
4e179bca6   Kent Overstreet   aio: move private...
82

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
83
84
85
86
87
88
89
  	struct __percpu kioctx_cpu *cpu;
  
  	/*
  	 * For percpu reqs_available, number of slots we move to/from global
  	 * counter at a time:
  	 */
  	unsigned		req_batch;
3e845ce01   Kent Overstreet   aio: change reqs_...
90
91
92
93
  	/*
  	 * This is what userspace passed to io_setup(), it's not used for
  	 * anything but counting against the global max_reqs quota.
  	 *
58c85dc20   Kent Overstreet   aio: kill struct ...
94
  	 * The real limit is nr_events - 1, which will be larger (see
3e845ce01   Kent Overstreet   aio: change reqs_...
95
96
  	 * aio_setup_ring())
  	 */
4e179bca6   Kent Overstreet   aio: move private...
97
  	unsigned		max_reqs;
58c85dc20   Kent Overstreet   aio: kill struct ...
98
99
  	/* Size of ringbuffer, in units of struct io_event */
  	unsigned		nr_events;
4e179bca6   Kent Overstreet   aio: move private...
100

58c85dc20   Kent Overstreet   aio: kill struct ...
101
102
103
104
105
  	unsigned long		mmap_base;
  	unsigned long		mmap_size;
  
  	struct page		**ring_pages;
  	long			nr_pages;
723be6e39   Kent Overstreet   aio: percpu ioctx...
106
  	/* Queued by free_ioctx_reqs() to run free_ioctx(). */
  	struct work_struct	free_work;
4e23bcaeb   Kent Overstreet   aio: give shared ...
107
108
  
  	struct {
34e83fc61   Kent Overstreet   aio: reqs_active ...
109
110
111
112
113
  		/*
  		 * This counts the number of available slots in the ringbuffer,
  		 * so we avoid overflowing it: it's decremented (if positive)
  		 * when allocating a kiocb and incremented when the resulting
  		 * io_event is pulled off the ringbuffer.
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
114
115
  		 *
  		 * We batch accesses to it with a percpu version.
34e83fc61   Kent Overstreet   aio: reqs_active ...
116
117
  		 */
  		atomic_t	reqs_available;
4e23bcaeb   Kent Overstreet   aio: give shared ...
118
119
120
121
122
123
  	} ____cacheline_aligned_in_smp;
  
  	struct {
  		spinlock_t	ctx_lock;
  		struct list_head active_reqs;	/* used for cancellation */
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
124
125
  	struct {
  		struct mutex	ring_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
126
127
  		wait_queue_head_t wait;
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
128
129
130
131
  
  	struct {
  		unsigned	tail;
  		spinlock_t	completion_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
132
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
133
134
  
  	/* Used as ring_pages when at most AIO_RING_PAGES pages are needed. */
  	struct page		*internal_pages[AIO_RING_PAGES];
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
135
  	/* File created by aio_private_file() that backs the ring pages. */
  	struct file		*aio_ring_file;
db446a08c   Benjamin LaHaise   aio: convert the ...
136
137
  
  	/* Slot index in mm->ioctx_table, assigned by ioctx_add_table(). */
  	unsigned		id;
4e179bca6   Kent Overstreet   aio: move private...
138
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139
  /*------ sysctl variables----*/
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
140
141
142
  /* Held around every update of aio_nr (see aio_nr_sub() and ioctx_alloc()). */
  static DEFINE_SPINLOCK(aio_nr_lock);
  unsigned long aio_nr;		/* current system wide number of aio requests */
  /* ioctx_alloc() compares the doubled request count against aio_max_nr * 2. */
  unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
143
  /*----end sysctl variables---*/
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
144
145
  /* Slab caches for struct kiocb and struct kioctx, created in aio_setup(). */
  static struct kmem_cache	*kiocb_cachep;
  static struct kmem_cache	*kioctx_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146

71ad7490c   Benjamin LaHaise   rework aio migrat...
147
148
149
150
151
152
153
154
155
156
157
  /* Kernel-internal mount of the "aio" filesystem, set up by aio_setup(). */
  static struct vfsmount *aio_mnt;
  
  /* Forward declarations; both tables are defined further down in this file. */
  static const struct file_operations aio_ring_fops;
  static const struct address_space_operations aio_ctx_aops;
  
  /*
   * Create the file that backs the ring of @ctx: an anonymous inode on the
   * aio mount, sized to @nr_pages pages, attached to a pseudo dentry named
   * "[aio]" and opened read/write with aio_ring_fops.  @ctx is stored both in
   * the mapping's private_data and in the file's private_data.
   *
   * Returns the new file, or an ERR_PTR on failure.
   */
  static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
  {
  	struct qstr this = QSTR_INIT("[aio]", 5);
  	struct file *file;
  	struct path path;
  	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
7f62656be   Dan Carpenter   aio: checking for...
158
159
  	if (IS_ERR(inode))
  		return ERR_CAST(inode);
71ad7490c   Benjamin LaHaise   rework aio migrat...
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
  
  	inode->i_mapping->a_ops = &aio_ctx_aops;
  	inode->i_mapping->private_data = ctx;
  	inode->i_size = PAGE_SIZE * nr_pages;
  
  	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
  	if (!path.dentry) {
  		/* The inode is not attached to a dentry yet: drop it directly. */
  		iput(inode);
  		return ERR_PTR(-ENOMEM);
  	}
  	path.mnt = mntget(aio_mnt);
  
  	d_instantiate(path.dentry, inode);
  	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
  	if (IS_ERR(file)) {
  		/* Releases the mount and dentry references held in path. */
  		path_put(&path);
  		return file;
  	}
  
  	file->f_flags = O_RDWR;
  	file->private_data = ctx;
  	return file;
  }
  
  static struct dentry *aio_mount(struct file_system_type *fs_type,
  				int flags, const char *dev_name, void *data)
  {
  	static const struct dentry_operations ops = {
  		.d_dname	= simple_dname,
  	};
  	return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
192
193
194
195
196
197
  /* aio_setup
   *	Mounts the internal "aio" filesystem and creates the slab caches
   *	used by the aio routines, panic on failure as this is done early
   *	during the boot sequence.  Always returns 0.
   */
  static int __init aio_setup(void)
  {
71ad7490c   Benjamin LaHaise   rework aio migrat...
198
199
200
201
202
203
204
205
  	static struct file_system_type aio_fs = {
  		.name		= "aio",
  		.mount		= aio_mount,
  		.kill_sb	= kill_anon_super,
  	};
  	aio_mnt = kern_mount(&aio_fs);
  	if (IS_ERR(aio_mnt))
  		panic("Failed to create aio fs mount.");
0a31bd5f2   Christoph Lameter   KMEM_CACHE(): sim...
206
207
  	/* SLAB_PANIC is passed, so no failure check follows these calls. */
  	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
  	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208

caf4167aa   Kent Overstreet   aio: dprintk() ->...
209
210
  	pr_debug("sizeof(struct page) = %zu
  ", sizeof(struct page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
212
213
  
  	return 0;
  }
385773e04   H Hartley Sweeten   aio.c: move EXPOR...
214
  __initcall(aio_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215

5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
  static void put_aio_ring_file(struct kioctx *ctx)
  {
  	struct file *aio_ring_file = ctx->aio_ring_file;
  	if (aio_ring_file) {
  		truncate_setsize(aio_ring_file->f_inode, 0);
  
  		/* Prevent further access to the kioctx from migratepages */
  		spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
  		aio_ring_file->f_inode->i_mapping->private_data = NULL;
  		ctx->aio_ring_file = NULL;
  		spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
  
  		fput(aio_ring_file);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
231
232
  /*
   * Release the ring of @ctx: detach the ring file, drop the reference held
   * on every ring page and free the ring_pages array if it was allocated
   * separately from ctx->internal_pages.
   */
  static void aio_free_ring(struct kioctx *ctx)
  {
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
233
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
235
236
237
238
  	/* Disconnect the kioctx from the ring file.  This prevents future
  	 * accesses to the kioctx from page migration.
  	 */
  	put_aio_ring_file(ctx);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
239
  	for (i = 0; i < ctx->nr_pages; i++) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
240
  		struct page *page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
241
242
243
  		/* NOTE(review): page_count() is evaluated on ring_pages[i]
  		 * before the NULL check below - confirm an entry cannot be
  		 * NULL here when debug output is enabled.
  		 */
  		pr_debug("pid(%d) [%d] page->count=%d
  ", current->pid, i,
  				page_count(ctx->ring_pages[i]));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
244
245
246
247
248
  		page = ctx->ring_pages[i];
  		if (!page)
  			continue;
  		ctx->ring_pages[i] = NULL;
  		put_page(page);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
249
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250

ddb8c45ba   Sasha Levin   aio: nullify aio-...
251
  	/* internal_pages is embedded in the kioctx and must not be kfree()d. */
  	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
58c85dc20   Kent Overstreet   aio: kill struct ...
252
  		kfree(ctx->ring_pages);
ddb8c45ba   Sasha Levin   aio: nullify aio-...
253
254
  		ctx->ring_pages = NULL;
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
  }
  
  /*
   * ->mmap() of the ring file: installs generic_file_vm_ops on @vma and
   * reports success.  @file is not used.
   */
  static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
  {
  	vma->vm_ops = &generic_file_vm_ops;

  	return 0;
  }
  
  /* File operations of the ring file; mmap is the only one provided. */
  static const struct file_operations aio_ring_fops = {
  	.mmap	= aio_ring_mmap,
  };
  
  /*
   * ->set_page_dirty() of the ring mapping.  Performs no work on @page and
   * always returns 0.
   */
  static int aio_set_page_dirty(struct page *page)
  {
  	/* Nothing to do for ring pages. */
  	return 0;
  }
0c45355fc   Benjamin LaHaise   aio: fix build wh...
271
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
272
273
274
  /*
   * ->migratepage() of the ring mapping: replace ring page @old by @new.
   *
   * Lock order is mapping->private_lock, then ctx->ring_lock (trylock only),
   * then ctx->completion_lock around the copy and the ring_pages[] update.
   *
   * Returns -EINVAL when the mapping has no kioctx any more or @old's index
   * is outside the ring, -EAGAIN when ring_lock is contended or the slot no
   * longer holds @old, and otherwise the result of
   * migrate_page_move_mapping() (MIGRATEPAGE_SUCCESS once the page has been
   * switched).
   */
  static int aio_migratepage(struct address_space *mapping, struct page *new,
  			struct page *old, enum migrate_mode mode)
  {
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
275
  	struct kioctx *ctx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
276
  	unsigned long flags;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
277
  	pgoff_t idx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
278
  	int rc;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
279
  	rc = 0;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
280
  	/* mapping->private_lock here protects against the kioctx teardown.  */
8e321fefb   Benjamin LaHaise   aio/migratepages:...
281
282
  	spin_lock(&mapping->private_lock);
  	ctx = mapping->private_data;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
  	if (!ctx) {
  		rc = -EINVAL;
  		goto out;
  	}
  
  	/* Take the ring_lock mutex.  This prevents aio_read_events() from
  	 * writing to the ring's head, and prevents page migration from
  	 * mucking in a partially initialized kioctx.
  	 */
  	if (!mutex_trylock(&ctx->ring_lock)) {
  		rc = -EAGAIN;
  		goto out;
  	}
  
  	idx = old->index;
  	if (idx < (pgoff_t)ctx->nr_pages) {
  		/* Make sure the old page hasn't already been changed */
  		if (ctx->ring_pages[idx] != old)
  			rc = -EAGAIN;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
302
303
  	} else
  		rc = -EINVAL;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
304
305
  
  	if (rc != 0)
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
306
  		goto out_unlock;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
307

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
308
309
  	/* Writeback must be complete */
  	BUG_ON(PageWriteback(old));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
310
  	/* Reference for the ring_pages[] slot; dropped again on failure. */
  	get_page(new);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
311

8e321fefb   Benjamin LaHaise   aio/migratepages:...
312
  	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
313
  	if (rc != MIGRATEPAGE_SUCCESS) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
314
  		put_page(new);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
315
  		goto out_unlock;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
316
  	}
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
317
318
319
  	/* Take completion_lock to prevent other writes to the ring buffer
  	 * while the old page is copied to the new.  This prevents new
  	 * events from being lost.
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
320
  	 */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
321
322
323
324
325
  	spin_lock_irqsave(&ctx->completion_lock, flags);
  	migrate_page_copy(new, old);
  	BUG_ON(ctx->ring_pages[idx] != old);
  	ctx->ring_pages[idx] = new;
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
326

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
327
328
  	/* The old page is no longer accessible. */
  	put_page(old);
8e321fefb   Benjamin LaHaise   aio/migratepages:...
329

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
330
331
332
333
  out_unlock:
  	mutex_unlock(&ctx->ring_lock);
  out:
  	spin_unlock(&mapping->private_lock);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
334
  	return rc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
335
  }
0c45355fc   Benjamin LaHaise   aio: fix build wh...
336
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
338
339
  /*
   * Address space operations installed on the ring inode's mapping by
   * aio_private_file().  The migratepage hook only exists when
   * CONFIG_MIGRATION is enabled.
   */
  static const struct address_space_operations aio_ctx_aops = {
  	.set_page_dirty = aio_set_page_dirty,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
340
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
341
  	.migratepage	= aio_migratepage,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
342
  #endif
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
343
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
345
346
  /*
   * Build the completion ring of @ctx, sized from ctx->max_reqs:
   *  - create the backing file (aio_private_file()),
   *  - populate its page cache and record the pages in ctx->ring_pages,
   *  - mmap the file into current->mm,
   *  - fill in the aio_ring header in the first page.
   *
   * On success ctx->mmap_base, mmap_size, user_id, nr_events and nr_pages are
   * set and 0 is returned.  Returns -EINVAL if the computed page count is
   * negative and -ENOMEM for every other failure.
   */
  static int aio_setup_ring(struct kioctx *ctx)
  {
  	struct aio_ring *ring;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
347
  	unsigned nr_events = ctx->max_reqs;
41003a7bc   Zach Brown   aio: remove retry...
348
  	struct mm_struct *mm = current->mm;
3dc9acb67   Linus Torvalds   aio: clean up and...
349
  	unsigned long size, unused;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
350
  	int nr_pages;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
351
352
  	int i;
  	struct file *file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
353
354
355
356
357
358
  
  	/* Compensate for the ring buffer's head/tail overlap entry */
  	nr_events += 2;	/* 1 is required, 2 for good luck */
  
  	size = sizeof(struct aio_ring);
  	size += sizeof(struct io_event) * nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
359

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
360
  	nr_pages = PFN_UP(size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
361
362
  	if (nr_pages < 0)
  		return -EINVAL;
71ad7490c   Benjamin LaHaise   rework aio migrat...
363
  	file = aio_private_file(ctx, nr_pages);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
364
365
  	if (IS_ERR(file)) {
  		ctx->aio_ring_file = NULL;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
366
  		/* The error code carried by file is not propagated. */
  		return -ENOMEM;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
367
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
368
369
370
371
372
373
374
375
376
377
378
379
380
  	ctx->aio_ring_file = file;
  	/* Number of events that fit in the whole pages being allocated. */
  	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
  			/ sizeof(struct io_event);
  
  	/* Use the embedded array unless the ring needs more entries. */
  	ctx->ring_pages = ctx->internal_pages;
  	if (nr_pages > AIO_RING_PAGES) {
  		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
  					  GFP_KERNEL);
  		if (!ctx->ring_pages) {
  			put_aio_ring_file(ctx);
  			return -ENOMEM;
  		}
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
381
382
383
384
385
386
387
388
389
390
391
392
  	for (i = 0; i < nr_pages; i++) {
  		struct page *page;
  		page = find_or_create_page(file->f_inode->i_mapping,
  					   i, GFP_HIGHUSER | __GFP_ZERO);
  		if (!page)
  			break;
  		pr_debug("pid(%d) page[%d]->count=%d
  ",
  			 current->pid, i, page_count(page));
  		SetPageUptodate(page);
  		SetPageDirty(page);
  		unlock_page(page);
3dc9acb67   Linus Torvalds   aio: clean up and...
393
394
  
  		ctx->ring_pages[i] = page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
395
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
396
  	/* Number of pages actually obtained; aio_free_ring() walks this many. */
  	ctx->nr_pages = i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
397

3dc9acb67   Linus Torvalds   aio: clean up and...
398
399
  	if (unlikely(i != nr_pages)) {
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
400
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
401
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
402
403
404
  	ctx->mmap_size = nr_pages * PAGE_SIZE;
  	pr_debug("attempting mmap of %lu bytes
  ", ctx->mmap_size);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
405

41003a7bc   Zach Brown   aio: remove retry...
406
  	down_write(&mm->mmap_sem);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
407
408
  	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
  				       PROT_READ | PROT_WRITE,
3dc9acb67   Linus Torvalds   aio: clean up and...
409
410
  				       MAP_SHARED, 0, &unused);
  	up_write(&mm->mmap_sem);
58c85dc20   Kent Overstreet   aio: kill struct ...
411
  	if (IS_ERR((void *)ctx->mmap_base)) {
58c85dc20   Kent Overstreet   aio: kill struct ...
412
  		ctx->mmap_size = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
414
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
416
417
  	pr_debug("mmap address: 0x%08lx
  ", ctx->mmap_base);
d6c355c7d   Benjamin LaHaise   aio: fix race in ...
418

58c85dc20   Kent Overstreet   aio: kill struct ...
419
420
  	ctx->user_id = ctx->mmap_base;
  	ctx->nr_events = nr_events; /* trusted copy */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421

58c85dc20   Kent Overstreet   aio: kill struct ...
422
  	ring = kmap_atomic(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
423
  	ring->nr = nr_events;	/* user copy */
db446a08c   Benjamin LaHaise   aio: convert the ...
424
  	/* Placeholder; ioctx_add_table() stores the real slot index later. */
  	ring->id = ~0U;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
426
427
428
429
  	ring->head = ring->tail = 0;
  	ring->magic = AIO_RING_MAGIC;
  	ring->compat_features = AIO_RING_COMPAT_FEATURES;
  	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
  	ring->header_length = sizeof(struct aio_ring);
e8e3c3d66   Cong Wang   fs: remove the se...
430
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
431
  	flush_dcache_page(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
432
433
434
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
436
437
  /* io_events that fit in one whole page. */
  #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
  /* io_events that fit in a page that also holds the aio_ring header. */
  #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
  /* How many fewer io_events fit in such a page than in a whole page. */
  #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
0460fef2a   Kent Overstreet   aio: use cancella...
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
  /*
   * Install @cancel as the cancellation callback of @req.  If the request is
   * not linked yet (ki_list.next is still NULL) it is first added to the
   * active_reqs list of its context.  Both steps run under ctx_lock with
   * interrupts disabled.
   */
  void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
  {
  	struct kioctx *ioctx = req->ki_ctx;
  	unsigned long irq_flags;

  	spin_lock_irqsave(&ioctx->ctx_lock, irq_flags);

  	if (req->ki_list.next == NULL)
  		list_add(&req->ki_list, &ioctx->active_reqs);
  	req->ki_cancel = cancel;

  	spin_unlock_irqrestore(&ioctx->ctx_lock, irq_flags);
  }
  EXPORT_SYMBOL(kiocb_set_cancel_fn);
bec68faaf   Kent Overstreet   aio: io_cancel() ...
453
  /*
   * Mark @kiocb as cancelled and run its cancel callback.
   *
   * ki_cancel is swapped to KIOCB_CANCELLED with cmpxchg(), retrying if the
   * field changed in between, so the callback that was installed is invoked
   * by exactly the caller whose cmpxchg() succeeded.  @ctx is not used by the
   * body.
   *
   * Returns -EINVAL when no callback is set or the request is already marked
   * cancelled, otherwise whatever the callback returns.
   */
  static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
906b973cf   Kent Overstreet   aio: add kiocb_ca...
454
  {
0460fef2a   Kent Overstreet   aio: use cancella...
455
  	kiocb_cancel_fn *old, *cancel;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
456

0460fef2a   Kent Overstreet   aio: use cancella...
457
458
459
460
461
462
463
464
  	/*
  	 * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
  	 * actually has a cancel function, hence the cmpxchg()
  	 */
  
  	cancel = ACCESS_ONCE(kiocb->ki_cancel);
  	do {
  		if (!cancel || cancel == KIOCB_CANCELLED)
57282d8fd   Kent Overstreet   aio: Kill ki_users
465
  			return -EINVAL;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
466

0460fef2a   Kent Overstreet   aio: use cancella...
467
468
469
  		old = cancel;
  		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
  	} while (cancel != old);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
470

57282d8fd   Kent Overstreet   aio: Kill ki_users
471
  	/* Here cancel == old, i.e. the callback that was just replaced. */
  	return cancel(kiocb);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
472
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
473
  /*
   * Work function queued by free_ioctx_reqs(): final teardown of a kioctx.
   * Frees the ring, the per-cpu data and then the kioctx itself.
   */
  static void free_ioctx(struct work_struct *work)
36f558890   Kent Overstreet   aio: refcounting ...
474
  {
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
475
  	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
476

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
477
478
  	pr_debug("freeing %p
  ", ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
479

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
480
  	aio_free_ring(ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
481
  	free_percpu(ctx->cpu);
36f558890   Kent Overstreet   aio: refcounting ...
482
483
  	kmem_cache_free(kioctx_cachep, ctx);
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
484
485
486
487
488
489
490
  /*
   * Release callback of ctx->reqs (registered in ioctx_alloc()).  The actual
   * teardown, free_ioctx(), is handed to a workqueue via ctx->free_work.
   */
  static void free_ioctx_reqs(struct percpu_ref *ref)
  {
  	struct kioctx *ioctx;

  	ioctx = container_of(ref, struct kioctx, reqs);
  	INIT_WORK(&ioctx->free_work, free_ioctx);
  	schedule_work(&ioctx->free_work);
  }
36f558890   Kent Overstreet   aio: refcounting ...
491
492
493
494
495
  /*
   * Release callback of ctx->users (registered in ioctx_alloc()).
   *
   * When this function runs, the kioctx has been removed from the "hash table"
   * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
   * now it's safe to cancel any that need to be.
   *
   * Every request still on active_reqs is unlinked and passed to
   * kiocb_cancel() under ctx_lock; afterwards ctx->reqs is killed and one
   * reference on it is dropped.
   */
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
496
  static void free_ioctx_users(struct percpu_ref *ref)
36f558890   Kent Overstreet   aio: refcounting ...
497
  {
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
498
  	struct kioctx *ctx = container_of(ref, struct kioctx, users);
36f558890   Kent Overstreet   aio: refcounting ...
499
500
501
502
503
504
505
506
507
  	struct kiocb *req;
  
  	spin_lock_irq(&ctx->ctx_lock);
  
  	while (!list_empty(&ctx->active_reqs)) {
  		req = list_first_entry(&ctx->active_reqs,
  				       struct kiocb, ki_list);
  
  		list_del_init(&req->ki_list);
bec68faaf   Kent Overstreet   aio: io_cancel() ...
508
  		/* The return value of kiocb_cancel() is ignored here. */
  		kiocb_cancel(ctx, req);
36f558890   Kent Overstreet   aio: refcounting ...
509
510
511
  	}
  
  	spin_unlock_irq(&ctx->ctx_lock);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
512
513
  	percpu_ref_kill(&ctx->reqs);
  	/* Drops the ctx->reqs reference taken in ioctx_alloc(). */
  	percpu_ref_put(&ctx->reqs);
36f558890   Kent Overstreet   aio: refcounting ...
514
  }
db446a08c   Benjamin LaHaise   aio: convert the ...
515
516
517
518
519
520
521
  /*
   * Store @ctx in the first free slot of mm->ioctx_table and record the slot
   * index in ctx->id and in the id field of the ring header.
   *
   * When there is no table yet, or no free slot, a new table is allocated
   * with ioctx_lock dropped (the allocation uses GFP_KERNEL) and the search
   * is retried: the first table has 4 slots and each replacement has four
   * times as many as the table it was sized from.  If the table found after
   * re-taking the lock is already at least as large, the new allocation is
   * discarded and the existing table is searched instead.
   *
   * Returns 0 on success or -ENOMEM if a table allocation fails.
   */
  static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  {
  	unsigned i, new_nr;
  	struct kioctx_table *table, *old;
  	struct aio_ring *ring;
  
  	spin_lock(&mm->ioctx_lock);
d9b2c8714   Artem Savkov   aio: rcu_read_loc...
522
  	rcu_read_lock();
77d30b14d   Benjamin LaHaise   aio: fix rcu spar...
523
  	table = rcu_dereference(mm->ioctx_table);
db446a08c   Benjamin LaHaise   aio: convert the ...
524
525
526
527
528
529
530
  
  	while (1) {
  		if (table)
  			for (i = 0; i < table->nr; i++)
  				if (!table->table[i]) {
  					ctx->id = i;
  					table->table[i] = ctx;
d9b2c8714   Artem Savkov   aio: rcu_read_loc...
531
  					rcu_read_unlock();
db446a08c   Benjamin LaHaise   aio: convert the ...
532
  					spin_unlock(&mm->ioctx_lock);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
533
534
535
536
  					/* While kioctx setup is in progress,
  					 * we are protected from page migration
  					 * changing ring_pages by ->ring_lock.
  					 */
db446a08c   Benjamin LaHaise   aio: convert the ...
537
538
539
540
541
542
543
  					ring = kmap_atomic(ctx->ring_pages[0]);
  					ring->id = ctx->id;
  					kunmap_atomic(ring);
  					return 0;
  				}
  
  		new_nr = (table ? table->nr : 1) * 4;
d9b2c8714   Artem Savkov   aio: rcu_read_loc...
544
  		rcu_read_unlock();
db446a08c   Benjamin LaHaise   aio: convert the ...
545
546
547
548
549
550
551
552
553
554
  		spin_unlock(&mm->ioctx_lock);
  
  		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
  				new_nr, GFP_KERNEL);
  		if (!table)
  			return -ENOMEM;
  
  		table->nr = new_nr;
  
  		spin_lock(&mm->ioctx_lock);
d9b2c8714   Artem Savkov   aio: rcu_read_loc...
555
  		rcu_read_lock();
77d30b14d   Benjamin LaHaise   aio: fix rcu spar...
556
  		old = rcu_dereference(mm->ioctx_table);
db446a08c   Benjamin LaHaise   aio: convert the ...
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
  
  		if (!old) {
  			rcu_assign_pointer(mm->ioctx_table, table);
  		} else if (table->nr > old->nr) {
  			memcpy(table->table, old->table,
  			       old->nr * sizeof(struct kioctx *));
  
  			rcu_assign_pointer(mm->ioctx_table, table);
  			kfree_rcu(old, rcu);
  		} else {
  			/* The current table is already big enough: use it. */
  			kfree(table);
  			table = old;
  		}
  	}
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
572
573
574
575
576
577
578
579
580
  /*
   * Take @nr off the system-wide aio_nr count while holding aio_nr_lock.
   * If the subtraction would wrap below zero, a warning is emitted and the
   * count is set to 0 instead.
   */
  static void aio_nr_sub(unsigned nr)
  {
  	unsigned long remaining;

  	spin_lock(&aio_nr_lock);

  	remaining = aio_nr - nr;
  	/* An unsigned result larger than the minuend means it wrapped. */
  	if (WARN_ON(remaining > aio_nr))
  		remaining = 0;
  	aio_nr = remaining;

  	spin_unlock(&aio_nr_lock);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
581
582
583
584
585
  /* ioctx_alloc
   *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
   */
  static struct kioctx *ioctx_alloc(unsigned nr_events)
  {
41003a7bc   Zach Brown   aio: remove retry...
586
  	struct mm_struct *mm = current->mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
587
  	struct kioctx *ctx;
e23754f88   Al Viro   aio: don't bother...
588
  	int err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
589

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
590
591
592
593
594
595
596
597
598
599
600
  	/*
  	 * We keep track of the number of available ringbuffer slots, to prevent
  	 * overflow (reqs_available), and we also use percpu counters for this.
  	 *
  	 * So since up to half the slots might be on other cpu's percpu counters
  	 * and unavailable, double nr_events so userspace sees what they
  	 * expected: additionally, we move req_batch slots to/from percpu
  	 * counters at a time, so make sure that isn't 0:
  	 */
  	nr_events = max(nr_events, num_possible_cpus() * 4);
  	nr_events *= 2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
601
602
603
604
605
606
607
  	/* Prevent overflows */
  	if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
  	    (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
  		pr_debug("ENOMEM: nr_events too high
  ");
  		return ERR_PTR(-EINVAL);
  	}
4cd81c3df   Benjamin LaHaise   aio: double aio_m...
608
  	if (!nr_events || (unsigned long)nr_events > (aio_max_nr * 2UL))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
609
  		return ERR_PTR(-EAGAIN);
c37622296   Robert P. J. Day   [PATCH] Transform...
610
  	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
611
612
  	if (!ctx)
  		return ERR_PTR(-ENOMEM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
613
  	ctx->max_reqs = nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
614

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
615
  	spin_lock_init(&ctx->ctx_lock);
0460fef2a   Kent Overstreet   aio: use cancella...
616
  	spin_lock_init(&ctx->completion_lock);
58c85dc20   Kent Overstreet   aio: kill struct ...
617
  	mutex_init(&ctx->ring_lock);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
618
619
620
  	/* Protect against page migration throughout kiotx setup by keeping
  	 * the ring_lock mutex held until setup is complete. */
  	mutex_lock(&ctx->ring_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
621
622
623
  	init_waitqueue_head(&ctx->wait);
  
  	INIT_LIST_HEAD(&ctx->active_reqs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
624

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
625
626
627
628
629
  	if (percpu_ref_init(&ctx->users, free_ioctx_users))
  		goto err;
  
  	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
  		goto err;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
630
631
  	ctx->cpu = alloc_percpu(struct kioctx_cpu);
  	if (!ctx->cpu)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
632
  		goto err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
633

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
634
635
  	err = aio_setup_ring(ctx);
  	if (err < 0)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
636
  		goto err;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
637

34e83fc61   Kent Overstreet   aio: reqs_active ...
638
  	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
639
  	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
6878ea72a   Benjamin LaHaise   aio: be defensive...
640
641
  	if (ctx->req_batch < 1)
  		ctx->req_batch = 1;
34e83fc61   Kent Overstreet   aio: reqs_active ...
642

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
643
  	/* limit the number of system wide aios */
9fa1cb397   Al Viro   aio: aio_nr_lock ...
644
  	spin_lock(&aio_nr_lock);
4cd81c3df   Benjamin LaHaise   aio: double aio_m...
645
  	if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
2dd542b7a   Al Viro   aio: aio_nr decre...
646
  	    aio_nr + nr_events < aio_nr) {
9fa1cb397   Al Viro   aio: aio_nr_lock ...
647
  		spin_unlock(&aio_nr_lock);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
648
  		err = -EAGAIN;
d1b943271   Gu Zheng   aio: clean up aio...
649
  		goto err_ctx;
2dd542b7a   Al Viro   aio: aio_nr decre...
650
651
  	}
  	aio_nr += ctx->max_reqs;
9fa1cb397   Al Viro   aio: aio_nr_lock ...
652
  	spin_unlock(&aio_nr_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
653

1881686f8   Benjamin LaHaise   aio: fix kioctx l...
654
655
  	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
  	percpu_ref_get(&ctx->reqs);	/* free_ioctx_users() will drop this */
723be6e39   Kent Overstreet   aio: percpu ioctx...
656

da90382c2   Benjamin LaHaise   aio: fix error ha...
657
658
  	err = ioctx_add_table(ctx, mm);
  	if (err)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
659
  		goto err_cleanup;
da90382c2   Benjamin LaHaise   aio: fix error ha...
660

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
661
662
  	/* Release the ring_lock mutex now that all setup is complete. */
  	mutex_unlock(&ctx->ring_lock);
caf4167aa   Kent Overstreet   aio: dprintk() ->...
663
664
  	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x
  ",
58c85dc20   Kent Overstreet   aio: kill struct ...
665
  		 ctx, ctx->user_id, mm, ctx->nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
  	return ctx;
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
667
668
  err_cleanup:
  	aio_nr_sub(ctx->max_reqs);
d1b943271   Gu Zheng   aio: clean up aio...
669
670
  err_ctx:
  	aio_free_ring(ctx);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
671
  err:
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
672
  	mutex_unlock(&ctx->ring_lock);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
673
  	free_percpu(ctx->cpu);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
674
  	free_percpu(ctx->reqs.pcpu_count);
723be6e39   Kent Overstreet   aio: percpu ioctx...
675
  	free_percpu(ctx->users.pcpu_count);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
676
  	kmem_cache_free(kioctx_cachep, ctx);
caf4167aa   Kent Overstreet   aio: dprintk() ->...
677
678
  	pr_debug("error allocating ioctx %d
  ", err);
e23754f88   Al Viro   aio: don't bother...
679
  	return ERR_PTR(err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
680
  }
36f558890   Kent Overstreet   aio: refcounting ...
681
682
683
684
685
  /* kill_ioctx
   *	Cancels all outstanding aio requests on an aio context.  Used
   *	when the processes owning a context have all exited to encourage
   *	the rapid destruction of the kioctx.
   */
db446a08c   Benjamin LaHaise   aio: convert the ...
686
  static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
36f558890   Kent Overstreet   aio: refcounting ...
687
688
  {
  	if (!atomic_xchg(&ctx->dead, 1)) {
db446a08c   Benjamin LaHaise   aio: convert the ...
689
690
691
  		struct kioctx_table *table;
  
  		spin_lock(&mm->ioctx_lock);
d9b2c8714   Artem Savkov   aio: rcu_read_loc...
692
  		rcu_read_lock();
77d30b14d   Benjamin LaHaise   aio: fix rcu spar...
693
  		table = rcu_dereference(mm->ioctx_table);
db446a08c   Benjamin LaHaise   aio: convert the ...
694
695
696
  
  		WARN_ON(ctx != table->table[ctx->id]);
  		table->table[ctx->id] = NULL;
d9b2c8714   Artem Savkov   aio: rcu_read_loc...
697
  		rcu_read_unlock();
db446a08c   Benjamin LaHaise   aio: convert the ...
698
  		spin_unlock(&mm->ioctx_lock);
723be6e39   Kent Overstreet   aio: percpu ioctx...
699
700
  		/* percpu_ref_kill() will do the necessary call_rcu() */
  		wake_up_all(&ctx->wait);
dee11c236   Ken Chen   [PATCH] aio: fix ...
701

36f558890   Kent Overstreet   aio: refcounting ...
702
  		/*
4fcc712f5   Kent Overstreet   aio: fix io_destr...
703
704
705
706
707
  		 * It'd be more correct to do this in free_ioctx(), after all
  		 * the outstanding kiocbs have finished - but by then io_destroy
  		 * has already returned, so io_setup() could potentially return
  		 * -EAGAIN with no ioctxs actually in use (as far as userspace
  		 *  could tell).
36f558890   Kent Overstreet   aio: refcounting ...
708
  		 */
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
709
  		aio_nr_sub(ctx->max_reqs);
4fcc712f5   Kent Overstreet   aio: fix io_destr...
710
711
712
  
  		if (ctx->mmap_size)
  			vm_munmap(ctx->mmap_base, ctx->mmap_size);
723be6e39   Kent Overstreet   aio: percpu ioctx...
713
  		percpu_ref_kill(&ctx->users);
36f558890   Kent Overstreet   aio: refcounting ...
714
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
715
716
717
718
719
  }
  
  /* wait_on_sync_kiocb:
   *	Waits on the given sync kiocb to complete.
   */
57282d8fd   Kent Overstreet   aio: Kill ki_users
720
  ssize_t wait_on_sync_kiocb(struct kiocb *req)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
721
  {
57282d8fd   Kent Overstreet   aio: Kill ki_users
722
  	while (!req->ki_ctx) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
723
  		set_current_state(TASK_UNINTERRUPTIBLE);
57282d8fd   Kent Overstreet   aio: Kill ki_users
724
  		if (req->ki_ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
725
  			break;
41d10da37   Jeff Moyer   aio: account I/O ...
726
  		io_schedule();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
728
  	}
  	__set_current_state(TASK_RUNNING);
57282d8fd   Kent Overstreet   aio: Kill ki_users
729
  	return req->ki_user_data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
730
  }
385773e04   H Hartley Sweeten   aio.c: move EXPOR...
731
  EXPORT_SYMBOL(wait_on_sync_kiocb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
732

36f558890   Kent Overstreet   aio: refcounting ...
733
734
735
736
737
738
739
  /*
   * exit_aio: called when the last user of mm goes away.  At this point, there is
   * no way for any new requests to be submited or any of the io_* syscalls to be
   * called on the context.
   *
   * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
   * them.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
740
   */
fc9b52cd8   Harvey Harrison   fs: remove fastca...
741
  void exit_aio(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
742
  {
db446a08c   Benjamin LaHaise   aio: convert the ...
743
  	struct kioctx_table *table;
abf137dd7   Jens Axboe   aio: make the loo...
744
  	struct kioctx *ctx;
db446a08c   Benjamin LaHaise   aio: convert the ...
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
  	unsigned i = 0;
  
  	while (1) {
  		rcu_read_lock();
  		table = rcu_dereference(mm->ioctx_table);
  
  		do {
  			if (!table || i >= table->nr) {
  				rcu_read_unlock();
  				rcu_assign_pointer(mm->ioctx_table, NULL);
  				if (table)
  					kfree(table);
  				return;
  			}
  
  			ctx = table->table[i++];
  		} while (!ctx);
  
  		rcu_read_unlock();
abf137dd7   Jens Axboe   aio: make the loo...
764

936af1576   Al Viro   aio: don't bother...
765
766
767
768
769
770
771
  		/*
  		 * We don't need to bother with munmap() here -
  		 * exit_mmap(mm) is coming and it'll unmap everything.
  		 * Since aio_free_ring() uses non-zero ->mmap_size
  		 * as indicator that it needs to unmap the area,
  		 * just set it to 0; aio_free_ring() is the only
  		 * place that uses ->mmap_size, so it's safe.
936af1576   Al Viro   aio: don't bother...
772
  		 */
58c85dc20   Kent Overstreet   aio: kill struct ...
773
  		ctx->mmap_size = 0;
36f558890   Kent Overstreet   aio: refcounting ...
774

db446a08c   Benjamin LaHaise   aio: convert the ...
775
  		kill_ioctx(mm, ctx);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
776
777
  	}
  }
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
  /* put_reqs_available
   *	Gives @nr request slots back to @ctx.  The slots are credited to
   *	the current CPU's private counter; while that counter holds at
   *	least two batches, one ctx->req_batch at a time is moved over to
   *	the shared ctx->reqs_available counter.  Preemption is disabled
   *	for the duration so the per-cpu pointer stays valid.
   */
  static void put_reqs_available(struct kioctx *ctx, unsigned nr)
  {
  	struct kioctx_cpu *local;

  	preempt_disable();

  	local = this_cpu_ptr(ctx->cpu);
  	local->reqs_available += nr;

  	/* Spill surplus back to the shared pool, one batch per pass. */
  	while (local->reqs_available >= 2 * ctx->req_batch) {
  		local->reqs_available -= ctx->req_batch;
  		atomic_add(ctx->req_batch, &ctx->reqs_available);
  	}

  	preempt_enable();
  }
  
  /* get_reqs_available
   *	Claims one request slot from @ctx for the caller.  Slots are taken
   *	from the current CPU's private counter; when that counter is empty
   *	a whole ctx->req_batch is first moved over from the shared
   *	ctx->reqs_available counter with a cmpxchg loop.
   *
   *	Returns true if a slot was claimed, false if the private counter
   *	was empty and the shared counter held less than one batch.
   */
  static bool get_reqs_available(struct kioctx *ctx)
  {
  	struct kioctx_cpu *local;
  	bool ret = false;

  	preempt_disable();
  	local = this_cpu_ptr(ctx->cpu);

  	if (local->reqs_available == 0) {
  		int seen = atomic_read(&ctx->reqs_available);

  		for (;;) {
  			int expected = seen;

  			if (expected < ctx->req_batch)
  				goto out;

  			seen = atomic_cmpxchg(&ctx->reqs_available,
  					      expected,
  					      expected - ctx->req_batch);
  			/* cmpxchg returned what we expected: batch is ours. */
  			if (seen == expected)
  				break;
  		}

  		local->reqs_available += ctx->req_batch;
  	}

  	local->reqs_available--;
  	ret = true;
  out:
  	preempt_enable();
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823
  /* aio_get_req
57282d8fd   Kent Overstreet   aio: Kill ki_users
824
825
   *	Allocate a slot for an aio request.
   * Returns NULL if no requests are free.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
826
   */
a1c8eae75   Kent Overstreet   aio: kill batch a...
827
  static inline struct kiocb *aio_get_req(struct kioctx *ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
828
  {
a1c8eae75   Kent Overstreet   aio: kill batch a...
829
  	struct kiocb *req;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
830
  	if (!get_reqs_available(ctx))
a1c8eae75   Kent Overstreet   aio: kill batch a...
831
  		return NULL;
0460fef2a   Kent Overstreet   aio: use cancella...
832
  	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833
  	if (unlikely(!req))
a1c8eae75   Kent Overstreet   aio: kill batch a...
834
  		goto out_put;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
835

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
836
  	percpu_ref_get(&ctx->reqs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
837
  	req->ki_ctx = ctx;
080d676de   Jeff Moyer   aio: allocate kio...
838
  	return req;
a1c8eae75   Kent Overstreet   aio: kill batch a...
839
  out_put:
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
840
  	put_reqs_available(ctx, 1);
a1c8eae75   Kent Overstreet   aio: kill batch a...
841
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
842
  }
11599ebac   Kent Overstreet   aio: make aio_put...
843
  static void kiocb_free(struct kiocb *req)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
844
  {
1d98ebfcc   Kent Overstreet   aio: do fget() af...
845
846
  	if (req->ki_filp)
  		fput(req->ki_filp);
133890103   Davide Libenzi   eventfd: revised ...
847
848
  	if (req->ki_eventfd != NULL)
  		eventfd_ctx_put(req->ki_eventfd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
849
  	kmem_cache_free(kiocb_cachep, req);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
850
  }
d5470b596   Adrian Bunk   fs/aio.c: make 3 ...
851
  static struct kioctx *lookup_ioctx(unsigned long ctx_id)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
852
  {
db446a08c   Benjamin LaHaise   aio: convert the ...
853
  	struct aio_ring __user *ring  = (void __user *)ctx_id;
abf137dd7   Jens Axboe   aio: make the loo...
854
  	struct mm_struct *mm = current->mm;
65c24491b   Jeff Moyer   aio: lookup_ioctx...
855
  	struct kioctx *ctx, *ret = NULL;
db446a08c   Benjamin LaHaise   aio: convert the ...
856
857
858
859
860
  	struct kioctx_table *table;
  	unsigned id;
  
  	if (get_user(id, &ring->id))
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
861

abf137dd7   Jens Axboe   aio: make the loo...
862
  	rcu_read_lock();
db446a08c   Benjamin LaHaise   aio: convert the ...
863
  	table = rcu_dereference(mm->ioctx_table);
abf137dd7   Jens Axboe   aio: make the loo...
864

db446a08c   Benjamin LaHaise   aio: convert the ...
865
866
  	if (!table || id >= table->nr)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
867

db446a08c   Benjamin LaHaise   aio: convert the ...
868
  	ctx = table->table[id];
f30d704fe   Benjamin LaHaise   aio: table lookup...
869
  	if (ctx && ctx->user_id == ctx_id) {
db446a08c   Benjamin LaHaise   aio: convert the ...
870
871
872
873
  		percpu_ref_get(&ctx->users);
  		ret = ctx;
  	}
  out:
abf137dd7   Jens Axboe   aio: make the loo...
874
  	rcu_read_unlock();
65c24491b   Jeff Moyer   aio: lookup_ioctx...
875
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
876
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
877
878
  /* aio_complete
   *	Called when the io request on the given iocb is complete.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
879
   */
2d68449e8   Kent Overstreet   aio: kill return ...
880
  void aio_complete(struct kiocb *iocb, long res, long res2)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
881
882
  {
  	struct kioctx	*ctx = iocb->ki_ctx;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
883
  	struct aio_ring	*ring;
21b40200c   Kent Overstreet   aio: use flush_dc...
884
  	struct io_event	*ev_page, *event;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885
  	unsigned long	flags;
21b40200c   Kent Overstreet   aio: use flush_dc...
886
  	unsigned tail, pos;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
887

20dcae324   Zach Brown   [PATCH] aio: remo...
888
889
890
891
892
893
  	/*
  	 * Special case handling for sync iocbs:
  	 *  - events go directly into the iocb for fast handling
  	 *  - the sync task with the iocb in its stack holds the single iocb
  	 *    ref, no other paths have a way to get another ref
  	 *  - the sync task helpfully left a reference to itself in the iocb
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
894
895
  	 */
  	if (is_sync_kiocb(iocb)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
896
  		iocb->ki_user_data = res;
57282d8fd   Kent Overstreet   aio: Kill ki_users
897
898
  		smp_wmb();
  		iocb->ki_ctx = ERR_PTR(-EXDEV);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
899
  		wake_up_process(iocb->ki_obj.tsk);
2d68449e8   Kent Overstreet   aio: kill return ...
900
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
901
  	}
0460fef2a   Kent Overstreet   aio: use cancella...
902
903
904
905
906
907
908
  	if (iocb->ki_list.next) {
  		unsigned long flags;
  
  		spin_lock_irqsave(&ctx->ctx_lock, flags);
  		list_del(&iocb->ki_list);
  		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  	}
11599ebac   Kent Overstreet   aio: make aio_put...
909

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
910
  	/*
0460fef2a   Kent Overstreet   aio: use cancella...
911
  	 * Add a completion event to the ring buffer. Must be done holding
4b30f07e7   Tang Chen   aio: fix wrong co...
912
  	 * ctx->completion_lock to prevent other code from messing with the tail
0460fef2a   Kent Overstreet   aio: use cancella...
913
914
915
  	 * pointer since we might be called from irq context.
  	 */
  	spin_lock_irqsave(&ctx->completion_lock, flags);
58c85dc20   Kent Overstreet   aio: kill struct ...
916
  	tail = ctx->tail;
21b40200c   Kent Overstreet   aio: use flush_dc...
917
  	pos = tail + AIO_EVENTS_OFFSET;
58c85dc20   Kent Overstreet   aio: kill struct ...
918
  	if (++tail >= ctx->nr_events)
4bf69b2a0   Kenneth W Chen   [PATCH] aio: ring...
919
  		tail = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
920

58c85dc20   Kent Overstreet   aio: kill struct ...
921
  	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
21b40200c   Kent Overstreet   aio: use flush_dc...
922
  	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
923
924
925
926
  	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
  	event->data = iocb->ki_user_data;
  	event->res = res;
  	event->res2 = res2;
21b40200c   Kent Overstreet   aio: use flush_dc...
927
  	kunmap_atomic(ev_page);
58c85dc20   Kent Overstreet   aio: kill struct ...
928
  	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
21b40200c   Kent Overstreet   aio: use flush_dc...
929
930
931
  
  	pr_debug("%p[%u]: %p: %p %Lx %lx %lx
  ",
caf4167aa   Kent Overstreet   aio: dprintk() ->...
932
933
  		 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
  		 res, res2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
934
935
936
937
938
  
  	/* after flagging the request as done, we
  	 * must never even look at it again
  	 */
  	smp_wmb();	/* make event visible before updating tail */
58c85dc20   Kent Overstreet   aio: kill struct ...
939
  	ctx->tail = tail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
940

58c85dc20   Kent Overstreet   aio: kill struct ...
941
  	ring = kmap_atomic(ctx->ring_pages[0]);
21b40200c   Kent Overstreet   aio: use flush_dc...
942
  	ring->tail = tail;
e8e3c3d66   Cong Wang   fs: remove the se...
943
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
944
  	flush_dcache_page(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
945

0460fef2a   Kent Overstreet   aio: use cancella...
946
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
21b40200c   Kent Overstreet   aio: use flush_dc...
947
948
  	pr_debug("added to ring %p at [%u]
  ", iocb, tail);
8d1c98b0b   Davide Libenzi   eventfd/kaio inte...
949
950
951
952
953
954
  
  	/*
  	 * Check if the user asked us to deliver the result through an
  	 * eventfd. The eventfd_signal() function is safe to be called
  	 * from IRQ context.
  	 */
87c3a86e1   Davide Libenzi   eventfd: remove f...
955
  	if (iocb->ki_eventfd != NULL)
8d1c98b0b   Davide Libenzi   eventfd/kaio inte...
956
  		eventfd_signal(iocb->ki_eventfd, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
957
  	/* everything turned out well, dispose of the aiocb. */
57282d8fd   Kent Overstreet   aio: Kill ki_users
958
  	kiocb_free(iocb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
959

6cb2a2104   Quentin Barnes   aio: bad AIO race...
960
961
962
963
964
965
966
  	/*
  	 * We have to order our ring_info tail store above and test
  	 * of the wait list below outside the wait lock.  This is
  	 * like in wake_up_bit() where clearing a bit has to be
  	 * ordered with the unlocked test.
  	 */
  	smp_mb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
967
968
  	if (waitqueue_active(&ctx->wait))
  		wake_up(&ctx->wait);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
969
  	percpu_ref_put(&ctx->reqs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
970
  }
385773e04   H Hartley Sweeten   aio.c: move EXPOR...
971
  EXPORT_SYMBOL(aio_complete);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
972

a31ad380b   Kent Overstreet   aio: make aio_rea...
973
974
975
  /* aio_read_events
   *	Pull an event off of the ioctx's event ring.  Returns the number of
   *	events fetched
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
976
   */
a31ad380b   Kent Overstreet   aio: make aio_rea...
977
978
  static long aio_read_events_ring(struct kioctx *ctx,
  				 struct io_event __user *event, long nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
979
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
980
  	struct aio_ring *ring;
5ffac122d   Kent Overstreet   aio: Don't use ct...
981
  	unsigned head, tail, pos;
a31ad380b   Kent Overstreet   aio: make aio_rea...
982
983
  	long ret = 0;
  	int copy_ret;
58c85dc20   Kent Overstreet   aio: kill struct ...
984
  	mutex_lock(&ctx->ring_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
985

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
986
  	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
58c85dc20   Kent Overstreet   aio: kill struct ...
987
  	ring = kmap_atomic(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
988
  	head = ring->head;
5ffac122d   Kent Overstreet   aio: Don't use ct...
989
  	tail = ring->tail;
a31ad380b   Kent Overstreet   aio: make aio_rea...
990
  	kunmap_atomic(ring);
5ffac122d   Kent Overstreet   aio: Don't use ct...
991
992
  	pr_debug("h%u t%u m%u
  ", head, tail, ctx->nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
993

5ffac122d   Kent Overstreet   aio: Don't use ct...
994
  	if (head == tail)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
  		goto out;
a31ad380b   Kent Overstreet   aio: make aio_rea...
996
997
998
999
  	while (ret < nr) {
  		long avail;
  		struct io_event *ev;
  		struct page *page;
5ffac122d   Kent Overstreet   aio: Don't use ct...
1000
1001
  		avail = (head <= tail ?  tail : ctx->nr_events) - head;
  		if (head == tail)
a31ad380b   Kent Overstreet   aio: make aio_rea...
1002
1003
1004
1005
1006
1007
1008
  			break;
  
  		avail = min(avail, nr - ret);
  		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
  			    ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
  
  		pos = head + AIO_EVENTS_OFFSET;
58c85dc20   Kent Overstreet   aio: kill struct ...
1009
  		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
a31ad380b   Kent Overstreet   aio: make aio_rea...
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
  		pos %= AIO_EVENTS_PER_PAGE;
  
  		ev = kmap(page);
  		copy_ret = copy_to_user(event + ret, ev + pos,
  					sizeof(*ev) * avail);
  		kunmap(page);
  
  		if (unlikely(copy_ret)) {
  			ret = -EFAULT;
  			goto out;
  		}
  
  		ret += avail;
  		head += avail;
58c85dc20   Kent Overstreet   aio: kill struct ...
1024
  		head %= ctx->nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1025
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026

58c85dc20   Kent Overstreet   aio: kill struct ...
1027
  	ring = kmap_atomic(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1028
  	ring->head = head;
91d80a84b   Zhao Hongjiang   aio: fix possible...
1029
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
1030
  	flush_dcache_page(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1031

5ffac122d   Kent Overstreet   aio: Don't use ct...
1032
1033
  	pr_debug("%li  h%u t%u
  ", ret, head, tail);
3e845ce01   Kent Overstreet   aio: change reqs_...
1034

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
1035
  	put_reqs_available(ctx, ret);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1036
  out:
58c85dc20   Kent Overstreet   aio: kill struct ...
1037
  	mutex_unlock(&ctx->ring_lock);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1038

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1039
1040
  	return ret;
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1041
1042
  /* aio_read_events
   *	Wait condition used by read_events().  Reads more events into
   *	@event starting at index *@i and adds the number read to *@i.  If
   *	the context has been marked dead the result becomes -EINVAL.  While
   *	*@i is still zero it is overwritten with the result code, so an
   *	error reaches the caller through *@i.
   *
   *	Returns true when waiting should stop: on error, or once at least
   *	@min_nr events have been collected.
   */
  static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
  			    struct io_event __user *event, long *i)
  {
  	long rc = aio_read_events_ring(ctx, event + *i, nr - *i);

  	if (rc > 0)
  		*i += rc;

  	if (unlikely(atomic_read(&ctx->dead)))
  		rc = -EINVAL;

  	if (*i == 0)
  		*i = rc;

  	if (rc < 0)
  		return true;

  	return *i >= min_nr;
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1057
  /* read_events
   *	Collects between @min_nr and @nr completion events from @ctx into
   *	the user buffer @event, sleeping until enough have arrived, the
   *	optional @timeout (copied from userspace) expires, or a signal is
   *	pending.
   *
   *	Returns the number of events read, -EFAULT if @timeout cannot be
   *	copied in, -EINTR if nothing was read and a signal is pending, or
   *	the error reported through aio_read_events().
   */
  static long read_events(struct kioctx *ctx, long min_nr, long nr,
  			struct io_event __user *event,
  			struct timespec __user *timeout)
  {
  	/* No timeout supplied means wait forever. */
  	ktime_t until = { .tv64 = KTIME_MAX };
  	long nread = 0;

  	if (timeout) {
  		struct timespec	ts;

  		if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
  			return -EFAULT;

  		until = timespec_to_ktime(ts);
  	}

  	/*
  	 * Note that aio_read_events() is being called as the conditional - i.e.
  	 * we're calling it after prepare_to_wait() has set task state to
  	 * TASK_INTERRUPTIBLE.
  	 *
  	 * But aio_read_events() can block, and if it blocks it's going to flip
  	 * the task state back to TASK_RUNNING.
  	 *
  	 * This should be ok, provided it doesn't flip the state back to
  	 * TASK_RUNNING and return 0 too much - that causes us to spin. That
  	 * will only happen if the mutex_lock() call blocks, and we then find
  	 * the ringbuffer empty. So in practice we should be ok, but it's
  	 * something to be aware of when touching this code.
  	 */
  	wait_event_interruptible_hrtimeout(ctx->wait,
  			aio_read_events(ctx, min_nr, nr, event, &nread), until);

  	if (nread == 0 && signal_pending(current))
  		nread = -EINTR;

  	return nread;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
  /* sys_io_setup:
   *	Create an aio_context capable of receiving at least nr_events.
   *	ctxp must not point to an aio_context that already exists, and
   *	must be initialized to 0 prior to the call.  On successful
   *	creation of the aio_context, *ctxp is filled in with the resulting 
   *	handle.  May fail with -EINVAL if *ctxp is not initialized,
   *	if the specified nr_events exceeds internal limits.  May fail 
   *	with -EAGAIN if the specified nr_events exceeds the user's limit 
   *	of available events.  May fail with -ENOMEM if insufficient kernel
   *	resources are available.  May fail with -EFAULT if an invalid
   *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
   *	implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1107
  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctxp);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1118
1119
1120
1121
  	if (unlikely(ctx || nr_events == 0)) {
  		pr_debug("EINVAL: io_setup: ctx %lu nr_events %u
  ",
  		         ctx, nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1122
1123
1124
1125
1126
1127
1128
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		ret = put_user(ioctx->user_id, ctxp);
a2e1859ad   Al Viro   aio: take final p...
1129
  		if (ret)
db446a08c   Benjamin LaHaise   aio: convert the ...
1130
  			kill_ioctx(current->mm, ioctx);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1131
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1132
1133
1134
1135
1136
1137
1138
1139
1140
  	}
  
  out:
  	return ret;
  }
  
  /* sys_io_destroy:
   *	Destroy the aio_context specified.  May cancel any outstanding 
   *	AIOs and block on completion.  Will fail with -ENOSYS if not
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1141
   *	implemented.  May fail with -EINVAL if the context pointed to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1142
1143
   *	is invalid.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1144
  SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1145
1146
1147
  {
  	struct kioctx *ioctx = lookup_ioctx(ctx);
  	if (likely(NULL != ioctx)) {
db446a08c   Benjamin LaHaise   aio: convert the ...
1148
  		kill_ioctx(current->mm, ioctx);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1149
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1150
1151
1152
1153
1154
1155
  		return 0;
  	}
  	pr_debug("EINVAL: io_destroy: invalid context id
  ");
  	return -EINVAL;
  }
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1156
1157
  /*
   * Function type of the read/write handlers that aio_run_iocb() picks
   * from file->f_op (->aio_read or ->aio_write).
   */
  typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
  			    unsigned long, loff_t);
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1158
1159
1160
1161
1162
  /* aio_setup_vectored_rw
   *	Prepares the iovec array for a vectored request.  On entry
   *	kiocb->ki_nbytes holds the number of segments, which is copied to
   *	*@nr_segs.  The user array at @buf is validated and copied by
   *	rw_copy_check_uvector() (or its compat variant when @compat is
   *	set), with *@iovec passed in as the one-entry fast array; the
   *	helper stores the array to use back into *@iovec.
   *
   *	Returns 0 on success, with kiocb->ki_nbytes replaced by the value
   *	the helper returned (the byte total), or a negative error.
   */
  static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
  				     int rw, char __user *buf,
  				     unsigned long *nr_segs,
  				     struct iovec **iovec,
  				     bool compat)
  {
  	ssize_t bytes;

  	*nr_segs = kiocb->ki_nbytes;

  #ifdef CONFIG_COMPAT
  	if (compat)
  		bytes = compat_rw_copy_check_uvector(rw,
  				(struct compat_iovec __user *)buf,
  				*nr_segs, 1, *iovec, iovec);
  	else
  #endif
  		bytes = rw_copy_check_uvector(rw,
  				(struct iovec __user *)buf,
  				*nr_segs, 1, *iovec, iovec);

  	if (bytes < 0)
  		return bytes;

  	/* ki_nbytes now reflect bytes instead of segs */
  	kiocb->ki_nbytes = bytes;

  	return 0;
  }
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1184
1185
1186
1187
  /* aio_setup_single_vector
   *	Describes a plain (non-vectored) request as a one-element iovec:
   *	@iovec is pointed at @buf with length kiocb->ki_nbytes and
   *	*@nr_segs is set to 1.
   *
   *	Returns 0 on success or -EFAULT if access_ok() rejects the user
   *	buffer.
   */
  static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
  				       int rw, char __user *buf,
  				       unsigned long *nr_segs,
  				       struct iovec *iovec)
  {
  	if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
  		return -EFAULT;

  	*nr_segs = 1;
  	iovec->iov_len = kiocb->ki_nbytes;
  	iovec->iov_base = buf;

  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1197
1198
1199
1200
1201
  /*
   * aio_setup_iocb:
   *	Performs the initial checks and aio retry method
   *	setup for the kiocb at the time of io submission.
   */
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1202
1203
  static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
  			    char __user *buf, bool compat)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1204
  {
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1205
1206
  	struct file *file = req->ki_filp;
  	ssize_t ret;
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1207
  	unsigned long nr_segs;
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1208
1209
1210
  	int rw;
  	fmode_t mode;
  	aio_rw_op *rw_op;
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1211
  	struct iovec inline_vec, *iovec = &inline_vec;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1212

8bc92afcf   Kent Overstreet   aio: Kill unneede...
1213
  	switch (opcode) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1214
  	case IOCB_CMD_PREAD:
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
1215
  	case IOCB_CMD_PREADV:
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1216
1217
1218
1219
1220
1221
  		mode	= FMODE_READ;
  		rw	= READ;
  		rw_op	= file->f_op->aio_read;
  		goto rw_common;
  
  	case IOCB_CMD_PWRITE:
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
1222
  	case IOCB_CMD_PWRITEV:
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
  		mode	= FMODE_WRITE;
  		rw	= WRITE;
  		rw_op	= file->f_op->aio_write;
  		goto rw_common;
  rw_common:
  		if (unlikely(!(file->f_mode & mode)))
  			return -EBADF;
  
  		if (!rw_op)
  			return -EINVAL;
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1233
1234
1235
1236
1237
1238
  		ret = (opcode == IOCB_CMD_PREADV ||
  		       opcode == IOCB_CMD_PWRITEV)
  			? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
  						&iovec, compat)
  			: aio_setup_single_vector(req, rw, buf, &nr_segs,
  						  iovec);
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
1239
  		if (ret)
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1240
1241
1242
  			return ret;
  
  		ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1243
1244
1245
  		if (ret < 0) {
  			if (iovec != &inline_vec)
  				kfree(iovec);
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1246
  			return ret;
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1247
  		}
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1248
1249
  
  		req->ki_nbytes = ret;
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1250

73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1251
1252
1253
1254
1255
1256
1257
1258
1259
  		/* XXX: move/kill - rw_verify_area()? */
  		/* This matches the pread()/pwrite() logic */
  		if (req->ki_pos < 0) {
  			ret = -EINVAL;
  			break;
  		}
  
  		if (rw == WRITE)
  			file_start_write(file);
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1260
  		ret = rw_op(req, iovec, nr_segs, req->ki_pos);
73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1261
1262
1263
  
  		if (rw == WRITE)
  			file_end_write(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1264
  		break;
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1265

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1266
  	case IOCB_CMD_FDSYNC:
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1267
1268
1269
1270
  		if (!file->f_op->aio_fsync)
  			return -EINVAL;
  
  		ret = file->f_op->aio_fsync(req, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1271
  		break;
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1272

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1273
  	case IOCB_CMD_FSYNC:
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1274
1275
1276
1277
  		if (!file->f_op->aio_fsync)
  			return -EINVAL;
  
  		ret = file->f_op->aio_fsync(req, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1278
  		break;
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1279

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1280
  	default:
caf4167aa   Kent Overstreet   aio: dprintk() ->...
1281
1282
  		pr_debug("EINVAL: no operation provided
  ");
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1283
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1284
  	}
8bc92afcf   Kent Overstreet   aio: Kill unneede...
1285
1286
  	if (iovec != &inline_vec)
  		kfree(iovec);
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
  	if (ret != -EIOCBQUEUED) {
  		/*
  		 * There's no easy way to restart the syscall since other AIO's
  		 * may be already running. Just fail this IO with EINTR.
  		 */
  		if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
  			     ret == -ERESTARTNOHAND ||
  			     ret == -ERESTART_RESTARTBLOCK))
  			ret = -EINTR;
  		aio_complete(req, ret, 0);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1298
1299
1300
  
  	return 0;
  }
d5470b596   Adrian Bunk   fs/aio.c: make 3 ...
1301
  /*
   * io_submit_one:
   *	Validates a single iocb (already copied from user space into @iocb),
   *	allocates a kiocb for it from @ctx and passes it to aio_run_iocb().
   *
   *	@ctx:       io context the request is submitted to
   *	@user_iocb: user-space address of the iocb; aio_key is written there
   *	@iocb:      kernel copy of *user_iocb
   *	@compat:    forwarded to aio_run_iocb()
   *
   *	Returns 0 on success.  Returns -EINVAL if a reserved field is set or
   *	aio_buf/aio_nbytes do not survive conversion to the native types,
   *	-EAGAIN if no request could be allocated, -EBADF if aio_fildes does
   *	not name an open file, the eventfd_ctx_fdget() error if
   *	IOCB_FLAG_RESFD is set and aio_resfd cannot be resolved, the
   *	put_user() error if aio_key cannot be written, or the non-zero
   *	result of aio_run_iocb().  On any failure after allocation the
   *	request is released again.
   */
  static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
  			 struct iocb *iocb, bool compat)
  {
  	struct kiocb *req;
  	ssize_t ret;

  	/* enforce forwards compatibility on users */
  	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
  		pr_debug("EINVAL: reserve field set\n");
  		return -EINVAL;
  	}

  	/* prevent overflows */
  	if (unlikely(
  	    (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
  	    (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
  	    ((ssize_t)iocb->aio_nbytes < 0)
  	   )) {
  		pr_debug("EINVAL: io_submit: overflow check\n");
  		return -EINVAL;
  	}

  	req = aio_get_req(ctx);
  	if (unlikely(!req))
  		return -EAGAIN;

  	req->ki_filp = fget(iocb->aio_fildes);
  	if (unlikely(!req->ki_filp)) {
  		ret = -EBADF;
  		goto out_put_req;
  	}

  	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
  		/*
  		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
  		 * instance of the file* now. The file descriptor must be
  		 * an eventfd() fd, and will be signaled for each completed
  		 * event using the eventfd_signal() function.
  		 */
  		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
  		if (IS_ERR(req->ki_eventfd)) {
  			ret = PTR_ERR(req->ki_eventfd);
  			/* Do not leave an error pointer behind in the kiocb. */
  			req->ki_eventfd = NULL;
  			goto out_put_req;
  		}
  	}

  	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
  	if (unlikely(ret)) {
  		pr_debug("EFAULT: aio_key\n");
  		goto out_put_req;
  	}

  	req->ki_obj.user = user_iocb;
  	req->ki_user_data = iocb->aio_data;
  	req->ki_pos = iocb->aio_offset;
  	req->ki_nbytes = iocb->aio_nbytes;

  	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
  			   (char __user *)(unsigned long)iocb->aio_buf,
  			   compat);
  	if (ret)
  		goto out_put_req;

  	return 0;
  out_put_req:
  	put_reqs_available(ctx, 1);
  	percpu_ref_put(&ctx->reqs);
  	kiocb_free(req);
  	return ret;
  }
9d85cba71   Jeff Moyer   aio: fix the comp...
1373
1374
  /*
   * do_io_submit:
   *	Common implementation behind the io_submit system call.  Walks the
   *	user array @iocbpp of @nr iocb pointers, copies each iocb in and
   *	submits it with io_submit_one(), stopping at the first failure.
   *
   *	@ctx_id: id of the io context to submit to
   *	@nr:     number of entries in @iocbpp
   *	@iocbpp: user array of user pointers to iocbs
   *	@compat: forwarded to io_submit_one()
   *
   *	Returns the number of iocbs submitted if at least one was; otherwise
   *	the error of the first iocb, or 0 if @nr is 0.  Returns -EINVAL if
   *	@nr is negative or @ctx_id is invalid, and -EFAULT if @iocbpp does
   *	not pass access_ok().
   */
  long do_io_submit(aio_context_t ctx_id, long nr,
  		  struct iocb __user *__user *iocbpp, bool compat)
  {
  	struct kioctx *ctx;
  	long ret = 0;
  	int i = 0;
  	struct blk_plug plug;

  	if (unlikely(nr < 0))
  		return -EINVAL;

  	/* Clamp nr so that nr * sizeof(*iocbpp) below cannot overflow. */
  	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
  		nr = LONG_MAX/sizeof(*iocbpp);

  	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
  		return -EFAULT;

  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
  		pr_debug("EINVAL: invalid context id\n");
  		return -EINVAL;
  	}

  	blk_start_plug(&plug);

  	/*
  	 * AKPM: should this return a partial result if some of the IOs were
  	 * successfully submitted?
  	 */
  	for (i=0; i<nr; i++) {
  		struct iocb __user *user_iocb;
  		struct iocb tmp;

  		if (unlikely(__get_user(user_iocb, iocbpp + i))) {
  			ret = -EFAULT;
  			break;
  		}

  		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
  			ret = -EFAULT;
  			break;
  		}

  		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
  		if (ret)
  			break;
  	}
  	blk_finish_plug(&plug);

  	/* Drop the reference on ctx->users held since lookup_ioctx(). */
  	percpu_ref_put(&ctx->users);
  	return i ? i : ret;
  }
9d85cba71   Jeff Moyer   aio: fix the comp...
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
  /* sys_io_submit:
   *	Queue the nr iocbs pointed to by iocbpp for processing and return
   *	the number of iocbs queued.  Will return 0 if nr is 0.
   *
   *	May return -EINVAL if the aio_context specified by ctx_id is
   *	invalid, if nr is < 0, if the iocb at *iocbpp[0] is not properly
   *	initialized, or if the operation specified is invalid for the file
   *	descriptor in the iocb.
   *	May fail with -EFAULT if any of the data structures point to
   *	invalid data.
   *	May fail with -EBADF if the file descriptor specified in the first
   *	iocb is invalid.
   *	May fail with -EAGAIN if insufficient resources are available to
   *	queue any iocbs.
   *	Will fail with -ENOSYS if not implemented.
   *
   *	This is the native entry point: it calls do_io_submit() with the
   *	compat argument cleared.
   */
  SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
  		struct iocb __user * __user *, iocbpp)
  {
  	return do_io_submit(ctx_id, nr, iocbpp, false);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1438
1439
  /* lookup_kiocb
   *	Finds a given iocb for cancellation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1440
   */
25ee7e383   Adrian Bunk   [PATCH] fs/aio.c:...
1441
1442
  static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
  				  u32 key)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1443
1444
  {
  	struct list_head *pos;
d00689af6   Zach Brown   [PATCH] aio: repl...
1445
1446
  
  	assert_spin_locked(&ctx->ctx_lock);
8a6608907   Kent Overstreet   aio: kill ki_key
1447
1448
  	if (key != KIOCB_KEY)
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1449
1450
1451
  	/* TODO: use a hash or array, this sucks. */
  	list_for_each(pos, &ctx->active_reqs) {
  		struct kiocb *kiocb = list_kiocb(pos);
8a6608907   Kent Overstreet   aio: kill ki_key
1452
  		if (kiocb->ki_obj.user == iocb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
  			return kiocb;
  	}
  	return NULL;
  }
  
  /* sys_io_cancel:
   *	Attempts to cancel an iocb previously passed to io_submit.  If
   *	the operation is successfully cancelled, the resulting event is
   *	copied into the memory pointed to by result without being placed
   *	into the completion queue and 0 is returned.  May fail with
   *	-EFAULT if any of the data structures pointed to are invalid.
   *	May fail with -EINVAL if aio_context specified by ctx_id is
   *	invalid.  May fail with -EAGAIN if the iocb specified was not
   *	cancelled.  Will fail with -ENOSYS if not implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1468
1469
  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
  		struct io_event __user *, result)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1470
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
  	struct kioctx *ctx;
  	struct kiocb *kiocb;
  	u32 key;
  	int ret;
  
  	ret = get_user(key, &iocb->aio_key);
  	if (unlikely(ret))
  		return -EFAULT;
  
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx))
  		return -EINVAL;
  
  	spin_lock_irq(&ctx->ctx_lock);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1485

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1486
  	kiocb = lookup_kiocb(ctx, iocb, key);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1487
  	if (kiocb)
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1488
  		ret = kiocb_cancel(ctx, kiocb);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1489
1490
  	else
  		ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1491
  	spin_unlock_irq(&ctx->ctx_lock);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1492
  	if (!ret) {
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1493
1494
1495
1496
  		/*
  		 * The result argument is no longer used - the io_event is
  		 * always delivered via the ring buffer. -EINPROGRESS indicates
  		 * cancellation is progress:
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1497
  		 */
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1498
  		ret = -EINPROGRESS;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1499
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1500

723be6e39   Kent Overstreet   aio: percpu ioctx...
1501
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1502
1503
1504
1505
1506
1507
  
  	return ret;
  }
  
  /* io_getevents:
   *	Attempts to read at least min_nr events and up to nr events from
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1508
1509
1510
1511
1512
1513
1514
1515
   *	the completion queue for the aio_context specified by ctx_id. If
   *	it succeeds, the number of read events is returned. May fail with
   *	-EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
   *	out of range, if timeout is out of range.  May fail with -EFAULT
   *	if any of the memory specified is invalid.  May return 0 or
   *	< min_nr if the timeout specified by timeout has elapsed
   *	before sufficient events are available, where timeout == NULL
   *	specifies an infinite timeout. Note that the timeout pointed to by
6900807c6   Jeff Moyer   aio: fix io_getev...
1516
   *	timeout is relative.  Will fail with -ENOSYS if not implemented.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1517
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1518
1519
1520
1521
1522
  SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
  		struct timespec __user *, timeout)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1523
1524
1525
1526
1527
  {
  	struct kioctx *ioctx = lookup_ioctx(ctx_id);
  	long ret = -EINVAL;
  
  	if (likely(ioctx)) {
2e4102559   Namhyung Kim   aio: remove unnec...
1528
  		if (likely(min_nr <= nr && min_nr >= 0))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1529
  			ret = read_events(ioctx, min_nr, nr, events, timeout);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1530
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1531
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1532
1533
  	return ret;
  }