Blame view

fs/aio.c 47.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   *	An async IO implementation for Linux
   *	Written by Benjamin LaHaise <bcrl@kvack.org>
   *
   *	Implements an efficient asynchronous io interface.
   *
   *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
   *
   *	See ../COPYING for licensing terms.
   */
caf4167aa   Kent Overstreet   aio: dprintk() ->...
11
  #define pr_fmt(fmt) "%s: " fmt, __func__
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/errno.h>
  #include <linux/time.h>
  #include <linux/aio_abi.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
17
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/syscalls.h>
b9d128f10   Jens Axboe   block: move bdi/a...
19
  #include <linux/backing-dev.h>
027445c37   Badari Pulavarty   [PATCH] Vectorize...
20
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21

174cd4b1e   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
26
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
3d2d827f5   Michael S. Tsirkin   mm: move use_mm/u...
27
  #include <linux/mmu_context.h>
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
28
  #include <linux/percpu.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
31
32
33
34
  #include <linux/slab.h>
  #include <linux/timer.h>
  #include <linux/aio.h>
  #include <linux/highmem.h>
  #include <linux/workqueue.h>
  #include <linux/security.h>
9c3060bed   Davide Libenzi   signal/timer/even...
35
  #include <linux/eventfd.h>
cfb1e33ee   Jeff Moyer   aio: implement re...
36
  #include <linux/blkdev.h>
9d85cba71   Jeff Moyer   aio: fix the comp...
37
  #include <linux/compat.h>
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
38
39
  #include <linux/migrate.h>
  #include <linux/ramfs.h>
723be6e39   Kent Overstreet   aio: percpu ioctx...
40
  #include <linux/percpu-refcount.h>
71ad7490c   Benjamin LaHaise   rework aio migrat...
41
  #include <linux/mount.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
  
  #include <asm/kmap_types.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
44
  #include <linux/uaccess.h>
5f4610fe2   Jeff Moyer   aio: fix spectre ...
45
  #include <linux/nospec.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
46

68d70d03f   Al Viro   constify rw_verif...
47
  #include "internal.h"
4e179bca6   Kent Overstreet   aio: move private...
48
49
50
51
52
53
  #define AIO_RING_MAGIC			0xa10a10a1
  #define AIO_RING_COMPAT_FEATURES	1
  #define AIO_RING_INCOMPAT_FEATURES	0
  struct aio_ring {
  	unsigned	id;	/* kernel internal index number */
  	unsigned	nr;	/* number of io_events */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
54
55
  	unsigned	head;	/* Written to by userland or under ring_lock
  				 * mutex by aio_read_events_ring(). */
4e179bca6   Kent Overstreet   aio: move private...
56
57
58
59
60
61
62
63
64
65
66
67
  	unsigned	tail;
  
  	unsigned	magic;
  	unsigned	compat_features;
  	unsigned	incompat_features;
  	unsigned	header_length;	/* size of aio_ring */
  
  
  	struct io_event		io_events[0];
  }; /* 128 bytes + ring size */
  
  #define AIO_RING_PAGES	8
4e179bca6   Kent Overstreet   aio: move private...
68

db446a08c   Benjamin LaHaise   aio: convert the ...
69
  struct kioctx_table {
cd21b3400   Tejun Heo   fs/aio: Use RCU a...
70
71
72
  	struct rcu_head		rcu;
  	unsigned		nr;
  	struct kioctx __rcu	*table[];
db446a08c   Benjamin LaHaise   aio: convert the ...
73
  };
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
74
75
76
  /* Per-cpu share of request slots; see the cpu and req_batch members of struct kioctx. */
  struct kioctx_cpu {
  	unsigned int		reqs_available;
  };
dc48e56d7   Jens Axboe   aio: fix serial d...
77
78
79
80
  /*
   * A completion plus a pending count: free_ioctx_reqs() decrements count and
   * completes comp once it reaches zero.
   */
  struct ctx_rq_wait {
  	struct completion	comp;
  	atomic_t		count;
  };
4e179bca6   Kent Overstreet   aio: move private...
81
  struct kioctx {
723be6e39   Kent Overstreet   aio: percpu ioctx...
82
  	struct percpu_ref	users;
36f558890   Kent Overstreet   aio: refcounting ...
83
  	atomic_t		dead;
4e179bca6   Kent Overstreet   aio: move private...
84

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
85
  	struct percpu_ref	reqs;
4e179bca6   Kent Overstreet   aio: move private...
86
  	unsigned long		user_id;
4e179bca6   Kent Overstreet   aio: move private...
87

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
88
89
90
91
92
93
94
  	struct __percpu kioctx_cpu *cpu;
  
  	/*
  	 * For percpu reqs_available, number of slots we move to/from global
  	 * counter at a time:
  	 */
  	unsigned		req_batch;
3e845ce01   Kent Overstreet   aio: change reqs_...
95
96
97
98
  	/*
  	 * This is what userspace passed to io_setup(), it's not used for
  	 * anything but counting against the global max_reqs quota.
  	 *
58c85dc20   Kent Overstreet   aio: kill struct ...
99
  	 * The real limit is nr_events - 1, which will be larger (see
3e845ce01   Kent Overstreet   aio: change reqs_...
100
101
  	 * aio_setup_ring())
  	 */
4e179bca6   Kent Overstreet   aio: move private...
102
  	unsigned		max_reqs;
58c85dc20   Kent Overstreet   aio: kill struct ...
103
104
  	/* Size of ringbuffer, in units of struct io_event */
  	unsigned		nr_events;
4e179bca6   Kent Overstreet   aio: move private...
105

58c85dc20   Kent Overstreet   aio: kill struct ...
106
107
108
109
110
  	unsigned long		mmap_base;
  	unsigned long		mmap_size;
  
  	struct page		**ring_pages;
  	long			nr_pages;
076c7c068   Tejun Heo   fs/aio: Add expli...
111
112
  	struct rcu_head		free_rcu;
  	struct work_struct	free_work;	/* see free_ioctx() */
4e23bcaeb   Kent Overstreet   aio: give shared ...
113

e02ba72aa   Anatol Pomozov   aio: block io_des...
114
115
116
  	/*
  	 * signals when all in-flight requests are done
  	 */
dc48e56d7   Jens Axboe   aio: fix serial d...
117
  	struct ctx_rq_wait	*rq_wait;
e02ba72aa   Anatol Pomozov   aio: block io_des...
118

4e23bcaeb   Kent Overstreet   aio: give shared ...
119
  	struct {
34e83fc61   Kent Overstreet   aio: reqs_active ...
120
121
122
123
124
  		/*
  		 * This counts the number of available slots in the ringbuffer,
  		 * so we avoid overflowing it: it's decremented (if positive)
  		 * when allocating a kiocb and incremented when the resulting
  		 * io_event is pulled off the ringbuffer.
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
125
126
  		 *
  		 * We batch accesses to it with a percpu version.
34e83fc61   Kent Overstreet   aio: reqs_active ...
127
128
  		 */
  		atomic_t	reqs_available;
4e23bcaeb   Kent Overstreet   aio: give shared ...
129
130
131
132
133
134
  	} ____cacheline_aligned_in_smp;
  
  	struct {
  		spinlock_t	ctx_lock;
  		struct list_head active_reqs;	/* used for cancellation */
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
135
136
  	struct {
  		struct mutex	ring_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
137
138
  		wait_queue_head_t wait;
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
139
140
141
  
  	struct {
  		unsigned	tail;
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
142
  		unsigned	completed_events;
58c85dc20   Kent Overstreet   aio: kill struct ...
143
  		spinlock_t	completion_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
144
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
145
146
  
  	struct page		*internal_pages[AIO_RING_PAGES];
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
147
  	struct file		*aio_ring_file;
db446a08c   Benjamin LaHaise   aio: convert the ...
148
149
  
  	unsigned		id;
4e179bca6   Kent Overstreet   aio: move private...
150
  };
04b2fa9f8   Christoph Hellwig   fs: split generic...
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
  /*
   * ki_cancel == KIOCB_CANCELLED marks a kiocb that has been either cancelled
   * or completed (a successful cancellation - io_cancel() - still delivers the
   * completion to userspace, so treating the two alike makes a certain amount
   * of sense).
   *
   * Most things don't implement kiocb cancellation, and kiocb completion
   * should stay lockless when possible, so ki_cancel itself synchronizes
   * cancellation against completion: it is only ever set to KIOCB_CANCELLED
   * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
   */
  #define KIOCB_CANCELLED		((void *) (~0ULL))

  struct aio_kiocb {
  	struct kiocb		common;

  	struct kioctx		*ki_ctx;
  	kiocb_cancel_fn		*ki_cancel;

  	/* user's aiocb */
  	struct iocb __user	*ki_user_iocb;
  	/* user's data for completion */
  	__u64			ki_user_data;

  	/* the aio core uses this for cancellation */
  	struct list_head	ki_list;

  	/*
  	 * Underlying eventfd context to deliver events to; used when the
  	 * aio_resfd field of the userspace iocb is not zero.
  	 */
  	struct eventfd_ctx	*ki_eventfd;
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
182
  /*------ sysctl variables----*/
  static DEFINE_SPINLOCK(aio_nr_lock);
  unsigned long aio_nr;		/* current system wide number of aio requests */
  unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
  /*----end sysctl variables---*/

  /* Slab caches for struct aio_kiocb and struct kioctx, created in aio_setup() */
  static struct kmem_cache	*kiocb_cachep;
  static struct kmem_cache	*kioctx_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
189

71ad7490c   Benjamin LaHaise   rework aio migrat...
190
191
192
193
194
195
196
197
198
199
200
  /* Internal "aio" mount that backs the ring files; set up in aio_setup() */
  static struct vfsmount *aio_mnt;
  
  /* Tentative definitions: aio_private_file() uses both before they are defined */
  static const struct file_operations aio_ring_fops;
  static const struct address_space_operations aio_ctx_aops;
  
  /* aio_private_file
   *	Creates the file that backs the ring of @ctx: an anonymous inode on
   *	the internal aio mount, sized to @nr_pages pages, whose mapping uses
   *	aio_ctx_aops and points back at @ctx through ->private_data.
   *
   *	Returns the new O_RDWR file, or an ERR_PTR() on failure.
   */
  static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
  {
  	struct qstr this = QSTR_INIT("[aio]", 5);
  	struct file *file;
  	struct path path;
  	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
  	if (IS_ERR(inode))
  		return ERR_CAST(inode);

  	inode->i_mapping->a_ops = &aio_ctx_aops;
  	inode->i_mapping->private_data = ctx;
  	inode->i_size = PAGE_SIZE * nr_pages;

  	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
  	if (!path.dentry) {
  		/* No dentry took over the inode yet, drop it by hand */
  		iput(inode);
  		return ERR_PTR(-ENOMEM);
  	}
  	path.mnt = mntget(aio_mnt);

  	d_instantiate(path.dentry, inode);
  	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
  	if (IS_ERR(file)) {
  		path_put(&path);
  		return file;
  	}

  	file->f_flags = O_RDWR;
  	return file;
  }
  
  static struct dentry *aio_mount(struct file_system_type *fs_type,
  				int flags, const char *dev_name, void *data)
  {
  	static const struct dentry_operations ops = {
  		.d_dname	= simple_dname,
  	};
22f6b4d34   Jann Horn   aio: mark AIO pse...
232
233
234
235
236
237
  	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
  					   AIO_RING_MAGIC);
  
  	if (!IS_ERR(root))
  		root->d_sb->s_iflags |= SB_I_NOEXEC;
  	return root;
71ad7490c   Benjamin LaHaise   rework aio migrat...
238
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
239
240
241
242
243
244
  /* aio_setup
   *	Mounts the internal aio filesystem and creates the slab caches used
   *	by the aio routines, panic on failure as this is done early during
   *	the boot sequence.
   */
  static int __init aio_setup(void)
  {
  	static struct file_system_type aio_fs = {
  		.name		= "aio",
  		.mount		= aio_mount,
  		.kill_sb	= kill_anon_super,
  	};
  	aio_mnt = kern_mount(&aio_fs);
  	if (IS_ERR(aio_mnt))
  		panic("Failed to create aio fs mount.");

  	/* SLAB_PANIC: cache creation failure panics, so no error checks here */
  	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
  	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);

  	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));

  	return 0;
  }
  __initcall(aio_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262

5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
263
264
265
  static void put_aio_ring_file(struct kioctx *ctx)
  {
  	struct file *aio_ring_file = ctx->aio_ring_file;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
266
  	struct address_space *i_mapping;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
267
  	if (aio_ring_file) {
450630975   Al Viro   don't open-code f...
268
  		truncate_setsize(file_inode(aio_ring_file), 0);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
269
270
  
  		/* Prevent further access to the kioctx from migratepages */
450630975   Al Viro   don't open-code f...
271
  		i_mapping = aio_ring_file->f_mapping;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
272
273
  		spin_lock(&i_mapping->private_lock);
  		i_mapping->private_data = NULL;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
274
  		ctx->aio_ring_file = NULL;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
275
  		spin_unlock(&i_mapping->private_lock);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
276
277
278
279
  
  		fput(aio_ring_file);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
  static void aio_free_ring(struct kioctx *ctx)
  {
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
282
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
283

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
284
285
286
287
  	/* Disconnect the kiotx from the ring file.  This prevents future
  	 * accesses to the kioctx from page migration.
  	 */
  	put_aio_ring_file(ctx);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
288
  	for (i = 0; i < ctx->nr_pages; i++) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
289
  		struct page *page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
290
291
292
  		pr_debug("pid(%d) [%d] page->count=%d
  ", current->pid, i,
  				page_count(ctx->ring_pages[i]));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
293
294
295
296
297
  		page = ctx->ring_pages[i];
  		if (!page)
  			continue;
  		ctx->ring_pages[i] = NULL;
  		put_page(page);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
298
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
299

ddb8c45ba   Sasha Levin   aio: nullify aio-...
300
  	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
58c85dc20   Kent Overstreet   aio: kill struct ...
301
  		kfree(ctx->ring_pages);
ddb8c45ba   Sasha Levin   aio: nullify aio-...
302
303
  		ctx->ring_pages = NULL;
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
304
  }
5477e70a6   Oleg Nesterov   mm: move ->mremap...
305
  static int aio_ring_mremap(struct vm_area_struct *vma)
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
306
  {
5477e70a6   Oleg Nesterov   mm: move ->mremap...
307
  	struct file *file = vma->vm_file;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
308
309
  	struct mm_struct *mm = vma->vm_mm;
  	struct kioctx_table *table;
b2edffdd9   Al Viro   fix mremap() vs. ...
310
  	int i, res = -EINVAL;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
311
312
313
314
315
316
  
  	spin_lock(&mm->ioctx_lock);
  	rcu_read_lock();
  	table = rcu_dereference(mm->ioctx_table);
  	for (i = 0; i < table->nr; i++) {
  		struct kioctx *ctx;
cd21b3400   Tejun Heo   fs/aio: Use RCU a...
317
  		ctx = rcu_dereference(table->table[i]);
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
318
  		if (ctx && ctx->aio_ring_file == file) {
b2edffdd9   Al Viro   fix mremap() vs. ...
319
320
321
322
  			if (!atomic_read(&ctx->dead)) {
  				ctx->user_id = ctx->mmap_base = vma->vm_start;
  				res = 0;
  			}
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
323
324
325
326
327
328
  			break;
  		}
  	}
  
  	rcu_read_unlock();
  	spin_unlock(&mm->ioctx_lock);
b2edffdd9   Al Viro   fix mremap() vs. ...
329
  	return res;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
330
  }
5477e70a6   Oleg Nesterov   mm: move ->mremap...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
  /* vm_ops installed on the ring mapping by aio_ring_mmap() */
  static const struct vm_operations_struct aio_ring_vm_ops = {
  #if IS_ENABLED(CONFIG_MMU)
  	.page_mkwrite	= filemap_page_mkwrite,
  	.map_pages	= filemap_map_pages,
  	.fault		= filemap_fault,
  #endif
  	.mremap		= aio_ring_mremap,
  };
  
  /* ->mmap of the ring file: install aio_ring_vm_ops and set VM_DONTEXPAND */
  static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
  {
  	vma->vm_ops = &aio_ring_vm_ops;
  	vma->vm_flags |= VM_DONTEXPAND;

  	return 0;
  }
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
346
347
348
  /* File operations of the ring file; only ->mmap is provided */
  static const struct file_operations aio_ring_fops = {
  	.mmap		= aio_ring_mmap,
  };
0c45355fc   Benjamin LaHaise   aio: fix build wh...
349
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
350
351
352
  static int aio_migratepage(struct address_space *mapping, struct page *new,
  			struct page *old, enum migrate_mode mode)
  {
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
353
  	struct kioctx *ctx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
354
  	unsigned long flags;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
355
  	pgoff_t idx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
356
  	int rc;
2916ecc0f   Jérôme Glisse   mm/migrate: new m...
357
358
359
360
361
362
363
  	/*
  	 * We cannot support the _NO_COPY case here, because copy needs to
  	 * happen under the ctx->completion_lock. That does not work with the
  	 * migration workflow of MIGRATE_SYNC_NO_COPY.
  	 */
  	if (mode == MIGRATE_SYNC_NO_COPY)
  		return -EINVAL;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
364
  	rc = 0;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
365
  	/* mapping->private_lock here protects against the kioctx teardown.  */
8e321fefb   Benjamin LaHaise   aio/migratepages:...
366
367
  	spin_lock(&mapping->private_lock);
  	ctx = mapping->private_data;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
  	if (!ctx) {
  		rc = -EINVAL;
  		goto out;
  	}
  
  	/* The ring_lock mutex.  The prevents aio_read_events() from writing
  	 * to the ring's head, and prevents page migration from mucking in
  	 * a partially initialized kiotx.
  	 */
  	if (!mutex_trylock(&ctx->ring_lock)) {
  		rc = -EAGAIN;
  		goto out;
  	}
  
  	idx = old->index;
  	if (idx < (pgoff_t)ctx->nr_pages) {
  		/* Make sure the old page hasn't already been changed */
  		if (ctx->ring_pages[idx] != old)
  			rc = -EAGAIN;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
387
388
  	} else
  		rc = -EINVAL;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
389
390
  
  	if (rc != 0)
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
391
  		goto out_unlock;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
392

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
393
394
  	/* Writeback must be complete */
  	BUG_ON(PageWriteback(old));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
395
  	get_page(new);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
396

8e321fefb   Benjamin LaHaise   aio/migratepages:...
397
  	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
398
  	if (rc != MIGRATEPAGE_SUCCESS) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
399
  		put_page(new);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
400
  		goto out_unlock;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
401
  	}
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
402
403
404
  	/* Take completion_lock to prevent other writes to the ring buffer
  	 * while the old page is copied to the new.  This prevents new
  	 * events from being lost.
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
405
  	 */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
406
407
408
409
410
  	spin_lock_irqsave(&ctx->completion_lock, flags);
  	migrate_page_copy(new, old);
  	BUG_ON(ctx->ring_pages[idx] != old);
  	ctx->ring_pages[idx] = new;
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
411

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
412
413
  	/* The old page is no longer accessible. */
  	put_page(old);
8e321fefb   Benjamin LaHaise   aio/migratepages:...
414

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
415
416
417
418
  out_unlock:
  	mutex_unlock(&ctx->ring_lock);
  out:
  	spin_unlock(&mapping->private_lock);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
419
  	return rc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
420
  }
0c45355fc   Benjamin LaHaise   aio: fix build wh...
421
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
422

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
423
  static const struct address_space_operations aio_ctx_aops = {
835f252c6   Gu Zheng   aio: fix uncorren...
424
  	.set_page_dirty = __set_page_dirty_no_writeback,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
425
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
426
  	.migratepage	= aio_migratepage,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
427
  #endif
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
428
  };
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
429
  static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
430
431
  {
  	struct aio_ring *ring;
41003a7bc   Zach Brown   aio: remove retry...
432
  	struct mm_struct *mm = current->mm;
3dc9acb67   Linus Torvalds   aio: clean up and...
433
  	unsigned long size, unused;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
434
  	int nr_pages;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
435
436
  	int i;
  	struct file *file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
437
438
439
440
441
442
  
  	/* Compensate for the ring buffer's head/tail overlap entry */
  	nr_events += 2;	/* 1 is required, 2 for good luck */
  
  	size = sizeof(struct aio_ring);
  	size += sizeof(struct io_event) * nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
443

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
444
  	nr_pages = PFN_UP(size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
445
446
  	if (nr_pages < 0)
  		return -EINVAL;
71ad7490c   Benjamin LaHaise   rework aio migrat...
447
  	file = aio_private_file(ctx, nr_pages);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
448
449
  	if (IS_ERR(file)) {
  		ctx->aio_ring_file = NULL;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
450
  		return -ENOMEM;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
451
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
452
453
454
455
456
457
458
459
460
461
462
463
464
  	ctx->aio_ring_file = file;
  	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
  			/ sizeof(struct io_event);
  
  	ctx->ring_pages = ctx->internal_pages;
  	if (nr_pages > AIO_RING_PAGES) {
  		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
  					  GFP_KERNEL);
  		if (!ctx->ring_pages) {
  			put_aio_ring_file(ctx);
  			return -ENOMEM;
  		}
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
465
466
  	for (i = 0; i < nr_pages; i++) {
  		struct page *page;
450630975   Al Viro   don't open-code f...
467
  		page = find_or_create_page(file->f_mapping,
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
468
469
470
471
472
473
474
  					   i, GFP_HIGHUSER | __GFP_ZERO);
  		if (!page)
  			break;
  		pr_debug("pid(%d) page[%d]->count=%d
  ",
  			 current->pid, i, page_count(page));
  		SetPageUptodate(page);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
475
  		unlock_page(page);
3dc9acb67   Linus Torvalds   aio: clean up and...
476
477
  
  		ctx->ring_pages[i] = page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
478
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
479
  	ctx->nr_pages = i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
480

3dc9acb67   Linus Torvalds   aio: clean up and...
481
482
  	if (unlikely(i != nr_pages)) {
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
483
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
484
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
485
486
487
  	ctx->mmap_size = nr_pages * PAGE_SIZE;
  	pr_debug("attempting mmap of %lu bytes
  ", ctx->mmap_size);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
488

013373e8b   Michal Hocko   aio: make aio_set...
489
490
491
492
493
  	if (down_write_killable(&mm->mmap_sem)) {
  		ctx->mmap_size = 0;
  		aio_free_ring(ctx);
  		return -EINTR;
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
494
495
  	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
  				       PROT_READ | PROT_WRITE,
897ab3e0c   Mike Rapoport   userfaultfd: non-...
496
  				       MAP_SHARED, 0, &unused, NULL);
3dc9acb67   Linus Torvalds   aio: clean up and...
497
  	up_write(&mm->mmap_sem);
58c85dc20   Kent Overstreet   aio: kill struct ...
498
  	if (IS_ERR((void *)ctx->mmap_base)) {
58c85dc20   Kent Overstreet   aio: kill struct ...
499
  		ctx->mmap_size = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
500
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
501
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
502
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
503
504
  	pr_debug("mmap address: 0x%08lx
  ", ctx->mmap_base);
d6c355c7d   Benjamin LaHaise   aio: fix race in ...
505

58c85dc20   Kent Overstreet   aio: kill struct ...
506
507
  	ctx->user_id = ctx->mmap_base;
  	ctx->nr_events = nr_events; /* trusted copy */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
508

58c85dc20   Kent Overstreet   aio: kill struct ...
509
  	ring = kmap_atomic(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510
  	ring->nr = nr_events;	/* user copy */
db446a08c   Benjamin LaHaise   aio: convert the ...
511
  	ring->id = ~0U;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
512
513
514
515
516
  	ring->head = ring->tail = 0;
  	ring->magic = AIO_RING_MAGIC;
  	ring->compat_features = AIO_RING_COMPAT_FEATURES;
  	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
  	ring->header_length = sizeof(struct aio_ring);
e8e3c3d66   Cong Wang   fs: remove the se...
517
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
518
  	flush_dcache_page(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
519
520
521
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
522
523
524
  /* io_events that fit in one whole page */
  #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
  /* io_events that fit in a page once the struct aio_ring header is subtracted */
  #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
  /* Difference between the two counts above */
  #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
04b2fa9f8   Christoph Hellwig   fs: split generic...
525
  /* kiocb_set_cancel_fn
   *	Registers @cancel as the cancellation callback of @iocb, which must
   *	be the ->common member of a struct aio_kiocb.  Under ctx_lock the
   *	request is put on ctx->active_reqs, unless ki_list is already linked,
   *	and ki_cancel is set.
   */
  void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
  {
  	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
  	struct kioctx *ctx = req->ki_ctx;
  	unsigned long flags;

  	spin_lock_irqsave(&ctx->ctx_lock, flags);

  	/* A NULL ->next means ki_list has not been linked anywhere yet */
  	if (!req->ki_list.next)
  		list_add(&req->ki_list, &ctx->active_reqs);

  	req->ki_cancel = cancel;

  	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  }
  EXPORT_SYMBOL(kiocb_set_cancel_fn);
04b2fa9f8   Christoph Hellwig   fs: split generic...
541
  static int kiocb_cancel(struct aio_kiocb *kiocb)
906b973cf   Kent Overstreet   aio: add kiocb_ca...
542
  {
0460fef2a   Kent Overstreet   aio: use cancella...
543
  	kiocb_cancel_fn *old, *cancel;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
544

0460fef2a   Kent Overstreet   aio: use cancella...
545
546
547
548
549
550
551
552
  	/*
  	 * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
  	 * actually has a cancel function, hence the cmpxchg()
  	 */
  
  	cancel = ACCESS_ONCE(kiocb->ki_cancel);
  	do {
  		if (!cancel || cancel == KIOCB_CANCELLED)
57282d8fd   Kent Overstreet   aio: Kill ki_users
553
  			return -EINVAL;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
554

0460fef2a   Kent Overstreet   aio: use cancella...
555
556
557
  		old = cancel;
  		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
  	} while (cancel != old);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
558

04b2fa9f8   Christoph Hellwig   fs: split generic...
559
  	return cancel(&kiocb->common);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
560
  }
076c7c068   Tejun Heo   fs/aio: Add expli...
561
562
563
564
565
566
  /*
   * free_ioctx() should be RCU delayed to synchronize against the RCU
   * protected lookup_ioctx() and also needs process context to call
   * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
   * ->free_work.
   */
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
567
  static void free_ioctx(struct work_struct *work)
36f558890   Kent Overstreet   aio: refcounting ...
568
  {
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
569
  	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
570

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
571
572
  	pr_debug("freeing %p
  ", ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
573

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
574
  	aio_free_ring(ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
575
  	free_percpu(ctx->cpu);
9a1049da9   Tejun Heo   percpu-refcount: ...
576
577
  	percpu_ref_exit(&ctx->reqs);
  	percpu_ref_exit(&ctx->users);
36f558890   Kent Overstreet   aio: refcounting ...
578
579
  	kmem_cache_free(kioctx_cachep, ctx);
  }
076c7c068   Tejun Heo   fs/aio: Add expli...
580
581
582
583
584
585
586
  /* RCU callback: hand the final teardown of the kioctx over to a work item */
  static void free_ioctx_rcufn(struct rcu_head *head)
  {
  	struct kioctx *ioctx;

  	ioctx = container_of(head, struct kioctx, free_rcu);
  	INIT_WORK(&ioctx->free_work, free_ioctx);
  	schedule_work(&ioctx->free_work);
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
587
588
589
  static void free_ioctx_reqs(struct percpu_ref *ref)
  {
  	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
e02ba72aa   Anatol Pomozov   aio: block io_des...
590
  	/* At this point we know that there are no any in-flight requests */
dc48e56d7   Jens Axboe   aio: fix serial d...
591
592
  	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  		complete(&ctx->rq_wait->comp);
e02ba72aa   Anatol Pomozov   aio: block io_des...
593

076c7c068   Tejun Heo   fs/aio: Add expli...
594
595
  	/* Synchronize against RCU protected table->table[] dereferences */
  	call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
596
  }
36f558890   Kent Overstreet   aio: refcounting ...
597
598
599
600
601
  /*
   * When this function runs, the kioctx has been removed from the "hash table"
   * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
   * now it's safe to cancel any that need to be.
   */
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
602
  static void free_ioctx_users(struct percpu_ref *ref)
36f558890   Kent Overstreet   aio: refcounting ...
603
  {
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
604
  	struct kioctx *ctx = container_of(ref, struct kioctx, users);
04b2fa9f8   Christoph Hellwig   fs: split generic...
605
  	struct aio_kiocb *req;
36f558890   Kent Overstreet   aio: refcounting ...
606
607
608
609
610
  
  	spin_lock_irq(&ctx->ctx_lock);
  
  	while (!list_empty(&ctx->active_reqs)) {
  		req = list_first_entry(&ctx->active_reqs,
04b2fa9f8   Christoph Hellwig   fs: split generic...
611
  				       struct aio_kiocb, ki_list);
d52a8f9ea   Fabian Frederick   fs/aio.c: Remove ...
612
  		kiocb_cancel(req);
6a19487d5   Al Viro   fix io_destroy()/...
613
  		list_del_init(&req->ki_list);
36f558890   Kent Overstreet   aio: refcounting ...
614
615
616
  	}
  
  	spin_unlock_irq(&ctx->ctx_lock);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
617
618
  	percpu_ref_kill(&ctx->reqs);
  	percpu_ref_put(&ctx->reqs);
36f558890   Kent Overstreet   aio: refcounting ...
619
  }
db446a08c   Benjamin LaHaise   aio: convert the ...
620
621
622
623
624
625
626
  static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  {
  	unsigned i, new_nr;
  	struct kioctx_table *table, *old;
  	struct aio_ring *ring;
  
  	spin_lock(&mm->ioctx_lock);
855ef0dec   Oleg Nesterov   aio: kill the mis...
627
  	table = rcu_dereference_raw(mm->ioctx_table);
db446a08c   Benjamin LaHaise   aio: convert the ...
628
629
630
631
  
  	while (1) {
  		if (table)
  			for (i = 0; i < table->nr; i++)
cd21b3400   Tejun Heo   fs/aio: Use RCU a...
632
  				if (!rcu_access_pointer(table->table[i])) {
db446a08c   Benjamin LaHaise   aio: convert the ...
633
  					ctx->id = i;
cd21b3400   Tejun Heo   fs/aio: Use RCU a...
634
  					rcu_assign_pointer(table->table[i], ctx);
db446a08c   Benjamin LaHaise   aio: convert the ...
635
  					spin_unlock(&mm->ioctx_lock);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
636
637
638
639
  					/* While kioctx setup is in progress,
  					 * we are protected from page migration
  					 * changes ring_pages by ->ring_lock.
  					 */
db446a08c   Benjamin LaHaise   aio: convert the ...
640
641
642
643
644
645
646
  					ring = kmap_atomic(ctx->ring_pages[0]);
  					ring->id = ctx->id;
  					kunmap_atomic(ring);
  					return 0;
  				}
  
  		new_nr = (table ? table->nr : 1) * 4;
db446a08c   Benjamin LaHaise   aio: convert the ...
647
648
649
650
651
652
653
654
655
656
  		spin_unlock(&mm->ioctx_lock);
  
  		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
  				new_nr, GFP_KERNEL);
  		if (!table)
  			return -ENOMEM;
  
  		table->nr = new_nr;
  
  		spin_lock(&mm->ioctx_lock);
855ef0dec   Oleg Nesterov   aio: kill the mis...
657
  		old = rcu_dereference_raw(mm->ioctx_table);
db446a08c   Benjamin LaHaise   aio: convert the ...
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
  
  		if (!old) {
  			rcu_assign_pointer(mm->ioctx_table, table);
  		} else if (table->nr > old->nr) {
  			memcpy(table->table, old->table,
  			       old->nr * sizeof(struct kioctx *));
  
  			rcu_assign_pointer(mm->ioctx_table, table);
  			kfree_rcu(old, rcu);
  		} else {
  			kfree(table);
  			table = old;
  		}
  	}
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
673
674
675
676
677
678
679
680
681
  /*
   * Subtract @nr from the global aio_nr counter under aio_nr_lock.  If the
   * subtraction would wrap below zero, warn and clamp the counter to 0.
   */
  static void aio_nr_sub(unsigned nr)
  {
  	spin_lock(&aio_nr_lock);
  	aio_nr = WARN_ON(aio_nr - nr > aio_nr) ? 0 : aio_nr - nr;
  	spin_unlock(&aio_nr_lock);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
682
683
684
685
686
  /* ioctx_alloc
   *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
   */
  static struct kioctx *ioctx_alloc(unsigned nr_events)
  {
41003a7bc   Zach Brown   aio: remove retry...
687
  	struct mm_struct *mm = current->mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
688
  	struct kioctx *ctx;
e23754f88   Al Viro   aio: don't bother...
689
  	int err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
690

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
691
  	/*
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
692
693
694
695
696
697
  	 * Store the original nr_events -- what userspace passed to io_setup(),
  	 * for counting against the global limit -- before it changes.
  	 */
  	unsigned int max_reqs = nr_events;
  
  	/*
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
698
699
700
701
702
703
704
705
706
707
  	 * We keep track of the number of available ringbuffer slots, to prevent
  	 * overflow (reqs_available), and we also use percpu counters for this.
  	 *
  	 * So since up to half the slots might be on other cpu's percpu counters
  	 * and unavailable, double nr_events so userspace sees what they
  	 * expected: additionally, we move req_batch slots to/from percpu
  	 * counters at a time, so make sure that isn't 0:
  	 */
  	nr_events = max(nr_events, num_possible_cpus() * 4);
  	nr_events *= 2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
  	/* Prevent overflows */
08397acdd   Al Viro   ioctx_alloc(): re...
709
  	if (nr_events > (0x10000000U / sizeof(struct io_event))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
710
711
712
713
  		pr_debug("ENOMEM: nr_events too high
  ");
  		return ERR_PTR(-EINVAL);
  	}
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
714
  	if (!nr_events || (unsigned long)max_reqs > aio_max_nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
715
  		return ERR_PTR(-EAGAIN);
c37622296   Robert P. J. Day   [PATCH] Transform...
716
  	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
717
718
  	if (!ctx)
  		return ERR_PTR(-ENOMEM);
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
719
  	ctx->max_reqs = max_reqs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
720

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
721
  	spin_lock_init(&ctx->ctx_lock);
0460fef2a   Kent Overstreet   aio: use cancella...
722
  	spin_lock_init(&ctx->completion_lock);
58c85dc20   Kent Overstreet   aio: kill struct ...
723
  	mutex_init(&ctx->ring_lock);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
724
725
726
  	/* Protect against page migration throughout kiotx setup by keeping
  	 * the ring_lock mutex held until setup is complete. */
  	mutex_lock(&ctx->ring_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
728
729
  	init_waitqueue_head(&ctx->wait);
  
  	INIT_LIST_HEAD(&ctx->active_reqs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
730

2aad2a86f   Tejun Heo   percpu_ref: add P...
731
  	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
732
  		goto err;
2aad2a86f   Tejun Heo   percpu_ref: add P...
733
  	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
734
  		goto err;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
735
736
  	ctx->cpu = alloc_percpu(struct kioctx_cpu);
  	if (!ctx->cpu)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
737
  		goto err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
738

2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
739
  	err = aio_setup_ring(ctx, nr_events);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
740
  	if (err < 0)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
741
  		goto err;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
742

34e83fc61   Kent Overstreet   aio: reqs_active ...
743
  	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
744
  	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
6878ea72a   Benjamin LaHaise   aio: be defensive...
745
746
  	if (ctx->req_batch < 1)
  		ctx->req_batch = 1;
34e83fc61   Kent Overstreet   aio: reqs_active ...
747

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
748
  	/* limit the number of system wide aios */
9fa1cb397   Al Viro   aio: aio_nr_lock ...
749
  	spin_lock(&aio_nr_lock);
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
750
751
  	if (aio_nr + ctx->max_reqs > aio_max_nr ||
  	    aio_nr + ctx->max_reqs < aio_nr) {
9fa1cb397   Al Viro   aio: aio_nr_lock ...
752
  		spin_unlock(&aio_nr_lock);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
753
  		err = -EAGAIN;
d1b943271   Gu Zheng   aio: clean up aio...
754
  		goto err_ctx;
2dd542b7a   Al Viro   aio: aio_nr decre...
755
756
  	}
  	aio_nr += ctx->max_reqs;
9fa1cb397   Al Viro   aio: aio_nr_lock ...
757
  	spin_unlock(&aio_nr_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
758

1881686f8   Benjamin LaHaise   aio: fix kioctx l...
759
760
  	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
  	percpu_ref_get(&ctx->reqs);	/* free_ioctx_users() will drop this */
723be6e39   Kent Overstreet   aio: percpu ioctx...
761

da90382c2   Benjamin LaHaise   aio: fix error ha...
762
763
  	err = ioctx_add_table(ctx, mm);
  	if (err)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
764
  		goto err_cleanup;
da90382c2   Benjamin LaHaise   aio: fix error ha...
765

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
766
767
  	/* Release the ring_lock mutex now that all setup is complete. */
  	mutex_unlock(&ctx->ring_lock);
caf4167aa   Kent Overstreet   aio: dprintk() ->...
768
769
  	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x
  ",
58c85dc20   Kent Overstreet   aio: kill struct ...
770
  		 ctx, ctx->user_id, mm, ctx->nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
771
  	return ctx;
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
772
773
  err_cleanup:
  	aio_nr_sub(ctx->max_reqs);
d1b943271   Gu Zheng   aio: clean up aio...
774
  err_ctx:
deeb8525f   Al Viro   ioctx_alloc(): fi...
775
776
777
  	atomic_set(&ctx->dead, 1);
  	if (ctx->mmap_size)
  		vm_munmap(ctx->mmap_base, ctx->mmap_size);
d1b943271   Gu Zheng   aio: clean up aio...
778
  	aio_free_ring(ctx);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
779
  err:
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
780
  	mutex_unlock(&ctx->ring_lock);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
781
  	free_percpu(ctx->cpu);
9a1049da9   Tejun Heo   percpu-refcount: ...
782
783
  	percpu_ref_exit(&ctx->reqs);
  	percpu_ref_exit(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
784
  	kmem_cache_free(kioctx_cachep, ctx);
caf4167aa   Kent Overstreet   aio: dprintk() ->...
785
786
  	pr_debug("error allocating ioctx %d
  ", err);
e23754f88   Al Viro   aio: don't bother...
787
  	return ERR_PTR(err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
788
  }
36f558890   Kent Overstreet   aio: refcounting ...
789
790
791
792
793
  /* kill_ioctx
   *	Cancels all outstanding aio requests on an aio context.  Used
   *	when the processes owning a context have all exited to encourage
   *	the rapid destruction of the kioctx.
   */
fb2d44838   Benjamin LaHaise   aio: report error...
794
  static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
dc48e56d7   Jens Axboe   aio: fix serial d...
795
  		      struct ctx_rq_wait *wait)
36f558890   Kent Overstreet   aio: refcounting ...
796
  {
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
797
  	struct kioctx_table *table;
db446a08c   Benjamin LaHaise   aio: convert the ...
798

b2edffdd9   Al Viro   fix mremap() vs. ...
799
800
801
  	spin_lock(&mm->ioctx_lock);
  	if (atomic_xchg(&ctx->dead, 1)) {
  		spin_unlock(&mm->ioctx_lock);
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
802
  		return -EINVAL;
b2edffdd9   Al Viro   fix mremap() vs. ...
803
  	}
db446a08c   Benjamin LaHaise   aio: convert the ...
804

855ef0dec   Oleg Nesterov   aio: kill the mis...
805
  	table = rcu_dereference_raw(mm->ioctx_table);
cd21b3400   Tejun Heo   fs/aio: Use RCU a...
806
807
  	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
  	RCU_INIT_POINTER(table->table[ctx->id], NULL);
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
808
  	spin_unlock(&mm->ioctx_lock);
4fcc712f5   Kent Overstreet   aio: fix io_destr...
809

076c7c068   Tejun Heo   fs/aio: Add expli...
810
  	/* free_ioctx_reqs() will do the necessary RCU synchronization */
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
811
  	wake_up_all(&ctx->wait);
4fcc712f5   Kent Overstreet   aio: fix io_destr...
812

fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
813
814
815
816
817
818
819
820
  	/*
  	 * It'd be more correct to do this in free_ioctx(), after all
  	 * the outstanding kiocbs have finished - but by then io_destroy
  	 * has already returned, so io_setup() could potentially return
  	 * -EAGAIN with no ioctxs actually in use (as far as userspace
  	 *  could tell).
  	 */
  	aio_nr_sub(ctx->max_reqs);
4fcc712f5   Kent Overstreet   aio: fix io_destr...
821

fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
822
823
  	if (ctx->mmap_size)
  		vm_munmap(ctx->mmap_base, ctx->mmap_size);
fb2d44838   Benjamin LaHaise   aio: report error...
824

dc48e56d7   Jens Axboe   aio: fix serial d...
825
  	ctx->rq_wait = wait;
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
826
827
  	percpu_ref_kill(&ctx->users);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
828
  }
36f558890   Kent Overstreet   aio: refcounting ...
829
830
831
832
833
834
835
  /*
   * exit_aio: called when the last user of mm goes away.  At this point, there is
   * no way for any new requests to be submited or any of the io_* syscalls to be
   * called on the context.
   *
   * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
   * them.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
836
   */
fc9b52cd8   Harvey Harrison   fs: remove fastca...
837
  void exit_aio(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
838
  {
4b70ac5fd   Oleg Nesterov   aio: change exit_...
839
  	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
dc48e56d7   Jens Axboe   aio: fix serial d...
840
841
  	struct ctx_rq_wait wait;
  	int i, skipped;
db446a08c   Benjamin LaHaise   aio: convert the ...
842

4b70ac5fd   Oleg Nesterov   aio: change exit_...
843
844
  	if (!table)
  		return;
db446a08c   Benjamin LaHaise   aio: convert the ...
845

dc48e56d7   Jens Axboe   aio: fix serial d...
846
847
848
849
  	atomic_set(&wait.count, table->nr);
  	init_completion(&wait.comp);
  
  	skipped = 0;
4b70ac5fd   Oleg Nesterov   aio: change exit_...
850
  	for (i = 0; i < table->nr; ++i) {
cd21b3400   Tejun Heo   fs/aio: Use RCU a...
851
852
  		struct kioctx *ctx =
  			rcu_dereference_protected(table->table[i], true);
abf137dd7   Jens Axboe   aio: make the loo...
853

dc48e56d7   Jens Axboe   aio: fix serial d...
854
855
  		if (!ctx) {
  			skipped++;
4b70ac5fd   Oleg Nesterov   aio: change exit_...
856
  			continue;
dc48e56d7   Jens Axboe   aio: fix serial d...
857
  		}
936af1576   Al Viro   aio: don't bother...
858
  		/*
4b70ac5fd   Oleg Nesterov   aio: change exit_...
859
860
861
862
863
  		 * We don't need to bother with munmap() here - exit_mmap(mm)
  		 * is coming and it'll unmap everything. And we simply can't,
  		 * this is not necessarily our ->mm.
  		 * Since kill_ioctx() uses non-zero ->mmap_size as indicator
  		 * that it needs to unmap the area, just set it to 0.
936af1576   Al Viro   aio: don't bother...
864
  		 */
58c85dc20   Kent Overstreet   aio: kill struct ...
865
  		ctx->mmap_size = 0;
dc48e56d7   Jens Axboe   aio: fix serial d...
866
867
  		kill_ioctx(mm, ctx, &wait);
  	}
36f558890   Kent Overstreet   aio: refcounting ...
868

dc48e56d7   Jens Axboe   aio: fix serial d...
869
  	if (!atomic_sub_and_test(skipped, &wait.count)) {
6098b45b3   Gu Zheng   aio: block exit_a...
870
  		/* Wait until all IO for the context are done. */
dc48e56d7   Jens Axboe   aio: fix serial d...
871
  		wait_for_completion(&wait.comp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
872
  	}
4b70ac5fd   Oleg Nesterov   aio: change exit_...
873
874
875
  
  	RCU_INIT_POINTER(mm->ioctx_table, NULL);
  	kfree(table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
876
  }
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
877
878
879
  static void put_reqs_available(struct kioctx *ctx, unsigned nr)
  {
  	struct kioctx_cpu *kcpu;
263782c1c   Benjamin LaHaise   aio: protect reqs...
880
  	unsigned long flags;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
881

263782c1c   Benjamin LaHaise   aio: protect reqs...
882
  	local_irq_save(flags);
be6fb451a   Benjamin LaHaise   aio: remove no lo...
883
  	kcpu = this_cpu_ptr(ctx->cpu);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
884
  	kcpu->reqs_available += nr;
263782c1c   Benjamin LaHaise   aio: protect reqs...
885

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
886
887
888
889
  	while (kcpu->reqs_available >= ctx->req_batch * 2) {
  		kcpu->reqs_available -= ctx->req_batch;
  		atomic_add(ctx->req_batch, &ctx->reqs_available);
  	}
263782c1c   Benjamin LaHaise   aio: protect reqs...
890
  	local_irq_restore(flags);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
891
892
893
894
895
896
  }
  
  static bool get_reqs_available(struct kioctx *ctx)
  {
  	struct kioctx_cpu *kcpu;
  	bool ret = false;
263782c1c   Benjamin LaHaise   aio: protect reqs...
897
  	unsigned long flags;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
898

263782c1c   Benjamin LaHaise   aio: protect reqs...
899
  	local_irq_save(flags);
be6fb451a   Benjamin LaHaise   aio: remove no lo...
900
  	kcpu = this_cpu_ptr(ctx->cpu);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
  	if (!kcpu->reqs_available) {
  		int old, avail = atomic_read(&ctx->reqs_available);
  
  		do {
  			if (avail < ctx->req_batch)
  				goto out;
  
  			old = avail;
  			avail = atomic_cmpxchg(&ctx->reqs_available,
  					       avail, avail - ctx->req_batch);
  		} while (avail != old);
  
  		kcpu->reqs_available += ctx->req_batch;
  	}
  
  	ret = true;
  	kcpu->reqs_available--;
  out:
263782c1c   Benjamin LaHaise   aio: protect reqs...
919
  	local_irq_restore(flags);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
920
921
  	return ret;
  }
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
  /* refill_reqs_available
   *	Returns to reqs_available those completed events that are no longer
   *	occupying the completion ring, i.e. completed_events minus whatever
   *	still sits between @head and @tail.  Called from aio_complete() (as
   *	an opportunistic top-up) and, via user_refill_reqs_available(), from
   *	aio_get_req() when it has run out of slots.  Must be called with
   *	ctx->completion_lock held.
   */
  static void refill_reqs_available(struct kioctx *ctx, unsigned head,
                                    unsigned tail)
  {
  	unsigned in_ring, reclaimable;
  
  	/* Clamp head since userland can write to it. */
  	head %= ctx->nr_events;
  
  	in_ring = (head <= tail) ? tail - head
  				 : ctx->nr_events - (head - tail);
  
  	/* Only events beyond what is still queued can be reclaimed. */
  	reclaimable = ctx->completed_events;
  	if (reclaimable <= in_ring)
  		return;
  	reclaimable -= in_ring;
  
  	ctx->completed_events -= reclaimable;
  	put_reqs_available(ctx, reclaimable);
  }
  
  /* user_refill_reqs_available
   *	Called by aio_get_req() when no request slot could be reserved, to
   *	reclaim slots for events that have already left the completion ring.
   */
  static void user_refill_reqs_available(struct kioctx *ctx)
  {
  	spin_lock_irq(&ctx->completion_lock);
  
  	if (ctx->completed_events) {
  		struct aio_ring *ring;
  		unsigned ring_head;
  
  		/*
  		 * Reading ring->head may race with aio_read_events_ring(),
  		 * but that is fine: both the old and the new value are
  		 * valid.  What matters is that head cannot pass tail,
  		 * because holding ctx->completion_lock keeps aio_complete()
  		 * from advancing tail.  Even if head is bogus, the check
  		 * against ctx->completed_events in refill_reqs_available()
  		 * keeps the result safe.
  		 */
  		ring = kmap_atomic(ctx->ring_pages[0]);
  		ring_head = ring->head;
  		kunmap_atomic(ring);
  
  		refill_reqs_available(ctx, ring_head, ctx->tail);
  	}
  
  	spin_unlock_irq(&ctx->completion_lock);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
983
  /* aio_get_req
57282d8fd   Kent Overstreet   aio: Kill ki_users
984
985
   *	Allocate a slot for an aio request.
   * Returns NULL if no requests are free.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986
   */
04b2fa9f8   Christoph Hellwig   fs: split generic...
987
  static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
988
  {
04b2fa9f8   Christoph Hellwig   fs: split generic...
989
  	struct aio_kiocb *req;
a1c8eae75   Kent Overstreet   aio: kill batch a...
990

d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
991
992
993
994
995
  	if (!get_reqs_available(ctx)) {
  		user_refill_reqs_available(ctx);
  		if (!get_reqs_available(ctx))
  			return NULL;
  	}
a1c8eae75   Kent Overstreet   aio: kill batch a...
996

0460fef2a   Kent Overstreet   aio: use cancella...
997
  	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
  	if (unlikely(!req))
a1c8eae75   Kent Overstreet   aio: kill batch a...
999
  		goto out_put;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1000

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
1001
  	percpu_ref_get(&ctx->reqs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002
  	req->ki_ctx = ctx;
080d676de   Jeff Moyer   aio: allocate kio...
1003
  	return req;
a1c8eae75   Kent Overstreet   aio: kill batch a...
1004
  out_put:
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
1005
  	put_reqs_available(ctx, 1);
a1c8eae75   Kent Overstreet   aio: kill batch a...
1006
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1007
  }
04b2fa9f8   Christoph Hellwig   fs: split generic...
1008
  static void kiocb_free(struct aio_kiocb *req)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1009
  {
04b2fa9f8   Christoph Hellwig   fs: split generic...
1010
1011
  	if (req->common.ki_filp)
  		fput(req->common.ki_filp);
133890103   Davide Libenzi   eventfd: revised ...
1012
1013
  	if (req->ki_eventfd != NULL)
  		eventfd_ctx_put(req->ki_eventfd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1014
  	kmem_cache_free(kiocb_cachep, req);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1015
  }
d5470b596   Adrian Bunk   fs/aio.c: make 3 ...
1016
  static struct kioctx *lookup_ioctx(unsigned long ctx_id)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1017
  {
db446a08c   Benjamin LaHaise   aio: convert the ...
1018
  	struct aio_ring __user *ring  = (void __user *)ctx_id;
abf137dd7   Jens Axboe   aio: make the loo...
1019
  	struct mm_struct *mm = current->mm;
65c24491b   Jeff Moyer   aio: lookup_ioctx...
1020
  	struct kioctx *ctx, *ret = NULL;
db446a08c   Benjamin LaHaise   aio: convert the ...
1021
1022
1023
1024
1025
  	struct kioctx_table *table;
  	unsigned id;
  
  	if (get_user(id, &ring->id))
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026

abf137dd7   Jens Axboe   aio: make the loo...
1027
  	rcu_read_lock();
db446a08c   Benjamin LaHaise   aio: convert the ...
1028
  	table = rcu_dereference(mm->ioctx_table);
abf137dd7   Jens Axboe   aio: make the loo...
1029

db446a08c   Benjamin LaHaise   aio: convert the ...
1030
1031
  	if (!table || id >= table->nr)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1032

5f4610fe2   Jeff Moyer   aio: fix spectre ...
1033
  	id = array_index_nospec(id, table->nr);
cd21b3400   Tejun Heo   fs/aio: Use RCU a...
1034
  	ctx = rcu_dereference(table->table[id]);
f30d704fe   Benjamin LaHaise   aio: table lookup...
1035
  	if (ctx && ctx->user_id == ctx_id) {
fbcede36b   Al Viro   aio: fix io_destr...
1036
1037
  		if (percpu_ref_tryget_live(&ctx->users))
  			ret = ctx;
db446a08c   Benjamin LaHaise   aio: convert the ...
1038
1039
  	}
  out:
abf137dd7   Jens Axboe   aio: make the loo...
1040
  	rcu_read_unlock();
65c24491b   Jeff Moyer   aio: lookup_ioctx...
1041
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1042
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1043
1044
  /* aio_complete
   *	Called when the io request on the given iocb is complete.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1045
   */
04b2fa9f8   Christoph Hellwig   fs: split generic...
1046
  static void aio_complete(struct kiocb *kiocb, long res, long res2)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1047
  {
04b2fa9f8   Christoph Hellwig   fs: split generic...
1048
  	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1049
  	struct kioctx	*ctx = iocb->ki_ctx;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1050
  	struct aio_ring	*ring;
21b40200c   Kent Overstreet   aio: use flush_dc...
1051
  	struct io_event	*ev_page, *event;
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
1052
  	unsigned tail, pos, head;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1053
  	unsigned long	flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1054

70fe2f481   Jan Kara   aio: fix freeze p...
1055
1056
1057
1058
1059
1060
1061
  	if (kiocb->ki_flags & IOCB_WRITE) {
  		struct file *file = kiocb->ki_filp;
  
  		/*
  		 * Tell lockdep we inherited freeze protection from submission
  		 * thread.
  		 */
a12f1ae61   Shaohua Li   aio: fix lock dep...
1062
1063
  		if (S_ISREG(file_inode(file)->i_mode))
  			__sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
70fe2f481   Jan Kara   aio: fix freeze p...
1064
1065
  		file_end_write(file);
  	}
20dcae324   Zach Brown   [PATCH] aio: remo...
1066
1067
1068
1069
1070
1071
  	/*
  	 * Special case handling for sync iocbs:
  	 *  - events go directly into the iocb for fast handling
  	 *  - the sync task with the iocb in its stack holds the single iocb
  	 *    ref, no other paths have a way to get another ref
  	 *  - the sync task helpfully left a reference to itself in the iocb
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1072
  	 */
04b2fa9f8   Christoph Hellwig   fs: split generic...
1073
  	BUG_ON(is_sync_kiocb(kiocb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1074

0460fef2a   Kent Overstreet   aio: use cancella...
1075
1076
1077
1078
1079
1080
1081
  	if (iocb->ki_list.next) {
  		unsigned long flags;
  
  		spin_lock_irqsave(&ctx->ctx_lock, flags);
  		list_del(&iocb->ki_list);
  		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  	}
11599ebac   Kent Overstreet   aio: make aio_put...
1082

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1083
  	/*
0460fef2a   Kent Overstreet   aio: use cancella...
1084
  	 * Add a completion event to the ring buffer. Must be done holding
4b30f07e7   Tang Chen   aio: fix wrong co...
1085
  	 * ctx->completion_lock to prevent other code from messing with the tail
0460fef2a   Kent Overstreet   aio: use cancella...
1086
1087
1088
  	 * pointer since we might be called from irq context.
  	 */
  	spin_lock_irqsave(&ctx->completion_lock, flags);
58c85dc20   Kent Overstreet   aio: kill struct ...
1089
  	tail = ctx->tail;
21b40200c   Kent Overstreet   aio: use flush_dc...
1090
  	pos = tail + AIO_EVENTS_OFFSET;
58c85dc20   Kent Overstreet   aio: kill struct ...
1091
  	if (++tail >= ctx->nr_events)
4bf69b2a0   Kenneth W Chen   [PATCH] aio: ring...
1092
  		tail = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093

58c85dc20   Kent Overstreet   aio: kill struct ...
1094
  	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
21b40200c   Kent Overstreet   aio: use flush_dc...
1095
  	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
04b2fa9f8   Christoph Hellwig   fs: split generic...
1096
  	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097
1098
1099
  	event->data = iocb->ki_user_data;
  	event->res = res;
  	event->res2 = res2;
21b40200c   Kent Overstreet   aio: use flush_dc...
1100
  	kunmap_atomic(ev_page);
58c85dc20   Kent Overstreet   aio: kill struct ...
1101
  	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
21b40200c   Kent Overstreet   aio: use flush_dc...
1102
1103
1104
  
  	pr_debug("%p[%u]: %p: %p %Lx %lx %lx
  ",
04b2fa9f8   Christoph Hellwig   fs: split generic...
1105
  		 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
caf4167aa   Kent Overstreet   aio: dprintk() ->...
1106
  		 res, res2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1107
1108
1109
1110
1111
  
  	/* after flagging the request as done, we
  	 * must never even look at it again
  	 */
  	smp_wmb();	/* make event visible before updating tail */
58c85dc20   Kent Overstreet   aio: kill struct ...
1112
  	ctx->tail = tail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1113

58c85dc20   Kent Overstreet   aio: kill struct ...
1114
  	ring = kmap_atomic(ctx->ring_pages[0]);
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
1115
  	head = ring->head;
21b40200c   Kent Overstreet   aio: use flush_dc...
1116
  	ring->tail = tail;
e8e3c3d66   Cong Wang   fs: remove the se...
1117
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
1118
  	flush_dcache_page(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1119

d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
1120
1121
1122
  	ctx->completed_events++;
  	if (ctx->completed_events > 1)
  		refill_reqs_available(ctx, head, tail);
0460fef2a   Kent Overstreet   aio: use cancella...
1123
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
21b40200c   Kent Overstreet   aio: use flush_dc...
1124
1125
  	pr_debug("added to ring %p at [%u]
  ", iocb, tail);
8d1c98b0b   Davide Libenzi   eventfd/kaio inte...
1126
1127
1128
1129
1130
1131
  
  	/*
  	 * Check if the user asked us to deliver the result through an
  	 * eventfd. The eventfd_signal() function is safe to be called
  	 * from IRQ context.
  	 */
87c3a86e1   Davide Libenzi   eventfd: remove f...
1132
  	if (iocb->ki_eventfd != NULL)
8d1c98b0b   Davide Libenzi   eventfd/kaio inte...
1133
  		eventfd_signal(iocb->ki_eventfd, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1134
  	/* everything turned out well, dispose of the aiocb. */
57282d8fd   Kent Overstreet   aio: Kill ki_users
1135
  	kiocb_free(iocb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1136

6cb2a2104   Quentin Barnes   aio: bad AIO race...
1137
1138
1139
1140
1141
1142
1143
  	/*
  	 * We have to order our ring_info tail store above and test
  	 * of the wait list below outside the wait lock.  This is
  	 * like in wake_up_bit() where clearing a bit has to be
  	 * ordered with the unlocked test.
  	 */
  	smp_mb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1144
1145
  	if (waitqueue_active(&ctx->wait))
  		wake_up(&ctx->wait);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
1146
  	percpu_ref_put(&ctx->reqs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1147
  }
2be4e7dee   Gu Zheng   aio: fix some com...
1148
  /* aio_read_events_ring
a31ad380b   Kent Overstreet   aio: make aio_rea...
1149
1150
   *	Pull an event off of the ioctx's event ring.  Returns the number of
   *	events fetched
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1151
   */
a31ad380b   Kent Overstreet   aio: make aio_rea...
1152
1153
  static long aio_read_events_ring(struct kioctx *ctx,
  				 struct io_event __user *event, long nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1154
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1155
  	struct aio_ring *ring;
5ffac122d   Kent Overstreet   aio: Don't use ct...
1156
  	unsigned head, tail, pos;
a31ad380b   Kent Overstreet   aio: make aio_rea...
1157
1158
  	long ret = 0;
  	int copy_ret;
9c9ce763b   Dave Chinner   aio: annotate aio...
1159
1160
1161
1162
1163
1164
1165
  	/*
  	 * The mutex can block and wake us up and that will cause
  	 * wait_event_interruptible_hrtimeout() to schedule without sleeping
  	 * and repeat. This should be rare enough that it doesn't cause
  	 * peformance issues. See the comment in read_events() for more detail.
  	 */
  	sched_annotate_sleep();
58c85dc20   Kent Overstreet   aio: kill struct ...
1166
  	mutex_lock(&ctx->ring_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1167

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
1168
  	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
58c85dc20   Kent Overstreet   aio: kill struct ...
1169
  	ring = kmap_atomic(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1170
  	head = ring->head;
5ffac122d   Kent Overstreet   aio: Don't use ct...
1171
  	tail = ring->tail;
a31ad380b   Kent Overstreet   aio: make aio_rea...
1172
  	kunmap_atomic(ring);
2ff396be6   Jeff Moyer   aio: add missing ...
1173
1174
1175
1176
1177
  	/*
  	 * Ensure that once we've read the current tail pointer, that
  	 * we also see the events that were stored up to the tail.
  	 */
  	smp_rmb();
5ffac122d   Kent Overstreet   aio: Don't use ct...
1178
1179
  	pr_debug("h%u t%u m%u
  ", head, tail, ctx->nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1180

5ffac122d   Kent Overstreet   aio: Don't use ct...
1181
  	if (head == tail)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1182
  		goto out;
edfbbf388   Benjamin LaHaise   aio: fix kernel m...
1183
1184
  	head %= ctx->nr_events;
  	tail %= ctx->nr_events;
a31ad380b   Kent Overstreet   aio: make aio_rea...
1185
1186
1187
1188
  	while (ret < nr) {
  		long avail;
  		struct io_event *ev;
  		struct page *page;
5ffac122d   Kent Overstreet   aio: Don't use ct...
1189
1190
  		avail = (head <= tail ?  tail : ctx->nr_events) - head;
  		if (head == tail)
a31ad380b   Kent Overstreet   aio: make aio_rea...
1191
1192
1193
1194
1195
1196
1197
  			break;
  
  		avail = min(avail, nr - ret);
  		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
  			    ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
  
  		pos = head + AIO_EVENTS_OFFSET;
58c85dc20   Kent Overstreet   aio: kill struct ...
1198
  		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
a31ad380b   Kent Overstreet   aio: make aio_rea...
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
  		pos %= AIO_EVENTS_PER_PAGE;
  
  		ev = kmap(page);
  		copy_ret = copy_to_user(event + ret, ev + pos,
  					sizeof(*ev) * avail);
  		kunmap(page);
  
  		if (unlikely(copy_ret)) {
  			ret = -EFAULT;
  			goto out;
  		}
  
  		ret += avail;
  		head += avail;
58c85dc20   Kent Overstreet   aio: kill struct ...
1213
  		head %= ctx->nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1214
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1215

58c85dc20   Kent Overstreet   aio: kill struct ...
1216
  	ring = kmap_atomic(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1217
  	ring->head = head;
91d80a84b   Zhao Hongjiang   aio: fix possible...
1218
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
1219
  	flush_dcache_page(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1220

5ffac122d   Kent Overstreet   aio: Don't use ct...
1221
1222
  	pr_debug("%li  h%u t%u
  ", ret, head, tail);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1223
  out:
58c85dc20   Kent Overstreet   aio: kill struct ...
1224
  	mutex_unlock(&ctx->ring_lock);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1225

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1226
1227
  	return ret;
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1228
1229
  /*
   * Wait-condition helper for read_events(): pull more events from the ring
   * into @event, starting after the *@i events already delivered.
   *
   * *@i accumulates the number of events copied so far; while it is still 0
   * it takes over the latest return code, so an error (or 0) ends up in the
   * caller's result.  Returns true once the caller should stop waiting:
   * an error occurred, the context is dead, or at least @min_nr events have
   * been collected.
   */
  static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
  			    struct io_event __user *event, long *i)
  {
  	long total = *i;
  	long ret;

  	ret = aio_read_events_ring(ctx, event + total, nr - total);
  	if (ret > 0)
  		total += ret;

  	/* A dead context always reports -EINVAL, whatever was just read. */
  	if (unlikely(atomic_read(&ctx->dead)))
  		ret = -EINVAL;

  	/* Nothing collected so far: hand the return code back instead. */
  	if (total == 0)
  		total = ret;

  	*i = total;

  	return ret < 0 || total >= min_nr;
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1244
  /*
   * Collect between @min_nr and @nr events from @ctx into @event, waiting
   * on ctx->wait until enough have arrived, the timeout expires, or the
   * wait is interrupted.
   *
   * @timeout may be NULL, in which case KTIME_MAX is used as the timeout.
   * A timeout that converts to 0 makes this a single non-waiting poll.
   *
   * Returns the number of events copied, -EFAULT if @timeout cannot be
   * read, -EINTR if nothing was read and a signal is pending, or the error
   * produced by aio_read_events().
   */
  static long read_events(struct kioctx *ctx, long min_nr, long nr,
  			struct io_event __user *event,
  			struct timespec __user *timeout)
  {
  	struct timespec ts;
  	ktime_t until = KTIME_MAX;
  	long ret = 0;

  	if (timeout) {
  		if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
  			return -EFAULT;

  		until = timespec_to_ktime(ts);
  	}

  	/*
  	 * aio_read_events() doubles as the wait condition below, so it runs
  	 * after prepare_to_wait() has already put the task into
  	 * TASK_INTERRUPTIBLE.
  	 *
  	 * aio_read_events() may block, and blocking flips the task state
  	 * back to TASK_RUNNING.
  	 *
  	 * That is fine as long as it does not flip the state and then return
  	 * 0 too often, because that makes us spin.  It only happens when
  	 * mutex_lock() blocks and the ring buffer then turns out to be
  	 * empty, so in practice it should be ok - but keep it in mind when
  	 * touching this code.
  	 */
  	if (until != 0)
  		wait_event_interruptible_hrtimeout(ctx->wait,
  				aio_read_events(ctx, min_nr, nr, event, &ret),
  				until);
  	else
  		aio_read_events(ctx, min_nr, nr, event, &ret);

  	if (ret == 0 && signal_pending(current))
  		ret = -EINTR;

  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
  /* sys_io_setup:
   *	Create an aio_context capable of receiving at least nr_events.
   *	ctxp must not point to an aio_context that already exists, and
   *	must be initialized to 0 prior to the call.  On successful
   *	creation of the aio_context, *ctxp is filled in with the resulting 
   *	handle.  May fail with -EINVAL if *ctxp is not initialized,
   *	if the specified nr_events exceeds internal limits.  May fail 
   *	with -EAGAIN if the specified nr_events exceeds the user's limit 
   *	of available events.  May fail with -ENOMEM if insufficient kernel
   *	resources are available.  May fail with -EFAULT if an invalid
   *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
   *	implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1298
  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctxp);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1309
  	if (unlikely(ctx || nr_events == 0)) {
acd88d4e1   Kinglong Mee   fs/aio.c: Remove ...
1310
1311
  		pr_debug("EINVAL: ctx %lu nr_events %u
  ",
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1312
  		         ctx, nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1313
1314
1315
1316
1317
1318
1319
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		ret = put_user(ioctx->user_id, ctxp);
a2e1859ad   Al Viro   aio: take final p...
1320
  		if (ret)
e02ba72aa   Anatol Pomozov   aio: block io_des...
1321
  			kill_ioctx(current->mm, ioctx, NULL);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1322
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1323
1324
1325
1326
1327
  	}
  
  out:
  	return ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
  #ifdef CONFIG_COMPAT
  /*
   * Compat flavour of io_setup: same checks and error handling, but the
   * context handle is read from and written to a 32-bit user slot.
   */
  COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
  {
  	struct kioctx *ioctx;
  	unsigned long ctx;
  	long ret;

  	ret = get_user(ctx, ctx32p);
  	if (unlikely(ret))
  		return ret;

  	if (unlikely(ctx != 0 || !nr_events)) {
  		pr_debug("EINVAL: ctx %lu nr_events %u\n",
  			 ctx, nr_events);
  		return -EINVAL;
  	}

  	ioctx = ioctx_alloc(nr_events);
  	if (IS_ERR(ioctx))
  		return PTR_ERR(ioctx);

  	/* truncating is ok because it's a user address */
  	ret = put_user((u32)ioctx->user_id, ctx32p);
  	if (ret)
  		kill_ioctx(current->mm, ioctx, NULL);
  	percpu_ref_put(&ioctx->users);

  	return ret;
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1361
1362
1363
  /* sys_io_destroy:
   *	Destroy the aio_context specified.  May cancel any outstanding 
   *	AIOs and block on completion.  Will fail with -ENOSYS if not
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1364
   *	implemented.  May fail with -EINVAL if the context pointed to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1365
1366
   *	is invalid.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1367
  SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1368
1369
1370
  {
  	struct kioctx *ioctx = lookup_ioctx(ctx);
  	if (likely(NULL != ioctx)) {
dc48e56d7   Jens Axboe   aio: fix serial d...
1371
  		struct ctx_rq_wait wait;
fb2d44838   Benjamin LaHaise   aio: report error...
1372
  		int ret;
e02ba72aa   Anatol Pomozov   aio: block io_des...
1373

dc48e56d7   Jens Axboe   aio: fix serial d...
1374
1375
  		init_completion(&wait.comp);
  		atomic_set(&wait.count, 1);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1376
1377
1378
1379
  		/* Pass requests_done to kill_ioctx() where it can be set
  		 * in a thread-safe way. If we try to set it here then we have
  		 * a race condition if two io_destroy() called simultaneously.
  		 */
dc48e56d7   Jens Axboe   aio: fix serial d...
1380
  		ret = kill_ioctx(current->mm, ioctx, &wait);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1381
  		percpu_ref_put(&ioctx->users);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1382
1383
1384
1385
1386
  
  		/* Wait until all IO for the context are done. Otherwise kernel
  		 * keep using user-space buffers even if user thinks the context
  		 * is destroyed.
  		 */
fb2d44838   Benjamin LaHaise   aio: report error...
1387
  		if (!ret)
dc48e56d7   Jens Axboe   aio: fix serial d...
1388
  			wait_for_completion(&wait.comp);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1389

fb2d44838   Benjamin LaHaise   aio: report error...
1390
  		return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1391
  	}
acd88d4e1   Kinglong Mee   fs/aio.c: Remove ...
1392
1393
  	pr_debug("EINVAL: invalid context id
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1394
1395
  	return -EINVAL;
  }
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1396
1397
  /*
   * Build the iov_iter for an aio read or write from the user iocb.
   *
   * For a plain (non-vectored) request the buffer is imported as a single
   * range into the caller-provided *@iovec, and *@iovec is then set to
   * NULL.  For a vectored request aio_buf/aio_nbytes are passed to
   * import_iovec() (or its compat variant when @compat is set) together
   * with @iovec.
   *
   * Returns the result of the import helper that was used.
   */
  static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
  		bool vectored, bool compat, struct iov_iter *iter)
  {
  	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
  	size_t len = iocb->aio_nbytes;
  	ssize_t ret;

  	if (vectored) {
  #ifdef CONFIG_COMPAT
  		if (compat)
  			return compat_import_iovec(rw, buf, len, UIO_FASTIOV,
  					iovec, iter);
  #endif
  		return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
  	}

  	ret = import_single_range(rw, buf, len, *iovec, iter);
  	/* Cleared so that the caller's kfree(iovec) becomes a no-op. */
  	*iovec = NULL;
  	return ret;
  }
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
  /*
   * Post-process the value returned by a read_iter/write_iter call.
   *
   * -EIOCBQUEUED is handed back untouched.  Every other value is reported
   * through aio_complete() and 0 is returned; the syscall-restart codes are
   * converted to -EINTR before being reported.
   */
  static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
  {
  	if (ret == -EIOCBQUEUED)
  		return ret;

  	switch (ret) {
  	case -ERESTARTSYS:
  	case -ERESTARTNOINTR:
  	case -ERESTARTNOHAND:
  	case -ERESTART_RESTARTBLOCK:
  		/*
  		 * There's no easy way to restart the syscall since other AIO's
  		 * may be already running. Just fail this IO with EINTR.
  		 */
  		ret = -EINTR;
  		break;
  	default:
  		break;
  	}

  	aio_complete(req, ret, 0);
  	return 0;
  }
  
  /*
   * Issue an aio read for @req as described by the user @iocb.
   *
   * Returns -EBADF if the file is not open for reading, -EINVAL if it has
   * no ->read_iter method, an error from aio_setup_rw() or
   * rw_verify_area(), or otherwise the result of aio_ret().
   */
  static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
  		bool compat)
  {
  	struct iovec inline_vecs[UIO_FASTIOV];
  	struct iovec *iovec = inline_vecs;
  	struct file *file = req->ki_filp;
  	struct iov_iter iter;
  	ssize_t ret;

  	if (unlikely(!(file->f_mode & FMODE_READ)))
  		return -EBADF;
  	if (unlikely(!file->f_op->read_iter))
  		return -EINVAL;

  	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
  	if (ret)
  		return ret;

  	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
  	if (ret)
  		goto out_free;

  	ret = aio_ret(req, call_read_iter(file, req, &iter));
  out_free:
  	kfree(iovec);
  	return ret;
  }
73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1457

89319d31d   Christoph Hellwig   fs: remove aio_ru...
1458
1459
1460
1461
1462
1463
1464
  /*
   * Issue an aio write for @req as described by the user @iocb.
   *
   * Returns -EBADF if the file is not open for writing, -EINVAL if it has
   * no ->write_iter method, an error from aio_setup_rw() or
   * rw_verify_area(), or otherwise the result of aio_ret().
   */
  static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
  		bool compat)
  {
  	struct iovec inline_vecs[UIO_FASTIOV];
  	struct iovec *iovec = inline_vecs;
  	struct file *file = req->ki_filp;
  	struct iov_iter iter;
  	struct inode *inode;
  	ssize_t ret;

  	if (unlikely(!(file->f_mode & FMODE_WRITE)))
  		return -EBADF;
  	if (unlikely(!file->f_op->write_iter))
  		return -EINVAL;

  	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
  	if (ret)
  		return ret;

  	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
  	if (ret)
  		goto out_free;

  	req->ki_flags |= IOCB_WRITE;
  	file_start_write(file);
  	ret = aio_ret(req, call_write_iter(file, req, &iter));

  	/*
  	 * We release freeze protection in aio_complete().  Fool lockdep
  	 * by telling it the lock got released so that it doesn't
  	 * complain about held lock when we return to userspace.
  	 */
  	inode = file_inode(file);
  	if (S_ISREG(inode->i_mode))
  		__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);

  out_free:
  	kfree(iovec);
  	return ret;
  }
d5470b596   Adrian Bunk   fs/aio.c: make 3 ...
1490
  /*
   * Validate and submit a single iocb.
   *
   * @user_iocb is the user-space address the request came from (it receives
   * the KIOCB_KEY cookie and is remembered for later lookup); @iocb is the
   * kernel copy of its contents.
   *
   * Returns 0 when the request was completed or queued.  Otherwise returns
   * a negative error: -EINVAL for a set reserved field, an overflowing
   * buffer/length or an unknown opcode, -EAGAIN when no request slot is
   * available, -EBADF for a bad file descriptor, or whatever the eventfd
   * lookup, kiocb_set_rw_flags(), put_user() or the read/write helper
   * reported.  On failure after a request was allocated, the request slot
   * and the reference on ctx->reqs are given back.
   */
  static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
  			 struct iocb *iocb, bool compat)
  {
  	struct aio_kiocb *req;
  	struct kiocb *kiocb;
  	struct file *file;
  	ssize_t ret;

  	/* enforce forwards compatibility on users */
  	if (unlikely(iocb->aio_reserved2)) {
  		pr_debug("EINVAL: reserve field set\n");
  		return -EINVAL;
  	}

  	/*
  	 * prevent overflows: the buffer address and length must survive
  	 * conversion to the native types, and the length must be
  	 * non-negative when viewed as ssize_t.
  	 */
  	if (unlikely(iocb->aio_buf != (unsigned long)iocb->aio_buf ||
  		     iocb->aio_nbytes != (size_t)iocb->aio_nbytes ||
  		     (ssize_t)iocb->aio_nbytes < 0)) {
  		pr_debug("EINVAL: overflow check\n");
  		return -EINVAL;
  	}

  	req = aio_get_req(ctx);
  	if (unlikely(!req))
  		return -EAGAIN;
  	kiocb = &req->common;

  	file = fget(iocb->aio_fildes);
  	kiocb->ki_filp = file;
  	if (unlikely(!file)) {
  		ret = -EBADF;
  		goto out_put_req;
  	}

  	kiocb->ki_pos = iocb->aio_offset;
  	kiocb->ki_complete = aio_complete;
  	kiocb->ki_flags = iocb_flags(file);
  	kiocb->ki_hint = file_write_hint(file);

  	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
  		/*
  		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
  		 * instance of the file* now. The file descriptor must be
  		 * an eventfd() fd, and will be signaled for each completed
  		 * event using the eventfd_signal() function.
  		 */
  		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
  		if (IS_ERR(req->ki_eventfd)) {
  			ret = PTR_ERR(req->ki_eventfd);
  			/* Do not leave an error pointer behind in the request. */
  			req->ki_eventfd = NULL;
  			goto out_put_req;
  		}

  		kiocb->ki_flags |= IOCB_EVENTFD;
  	}

  	ret = kiocb_set_rw_flags(kiocb, iocb->aio_rw_flags);
  	if (unlikely(ret)) {
  		pr_debug("EINVAL: aio_rw_flags\n");
  		goto out_put_req;
  	}

  	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
  	if (unlikely(ret)) {
  		pr_debug("EFAULT: aio_key\n");
  		goto out_put_req;
  	}

  	req->ki_user_iocb = user_iocb;
  	req->ki_user_data = iocb->aio_data;

  	/* Hold an extra file reference across the read/write call. */
  	get_file(file);
  	switch (iocb->aio_lio_opcode) {
  	case IOCB_CMD_PREAD:
  	case IOCB_CMD_PREADV:
  		ret = aio_read(kiocb, iocb,
  			       iocb->aio_lio_opcode == IOCB_CMD_PREADV,
  			       compat);
  		break;
  	case IOCB_CMD_PWRITE:
  	case IOCB_CMD_PWRITEV:
  		ret = aio_write(kiocb, iocb,
  				iocb->aio_lio_opcode == IOCB_CMD_PWRITEV,
  				compat);
  		break;
  	default:
  		pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
  		ret = -EINVAL;
  		break;
  	}
  	fput(file);

  	/* Completed inline (0) or queued: either way the submit succeeded. */
  	if (!ret || ret == -EIOCBQUEUED)
  		return 0;

  out_put_req:
  	put_reqs_available(ctx, 1);
  	percpu_ref_put(&ctx->reqs);
  	kiocb_free(req);
  	return ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1591
1592
  /*
   * Common implementation of io_submit for native and compat callers.
   *
   * Walks the user array @iocbpp of @nr iocb pointers, copies each iocb
   * into the kernel and submits it with io_submit_one(), stopping at the
   * first failure.
   *
   * Returns the number of iocbs submitted if at least one succeeded,
   * otherwise the error of the first iocb (or 0 when @nr is 0).  Returns
   * -EINVAL if @nr is negative or @ctx_id does not name a context, and
   * -EFAULT if the pointer array is not readable.
   */
  static long do_io_submit(aio_context_t ctx_id, long nr,
  			  struct iocb __user *__user *iocbpp, bool compat)
  {
  	struct kioctx *ctx;
  	long ret = 0;
  	/*
  	 * The counter must be as wide as @nr: @nr is only capped at
  	 * LONG_MAX/sizeof(*iocbpp), so an int counter could overflow and
  	 * would truncate the count returned to the caller.
  	 */
  	long i;
  	struct blk_plug plug;

  	if (unlikely(nr < 0))
  		return -EINVAL;

  	/* Keep nr * sizeof(*iocbpp) below from overflowing. */
  	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
  		nr = LONG_MAX/sizeof(*iocbpp);

  	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
  		return -EFAULT;

  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
  		pr_debug("EINVAL: invalid context id\n");
  		return -EINVAL;
  	}

  	blk_start_plug(&plug);

  	/*
  	 * AKPM: should this return a partial result if some of the IOs were
  	 * successfully submitted?
  	 */
  	for (i = 0; i < nr; i++) {
  		struct iocb __user *user_iocb;
  		struct iocb tmp;

  		/* The whole array was range-checked by access_ok() above. */
  		if (unlikely(__get_user(user_iocb, iocbpp + i))) {
  			ret = -EFAULT;
  			break;
  		}

  		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
  			ret = -EFAULT;
  			break;
  		}

  		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
  		if (ret)
  			break;
  	}
  	blk_finish_plug(&plug);

  	percpu_ref_put(&ctx->users);
  	/* An error is only reported when nothing at all was submitted. */
  	return i ? i : ret;
  }
9d85cba71   Jeff Moyer   aio: fix the comp...
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
  /* sys_io_submit:
   *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
   *	the number of iocbs queued; an error is returned only when the very
   *	first iocb could not be queued.  May return -EINVAL if the
   *	aio_context specified by ctx_id is invalid, if nr is < 0, if the
   *	iocb at *iocbpp[0] is not properly initialized, or if the operation
   *	specified is invalid for the file descriptor in the iocb.  May fail
   *	with -EFAULT if any of the data structures point to invalid data.
   *	May fail with -EBADF if the file descriptor specified in the first
   *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
   *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
   *	fail with -ENOSYS if not implemented.
   */
  SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
  		struct iocb __user * __user *, iocbpp)
  {
  	/* Native entry point: the iocbs are not in compat layout. */
  	return do_io_submit(ctx_id, nr, iocbpp, false);
  }
c00d2c7e8   Al Viro   move aio compat t...
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
  #ifdef CONFIG_COMPAT
  /*
   * Expand @nr 32-bit user pointers at @ptr32 into native-width user
   * pointers stored at @ptr64.  Both arrays live in user space.
   * Returns 0 on success or -EFAULT if any element cannot be read or
   * written.
   */
  static inline long
  copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
  {
  	compat_uptr_t uptr;

  	while (nr-- > 0) {
  		if (get_user(uptr, ptr32))
  			return -EFAULT;
  		if (put_user(compat_ptr(uptr), ptr64))
  			return -EFAULT;
  		ptr32++;
  		ptr64++;
  	}
  	return 0;
  }
  
  #define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
  
  /*
   * Compat io_submit: widen the array of 32-bit iocb pointers into a
   * scratch array obtained from compat_alloc_user_space(), then hand it
   * to do_io_submit() in compat mode.  At most MAX_AIO_SUBMITS iocbs are
   * processed per call; a larger @nr is silently clamped.
   */
  COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
  		       int, nr, u32 __user *, iocb)
  {
  	struct iocb __user * __user *iocb64;
  	long err;

  	if (unlikely(nr < 0))
  		return -EINVAL;

  	if (nr > MAX_AIO_SUBMITS)
  		nr = MAX_AIO_SUBMITS;

  	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));

  	err = copy_iocb(nr, iocb, iocb64);
  	if (err)
  		return err;

  	return do_io_submit(ctx_id, nr, iocb64, true);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1693
1694
  /* lookup_kiocb
   *	Finds a given iocb for cancellation.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1695
   */
04b2fa9f8   Christoph Hellwig   fs: split generic...
1696
1697
  static struct aio_kiocb *
  lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1698
  {
04b2fa9f8   Christoph Hellwig   fs: split generic...
1699
  	struct aio_kiocb *kiocb;
d00689af6   Zach Brown   [PATCH] aio: repl...
1700
1701
  
  	assert_spin_locked(&ctx->ctx_lock);
8a6608907   Kent Overstreet   aio: kill ki_key
1702
1703
  	if (key != KIOCB_KEY)
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1704
  	/* TODO: use a hash or array, this sucks. */
04b2fa9f8   Christoph Hellwig   fs: split generic...
1705
1706
  	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
  		if (kiocb->ki_user_iocb == iocb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
  			return kiocb;
  	}
  	return NULL;
  }
  
  /* sys_io_cancel:
   *	Attempts to cancel an iocb previously passed to io_submit.  If
   *	the operation is successfully cancelled, the resulting event is
   *	copied into the memory pointed to by result without being placed
   *	into the completion queue and 0 is returned.  May fail with
   *	-EFAULT if any of the data structures pointed to are invalid.
   *	May fail with -EINVAL if aio_context specified by ctx_id is
   *	invalid.  May fail with -EAGAIN if the iocb specified was not
   *	cancelled.  Will fail with -ENOSYS if not implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1722
1723
  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
  		struct io_event __user *, result)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1724
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1725
  	struct kioctx *ctx;
04b2fa9f8   Christoph Hellwig   fs: split generic...
1726
  	struct aio_kiocb *kiocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
  	u32 key;
  	int ret;
  
  	ret = get_user(key, &iocb->aio_key);
  	if (unlikely(ret))
  		return -EFAULT;
  
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx))
  		return -EINVAL;
  
  	spin_lock_irq(&ctx->ctx_lock);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1739

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1740
  	kiocb = lookup_kiocb(ctx, iocb, key);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1741
  	if (kiocb)
d52a8f9ea   Fabian Frederick   fs/aio.c: Remove ...
1742
  		ret = kiocb_cancel(kiocb);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1743
1744
  	else
  		ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1745
  	spin_unlock_irq(&ctx->ctx_lock);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1746
  	if (!ret) {
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1747
1748
1749
1750
  		/*
  		 * The result argument is no longer used - the io_event is
  		 * always delivered via the ring buffer. -EINPROGRESS indicates
  		 * cancellation is progress:
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1751
  		 */
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1752
  		ret = -EINPROGRESS;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1753
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1754

723be6e39   Kent Overstreet   aio: percpu ioctx...
1755
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1756
1757
1758
1759
1760
1761
  
  	return ret;
  }
  
  /* io_getevents:
   *	Attempts to read at least min_nr events and up to nr events from
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1762
1763
1764
1765
1766
1767
1768
1769
   *	the completion queue for the aio_context specified by ctx_id. If
   *	it succeeds, the number of read events is returned. May fail with
   *	-EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
   *	out of range, if timeout is out of range.  May fail with -EFAULT
   *	if any of the memory specified is invalid.  May return 0 or
   *	< min_nr if the timeout specified by timeout has elapsed
   *	before sufficient events are available, where timeout == NULL
   *	specifies an infinite timeout. Note that the timeout pointed to by
6900807c6   Jeff Moyer   aio: fix io_getev...
1770
   *	timeout is relative.  Will fail with -ENOSYS if not implemented.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1771
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1772
1773
1774
1775
1776
  SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
  		struct timespec __user *, timeout)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1777
1778
1779
1780
1781
  {
  	struct kioctx *ioctx = lookup_ioctx(ctx_id);
  	long ret = -EINVAL;
  
  	if (likely(ioctx)) {
2e4102559   Namhyung Kim   aio: remove unnec...
1782
  		if (likely(min_nr <= nr && min_nr >= 0))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1783
  			ret = read_events(ioctx, min_nr, nr, events, timeout);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1784
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1785
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1786
1787
  	return ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
  
  #ifdef CONFIG_COMPAT
  /*
   * Compat io_getevents: convert the compat timespec (if any) to a native
   * one, place it in scratch user memory obtained from
   * compat_alloc_user_space(), and forward everything to
   * sys_io_getevents().  Returns -EFAULT if the timeout cannot be read or
   * the converted copy cannot be written.
   */
  COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
  		       compat_long_t, min_nr,
  		       compat_long_t, nr,
  		       struct io_event __user *, events,
  		       struct compat_timespec __user *, timeout)
  {
  	struct timespec __user *ut;
  	struct timespec t;

  	/* No timeout supplied: pass NULL straight through. */
  	if (!timeout)
  		return sys_io_getevents(ctx_id, min_nr, nr, events, NULL);

  	if (compat_get_timespec(&t, timeout))
  		return -EFAULT;

  	ut = compat_alloc_user_space(sizeof(*ut));
  	if (copy_to_user(ut, &t, sizeof(t)))
  		return -EFAULT;

  	return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
  }
  #endif