Blame view

fs/aio.c 56 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   *	An async IO implementation for Linux
   *	Written by Benjamin LaHaise <bcrl@kvack.org>
   *
   *	Implements an efficient asynchronous io interface.
   *
   *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
bfe4037e7   Christoph Hellwig   aio: implement IO...
8
   *	Copyright 2018 Christoph Hellwig.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
   *
   *	See ../COPYING for licensing terms.
   */
caf4167aa   Kent Overstreet   aio: dprintk() ->...
12
  #define pr_fmt(fmt) "%s: " fmt, __func__
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
16
17
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/errno.h>
  #include <linux/time.h>
  #include <linux/aio_abi.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
18
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/syscalls.h>
b9d128f10   Jens Axboe   block: move bdi/a...
20
  #include <linux/backing-dev.h>
9018ccc45   Christoph Hellwig   aio: add a iocb r...
21
  #include <linux/refcount.h>
027445c37   Badari Pulavarty   [PATCH] Vectorize...
22
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23

174cd4b1e   Ingo Molnar   sched/headers: Pr...
24
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
28
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
3d2d827f5   Michael S. Tsirkin   mm: move use_mm/u...
29
  #include <linux/mmu_context.h>
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
30
  #include <linux/percpu.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
32
33
34
35
36
  #include <linux/slab.h>
  #include <linux/timer.h>
  #include <linux/aio.h>
  #include <linux/highmem.h>
  #include <linux/workqueue.h>
  #include <linux/security.h>
9c3060bed   Davide Libenzi   signal/timer/even...
37
  #include <linux/eventfd.h>
cfb1e33ee   Jeff Moyer   aio: implement re...
38
  #include <linux/blkdev.h>
9d85cba71   Jeff Moyer   aio: fix the comp...
39
  #include <linux/compat.h>
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
40
41
  #include <linux/migrate.h>
  #include <linux/ramfs.h>
723be6e39   Kent Overstreet   aio: percpu ioctx...
42
  #include <linux/percpu-refcount.h>
71ad7490c   Benjamin LaHaise   rework aio migrat...
43
  #include <linux/mount.h>
52db59df1   David Howells   vfs: Convert aio ...
44
  #include <linux/pseudo_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
  
  #include <asm/kmap_types.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
47
  #include <linux/uaccess.h>
a538e3ff9   Jeff Moyer   aio: fix spectre ...
48
  #include <linux/nospec.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49

68d70d03f   Al Viro   constify rw_verif...
50
  #include "internal.h"
f3a2752a4   Christoph Hellwig   aio: simplify KIO...
51
  #define KIOCB_KEY		0
4e179bca6   Kent Overstreet   aio: move private...
52
53
54
55
56
57
  #define AIO_RING_MAGIC			0xa10a10a1
  #define AIO_RING_COMPAT_FEATURES	1
  #define AIO_RING_INCOMPAT_FEATURES	0
  struct aio_ring {
  	unsigned	id;	/* kernel internal index number */
  	unsigned	nr;	/* number of io_events */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
58
59
  	unsigned	head;	/* Written to by userland or under ring_lock
  				 * mutex by aio_read_events_ring(). */
4e179bca6   Kent Overstreet   aio: move private...
60
61
62
63
64
65
66
67
68
69
  	unsigned	tail;
  
  	unsigned	magic;
  	unsigned	compat_features;
  	unsigned	incompat_features;
  	unsigned	header_length;	/* size of aio_ring */
  
  
  	struct io_event		io_events[0];
  }; /* 128 bytes + ring size */
a79d40e9b   Jens Axboe   aio: only use blk...
70
71
72
73
74
  /*
   * Plugging is meant to work with larger batches of IOs. If we don't
   * have more than the below, then don't bother setting up a plug.
   */
  #define AIO_PLUG_THRESHOLD	2
4e179bca6   Kent Overstreet   aio: move private...
75
  #define AIO_RING_PAGES	8
4e179bca6   Kent Overstreet   aio: move private...
76

db446a08c   Benjamin LaHaise   aio: convert the ...
77
  struct kioctx_table {
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
78
79
80
  	struct rcu_head		rcu;
  	unsigned		nr;
  	struct kioctx __rcu	*table[];
db446a08c   Benjamin LaHaise   aio: convert the ...
81
  };
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
82
83
84
  struct kioctx_cpu {
  	unsigned		reqs_available;
  };
dc48e56d7   Jens Axboe   aio: fix serial d...
85
86
87
88
  struct ctx_rq_wait {
  	struct completion comp;
  	atomic_t count;
  };
4e179bca6   Kent Overstreet   aio: move private...
89
  struct kioctx {
723be6e39   Kent Overstreet   aio: percpu ioctx...
90
  	struct percpu_ref	users;
36f558890   Kent Overstreet   aio: refcounting ...
91
  	atomic_t		dead;
4e179bca6   Kent Overstreet   aio: move private...
92

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
93
  	struct percpu_ref	reqs;
4e179bca6   Kent Overstreet   aio: move private...
94
  	unsigned long		user_id;
4e179bca6   Kent Overstreet   aio: move private...
95

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
96
97
98
99
100
101
102
  	struct __percpu kioctx_cpu *cpu;
  
  	/*
  	 * For percpu reqs_available, number of slots we move to/from global
  	 * counter at a time:
  	 */
  	unsigned		req_batch;
3e845ce01   Kent Overstreet   aio: change reqs_...
103
104
105
106
  	/*
  	 * This is what userspace passed to io_setup(), it's not used for
  	 * anything but counting against the global max_reqs quota.
  	 *
58c85dc20   Kent Overstreet   aio: kill struct ...
107
  	 * The real limit is nr_events - 1, which will be larger (see
3e845ce01   Kent Overstreet   aio: change reqs_...
108
109
  	 * aio_setup_ring())
  	 */
4e179bca6   Kent Overstreet   aio: move private...
110
  	unsigned		max_reqs;
58c85dc20   Kent Overstreet   aio: kill struct ...
111
112
  	/* Size of ringbuffer, in units of struct io_event */
  	unsigned		nr_events;
4e179bca6   Kent Overstreet   aio: move private...
113

58c85dc20   Kent Overstreet   aio: kill struct ...
114
115
116
117
118
  	unsigned long		mmap_base;
  	unsigned long		mmap_size;
  
  	struct page		**ring_pages;
  	long			nr_pages;
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
119
  	struct rcu_work		free_rwork;	/* see free_ioctx() */
4e23bcaeb   Kent Overstreet   aio: give shared ...
120

e02ba72aa   Anatol Pomozov   aio: block io_des...
121
122
123
  	/*
  	 * signals when all in-flight requests are done
  	 */
dc48e56d7   Jens Axboe   aio: fix serial d...
124
  	struct ctx_rq_wait	*rq_wait;
e02ba72aa   Anatol Pomozov   aio: block io_des...
125

4e23bcaeb   Kent Overstreet   aio: give shared ...
126
  	struct {
34e83fc61   Kent Overstreet   aio: reqs_active ...
127
128
129
130
131
  		/*
  		 * This counts the number of available slots in the ringbuffer,
  		 * so we avoid overflowing it: it's decremented (if positive)
  		 * when allocating a kiocb and incremented when the resulting
  		 * io_event is pulled off the ringbuffer.
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
132
133
  		 *
  		 * We batch accesses to it with a percpu version.
34e83fc61   Kent Overstreet   aio: reqs_active ...
134
135
  		 */
  		atomic_t	reqs_available;
4e23bcaeb   Kent Overstreet   aio: give shared ...
136
137
138
139
140
141
  	} ____cacheline_aligned_in_smp;
  
  	struct {
  		spinlock_t	ctx_lock;
  		struct list_head active_reqs;	/* used for cancellation */
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
142
143
  	struct {
  		struct mutex	ring_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
144
145
  		wait_queue_head_t wait;
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
146
147
148
  
  	struct {
  		unsigned	tail;
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
149
  		unsigned	completed_events;
58c85dc20   Kent Overstreet   aio: kill struct ...
150
  		spinlock_t	completion_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
151
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
152
153
  
  	struct page		*internal_pages[AIO_RING_PAGES];
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
154
  	struct file		*aio_ring_file;
db446a08c   Benjamin LaHaise   aio: convert the ...
155
156
  
  	unsigned		id;
4e179bca6   Kent Overstreet   aio: move private...
157
  };
84c4e1f89   Linus Torvalds   aio: simplify - a...
158
159
160
161
  /*
   * First field must be the file pointer in all the
   * iocb unions! See also 'struct kiocb' in <linux/fs.h>
   */
a3c0d439e   Christoph Hellwig   aio: implement IO...
162
  struct fsync_iocb {
a3c0d439e   Christoph Hellwig   aio: implement IO...
163
  	struct file		*file;
84c4e1f89   Linus Torvalds   aio: simplify - a...
164
  	struct work_struct	work;
a3c0d439e   Christoph Hellwig   aio: implement IO...
165
166
  	bool			datasync;
  };
bfe4037e7   Christoph Hellwig   aio: implement IO...
167
168
169
170
  struct poll_iocb {
  	struct file		*file;
  	struct wait_queue_head	*head;
  	__poll_t		events;
af5c72b1f   Al Viro   Fix aio_poll() races
171
  	bool			done;
bfe4037e7   Christoph Hellwig   aio: implement IO...
172
173
174
175
  	bool			cancelled;
  	struct wait_queue_entry	wait;
  	struct work_struct	work;
  };
84c4e1f89   Linus Torvalds   aio: simplify - a...
176
177
178
179
180
181
  /*
   * NOTE! Each of the iocb union members has the file pointer
   * as the first entry in their struct definition. So you can
   * access the file pointer through any of the sub-structs,
   * or directly as just 'ki_filp' in this struct.
   */
04b2fa9f8   Christoph Hellwig   fs: split generic...
182
  struct aio_kiocb {
54843f875   Christoph Hellwig   aio: refactor rea...
183
  	union {
84c4e1f89   Linus Torvalds   aio: simplify - a...
184
  		struct file		*ki_filp;
54843f875   Christoph Hellwig   aio: refactor rea...
185
  		struct kiocb		rw;
a3c0d439e   Christoph Hellwig   aio: implement IO...
186
  		struct fsync_iocb	fsync;
bfe4037e7   Christoph Hellwig   aio: implement IO...
187
  		struct poll_iocb	poll;
54843f875   Christoph Hellwig   aio: refactor rea...
188
  	};
04b2fa9f8   Christoph Hellwig   fs: split generic...
189
190
191
  
  	struct kioctx		*ki_ctx;
  	kiocb_cancel_fn		*ki_cancel;
a9339b785   Al Viro   aio: keep io_even...
192
  	struct io_event		ki_res;
04b2fa9f8   Christoph Hellwig   fs: split generic...
193
194
195
  
  	struct list_head	ki_list;	/* the aio core uses this
  						 * for cancellation */
9018ccc45   Christoph Hellwig   aio: add a iocb r...
196
  	refcount_t		ki_refcnt;
04b2fa9f8   Christoph Hellwig   fs: split generic...
197
198
199
200
201
202
203
  
  	/*
  	 * If the aio_resfd field of the userspace iocb is not zero,
  	 * this is the underlying eventfd context to deliver events to.
  	 */
  	struct eventfd_ctx	*ki_eventfd;
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
204
  /*------ sysctl variables----*/
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
205
206
207
  static DEFINE_SPINLOCK(aio_nr_lock);
  unsigned long aio_nr;		/* current system wide number of aio requests */
  unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
  /*----end sysctl variables---*/
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
209
210
  static struct kmem_cache	*kiocb_cachep;
  static struct kmem_cache	*kioctx_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211

71ad7490c   Benjamin LaHaise   rework aio migrat...
212
213
214
215
216
217
218
  static struct vfsmount *aio_mnt;
  
  static const struct file_operations aio_ring_fops;
  static const struct address_space_operations aio_ctx_aops;
  
  static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
  {
71ad7490c   Benjamin LaHaise   rework aio migrat...
219
  	struct file *file;
71ad7490c   Benjamin LaHaise   rework aio migrat...
220
  	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
7f62656be   Dan Carpenter   aio: checking for...
221
222
  	if (IS_ERR(inode))
  		return ERR_CAST(inode);
71ad7490c   Benjamin LaHaise   rework aio migrat...
223
224
225
226
  
  	inode->i_mapping->a_ops = &aio_ctx_aops;
  	inode->i_mapping->private_data = ctx;
  	inode->i_size = PAGE_SIZE * nr_pages;
d93aa9d82   Al Viro   new wrapper: allo...
227
228
  	file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
  				O_RDWR, &aio_ring_fops);
c9c554f21   Al Viro   alloc_file(): swi...
229
  	if (IS_ERR(file))
71ad7490c   Benjamin LaHaise   rework aio migrat...
230
  		iput(inode);
71ad7490c   Benjamin LaHaise   rework aio migrat...
231
232
  	return file;
  }
52db59df1   David Howells   vfs: Convert aio ...
233
  static int aio_init_fs_context(struct fs_context *fc)
71ad7490c   Benjamin LaHaise   rework aio migrat...
234
  {
52db59df1   David Howells   vfs: Convert aio ...
235
236
237
238
  	if (!init_pseudo(fc, AIO_RING_MAGIC))
  		return -ENOMEM;
  	fc->s_iflags |= SB_I_NOEXEC;
  	return 0;
71ad7490c   Benjamin LaHaise   rework aio migrat...
239
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
241
242
243
244
245
  /* aio_setup
   *	Creates the slab caches used by the aio routines, panic on
   *	failure as this is done early during the boot sequence.
   */
  static int __init aio_setup(void)
  {
71ad7490c   Benjamin LaHaise   rework aio migrat...
246
247
  	static struct file_system_type aio_fs = {
  		.name		= "aio",
52db59df1   David Howells   vfs: Convert aio ...
248
  		.init_fs_context = aio_init_fs_context,
71ad7490c   Benjamin LaHaise   rework aio migrat...
249
250
251
252
253
  		.kill_sb	= kill_anon_super,
  	};
  	aio_mnt = kern_mount(&aio_fs);
  	if (IS_ERR(aio_mnt))
  		panic("Failed to create aio fs mount.");
04b2fa9f8   Christoph Hellwig   fs: split generic...
254
  	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
0a31bd5f2   Christoph Lameter   KMEM_CACHE(): sim...
255
  	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256
257
  	return 0;
  }
385773e04   H Hartley Sweeten   aio.c: move EXPOR...
258
  __initcall(aio_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
259

5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
260
261
262
  static void put_aio_ring_file(struct kioctx *ctx)
  {
  	struct file *aio_ring_file = ctx->aio_ring_file;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
263
  	struct address_space *i_mapping;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
264
  	if (aio_ring_file) {
450630975   Al Viro   don't open-code f...
265
  		truncate_setsize(file_inode(aio_ring_file), 0);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
266
267
  
  		/* Prevent further access to the kioctx from migratepages */
450630975   Al Viro   don't open-code f...
268
  		i_mapping = aio_ring_file->f_mapping;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
269
270
  		spin_lock(&i_mapping->private_lock);
  		i_mapping->private_data = NULL;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
271
  		ctx->aio_ring_file = NULL;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
272
  		spin_unlock(&i_mapping->private_lock);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
273
274
275
276
  
  		fput(aio_ring_file);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277
278
  static void aio_free_ring(struct kioctx *ctx)
  {
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
279
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
281
282
283
284
  	/* Disconnect the kiotx from the ring file.  This prevents future
  	 * accesses to the kioctx from page migration.
  	 */
  	put_aio_ring_file(ctx);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
285
  	for (i = 0; i < ctx->nr_pages; i++) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
286
  		struct page *page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
287
288
289
  		pr_debug("pid(%d) [%d] page->count=%d
  ", current->pid, i,
  				page_count(ctx->ring_pages[i]));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
290
291
292
293
294
  		page = ctx->ring_pages[i];
  		if (!page)
  			continue;
  		ctx->ring_pages[i] = NULL;
  		put_page(page);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
295
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296

ddb8c45ba   Sasha Levin   aio: nullify aio-...
297
  	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
58c85dc20   Kent Overstreet   aio: kill struct ...
298
  		kfree(ctx->ring_pages);
ddb8c45ba   Sasha Levin   aio: nullify aio-...
299
300
  		ctx->ring_pages = NULL;
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
301
  }
5477e70a6   Oleg Nesterov   mm: move ->mremap...
302
  static int aio_ring_mremap(struct vm_area_struct *vma)
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
303
  {
5477e70a6   Oleg Nesterov   mm: move ->mremap...
304
  	struct file *file = vma->vm_file;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
305
306
  	struct mm_struct *mm = vma->vm_mm;
  	struct kioctx_table *table;
b2edffdd9   Al Viro   fix mremap() vs. ...
307
  	int i, res = -EINVAL;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
308
309
310
311
312
313
  
  	spin_lock(&mm->ioctx_lock);
  	rcu_read_lock();
  	table = rcu_dereference(mm->ioctx_table);
  	for (i = 0; i < table->nr; i++) {
  		struct kioctx *ctx;
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
314
  		ctx = rcu_dereference(table->table[i]);
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
315
  		if (ctx && ctx->aio_ring_file == file) {
b2edffdd9   Al Viro   fix mremap() vs. ...
316
317
318
319
  			if (!atomic_read(&ctx->dead)) {
  				ctx->user_id = ctx->mmap_base = vma->vm_start;
  				res = 0;
  			}
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
320
321
322
323
324
325
  			break;
  		}
  	}
  
  	rcu_read_unlock();
  	spin_unlock(&mm->ioctx_lock);
b2edffdd9   Al Viro   fix mremap() vs. ...
326
  	return res;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
327
  }
5477e70a6   Oleg Nesterov   mm: move ->mremap...
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
  static const struct vm_operations_struct aio_ring_vm_ops = {
  	.mremap		= aio_ring_mremap,
  #if IS_ENABLED(CONFIG_MMU)
  	.fault		= filemap_fault,
  	.map_pages	= filemap_map_pages,
  	.page_mkwrite	= filemap_page_mkwrite,
  #endif
  };
  
  static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
  {
  	vma->vm_flags |= VM_DONTEXPAND;
  	vma->vm_ops = &aio_ring_vm_ops;
  	return 0;
  }
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
343
344
345
  static const struct file_operations aio_ring_fops = {
  	.mmap = aio_ring_mmap,
  };
0c45355fc   Benjamin LaHaise   aio: fix build wh...
346
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
347
348
349
  static int aio_migratepage(struct address_space *mapping, struct page *new,
  			struct page *old, enum migrate_mode mode)
  {
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
350
  	struct kioctx *ctx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
351
  	unsigned long flags;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
352
  	pgoff_t idx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
353
  	int rc;
2916ecc0f   Jérôme Glisse   mm/migrate: new m...
354
355
356
357
358
359
360
  	/*
  	 * We cannot support the _NO_COPY case here, because copy needs to
  	 * happen under the ctx->completion_lock. That does not work with the
  	 * migration workflow of MIGRATE_SYNC_NO_COPY.
  	 */
  	if (mode == MIGRATE_SYNC_NO_COPY)
  		return -EINVAL;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
361
  	rc = 0;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
362
  	/* mapping->private_lock here protects against the kioctx teardown.  */
8e321fefb   Benjamin LaHaise   aio/migratepages:...
363
364
  	spin_lock(&mapping->private_lock);
  	ctx = mapping->private_data;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
  	if (!ctx) {
  		rc = -EINVAL;
  		goto out;
  	}
  
  	/* The ring_lock mutex.  The prevents aio_read_events() from writing
  	 * to the ring's head, and prevents page migration from mucking in
  	 * a partially initialized kiotx.
  	 */
  	if (!mutex_trylock(&ctx->ring_lock)) {
  		rc = -EAGAIN;
  		goto out;
  	}
  
  	idx = old->index;
  	if (idx < (pgoff_t)ctx->nr_pages) {
  		/* Make sure the old page hasn't already been changed */
  		if (ctx->ring_pages[idx] != old)
  			rc = -EAGAIN;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
384
385
  	} else
  		rc = -EINVAL;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
386
387
  
  	if (rc != 0)
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
388
  		goto out_unlock;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
389

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
390
391
  	/* Writeback must be complete */
  	BUG_ON(PageWriteback(old));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
392
  	get_page(new);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
393

371096949   Keith Busch   mm: migrate: remo...
394
  	rc = migrate_page_move_mapping(mapping, new, old, 1);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
395
  	if (rc != MIGRATEPAGE_SUCCESS) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
396
  		put_page(new);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
397
  		goto out_unlock;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
398
  	}
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
399
400
401
  	/* Take completion_lock to prevent other writes to the ring buffer
  	 * while the old page is copied to the new.  This prevents new
  	 * events from being lost.
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
402
  	 */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
403
404
405
406
407
  	spin_lock_irqsave(&ctx->completion_lock, flags);
  	migrate_page_copy(new, old);
  	BUG_ON(ctx->ring_pages[idx] != old);
  	ctx->ring_pages[idx] = new;
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
408

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
409
410
  	/* The old page is no longer accessible. */
  	put_page(old);
8e321fefb   Benjamin LaHaise   aio/migratepages:...
411

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
412
413
414
415
  out_unlock:
  	mutex_unlock(&ctx->ring_lock);
  out:
  	spin_unlock(&mapping->private_lock);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
416
  	return rc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
417
  }
0c45355fc   Benjamin LaHaise   aio: fix build wh...
418
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
420
  static const struct address_space_operations aio_ctx_aops = {
835f252c6   Gu Zheng   aio: fix uncorren...
421
  	.set_page_dirty = __set_page_dirty_no_writeback,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
422
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
423
  	.migratepage	= aio_migratepage,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
424
  #endif
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
425
  };
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
426
  static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
427
428
  {
  	struct aio_ring *ring;
41003a7bc   Zach Brown   aio: remove retry...
429
  	struct mm_struct *mm = current->mm;
3dc9acb67   Linus Torvalds   aio: clean up and...
430
  	unsigned long size, unused;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
431
  	int nr_pages;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
432
433
  	int i;
  	struct file *file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
434
435
436
437
438
439
  
  	/* Compensate for the ring buffer's head/tail overlap entry */
  	nr_events += 2;	/* 1 is required, 2 for good luck */
  
  	size = sizeof(struct aio_ring);
  	size += sizeof(struct io_event) * nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
440

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
441
  	nr_pages = PFN_UP(size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
443
  	if (nr_pages < 0)
  		return -EINVAL;
71ad7490c   Benjamin LaHaise   rework aio migrat...
444
  	file = aio_private_file(ctx, nr_pages);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
445
446
  	if (IS_ERR(file)) {
  		ctx->aio_ring_file = NULL;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
447
  		return -ENOMEM;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
448
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
449
450
451
452
453
454
455
456
457
458
459
460
461
  	ctx->aio_ring_file = file;
  	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
  			/ sizeof(struct io_event);
  
  	ctx->ring_pages = ctx->internal_pages;
  	if (nr_pages > AIO_RING_PAGES) {
  		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
  					  GFP_KERNEL);
  		if (!ctx->ring_pages) {
  			put_aio_ring_file(ctx);
  			return -ENOMEM;
  		}
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
462
463
  	for (i = 0; i < nr_pages; i++) {
  		struct page *page;
450630975   Al Viro   don't open-code f...
464
  		page = find_or_create_page(file->f_mapping,
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
465
466
467
468
469
470
471
  					   i, GFP_HIGHUSER | __GFP_ZERO);
  		if (!page)
  			break;
  		pr_debug("pid(%d) page[%d]->count=%d
  ",
  			 current->pid, i, page_count(page));
  		SetPageUptodate(page);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
472
  		unlock_page(page);
3dc9acb67   Linus Torvalds   aio: clean up and...
473
474
  
  		ctx->ring_pages[i] = page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
475
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
476
  	ctx->nr_pages = i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477

3dc9acb67   Linus Torvalds   aio: clean up and...
478
479
  	if (unlikely(i != nr_pages)) {
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
480
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
481
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
482
483
484
  	ctx->mmap_size = nr_pages * PAGE_SIZE;
  	pr_debug("attempting mmap of %lu bytes
  ", ctx->mmap_size);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
485

013373e8b   Michal Hocko   aio: make aio_set...
486
487
488
489
490
  	if (down_write_killable(&mm->mmap_sem)) {
  		ctx->mmap_size = 0;
  		aio_free_ring(ctx);
  		return -EINTR;
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
491
492
  	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
  				       PROT_READ | PROT_WRITE,
897ab3e0c   Mike Rapoport   userfaultfd: non-...
493
  				       MAP_SHARED, 0, &unused, NULL);
3dc9acb67   Linus Torvalds   aio: clean up and...
494
  	up_write(&mm->mmap_sem);
58c85dc20   Kent Overstreet   aio: kill struct ...
495
  	if (IS_ERR((void *)ctx->mmap_base)) {
58c85dc20   Kent Overstreet   aio: kill struct ...
496
  		ctx->mmap_size = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
497
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
498
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
499
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
500
501
  	pr_debug("mmap address: 0x%08lx
  ", ctx->mmap_base);
d6c355c7d   Benjamin LaHaise   aio: fix race in ...
502

58c85dc20   Kent Overstreet   aio: kill struct ...
503
504
  	ctx->user_id = ctx->mmap_base;
  	ctx->nr_events = nr_events; /* trusted copy */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
505

58c85dc20   Kent Overstreet   aio: kill struct ...
506
  	ring = kmap_atomic(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
507
  	ring->nr = nr_events;	/* user copy */
db446a08c   Benjamin LaHaise   aio: convert the ...
508
  	ring->id = ~0U;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
509
510
511
512
513
  	ring->head = ring->tail = 0;
  	ring->magic = AIO_RING_MAGIC;
  	ring->compat_features = AIO_RING_COMPAT_FEATURES;
  	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
  	ring->header_length = sizeof(struct aio_ring);
e8e3c3d66   Cong Wang   fs: remove the se...
514
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
515
  	flush_dcache_page(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
517
518
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
519
520
521
  #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
  #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
  #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
04b2fa9f8   Christoph Hellwig   fs: split generic...
522
  void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
0460fef2a   Kent Overstreet   aio: use cancella...
523
  {
54843f875   Christoph Hellwig   aio: refactor rea...
524
  	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
0460fef2a   Kent Overstreet   aio: use cancella...
525
526
  	struct kioctx *ctx = req->ki_ctx;
  	unsigned long flags;
75321b50a   Christoph Hellwig   aio: sanitize ki_...
527
528
  	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
  		return;
0460fef2a   Kent Overstreet   aio: use cancella...
529

75321b50a   Christoph Hellwig   aio: sanitize ki_...
530
531
  	spin_lock_irqsave(&ctx->ctx_lock, flags);
  	list_add_tail(&req->ki_list, &ctx->active_reqs);
0460fef2a   Kent Overstreet   aio: use cancella...
532
  	req->ki_cancel = cancel;
0460fef2a   Kent Overstreet   aio: use cancella...
533
534
535
  	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  }
  EXPORT_SYMBOL(kiocb_set_cancel_fn);
a6d7cff47   Tejun Heo   fs/aio: Add expli...
536
537
538
  /*
   * free_ioctx() should be RCU delayed to synchronize against the RCU
   * protected lookup_ioctx() and also needs process context to call
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
539
   * aio_free_ring().  Use rcu_work.
a6d7cff47   Tejun Heo   fs/aio: Add expli...
540
   */
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
541
  static void free_ioctx(struct work_struct *work)
36f558890   Kent Overstreet   aio: refcounting ...
542
  {
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
543
544
  	struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
  					  free_rwork);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
545
546
  	pr_debug("freeing %p
  ", ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
547

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
548
  	aio_free_ring(ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
549
  	free_percpu(ctx->cpu);
9a1049da9   Tejun Heo   percpu-refcount: ...
550
551
  	percpu_ref_exit(&ctx->reqs);
  	percpu_ref_exit(&ctx->users);
36f558890   Kent Overstreet   aio: refcounting ...
552
553
  	kmem_cache_free(kioctx_cachep, ctx);
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
554
555
556
  static void free_ioctx_reqs(struct percpu_ref *ref)
  {
  	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
e02ba72aa   Anatol Pomozov   aio: block io_des...
557
  	/* At this point we know that there are no any in-flight requests */
dc48e56d7   Jens Axboe   aio: fix serial d...
558
559
  	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  		complete(&ctx->rq_wait->comp);
e02ba72aa   Anatol Pomozov   aio: block io_des...
560

a6d7cff47   Tejun Heo   fs/aio: Add expli...
561
  	/* Synchronize against RCU protected table->table[] dereferences */
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
562
563
  	INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
  	queue_rcu_work(system_wq, &ctx->free_rwork);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
564
  }
36f558890   Kent Overstreet   aio: refcounting ...
565
566
567
568
569
/*
 * When this function runs, the kioctx has been removed from the "hash table"
 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
 * now it's safe to cancel any that need to be.
 */
static void free_ioctx_users(struct percpu_ref *ref)
{
	struct kioctx *ctx = container_of(ref, struct kioctx, users);
	struct aio_kiocb *req;

	spin_lock_irq(&ctx->ctx_lock);

	/* Cancel every still-active request under ctx_lock; ki_cancel is
	 * invoked before the entry is unlinked from active_reqs. */
	while (!list_empty(&ctx->active_reqs)) {
		req = list_first_entry(&ctx->active_reqs,
				       struct aio_kiocb, ki_list);
		req->ki_cancel(&req->rw);
		list_del_init(&req->ki_list);
	}

	spin_unlock(&ctx->ctx_lock);

	/* Drop the initial reqs ref taken in ioctx_alloc(); once all
	 * outstanding requests finish, free_ioctx_reqs() fires. */
	percpu_ref_kill(&ctx->reqs);
	percpu_ref_put(&ctx->reqs);
}
db446a08c   Benjamin LaHaise   aio: convert the ...
588
589
590
591
592
593
594
/*
 * Install @ctx into @mm's RCU-protected kioctx table, growing the table
 * (4x each round) when no free slot exists.  On success the slot index
 * is published as ctx->id and mirrored into the userspace-visible
 * ring->id.  Returns 0 or -ENOMEM.
 */
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
{
	unsigned i, new_nr;
	struct kioctx_table *table, *old;
	struct aio_ring *ring;

	spin_lock(&mm->ioctx_lock);
	table = rcu_dereference_raw(mm->ioctx_table);

	while (1) {
		/* Look for an empty slot in the current table. */
		if (table)
			for (i = 0; i < table->nr; i++)
				if (!rcu_access_pointer(table->table[i])) {
					ctx->id = i;
					rcu_assign_pointer(table->table[i], ctx);
					spin_unlock(&mm->ioctx_lock);

					/* While kioctx setup is in progress,
					 * we are protected from page migration
					 * changes ring_pages by ->ring_lock.
					 */
					ring = kmap_atomic(ctx->ring_pages[0]);
					ring->id = ctx->id;
					kunmap_atomic(ring);
					return 0;
				}

		/* No free slot: allocate a bigger table with the lock
		 * dropped (GFP_KERNEL may sleep), then revalidate. */
		new_nr = (table ? table->nr : 1) * 4;
		spin_unlock(&mm->ioctx_lock);

		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
				new_nr, GFP_KERNEL);
		if (!table)
			return -ENOMEM;

		table->nr = new_nr;

		spin_lock(&mm->ioctx_lock);
		old = rcu_dereference_raw(mm->ioctx_table);

		if (!old) {
			rcu_assign_pointer(mm->ioctx_table, table);
		} else if (table->nr > old->nr) {
			/* Still smaller than ours: copy entries across,
			 * publish the new table and RCU-free the old one. */
			memcpy(table->table, old->table,
			       old->nr * sizeof(struct kioctx *));

			rcu_assign_pointer(mm->ioctx_table, table);
			kfree_rcu(old, rcu);
		} else {
			/* Someone else grew it first; retry with theirs. */
			kfree(table);
			table = old;
		}
	}
}
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
641
642
643
644
645
646
647
648
649
  static void aio_nr_sub(unsigned nr)
  {
  	spin_lock(&aio_nr_lock);
  	if (WARN_ON(aio_nr - nr > aio_nr))
  		aio_nr = 0;
  	else
  		aio_nr -= nr;
  	spin_unlock(&aio_nr_lock);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
650
651
652
653
654
/* ioctx_alloc
 *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
 */
static struct kioctx *ioctx_alloc(unsigned nr_events)
{
	struct mm_struct *mm = current->mm;
	struct kioctx *ctx;
	int err = -ENOMEM;

	/*
	 * Store the original nr_events -- what userspace passed to io_setup(),
	 * for counting against the global limit -- before it changes.
	 */
	unsigned int max_reqs = nr_events;

	/*
	 * We keep track of the number of available ringbuffer slots, to prevent
	 * overflow (reqs_available), and we also use percpu counters for this.
	 *
	 * So since up to half the slots might be on other cpu's percpu counters
	 * and unavailable, double nr_events so userspace sees what they
	 * expected: additionally, we move req_batch slots to/from percpu
	 * counters at a time, so make sure that isn't 0:
	 */
	nr_events = max(nr_events, num_possible_cpus() * 4);
	nr_events *= 2;

	/* Prevent overflows */
	if (nr_events > (0x10000000U / sizeof(struct io_event))) {
		pr_debug("ENOMEM: nr_events too high\n");
		return ERR_PTR(-EINVAL);
	}

	/* Quick reject against the global cap before allocating anything. */
	if (!nr_events || (unsigned long)max_reqs > aio_max_nr)
		return ERR_PTR(-EAGAIN);

	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	ctx->max_reqs = max_reqs;

	spin_lock_init(&ctx->ctx_lock);
	spin_lock_init(&ctx->completion_lock);
	mutex_init(&ctx->ring_lock);
	/* Protect against page migration throughout kiotx setup by keeping
	 * the ring_lock mutex held until setup is complete. */
	mutex_lock(&ctx->ring_lock);
	init_waitqueue_head(&ctx->wait);

	INIT_LIST_HEAD(&ctx->active_reqs);

	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
		goto err;

	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
		goto err;

	ctx->cpu = alloc_percpu(struct kioctx_cpu);
	if (!ctx->cpu)
		goto err;

	err = aio_setup_ring(ctx, nr_events);
	if (err < 0)
		goto err;

	/* One ring slot is sacrificed to distinguish empty from full. */
	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
	if (ctx->req_batch < 1)
		ctx->req_batch = 1;

	/* limit the number of system wide aios */
	spin_lock(&aio_nr_lock);
	if (aio_nr + ctx->max_reqs > aio_max_nr ||
	    aio_nr + ctx->max_reqs < aio_nr) {
		spin_unlock(&aio_nr_lock);
		err = -EAGAIN;
		goto err_ctx;
	}
	aio_nr += ctx->max_reqs;
	spin_unlock(&aio_nr_lock);

	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
	percpu_ref_get(&ctx->reqs);	/* free_ioctx_users() will drop this */

	err = ioctx_add_table(ctx, mm);
	if (err)
		goto err_cleanup;

	/* Release the ring_lock mutex now that all setup is complete. */
	mutex_unlock(&ctx->ring_lock);

	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
		 ctx, ctx->user_id, mm, ctx->nr_events);
	return ctx;

err_cleanup:
	/* Undo the global-count charge taken above. */
	aio_nr_sub(ctx->max_reqs);
err_ctx:
	atomic_set(&ctx->dead, 1);
	if (ctx->mmap_size)
		vm_munmap(ctx->mmap_base, ctx->mmap_size);
	aio_free_ring(ctx);
err:
	mutex_unlock(&ctx->ring_lock);
	free_percpu(ctx->cpu);
	percpu_ref_exit(&ctx->reqs);
	percpu_ref_exit(&ctx->users);
	kmem_cache_free(kioctx_cachep, ctx);
	pr_debug("error allocating ioctx %d\n", err);
	return ERR_PTR(err);
}
36f558890   Kent Overstreet   aio: refcounting ...
757
758
759
760
761
/* kill_ioctx
 *	Cancels all outstanding aio requests on an aio context.  Used
 *	when the processes owning a context have all exited to encourage
 *	the rapid destruction of the kioctx.
 */
static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
		      struct ctx_rq_wait *wait)
{
	struct kioctx_table *table;

	spin_lock(&mm->ioctx_lock);
	/* atomic_xchg makes a second concurrent kill a no-op. */
	if (atomic_xchg(&ctx->dead, 1)) {
		spin_unlock(&mm->ioctx_lock);
		return -EINVAL;
	}

	/* Unhook from the mm's table so no new lookups can find us. */
	table = rcu_dereference_raw(mm->ioctx_table);
	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
	RCU_INIT_POINTER(table->table[ctx->id], NULL);
	spin_unlock(&mm->ioctx_lock);

	/* free_ioctx_reqs() will do the necessary RCU synchronization */
	wake_up_all(&ctx->wait);

	/*
	 * It'd be more correct to do this in free_ioctx(), after all
	 * the outstanding kiocbs have finished - but by then io_destroy
	 * has already returned, so io_setup() could potentially return
	 * -EAGAIN with no ioctxs actually in use (as far as userspace
	 *  could tell).
	 */
	aio_nr_sub(ctx->max_reqs);

	/* mmap_size == 0 means the caller (exit_aio) owns the unmap. */
	if (ctx->mmap_size)
		vm_munmap(ctx->mmap_base, ctx->mmap_size);

	ctx->rq_wait = wait;
	percpu_ref_kill(&ctx->users);
	return 0;
}
36f558890   Kent Overstreet   aio: refcounting ...
797
798
799
800
801
802
803
/*
 * exit_aio: called when the last user of mm goes away.  At this point, there is
 * no way for any new requests to be submited or any of the io_* syscalls to be
 * called on the context.
 *
 * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
 * them.
 */
void exit_aio(struct mm_struct *mm)
{
	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
	struct ctx_rq_wait wait;
	int i, skipped;

	if (!table)
		return;

	/* Assume every slot holds a ctx; empty ones are subtracted below. */
	atomic_set(&wait.count, table->nr);
	init_completion(&wait.comp);

	skipped = 0;
	for (i = 0; i < table->nr; ++i) {
		struct kioctx *ctx =
			rcu_dereference_protected(table->table[i], true);

		if (!ctx) {
			skipped++;
			continue;
		}

		/*
		 * We don't need to bother with munmap() here - exit_mmap(mm)
		 * is coming and it'll unmap everything. And we simply can't,
		 * this is not necessarily our ->mm.
		 * Since kill_ioctx() uses non-zero ->mmap_size as indicator
		 * that it needs to unmap the area, just set it to 0.
		 */
		ctx->mmap_size = 0;
		kill_ioctx(mm, ctx, &wait);
	}

	/* Correct the count for empty slots; only sleep if something
	 * is actually still draining. */
	if (!atomic_sub_and_test(skipped, &wait.count)) {
		/* Wait until all IO for the context are done. */
		wait_for_completion(&wait.comp);
	}

	RCU_INIT_POINTER(mm->ioctx_table, NULL);
	kfree(table);
}
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
845
846
847
/*
 * Return @nr request slots to this CPU's percpu pool, spilling whole
 * req_batch chunks back into the shared atomic counter whenever the
 * local pool holds at least two batches.  IRQs are disabled because
 * this can be reached from completion (irq) context.
 */
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
	struct kioctx_cpu *kcpu;
	unsigned long flags;

	local_irq_save(flags);
	kcpu = this_cpu_ptr(ctx->cpu);
	kcpu->reqs_available += nr;

	while (kcpu->reqs_available >= ctx->req_batch * 2) {
		kcpu->reqs_available -= ctx->req_batch;
		atomic_add(ctx->req_batch, &ctx->reqs_available);
	}
	local_irq_restore(flags);
}
432c79978   Christoph Hellwig   aio: separate out...
860
/*
 * Try to take one request slot from this CPU's percpu pool, refilling
 * the pool with a req_batch chunk from the shared atomic counter via a
 * cmpxchg loop when it is empty.  Returns false if no slots remain.
 */
static bool __get_reqs_available(struct kioctx *ctx)
{
	struct kioctx_cpu *kcpu;
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);
	kcpu = this_cpu_ptr(ctx->cpu);
	if (!kcpu->reqs_available) {
		int old, avail = atomic_read(&ctx->reqs_available);

		do {
			/* Fewer than a full batch left globally: give up. */
			if (avail < ctx->req_batch)
				goto out;

			old = avail;
			avail = atomic_cmpxchg(&ctx->reqs_available,
					       avail, avail - ctx->req_batch);
		} while (avail != old);

		kcpu->reqs_available += ctx->req_batch;
	}

	ret = true;
	kcpu->reqs_available--;
out:
	local_irq_restore(flags);
	return ret;
}
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
  /* refill_reqs_available
   *	Updates the reqs_available reference counts used for tracking the
   *	number of free slots in the completion ring.  This can be called
   *	from aio_complete() (to optimistically update reqs_available) or
   *	from aio_get_req() (the we're out of events case).  It must be
   *	called holding ctx->completion_lock.
   */
  static void refill_reqs_available(struct kioctx *ctx, unsigned head,
                                    unsigned tail)
  {
  	unsigned events_in_ring, completed;
  
  	/* Clamp head since userland can write to it. */
  	head %= ctx->nr_events;
  	if (head <= tail)
  		events_in_ring = tail - head;
  	else
  		events_in_ring = ctx->nr_events - (head - tail);
  
  	completed = ctx->completed_events;
  	if (events_in_ring < completed)
  		completed -= events_in_ring;
  	else
  		completed = 0;
  
  	if (!completed)
  		return;
  
  	ctx->completed_events -= completed;
  	put_reqs_available(ctx, completed);
  }
  
/* user_refill_reqs_available
 *	Called to refill reqs_available when aio_get_req() encounters an
 *	out of space in the completion ring.
 */
static void user_refill_reqs_available(struct kioctx *ctx)
{
	spin_lock_irq(&ctx->completion_lock);
	if (ctx->completed_events) {
		struct aio_ring *ring;
		unsigned head;

		/* Access of ring->head may race with aio_read_events_ring()
		 * here, but that's okay since whether we read the old version
		 * or the new version, and either will be valid.  The important
		 * part is that head cannot pass tail since we prevent
		 * aio_complete() from updating tail by holding
		 * ctx->completion_lock.  Even if head is invalid, the check
		 * against ctx->completed_events below will make sure we do the
		 * safe/right thing.
		 */
		ring = kmap_atomic(ctx->ring_pages[0]);
		head = ring->head;
		kunmap_atomic(ring);

		refill_reqs_available(ctx, head, ctx->tail);
	}

	spin_unlock_irq(&ctx->completion_lock);
}
432c79978   Christoph Hellwig   aio: separate out...
950
951
952
953
954
955
956
  static bool get_reqs_available(struct kioctx *ctx)
  {
  	if (__get_reqs_available(ctx))
  		return true;
  	user_refill_reqs_available(ctx);
  	return __get_reqs_available(ctx);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
957
/* aio_get_req
 *	Allocate a slot for an aio request.
 * Returns NULL if no requests are free.
 *
 * The refcount is initialized to 2 - one for the async op completion,
 * one for the synchronous code that does this.
 */
static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
	struct aio_kiocb *req;

	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
	if (unlikely(!req))
		return NULL;

	/* Reserve a ring slot; undo the allocation if none are left. */
	if (unlikely(!get_reqs_available(ctx))) {
		kmem_cache_free(kiocb_cachep, req);
		return NULL;
	}

	/* Pin the ctx for the lifetime of this request; dropped in
	 * iocb_destroy(). */
	percpu_ref_get(&ctx->reqs);
	req->ki_ctx = ctx;
	INIT_LIST_HEAD(&req->ki_list);
	refcount_set(&req->ki_refcnt, 2);
	req->ki_eventfd = NULL;
	return req;
}
d5470b596   Adrian Bunk   fs/aio.c: make 3 ...
983
/*
 * Translate a userspace ctx_id (the mmap'ed ring address) into a kioctx
 * with a live ->users reference, or NULL.  The id embedded in the
 * user-visible ring selects the table slot; ctx->user_id is re-checked
 * because userland can scribble on its ring.
 */
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
	struct aio_ring __user *ring  = (void __user *)ctx_id;
	struct mm_struct *mm = current->mm;
	struct kioctx *ctx, *ret = NULL;
	struct kioctx_table *table;
	unsigned id;

	if (get_user(id, &ring->id))
		return NULL;

	rcu_read_lock();
	table = rcu_dereference(mm->ioctx_table);

	if (!table || id >= table->nr)
		goto out;

	/* Clamp the user-controlled index (Spectre-v1 mitigation). */
	id = array_index_nospec(id, table->nr);
	ctx = rcu_dereference(table->table[id]);
	if (ctx && ctx->user_id == ctx_id) {
		/* tryget_live fails if the ctx is already being killed. */
		if (percpu_ref_tryget_live(&ctx->users))
			ret = ctx;
	}
out:
	rcu_read_unlock();
	return ret;
}
b53119f13   Linus Torvalds   pin iocb through ...
1010
1011
/*
 * Release everything an aio_kiocb holds: eventfd ctx, the target file,
 * the ctx->reqs reference taken in aio_get_req(), and the iocb itself.
 */
static inline void iocb_destroy(struct aio_kiocb *iocb)
{
	if (iocb->ki_eventfd)
		eventfd_ctx_put(iocb->ki_eventfd);
	if (iocb->ki_filp)
		fput(iocb->ki_filp);
	percpu_ref_put(&iocb->ki_ctx->reqs);
	kmem_cache_free(kiocb_cachep, iocb);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1019
1020
/* aio_complete
 *	Called when the io request on the given iocb is complete.
 */
static void aio_complete(struct aio_kiocb *iocb)
{
	struct kioctx	*ctx = iocb->ki_ctx;
	struct aio_ring	*ring;
	struct io_event	*ev_page, *event;
	unsigned tail, pos, head;
	unsigned long	flags;

	/*
	 * Add a completion event to the ring buffer. Must be done holding
	 * ctx->completion_lock to prevent other code from messing with the tail
	 * pointer since we might be called from irq context.
	 */
	spin_lock_irqsave(&ctx->completion_lock, flags);
	tail = ctx->tail;
	pos = tail + AIO_EVENTS_OFFSET;
	if (++tail >= ctx->nr_events)
		tail = 0;

	/* Copy the pre-built result (iocb->ki_res) into the ring slot. */
	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
	*event = iocb->ki_res;

	kunmap_atomic(ev_page);
	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);

	pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
		 (void __user *)(unsigned long)iocb->ki_res.obj,
		 iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);

	/* after flagging the request as done, we
	 * must never even look at it again
	 */
	smp_wmb();	/* make event visible before updating tail */
	ctx->tail = tail;

	/* Publish the new tail in the user-visible ring header. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	head = ring->head;
	ring->tail = tail;
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);

	ctx->completed_events++;
	if (ctx->completed_events > 1)
		refill_reqs_available(ctx, head, tail);
	spin_unlock_irqrestore(&ctx->completion_lock, flags);

	pr_debug("added to ring %p at [%u]\n", iocb, tail);

	/*
	 * Check if the user asked us to deliver the result through an
	 * eventfd. The eventfd_signal() function is safe to be called
	 * from IRQ context.
	 */
	if (iocb->ki_eventfd)
		eventfd_signal(iocb->ki_eventfd, 1);

	/*
	 * We have to order our ring_info tail store above and test
	 * of the wait list below outside the wait lock.  This is
	 * like in wake_up_bit() where clearing a bit has to be
	 * ordered with the unlocked test.
	 */
	smp_mb();

	if (waitqueue_active(&ctx->wait))
		wake_up(&ctx->wait);
}
  
  static inline void iocb_put(struct aio_kiocb *iocb)
  {
  	if (refcount_dec_and_test(&iocb->ki_refcnt)) {
  		aio_complete(iocb);
  		iocb_destroy(iocb);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1096
  }
2be4e7dee   Gu Zheng   aio: fix some com...
1097
  /* aio_read_events_ring
/* aio_read_events_ring
 *	Pull an event off of the ioctx's event ring.  Returns the number of
 *	events fetched
 */
static long aio_read_events_ring(struct kioctx *ctx,
				 struct io_event __user *event, long nr)
{
	struct aio_ring *ring;
	unsigned head, tail, pos;
	long ret = 0;		/* events copied so far, or -errno */
	int copy_ret;

	/*
	 * The mutex can block and wake us up and that will cause
	 * wait_event_interruptible_hrtimeout() to schedule without sleeping
	 * and repeat. This should be rare enough that it doesn't cause
	 * peformance issues. See the comment in read_events() for more detail.
	 */
	sched_annotate_sleep();
	mutex_lock(&ctx->ring_lock);

	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	head = ring->head;
	tail = ring->tail;
	kunmap_atomic(ring);

	/*
	 * Ensure that once we've read the current tail pointer, that
	 * we also see the events that were stored up to the tail.
	 */
	smp_rmb();

	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);

	/* Ring empty: nothing to fetch. */
	if (head == tail)
		goto out;

	/* head/tail are free-running counters; reduce to ring indices. */
	head %= ctx->nr_events;
	tail %= ctx->nr_events;

	while (ret < nr) {
		long avail;
		struct io_event *ev;
		struct page *page;

		/*
		 * Number of contiguous events from head: up to tail, or to
		 * the end of the ring if the valid region wraps around.
		 */
		avail = (head <= tail ?  tail : ctx->nr_events) - head;
		if (head == tail)
			break;

		/* Locate the ring page holding the event at 'head'. */
		pos = head + AIO_EVENTS_OFFSET;
		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
		pos %= AIO_EVENTS_PER_PAGE;

		/* Clamp to caller's remaining quota and to this page. */
		avail = min(avail, nr - ret);
		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);

		ev = kmap(page);
		copy_ret = copy_to_user(event + ret, ev + pos,
					sizeof(*ev) * avail);
		kunmap(page);

		if (unlikely(copy_ret)) {
			ret = -EFAULT;
			goto out;
		}

		ret += avail;
		head += avail;
		head %= ctx->nr_events;
	}

	/* Publish the new head so completion can reuse consumed slots. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	ring->head = head;
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);

	pr_debug("%li  h%u t%u\n", ret, head, tail);
out:
	mutex_unlock(&ctx->ring_lock);

	return ret;
}
a31ad380b   Kent Overstreet   aio: make aio_rea...
1173
1174
  static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
  			    struct io_event __user *event, long *i)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1175
  {
a31ad380b   Kent Overstreet   aio: make aio_rea...
1176
  	long ret = aio_read_events_ring(ctx, event + *i, nr - *i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1177

a31ad380b   Kent Overstreet   aio: make aio_rea...
1178
1179
  	if (ret > 0)
  		*i += ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1180

a31ad380b   Kent Overstreet   aio: make aio_rea...
1181
1182
  	if (unlikely(atomic_read(&ctx->dead)))
  		ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1183

a31ad380b   Kent Overstreet   aio: make aio_rea...
1184
1185
  	if (!*i)
  		*i = ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1186

a31ad380b   Kent Overstreet   aio: make aio_rea...
1187
  	return ret < 0 || *i >= min_nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1188
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1189
  static long read_events(struct kioctx *ctx, long min_nr, long nr,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1190
  			struct io_event __user *event,
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1191
  			ktime_t until)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1192
  {
a31ad380b   Kent Overstreet   aio: make aio_rea...
1193
  	long ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1194

a31ad380b   Kent Overstreet   aio: make aio_rea...
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
  	/*
  	 * Note that aio_read_events() is being called as the conditional - i.e.
  	 * we're calling it after prepare_to_wait() has set task state to
  	 * TASK_INTERRUPTIBLE.
  	 *
  	 * But aio_read_events() can block, and if it blocks it's going to flip
  	 * the task state back to TASK_RUNNING.
  	 *
  	 * This should be ok, provided it doesn't flip the state back to
  	 * TASK_RUNNING and return 0 too much - that causes us to spin. That
  	 * will only happen if the mutex_lock() call blocks, and we then find
  	 * the ringbuffer empty. So in practice we should be ok, but it's
  	 * something to be aware of when touching this code.
  	 */
2456e8553   Thomas Gleixner   ktime: Get rid of...
1209
  	if (until == 0)
5f785de58   Fam Zheng   aio: Skip timer f...
1210
1211
1212
1213
1214
  		aio_read_events(ctx, min_nr, nr, event, &ret);
  	else
  		wait_event_interruptible_hrtimeout(ctx->wait,
  				aio_read_events(ctx, min_nr, nr, event, &ret),
  				until);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1215
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1216
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
  /* sys_io_setup:
   *	Create an aio_context capable of receiving at least nr_events.
   *	ctxp must not point to an aio_context that already exists, and
   *	must be initialized to 0 prior to the call.  On successful
   *	creation of the aio_context, *ctxp is filled in with the resulting 
   *	handle.  May fail with -EINVAL if *ctxp is not initialized,
   *	if the specified nr_events exceeds internal limits.  May fail 
   *	with -EAGAIN if the specified nr_events exceeds the user's limit 
   *	of available events.  May fail with -ENOMEM if insufficient kernel
   *	resources are available.  May fail with -EFAULT if an invalid
   *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
   *	implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1230
  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctxp);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1241
  	if (unlikely(ctx || nr_events == 0)) {
acd88d4e1   Kinglong Mee   fs/aio.c: Remove ...
1242
1243
  		pr_debug("EINVAL: ctx %lu nr_events %u
  ",
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1244
  		         ctx, nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1245
1246
1247
1248
1249
1250
1251
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		ret = put_user(ioctx->user_id, ctxp);
a2e1859ad   Al Viro   aio: take final p...
1252
  		if (ret)
e02ba72aa   Anatol Pomozov   aio: block io_des...
1253
  			kill_ioctx(current->mm, ioctx, NULL);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1254
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255
1256
1257
1258
1259
  	}
  
  out:
  	return ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
  #ifdef CONFIG_COMPAT
  COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctx32p);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
  	if (unlikely(ctx || nr_events == 0)) {
  		pr_debug("EINVAL: ctx %lu nr_events %u
  ",
  		         ctx, nr_events);
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		/* truncating is ok because it's a user address */
  		ret = put_user((u32)ioctx->user_id, ctx32p);
  		if (ret)
  			kill_ioctx(current->mm, ioctx, NULL);
  		percpu_ref_put(&ioctx->users);
  	}
  
  out:
  	return ret;
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1293
1294
1295
/* sys_io_destroy:
 *	Destroy the aio_context specified.  May cancel any outstanding 
 *	AIOs and block on completion.  Will fail with -ENOSYS if not
 *	implemented.  May fail with -EINVAL if the context pointed to
 *	is invalid.
 */
SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
	struct kioctx *ioctx = lookup_ioctx(ctx);
	if (likely(NULL != ioctx)) {
		struct ctx_rq_wait wait;
		int ret;

		init_completion(&wait.comp);
		atomic_set(&wait.count, 1);

		/* Pass requests_done to kill_ioctx() where it can be set
		 * in a thread-safe way. If we try to set it here then we have
		 * a race condition if two io_destroy() called simultaneously.
		 */
		ret = kill_ioctx(current->mm, ioctx, &wait);
		/* Drop the reference taken by lookup_ioctx(). */
		percpu_ref_put(&ioctx->users);

		/* Wait until all IO for the context are done. Otherwise kernel
		 * keep using user-space buffers even if user thinks the context
		 * is destroyed.
		 */
		if (!ret)
			wait_for_completion(&wait.comp);

		return ret;
	}
	pr_debug("EINVAL: invalid context id\n");
	return -EINVAL;
}
3c96c7f4c   Al Viro   aio: take list re...
1328
1329
1330
1331
1332
1333
1334
1335
1336
  static void aio_remove_iocb(struct aio_kiocb *iocb)
  {
  	struct kioctx *ctx = iocb->ki_ctx;
  	unsigned long flags;
  
  	spin_lock_irqsave(&ctx->ctx_lock, flags);
  	list_del(&iocb->ki_list);
  	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  }
54843f875   Christoph Hellwig   aio: refactor rea...
1337
1338
1339
/*
 * ->ki_complete callback for read/write iocbs (installed by aio_prep_rw()).
 * Records the result in ki_res and drops the request's reference via
 * iocb_put(); for writes it also releases the freeze protection taken at
 * submission time in aio_write().
 */
static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
{
	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);

	/* If still on active_reqs (cancellable), take it off first. */
	if (!list_empty_careful(&iocb->ki_list))
		aio_remove_iocb(iocb);

	if (kiocb->ki_flags & IOCB_WRITE) {
		struct inode *inode = file_inode(kiocb->ki_filp);

		/*
		 * Tell lockdep we inherited freeze protection from submission
		 * thread.
		 */
		if (S_ISREG(inode->i_mode))
			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
		file_end_write(kiocb->ki_filp);
	}

	iocb->ki_res.res = res;
	iocb->ki_res.res2 = res2;
	iocb_put(iocb);
}
88a6f18b9   Jens Axboe   aio: split out io...
1357
/*
 * Initialize the embedded kiocb from the userspace iocb for a read/write
 * request: completion callback, position, flags, write hint and I/O
 * priority.  Returns 0 or -errno (invalid ioprio / rw flags).
 */
static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{
	int ret;

	req->ki_complete = aio_complete_rw;
	req->private = NULL;
	req->ki_pos = iocb->aio_offset;
	req->ki_flags = iocb_flags(req->ki_filp);
	if (iocb->aio_flags & IOCB_FLAG_RESFD)
		req->ki_flags |= IOCB_EVENTFD;
	req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
	if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
		/*
		 * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
		 * aio_reqprio is interpreted as an I/O scheduling
		 * class and priority.
		 */
		ret = ioprio_check_cap(iocb->aio_reqprio);
		if (ret) {
			pr_debug("aio ioprio check cap error: %d\n", ret);
			return ret;
		}

		req->ki_ioprio = iocb->aio_reqprio;
	} else
		req->ki_ioprio = get_current_ioprio();

	ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
	if (unlikely(ret))
		return ret;

	req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
	return 0;
}
87e5e6dab   Jens Axboe   uio: make import_...
1391
1392
1393
/*
 * Build an iov_iter from the user iocb's buffer description.  For the
 * non-vectored ops the buffer is a single range and *iovec is cleared so
 * the caller's kfree() is a no-op; for vectored ops the iovec array is
 * imported (possibly via the 32-bit compat layout).
 */
static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
		struct iovec **iovec, bool vectored, bool compat,
		struct iov_iter *iter)
{
	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
	size_t len = iocb->aio_nbytes;

	if (!vectored) {
		ssize_t ret = import_single_range(rw, buf, len, *iovec, iter);
		*iovec = NULL;	/* nothing for the caller to free */
		return ret;
	}
#ifdef CONFIG_COMPAT
	if (compat)
		return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
				iter);
#endif
	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
}
9061d14a8   Al Viro   aio: all callers ...
1410
  static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1411
1412
1413
  {
  	switch (ret) {
  	case -EIOCBQUEUED:
9061d14a8   Al Viro   aio: all callers ...
1414
  		break;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
  	case -ERESTARTSYS:
  	case -ERESTARTNOINTR:
  	case -ERESTARTNOHAND:
  	case -ERESTART_RESTARTBLOCK:
  		/*
  		 * There's no easy way to restart the syscall since other AIO's
  		 * may be already running. Just fail this IO with EINTR.
  		 */
  		ret = -EINTR;
  		/*FALLTHRU*/
  	default:
bc9bff616   Jens Axboe   aio: use assigned...
1426
  		req->ki_complete(req, ret, 0);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1427
1428
  	}
  }
958c13ce1   Al Viro   make aio_read()/a...
1429
/*
 * Submit an IOCB_CMD_PREAD/PREADV request: prep the kiocb, validate the
 * file supports reading via ->read_iter, build the iov_iter and issue the
 * read.  Returns 0 or -errno; the I/O result itself is reported through
 * aio_complete_rw().
 */
static int aio_read(struct kiocb *req, const struct iocb *iocb,
			bool vectored, bool compat)
{
	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
	struct iov_iter iter;
	struct file *file;
	int ret;

	ret = aio_prep_rw(req, iocb);
	if (ret)
		return ret;
	file = req->ki_filp;
	if (unlikely(!(file->f_mode & FMODE_READ)))
		return -EBADF;
	ret = -EINVAL;
	if (unlikely(!file->f_op->read_iter))
		return -EINVAL;

	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
	if (ret < 0)
		return ret;
	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
	if (!ret)
		aio_rw_done(req, call_read_iter(file, req, &iter));
	/* NULL when aio_setup_rw() took the single-range path. */
	kfree(iovec);
	return ret;
}
73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1456

958c13ce1   Al Viro   make aio_read()/a...
/*
 * Submit an IOCB_CMD_PWRITE/PWRITEV request: mirror of aio_read(), plus
 * freeze-protection handling for regular files (taken here, released in
 * aio_complete_rw() by whichever thread completes the I/O).
 */
static int aio_write(struct kiocb *req, const struct iocb *iocb,
			 bool vectored, bool compat)
{
	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
	struct iov_iter iter;
	struct file *file;
	int ret;

	ret = aio_prep_rw(req, iocb);
	if (ret)
		return ret;
	file = req->ki_filp;
	if (unlikely(!(file->f_mode & FMODE_WRITE)))
		return -EBADF;
	if (unlikely(!file->f_op->write_iter))
		return -EINVAL;

	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
	if (ret < 0)
		return ret;
	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
	if (!ret) {
		/*
		 * Open-code file_start_write here to grab freeze protection,
		 * which will be released by another thread in
		 * aio_complete_rw().  Fool lockdep by telling it the lock got
		 * released so that it doesn't complain about the held lock when
		 * we return to userspace.
		 */
		if (S_ISREG(file_inode(file)->i_mode)) {
			__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
		}
		req->ki_flags |= IOCB_WRITE;
		aio_rw_done(req, call_write_iter(file, req, &iter));
	}
	/* NULL when aio_setup_rw() took the single-range path. */
	kfree(iovec);
	return ret;
}
a3c0d439e   Christoph Hellwig   aio: implement IO...
1496
1497
/*
 * Workqueue function for IOCB_CMD_FSYNC/FDSYNC: run vfs_fsync() in process
 * context, store the result and drop the request reference.
 */
static void aio_fsync_work(struct work_struct *work)
{
	struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);

	iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
	iocb_put(iocb);
}
88a6f18b9   Jens Axboe   aio: split out io...
1503
1504
/*
 * Submit an IOCB_CMD_FSYNC (datasync=false) or IOCB_CMD_FDSYNC
 * (datasync=true) request.  The actual fsync runs asynchronously from a
 * workqueue (aio_fsync_work).  Returns 0 on successful queueing, -EINVAL
 * if unsupported fields are set or the file has no ->fsync.
 */
static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
		     bool datasync)
{
	/* fsync takes no buffer/offset/length/rw-flags arguments. */
	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
			iocb->aio_rw_flags))
		return -EINVAL;

	if (unlikely(!req->file->f_op->fsync))
		return -EINVAL;

	req->datasync = datasync;
	INIT_WORK(&req->work, aio_fsync_work);
	schedule_work(&req->work);
	return 0;
}
bfe4037e7   Christoph Hellwig   aio: implement IO...
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
/*
 * Deferred completion path for IOCB_CMD_POLL (scheduled from the wakeup
 * handler or from cancellation).  Re-polls the file; if there is still no
 * event and the request wasn't cancelled, it re-arms the wait queue entry,
 * otherwise it records the result and drops the request reference.
 */
static void aio_poll_complete_work(struct work_struct *work)
{
	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
	struct poll_table_struct pt = { ._key = req->events };
	struct kioctx *ctx = iocb->ki_ctx;
	__poll_t mask = 0;

	if (!READ_ONCE(req->cancelled))
		mask = vfs_poll(req->file, &pt) & req->events;

	/*
	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
	 * synchronize with them.  In the cancellation case the list_del_init
	 * itself is not actually needed, but harmless so we keep it in to
	 * avoid further branches in the fast path.
	 */
	spin_lock_irq(&ctx->ctx_lock);
	if (!mask && !READ_ONCE(req->cancelled)) {
		/* No event yet and not cancelled: go back to waiting. */
		add_wait_queue(req->head, &req->wait);
		spin_unlock_irq(&ctx->ctx_lock);
		return;
	}
	list_del_init(&iocb->ki_list);
	iocb->ki_res.res = mangle_poll(mask);
	req->done = true;
	spin_unlock_irq(&ctx->ctx_lock);
	iocb_put(iocb);
}
  
/* assumes we are called with irqs disabled */
/*
 * ->ki_cancel handler for poll requests (installed by aio_poll()).  Marks
 * the request cancelled and, if it is still queued on the waitqueue,
 * dequeues it and defers completion to aio_poll_complete_work().
 */
static int aio_poll_cancel(struct kiocb *iocb)
{
	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
	struct poll_iocb *req = &aiocb->poll;

	spin_lock(&req->head->lock);
	WRITE_ONCE(req->cancelled, true);
	if (!list_empty(&req->wait.entry)) {
		list_del_init(&req->wait.entry);
		schedule_work(&aiocb->poll.work);
	}
	spin_unlock(&req->head->lock);

	return 0;
}
  
/*
 * Waitqueue wakeup callback for poll requests.  Completes the iocb inline
 * when possible (event matched and ctx_lock acquired via trylock),
 * otherwise punts completion to the workqueue.  Returns 1 when the entry
 * was consumed, 0 to keep waiting (non-matching event).
 */
static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
		void *key)
{
	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
	__poll_t mask = key_to_poll(key);
	unsigned long flags;

	/* for instances that support it check for an event match first: */
	if (mask && !(mask & req->events))
		return 0;

	list_del_init(&req->wait.entry);

	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
		/*
		 * Try to complete the iocb inline if we can. Use
		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
		 * call this function with IRQs disabled and because IRQs
		 * have to be disabled before ctx_lock is obtained.
		 */
		list_del(&iocb->ki_list);
		iocb->ki_res.res = mangle_poll(mask);
		req->done = true;
		spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
		iocb_put(iocb);
	} else {
		/* mask == 0 (bare wakeup) or lock contention: defer. */
		schedule_work(&req->work);
	}
	return 1;
}
  
/* Poll-table wrapper carrying the iocb and the registration error code. */
struct aio_poll_table {
	struct poll_table_struct	pt;
	struct aio_kiocb		*iocb;
	int				error;
};

/*
 * poll_table callback invoked from vfs_poll(): records the waitqueue head
 * and registers the request's wait entry on it.
 */
static void
aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
		struct poll_table_struct *p)
{
	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);

	/* multiple wait queues per file are not supported */
	if (unlikely(pt->iocb->poll.head)) {
		pt->error = -EINVAL;
		return;
	}

	pt->error = 0;
	pt->iocb->poll.head = head;
	add_wait_queue(head, &pt->iocb->poll.wait);
}
958c13ce1   Al Viro   make aio_read()/a...
1620
/*
 * Submit an IOCB_CMD_POLL request.  Registers a wait entry via vfs_poll();
 * if an event is already pending the iocb is completed immediately
 * ("stolen"), otherwise it is parked on active_reqs with aio_poll_cancel
 * as its cancel handler.  Returns 0 or -errno for submission failures.
 */
static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
{
	struct kioctx *ctx = aiocb->ki_ctx;
	struct poll_iocb *req = &aiocb->poll;
	struct aio_poll_table apt;
	bool cancel = false;
	__poll_t mask;

	/* reject any unknown events outside the normal event mask. */
	if ((u16)iocb->aio_buf != iocb->aio_buf)
		return -EINVAL;
	/* reject fields that are not defined for poll */
	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
		return -EINVAL;

	INIT_WORK(&req->work, aio_poll_complete_work);
	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;

	req->head = NULL;
	req->done = false;
	req->cancelled = false;

	apt.pt._qproc = aio_poll_queue_proc;
	apt.pt._key = req->events;
	apt.iocb = aiocb;
	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */

	/* initialized the list so that we can do list_empty checks */
	INIT_LIST_HEAD(&req->wait.entry);
	init_waitqueue_func_entry(&req->wait, aio_poll_wake);

	mask = vfs_poll(req->file, &apt.pt) & req->events;
	spin_lock_irq(&ctx->ctx_lock);
	if (likely(req->head)) {
		spin_lock(&req->head->lock);
		if (unlikely(list_empty(&req->wait.entry))) {
			/*
			 * A wakeup already removed our entry; treat a queueing
			 * error as a cancellation, otherwise let the wakeup's
			 * completion stand.
			 */
			if (apt.error)
				cancel = true;
			apt.error = 0;
			mask = 0;
		}
		if (mask || apt.error) {
			list_del_init(&req->wait.entry);
		} else if (cancel) {
			WRITE_ONCE(req->cancelled, true);
		} else if (!req->done) { /* actually waiting for an event */
			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
			aiocb->ki_cancel = aio_poll_cancel;
		}
		spin_unlock(&req->head->lock);
	}
	if (mask) { /* no async, we'd stolen it */
		aiocb->ki_res.res = mangle_poll(mask);
		apt.error = 0;
	}
	spin_unlock_irq(&ctx->ctx_lock);
	if (mask)
		iocb_put(aiocb);
	return apt.error;
}
88a6f18b9   Jens Axboe   aio: split out io...
1678
/*
 * Second stage of request submission: resolve the target file and optional
 * eventfd, write the KIOCB_KEY cookie back to the user iocb, seed ki_res,
 * and dispatch to the per-opcode handler.  Returns 0 or -errno; the caller
 * owns cleanup of req on failure (file/eventfd refs are stored in req).
 */
static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
			   struct iocb __user *user_iocb, struct aio_kiocb *req,
			   bool compat)
{
	req->ki_filp = fget(iocb->aio_fildes);
	if (unlikely(!req->ki_filp))
		return -EBADF;

	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
		struct eventfd_ctx *eventfd;
		/*
		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
		 * instance of the file* now. The file descriptor must be
		 * an eventfd() fd, and will be signaled for each completed
		 * event using the eventfd_signal() function.
		 */
		eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
		if (IS_ERR(eventfd))
			return PTR_ERR(eventfd);

		req->ki_eventfd = eventfd;
	}

	/* Cookie checked by io_cancel() to validate the user iocb pointer. */
	if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) {
		pr_debug("EFAULT: aio_key\n");
		return -EFAULT;
	}

	/* Pre-fill the completion event; res/res2 are set at completion. */
	req->ki_res.obj = (u64)(unsigned long)user_iocb;
	req->ki_res.data = iocb->aio_data;
	req->ki_res.res = 0;
	req->ki_res.res2 = 0;

	switch (iocb->aio_lio_opcode) {
	case IOCB_CMD_PREAD:
		return aio_read(&req->rw, iocb, false, compat);
	case IOCB_CMD_PWRITE:
		return aio_write(&req->rw, iocb, false, compat);
	case IOCB_CMD_PREADV:
		return aio_read(&req->rw, iocb, true, compat);
	case IOCB_CMD_PWRITEV:
		return aio_write(&req->rw, iocb, true, compat);
	case IOCB_CMD_FSYNC:
		return aio_fsync(&req->fsync, iocb, false);
	case IOCB_CMD_FDSYNC:
		return aio_fsync(&req->fsync, iocb, true);
	case IOCB_CMD_POLL:
		return aio_poll(req, iocb);
	default:
		pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
		return -EINVAL;
	}
}
88a6f18b9   Jens Axboe   aio: split out io...
1731
1732
1733
  static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
  			 bool compat)
  {
7316b49c2   Al Viro   aio: move sanity ...
1734
  	struct aio_kiocb *req;
88a6f18b9   Jens Axboe   aio: split out io...
1735
  	struct iocb iocb;
7316b49c2   Al Viro   aio: move sanity ...
1736
  	int err;
88a6f18b9   Jens Axboe   aio: split out io...
1737
1738
1739
  
  	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
  		return -EFAULT;
7316b49c2   Al Viro   aio: move sanity ...
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
  	/* enforce forwards compatibility on users */
  	if (unlikely(iocb.aio_reserved2)) {
  		pr_debug("EINVAL: reserve field set
  ");
  		return -EINVAL;
  	}
  
  	/* prevent overflows */
  	if (unlikely(
  	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
  	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
  	    ((ssize_t)iocb.aio_nbytes < 0)
  	   )) {
  		pr_debug("EINVAL: overflow check
  ");
  		return -EINVAL;
  	}
  
  	req = aio_get_req(ctx);
  	if (unlikely(!req))
  		return -EAGAIN;
  
  	err = __io_submit_one(ctx, &iocb, user_iocb, req, compat);
  
  	/* Done with the synchronous reference */
  	iocb_put(req);
  
  	/*
  	 * If err is 0, we'd either done aio_complete() ourselves or have
  	 * arranged for that to be done asynchronously.  Anything non-zero
  	 * means that we need to destroy req ourselves.
  	 */
  	if (unlikely(err)) {
  		iocb_destroy(req);
  		put_reqs_available(ctx, 1);
  	}
  	return err;
88a6f18b9   Jens Axboe   aio: split out io...
1777
  }
67ba049f9   Al Viro   aio: fold do_io_s...
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
  /* sys_io_submit:
   *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
   *	the number of iocbs queued.  May return -EINVAL if the aio_context
   *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
   *	*iocbpp[0] is not properly initialized, if the operation specified
   *	is invalid for the file descriptor in the iocb.  May fail with
   *	-EFAULT if any of the data structures point to invalid data.  May
   *	fail with -EBADF if the file descriptor specified in the first
   *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
   *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
   *	fail with -ENOSYS if not implemented.
   */
  SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
  		struct iocb __user * __user *, iocbpp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1792
1793
1794
  {
  	struct kioctx *ctx;
  	long ret = 0;
080d676de   Jeff Moyer   aio: allocate kio...
1795
  	int i = 0;
9f5b94254   Shaohua Li   fs: make aio plug
1796
  	struct blk_plug plug;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1797
1798
1799
  
  	if (unlikely(nr < 0))
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1800
1801
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
caf4167aa   Kent Overstreet   aio: dprintk() ->...
1802
1803
  		pr_debug("EINVAL: invalid context id
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1804
1805
  		return -EINVAL;
  	}
1da92779e   Al Viro   aio: sanitize the...
1806
1807
  	if (nr > ctx->nr_events)
  		nr = ctx->nr_events;
a79d40e9b   Jens Axboe   aio: only use blk...
1808
1809
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_start_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1810
  	for (i = 0; i < nr; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1811
  		struct iocb __user *user_iocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1812

67ba049f9   Al Viro   aio: fold do_io_s...
1813
  		if (unlikely(get_user(user_iocb, iocbpp + i))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1814
1815
1816
  			ret = -EFAULT;
  			break;
  		}
67ba049f9   Al Viro   aio: fold do_io_s...
1817
  		ret = io_submit_one(ctx, user_iocb, false);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1818
1819
1820
  		if (ret)
  			break;
  	}
a79d40e9b   Jens Axboe   aio: only use blk...
1821
1822
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_finish_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1823

723be6e39   Kent Overstreet   aio: percpu ioctx...
1824
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1825
1826
  	return i ? i : ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1827
  #ifdef CONFIG_COMPAT
c00d2c7e8   Al Viro   move aio compat t...
1828
  COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
67ba049f9   Al Viro   aio: fold do_io_s...
1829
  		       int, nr, compat_uptr_t __user *, iocbpp)
c00d2c7e8   Al Viro   move aio compat t...
1830
  {
67ba049f9   Al Viro   aio: fold do_io_s...
1831
1832
1833
1834
  	struct kioctx *ctx;
  	long ret = 0;
  	int i = 0;
  	struct blk_plug plug;
c00d2c7e8   Al Viro   move aio compat t...
1835
1836
1837
  
  	if (unlikely(nr < 0))
  		return -EINVAL;
67ba049f9   Al Viro   aio: fold do_io_s...
1838
1839
1840
1841
1842
1843
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
  		pr_debug("EINVAL: invalid context id
  ");
  		return -EINVAL;
  	}
1da92779e   Al Viro   aio: sanitize the...
1844
1845
  	if (nr > ctx->nr_events)
  		nr = ctx->nr_events;
a79d40e9b   Jens Axboe   aio: only use blk...
1846
1847
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_start_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
  	for (i = 0; i < nr; i++) {
  		compat_uptr_t user_iocb;
  
  		if (unlikely(get_user(user_iocb, iocbpp + i))) {
  			ret = -EFAULT;
  			break;
  		}
  
  		ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
  		if (ret)
  			break;
  	}
a79d40e9b   Jens Axboe   aio: only use blk...
1860
1861
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_finish_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1862
1863
1864
  
  	percpu_ref_put(&ctx->users);
  	return i ? i : ret;
c00d2c7e8   Al Viro   move aio compat t...
1865
1866
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
  /* sys_io_cancel:
   *	Attempts to cancel an iocb previously passed to io_submit.  If
   *	the operation is successfully cancelled, the resulting event is
   *	copied into the memory pointed to by result without being placed
   *	into the completion queue and 0 is returned.  May fail with
   *	-EFAULT if any of the data structures pointed to are invalid.
   *	May fail with -EINVAL if aio_context specified by ctx_id is
   *	invalid.  May fail with -EAGAIN if the iocb specified was not
   *	cancelled.  Will fail with -ENOSYS if not implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1877
1878
  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
  		struct io_event __user *, result)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1879
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1880
  	struct kioctx *ctx;
04b2fa9f8   Christoph Hellwig   fs: split generic...
1881
  	struct aio_kiocb *kiocb;
888933f8f   Christoph Hellwig   aio: simplify can...
1882
  	int ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1883
  	u32 key;
a9339b785   Al Viro   aio: keep io_even...
1884
  	u64 obj = (u64)(unsigned long)iocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1885

f3a2752a4   Christoph Hellwig   aio: simplify KIO...
1886
  	if (unlikely(get_user(key, &iocb->aio_key)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1887
  		return -EFAULT;
f3a2752a4   Christoph Hellwig   aio: simplify KIO...
1888
1889
  	if (unlikely(key != KIOCB_KEY))
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1890
1891
1892
1893
1894
1895
  
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx))
  		return -EINVAL;
  
  	spin_lock_irq(&ctx->ctx_lock);
833f4154e   Al Viro   aio: fold lookup_...
1896
1897
  	/* TODO: use a hash or array, this sucks. */
  	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
a9339b785   Al Viro   aio: keep io_even...
1898
  		if (kiocb->ki_res.obj == obj) {
833f4154e   Al Viro   aio: fold lookup_...
1899
1900
1901
1902
  			ret = kiocb->ki_cancel(&kiocb->rw);
  			list_del_init(&kiocb->ki_list);
  			break;
  		}
888933f8f   Christoph Hellwig   aio: simplify can...
1903
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1904
  	spin_unlock_irq(&ctx->ctx_lock);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1905
  	if (!ret) {
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1906
1907
1908
1909
  		/*
  		 * The result argument is no longer used - the io_event is
  		 * always delivered via the ring buffer. -EINPROGRESS indicates
  		 * cancellation is progress:
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1910
  		 */
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1911
  		ret = -EINPROGRESS;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1912
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1913

723be6e39   Kent Overstreet   aio: percpu ioctx...
1914
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1915
1916
1917
  
  	return ret;
  }
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
  static long do_io_getevents(aio_context_t ctx_id,
  		long min_nr,
  		long nr,
  		struct io_event __user *events,
  		struct timespec64 *ts)
  {
  	ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX;
  	struct kioctx *ioctx = lookup_ioctx(ctx_id);
  	long ret = -EINVAL;
  
  	if (likely(ioctx)) {
  		if (likely(min_nr <= nr && min_nr >= 0))
  			ret = read_events(ioctx, min_nr, nr, events, until);
  		percpu_ref_put(&ioctx->users);
  	}
  
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1936
1937
  /* io_getevents:
   *	Attempts to read at least min_nr events and up to nr events from
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1938
1939
1940
1941
1942
1943
1944
1945
   *	the completion queue for the aio_context specified by ctx_id. If
   *	it succeeds, the number of read events is returned. May fail with
   *	-EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
   *	out of range, if timeout is out of range.  May fail with -EFAULT
   *	if any of the memory specified is invalid.  May return 0 or
   *	< min_nr if the timeout specified by timeout has elapsed
   *	before sufficient events are available, where timeout == NULL
   *	specifies an infinite timeout. Note that the timeout pointed to by
6900807c6   Jeff Moyer   aio: fix io_getev...
1946
   *	timeout is relative.  Will fail with -ENOSYS if not implemented.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1947
   */
3ca47e958   Arnd Bergmann   y2038: remove CON...
1948
  #ifdef CONFIG_64BIT
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1949

002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1950
1951
1952
1953
  SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1954
  		struct __kernel_timespec __user *, timeout)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1955
  {
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1956
  	struct timespec64	ts;
7a074e96d   Christoph Hellwig   aio: implement io...
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
  	int			ret;
  
  	if (timeout && unlikely(get_timespec64(&ts, timeout)))
  		return -EFAULT;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
  	if (!ret && signal_pending(current))
  		ret = -EINTR;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1967

7a35397f8   Deepa Dinamani   io_pgetevents: us...
1968
  #endif
9ba546c01   Christoph Hellwig   aio: don't expose...
1969
1970
1971
1972
  struct __aio_sigset {
  	const sigset_t __user	*sigmask;
  	size_t		sigsetsize;
  };
7a074e96d   Christoph Hellwig   aio: implement io...
1973
1974
1975
1976
1977
  SYSCALL_DEFINE6(io_pgetevents,
  		aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1978
  		struct __kernel_timespec __user *, timeout,
7a074e96d   Christoph Hellwig   aio: implement io...
1979
1980
1981
  		const struct __aio_sigset __user *, usig)
  {
  	struct __aio_sigset	ksig = { NULL, };
7a074e96d   Christoph Hellwig   aio: implement io...
1982
  	struct timespec64	ts;
97abc889e   Oleg Nesterov   signal: remove th...
1983
  	bool interrupted;
7a074e96d   Christoph Hellwig   aio: implement io...
1984
1985
1986
1987
1988
1989
1990
  	int ret;
  
  	if (timeout && unlikely(get_timespec64(&ts, timeout)))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
b772434be   Oleg Nesterov   signal: simplify ...
1991
  	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1992
1993
  	if (ret)
  		return ret;
7a074e96d   Christoph Hellwig   aio: implement io...
1994
1995
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
1996
1997
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
1998
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
1999
  	if (interrupted && !ret)
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2000
  		ret = -ERESTARTNOHAND;
7a074e96d   Christoph Hellwig   aio: implement io...
2001

7a35397f8   Deepa Dinamani   io_pgetevents: us...
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
  	return ret;
  }
  
  #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
  
  SYSCALL_DEFINE6(io_pgetevents_time32,
  		aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
  		struct old_timespec32 __user *, timeout,
  		const struct __aio_sigset __user *, usig)
  {
  	struct __aio_sigset	ksig = { NULL, };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2016
  	struct timespec64	ts;
97abc889e   Oleg Nesterov   signal: remove th...
2017
  	bool interrupted;
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2018
2019
2020
2021
2022
2023
2024
  	int ret;
  
  	if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
ded653ccb   Deepa Dinamani   signal: Add set_u...
2025

b772434be   Oleg Nesterov   signal: simplify ...
2026
  	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
ded653ccb   Deepa Dinamani   signal: Add set_u...
2027
2028
  	if (ret)
  		return ret;
7a074e96d   Christoph Hellwig   aio: implement io...
2029
2030
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2031
2032
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2033
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2034
  	if (interrupted && !ret)
854a6ed56   Deepa Dinamani   signal: Add resto...
2035
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2036

7a074e96d   Christoph Hellwig   aio: implement io...
2037
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2038
  }
c00d2c7e8   Al Viro   move aio compat t...
2039

7a35397f8   Deepa Dinamani   io_pgetevents: us...
2040
2041
2042
  #endif
  
  #if defined(CONFIG_COMPAT_32BIT_TIME)
8dabe7245   Arnd Bergmann   y2038: syscalls: ...
2043
2044
2045
2046
2047
  SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
  		__s32, min_nr,
  		__s32, nr,
  		struct io_event __user *, events,
  		struct old_timespec32 __user *, timeout)
c00d2c7e8   Al Viro   move aio compat t...
2048
  {
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2049
  	struct timespec64 t;
7a074e96d   Christoph Hellwig   aio: implement io...
2050
  	int ret;
9afc5eee6   Arnd Bergmann   y2038: globally r...
2051
  	if (timeout && get_old_timespec32(&t, timeout))
7a074e96d   Christoph Hellwig   aio: implement io...
2052
2053
2054
2055
2056
2057
2058
  		return -EFAULT;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
  	if (!ret && signal_pending(current))
  		ret = -EINTR;
  	return ret;
  }
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2059
2060
2061
  #endif
  
  #ifdef CONFIG_COMPAT
c00d2c7e8   Al Viro   move aio compat t...
2062

7a074e96d   Christoph Hellwig   aio: implement io...
2063
  struct __compat_aio_sigset {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2064
  	compat_uptr_t		sigmask;
7a074e96d   Christoph Hellwig   aio: implement io...
2065
2066
  	compat_size_t		sigsetsize;
  };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2067
  #if defined(CONFIG_COMPAT_32BIT_TIME)
7a074e96d   Christoph Hellwig   aio: implement io...
2068
2069
2070
2071
2072
  COMPAT_SYSCALL_DEFINE6(io_pgetevents,
  		compat_aio_context_t, ctx_id,
  		compat_long_t, min_nr,
  		compat_long_t, nr,
  		struct io_event __user *, events,
9afc5eee6   Arnd Bergmann   y2038: globally r...
2073
  		struct old_timespec32 __user *, timeout,
7a074e96d   Christoph Hellwig   aio: implement io...
2074
2075
  		const struct __compat_aio_sigset __user *, usig)
  {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2076
  	struct __compat_aio_sigset ksig = { 0, };
7a074e96d   Christoph Hellwig   aio: implement io...
2077
  	struct timespec64 t;
97abc889e   Oleg Nesterov   signal: remove th...
2078
  	bool interrupted;
7a074e96d   Christoph Hellwig   aio: implement io...
2079
  	int ret;
9afc5eee6   Arnd Bergmann   y2038: globally r...
2080
  	if (timeout && get_old_timespec32(&t, timeout))
7a074e96d   Christoph Hellwig   aio: implement io...
2081
2082
2083
2084
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2085
  	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
ded653ccb   Deepa Dinamani   signal: Add set_u...
2086
2087
  	if (ret)
  		return ret;
c00d2c7e8   Al Viro   move aio compat t...
2088

7a074e96d   Christoph Hellwig   aio: implement io...
2089
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2090
2091
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2092
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2093
  	if (interrupted && !ret)
854a6ed56   Deepa Dinamani   signal: Add resto...
2094
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2095

7a074e96d   Christoph Hellwig   aio: implement io...
2096
  	return ret;
c00d2c7e8   Al Viro   move aio compat t...
2097
  }
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
  
  #endif
  
  COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
  		compat_aio_context_t, ctx_id,
  		compat_long_t, min_nr,
  		compat_long_t, nr,
  		struct io_event __user *, events,
  		struct __kernel_timespec __user *, timeout,
  		const struct __compat_aio_sigset __user *, usig)
  {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2109
  	struct __compat_aio_sigset ksig = { 0, };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2110
  	struct timespec64 t;
97abc889e   Oleg Nesterov   signal: remove th...
2111
  	bool interrupted;
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2112
2113
2114
2115
2116
2117
2118
  	int ret;
  
  	if (timeout && get_timespec64(&t, timeout))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2119
  	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2120
2121
2122
2123
  	if (ret)
  		return ret;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2124
2125
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2126
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2127
  	if (interrupted && !ret)
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2128
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2129

7a074e96d   Christoph Hellwig   aio: implement io...
2130
  	return ret;
c00d2c7e8   Al Viro   move aio compat t...
2131
2132
  }
  #endif