Blame view

fs/aio.c 56.4 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   *	An async IO implementation for Linux
   *	Written by Benjamin LaHaise <bcrl@kvack.org>
   *
   *	Implements an efficient asynchronous io interface.
   *
   *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
bfe4037e7   Christoph Hellwig   aio: implement IO...
8
   *	Copyright 2018 Christoph Hellwig.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
   *
   *	See ../COPYING for licensing terms.
   */
caf4167aa   Kent Overstreet   aio: dprintk() ->...
12
  #define pr_fmt(fmt) "%s: " fmt, __func__
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
16
17
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/errno.h>
  #include <linux/time.h>
  #include <linux/aio_abi.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
18
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/syscalls.h>
b9d128f10   Jens Axboe   block: move bdi/a...
20
  #include <linux/backing-dev.h>
9018ccc45   Christoph Hellwig   aio: add a iocb r...
21
  #include <linux/refcount.h>
027445c37   Badari Pulavarty   [PATCH] Vectorize...
22
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23

174cd4b1e   Ingo Molnar   sched/headers: Pr...
24
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
28
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
29
  #include <linux/percpu.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
31
32
33
34
35
  #include <linux/slab.h>
  #include <linux/timer.h>
  #include <linux/aio.h>
  #include <linux/highmem.h>
  #include <linux/workqueue.h>
  #include <linux/security.h>
9c3060bed   Davide Libenzi   signal/timer/even...
36
  #include <linux/eventfd.h>
cfb1e33ee   Jeff Moyer   aio: implement re...
37
  #include <linux/blkdev.h>
9d85cba71   Jeff Moyer   aio: fix the comp...
38
  #include <linux/compat.h>
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
39
40
  #include <linux/migrate.h>
  #include <linux/ramfs.h>
723be6e39   Kent Overstreet   aio: percpu ioctx...
41
  #include <linux/percpu-refcount.h>
71ad7490c   Benjamin LaHaise   rework aio migrat...
42
  #include <linux/mount.h>
52db59df1   David Howells   vfs: Convert aio ...
43
  #include <linux/pseudo_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44
45
  
  #include <asm/kmap_types.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
46
  #include <linux/uaccess.h>
a538e3ff9   Jeff Moyer   aio: fix spectre ...
47
  #include <linux/nospec.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
48

68d70d03f   Al Viro   constify rw_verif...
49
  #include "internal.h"
f3a2752a4   Christoph Hellwig   aio: simplify KIO...
50
  #define KIOCB_KEY		0
4e179bca6   Kent Overstreet   aio: move private...
51
52
53
54
55
56
#define AIO_RING_MAGIC			0xa10a10a1
#define AIO_RING_COMPAT_FEATURES	1
#define AIO_RING_INCOMPAT_FEATURES	0
/*
 * Header of the ring buffer that is mapped into userspace.  The layout is
 * part of the user ABI (see aio_setup_ring(), which fills these fields in
 * and exposes the pages via mmap) — do not reorder or resize members.
 */
struct aio_ring {
	unsigned	id;	/* kernel internal index number */
	unsigned	nr;	/* number of io_events */
	unsigned	head;	/* Written to by userland or under ring_lock
				 * mutex by aio_read_events_ring(). */
	unsigned	tail;

	unsigned	magic;
	unsigned	compat_features;
	unsigned	incompat_features;
	unsigned	header_length;	/* size of aio_ring */

	struct io_event		io_events[];	/* flexible array of completion events */
}; /* 128 bytes + ring size */
a79d40e9b   Jens Axboe   aio: only use blk...
67
68
69
70
71
/*
 * Plugging is meant to work with larger batches of IOs. If we don't
 * have more than the below, then don't bother setting up a plug.
 */
#define AIO_PLUG_THRESHOLD	2

/* Number of ring pages kept inline in struct kioctx (internal_pages[]). */
#define AIO_RING_PAGES	8
4e179bca6   Kent Overstreet   aio: move private...
73

db446a08c   Benjamin LaHaise   aio: convert the ...
74
/*
 * Per-mm table of kioctx pointers, looked up under RCU (see the
 * rcu_dereference() users below); freed via the embedded rcu head.
 */
struct kioctx_table {
	struct rcu_head		rcu;	/* for RCU-delayed freeing of the table */
	unsigned		nr;	/* number of slots in table[] */
	struct kioctx __rcu	*table[];	/* RCU-protected kioctx slots */
};
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
79
80
81
  struct kioctx_cpu {
  	unsigned		reqs_available;
  };
dc48e56d7   Jens Axboe   aio: fix serial d...
82
83
84
85
/*
 * Completion/count pair used to wait for a set of kioctxs to drain
 * (signalled from free_ioctx_reqs() when a ctx's last request is done).
 */
struct ctx_rq_wait {
	struct completion comp;
	atomic_t count;
};
4e179bca6   Kent Overstreet   aio: move private...
86
/*
 * Kernel-side state for one AIO context.  The anonymous structs below are
 * each ____cacheline_aligned_in_smp to keep independently-contended state
 * (slot accounting, cancellation list, ring head lock, ring tail lock) on
 * separate cachelines.
 */
struct kioctx {
	struct percpu_ref	users;	/* references from lookup_ioctx() users */
	atomic_t		dead;	/* set once the ctx is being torn down */

	struct percpu_ref	reqs;	/* in-flight request count */

	unsigned long		user_id;	/* == mmap_base; the id userspace sees */

	struct __percpu kioctx_cpu *cpu;

	/*
	 * For percpu reqs_available, number of slots we move to/from global
	 * counter at a time:
	 */
	unsigned		req_batch;
	/*
	 * This is what userspace passed to io_setup(), it's not used for
	 * anything but counting against the global max_reqs quota.
	 *
	 * The real limit is nr_events - 1, which will be larger (see
	 * aio_setup_ring())
	 */
	unsigned		max_reqs;

	/* Size of ringbuffer, in units of struct io_event */
	unsigned		nr_events;

	unsigned long		mmap_base;	/* address of the ring mapping */
	unsigned long		mmap_size;	/* size of the ring mapping */

	struct page		**ring_pages;	/* pages backing the ring */
	long			nr_pages;

	struct rcu_work		free_rwork;	/* see free_ioctx() */

	/*
	 * signals when all in-flight requests are done
	 */
	struct ctx_rq_wait	*rq_wait;

	struct {
		/*
		 * This counts the number of available slots in the ringbuffer,
		 * so we avoid overflowing it: it's decremented (if positive)
		 * when allocating a kiocb and incremented when the resulting
		 * io_event is pulled off the ringbuffer.
		 *
		 * We batch accesses to it with a percpu version.
		 */
		atomic_t	reqs_available;
	} ____cacheline_aligned_in_smp;

	struct {
		spinlock_t	ctx_lock;
		struct list_head active_reqs;	/* used for cancellation */
	} ____cacheline_aligned_in_smp;

	struct {
		struct mutex	ring_lock;	/* serializes ring head readers */
		wait_queue_head_t wait;
	} ____cacheline_aligned_in_smp;

	struct {
		unsigned	tail;
		unsigned	completed_events;
		spinlock_t	completion_lock;	/* protects ring tail writes */
	} ____cacheline_aligned_in_smp;

	struct page		*internal_pages[AIO_RING_PAGES];	/* small-ring fast path */
	struct file		*aio_ring_file;	/* pseudo-file backing the ring mapping */

	unsigned		id;	/* index into mm->ioctx_table */
};
84c4e1f89   Linus Torvalds   aio: simplify - a...
155
156
157
158
/*
 * First field must be the file pointer in all the
 * iocb unions! See also 'struct kiocb' in <linux/fs.h>
 */
struct fsync_iocb {
	struct file		*file;
	struct work_struct	work;	/* fsync runs from a workqueue */
	bool			datasync;	/* true for IOCB_CMD_FDSYNC */
	struct cred		*creds;	/* submitter's creds, used when the work runs */
};
bfe4037e7   Christoph Hellwig   aio: implement IO...
165
166
167
168
/*
 * State for an IOCB_CMD_POLL request.  Per the comment above the iocb
 * union, 'file' must remain the first member.
 */
struct poll_iocb {
	struct file		*file;
	struct wait_queue_head	*head;	/* waitqueue we are queued on */
	__poll_t		events;	/* events the caller asked for */
	bool			done;	/* result already delivered */
	bool			cancelled;
	struct wait_queue_entry	wait;
	struct work_struct	work;
};
84c4e1f89   Linus Torvalds   aio: simplify - a...
174
175
176
177
178
179
/*
 * NOTE! Each of the iocb union members has the file pointer
 * as the first entry in their struct definition. So you can
 * access the file pointer through any of the sub-structs,
 * or directly as just 'ki_filp' in this struct.
 */
struct aio_kiocb {
	union {
		struct file		*ki_filp;
		struct kiocb		rw;
		struct fsync_iocb	fsync;
		struct poll_iocb	poll;
	};

	struct kioctx		*ki_ctx;	/* owning context */
	kiocb_cancel_fn		*ki_cancel;	/* set via kiocb_set_cancel_fn() */

	struct io_event		ki_res;	/* completion event staged for the ring */

	struct list_head	ki_list;	/* the aio core uses this
						 * for cancellation */
	refcount_t		ki_refcnt;

	/*
	 * If the aio_resfd field of the userspace iocb is not zero,
	 * this is the underlying eventfd context to deliver events to.
	 */
	struct eventfd_ctx	*ki_eventfd;
};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);	/* protects aio_nr against concurrent io_setup/io_destroy */
unsigned long aio_nr;		/* current system wide number of aio requests */
unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
/*----end sysctl variables---*/

/* Slab caches for request and context objects; created in aio_setup(). */
static struct kmem_cache	*kiocb_cachep;
static struct kmem_cache	*kioctx_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209

71ad7490c   Benjamin LaHaise   rework aio migrat...
210
211
212
213
214
215
216
static struct vfsmount *aio_mnt;

static const struct file_operations aio_ring_fops;
static const struct address_space_operations aio_ctx_aops;

/*
 * Allocate the anonymous pseudo-file that backs the ring buffer mapping.
 * The inode's mapping carries a back-pointer to the kioctx (private_data)
 * so page migration can find it; i_size covers the whole ring.
 * Returns the new file or an ERR_PTR; the inode is dropped on failure.
 */
static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
{
	struct file *file;
	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	inode->i_mapping->a_ops = &aio_ctx_aops;
	inode->i_mapping->private_data = ctx;
	inode->i_size = PAGE_SIZE * nr_pages;

	file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
				O_RDWR, &aio_ring_fops);
	if (IS_ERR(file))
		iput(inode);	/* alloc_file_pseudo() did not take over the inode */
	return file;
}
52db59df1   David Howells   vfs: Convert aio ...
231
/*
 * fs_context init for the internal "aio" pseudo filesystem; marks the
 * superblock SB_I_NOEXEC so nothing on it can ever be executed.
 */
static int aio_init_fs_context(struct fs_context *fc)
{
	if (!init_pseudo(fc, AIO_RING_MAGIC))
		return -ENOMEM;
	fc->s_iflags |= SB_I_NOEXEC;
	return 0;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238
239
240
241
242
243
/* aio_setup
 *	Creates the slab caches used by the aio routines, panic on
 *	failure as this is done early during the boot sequence.
 */
static int __init aio_setup(void)
{
	static struct file_system_type aio_fs = {
		.name		= "aio",
		.init_fs_context = aio_init_fs_context,
		.kill_sb	= kill_anon_super,
	};
	aio_mnt = kern_mount(&aio_fs);
	if (IS_ERR(aio_mnt))
		panic("Failed to create aio fs mount.");

	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
	return 0;
}
__initcall(aio_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257

5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
258
259
260
  static void put_aio_ring_file(struct kioctx *ctx)
  {
  	struct file *aio_ring_file = ctx->aio_ring_file;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
261
  	struct address_space *i_mapping;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
262
  	if (aio_ring_file) {
450630975   Al Viro   don't open-code f...
263
  		truncate_setsize(file_inode(aio_ring_file), 0);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
264
265
  
  		/* Prevent further access to the kioctx from migratepages */
450630975   Al Viro   don't open-code f...
266
  		i_mapping = aio_ring_file->f_mapping;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
267
268
  		spin_lock(&i_mapping->private_lock);
  		i_mapping->private_data = NULL;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
269
  		ctx->aio_ring_file = NULL;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
270
  		spin_unlock(&i_mapping->private_lock);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
271
272
273
274
  
  		fput(aio_ring_file);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
275
276
/*
 * Tear down the ring buffer: detach from the backing file, drop every
 * ring page reference, and free the page-pointer array if it was
 * heap-allocated (i.e. larger than internal_pages[]).
 */
static void aio_free_ring(struct kioctx *ctx)
{
	int i;

	/* Disconnect the kiotx from the ring file.  This prevents future
	 * accesses to the kioctx from page migration.
	 */
	put_aio_ring_file(ctx);

	for (i = 0; i < ctx->nr_pages; i++) {
		struct page *page;
		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
				page_count(ctx->ring_pages[i]));
		page = ctx->ring_pages[i];
		if (!page)
			continue;
		ctx->ring_pages[i] = NULL;
		put_page(page);
	}

	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
		kfree(ctx->ring_pages);
		ctx->ring_pages = NULL;
	}
}
5477e70a6   Oleg Nesterov   mm: move ->mremap...
300
/*
 * ->mremap handler for the ring VMA: when userspace moves the mapping,
 * find the kioctx that owns this ring file in the mm's ioctx table and
 * update its user_id/mmap_base to the new address.  Fails with -EINVAL
 * if the ctx is already dead or no longer in the table.
 */
static int aio_ring_mremap(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct kioctx_table *table;
	int i, res = -EINVAL;

	/* ioctx_lock orders us against table updates; RCU protects the
	 * table and its slots for the duration of the walk. */
	spin_lock(&mm->ioctx_lock);
	rcu_read_lock();
	table = rcu_dereference(mm->ioctx_table);
	for (i = 0; i < table->nr; i++) {
		struct kioctx *ctx;

		ctx = rcu_dereference(table->table[i]);
		if (ctx && ctx->aio_ring_file == file) {
			if (!atomic_read(&ctx->dead)) {
				ctx->user_id = ctx->mmap_base = vma->vm_start;
				res = 0;
			}
			break;
		}
	}

	rcu_read_unlock();
	spin_unlock(&mm->ioctx_lock);
	return res;
}
5477e70a6   Oleg Nesterov   mm: move ->mremap...
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
/* VMA ops for the ring mapping; mremap is supported even on !MMU. */
static const struct vm_operations_struct aio_ring_vm_ops = {
	.mremap		= aio_ring_mremap,
#if IS_ENABLED(CONFIG_MMU)
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
#endif
};

/* mmap handler for the ring file: the mapping may move but never grow. */
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_DONTEXPAND;
	vma->vm_ops = &aio_ring_vm_ops;
	return 0;
}

static const struct file_operations aio_ring_fops = {
	.mmap = aio_ring_mmap,
};
0c45355fc   Benjamin LaHaise   aio: fix build wh...
344
#if IS_ENABLED(CONFIG_MIGRATION)
/*
 * Page-migration callback for ring pages.  Validates that the old page
 * still belongs to a live kioctx (under mapping->private_lock and
 * ctx->ring_lock), moves the mapping, then copies the page contents
 * under completion_lock so concurrent event writes are not lost.
 */
static int aio_migratepage(struct address_space *mapping, struct page *new,
			struct page *old, enum migrate_mode mode)
{
	struct kioctx *ctx;
	unsigned long flags;
	pgoff_t idx;
	int rc;

	/*
	 * We cannot support the _NO_COPY case here, because copy needs to
	 * happen under the ctx->completion_lock. That does not work with the
	 * migration workflow of MIGRATE_SYNC_NO_COPY.
	 */
	if (mode == MIGRATE_SYNC_NO_COPY)
		return -EINVAL;

	rc = 0;

	/* mapping->private_lock here protects against the kioctx teardown.  */
	spin_lock(&mapping->private_lock);
	ctx = mapping->private_data;
	if (!ctx) {
		rc = -EINVAL;
		goto out;
	}

	/* The ring_lock mutex.  The prevents aio_read_events() from writing
	 * to the ring's head, and prevents page migration from mucking in
	 * a partially initialized kiotx.
	 */
	if (!mutex_trylock(&ctx->ring_lock)) {
		rc = -EAGAIN;
		goto out;
	}

	idx = old->index;
	if (idx < (pgoff_t)ctx->nr_pages) {
		/* Make sure the old page hasn't already been changed */
		if (ctx->ring_pages[idx] != old)
			rc = -EAGAIN;
	} else
		rc = -EINVAL;

	if (rc != 0)
		goto out_unlock;

	/* Writeback must be complete */
	BUG_ON(PageWriteback(old));
	get_page(new);

	rc = migrate_page_move_mapping(mapping, new, old, 1);
	if (rc != MIGRATEPAGE_SUCCESS) {
		put_page(new);
		goto out_unlock;
	}

	/* Take completion_lock to prevent other writes to the ring buffer
	 * while the old page is copied to the new.  This prevents new
	 * events from being lost.
	 */
	spin_lock_irqsave(&ctx->completion_lock, flags);
	migrate_page_copy(new, old);
	BUG_ON(ctx->ring_pages[idx] != old);
	ctx->ring_pages[idx] = new;
	spin_unlock_irqrestore(&ctx->completion_lock, flags);

	/* The old page is no longer accessible. */
	put_page(old);

out_unlock:
	mutex_unlock(&ctx->ring_lock);
out:
	spin_unlock(&mapping->private_lock);
	return rc;
}
#endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
417

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
418
/* Address-space ops for ring pages: dirty tracking without writeback,
 * plus migration support when CONFIG_MIGRATION is enabled. */
static const struct address_space_operations aio_ctx_aops = {
	.set_page_dirty = __set_page_dirty_no_writeback,
#if IS_ENABLED(CONFIG_MIGRATION)
	.migratepage	= aio_migratepage,
#endif
};
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
424
/*
 * Allocate and map the completion ring for @ctx, sized for at least
 * @nr_events events: create the backing pseudo-file, populate its pages,
 * mmap them into the current process, and initialize the aio_ring header
 * that userspace will read.  Returns 0 or a negative errno; all error
 * paths tear down via aio_free_ring().
 */
static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
{
	struct aio_ring *ring;
	struct mm_struct *mm = current->mm;
	unsigned long size, unused;
	int nr_pages;
	int i;
	struct file *file;

	/* Compensate for the ring buffer's head/tail overlap entry */
	nr_events += 2;	/* 1 is required, 2 for good luck */

	size = sizeof(struct aio_ring);
	size += sizeof(struct io_event) * nr_events;

	nr_pages = PFN_UP(size);
	if (nr_pages < 0)
		return -EINVAL;

	file = aio_private_file(ctx, nr_pages);
	if (IS_ERR(file)) {
		ctx->aio_ring_file = NULL;
		return -ENOMEM;
	}

	ctx->aio_ring_file = file;
	/* Round the capacity up to what the allocated pages can hold. */
	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
			/ sizeof(struct io_event);

	ctx->ring_pages = ctx->internal_pages;
	if (nr_pages > AIO_RING_PAGES) {
		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
					  GFP_KERNEL);
		if (!ctx->ring_pages) {
			put_aio_ring_file(ctx);
			return -ENOMEM;
		}
	}

	for (i = 0; i < nr_pages; i++) {
		struct page *page;
		page = find_or_create_page(file->f_mapping,
					   i, GFP_HIGHUSER | __GFP_ZERO);
		if (!page)
			break;
		pr_debug("pid(%d) page[%d]->count=%d\n",
			 current->pid, i, page_count(page));
		SetPageUptodate(page);
		unlock_page(page);

		ctx->ring_pages[i] = page;
	}
	ctx->nr_pages = i;

	if (unlikely(i != nr_pages)) {
		aio_free_ring(ctx);
		return -ENOMEM;
	}

	ctx->mmap_size = nr_pages * PAGE_SIZE;
	pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);

	if (mmap_write_lock_killable(mm)) {
		ctx->mmap_size = 0;
		aio_free_ring(ctx);
		return -EINTR;
	}

	ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size,
				 PROT_READ | PROT_WRITE,
				 MAP_SHARED, 0, &unused, NULL);
	mmap_write_unlock(mm);
	if (IS_ERR((void *)ctx->mmap_base)) {
		ctx->mmap_size = 0;
		aio_free_ring(ctx);
		return -ENOMEM;
	}

	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);

	ctx->user_id = ctx->mmap_base;
	ctx->nr_events = nr_events; /* trusted copy */

	/* Fill in the userspace-visible ring header on the first page. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	ring->nr = nr_events;	/* user copy */
	ring->id = ~0U;	/* real id assigned later by ioctx_add_table() */
	ring->head = ring->tail = 0;
	ring->magic = AIO_RING_MAGIC;
	ring->compat_features = AIO_RING_COMPAT_FEATURES;
	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
	ring->header_length = sizeof(struct aio_ring);
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);

	return 0;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
517
518
519
  #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
  #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
  #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
04b2fa9f8   Christoph Hellwig   fs: split generic...
520
/*
 * Register a cancellation callback for an in-flight kiocb and put the
 * request on its context's active_reqs list (under ctx_lock) so
 * free_ioctx_users() can cancel it.  Warns and bails if the request is
 * already on a list.
 */
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
	struct kioctx *ctx = req->ki_ctx;
	unsigned long flags;

	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
		return;

	spin_lock_irqsave(&ctx->ctx_lock, flags);
	list_add_tail(&req->ki_list, &ctx->active_reqs);
	req->ki_cancel = cancel;
	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
a6d7cff47   Tejun Heo   fs/aio: Add expli...
534
535
536
/*
 * free_ioctx() should be RCU delayed to synchronize against the RCU
 * protected lookup_ioctx() and also needs process context to call
 * aio_free_ring().  Use rcu_work.
 */
static void free_ioctx(struct work_struct *work)
{
	struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
					  free_rwork);
	pr_debug("freeing %p\n", ctx);

	aio_free_ring(ctx);
	free_percpu(ctx->cpu);
	percpu_ref_exit(&ctx->reqs);
	percpu_ref_exit(&ctx->users);
	kmem_cache_free(kioctx_cachep, ctx);
}
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
552
553
554
/*
 * Called when ctx->reqs drops to zero, i.e. the last in-flight request
 * has completed.  Wakes any io_destroy()/exit waiter and schedules the
 * RCU-delayed final free.
 */
static void free_ioctx_reqs(struct percpu_ref *ref)
{
	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);

	/* At this point we know that there are no any in-flight requests */
	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
		complete(&ctx->rq_wait->comp);

	/* Synchronize against RCU protected table->table[] dereferences */
	INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
	queue_rcu_work(system_wq, &ctx->free_rwork);
}
36f558890   Kent Overstreet   aio: refcounting ...
563
564
565
566
567
/*
 * When this function runs, the kioctx has been removed from the "hash table"
 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
 * now it's safe to cancel any that need to be.
 */
static void free_ioctx_users(struct percpu_ref *ref)
{
	struct kioctx *ctx = container_of(ref, struct kioctx, users);
	struct aio_kiocb *req;

	spin_lock_irq(&ctx->ctx_lock);

	/* Cancel every request still on the active list. */
	while (!list_empty(&ctx->active_reqs)) {
		req = list_first_entry(&ctx->active_reqs,
				       struct aio_kiocb, ki_list);
		req->ki_cancel(&req->rw);
		list_del_init(&req->ki_list);
	}

	spin_unlock_irq(&ctx->ctx_lock);

	/* Drop the request refcount; free_ioctx_reqs() runs when it hits 0. */
	percpu_ref_kill(&ctx->reqs);
	percpu_ref_put(&ctx->reqs);
}
db446a08c   Benjamin LaHaise   aio: convert the ...
586
587
588
589
590
591
592
  static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  {
  	unsigned i, new_nr;
  	struct kioctx_table *table, *old;
  	struct aio_ring *ring;
  
  	spin_lock(&mm->ioctx_lock);
855ef0dec   Oleg Nesterov   aio: kill the mis...
593
  	table = rcu_dereference_raw(mm->ioctx_table);
db446a08c   Benjamin LaHaise   aio: convert the ...
594
595
596
597
  
  	while (1) {
  		if (table)
  			for (i = 0; i < table->nr; i++)
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
598
  				if (!rcu_access_pointer(table->table[i])) {
db446a08c   Benjamin LaHaise   aio: convert the ...
599
  					ctx->id = i;
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
600
  					rcu_assign_pointer(table->table[i], ctx);
db446a08c   Benjamin LaHaise   aio: convert the ...
601
  					spin_unlock(&mm->ioctx_lock);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
602
603
604
605
  					/* While kioctx setup is in progress,
  					 * we are protected from page migration
  					 * changes ring_pages by ->ring_lock.
  					 */
db446a08c   Benjamin LaHaise   aio: convert the ...
606
607
608
609
610
611
612
  					ring = kmap_atomic(ctx->ring_pages[0]);
  					ring->id = ctx->id;
  					kunmap_atomic(ring);
  					return 0;
  				}
  
  		new_nr = (table ? table->nr : 1) * 4;
db446a08c   Benjamin LaHaise   aio: convert the ...
613
614
615
616
617
618
619
620
621
622
  		spin_unlock(&mm->ioctx_lock);
  
  		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
  				new_nr, GFP_KERNEL);
  		if (!table)
  			return -ENOMEM;
  
  		table->nr = new_nr;
  
  		spin_lock(&mm->ioctx_lock);
855ef0dec   Oleg Nesterov   aio: kill the mis...
623
  		old = rcu_dereference_raw(mm->ioctx_table);
db446a08c   Benjamin LaHaise   aio: convert the ...
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
  
  		if (!old) {
  			rcu_assign_pointer(mm->ioctx_table, table);
  		} else if (table->nr > old->nr) {
  			memcpy(table->table, old->table,
  			       old->nr * sizeof(struct kioctx *));
  
  			rcu_assign_pointer(mm->ioctx_table, table);
  			kfree_rcu(old, rcu);
  		} else {
  			kfree(table);
  			table = old;
  		}
  	}
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
639
640
641
642
643
644
645
646
647
  static void aio_nr_sub(unsigned nr)
  {
  	spin_lock(&aio_nr_lock);
  	if (WARN_ON(aio_nr - nr > aio_nr))
  		aio_nr = 0;
  	else
  		aio_nr -= nr;
  	spin_unlock(&aio_nr_lock);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
648
649
650
651
652
  /* ioctx_alloc
   *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
   */
  static struct kioctx *ioctx_alloc(unsigned nr_events)
  {
41003a7bc   Zach Brown   aio: remove retry...
653
  	struct mm_struct *mm = current->mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
654
  	struct kioctx *ctx;
e23754f88   Al Viro   aio: don't bother...
655
  	int err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
656

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
657
  	/*
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
658
659
660
661
662
663
  	 * Store the original nr_events -- what userspace passed to io_setup(),
  	 * for counting against the global limit -- before it changes.
  	 */
  	unsigned int max_reqs = nr_events;
  
  	/*
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
664
665
666
667
668
669
670
671
672
673
  	 * We keep track of the number of available ringbuffer slots, to prevent
  	 * overflow (reqs_available), and we also use percpu counters for this.
  	 *
  	 * So since up to half the slots might be on other cpu's percpu counters
  	 * and unavailable, double nr_events so userspace sees what they
  	 * expected: additionally, we move req_batch slots to/from percpu
  	 * counters at a time, so make sure that isn't 0:
  	 */
  	nr_events = max(nr_events, num_possible_cpus() * 4);
  	nr_events *= 2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
674
  	/* Prevent overflows */
08397acdd   Al Viro   ioctx_alloc(): re...
675
  	if (nr_events > (0x10000000U / sizeof(struct io_event))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
676
677
678
679
  		pr_debug("ENOMEM: nr_events too high
  ");
  		return ERR_PTR(-EINVAL);
  	}
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
680
  	if (!nr_events || (unsigned long)max_reqs > aio_max_nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
681
  		return ERR_PTR(-EAGAIN);
c37622296   Robert P. J. Day   [PATCH] Transform...
682
  	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
683
684
  	if (!ctx)
  		return ERR_PTR(-ENOMEM);
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
685
  	ctx->max_reqs = max_reqs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
686

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
687
  	spin_lock_init(&ctx->ctx_lock);
0460fef2a   Kent Overstreet   aio: use cancella...
688
  	spin_lock_init(&ctx->completion_lock);
58c85dc20   Kent Overstreet   aio: kill struct ...
689
  	mutex_init(&ctx->ring_lock);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
690
691
692
  	/* Protect against page migration throughout kiotx setup by keeping
  	 * the ring_lock mutex held until setup is complete. */
  	mutex_lock(&ctx->ring_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
693
694
695
  	init_waitqueue_head(&ctx->wait);
  
  	INIT_LIST_HEAD(&ctx->active_reqs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
696

2aad2a86f   Tejun Heo   percpu_ref: add P...
697
  	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
698
  		goto err;
2aad2a86f   Tejun Heo   percpu_ref: add P...
699
  	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
700
  		goto err;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
701
702
  	ctx->cpu = alloc_percpu(struct kioctx_cpu);
  	if (!ctx->cpu)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
703
  		goto err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
704

2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
705
  	err = aio_setup_ring(ctx, nr_events);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
706
  	if (err < 0)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
707
  		goto err;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
708

34e83fc61   Kent Overstreet   aio: reqs_active ...
709
  	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
710
  	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
6878ea72a   Benjamin LaHaise   aio: be defensive...
711
712
  	if (ctx->req_batch < 1)
  		ctx->req_batch = 1;
34e83fc61   Kent Overstreet   aio: reqs_active ...
713

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
714
  	/* limit the number of system wide aios */
9fa1cb397   Al Viro   aio: aio_nr_lock ...
715
  	spin_lock(&aio_nr_lock);
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
716
717
  	if (aio_nr + ctx->max_reqs > aio_max_nr ||
  	    aio_nr + ctx->max_reqs < aio_nr) {
9fa1cb397   Al Viro   aio: aio_nr_lock ...
718
  		spin_unlock(&aio_nr_lock);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
719
  		err = -EAGAIN;
d1b943271   Gu Zheng   aio: clean up aio...
720
  		goto err_ctx;
2dd542b7a   Al Viro   aio: aio_nr decre...
721
722
  	}
  	aio_nr += ctx->max_reqs;
9fa1cb397   Al Viro   aio: aio_nr_lock ...
723
  	spin_unlock(&aio_nr_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
724

1881686f8   Benjamin LaHaise   aio: fix kioctx l...
725
726
  	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
  	percpu_ref_get(&ctx->reqs);	/* free_ioctx_users() will drop this */
723be6e39   Kent Overstreet   aio: percpu ioctx...
727

da90382c2   Benjamin LaHaise   aio: fix error ha...
728
729
  	err = ioctx_add_table(ctx, mm);
  	if (err)
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
730
  		goto err_cleanup;
da90382c2   Benjamin LaHaise   aio: fix error ha...
731

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
732
733
  	/* Release the ring_lock mutex now that all setup is complete. */
  	mutex_unlock(&ctx->ring_lock);
caf4167aa   Kent Overstreet   aio: dprintk() ->...
734
735
  	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x
  ",
58c85dc20   Kent Overstreet   aio: kill struct ...
736
  		 ctx, ctx->user_id, mm, ctx->nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
737
  	return ctx;
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
738
739
  err_cleanup:
  	aio_nr_sub(ctx->max_reqs);
d1b943271   Gu Zheng   aio: clean up aio...
740
  err_ctx:
deeb8525f   Al Viro   ioctx_alloc(): fi...
741
742
743
  	atomic_set(&ctx->dead, 1);
  	if (ctx->mmap_size)
  		vm_munmap(ctx->mmap_base, ctx->mmap_size);
d1b943271   Gu Zheng   aio: clean up aio...
744
  	aio_free_ring(ctx);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
745
  err:
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
746
  	mutex_unlock(&ctx->ring_lock);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
747
  	free_percpu(ctx->cpu);
9a1049da9   Tejun Heo   percpu-refcount: ...
748
749
  	percpu_ref_exit(&ctx->reqs);
  	percpu_ref_exit(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
750
  	kmem_cache_free(kioctx_cachep, ctx);
caf4167aa   Kent Overstreet   aio: dprintk() ->...
751
752
  	pr_debug("error allocating ioctx %d
  ", err);
e23754f88   Al Viro   aio: don't bother...
753
  	return ERR_PTR(err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
754
  }
36f558890   Kent Overstreet   aio: refcounting ...
755
756
757
758
759
  /* kill_ioctx
   *	Cancels all outstanding aio requests on an aio context.  Used
   *	when the processes owning a context have all exited to encourage
   *	the rapid destruction of the kioctx.
   */
fb2d44838   Benjamin LaHaise   aio: report error...
760
  static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
dc48e56d7   Jens Axboe   aio: fix serial d...
761
  		      struct ctx_rq_wait *wait)
36f558890   Kent Overstreet   aio: refcounting ...
762
  {
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
763
  	struct kioctx_table *table;
db446a08c   Benjamin LaHaise   aio: convert the ...
764

b2edffdd9   Al Viro   fix mremap() vs. ...
765
766
767
  	spin_lock(&mm->ioctx_lock);
  	if (atomic_xchg(&ctx->dead, 1)) {
  		spin_unlock(&mm->ioctx_lock);
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
768
  		return -EINVAL;
b2edffdd9   Al Viro   fix mremap() vs. ...
769
  	}
db446a08c   Benjamin LaHaise   aio: convert the ...
770

855ef0dec   Oleg Nesterov   aio: kill the mis...
771
  	table = rcu_dereference_raw(mm->ioctx_table);
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
772
773
  	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
  	RCU_INIT_POINTER(table->table[ctx->id], NULL);
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
774
  	spin_unlock(&mm->ioctx_lock);
4fcc712f5   Kent Overstreet   aio: fix io_destr...
775

a6d7cff47   Tejun Heo   fs/aio: Add expli...
776
  	/* free_ioctx_reqs() will do the necessary RCU synchronization */
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
777
  	wake_up_all(&ctx->wait);
4fcc712f5   Kent Overstreet   aio: fix io_destr...
778

fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
779
780
781
782
783
784
785
786
  	/*
  	 * It'd be more correct to do this in free_ioctx(), after all
  	 * the outstanding kiocbs have finished - but by then io_destroy
  	 * has already returned, so io_setup() could potentially return
  	 * -EAGAIN with no ioctxs actually in use (as far as userspace
  	 *  could tell).
  	 */
  	aio_nr_sub(ctx->max_reqs);
4fcc712f5   Kent Overstreet   aio: fix io_destr...
787

fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
788
789
  	if (ctx->mmap_size)
  		vm_munmap(ctx->mmap_base, ctx->mmap_size);
fb2d44838   Benjamin LaHaise   aio: report error...
790

dc48e56d7   Jens Axboe   aio: fix serial d...
791
  	ctx->rq_wait = wait;
fa88b6f88   Benjamin LaHaise   aio: cleanup: fla...
792
793
  	percpu_ref_kill(&ctx->users);
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
794
  }
36f558890   Kent Overstreet   aio: refcounting ...
795
796
797
798
799
800
801
  /*
   * exit_aio: called when the last user of mm goes away.  At this point, there is
   * no way for any new requests to be submited or any of the io_* syscalls to be
   * called on the context.
   *
   * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
   * them.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
802
   */
fc9b52cd8   Harvey Harrison   fs: remove fastca...
803
  void exit_aio(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
804
  {
4b70ac5fd   Oleg Nesterov   aio: change exit_...
805
  	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
dc48e56d7   Jens Axboe   aio: fix serial d...
806
807
  	struct ctx_rq_wait wait;
  	int i, skipped;
db446a08c   Benjamin LaHaise   aio: convert the ...
808

4b70ac5fd   Oleg Nesterov   aio: change exit_...
809
810
  	if (!table)
  		return;
db446a08c   Benjamin LaHaise   aio: convert the ...
811

dc48e56d7   Jens Axboe   aio: fix serial d...
812
813
814
815
  	atomic_set(&wait.count, table->nr);
  	init_completion(&wait.comp);
  
  	skipped = 0;
4b70ac5fd   Oleg Nesterov   aio: change exit_...
816
  	for (i = 0; i < table->nr; ++i) {
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
817
818
  		struct kioctx *ctx =
  			rcu_dereference_protected(table->table[i], true);
abf137dd7   Jens Axboe   aio: make the loo...
819

dc48e56d7   Jens Axboe   aio: fix serial d...
820
821
  		if (!ctx) {
  			skipped++;
4b70ac5fd   Oleg Nesterov   aio: change exit_...
822
  			continue;
dc48e56d7   Jens Axboe   aio: fix serial d...
823
  		}
936af1576   Al Viro   aio: don't bother...
824
  		/*
4b70ac5fd   Oleg Nesterov   aio: change exit_...
825
826
827
828
829
  		 * We don't need to bother with munmap() here - exit_mmap(mm)
  		 * is coming and it'll unmap everything. And we simply can't,
  		 * this is not necessarily our ->mm.
  		 * Since kill_ioctx() uses non-zero ->mmap_size as indicator
  		 * that it needs to unmap the area, just set it to 0.
936af1576   Al Viro   aio: don't bother...
830
  		 */
58c85dc20   Kent Overstreet   aio: kill struct ...
831
  		ctx->mmap_size = 0;
dc48e56d7   Jens Axboe   aio: fix serial d...
832
833
  		kill_ioctx(mm, ctx, &wait);
  	}
36f558890   Kent Overstreet   aio: refcounting ...
834

dc48e56d7   Jens Axboe   aio: fix serial d...
835
  	if (!atomic_sub_and_test(skipped, &wait.count)) {
6098b45b3   Gu Zheng   aio: block exit_a...
836
  		/* Wait until all IO for the context are done. */
dc48e56d7   Jens Axboe   aio: fix serial d...
837
  		wait_for_completion(&wait.comp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
838
  	}
4b70ac5fd   Oleg Nesterov   aio: change exit_...
839
840
841
  
  	RCU_INIT_POINTER(mm->ioctx_table, NULL);
  	kfree(table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
842
  }
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
843
844
845
  static void put_reqs_available(struct kioctx *ctx, unsigned nr)
  {
  	struct kioctx_cpu *kcpu;
263782c1c   Benjamin LaHaise   aio: protect reqs...
846
  	unsigned long flags;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
847

263782c1c   Benjamin LaHaise   aio: protect reqs...
848
  	local_irq_save(flags);
be6fb451a   Benjamin LaHaise   aio: remove no lo...
849
  	kcpu = this_cpu_ptr(ctx->cpu);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
850
  	kcpu->reqs_available += nr;
263782c1c   Benjamin LaHaise   aio: protect reqs...
851

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
852
853
854
855
  	while (kcpu->reqs_available >= ctx->req_batch * 2) {
  		kcpu->reqs_available -= ctx->req_batch;
  		atomic_add(ctx->req_batch, &ctx->reqs_available);
  	}
263782c1c   Benjamin LaHaise   aio: protect reqs...
856
  	local_irq_restore(flags);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
857
  }
432c79978   Christoph Hellwig   aio: separate out...
858
  static bool __get_reqs_available(struct kioctx *ctx)
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
859
860
861
  {
  	struct kioctx_cpu *kcpu;
  	bool ret = false;
263782c1c   Benjamin LaHaise   aio: protect reqs...
862
  	unsigned long flags;
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
863

263782c1c   Benjamin LaHaise   aio: protect reqs...
864
  	local_irq_save(flags);
be6fb451a   Benjamin LaHaise   aio: remove no lo...
865
  	kcpu = this_cpu_ptr(ctx->cpu);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
  	if (!kcpu->reqs_available) {
  		int old, avail = atomic_read(&ctx->reqs_available);
  
  		do {
  			if (avail < ctx->req_batch)
  				goto out;
  
  			old = avail;
  			avail = atomic_cmpxchg(&ctx->reqs_available,
  					       avail, avail - ctx->req_batch);
  		} while (avail != old);
  
  		kcpu->reqs_available += ctx->req_batch;
  	}
  
  	ret = true;
  	kcpu->reqs_available--;
  out:
263782c1c   Benjamin LaHaise   aio: protect reqs...
884
  	local_irq_restore(flags);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
885
886
  	return ret;
  }
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
  /* refill_reqs_available
   *	Updates the reqs_available reference counts used for tracking the
   *	number of free slots in the completion ring.  This can be called
   *	from aio_complete() (to optimistically update reqs_available) or
   *	from aio_get_req() (the we're out of events case).  It must be
   *	called holding ctx->completion_lock.
   */
  static void refill_reqs_available(struct kioctx *ctx, unsigned head,
                                    unsigned tail)
  {
  	unsigned events_in_ring, completed;
  
  	/* Clamp head since userland can write to it. */
  	head %= ctx->nr_events;
  	if (head <= tail)
  		events_in_ring = tail - head;
  	else
  		events_in_ring = ctx->nr_events - (head - tail);
  
  	completed = ctx->completed_events;
  	if (events_in_ring < completed)
  		completed -= events_in_ring;
  	else
  		completed = 0;
  
  	if (!completed)
  		return;
  
  	ctx->completed_events -= completed;
  	put_reqs_available(ctx, completed);
  }
  
  /* user_refill_reqs_available
   *	Called to refill reqs_available when aio_get_req() encounters an
   *	out of space in the completion ring.
   */
  static void user_refill_reqs_available(struct kioctx *ctx)
  {
  	spin_lock_irq(&ctx->completion_lock);
  	if (ctx->completed_events) {
  		struct aio_ring *ring;
  		unsigned head;
  
  		/* Access of ring->head may race with aio_read_events_ring()
  		 * here, but that's okay since whether we read the old version
  		 * or the new version, and either will be valid.  The important
  		 * part is that head cannot pass tail since we prevent
  		 * aio_complete() from updating tail by holding
  		 * ctx->completion_lock.  Even if head is invalid, the check
  		 * against ctx->completed_events below will make sure we do the
  		 * safe/right thing.
  		 */
  		ring = kmap_atomic(ctx->ring_pages[0]);
  		head = ring->head;
  		kunmap_atomic(ring);
  
  		refill_reqs_available(ctx, head, ctx->tail);
  	}
  
  	spin_unlock_irq(&ctx->completion_lock);
  }
432c79978   Christoph Hellwig   aio: separate out...
948
949
950
951
952
953
954
  static bool get_reqs_available(struct kioctx *ctx)
  {
  	if (__get_reqs_available(ctx))
  		return true;
  	user_refill_reqs_available(ctx);
  	return __get_reqs_available(ctx);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
955
  /* aio_get_req
57282d8fd   Kent Overstreet   aio: Kill ki_users
956
957
   *	Allocate a slot for an aio request.
   * Returns NULL if no requests are free.
b53119f13   Linus Torvalds   pin iocb through ...
958
959
960
   *
   * The refcount is initialized to 2 - one for the async op completion,
   * one for the synchronous code that does this.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
961
   */
04b2fa9f8   Christoph Hellwig   fs: split generic...
962
  static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
963
  {
04b2fa9f8   Christoph Hellwig   fs: split generic...
964
  	struct aio_kiocb *req;
a1c8eae75   Kent Overstreet   aio: kill batch a...
965

2bc4ca9bb   Jens Axboe   aio: don't zero e...
966
  	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
967
  	if (unlikely(!req))
432c79978   Christoph Hellwig   aio: separate out...
968
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969

fa0ca2aee   Al Viro   deal with get_req...
970
  	if (unlikely(!get_reqs_available(ctx))) {
6af1c849d   Wei Yongjun   aio: use kmem_cac...
971
  		kmem_cache_free(kiocb_cachep, req);
fa0ca2aee   Al Viro   deal with get_req...
972
973
  		return NULL;
  	}
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
974
  	percpu_ref_get(&ctx->reqs);
2bc4ca9bb   Jens Axboe   aio: don't zero e...
975
  	req->ki_ctx = ctx;
75321b50a   Christoph Hellwig   aio: sanitize ki_...
976
  	INIT_LIST_HEAD(&req->ki_list);
b53119f13   Linus Torvalds   pin iocb through ...
977
  	refcount_set(&req->ki_refcnt, 2);
2bc4ca9bb   Jens Axboe   aio: don't zero e...
978
  	req->ki_eventfd = NULL;
080d676de   Jeff Moyer   aio: allocate kio...
979
  	return req;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
980
  }
d5470b596   Adrian Bunk   fs/aio.c: make 3 ...
981
  static struct kioctx *lookup_ioctx(unsigned long ctx_id)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
982
  {
db446a08c   Benjamin LaHaise   aio: convert the ...
983
  	struct aio_ring __user *ring  = (void __user *)ctx_id;
abf137dd7   Jens Axboe   aio: make the loo...
984
  	struct mm_struct *mm = current->mm;
65c24491b   Jeff Moyer   aio: lookup_ioctx...
985
  	struct kioctx *ctx, *ret = NULL;
db446a08c   Benjamin LaHaise   aio: convert the ...
986
987
988
989
990
  	struct kioctx_table *table;
  	unsigned id;
  
  	if (get_user(id, &ring->id))
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
991

abf137dd7   Jens Axboe   aio: make the loo...
992
  	rcu_read_lock();
db446a08c   Benjamin LaHaise   aio: convert the ...
993
  	table = rcu_dereference(mm->ioctx_table);
abf137dd7   Jens Axboe   aio: make the loo...
994

db446a08c   Benjamin LaHaise   aio: convert the ...
995
996
  	if (!table || id >= table->nr)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
997

a538e3ff9   Jeff Moyer   aio: fix spectre ...
998
  	id = array_index_nospec(id, table->nr);
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
999
  	ctx = rcu_dereference(table->table[id]);
f30d704fe   Benjamin LaHaise   aio: table lookup...
1000
  	if (ctx && ctx->user_id == ctx_id) {
baf10564f   Al Viro   aio: fix io_destr...
1001
1002
  		if (percpu_ref_tryget_live(&ctx->users))
  			ret = ctx;
db446a08c   Benjamin LaHaise   aio: convert the ...
1003
1004
  	}
  out:
abf137dd7   Jens Axboe   aio: make the loo...
1005
  	rcu_read_unlock();
65c24491b   Jeff Moyer   aio: lookup_ioctx...
1006
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1007
  }
b53119f13   Linus Torvalds   pin iocb through ...
1008
1009
  static inline void iocb_destroy(struct aio_kiocb *iocb)
  {
742597034   Al Viro   aio: move droppin...
1010
1011
  	if (iocb->ki_eventfd)
  		eventfd_ctx_put(iocb->ki_eventfd);
b53119f13   Linus Torvalds   pin iocb through ...
1012
1013
1014
1015
1016
  	if (iocb->ki_filp)
  		fput(iocb->ki_filp);
  	percpu_ref_put(&iocb->ki_ctx->reqs);
  	kmem_cache_free(kiocb_cachep, iocb);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1017
1018
  /* aio_complete
   *	Called when the io request on the given iocb is complete.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1019
   */
2bb874c0d   Al Viro   aio: store event ...
1020
  static void aio_complete(struct aio_kiocb *iocb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1021
1022
  {
  	struct kioctx	*ctx = iocb->ki_ctx;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1023
  	struct aio_ring	*ring;
21b40200c   Kent Overstreet   aio: use flush_dc...
1024
  	struct io_event	*ev_page, *event;
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
1025
  	unsigned tail, pos, head;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026
  	unsigned long	flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1027

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1028
  	/*
0460fef2a   Kent Overstreet   aio: use cancella...
1029
  	 * Add a completion event to the ring buffer. Must be done holding
4b30f07e7   Tang Chen   aio: fix wrong co...
1030
  	 * ctx->completion_lock to prevent other code from messing with the tail
0460fef2a   Kent Overstreet   aio: use cancella...
1031
1032
1033
  	 * pointer since we might be called from irq context.
  	 */
  	spin_lock_irqsave(&ctx->completion_lock, flags);
58c85dc20   Kent Overstreet   aio: kill struct ...
1034
  	tail = ctx->tail;
21b40200c   Kent Overstreet   aio: use flush_dc...
1035
  	pos = tail + AIO_EVENTS_OFFSET;
58c85dc20   Kent Overstreet   aio: kill struct ...
1036
  	if (++tail >= ctx->nr_events)
4bf69b2a0   Kenneth W Chen   [PATCH] aio: ring...
1037
  		tail = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1038

58c85dc20   Kent Overstreet   aio: kill struct ...
1039
  	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
21b40200c   Kent Overstreet   aio: use flush_dc...
1040
  	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
a9339b785   Al Viro   aio: keep io_even...
1041
  	*event = iocb->ki_res;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1042

21b40200c   Kent Overstreet   aio: use flush_dc...
1043
  	kunmap_atomic(ev_page);
58c85dc20   Kent Overstreet   aio: kill struct ...
1044
  	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
21b40200c   Kent Overstreet   aio: use flush_dc...
1045

a9339b785   Al Viro   aio: keep io_even...
1046
1047
1048
1049
  	pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx
  ", ctx, tail, iocb,
  		 (void __user *)(unsigned long)iocb->ki_res.obj,
  		 iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1050
1051
1052
1053
1054
  
  	/* after flagging the request as done, we
  	 * must never even look at it again
  	 */
  	smp_wmb();	/* make event visible before updating tail */
58c85dc20   Kent Overstreet   aio: kill struct ...
1055
  	ctx->tail = tail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1056

58c85dc20   Kent Overstreet   aio: kill struct ...
1057
  	ring = kmap_atomic(ctx->ring_pages[0]);
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
1058
  	head = ring->head;
21b40200c   Kent Overstreet   aio: use flush_dc...
1059
  	ring->tail = tail;
e8e3c3d66   Cong Wang   fs: remove the se...
1060
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
1061
  	flush_dcache_page(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062

d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
1063
1064
1065
  	ctx->completed_events++;
  	if (ctx->completed_events > 1)
  		refill_reqs_available(ctx, head, tail);
0460fef2a   Kent Overstreet   aio: use cancella...
1066
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
21b40200c   Kent Overstreet   aio: use flush_dc...
1067
1068
  	pr_debug("added to ring %p at [%u]
  ", iocb, tail);
8d1c98b0b   Davide Libenzi   eventfd/kaio inte...
1069
1070
1071
1072
1073
1074
  
  	/*
  	 * Check if the user asked us to deliver the result through an
  	 * eventfd. The eventfd_signal() function is safe to be called
  	 * from IRQ context.
  	 */
742597034   Al Viro   aio: move droppin...
1075
  	if (iocb->ki_eventfd)
8d1c98b0b   Davide Libenzi   eventfd/kaio inte...
1076
  		eventfd_signal(iocb->ki_eventfd, 1);
6cb2a2104   Quentin Barnes   aio: bad AIO race...
1077
1078
1079
1080
1081
1082
1083
  	/*
  	 * We have to order our ring_info tail store above and test
  	 * of the wait list below outside the wait lock.  This is
  	 * like in wake_up_bit() where clearing a bit has to be
  	 * ordered with the unlocked test.
  	 */
  	smp_mb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1084
1085
  	if (waitqueue_active(&ctx->wait))
  		wake_up(&ctx->wait);
2bb874c0d   Al Viro   aio: store event ...
1086
1087
1088
1089
1090
1091
1092
1093
  }
  
  static inline void iocb_put(struct aio_kiocb *iocb)
  {
  	if (refcount_dec_and_test(&iocb->ki_refcnt)) {
  		aio_complete(iocb);
  		iocb_destroy(iocb);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
  }
2be4e7dee   Gu Zheng   aio: fix some com...
1095
  /* aio_read_events_ring
a31ad380b   Kent Overstreet   aio: make aio_rea...
1096
1097
   *	Pull an event off of the ioctx's event ring.  Returns the number of
   *	events fetched
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1098
   */
a31ad380b   Kent Overstreet   aio: make aio_rea...
1099
1100
  static long aio_read_events_ring(struct kioctx *ctx,
  				 struct io_event __user *event, long nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1101
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1102
  	struct aio_ring *ring;
5ffac122d   Kent Overstreet   aio: Don't use ct...
1103
  	unsigned head, tail, pos;
a31ad380b   Kent Overstreet   aio: make aio_rea...
1104
1105
  	long ret = 0;
  	int copy_ret;
9c9ce763b   Dave Chinner   aio: annotate aio...
1106
1107
1108
1109
1110
1111
1112
  	/*
  	 * The mutex can block and wake us up and that will cause
  	 * wait_event_interruptible_hrtimeout() to schedule without sleeping
  	 * and repeat. This should be rare enough that it doesn't cause
  	 * peformance issues. See the comment in read_events() for more detail.
  	 */
  	sched_annotate_sleep();
58c85dc20   Kent Overstreet   aio: kill struct ...
1113
  	mutex_lock(&ctx->ring_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1114

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
1115
  	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
58c85dc20   Kent Overstreet   aio: kill struct ...
1116
  	ring = kmap_atomic(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1117
  	head = ring->head;
5ffac122d   Kent Overstreet   aio: Don't use ct...
1118
  	tail = ring->tail;
a31ad380b   Kent Overstreet   aio: make aio_rea...
1119
  	kunmap_atomic(ring);
2ff396be6   Jeff Moyer   aio: add missing ...
1120
1121
1122
1123
1124
  	/*
  	 * Ensure that once we've read the current tail pointer, that
  	 * we also see the events that were stored up to the tail.
  	 */
  	smp_rmb();
5ffac122d   Kent Overstreet   aio: Don't use ct...
1125
1126
  	pr_debug("h%u t%u m%u
  ", head, tail, ctx->nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1127

5ffac122d   Kent Overstreet   aio: Don't use ct...
1128
  	if (head == tail)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1129
  		goto out;
edfbbf388   Benjamin LaHaise   aio: fix kernel m...
1130
1131
  	head %= ctx->nr_events;
  	tail %= ctx->nr_events;
a31ad380b   Kent Overstreet   aio: make aio_rea...
1132
1133
1134
1135
  	while (ret < nr) {
  		long avail;
  		struct io_event *ev;
  		struct page *page;
5ffac122d   Kent Overstreet   aio: Don't use ct...
1136
1137
  		avail = (head <= tail ?  tail : ctx->nr_events) - head;
  		if (head == tail)
a31ad380b   Kent Overstreet   aio: make aio_rea...
1138
  			break;
a31ad380b   Kent Overstreet   aio: make aio_rea...
1139
  		pos = head + AIO_EVENTS_OFFSET;
58c85dc20   Kent Overstreet   aio: kill struct ...
1140
  		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
a31ad380b   Kent Overstreet   aio: make aio_rea...
1141
  		pos %= AIO_EVENTS_PER_PAGE;
d2988bd41   Al Viro   aio_read_events_r...
1142
1143
  		avail = min(avail, nr - ret);
  		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
  		ev = kmap(page);
  		copy_ret = copy_to_user(event + ret, ev + pos,
  					sizeof(*ev) * avail);
  		kunmap(page);
  
  		if (unlikely(copy_ret)) {
  			ret = -EFAULT;
  			goto out;
  		}
  
  		ret += avail;
  		head += avail;
58c85dc20   Kent Overstreet   aio: kill struct ...
1156
  		head %= ctx->nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1157
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1158

58c85dc20   Kent Overstreet   aio: kill struct ...
1159
  	ring = kmap_atomic(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1160
  	ring->head = head;
91d80a84b   Zhao Hongjiang   aio: fix possible...
1161
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
1162
  	flush_dcache_page(ctx->ring_pages[0]);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1163

5ffac122d   Kent Overstreet   aio: Don't use ct...
1164
1165
  	pr_debug("%li  h%u t%u
  ", ret, head, tail);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1166
  out:
58c85dc20   Kent Overstreet   aio: kill struct ...
1167
  	mutex_unlock(&ctx->ring_lock);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1168

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1169
1170
  	return ret;
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1171
1172
  static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
  			    struct io_event __user *event, long *i)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1173
  {
a31ad380b   Kent Overstreet   aio: make aio_rea...
1174
  	long ret = aio_read_events_ring(ctx, event + *i, nr - *i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1175

a31ad380b   Kent Overstreet   aio: make aio_rea...
1176
1177
  	if (ret > 0)
  		*i += ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1178

a31ad380b   Kent Overstreet   aio: make aio_rea...
1179
1180
  	if (unlikely(atomic_read(&ctx->dead)))
  		ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1181

a31ad380b   Kent Overstreet   aio: make aio_rea...
1182
1183
  	if (!*i)
  		*i = ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1184

a31ad380b   Kent Overstreet   aio: make aio_rea...
1185
  	return ret < 0 || *i >= min_nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1186
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1187
  static long read_events(struct kioctx *ctx, long min_nr, long nr,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1188
  			struct io_event __user *event,
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1189
  			ktime_t until)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1190
  {
a31ad380b   Kent Overstreet   aio: make aio_rea...
1191
  	long ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1192

a31ad380b   Kent Overstreet   aio: make aio_rea...
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
  	/*
  	 * Note that aio_read_events() is being called as the conditional - i.e.
  	 * we're calling it after prepare_to_wait() has set task state to
  	 * TASK_INTERRUPTIBLE.
  	 *
  	 * But aio_read_events() can block, and if it blocks it's going to flip
  	 * the task state back to TASK_RUNNING.
  	 *
  	 * This should be ok, provided it doesn't flip the state back to
  	 * TASK_RUNNING and return 0 too much - that causes us to spin. That
  	 * will only happen if the mutex_lock() call blocks, and we then find
  	 * the ringbuffer empty. So in practice we should be ok, but it's
  	 * something to be aware of when touching this code.
  	 */
2456e8553   Thomas Gleixner   ktime: Get rid of...
1207
  	if (until == 0)
5f785de58   Fam Zheng   aio: Skip timer f...
1208
1209
1210
1211
1212
  		aio_read_events(ctx, min_nr, nr, event, &ret);
  	else
  		wait_event_interruptible_hrtimeout(ctx->wait,
  				aio_read_events(ctx, min_nr, nr, event, &ret),
  				until);
a31ad380b   Kent Overstreet   aio: make aio_rea...
1213
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1214
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
  /* sys_io_setup:
   *	Create an aio_context capable of receiving at least nr_events.
   *	ctxp must not point to an aio_context that already exists, and
   *	must be initialized to 0 prior to the call.  On successful
   *	creation of the aio_context, *ctxp is filled in with the resulting 
   *	handle.  May fail with -EINVAL if *ctxp is not initialized,
   *	if the specified nr_events exceeds internal limits.  May fail 
   *	with -EAGAIN if the specified nr_events exceeds the user's limit 
   *	of available events.  May fail with -ENOMEM if insufficient kernel
   *	resources are available.  May fail with -EFAULT if an invalid
   *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
   *	implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1228
  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctxp);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1239
  	if (unlikely(ctx || nr_events == 0)) {
acd88d4e1   Kinglong Mee   fs/aio.c: Remove ...
1240
1241
  		pr_debug("EINVAL: ctx %lu nr_events %u
  ",
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1242
  		         ctx, nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1243
1244
1245
1246
1247
1248
1249
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		ret = put_user(ioctx->user_id, ctxp);
a2e1859ad   Al Viro   aio: take final p...
1250
  		if (ret)
e02ba72aa   Anatol Pomozov   aio: block io_des...
1251
  			kill_ioctx(current->mm, ioctx, NULL);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1252
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1253
1254
1255
1256
1257
  	}
  
  out:
  	return ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
  #ifdef CONFIG_COMPAT
  COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctx32p);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
  	if (unlikely(ctx || nr_events == 0)) {
  		pr_debug("EINVAL: ctx %lu nr_events %u
  ",
  		         ctx, nr_events);
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		/* truncating is ok because it's a user address */
  		ret = put_user((u32)ioctx->user_id, ctx32p);
  		if (ret)
  			kill_ioctx(current->mm, ioctx, NULL);
  		percpu_ref_put(&ioctx->users);
  	}
  
  out:
  	return ret;
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1291
1292
1293
  /* sys_io_destroy:
   *	Destroy the aio_context specified.  May cancel any outstanding 
   *	AIOs and block on completion.  Will fail with -ENOSYS if not
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1294
   *	implemented.  May fail with -EINVAL if the context pointed to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1295
1296
   *	is invalid.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1297
  SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1298
1299
1300
  {
  	struct kioctx *ioctx = lookup_ioctx(ctx);
  	if (likely(NULL != ioctx)) {
dc48e56d7   Jens Axboe   aio: fix serial d...
1301
  		struct ctx_rq_wait wait;
fb2d44838   Benjamin LaHaise   aio: report error...
1302
  		int ret;
e02ba72aa   Anatol Pomozov   aio: block io_des...
1303

dc48e56d7   Jens Axboe   aio: fix serial d...
1304
1305
  		init_completion(&wait.comp);
  		atomic_set(&wait.count, 1);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1306
1307
1308
1309
  		/* Pass requests_done to kill_ioctx() where it can be set
  		 * in a thread-safe way. If we try to set it here then we have
  		 * a race condition if two io_destroy() called simultaneously.
  		 */
dc48e56d7   Jens Axboe   aio: fix serial d...
1310
  		ret = kill_ioctx(current->mm, ioctx, &wait);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1311
  		percpu_ref_put(&ioctx->users);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1312
1313
1314
1315
1316
  
  		/* Wait until all IO for the context are done. Otherwise kernel
  		 * keep using user-space buffers even if user thinks the context
  		 * is destroyed.
  		 */
fb2d44838   Benjamin LaHaise   aio: report error...
1317
  		if (!ret)
dc48e56d7   Jens Axboe   aio: fix serial d...
1318
  			wait_for_completion(&wait.comp);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1319

fb2d44838   Benjamin LaHaise   aio: report error...
1320
  		return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1321
  	}
acd88d4e1   Kinglong Mee   fs/aio.c: Remove ...
1322
1323
  	pr_debug("EINVAL: invalid context id
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1324
1325
  	return -EINVAL;
  }
3c96c7f4c   Al Viro   aio: take list re...
1326
1327
1328
1329
1330
1331
1332
1333
1334
  static void aio_remove_iocb(struct aio_kiocb *iocb)
  {
  	struct kioctx *ctx = iocb->ki_ctx;
  	unsigned long flags;
  
  	spin_lock_irqsave(&ctx->ctx_lock, flags);
  	list_del(&iocb->ki_list);
  	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  }
54843f875   Christoph Hellwig   aio: refactor rea...
1335
1336
1337
  static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
  {
  	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
3c96c7f4c   Al Viro   aio: take list re...
1338
1339
  	if (!list_empty_careful(&iocb->ki_list))
  		aio_remove_iocb(iocb);
54843f875   Christoph Hellwig   aio: refactor rea...
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
  	if (kiocb->ki_flags & IOCB_WRITE) {
  		struct inode *inode = file_inode(kiocb->ki_filp);
  
  		/*
  		 * Tell lockdep we inherited freeze protection from submission
  		 * thread.
  		 */
  		if (S_ISREG(inode->i_mode))
  			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
  		file_end_write(kiocb->ki_filp);
  	}
2bb874c0d   Al Viro   aio: store event ...
1351
1352
1353
  	iocb->ki_res.res = res;
  	iocb->ki_res.res2 = res2;
  	iocb_put(iocb);
54843f875   Christoph Hellwig   aio: refactor rea...
1354
  }
88a6f18b9   Jens Axboe   aio: split out io...
1355
  static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
54843f875   Christoph Hellwig   aio: refactor rea...
1356
1357
  {
  	int ret;
54843f875   Christoph Hellwig   aio: refactor rea...
1358
  	req->ki_complete = aio_complete_rw;
ec51f8ee1   Mike Marshall   aio: initialize k...
1359
  	req->private = NULL;
54843f875   Christoph Hellwig   aio: refactor rea...
1360
1361
1362
1363
  	req->ki_pos = iocb->aio_offset;
  	req->ki_flags = iocb_flags(req->ki_filp);
  	if (iocb->aio_flags & IOCB_FLAG_RESFD)
  		req->ki_flags |= IOCB_EVENTFD;
fc28724d6   Adam Manzanares   fs: Convert kiocb...
1364
  	req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
d9a08a9e6   Adam Manzanares   fs: Add aio iopri...
1365
1366
1367
1368
1369
1370
1371
1372
  	if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
  		/*
  		 * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
  		 * aio_reqprio is interpreted as an I/O scheduling
  		 * class and priority.
  		 */
  		ret = ioprio_check_cap(iocb->aio_reqprio);
  		if (ret) {
9a6d9a62e   Adam Manzanares   fs: aio ioprio us...
1373
1374
  			pr_debug("aio ioprio check cap error: %d
  ", ret);
84c4e1f89   Linus Torvalds   aio: simplify - a...
1375
  			return ret;
d9a08a9e6   Adam Manzanares   fs: Add aio iopri...
1376
1377
1378
1379
  		}
  
  		req->ki_ioprio = iocb->aio_reqprio;
  	} else
76dc89139   Damien Le Moal   aio: Fix fallback...
1380
  		req->ki_ioprio = get_current_ioprio();
d9a08a9e6   Adam Manzanares   fs: Add aio iopri...
1381

54843f875   Christoph Hellwig   aio: refactor rea...
1382
1383
  	ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
  	if (unlikely(ret))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1384
  		return ret;
154989e45   Christoph Hellwig   aio: clear IOCB_H...
1385
1386
1387
  
  	req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
  	return 0;
54843f875   Christoph Hellwig   aio: refactor rea...
1388
  }
87e5e6dab   Jens Axboe   uio: make import_...
1389
1390
1391
  static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
  		struct iovec **iovec, bool vectored, bool compat,
  		struct iov_iter *iter)
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
1392
  {
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1393
1394
1395
1396
1397
1398
1399
1400
  	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
  	size_t len = iocb->aio_nbytes;
  
  	if (!vectored) {
  		ssize_t ret = import_single_range(rw, buf, len, *iovec, iter);
  		*iovec = NULL;
  		return ret;
  	}
89cd35c58   Christoph Hellwig   iov_iter: transpa...
1401
1402
  
  	return __import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter, compat);
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
1403
  }
9061d14a8   Al Viro   aio: all callers ...
1404
  static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1405
1406
1407
  {
  	switch (ret) {
  	case -EIOCBQUEUED:
9061d14a8   Al Viro   aio: all callers ...
1408
  		break;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1409
1410
1411
1412
1413
1414
1415
1416
1417
  	case -ERESTARTSYS:
  	case -ERESTARTNOINTR:
  	case -ERESTARTNOHAND:
  	case -ERESTART_RESTARTBLOCK:
  		/*
  		 * There's no easy way to restart the syscall since other AIO's
  		 * may be already running. Just fail this IO with EINTR.
  		 */
  		ret = -EINTR;
df561f668   Gustavo A. R. Silva   treewide: Use fal...
1418
  		fallthrough;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1419
  	default:
bc9bff616   Jens Axboe   aio: use assigned...
1420
  		req->ki_complete(req, ret, 0);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1421
1422
  	}
  }
958c13ce1   Al Viro   make aio_read()/a...
1423
  static int aio_read(struct kiocb *req, const struct iocb *iocb,
88a6f18b9   Jens Axboe   aio: split out io...
1424
  			bool vectored, bool compat)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1425
  {
00fefb9cf   Gu Zheng   aio: use iovec ar...
1426
  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
293bc9822   Al Viro   new methods: ->re...
1427
  	struct iov_iter iter;
54843f875   Christoph Hellwig   aio: refactor rea...
1428
  	struct file *file;
958c13ce1   Al Viro   make aio_read()/a...
1429
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1430

54843f875   Christoph Hellwig   aio: refactor rea...
1431
1432
1433
1434
  	ret = aio_prep_rw(req, iocb);
  	if (ret)
  		return ret;
  	file = req->ki_filp;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1435
  	if (unlikely(!(file->f_mode & FMODE_READ)))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1436
  		return -EBADF;
54843f875   Christoph Hellwig   aio: refactor rea...
1437
  	ret = -EINVAL;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1438
  	if (unlikely(!file->f_op->read_iter))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1439
  		return -EINVAL;
73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1440

89319d31d   Christoph Hellwig   fs: remove aio_ru...
1441
  	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
87e5e6dab   Jens Axboe   uio: make import_...
1442
  	if (ret < 0)
84c4e1f89   Linus Torvalds   aio: simplify - a...
1443
  		return ret;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1444
1445
  	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
  	if (!ret)
9061d14a8   Al Viro   aio: all callers ...
1446
  		aio_rw_done(req, call_read_iter(file, req, &iter));
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1447
1448
1449
  	kfree(iovec);
  	return ret;
  }
73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1450

958c13ce1   Al Viro   make aio_read()/a...
1451
  static int aio_write(struct kiocb *req, const struct iocb *iocb,
88a6f18b9   Jens Axboe   aio: split out io...
1452
  			 bool vectored, bool compat)
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1453
  {
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1454
1455
  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
  	struct iov_iter iter;
54843f875   Christoph Hellwig   aio: refactor rea...
1456
  	struct file *file;
958c13ce1   Al Viro   make aio_read()/a...
1457
  	int ret;
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1458

54843f875   Christoph Hellwig   aio: refactor rea...
1459
1460
1461
1462
  	ret = aio_prep_rw(req, iocb);
  	if (ret)
  		return ret;
  	file = req->ki_filp;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1463
  	if (unlikely(!(file->f_mode & FMODE_WRITE)))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1464
  		return -EBADF;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1465
  	if (unlikely(!file->f_op->write_iter))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1466
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1467

89319d31d   Christoph Hellwig   fs: remove aio_ru...
1468
  	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
87e5e6dab   Jens Axboe   uio: make import_...
1469
  	if (ret < 0)
84c4e1f89   Linus Torvalds   aio: simplify - a...
1470
  		return ret;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1471
1472
  	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
  	if (!ret) {
70fe2f481   Jan Kara   aio: fix freeze p...
1473
  		/*
92ce47285   Christoph Hellwig   aio: remove the e...
1474
  		 * Open-code file_start_write here to grab freeze protection,
54843f875   Christoph Hellwig   aio: refactor rea...
1475
1476
1477
1478
  		 * which will be released by another thread in
  		 * aio_complete_rw().  Fool lockdep by telling it the lock got
  		 * released so that it doesn't complain about the held lock when
  		 * we return to userspace.
70fe2f481   Jan Kara   aio: fix freeze p...
1479
  		 */
92ce47285   Christoph Hellwig   aio: remove the e...
1480
  		if (S_ISREG(file_inode(file)->i_mode)) {
8a3c84b64   Darrick J. Wong   vfs: separate __s...
1481
  			sb_start_write(file_inode(file)->i_sb);
a12f1ae61   Shaohua Li   aio: fix lock dep...
1482
  			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
92ce47285   Christoph Hellwig   aio: remove the e...
1483
1484
  		}
  		req->ki_flags |= IOCB_WRITE;
9061d14a8   Al Viro   aio: all callers ...
1485
  		aio_rw_done(req, call_write_iter(file, req, &iter));
41ef4eb8e   Kent Overstreet   aio: kill ki_retry
1486
  	}
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1487
1488
  	kfree(iovec);
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1489
  }
a3c0d439e   Christoph Hellwig   aio: implement IO...
1490
1491
  static void aio_fsync_work(struct work_struct *work)
  {
2bb874c0d   Al Viro   aio: store event ...
1492
  	struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
530f32fc3   Miklos Szeredi   aio: fix async fs...
1493
  	const struct cred *old_cred = override_creds(iocb->fsync.creds);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1494

2bb874c0d   Al Viro   aio: store event ...
1495
  	iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
530f32fc3   Miklos Szeredi   aio: fix async fs...
1496
1497
  	revert_creds(old_cred);
  	put_cred(iocb->fsync.creds);
2bb874c0d   Al Viro   aio: store event ...
1498
  	iocb_put(iocb);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1499
  }
88a6f18b9   Jens Axboe   aio: split out io...
1500
1501
  static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
  		     bool datasync)
a3c0d439e   Christoph Hellwig   aio: implement IO...
1502
1503
1504
1505
  {
  	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
  			iocb->aio_rw_flags))
  		return -EINVAL;
a11e1d432   Linus Torvalds   Revert changes to...
1506

84c4e1f89   Linus Torvalds   aio: simplify - a...
1507
  	if (unlikely(!req->file->f_op->fsync))
a3c0d439e   Christoph Hellwig   aio: implement IO...
1508
  		return -EINVAL;
a3c0d439e   Christoph Hellwig   aio: implement IO...
1509

530f32fc3   Miklos Szeredi   aio: fix async fs...
1510
1511
1512
  	req->creds = prepare_creds();
  	if (!req->creds)
  		return -ENOMEM;
a3c0d439e   Christoph Hellwig   aio: implement IO...
1513
1514
1515
  	req->datasync = datasync;
  	INIT_WORK(&req->work, aio_fsync_work);
  	schedule_work(&req->work);
9061d14a8   Al Viro   aio: all callers ...
1516
  	return 0;
a3c0d439e   Christoph Hellwig   aio: implement IO...
1517
  }
01d7a3568   Jens Axboe   aio: prevent pote...
1518
1519
1520
1521
1522
1523
1524
  static void aio_poll_put_work(struct work_struct *work)
  {
  	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
  	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
  
  	iocb_put(iocb);
  }
bfe4037e7   Christoph Hellwig   aio: implement IO...
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
  static void aio_poll_complete_work(struct work_struct *work)
  {
  	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
  	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
  	struct poll_table_struct pt = { ._key = req->events };
  	struct kioctx *ctx = iocb->ki_ctx;
  	__poll_t mask = 0;
  
  	if (!READ_ONCE(req->cancelled))
  		mask = vfs_poll(req->file, &pt) & req->events;
  
  	/*
  	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
  	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
  	 * synchronize with them.  In the cancellation case the list_del_init
  	 * itself is not actually needed, but harmless so we keep it in to
  	 * avoid further branches in the fast path.
  	 */
  	spin_lock_irq(&ctx->ctx_lock);
  	if (!mask && !READ_ONCE(req->cancelled)) {
  		add_wait_queue(req->head, &req->wait);
  		spin_unlock_irq(&ctx->ctx_lock);
  		return;
  	}
  	list_del_init(&iocb->ki_list);
af5c72b1f   Al Viro   Fix aio_poll() races
1550
1551
  	iocb->ki_res.res = mangle_poll(mask);
  	req->done = true;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1552
  	spin_unlock_irq(&ctx->ctx_lock);
af5c72b1f   Al Viro   Fix aio_poll() races
1553
  	iocb_put(iocb);
bfe4037e7   Christoph Hellwig   aio: implement IO...
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
  }
  
  /* assumes we are called with irqs disabled */
  static int aio_poll_cancel(struct kiocb *iocb)
  {
  	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
  	struct poll_iocb *req = &aiocb->poll;
  
  	spin_lock(&req->head->lock);
  	WRITE_ONCE(req->cancelled, true);
  	if (!list_empty(&req->wait.entry)) {
  		list_del_init(&req->wait.entry);
  		schedule_work(&aiocb->poll.work);
  	}
  	spin_unlock(&req->head->lock);
  
  	return 0;
  }
  
  static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
  		void *key)
  {
  	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
e8693bcfa   Christoph Hellwig   aio: allow direct...
1577
  	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
bfe4037e7   Christoph Hellwig   aio: implement IO...
1578
  	__poll_t mask = key_to_poll(key);
d3d6a18d7   Bart Van Assche   aio: Fix locking ...
1579
  	unsigned long flags;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1580

bfe4037e7   Christoph Hellwig   aio: implement IO...
1581
  	/* for instances that support it check for an event match first: */
af5c72b1f   Al Viro   Fix aio_poll() races
1582
1583
  	if (mask && !(mask & req->events))
  		return 0;
e8693bcfa   Christoph Hellwig   aio: allow direct...
1584

af5c72b1f   Al Viro   Fix aio_poll() races
1585
1586
1587
  	list_del_init(&req->wait.entry);
  
  	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
01d7a3568   Jens Axboe   aio: prevent pote...
1588
  		struct kioctx *ctx = iocb->ki_ctx;
d3d6a18d7   Bart Van Assche   aio: Fix locking ...
1589
1590
1591
1592
1593
1594
  		/*
  		 * Try to complete the iocb inline if we can. Use
  		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
  		 * call this function with IRQs disabled and because IRQs
  		 * have to be disabled before ctx_lock is obtained.
  		 */
af5c72b1f   Al Viro   Fix aio_poll() races
1595
1596
1597
  		list_del(&iocb->ki_list);
  		iocb->ki_res.res = mangle_poll(mask);
  		req->done = true;
01d7a3568   Jens Axboe   aio: prevent pote...
1598
1599
1600
1601
1602
1603
1604
1605
  		if (iocb->ki_eventfd && eventfd_signal_count()) {
  			iocb = NULL;
  			INIT_WORK(&req->work, aio_poll_put_work);
  			schedule_work(&req->work);
  		}
  		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  		if (iocb)
  			iocb_put(iocb);
af5c72b1f   Al Viro   Fix aio_poll() races
1606
1607
  	} else {
  		schedule_work(&req->work);
e8693bcfa   Christoph Hellwig   aio: allow direct...
1608
  	}
bfe4037e7   Christoph Hellwig   aio: implement IO...
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
  	return 1;
  }
  
  struct aio_poll_table {
  	struct poll_table_struct	pt;
  	struct aio_kiocb		*iocb;
  	int				error;
  };
  
  static void
  aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
  		struct poll_table_struct *p)
  {
  	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
  
  	/* multiple wait queues per file are not supported */
  	if (unlikely(pt->iocb->poll.head)) {
  		pt->error = -EINVAL;
  		return;
  	}
  
  	pt->error = 0;
  	pt->iocb->poll.head = head;
  	add_wait_queue(head, &pt->iocb->poll.wait);
  }
958c13ce1   Al Viro   make aio_read()/a...
1634
  static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
bfe4037e7   Christoph Hellwig   aio: implement IO...
1635
1636
1637
1638
  {
  	struct kioctx *ctx = aiocb->ki_ctx;
  	struct poll_iocb *req = &aiocb->poll;
  	struct aio_poll_table apt;
af5c72b1f   Al Viro   Fix aio_poll() races
1639
  	bool cancel = false;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
  	__poll_t mask;
  
  	/* reject any unknown events outside the normal event mask. */
  	if ((u16)iocb->aio_buf != iocb->aio_buf)
  		return -EINVAL;
  	/* reject fields that are not defined for poll */
  	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
  		return -EINVAL;
  
  	INIT_WORK(&req->work, aio_poll_complete_work);
  	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1651

2bc4ca9bb   Jens Axboe   aio: don't zero e...
1652
  	req->head = NULL;
af5c72b1f   Al Viro   Fix aio_poll() races
1653
  	req->done = false;
2bc4ca9bb   Jens Axboe   aio: don't zero e...
1654
  	req->cancelled = false;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1655
1656
1657
1658
1659
1660
1661
1662
  	apt.pt._qproc = aio_poll_queue_proc;
  	apt.pt._key = req->events;
  	apt.iocb = aiocb;
  	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
  
  	/* initialized the list so that we can do list_empty checks */
  	INIT_LIST_HEAD(&req->wait.entry);
  	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
bfe4037e7   Christoph Hellwig   aio: implement IO...
1663
  	mask = vfs_poll(req->file, &apt.pt) & req->events;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1664
  	spin_lock_irq(&ctx->ctx_lock);
af5c72b1f   Al Viro   Fix aio_poll() races
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
  	if (likely(req->head)) {
  		spin_lock(&req->head->lock);
  		if (unlikely(list_empty(&req->wait.entry))) {
  			if (apt.error)
  				cancel = true;
  			apt.error = 0;
  			mask = 0;
  		}
  		if (mask || apt.error) {
  			list_del_init(&req->wait.entry);
  		} else if (cancel) {
  			WRITE_ONCE(req->cancelled, true);
  		} else if (!req->done) { /* actually waiting for an event */
  			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
  			aiocb->ki_cancel = aio_poll_cancel;
  		}
  		spin_unlock(&req->head->lock);
  	}
  	if (mask) { /* no async, we'd stolen it */
  		aiocb->ki_res.res = mangle_poll(mask);
bfe4037e7   Christoph Hellwig   aio: implement IO...
1685
  		apt.error = 0;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1686
  	}
bfe4037e7   Christoph Hellwig   aio: implement IO...
1687
  	spin_unlock_irq(&ctx->ctx_lock);
bfe4037e7   Christoph Hellwig   aio: implement IO...
1688
  	if (mask)
af5c72b1f   Al Viro   Fix aio_poll() races
1689
1690
  		iocb_put(aiocb);
  	return apt.error;
bfe4037e7   Christoph Hellwig   aio: implement IO...
1691
  }
88a6f18b9   Jens Axboe   aio: split out io...
1692
  static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
7316b49c2   Al Viro   aio: move sanity ...
1693
1694
  			   struct iocb __user *user_iocb, struct aio_kiocb *req,
  			   bool compat)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1695
  {
84c4e1f89   Linus Torvalds   aio: simplify - a...
1696
  	req->ki_filp = fget(iocb->aio_fildes);
84c4e1f89   Linus Torvalds   aio: simplify - a...
1697
  	if (unlikely(!req->ki_filp))
7316b49c2   Al Viro   aio: move sanity ...
1698
  		return -EBADF;
84c4e1f89   Linus Torvalds   aio: simplify - a...
1699

88a6f18b9   Jens Axboe   aio: split out io...
1700
  	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
742597034   Al Viro   aio: move droppin...
1701
  		struct eventfd_ctx *eventfd;
9c3060bed   Davide Libenzi   signal/timer/even...
1702
1703
1704
1705
1706
1707
  		/*
  		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
  		 * instance of the file* now. The file descriptor must be
  		 * an eventfd() fd, and will be signaled for each completed
  		 * event using the eventfd_signal() function.
  		 */
742597034   Al Viro   aio: move droppin...
1708
  		eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
7316b49c2   Al Viro   aio: move sanity ...
1709
  		if (IS_ERR(eventfd))
18bfb9c6a   Dan Carpenter   aio: Fix an error...
1710
  			return PTR_ERR(eventfd);
7316b49c2   Al Viro   aio: move sanity ...
1711

742597034   Al Viro   aio: move droppin...
1712
  		req->ki_eventfd = eventfd;
9830f4be1   Goldwyn Rodrigues   fs: Use RWF_* fla...
1713
  	}
7316b49c2   Al Viro   aio: move sanity ...
1714
  	if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) {
caf4167aa   Kent Overstreet   aio: dprintk() ->...
1715
1716
  		pr_debug("EFAULT: aio_key
  ");
7316b49c2   Al Viro   aio: move sanity ...
1717
  		return -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1718
  	}
a9339b785   Al Viro   aio: keep io_even...
1719
1720
1721
1722
  	req->ki_res.obj = (u64)(unsigned long)user_iocb;
  	req->ki_res.data = iocb->aio_data;
  	req->ki_res.res = 0;
  	req->ki_res.res2 = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1723

88a6f18b9   Jens Axboe   aio: split out io...
1724
  	switch (iocb->aio_lio_opcode) {
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1725
  	case IOCB_CMD_PREAD:
7316b49c2   Al Viro   aio: move sanity ...
1726
  		return aio_read(&req->rw, iocb, false, compat);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1727
  	case IOCB_CMD_PWRITE:
7316b49c2   Al Viro   aio: move sanity ...
1728
  		return aio_write(&req->rw, iocb, false, compat);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1729
  	case IOCB_CMD_PREADV:
7316b49c2   Al Viro   aio: move sanity ...
1730
  		return aio_read(&req->rw, iocb, true, compat);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1731
  	case IOCB_CMD_PWRITEV:
7316b49c2   Al Viro   aio: move sanity ...
1732
  		return aio_write(&req->rw, iocb, true, compat);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1733
  	case IOCB_CMD_FSYNC:
7316b49c2   Al Viro   aio: move sanity ...
1734
  		return aio_fsync(&req->fsync, iocb, false);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1735
  	case IOCB_CMD_FDSYNC:
7316b49c2   Al Viro   aio: move sanity ...
1736
  		return aio_fsync(&req->fsync, iocb, true);
bfe4037e7   Christoph Hellwig   aio: implement IO...
1737
  	case IOCB_CMD_POLL:
7316b49c2   Al Viro   aio: move sanity ...
1738
  		return aio_poll(req, iocb);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1739
  	default:
88a6f18b9   Jens Axboe   aio: split out io...
1740
1741
  		pr_debug("invalid aio operation %d
  ", iocb->aio_lio_opcode);
7316b49c2   Al Viro   aio: move sanity ...
1742
  		return -EINVAL;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1743
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1744
  }
88a6f18b9   Jens Axboe   aio: split out io...
1745
1746
1747
  static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
  			 bool compat)
  {
7316b49c2   Al Viro   aio: move sanity ...
1748
  	struct aio_kiocb *req;
88a6f18b9   Jens Axboe   aio: split out io...
1749
  	struct iocb iocb;
7316b49c2   Al Viro   aio: move sanity ...
1750
  	int err;
88a6f18b9   Jens Axboe   aio: split out io...
1751
1752
1753
  
  	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
  		return -EFAULT;
7316b49c2   Al Viro   aio: move sanity ...
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
  	/* enforce forwards compatibility on users */
  	if (unlikely(iocb.aio_reserved2)) {
  		pr_debug("EINVAL: reserve field set
  ");
  		return -EINVAL;
  	}
  
  	/* prevent overflows */
  	if (unlikely(
  	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
  	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
  	    ((ssize_t)iocb.aio_nbytes < 0)
  	   )) {
  		pr_debug("EINVAL: overflow check
  ");
  		return -EINVAL;
  	}
  
  	req = aio_get_req(ctx);
  	if (unlikely(!req))
  		return -EAGAIN;
  
  	err = __io_submit_one(ctx, &iocb, user_iocb, req, compat);
  
  	/* Done with the synchronous reference */
  	iocb_put(req);
  
  	/*
  	 * If err is 0, we'd either done aio_complete() ourselves or have
  	 * arranged for that to be done asynchronously.  Anything non-zero
  	 * means that we need to destroy req ourselves.
  	 */
  	if (unlikely(err)) {
  		iocb_destroy(req);
  		put_reqs_available(ctx, 1);
  	}
  	return err;
88a6f18b9   Jens Axboe   aio: split out io...
1791
  }
67ba049f9   Al Viro   aio: fold do_io_s...
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
  /* sys_io_submit:
   *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
   *	the number of iocbs queued.  May return -EINVAL if the aio_context
   *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
   *	*iocbpp[0] is not properly initialized, if the operation specified
   *	is invalid for the file descriptor in the iocb.  May fail with
   *	-EFAULT if any of the data structures point to invalid data.  May
   *	fail with -EBADF if the file descriptor specified in the first
   *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
   *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
   *	fail with -ENOSYS if not implemented.
   */
  SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
  		struct iocb __user * __user *, iocbpp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1806
1807
1808
  {
  	struct kioctx *ctx;
  	long ret = 0;
080d676de   Jeff Moyer   aio: allocate kio...
1809
  	int i = 0;
9f5b94254   Shaohua Li   fs: make aio plug
1810
  	struct blk_plug plug;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1811
1812
1813
  
  	if (unlikely(nr < 0))
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1814
1815
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
caf4167aa   Kent Overstreet   aio: dprintk() ->...
1816
1817
  		pr_debug("EINVAL: invalid context id
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1818
1819
  		return -EINVAL;
  	}
1da92779e   Al Viro   aio: sanitize the...
1820
1821
  	if (nr > ctx->nr_events)
  		nr = ctx->nr_events;
a79d40e9b   Jens Axboe   aio: only use blk...
1822
1823
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_start_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1824
  	for (i = 0; i < nr; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1825
  		struct iocb __user *user_iocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1826

67ba049f9   Al Viro   aio: fold do_io_s...
1827
  		if (unlikely(get_user(user_iocb, iocbpp + i))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1828
1829
1830
  			ret = -EFAULT;
  			break;
  		}
67ba049f9   Al Viro   aio: fold do_io_s...
1831
  		ret = io_submit_one(ctx, user_iocb, false);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1832
1833
1834
  		if (ret)
  			break;
  	}
a79d40e9b   Jens Axboe   aio: only use blk...
1835
1836
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_finish_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1837

723be6e39   Kent Overstreet   aio: percpu ioctx...
1838
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1839
1840
  	return i ? i : ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1841
  #ifdef CONFIG_COMPAT
c00d2c7e8   Al Viro   move aio compat t...
1842
  COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
67ba049f9   Al Viro   aio: fold do_io_s...
1843
  		       int, nr, compat_uptr_t __user *, iocbpp)
c00d2c7e8   Al Viro   move aio compat t...
1844
  {
67ba049f9   Al Viro   aio: fold do_io_s...
1845
1846
1847
1848
  	struct kioctx *ctx;
  	long ret = 0;
  	int i = 0;
  	struct blk_plug plug;
c00d2c7e8   Al Viro   move aio compat t...
1849
1850
1851
  
  	if (unlikely(nr < 0))
  		return -EINVAL;
67ba049f9   Al Viro   aio: fold do_io_s...
1852
1853
1854
1855
1856
1857
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
  		pr_debug("EINVAL: invalid context id
  ");
  		return -EINVAL;
  	}
1da92779e   Al Viro   aio: sanitize the...
1858
1859
  	if (nr > ctx->nr_events)
  		nr = ctx->nr_events;
a79d40e9b   Jens Axboe   aio: only use blk...
1860
1861
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_start_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
  	for (i = 0; i < nr; i++) {
  		compat_uptr_t user_iocb;
  
  		if (unlikely(get_user(user_iocb, iocbpp + i))) {
  			ret = -EFAULT;
  			break;
  		}
  
  		ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
  		if (ret)
  			break;
  	}
a79d40e9b   Jens Axboe   aio: only use blk...
1874
1875
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_finish_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1876
1877
1878
  
  	percpu_ref_put(&ctx->users);
  	return i ? i : ret;
c00d2c7e8   Al Viro   move aio compat t...
1879
1880
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
  /* sys_io_cancel:
   *	Attempts to cancel an iocb previously passed to io_submit.  If
   *	the operation is successfully cancelled, the resulting event is
   *	copied into the memory pointed to by result without being placed
   *	into the completion queue and 0 is returned.  May fail with
   *	-EFAULT if any of the data structures pointed to are invalid.
   *	May fail with -EINVAL if aio_context specified by ctx_id is
   *	invalid.  May fail with -EAGAIN if the iocb specified was not
   *	cancelled.  Will fail with -ENOSYS if not implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1891
1892
  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
  		struct io_event __user *, result)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1893
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1894
  	struct kioctx *ctx;
04b2fa9f8   Christoph Hellwig   fs: split generic...
1895
  	struct aio_kiocb *kiocb;
888933f8f   Christoph Hellwig   aio: simplify can...
1896
  	int ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1897
  	u32 key;
a9339b785   Al Viro   aio: keep io_even...
1898
  	u64 obj = (u64)(unsigned long)iocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1899

f3a2752a4   Christoph Hellwig   aio: simplify KIO...
1900
  	if (unlikely(get_user(key, &iocb->aio_key)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1901
  		return -EFAULT;
f3a2752a4   Christoph Hellwig   aio: simplify KIO...
1902
1903
  	if (unlikely(key != KIOCB_KEY))
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1904
1905
1906
1907
1908
1909
  
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx))
  		return -EINVAL;
  
  	spin_lock_irq(&ctx->ctx_lock);
833f4154e   Al Viro   aio: fold lookup_...
1910
1911
  	/* TODO: use a hash or array, this sucks. */
  	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
a9339b785   Al Viro   aio: keep io_even...
1912
  		if (kiocb->ki_res.obj == obj) {
833f4154e   Al Viro   aio: fold lookup_...
1913
1914
1915
1916
  			ret = kiocb->ki_cancel(&kiocb->rw);
  			list_del_init(&kiocb->ki_list);
  			break;
  		}
888933f8f   Christoph Hellwig   aio: simplify can...
1917
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1918
  	spin_unlock_irq(&ctx->ctx_lock);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1919
  	if (!ret) {
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1920
1921
1922
1923
  		/*
  		 * The result argument is no longer used - the io_event is
  		 * always delivered via the ring buffer. -EINPROGRESS indicates
  		 * cancellation is progress:
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1924
  		 */
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1925
  		ret = -EINPROGRESS;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1926
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1927

723be6e39   Kent Overstreet   aio: percpu ioctx...
1928
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1929
1930
1931
  
  	return ret;
  }
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
  static long do_io_getevents(aio_context_t ctx_id,
  		long min_nr,
  		long nr,
  		struct io_event __user *events,
  		struct timespec64 *ts)
  {
  	ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX;
  	struct kioctx *ioctx = lookup_ioctx(ctx_id);
  	long ret = -EINVAL;
  
  	if (likely(ioctx)) {
  		if (likely(min_nr <= nr && min_nr >= 0))
  			ret = read_events(ioctx, min_nr, nr, events, until);
  		percpu_ref_put(&ioctx->users);
  	}
  
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1950
1951
  /* io_getevents:
   *	Attempts to read at least min_nr events and up to nr events from
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1952
1953
1954
1955
1956
1957
1958
1959
   *	the completion queue for the aio_context specified by ctx_id. If
   *	it succeeds, the number of read events is returned. May fail with
   *	-EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
   *	out of range, if timeout is out of range.  May fail with -EFAULT
   *	if any of the memory specified is invalid.  May return 0 or
   *	< min_nr if the timeout specified by timeout has elapsed
   *	before sufficient events are available, where timeout == NULL
   *	specifies an infinite timeout. Note that the timeout pointed to by
6900807c6   Jeff Moyer   aio: fix io_getev...
1960
   *	timeout is relative.  Will fail with -ENOSYS if not implemented.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1961
   */
3ca47e958   Arnd Bergmann   y2038: remove CON...
1962
  #ifdef CONFIG_64BIT
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1963

002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1964
1965
1966
1967
  SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1968
  		struct __kernel_timespec __user *, timeout)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1969
  {
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1970
  	struct timespec64	ts;
7a074e96d   Christoph Hellwig   aio: implement io...
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
  	int			ret;
  
  	if (timeout && unlikely(get_timespec64(&ts, timeout)))
  		return -EFAULT;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
  	if (!ret && signal_pending(current))
  		ret = -EINTR;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1981

7a35397f8   Deepa Dinamani   io_pgetevents: us...
1982
  #endif
9ba546c01   Christoph Hellwig   aio: don't expose...
1983
1984
1985
1986
  struct __aio_sigset {
  	const sigset_t __user	*sigmask;
  	size_t		sigsetsize;
  };
7a074e96d   Christoph Hellwig   aio: implement io...
1987
1988
1989
1990
1991
  SYSCALL_DEFINE6(io_pgetevents,
  		aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1992
  		struct __kernel_timespec __user *, timeout,
7a074e96d   Christoph Hellwig   aio: implement io...
1993
1994
1995
  		const struct __aio_sigset __user *, usig)
  {
  	struct __aio_sigset	ksig = { NULL, };
7a074e96d   Christoph Hellwig   aio: implement io...
1996
  	struct timespec64	ts;
97abc889e   Oleg Nesterov   signal: remove th...
1997
  	bool interrupted;
7a074e96d   Christoph Hellwig   aio: implement io...
1998
1999
2000
2001
2002
2003
2004
  	int ret;
  
  	if (timeout && unlikely(get_timespec64(&ts, timeout)))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
b772434be   Oleg Nesterov   signal: simplify ...
2005
  	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2006
2007
  	if (ret)
  		return ret;
7a074e96d   Christoph Hellwig   aio: implement io...
2008
2009
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2010
2011
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2012
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2013
  	if (interrupted && !ret)
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2014
  		ret = -ERESTARTNOHAND;
7a074e96d   Christoph Hellwig   aio: implement io...
2015

7a35397f8   Deepa Dinamani   io_pgetevents: us...
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
  	return ret;
  }
  
  #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
  
  SYSCALL_DEFINE6(io_pgetevents_time32,
  		aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
  		struct old_timespec32 __user *, timeout,
  		const struct __aio_sigset __user *, usig)
  {
  	struct __aio_sigset	ksig = { NULL, };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2030
  	struct timespec64	ts;
97abc889e   Oleg Nesterov   signal: remove th...
2031
  	bool interrupted;
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2032
2033
2034
2035
2036
2037
2038
  	int ret;
  
  	if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
ded653ccb   Deepa Dinamani   signal: Add set_u...
2039

b772434be   Oleg Nesterov   signal: simplify ...
2040
  	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
ded653ccb   Deepa Dinamani   signal: Add set_u...
2041
2042
  	if (ret)
  		return ret;
7a074e96d   Christoph Hellwig   aio: implement io...
2043
2044
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2045
2046
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2047
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2048
  	if (interrupted && !ret)
854a6ed56   Deepa Dinamani   signal: Add resto...
2049
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2050

7a074e96d   Christoph Hellwig   aio: implement io...
2051
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2052
  }
c00d2c7e8   Al Viro   move aio compat t...
2053

7a35397f8   Deepa Dinamani   io_pgetevents: us...
2054
2055
2056
  #endif
  
  #if defined(CONFIG_COMPAT_32BIT_TIME)
8dabe7245   Arnd Bergmann   y2038: syscalls: ...
2057
2058
2059
2060
2061
  SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
  		__s32, min_nr,
  		__s32, nr,
  		struct io_event __user *, events,
  		struct old_timespec32 __user *, timeout)
c00d2c7e8   Al Viro   move aio compat t...
2062
  {
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2063
  	struct timespec64 t;
7a074e96d   Christoph Hellwig   aio: implement io...
2064
  	int ret;
9afc5eee6   Arnd Bergmann   y2038: globally r...
2065
  	if (timeout && get_old_timespec32(&t, timeout))
7a074e96d   Christoph Hellwig   aio: implement io...
2066
2067
2068
2069
2070
2071
2072
  		return -EFAULT;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
  	if (!ret && signal_pending(current))
  		ret = -EINTR;
  	return ret;
  }
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2073
2074
2075
  #endif
  
  #ifdef CONFIG_COMPAT
c00d2c7e8   Al Viro   move aio compat t...
2076

7a074e96d   Christoph Hellwig   aio: implement io...
2077
  struct __compat_aio_sigset {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2078
  	compat_uptr_t		sigmask;
7a074e96d   Christoph Hellwig   aio: implement io...
2079
2080
  	compat_size_t		sigsetsize;
  };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2081
  #if defined(CONFIG_COMPAT_32BIT_TIME)
7a074e96d   Christoph Hellwig   aio: implement io...
2082
2083
2084
2085
2086
  COMPAT_SYSCALL_DEFINE6(io_pgetevents,
  		compat_aio_context_t, ctx_id,
  		compat_long_t, min_nr,
  		compat_long_t, nr,
  		struct io_event __user *, events,
9afc5eee6   Arnd Bergmann   y2038: globally r...
2087
  		struct old_timespec32 __user *, timeout,
7a074e96d   Christoph Hellwig   aio: implement io...
2088
2089
  		const struct __compat_aio_sigset __user *, usig)
  {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2090
  	struct __compat_aio_sigset ksig = { 0, };
7a074e96d   Christoph Hellwig   aio: implement io...
2091
  	struct timespec64 t;
97abc889e   Oleg Nesterov   signal: remove th...
2092
  	bool interrupted;
7a074e96d   Christoph Hellwig   aio: implement io...
2093
  	int ret;
9afc5eee6   Arnd Bergmann   y2038: globally r...
2094
  	if (timeout && get_old_timespec32(&t, timeout))
7a074e96d   Christoph Hellwig   aio: implement io...
2095
2096
2097
2098
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2099
  	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
ded653ccb   Deepa Dinamani   signal: Add set_u...
2100
2101
  	if (ret)
  		return ret;
c00d2c7e8   Al Viro   move aio compat t...
2102

7a074e96d   Christoph Hellwig   aio: implement io...
2103
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2104
2105
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2106
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2107
  	if (interrupted && !ret)
854a6ed56   Deepa Dinamani   signal: Add resto...
2108
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2109

7a074e96d   Christoph Hellwig   aio: implement io...
2110
  	return ret;
c00d2c7e8   Al Viro   move aio compat t...
2111
  }
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
  
  #endif
  
  COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
  		compat_aio_context_t, ctx_id,
  		compat_long_t, min_nr,
  		compat_long_t, nr,
  		struct io_event __user *, events,
  		struct __kernel_timespec __user *, timeout,
  		const struct __compat_aio_sigset __user *, usig)
  {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2123
  	struct __compat_aio_sigset ksig = { 0, };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2124
  	struct timespec64 t;
97abc889e   Oleg Nesterov   signal: remove th...
2125
  	bool interrupted;
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2126
2127
2128
2129
2130
2131
2132
  	int ret;
  
  	if (timeout && get_timespec64(&t, timeout))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2133
  	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2134
2135
2136
2137
  	if (ret)
  		return ret;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2138
2139
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2140
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2141
  	if (interrupted && !ret)
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2142
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2143

7a074e96d   Christoph Hellwig   aio: implement io...
2144
  	return ret;
c00d2c7e8   Al Viro   move aio compat t...
2145
2146
  }
  #endif