Blame view

fs/aio.c 56.6 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   *	An async IO implementation for Linux
   *	Written by Benjamin LaHaise <bcrl@kvack.org>
   *
   *	Implements an efficient asynchronous io interface.
   *
   *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
bfe4037e7   Christoph Hellwig   aio: implement IO...
8
   *	Copyright 2018 Christoph Hellwig.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
   *
   *	See ../COPYING for licensing terms.
   */
caf4167aa   Kent Overstreet   aio: dprintk() ->...
12
  #define pr_fmt(fmt) "%s: " fmt, __func__
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
16
17
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/errno.h>
  #include <linux/time.h>
  #include <linux/aio_abi.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
18
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/syscalls.h>
b9d128f10   Jens Axboe   block: move bdi/a...
20
  #include <linux/backing-dev.h>
9018ccc45   Christoph Hellwig   aio: add a iocb r...
21
  #include <linux/refcount.h>
027445c37   Badari Pulavarty   [PATCH] Vectorize...
22
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23

174cd4b1e   Ingo Molnar   sched/headers: Pr...
24
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
28
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
3d2d827f5   Michael S. Tsirkin   mm: move use_mm/u...
29
  #include <linux/mmu_context.h>
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
30
  #include <linux/percpu.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
32
33
34
35
36
  #include <linux/slab.h>
  #include <linux/timer.h>
  #include <linux/aio.h>
  #include <linux/highmem.h>
  #include <linux/workqueue.h>
  #include <linux/security.h>
9c3060bed   Davide Libenzi   signal/timer/even...
37
  #include <linux/eventfd.h>
cfb1e33ee   Jeff Moyer   aio: implement re...
38
  #include <linux/blkdev.h>
9d85cba71   Jeff Moyer   aio: fix the comp...
39
  #include <linux/compat.h>
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
40
41
  #include <linux/migrate.h>
  #include <linux/ramfs.h>
723be6e39   Kent Overstreet   aio: percpu ioctx...
42
  #include <linux/percpu-refcount.h>
71ad7490c   Benjamin LaHaise   rework aio migrat...
43
  #include <linux/mount.h>
52db59df1   David Howells   vfs: Convert aio ...
44
  #include <linux/pseudo_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
  
  #include <asm/kmap_types.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
47
  #include <linux/uaccess.h>
a538e3ff9   Jeff Moyer   aio: fix spectre ...
48
  #include <linux/nospec.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49

68d70d03f   Al Viro   constify rw_verif...
50
  #include "internal.h"
f3a2752a4   Christoph Hellwig   aio: simplify KIO...
51
  #define KIOCB_KEY		0
4e179bca6   Kent Overstreet   aio: move private...
52
53
54
55
56
57
  #define AIO_RING_MAGIC			0xa10a10a1
  #define AIO_RING_COMPAT_FEATURES	1
  #define AIO_RING_INCOMPAT_FEATURES	0
  struct aio_ring {
  	unsigned	id;	/* kernel internal index number */
  	unsigned	nr;	/* number of io_events */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
58
59
  	unsigned	head;	/* Written to by userland or under ring_lock
  				 * mutex by aio_read_events_ring(). */
4e179bca6   Kent Overstreet   aio: move private...
60
61
62
63
64
65
66
67
68
69
  	unsigned	tail;
  
  	unsigned	magic;
  	unsigned	compat_features;
  	unsigned	incompat_features;
  	unsigned	header_length;	/* size of aio_ring */
  
  
  	struct io_event		io_events[0];
  }; /* 128 bytes + ring size */
a79d40e9b   Jens Axboe   aio: only use blk...
70
71
72
73
74
  /*
   * Plugging is meant to work with larger batches of IOs. If we don't
   * have more than the below, then don't bother setting up a plug.
   */
  #define AIO_PLUG_THRESHOLD	2
4e179bca6   Kent Overstreet   aio: move private...
75
  #define AIO_RING_PAGES	8
4e179bca6   Kent Overstreet   aio: move private...
76

db446a08c   Benjamin LaHaise   aio: convert the ...
77
  struct kioctx_table {
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
78
79
80
  	struct rcu_head		rcu;
  	unsigned		nr;
  	struct kioctx __rcu	*table[];
db446a08c   Benjamin LaHaise   aio: convert the ...
81
  };
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
82
83
84
  struct kioctx_cpu {
  	unsigned		reqs_available;
  };
dc48e56d7   Jens Axboe   aio: fix serial d...
85
86
87
88
  struct ctx_rq_wait {
  	struct completion comp;
  	atomic_t count;
  };
4e179bca6   Kent Overstreet   aio: move private...
89
  struct kioctx {
723be6e39   Kent Overstreet   aio: percpu ioctx...
90
  	struct percpu_ref	users;
36f558890   Kent Overstreet   aio: refcounting ...
91
  	atomic_t		dead;
4e179bca6   Kent Overstreet   aio: move private...
92

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
93
  	struct percpu_ref	reqs;
4e179bca6   Kent Overstreet   aio: move private...
94
  	unsigned long		user_id;
4e179bca6   Kent Overstreet   aio: move private...
95

e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
96
97
98
99
100
101
102
  	struct __percpu kioctx_cpu *cpu;
  
  	/*
  	 * For percpu reqs_available, number of slots we move to/from global
  	 * counter at a time:
  	 */
  	unsigned		req_batch;
3e845ce01   Kent Overstreet   aio: change reqs_...
103
104
105
106
  	/*
  	 * This is what userspace passed to io_setup(), it's not used for
  	 * anything but counting against the global max_reqs quota.
  	 *
58c85dc20   Kent Overstreet   aio: kill struct ...
107
  	 * The real limit is nr_events - 1, which will be larger (see
3e845ce01   Kent Overstreet   aio: change reqs_...
108
109
  	 * aio_setup_ring())
  	 */
4e179bca6   Kent Overstreet   aio: move private...
110
  	unsigned		max_reqs;
58c85dc20   Kent Overstreet   aio: kill struct ...
111
112
  	/* Size of ringbuffer, in units of struct io_event */
  	unsigned		nr_events;
4e179bca6   Kent Overstreet   aio: move private...
113

58c85dc20   Kent Overstreet   aio: kill struct ...
114
115
116
117
118
  	unsigned long		mmap_base;
  	unsigned long		mmap_size;
  
  	struct page		**ring_pages;
  	long			nr_pages;
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
119
  	struct rcu_work		free_rwork;	/* see free_ioctx() */
4e23bcaeb   Kent Overstreet   aio: give shared ...
120

e02ba72aa   Anatol Pomozov   aio: block io_des...
121
122
123
  	/*
  	 * signals when all in-flight requests are done
  	 */
dc48e56d7   Jens Axboe   aio: fix serial d...
124
  	struct ctx_rq_wait	*rq_wait;
e02ba72aa   Anatol Pomozov   aio: block io_des...
125

4e23bcaeb   Kent Overstreet   aio: give shared ...
126
  	struct {
34e83fc61   Kent Overstreet   aio: reqs_active ...
127
128
129
130
131
  		/*
  		 * This counts the number of available slots in the ringbuffer,
  		 * so we avoid overflowing it: it's decremented (if positive)
  		 * when allocating a kiocb and incremented when the resulting
  		 * io_event is pulled off the ringbuffer.
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
132
133
  		 *
  		 * We batch accesses to it with a percpu version.
34e83fc61   Kent Overstreet   aio: reqs_active ...
134
135
  		 */
  		atomic_t	reqs_available;
4e23bcaeb   Kent Overstreet   aio: give shared ...
136
137
138
139
140
141
  	} ____cacheline_aligned_in_smp;
  
  	struct {
  		spinlock_t	ctx_lock;
  		struct list_head active_reqs;	/* used for cancellation */
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
142
143
  	struct {
  		struct mutex	ring_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
144
145
  		wait_queue_head_t wait;
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
146
147
148
  
  	struct {
  		unsigned	tail;
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
149
  		unsigned	completed_events;
58c85dc20   Kent Overstreet   aio: kill struct ...
150
  		spinlock_t	completion_lock;
4e23bcaeb   Kent Overstreet   aio: give shared ...
151
  	} ____cacheline_aligned_in_smp;
58c85dc20   Kent Overstreet   aio: kill struct ...
152
153
  
  	struct page		*internal_pages[AIO_RING_PAGES];
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
154
  	struct file		*aio_ring_file;
db446a08c   Benjamin LaHaise   aio: convert the ...
155
156
  
  	unsigned		id;
4e179bca6   Kent Overstreet   aio: move private...
157
  };
84c4e1f89   Linus Torvalds   aio: simplify - a...
158
159
160
161
  /*
   * First field must be the file pointer in all the
   * iocb unions! See also 'struct kiocb' in <linux/fs.h>
   */
a3c0d439e   Christoph Hellwig   aio: implement IO...
162
  struct fsync_iocb {
a3c0d439e   Christoph Hellwig   aio: implement IO...
163
  	struct file		*file;
84c4e1f89   Linus Torvalds   aio: simplify - a...
164
  	struct work_struct	work;
a3c0d439e   Christoph Hellwig   aio: implement IO...
165
  	bool			datasync;
7328f9a4a   Miklos Szeredi   aio: fix async fs...
166
  	struct cred		*creds;
a3c0d439e   Christoph Hellwig   aio: implement IO...
167
  };
bfe4037e7   Christoph Hellwig   aio: implement IO...
168
169
170
171
  struct poll_iocb {
  	struct file		*file;
  	struct wait_queue_head	*head;
  	__poll_t		events;
af5c72b1f   Al Viro   Fix aio_poll() races
172
  	bool			done;
bfe4037e7   Christoph Hellwig   aio: implement IO...
173
174
175
176
  	bool			cancelled;
  	struct wait_queue_entry	wait;
  	struct work_struct	work;
  };
84c4e1f89   Linus Torvalds   aio: simplify - a...
177
178
179
180
181
182
  /*
   * NOTE! Each of the iocb union members has the file pointer
   * as the first entry in their struct definition. So you can
   * access the file pointer through any of the sub-structs,
   * or directly as just 'ki_filp' in this struct.
   */
04b2fa9f8   Christoph Hellwig   fs: split generic...
183
  struct aio_kiocb {
54843f875   Christoph Hellwig   aio: refactor rea...
184
  	union {
84c4e1f89   Linus Torvalds   aio: simplify - a...
185
  		struct file		*ki_filp;
54843f875   Christoph Hellwig   aio: refactor rea...
186
  		struct kiocb		rw;
a3c0d439e   Christoph Hellwig   aio: implement IO...
187
  		struct fsync_iocb	fsync;
bfe4037e7   Christoph Hellwig   aio: implement IO...
188
  		struct poll_iocb	poll;
54843f875   Christoph Hellwig   aio: refactor rea...
189
  	};
04b2fa9f8   Christoph Hellwig   fs: split generic...
190
191
192
  
  	struct kioctx		*ki_ctx;
  	kiocb_cancel_fn		*ki_cancel;
a9339b785   Al Viro   aio: keep io_even...
193
  	struct io_event		ki_res;
04b2fa9f8   Christoph Hellwig   fs: split generic...
194
195
196
  
  	struct list_head	ki_list;	/* the aio core uses this
  						 * for cancellation */
9018ccc45   Christoph Hellwig   aio: add a iocb r...
197
  	refcount_t		ki_refcnt;
04b2fa9f8   Christoph Hellwig   fs: split generic...
198
199
200
201
202
203
204
  
  	/*
  	 * If the aio_resfd field of the userspace iocb is not zero,
  	 * this is the underlying eventfd context to deliver events to.
  	 */
  	struct eventfd_ctx	*ki_eventfd;
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
205
  /*------ sysctl variables----*/
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
206
207
208
  static DEFINE_SPINLOCK(aio_nr_lock);
  unsigned long aio_nr;		/* current system wide number of aio requests */
  unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209
  /*----end sysctl variables---*/
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
210
211
  static struct kmem_cache	*kiocb_cachep;
  static struct kmem_cache	*kioctx_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
212

71ad7490c   Benjamin LaHaise   rework aio migrat...
213
214
215
216
217
218
219
  static struct vfsmount *aio_mnt;
  
  static const struct file_operations aio_ring_fops;
  static const struct address_space_operations aio_ctx_aops;
  
  static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
  {
71ad7490c   Benjamin LaHaise   rework aio migrat...
220
  	struct file *file;
71ad7490c   Benjamin LaHaise   rework aio migrat...
221
  	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
7f62656be   Dan Carpenter   aio: checking for...
222
223
  	if (IS_ERR(inode))
  		return ERR_CAST(inode);
71ad7490c   Benjamin LaHaise   rework aio migrat...
224
225
226
227
  
  	inode->i_mapping->a_ops = &aio_ctx_aops;
  	inode->i_mapping->private_data = ctx;
  	inode->i_size = PAGE_SIZE * nr_pages;
d93aa9d82   Al Viro   new wrapper: allo...
228
229
  	file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
  				O_RDWR, &aio_ring_fops);
c9c554f21   Al Viro   alloc_file(): swi...
230
  	if (IS_ERR(file))
71ad7490c   Benjamin LaHaise   rework aio migrat...
231
  		iput(inode);
71ad7490c   Benjamin LaHaise   rework aio migrat...
232
233
  	return file;
  }
52db59df1   David Howells   vfs: Convert aio ...
234
  static int aio_init_fs_context(struct fs_context *fc)
71ad7490c   Benjamin LaHaise   rework aio migrat...
235
  {
52db59df1   David Howells   vfs: Convert aio ...
236
237
238
239
  	if (!init_pseudo(fc, AIO_RING_MAGIC))
  		return -ENOMEM;
  	fc->s_iflags |= SB_I_NOEXEC;
  	return 0;
71ad7490c   Benjamin LaHaise   rework aio migrat...
240
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241
242
243
244
245
246
  /* aio_setup
   *	Creates the slab caches used by the aio routines, panic on
   *	failure as this is done early during the boot sequence.
   */
  static int __init aio_setup(void)
  {
71ad7490c   Benjamin LaHaise   rework aio migrat...
247
248
  	static struct file_system_type aio_fs = {
  		.name		= "aio",
52db59df1   David Howells   vfs: Convert aio ...
249
  		.init_fs_context = aio_init_fs_context,
71ad7490c   Benjamin LaHaise   rework aio migrat...
250
251
252
253
254
  		.kill_sb	= kill_anon_super,
  	};
  	aio_mnt = kern_mount(&aio_fs);
  	if (IS_ERR(aio_mnt))
  		panic("Failed to create aio fs mount.");
04b2fa9f8   Christoph Hellwig   fs: split generic...
255
  	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
0a31bd5f2   Christoph Lameter   KMEM_CACHE(): sim...
256
  	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
258
  	return 0;
  }
385773e04   H Hartley Sweeten   aio.c: move EXPOR...
259
  __initcall(aio_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
260

5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
261
262
263
  static void put_aio_ring_file(struct kioctx *ctx)
  {
  	struct file *aio_ring_file = ctx->aio_ring_file;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
264
  	struct address_space *i_mapping;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
265
  	if (aio_ring_file) {
450630975   Al Viro   don't open-code f...
266
  		truncate_setsize(file_inode(aio_ring_file), 0);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
267
268
  
  		/* Prevent further access to the kioctx from migratepages */
450630975   Al Viro   don't open-code f...
269
  		i_mapping = aio_ring_file->f_mapping;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
270
271
  		spin_lock(&i_mapping->private_lock);
  		i_mapping->private_data = NULL;
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
272
  		ctx->aio_ring_file = NULL;
de04e7693   Rasmus Villemoes   fs/aio.c: elimina...
273
  		spin_unlock(&i_mapping->private_lock);
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
274
275
276
277
  
  		fput(aio_ring_file);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
279
  static void aio_free_ring(struct kioctx *ctx)
  {
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
280
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
281

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
282
283
284
285
  	/* Disconnect the kiotx from the ring file.  This prevents future
  	 * accesses to the kioctx from page migration.
  	 */
  	put_aio_ring_file(ctx);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
286
  	for (i = 0; i < ctx->nr_pages; i++) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
287
  		struct page *page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
288
289
290
  		pr_debug("pid(%d) [%d] page->count=%d
  ", current->pid, i,
  				page_count(ctx->ring_pages[i]));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
291
292
293
294
295
  		page = ctx->ring_pages[i];
  		if (!page)
  			continue;
  		ctx->ring_pages[i] = NULL;
  		put_page(page);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
296
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297

ddb8c45ba   Sasha Levin   aio: nullify aio-...
298
  	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
58c85dc20   Kent Overstreet   aio: kill struct ...
299
  		kfree(ctx->ring_pages);
ddb8c45ba   Sasha Levin   aio: nullify aio-...
300
301
  		ctx->ring_pages = NULL;
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
302
  }
5477e70a6   Oleg Nesterov   mm: move ->mremap...
303
  static int aio_ring_mremap(struct vm_area_struct *vma)
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
304
  {
5477e70a6   Oleg Nesterov   mm: move ->mremap...
305
  	struct file *file = vma->vm_file;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
306
307
  	struct mm_struct *mm = vma->vm_mm;
  	struct kioctx_table *table;
b2edffdd9   Al Viro   fix mremap() vs. ...
308
  	int i, res = -EINVAL;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
309
310
311
312
313
314
  
  	spin_lock(&mm->ioctx_lock);
  	rcu_read_lock();
  	table = rcu_dereference(mm->ioctx_table);
  	for (i = 0; i < table->nr; i++) {
  		struct kioctx *ctx;
d0264c01e   Tejun Heo   fs/aio: Use RCU a...
315
  		ctx = rcu_dereference(table->table[i]);
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
316
  		if (ctx && ctx->aio_ring_file == file) {
b2edffdd9   Al Viro   fix mremap() vs. ...
317
318
319
320
  			if (!atomic_read(&ctx->dead)) {
  				ctx->user_id = ctx->mmap_base = vma->vm_start;
  				res = 0;
  			}
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
321
322
323
324
325
326
  			break;
  		}
  	}
  
  	rcu_read_unlock();
  	spin_unlock(&mm->ioctx_lock);
b2edffdd9   Al Viro   fix mremap() vs. ...
327
  	return res;
e4a0d3e72   Pavel Emelyanov   aio: Make it poss...
328
  }
5477e70a6   Oleg Nesterov   mm: move ->mremap...
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
  static const struct vm_operations_struct aio_ring_vm_ops = {
  	.mremap		= aio_ring_mremap,
  #if IS_ENABLED(CONFIG_MMU)
  	.fault		= filemap_fault,
  	.map_pages	= filemap_map_pages,
  	.page_mkwrite	= filemap_page_mkwrite,
  #endif
  };
  
  static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
  {
  	vma->vm_flags |= VM_DONTEXPAND;
  	vma->vm_ops = &aio_ring_vm_ops;
  	return 0;
  }
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
344
345
346
  static const struct file_operations aio_ring_fops = {
  	.mmap = aio_ring_mmap,
  };
0c45355fc   Benjamin LaHaise   aio: fix build wh...
347
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
348
349
350
  static int aio_migratepage(struct address_space *mapping, struct page *new,
  			struct page *old, enum migrate_mode mode)
  {
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
351
  	struct kioctx *ctx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
352
  	unsigned long flags;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
353
  	pgoff_t idx;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
354
  	int rc;
2916ecc0f   Jérôme Glisse   mm/migrate: new m...
355
356
357
358
359
360
361
  	/*
  	 * We cannot support the _NO_COPY case here, because copy needs to
  	 * happen under the ctx->completion_lock. That does not work with the
  	 * migration workflow of MIGRATE_SYNC_NO_COPY.
  	 */
  	if (mode == MIGRATE_SYNC_NO_COPY)
  		return -EINVAL;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
362
  	rc = 0;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
363
  	/* mapping->private_lock here protects against the kioctx teardown.  */
8e321fefb   Benjamin LaHaise   aio/migratepages:...
364
365
  	spin_lock(&mapping->private_lock);
  	ctx = mapping->private_data;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
  	if (!ctx) {
  		rc = -EINVAL;
  		goto out;
  	}
  
  	/* The ring_lock mutex.  The prevents aio_read_events() from writing
  	 * to the ring's head, and prevents page migration from mucking in
  	 * a partially initialized kiotx.
  	 */
  	if (!mutex_trylock(&ctx->ring_lock)) {
  		rc = -EAGAIN;
  		goto out;
  	}
  
  	idx = old->index;
  	if (idx < (pgoff_t)ctx->nr_pages) {
  		/* Make sure the old page hasn't already been changed */
  		if (ctx->ring_pages[idx] != old)
  			rc = -EAGAIN;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
385
386
  	} else
  		rc = -EINVAL;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
387
388
  
  	if (rc != 0)
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
389
  		goto out_unlock;
8e321fefb   Benjamin LaHaise   aio/migratepages:...
390

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
391
392
  	/* Writeback must be complete */
  	BUG_ON(PageWriteback(old));
8e321fefb   Benjamin LaHaise   aio/migratepages:...
393
  	get_page(new);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
394

371096949   Keith Busch   mm: migrate: remo...
395
  	rc = migrate_page_move_mapping(mapping, new, old, 1);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
396
  	if (rc != MIGRATEPAGE_SUCCESS) {
8e321fefb   Benjamin LaHaise   aio/migratepages:...
397
  		put_page(new);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
398
  		goto out_unlock;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
399
  	}
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
400
401
402
  	/* Take completion_lock to prevent other writes to the ring buffer
  	 * while the old page is copied to the new.  This prevents new
  	 * events from being lost.
5e9ae2e5d   Benjamin LaHaise   aio: fix use-afte...
403
  	 */
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
404
405
406
407
408
  	spin_lock_irqsave(&ctx->completion_lock, flags);
  	migrate_page_copy(new, old);
  	BUG_ON(ctx->ring_pages[idx] != old);
  	ctx->ring_pages[idx] = new;
  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
409

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
410
411
  	/* The old page is no longer accessible. */
  	put_page(old);
8e321fefb   Benjamin LaHaise   aio/migratepages:...
412

fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
413
414
415
416
  out_unlock:
  	mutex_unlock(&ctx->ring_lock);
  out:
  	spin_unlock(&mapping->private_lock);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
417
  	return rc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
418
  }
0c45355fc   Benjamin LaHaise   aio: fix build wh...
419
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
420

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
421
  static const struct address_space_operations aio_ctx_aops = {
835f252c6   Gu Zheng   aio: fix uncorren...
422
  	.set_page_dirty = __set_page_dirty_no_writeback,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
423
  #if IS_ENABLED(CONFIG_MIGRATION)
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
424
  	.migratepage	= aio_migratepage,
0c45355fc   Benjamin LaHaise   aio: fix build wh...
425
  #endif
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
426
  };
2a8a98673   Mauricio Faria de Oliveira   fs: aio: fix the ...
427
  static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
428
429
  {
  	struct aio_ring *ring;
41003a7bc   Zach Brown   aio: remove retry...
430
  	struct mm_struct *mm = current->mm;
3dc9acb67   Linus Torvalds   aio: clean up and...
431
  	unsigned long size, unused;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
432
  	int nr_pages;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
433
434
  	int i;
  	struct file *file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
436
437
438
439
440
  
  	/* Compensate for the ring buffer's head/tail overlap entry */
  	nr_events += 2;	/* 1 is required, 2 for good luck */
  
  	size = sizeof(struct aio_ring);
  	size += sizeof(struct io_event) * nr_events;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
441

36bc08cc0   Gu Zheng   fs/aio: Add suppo...
442
  	nr_pages = PFN_UP(size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
443
444
  	if (nr_pages < 0)
  		return -EINVAL;
71ad7490c   Benjamin LaHaise   rework aio migrat...
445
  	file = aio_private_file(ctx, nr_pages);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
446
447
  	if (IS_ERR(file)) {
  		ctx->aio_ring_file = NULL;
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
448
  		return -ENOMEM;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
449
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
450
451
452
453
454
455
456
457
458
459
460
461
462
  	ctx->aio_ring_file = file;
  	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
  			/ sizeof(struct io_event);
  
  	ctx->ring_pages = ctx->internal_pages;
  	if (nr_pages > AIO_RING_PAGES) {
  		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
  					  GFP_KERNEL);
  		if (!ctx->ring_pages) {
  			put_aio_ring_file(ctx);
  			return -ENOMEM;
  		}
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
463
464
  	for (i = 0; i < nr_pages; i++) {
  		struct page *page;
450630975   Al Viro   don't open-code f...
465
  		page = find_or_create_page(file->f_mapping,
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
466
467
468
469
470
471
472
  					   i, GFP_HIGHUSER | __GFP_ZERO);
  		if (!page)
  			break;
  		pr_debug("pid(%d) page[%d]->count=%d
  ",
  			 current->pid, i, page_count(page));
  		SetPageUptodate(page);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
473
  		unlock_page(page);
3dc9acb67   Linus Torvalds   aio: clean up and...
474
475
  
  		ctx->ring_pages[i] = page;
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
476
  	}
3dc9acb67   Linus Torvalds   aio: clean up and...
477
  	ctx->nr_pages = i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
478

3dc9acb67   Linus Torvalds   aio: clean up and...
479
480
  	if (unlikely(i != nr_pages)) {
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
481
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
482
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
483
484
485
  	ctx->mmap_size = nr_pages * PAGE_SIZE;
  	pr_debug("attempting mmap of %lu bytes
  ", ctx->mmap_size);
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
486

013373e8b   Michal Hocko   aio: make aio_set...
487
488
489
490
491
  	if (down_write_killable(&mm->mmap_sem)) {
  		ctx->mmap_size = 0;
  		aio_free_ring(ctx);
  		return -EINTR;
  	}
36bc08cc0   Gu Zheng   fs/aio: Add suppo...
492
493
  	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
  				       PROT_READ | PROT_WRITE,
897ab3e0c   Mike Rapoport   userfaultfd: non-...
494
  				       MAP_SHARED, 0, &unused, NULL);
3dc9acb67   Linus Torvalds   aio: clean up and...
495
  	up_write(&mm->mmap_sem);
58c85dc20   Kent Overstreet   aio: kill struct ...
496
  	if (IS_ERR((void *)ctx->mmap_base)) {
58c85dc20   Kent Overstreet   aio: kill struct ...
497
  		ctx->mmap_size = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
498
  		aio_free_ring(ctx);
fa8a53c39   Benjamin LaHaise   aio: v4 ensure ac...
499
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
500
  	}
58c85dc20   Kent Overstreet   aio: kill struct ...
501
502
  	pr_debug("mmap address: 0x%08lx
  ", ctx->mmap_base);
d6c355c7d   Benjamin LaHaise   aio: fix race in ...
503

58c85dc20   Kent Overstreet   aio: kill struct ...
504
505
  	ctx->user_id = ctx->mmap_base;
  	ctx->nr_events = nr_events; /* trusted copy */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
506

58c85dc20   Kent Overstreet   aio: kill struct ...
507
  	ring = kmap_atomic(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
508
  	ring->nr = nr_events;	/* user copy */
db446a08c   Benjamin LaHaise   aio: convert the ...
509
  	ring->id = ~0U;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510
511
512
513
514
  	ring->head = ring->tail = 0;
  	ring->magic = AIO_RING_MAGIC;
  	ring->compat_features = AIO_RING_COMPAT_FEATURES;
  	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
  	ring->header_length = sizeof(struct aio_ring);
e8e3c3d66   Cong Wang   fs: remove the se...
515
  	kunmap_atomic(ring);
58c85dc20   Kent Overstreet   aio: kill struct ...
516
  	flush_dcache_page(ctx->ring_pages[0]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
517
518
519
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
520
521
522
  #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
  #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
  #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
04b2fa9f8   Christoph Hellwig   fs: split generic...
523
  void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
0460fef2a   Kent Overstreet   aio: use cancella...
524
  {
54843f875   Christoph Hellwig   aio: refactor rea...
525
  	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
0460fef2a   Kent Overstreet   aio: use cancella...
526
527
  	struct kioctx *ctx = req->ki_ctx;
  	unsigned long flags;
75321b50a   Christoph Hellwig   aio: sanitize ki_...
528
529
  	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
  		return;
0460fef2a   Kent Overstreet   aio: use cancella...
530

75321b50a   Christoph Hellwig   aio: sanitize ki_...
531
532
  	spin_lock_irqsave(&ctx->ctx_lock, flags);
  	list_add_tail(&req->ki_list, &ctx->active_reqs);
0460fef2a   Kent Overstreet   aio: use cancella...
533
  	req->ki_cancel = cancel;
0460fef2a   Kent Overstreet   aio: use cancella...
534
535
536
  	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  }
  EXPORT_SYMBOL(kiocb_set_cancel_fn);
a6d7cff47   Tejun Heo   fs/aio: Add expli...
537
538
539
  /*
   * free_ioctx() should be RCU delayed to synchronize against the RCU
   * protected lookup_ioctx() and also needs process context to call
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
540
   * aio_free_ring().  Use rcu_work.
a6d7cff47   Tejun Heo   fs/aio: Add expli...
541
   */
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
542
  static void free_ioctx(struct work_struct *work)
36f558890   Kent Overstreet   aio: refcounting ...
543
  {
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
544
545
  	struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
  					  free_rwork);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
546
547
  	pr_debug("freeing %p
  ", ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
548

e34ecee2a   Kent Overstreet   aio: Fix a trinit...
549
  	aio_free_ring(ctx);
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
550
  	free_percpu(ctx->cpu);
9a1049da9   Tejun Heo   percpu-refcount: ...
551
552
  	percpu_ref_exit(&ctx->reqs);
  	percpu_ref_exit(&ctx->users);
36f558890   Kent Overstreet   aio: refcounting ...
553
554
  	kmem_cache_free(kioctx_cachep, ctx);
  }
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
555
556
557
  static void free_ioctx_reqs(struct percpu_ref *ref)
  {
  	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
e02ba72aa   Anatol Pomozov   aio: block io_des...
558
  	/* At this point we know that there are no any in-flight requests */
dc48e56d7   Jens Axboe   aio: fix serial d...
559
560
  	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  		complete(&ctx->rq_wait->comp);
e02ba72aa   Anatol Pomozov   aio: block io_des...
561

a6d7cff47   Tejun Heo   fs/aio: Add expli...
562
  	/* Synchronize against RCU protected table->table[] dereferences */
f729863a8   Tejun Heo   fs/aio: Use rcu_w...
563
564
  	INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
  	queue_rcu_work(system_wq, &ctx->free_rwork);
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
565
  }
36f558890   Kent Overstreet   aio: refcounting ...
566
567
568
569
570
/*
 * When this function runs, the kioctx has been removed from the "hash table"
 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
 * now it's safe to cancel any that need to be.
 */
static void free_ioctx_users(struct percpu_ref *ref)
{
	struct kioctx *ctx = container_of(ref, struct kioctx, users);
	struct aio_kiocb *req;

	/* irq-safe lock: completions may come in from irq context. */
	spin_lock_irq(&ctx->ctx_lock);

	/* Cancel every request still on the active list. */
	while (!list_empty(&ctx->active_reqs)) {
		req = list_first_entry(&ctx->active_reqs,
				       struct aio_kiocb, ki_list);
		req->ki_cancel(&req->rw);
		list_del_init(&req->ki_list);
	}

	spin_unlock_irq(&ctx->ctx_lock);

	/*
	 * Drop the initial ctx->reqs reference; once all in-flight
	 * requests finish, free_ioctx_reqs() runs and frees the ctx.
	 */
	percpu_ref_kill(&ctx->reqs);
	percpu_ref_put(&ctx->reqs);
}
db446a08c   Benjamin LaHaise   aio: convert the ...
589
590
591
592
593
594
595
/*
 * Install @ctx into @mm's RCU-protected ioctx table, assigning it a free
 * slot id.  If the current table is full, the table is grown (4x) outside
 * the lock and the scan is retried.  Returns 0 on success or -ENOMEM.
 */
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
{
	unsigned i, new_nr;
	struct kioctx_table *table, *old;
	struct aio_ring *ring;

	spin_lock(&mm->ioctx_lock);
	/* _raw is fine: we hold ioctx_lock, the writer-side lock. */
	table = rcu_dereference_raw(mm->ioctx_table);

	while (1) {
		/* Look for a free slot in the current table. */
		if (table)
			for (i = 0; i < table->nr; i++)
				if (!rcu_access_pointer(table->table[i])) {
					ctx->id = i;
					rcu_assign_pointer(table->table[i], ctx);
					spin_unlock(&mm->ioctx_lock);

					/* While kioctx setup is in progress,
					 * we are protected from page migration
					 * changes ring_pages by ->ring_lock.
					 */
					ring = kmap_atomic(ctx->ring_pages[0]);
					ring->id = ctx->id;
					kunmap_atomic(ring);
					return 0;
				}

		/* No free slot: grow the table (outside the spinlock). */
		new_nr = (table ? table->nr : 1) * 4;
		spin_unlock(&mm->ioctx_lock);

		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
				new_nr, GFP_KERNEL);
		if (!table)
			return -ENOMEM;

		table->nr = new_nr;

		spin_lock(&mm->ioctx_lock);
		/* The table may have changed while the lock was dropped. */
		old = rcu_dereference_raw(mm->ioctx_table);

		if (!old) {
			rcu_assign_pointer(mm->ioctx_table, table);
		} else if (table->nr > old->nr) {
			/* Still larger: copy entries and publish; free the
			 * old table after a grace period. */
			memcpy(table->table, old->table,
			       old->nr * sizeof(struct kioctx *));

			rcu_assign_pointer(mm->ioctx_table, table);
			kfree_rcu(old, rcu);
		} else {
			/* Someone else grew it first; retry with theirs. */
			kfree(table);
			table = old;
		}
	}
}
e34ecee2a   Kent Overstreet   aio: Fix a trinit...
642
643
644
645
646
647
648
649
650
  static void aio_nr_sub(unsigned nr)
  {
  	spin_lock(&aio_nr_lock);
  	if (WARN_ON(aio_nr - nr > aio_nr))
  		aio_nr = 0;
  	else
  		aio_nr -= nr;
  	spin_unlock(&aio_nr_lock);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
651
652
653
654
655
/* ioctx_alloc
 *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
 */
static struct kioctx *ioctx_alloc(unsigned nr_events)
{
	struct mm_struct *mm = current->mm;
	struct kioctx *ctx;
	int err = -ENOMEM;

	/*
	 * Store the original nr_events -- what userspace passed to io_setup(),
	 * for counting against the global limit -- before it changes.
	 */
	unsigned int max_reqs = nr_events;

	/*
	 * We keep track of the number of available ringbuffer slots, to prevent
	 * overflow (reqs_available), and we also use percpu counters for this.
	 *
	 * So since up to half the slots might be on other cpu's percpu counters
	 * and unavailable, double nr_events so userspace sees what they
	 * expected: additionally, we move req_batch slots to/from percpu
	 * counters at a time, so make sure that isn't 0:
	 */
	nr_events = max(nr_events, num_possible_cpus() * 4);
	nr_events *= 2;

	/* Prevent overflows */
	if (nr_events > (0x10000000U / sizeof(struct io_event))) {
		pr_debug("ENOMEM: nr_events too high\n");
		return ERR_PTR(-EINVAL);
	}

	/* Reject zero (after scaling overflow) or over-limit requests. */
	if (!nr_events || (unsigned long)max_reqs > aio_max_nr)
		return ERR_PTR(-EAGAIN);

	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	ctx->max_reqs = max_reqs;

	spin_lock_init(&ctx->ctx_lock);
	spin_lock_init(&ctx->completion_lock);
	mutex_init(&ctx->ring_lock);
	/* Protect against page migration throughout kiotx setup by keeping
	 * the ring_lock mutex held until setup is complete. */
	mutex_lock(&ctx->ring_lock);
	init_waitqueue_head(&ctx->wait);

	INIT_LIST_HEAD(&ctx->active_reqs);

	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
		goto err;

	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
		goto err;

	ctx->cpu = alloc_percpu(struct kioctx_cpu);
	if (!ctx->cpu)
		goto err;

	/* Allocate and mmap the completion ring (sets ctx->nr_events). */
	err = aio_setup_ring(ctx, nr_events);
	if (err < 0)
		goto err;

	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
	if (ctx->req_batch < 1)
		ctx->req_batch = 1;

	/* limit the number of system wide aios */
	spin_lock(&aio_nr_lock);
	if (aio_nr + ctx->max_reqs > aio_max_nr ||
	    aio_nr + ctx->max_reqs < aio_nr) {
		spin_unlock(&aio_nr_lock);
		err = -EAGAIN;
		goto err_ctx;
	}
	aio_nr += ctx->max_reqs;
	spin_unlock(&aio_nr_lock);

	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
	percpu_ref_get(&ctx->reqs);	/* free_ioctx_users() will drop this */

	err = ioctx_add_table(ctx, mm);
	if (err)
		goto err_cleanup;

	/* Release the ring_lock mutex now that all setup is complete. */
	mutex_unlock(&ctx->ring_lock);

	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
		 ctx, ctx->user_id, mm, ctx->nr_events);
	return ctx;

err_cleanup:
	/* Undo the global accounting taken above. */
	aio_nr_sub(ctx->max_reqs);
err_ctx:
	atomic_set(&ctx->dead, 1);
	if (ctx->mmap_size)
		vm_munmap(ctx->mmap_base, ctx->mmap_size);
	aio_free_ring(ctx);
err:
	mutex_unlock(&ctx->ring_lock);
	free_percpu(ctx->cpu);
	/* Safe even if the corresponding percpu_ref_init() failed/was skipped. */
	percpu_ref_exit(&ctx->reqs);
	percpu_ref_exit(&ctx->users);
	kmem_cache_free(kioctx_cachep, ctx);
	pr_debug("error allocating ioctx %d\n", err);
	return ERR_PTR(err);
}
36f558890   Kent Overstreet   aio: refcounting ...
758
759
760
761
762
/* kill_ioctx
 *	Cancels all outstanding aio requests on an aio context.  Used
 *	when the processes owning a context have all exited to encourage
 *	the rapid destruction of the kioctx.
 */
static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
		      struct ctx_rq_wait *wait)
{
	struct kioctx_table *table;

	spin_lock(&mm->ioctx_lock);
	/* atomic_xchg: only the first killer proceeds; racers get -EINVAL. */
	if (atomic_xchg(&ctx->dead, 1)) {
		spin_unlock(&mm->ioctx_lock);
		return -EINVAL;
	}

	/* Unpublish the ctx from the table; readers use RCU. */
	table = rcu_dereference_raw(mm->ioctx_table);
	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
	RCU_INIT_POINTER(table->table[ctx->id], NULL);
	spin_unlock(&mm->ioctx_lock);

	/* free_ioctx_reqs() will do the necessary RCU synchronization */
	wake_up_all(&ctx->wait);

	/*
	 * It'd be more correct to do this in free_ioctx(), after all
	 * the outstanding kiocbs have finished - but by then io_destroy
	 * has already returned, so io_setup() could potentially return
	 * -EAGAIN with no ioctxs actually in use (as far as userspace
	 *  could tell).
	 */
	aio_nr_sub(ctx->max_reqs);

	/* mmap_size == 0 means exit_aio() already disclaimed the mapping. */
	if (ctx->mmap_size)
		vm_munmap(ctx->mmap_base, ctx->mmap_size);

	ctx->rq_wait = wait;
	/* Dropping the last ->users ref triggers free_ioctx_users(). */
	percpu_ref_kill(&ctx->users);
	return 0;
}
36f558890   Kent Overstreet   aio: refcounting ...
798
799
800
801
802
803
804
/*
 * exit_aio: called when the last user of mm goes away.  At this point, there is
 * no way for any new requests to be submited or any of the io_* syscalls to be
 * called on the context.
 *
 * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
 * them.
 */
void exit_aio(struct mm_struct *mm)
{
	/* No concurrent users of mm remain, so a raw dereference is safe. */
	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
	struct ctx_rq_wait wait;
	int i, skipped;

	if (!table)
		return;

	/* One count per table slot; empty slots are subtracted below. */
	atomic_set(&wait.count, table->nr);
	init_completion(&wait.comp);

	skipped = 0;
	for (i = 0; i < table->nr; ++i) {
		struct kioctx *ctx =
			rcu_dereference_protected(table->table[i], true);

		if (!ctx) {
			skipped++;
			continue;
		}

		/*
		 * We don't need to bother with munmap() here - exit_mmap(mm)
		 * is coming and it'll unmap everything. And we simply can't,
		 * this is not necessarily our ->mm.
		 * Since kill_ioctx() uses non-zero ->mmap_size as indicator
		 * that it needs to unmap the area, just set it to 0.
		 */
		ctx->mmap_size = 0;
		kill_ioctx(mm, ctx, &wait);
	}

	if (!atomic_sub_and_test(skipped, &wait.count)) {
		/* Wait until all IO for the context are done. */
		wait_for_completion(&wait.comp);
	}

	RCU_INIT_POINTER(mm->ioctx_table, NULL);
	kfree(table);
}
e1bdd5f27   Kent Overstreet   aio: percpu reqs_...
846
847
848
/*
 * Return @nr ring slots to this CPU's percpu pool, spilling surplus
 * batches back into the shared atomic counter.  Interrupts are disabled
 * so the percpu counter cannot be raced by completions on this CPU.
 */
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
	struct kioctx_cpu *kcpu;
	unsigned long flags;

	local_irq_save(flags);
	kcpu = this_cpu_ptr(ctx->cpu);
	kcpu->reqs_available += nr;

	/* Keep at most 2*req_batch locally; push the rest to the global pool. */
	while (kcpu->reqs_available >= ctx->req_batch * 2) {
		kcpu->reqs_available -= ctx->req_batch;
		atomic_add(ctx->req_batch, &ctx->reqs_available);
	}

	local_irq_restore(flags);
}
432c79978   Christoph Hellwig   aio: separate out...
861
/*
 * Take one ring slot from this CPU's percpu pool, refilling a whole
 * req_batch from the shared atomic counter when the local pool is empty.
 * Returns false if no slots are available anywhere.
 */
static bool __get_reqs_available(struct kioctx *ctx)
{
	struct kioctx_cpu *kcpu;
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);
	kcpu = this_cpu_ptr(ctx->cpu);
	if (!kcpu->reqs_available) {
		int old, avail = atomic_read(&ctx->reqs_available);

		/* cmpxchg loop: claim a full batch from the global pool. */
		do {
			if (avail < ctx->req_batch)
				goto out;

			old = avail;
			avail = atomic_cmpxchg(&ctx->reqs_available,
					       avail, avail - ctx->req_batch);
		} while (avail != old);

		kcpu->reqs_available += ctx->req_batch;
	}

	ret = true;
	kcpu->reqs_available--;
out:
	local_irq_restore(flags);
	return ret;
}
d856f32a8   Benjamin LaHaise   aio: fix reqs_ava...
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
/* refill_reqs_available
 *	Updates the reqs_available reference counts used for tracking the
 *	number of free slots in the completion ring.  This can be called
 *	from aio_complete() (to optimistically update reqs_available) or
 *	from aio_get_req() (the we're out of events case).  It must be
 *	called holding ctx->completion_lock.
 */
static void refill_reqs_available(struct kioctx *ctx, unsigned head,
                                  unsigned tail)
{
	unsigned events_in_ring, completed;

	/* Clamp head since userland can write to it. */
	head %= ctx->nr_events;
	if (head <= tail)
		events_in_ring = tail - head;
	else
		events_in_ring = ctx->nr_events - (head - tail);

	/* Only slots the user has consumed (reaped) can be returned. */
	completed = ctx->completed_events;
	if (events_in_ring < completed)
		completed -= events_in_ring;
	else
		completed = 0;

	if (!completed)
		return;

	ctx->completed_events -= completed;
	put_reqs_available(ctx, completed);
}
  
/* user_refill_reqs_available
 *	Called to refill reqs_available when aio_get_req() encounters an
 *	out of space in the completion ring.
 */
static void user_refill_reqs_available(struct kioctx *ctx)
{
	spin_lock_irq(&ctx->completion_lock);
	if (ctx->completed_events) {
		struct aio_ring *ring;
		unsigned head;

		/* Access of ring->head may race with aio_read_events_ring()
		 * here, but that's okay since whether we read the old version
		 * or the new version, and either will be valid.  The important
		 * part is that head cannot pass tail since we prevent
		 * aio_complete() from updating tail by holding
		 * ctx->completion_lock.  Even if head is invalid, the check
		 * against ctx->completed_events below will make sure we do the
		 * safe/right thing.
		 */
		ring = kmap_atomic(ctx->ring_pages[0]);
		head = ring->head;
		kunmap_atomic(ring);

		refill_reqs_available(ctx, head, ctx->tail);
	}

	spin_unlock_irq(&ctx->completion_lock);
}
432c79978   Christoph Hellwig   aio: separate out...
951
952
953
954
955
956
957
  static bool get_reqs_available(struct kioctx *ctx)
  {
  	if (__get_reqs_available(ctx))
  		return true;
  	user_refill_reqs_available(ctx);
  	return __get_reqs_available(ctx);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
958
/* aio_get_req
 *	Allocate a slot for an aio request.
 * Returns NULL if no requests are free.
 *
 * The refcount is initialized to 2 - one for the async op completion,
 * one for the synchronous code that does this.
 */
static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
	struct aio_kiocb *req;

	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
	if (unlikely(!req))
		return NULL;

	/* Need both the kiocb and a ring slot; give back the kiocb if
	 * the ring is full. */
	if (unlikely(!get_reqs_available(ctx))) {
		kmem_cache_free(kiocb_cachep, req);
		return NULL;
	}

	/* Pin the ctx for the lifetime of this request. */
	percpu_ref_get(&ctx->reqs);
	req->ki_ctx = ctx;
	INIT_LIST_HEAD(&req->ki_list);
	refcount_set(&req->ki_refcnt, 2);
	req->ki_eventfd = NULL;
	return req;
}
d5470b596   Adrian Bunk   fs/aio.c: make 3 ...
984
/*
 * Resolve a userspace ctx_id (the mmap'ed ring address) to its kioctx,
 * taking a ->users reference on success.  Returns NULL if the id is
 * invalid or the context is being torn down.
 */
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
	struct aio_ring __user *ring  = (void __user *)ctx_id;
	struct mm_struct *mm = current->mm;
	struct kioctx *ctx, *ret = NULL;
	struct kioctx_table *table;
	unsigned id;

	/* The table index is stored in the user-visible ring header. */
	if (get_user(id, &ring->id))
		return NULL;

	rcu_read_lock();
	table = rcu_dereference(mm->ioctx_table);

	if (!table || id >= table->nr)
		goto out;

	/* Clamp the (userspace-controlled) index - Spectre-v1 hardening. */
	id = array_index_nospec(id, table->nr);
	ctx = rcu_dereference(table->table[id]);
	/* Verify user_id too: the ring header is writable by userspace. */
	if (ctx && ctx->user_id == ctx_id) {
		/* tryget_live fails if the ctx is already being killed. */
		if (percpu_ref_tryget_live(&ctx->users))
			ret = ctx;
	}
out:
	rcu_read_unlock();
	return ret;
}
b53119f13   Linus Torvalds   pin iocb through ...
1011
1012
/*
 * Release all resources held by a request: its eventfd and file
 * references, its pin on the ctx, and finally the kiocb itself.
 */
static inline void iocb_destroy(struct aio_kiocb *iocb)
{
	if (iocb->ki_eventfd)
		eventfd_ctx_put(iocb->ki_eventfd);
	if (iocb->ki_filp)
		fput(iocb->ki_filp);
	percpu_ref_put(&iocb->ki_ctx->reqs);
	kmem_cache_free(kiocb_cachep, iocb);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1020
1021
/* aio_complete
 *	Called when the io request on the given iocb is complete.
 */
static void aio_complete(struct aio_kiocb *iocb)
{
	struct kioctx	*ctx = iocb->ki_ctx;
	struct aio_ring	*ring;
	struct io_event	*ev_page, *event;
	unsigned tail, pos, head;
	unsigned long	flags;

	/*
	 * Add a completion event to the ring buffer. Must be done holding
	 * ctx->completion_lock to prevent other code from messing with the tail
	 * pointer since we might be called from irq context.
	 */
	spin_lock_irqsave(&ctx->completion_lock, flags);

	tail = ctx->tail;
	pos = tail + AIO_EVENTS_OFFSET;

	if (++tail >= ctx->nr_events)
		tail = 0;

	/* Copy the prepared event into its ring slot. */
	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
	event = ev_page + pos % AIO_EVENTS_PER_PAGE;

	*event = iocb->ki_res;

	kunmap_atomic(ev_page);
	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);

	pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
		 (void __user *)(unsigned long)iocb->ki_res.obj,
		 iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);

	/* after flagging the request as done, we
	 * must never even look at it again
	 */
	smp_wmb();	/* make event visible before updating tail */

	ctx->tail = tail;

	/* Publish the new tail to userspace via the ring header page. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	head = ring->head;
	ring->tail = tail;
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);

	/* Opportunistically return reaped slots (see refill_reqs_available). */
	ctx->completed_events++;
	if (ctx->completed_events > 1)
		refill_reqs_available(ctx, head, tail);

	spin_unlock_irqrestore(&ctx->completion_lock, flags);

	pr_debug("added to ring %p at [%u]\n", iocb, tail);

	/*
	 * Check if the user asked us to deliver the result through an
	 * eventfd. The eventfd_signal() function is safe to be called
	 * from IRQ context.
	 */
	if (iocb->ki_eventfd)
		eventfd_signal(iocb->ki_eventfd, 1);

	/*
	 * We have to order our ring_info tail store above and test
	 * of the wait list below outside the wait lock.  This is
	 * like in wake_up_bit() where clearing a bit has to be
	 * ordered with the unlocked test.
	 */
	smp_mb();

	if (waitqueue_active(&ctx->wait))
		wake_up(&ctx->wait);
}
  
  static inline void iocb_put(struct aio_kiocb *iocb)
  {
  	if (refcount_dec_and_test(&iocb->ki_refcnt)) {
  		aio_complete(iocb);
  		iocb_destroy(iocb);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097
  }
2be4e7dee   Gu Zheng   aio: fix some com...
1098
/* aio_read_events_ring
 *	Pull an event off of the ioctx's event ring.  Returns the number of
 *	events fetched
 */
static long aio_read_events_ring(struct kioctx *ctx,
				 struct io_event __user *event, long nr)
{
	struct aio_ring *ring;
	unsigned head, tail, pos;
	long ret = 0;		/* number of events copied so far */
	int copy_ret;

	/*
	 * The mutex can block and wake us up and that will cause
	 * wait_event_interruptible_hrtimeout() to schedule without sleeping
	 * and repeat. This should be rare enough that it doesn't cause
	 * performance issues. See the comment in read_events() for more detail.
	 */
	sched_annotate_sleep();
	mutex_lock(&ctx->ring_lock);

	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	head = ring->head;
	tail = ring->tail;
	kunmap_atomic(ring);

	/*
	 * Ensure that once we've read the current tail pointer, that
	 * we also see the events that were stored up to the tail.
	 */
	smp_rmb();

	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);

	if (head == tail)
		goto out;	/* ring is empty: nothing to copy */

	/* head/tail are free-running counters; reduce them to ring indices. */
	head %= ctx->nr_events;
	tail %= ctx->nr_events;

	while (ret < nr) {
		long avail;
		struct io_event *ev;
		struct page *page;

		/* Events available without wrapping past the end of the ring. */
		avail = (head <= tail ?  tail : ctx->nr_events) - head;
		if (head == tail)
			break;

		pos = head + AIO_EVENTS_OFFSET;
		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
		pos %= AIO_EVENTS_PER_PAGE;

		/* Clamp to the caller's remaining space and to this page. */
		avail = min(avail, nr - ret);
		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);

		ev = kmap(page);
		copy_ret = copy_to_user(event + ret, ev + pos,
					sizeof(*ev) * avail);
		kunmap(page);

		if (unlikely(copy_ret)) {
			ret = -EFAULT;
			goto out;
		}

		ret += avail;
		head += avail;
		head %= ctx->nr_events;
	}

	/* Publish the new head so completion can reuse the consumed slots. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	ring->head = head;
	kunmap_atomic(ring);
	flush_dcache_page(ctx->ring_pages[0]);

	pr_debug("%li  h%u t%u\n", ret, head, tail);
out:
	mutex_unlock(&ctx->ring_lock);

	return ret;
}
a31ad380b   Kent Overstreet   aio: make aio_rea...
1174
1175
  static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
  			    struct io_event __user *event, long *i)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1176
  {
a31ad380b   Kent Overstreet   aio: make aio_rea...
1177
  	long ret = aio_read_events_ring(ctx, event + *i, nr - *i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1178

a31ad380b   Kent Overstreet   aio: make aio_rea...
1179
1180
  	if (ret > 0)
  		*i += ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1181

a31ad380b   Kent Overstreet   aio: make aio_rea...
1182
1183
  	if (unlikely(atomic_read(&ctx->dead)))
  		ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1184

a31ad380b   Kent Overstreet   aio: make aio_rea...
1185
1186
  	if (!*i)
  		*i = ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1187

a31ad380b   Kent Overstreet   aio: make aio_rea...
1188
  	return ret < 0 || *i >= min_nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1189
  }
a31ad380b   Kent Overstreet   aio: make aio_rea...
1190
/*
 * read_events
 *	Wait for at least min_nr events (up to nr), copying them to @event.
 *	@until bounds the wait; 0 means poll the ring once without sleeping.
 *	Returns the number of events gathered, or a negative error when
 *	nothing was gathered.
 */
static long read_events(struct kioctx *ctx, long min_nr, long nr,
			struct io_event __user *event,
			ktime_t until)
{
	long ret = 0;

	/*
	 * Note that aio_read_events() is being called as the conditional - i.e.
	 * we're calling it after prepare_to_wait() has set task state to
	 * TASK_INTERRUPTIBLE.
	 *
	 * But aio_read_events() can block, and if it blocks it's going to flip
	 * the task state back to TASK_RUNNING.
	 *
	 * This should be ok, provided it doesn't flip the state back to
	 * TASK_RUNNING and return 0 too much - that causes us to spin. That
	 * will only happen if the mutex_lock() call blocks, and we then find
	 * the ringbuffer empty. So in practice we should be ok, but it's
	 * something to be aware of when touching this code.
	 */
	if (until == 0)
		aio_read_events(ctx, min_nr, nr, event, &ret);
	else
		wait_event_interruptible_hrtimeout(ctx->wait,
				aio_read_events(ctx, min_nr, nr, event, &ret),
				until);
	return ret;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
  /* sys_io_setup:
   *	Create an aio_context capable of receiving at least nr_events.
   *	ctxp must not point to an aio_context that already exists, and
   *	must be initialized to 0 prior to the call.  On successful
   *	creation of the aio_context, *ctxp is filled in with the resulting 
   *	handle.  May fail with -EINVAL if *ctxp is not initialized,
   *	if the specified nr_events exceeds internal limits.  May fail 
   *	with -EAGAIN if the specified nr_events exceeds the user's limit 
   *	of available events.  May fail with -ENOMEM if insufficient kernel
   *	resources are available.  May fail with -EFAULT if an invalid
   *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
   *	implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1231
  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctxp);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1242
  	if (unlikely(ctx || nr_events == 0)) {
acd88d4e1   Kinglong Mee   fs/aio.c: Remove ...
1243
1244
  		pr_debug("EINVAL: ctx %lu nr_events %u
  ",
d55b5fdaf   Zach Brown   [PATCH] aio: remo...
1245
  		         ctx, nr_events);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1246
1247
1248
1249
1250
1251
1252
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		ret = put_user(ioctx->user_id, ctxp);
a2e1859ad   Al Viro   aio: take final p...
1253
  		if (ret)
e02ba72aa   Anatol Pomozov   aio: block io_des...
1254
  			kill_ioctx(current->mm, ioctx, NULL);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1255
  		percpu_ref_put(&ioctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1256
1257
1258
1259
1260
  	}
  
  out:
  	return ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
  #ifdef CONFIG_COMPAT
  COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
  {
  	struct kioctx *ioctx = NULL;
  	unsigned long ctx;
  	long ret;
  
  	ret = get_user(ctx, ctx32p);
  	if (unlikely(ret))
  		goto out;
  
  	ret = -EINVAL;
  	if (unlikely(ctx || nr_events == 0)) {
  		pr_debug("EINVAL: ctx %lu nr_events %u
  ",
  		         ctx, nr_events);
  		goto out;
  	}
  
  	ioctx = ioctx_alloc(nr_events);
  	ret = PTR_ERR(ioctx);
  	if (!IS_ERR(ioctx)) {
  		/* truncating is ok because it's a user address */
  		ret = put_user((u32)ioctx->user_id, ctx32p);
  		if (ret)
  			kill_ioctx(current->mm, ioctx, NULL);
  		percpu_ref_put(&ioctx->users);
  	}
  
  out:
  	return ret;
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1294
1295
1296
  /* sys_io_destroy:
   *	Destroy the aio_context specified.  May cancel any outstanding 
   *	AIOs and block on completion.  Will fail with -ENOSYS if not
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1297
   *	implemented.  May fail with -EINVAL if the context pointed to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1298
1299
   *	is invalid.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1300
  SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1301
1302
1303
  {
  	struct kioctx *ioctx = lookup_ioctx(ctx);
  	if (likely(NULL != ioctx)) {
dc48e56d7   Jens Axboe   aio: fix serial d...
1304
  		struct ctx_rq_wait wait;
fb2d44838   Benjamin LaHaise   aio: report error...
1305
  		int ret;
e02ba72aa   Anatol Pomozov   aio: block io_des...
1306

dc48e56d7   Jens Axboe   aio: fix serial d...
1307
1308
  		init_completion(&wait.comp);
  		atomic_set(&wait.count, 1);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1309
1310
1311
1312
  		/* Pass requests_done to kill_ioctx() where it can be set
  		 * in a thread-safe way. If we try to set it here then we have
  		 * a race condition if two io_destroy() called simultaneously.
  		 */
dc48e56d7   Jens Axboe   aio: fix serial d...
1313
  		ret = kill_ioctx(current->mm, ioctx, &wait);
723be6e39   Kent Overstreet   aio: percpu ioctx...
1314
  		percpu_ref_put(&ioctx->users);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1315
1316
1317
1318
1319
  
  		/* Wait until all IO for the context are done. Otherwise kernel
  		 * keep using user-space buffers even if user thinks the context
  		 * is destroyed.
  		 */
fb2d44838   Benjamin LaHaise   aio: report error...
1320
  		if (!ret)
dc48e56d7   Jens Axboe   aio: fix serial d...
1321
  			wait_for_completion(&wait.comp);
e02ba72aa   Anatol Pomozov   aio: block io_des...
1322

fb2d44838   Benjamin LaHaise   aio: report error...
1323
  		return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1324
  	}
acd88d4e1   Kinglong Mee   fs/aio.c: Remove ...
1325
1326
  	pr_debug("EINVAL: invalid context id
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1327
1328
  	return -EINVAL;
  }
3c96c7f4c   Al Viro   aio: take list re...
1329
1330
1331
1332
1333
1334
1335
1336
1337
  static void aio_remove_iocb(struct aio_kiocb *iocb)
  {
  	struct kioctx *ctx = iocb->ki_ctx;
  	unsigned long flags;
  
  	spin_lock_irqsave(&ctx->ctx_lock, flags);
  	list_del(&iocb->ki_list);
  	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
  }
54843f875   Christoph Hellwig   aio: refactor rea...
1338
1339
1340
/*
 * Completion callback for read/write iocbs (installed as ->ki_complete by
 * aio_prep_rw()).  Records the result in the iocb and drops the submission
 * reference via iocb_put().
 * NOTE(review): ctx_lock is taken irqsave in aio_remove_iocb(), which
 * suggests this can run from interrupt context — confirm against callers.
 */
static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
{
	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);

	/* Unlink from active_reqs if a cancel handler ever queued us there. */
	if (!list_empty_careful(&iocb->ki_list))
		aio_remove_iocb(iocb);

	if (kiocb->ki_flags & IOCB_WRITE) {
		struct inode *inode = file_inode(kiocb->ki_filp);

		/*
		 * Tell lockdep we inherited freeze protection from submission
		 * thread.
		 */
		if (S_ISREG(inode->i_mode))
			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
		file_end_write(kiocb->ki_filp);
	}

	iocb->ki_res.res = res;
	iocb->ki_res.res2 = res2;
	iocb_put(iocb);
}
88a6f18b9   Jens Axboe   aio: split out io...
1358
  static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
54843f875   Christoph Hellwig   aio: refactor rea...
1359
1360
  {
  	int ret;
54843f875   Christoph Hellwig   aio: refactor rea...
1361
  	req->ki_complete = aio_complete_rw;
ec51f8ee1   Mike Marshall   aio: initialize k...
1362
  	req->private = NULL;
54843f875   Christoph Hellwig   aio: refactor rea...
1363
1364
1365
1366
  	req->ki_pos = iocb->aio_offset;
  	req->ki_flags = iocb_flags(req->ki_filp);
  	if (iocb->aio_flags & IOCB_FLAG_RESFD)
  		req->ki_flags |= IOCB_EVENTFD;
fc28724d6   Adam Manzanares   fs: Convert kiocb...
1367
  	req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
d9a08a9e6   Adam Manzanares   fs: Add aio iopri...
1368
1369
1370
1371
1372
1373
1374
1375
  	if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
  		/*
  		 * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
  		 * aio_reqprio is interpreted as an I/O scheduling
  		 * class and priority.
  		 */
  		ret = ioprio_check_cap(iocb->aio_reqprio);
  		if (ret) {
9a6d9a62e   Adam Manzanares   fs: aio ioprio us...
1376
1377
  			pr_debug("aio ioprio check cap error: %d
  ", ret);
84c4e1f89   Linus Torvalds   aio: simplify - a...
1378
  			return ret;
d9a08a9e6   Adam Manzanares   fs: Add aio iopri...
1379
1380
1381
1382
  		}
  
  		req->ki_ioprio = iocb->aio_reqprio;
  	} else
76dc89139   Damien Le Moal   aio: Fix fallback...
1383
  		req->ki_ioprio = get_current_ioprio();
d9a08a9e6   Adam Manzanares   fs: Add aio iopri...
1384

54843f875   Christoph Hellwig   aio: refactor rea...
1385
1386
  	ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
  	if (unlikely(ret))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1387
  		return ret;
154989e45   Christoph Hellwig   aio: clear IOCB_H...
1388
1389
1390
  
  	req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
  	return 0;
54843f875   Christoph Hellwig   aio: refactor rea...
1391
  }
87e5e6dab   Jens Axboe   uio: make import_...
1392
1393
1394
  static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
  		struct iovec **iovec, bool vectored, bool compat,
  		struct iov_iter *iter)
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
1395
  {
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1396
1397
1398
1399
1400
1401
1402
1403
  	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
  	size_t len = iocb->aio_nbytes;
  
  	if (!vectored) {
  		ssize_t ret = import_single_range(rw, buf, len, *iovec, iter);
  		*iovec = NULL;
  		return ret;
  	}
9d85cba71   Jeff Moyer   aio: fix the comp...
1404
1405
  #ifdef CONFIG_COMPAT
  	if (compat)
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1406
1407
  		return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
  				iter);
9d85cba71   Jeff Moyer   aio: fix the comp...
1408
  #endif
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1409
  	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
1410
  }
9061d14a8   Al Viro   aio: all callers ...
1411
  static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1412
1413
1414
  {
  	switch (ret) {
  	case -EIOCBQUEUED:
9061d14a8   Al Viro   aio: all callers ...
1415
  		break;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
  	case -ERESTARTSYS:
  	case -ERESTARTNOINTR:
  	case -ERESTARTNOHAND:
  	case -ERESTART_RESTARTBLOCK:
  		/*
  		 * There's no easy way to restart the syscall since other AIO's
  		 * may be already running. Just fail this IO with EINTR.
  		 */
  		ret = -EINTR;
  		/*FALLTHRU*/
  	default:
bc9bff616   Jens Axboe   aio: use assigned...
1427
  		req->ki_complete(req, ret, 0);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1428
1429
  	}
  }
958c13ce1   Al Viro   make aio_read()/a...
1430
  static int aio_read(struct kiocb *req, const struct iocb *iocb,
88a6f18b9   Jens Axboe   aio: split out io...
1431
  			bool vectored, bool compat)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1432
  {
00fefb9cf   Gu Zheng   aio: use iovec ar...
1433
  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
293bc9822   Al Viro   new methods: ->re...
1434
  	struct iov_iter iter;
54843f875   Christoph Hellwig   aio: refactor rea...
1435
  	struct file *file;
958c13ce1   Al Viro   make aio_read()/a...
1436
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1437

54843f875   Christoph Hellwig   aio: refactor rea...
1438
1439
1440
1441
  	ret = aio_prep_rw(req, iocb);
  	if (ret)
  		return ret;
  	file = req->ki_filp;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1442
  	if (unlikely(!(file->f_mode & FMODE_READ)))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1443
  		return -EBADF;
54843f875   Christoph Hellwig   aio: refactor rea...
1444
  	ret = -EINVAL;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1445
  	if (unlikely(!file->f_op->read_iter))
84c4e1f89   Linus Torvalds   aio: simplify - a...
1446
  		return -EINVAL;
73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1447

89319d31d   Christoph Hellwig   fs: remove aio_ru...
1448
  	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
87e5e6dab   Jens Axboe   uio: make import_...
1449
  	if (ret < 0)
84c4e1f89   Linus Torvalds   aio: simplify - a...
1450
  		return ret;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1451
1452
  	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
  	if (!ret)
9061d14a8   Al Viro   aio: all callers ...
1453
  		aio_rw_done(req, call_read_iter(file, req, &iter));
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1454
1455
1456
  	kfree(iovec);
  	return ret;
  }
73a7075e3   Kent Overstreet   aio: Kill aio_rw_...
1457

958c13ce1   Al Viro   make aio_read()/a...
1458
/*
 * Submit one IOCB_CMD_PWRITE/PWRITEV request.  Returns 0 when the request
 * was started (completion is reported through aio_complete_rw(), possibly
 * synchronously via aio_rw_done()), or a negative error before it started.
 */
static int aio_write(struct kiocb *req, const struct iocb *iocb,
			 bool vectored, bool compat)
{
	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
	struct iov_iter iter;
	struct file *file;
	int ret;

	ret = aio_prep_rw(req, iocb);
	if (ret)
		return ret;
	file = req->ki_filp;
	if (unlikely(!(file->f_mode & FMODE_WRITE)))
		return -EBADF;
	if (unlikely(!file->f_op->write_iter))
		return -EINVAL;

	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
	if (ret < 0)
		return ret;
	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
	if (!ret) {
		/*
		 * Open-code file_start_write here to grab freeze protection,
		 * which will be released by another thread in
		 * aio_complete_rw().  Fool lockdep by telling it the lock got
		 * released so that it doesn't complain about the held lock when
		 * we return to userspace.
		 */
		if (S_ISREG(file_inode(file)->i_mode)) {
			__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
		}
		req->ki_flags |= IOCB_WRITE;
		aio_rw_done(req, call_write_iter(file, req, &iter));
	}
	kfree(iovec);
	return ret;
}
a3c0d439e   Christoph Hellwig   aio: implement IO...
1497
1498
  static void aio_fsync_work(struct work_struct *work)
  {
2bb874c0d   Al Viro   aio: store event ...
1499
  	struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
7328f9a4a   Miklos Szeredi   aio: fix async fs...
1500
  	const struct cred *old_cred = override_creds(iocb->fsync.creds);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1501

2bb874c0d   Al Viro   aio: store event ...
1502
  	iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
7328f9a4a   Miklos Szeredi   aio: fix async fs...
1503
1504
  	revert_creds(old_cred);
  	put_cred(iocb->fsync.creds);
2bb874c0d   Al Viro   aio: store event ...
1505
  	iocb_put(iocb);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1506
  }
88a6f18b9   Jens Axboe   aio: split out io...
1507
1508
  static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
  		     bool datasync)
a3c0d439e   Christoph Hellwig   aio: implement IO...
1509
1510
1511
1512
  {
  	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
  			iocb->aio_rw_flags))
  		return -EINVAL;
a11e1d432   Linus Torvalds   Revert changes to...
1513

84c4e1f89   Linus Torvalds   aio: simplify - a...
1514
  	if (unlikely(!req->file->f_op->fsync))
a3c0d439e   Christoph Hellwig   aio: implement IO...
1515
  		return -EINVAL;
a3c0d439e   Christoph Hellwig   aio: implement IO...
1516

7328f9a4a   Miklos Szeredi   aio: fix async fs...
1517
1518
1519
  	req->creds = prepare_creds();
  	if (!req->creds)
  		return -ENOMEM;
a3c0d439e   Christoph Hellwig   aio: implement IO...
1520
1521
1522
  	req->datasync = datasync;
  	INIT_WORK(&req->work, aio_fsync_work);
  	schedule_work(&req->work);
9061d14a8   Al Viro   aio: all callers ...
1523
  	return 0;
a3c0d439e   Christoph Hellwig   aio: implement IO...
1524
  }
8dcbf2683   Jens Axboe   aio: prevent pote...
1525
1526
1527
1528
1529
1530
1531
  static void aio_poll_put_work(struct work_struct *work)
  {
  	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
  	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
  
  	iocb_put(iocb);
  }
bfe4037e7   Christoph Hellwig   aio: implement IO...
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
/*
 * Workqueue completion path for IOCB_CMD_POLL: re-evaluates the file's
 * readiness (unless the iocb was cancelled) and either completes the iocb
 * or re-arms the wait queue entry to keep waiting.
 */
static void aio_poll_complete_work(struct work_struct *work)
{
	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
	struct poll_table_struct pt = { ._key = req->events };
	struct kioctx *ctx = iocb->ki_ctx;
	__poll_t mask = 0;

	if (!READ_ONCE(req->cancelled))
		mask = vfs_poll(req->file, &pt) & req->events;

	/*
	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
	 * synchronize with them.  In the cancellation case the list_del_init
	 * itself is not actually needed, but harmless so we keep it in to
	 * avoid further branches in the fast path.
	 */
	spin_lock_irq(&ctx->ctx_lock);
	if (!mask && !READ_ONCE(req->cancelled)) {
		/* No event yet and not cancelled: go back to waiting. */
		add_wait_queue(req->head, &req->wait);
		spin_unlock_irq(&ctx->ctx_lock);
		return;
	}
	list_del_init(&iocb->ki_list);
	iocb->ki_res.res = mangle_poll(mask);
	req->done = true;
	spin_unlock_irq(&ctx->ctx_lock);
	iocb_put(iocb);
}
  
/* assumes we are called with irqs disabled */
static int aio_poll_cancel(struct kiocb *iocb)
{
	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
	struct poll_iocb *req = &aiocb->poll;

	/*
	 * Mark the iocb cancelled; if it is still queued on the waitqueue,
	 * dequeue it and let aio_poll_complete_work() finish the completion
	 * from process context.
	 */
	spin_lock(&req->head->lock);
	WRITE_ONCE(req->cancelled, true);
	if (!list_empty(&req->wait.entry)) {
		list_del_init(&req->wait.entry);
		schedule_work(&aiocb->poll.work);
	}
	spin_unlock(&req->head->lock);

	return 0;
}
  
/*
 * Waitqueue callback for IOCB_CMD_POLL.  Invoked when the watched file
 * wakes its poll waitqueue.  Completes the iocb inline when the ctx_lock
 * can be taken, otherwise punts to aio_poll_complete_work().
 */
static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
		void *key)
{
	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
	__poll_t mask = key_to_poll(key);
	unsigned long flags;

	/* for instances that support it check for an event match first: */
	if (mask && !(mask & req->events))
		return 0;

	list_del_init(&req->wait.entry);

	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
		struct kioctx *ctx = iocb->ki_ctx;

		/*
		 * Try to complete the iocb inline if we can. Use
		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
		 * call this function with IRQs disabled and because IRQs
		 * have to be disabled before ctx_lock is obtained.
		 */
		list_del(&iocb->ki_list);
		iocb->ki_res.res = mangle_poll(mask);
		req->done = true;
		/*
		 * If an eventfd is attached and eventfd_signal_count() says
		 * signaling now could recurse, defer the final iocb_put()
		 * (and hence the eventfd signal) to a workqueue.
		 */
		if (iocb->ki_eventfd && eventfd_signal_count()) {
			iocb = NULL;
			INIT_WORK(&req->work, aio_poll_put_work);
			schedule_work(&req->work);
		}
		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
		if (iocb)
			iocb_put(iocb);
	} else {
		/* Couldn't complete inline; finish from process context. */
		schedule_work(&req->work);
	}
	return 1;
}
  
  struct aio_poll_table {
  	struct poll_table_struct	pt;
  	struct aio_kiocb		*iocb;
  	int				error;
  };
  
  static void
  aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
  		struct poll_table_struct *p)
  {
  	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
  
  	/* multiple wait queues per file are not supported */
  	if (unlikely(pt->iocb->poll.head)) {
  		pt->error = -EINVAL;
  		return;
  	}
  
  	pt->error = 0;
  	pt->iocb->poll.head = head;
  	add_wait_queue(head, &pt->iocb->poll.wait);
  }
958c13ce1   Al Viro   make aio_read()/a...
1641
/*
 * Submit one IOCB_CMD_POLL request.  iocb->aio_buf carries the epoll-style
 * event mask (must fit in 16 bits).  Either completes immediately if the
 * file is already ready, or arms a waitqueue callback (aio_poll_wake) and
 * registers a cancel handler.  Returns 0 on success or a negative error.
 */
static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
{
	struct kioctx *ctx = aiocb->ki_ctx;
	struct poll_iocb *req = &aiocb->poll;
	struct aio_poll_table apt;
	bool cancel = false;
	__poll_t mask;

	/* reject any unknown events outside the normal event mask. */
	if ((u16)iocb->aio_buf != iocb->aio_buf)
		return -EINVAL;
	/* reject fields that are not defined for poll */
	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
		return -EINVAL;

	INIT_WORK(&req->work, aio_poll_complete_work);
	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;

	req->head = NULL;
	req->done = false;
	req->cancelled = false;

	apt.pt._qproc = aio_poll_queue_proc;
	apt.pt._key = req->events;
	apt.iocb = aiocb;
	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */

	/* initialized the list so that we can do list_empty checks */
	INIT_LIST_HEAD(&req->wait.entry);
	init_waitqueue_func_entry(&req->wait, aio_poll_wake);

	mask = vfs_poll(req->file, &apt.pt) & req->events;
	spin_lock_irq(&ctx->ctx_lock);
	if (likely(req->head)) {
		spin_lock(&req->head->lock);
		if (unlikely(list_empty(&req->wait.entry))) {
			/* aio_poll_wake() already ran and dequeued us. */
			if (apt.error)
				cancel = true;
			apt.error = 0;
			mask = 0;
		}
		if (mask || apt.error) {
			/* Completing (or failing) now: stop waiting. */
			list_del_init(&req->wait.entry);
		} else if (cancel) {
			WRITE_ONCE(req->cancelled, true);
		} else if (!req->done) { /* actually waiting for an event */
			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
			aiocb->ki_cancel = aio_poll_cancel;
		}
		spin_unlock(&req->head->lock);
	}
	if (mask) { /* no async, we'd stolen it */
		aiocb->ki_res.res = mangle_poll(mask);
		apt.error = 0;
	}
	spin_unlock_irq(&ctx->ctx_lock);
	if (mask)
		iocb_put(aiocb);
	return apt.error;
}
88a6f18b9   Jens Axboe   aio: split out io...
1699
  static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
7316b49c2   Al Viro   aio: move sanity ...
1700
1701
  			   struct iocb __user *user_iocb, struct aio_kiocb *req,
  			   bool compat)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1702
  {
84c4e1f89   Linus Torvalds   aio: simplify - a...
1703
  	req->ki_filp = fget(iocb->aio_fildes);
84c4e1f89   Linus Torvalds   aio: simplify - a...
1704
  	if (unlikely(!req->ki_filp))
7316b49c2   Al Viro   aio: move sanity ...
1705
  		return -EBADF;
84c4e1f89   Linus Torvalds   aio: simplify - a...
1706

88a6f18b9   Jens Axboe   aio: split out io...
1707
  	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
742597034   Al Viro   aio: move droppin...
1708
  		struct eventfd_ctx *eventfd;
9c3060bed   Davide Libenzi   signal/timer/even...
1709
1710
1711
1712
1713
1714
  		/*
  		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
  		 * instance of the file* now. The file descriptor must be
  		 * an eventfd() fd, and will be signaled for each completed
  		 * event using the eventfd_signal() function.
  		 */
742597034   Al Viro   aio: move droppin...
1715
  		eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
7316b49c2   Al Viro   aio: move sanity ...
1716
  		if (IS_ERR(eventfd))
18bfb9c6a   Dan Carpenter   aio: Fix an error...
1717
  			return PTR_ERR(eventfd);
7316b49c2   Al Viro   aio: move sanity ...
1718

742597034   Al Viro   aio: move droppin...
1719
  		req->ki_eventfd = eventfd;
9830f4be1   Goldwyn Rodrigues   fs: Use RWF_* fla...
1720
  	}
7316b49c2   Al Viro   aio: move sanity ...
1721
  	if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) {
caf4167aa   Kent Overstreet   aio: dprintk() ->...
1722
1723
  		pr_debug("EFAULT: aio_key
  ");
7316b49c2   Al Viro   aio: move sanity ...
1724
  		return -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1725
  	}
a9339b785   Al Viro   aio: keep io_even...
1726
1727
1728
1729
  	req->ki_res.obj = (u64)(unsigned long)user_iocb;
  	req->ki_res.data = iocb->aio_data;
  	req->ki_res.res = 0;
  	req->ki_res.res2 = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1730

88a6f18b9   Jens Axboe   aio: split out io...
1731
  	switch (iocb->aio_lio_opcode) {
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1732
  	case IOCB_CMD_PREAD:
7316b49c2   Al Viro   aio: move sanity ...
1733
  		return aio_read(&req->rw, iocb, false, compat);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1734
  	case IOCB_CMD_PWRITE:
7316b49c2   Al Viro   aio: move sanity ...
1735
  		return aio_write(&req->rw, iocb, false, compat);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1736
  	case IOCB_CMD_PREADV:
7316b49c2   Al Viro   aio: move sanity ...
1737
  		return aio_read(&req->rw, iocb, true, compat);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1738
  	case IOCB_CMD_PWRITEV:
7316b49c2   Al Viro   aio: move sanity ...
1739
  		return aio_write(&req->rw, iocb, true, compat);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1740
  	case IOCB_CMD_FSYNC:
7316b49c2   Al Viro   aio: move sanity ...
1741
  		return aio_fsync(&req->fsync, iocb, false);
a3c0d439e   Christoph Hellwig   aio: implement IO...
1742
  	case IOCB_CMD_FDSYNC:
7316b49c2   Al Viro   aio: move sanity ...
1743
  		return aio_fsync(&req->fsync, iocb, true);
bfe4037e7   Christoph Hellwig   aio: implement IO...
1744
  	case IOCB_CMD_POLL:
7316b49c2   Al Viro   aio: move sanity ...
1745
  		return aio_poll(req, iocb);
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1746
  	default:
88a6f18b9   Jens Axboe   aio: split out io...
1747
1748
  		pr_debug("invalid aio operation %d
  ", iocb->aio_lio_opcode);
7316b49c2   Al Viro   aio: move sanity ...
1749
  		return -EINVAL;
89319d31d   Christoph Hellwig   fs: remove aio_ru...
1750
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1751
  }
88a6f18b9   Jens Axboe   aio: split out io...
1752
1753
1754
  static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
  			 bool compat)
  {
7316b49c2   Al Viro   aio: move sanity ...
1755
  	struct aio_kiocb *req;
88a6f18b9   Jens Axboe   aio: split out io...
1756
  	struct iocb iocb;
7316b49c2   Al Viro   aio: move sanity ...
1757
  	int err;
88a6f18b9   Jens Axboe   aio: split out io...
1758
1759
1760
  
  	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
  		return -EFAULT;
7316b49c2   Al Viro   aio: move sanity ...
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
  	/* enforce forwards compatibility on users */
  	if (unlikely(iocb.aio_reserved2)) {
  		pr_debug("EINVAL: reserve field set
  ");
  		return -EINVAL;
  	}
  
  	/* prevent overflows */
  	if (unlikely(
  	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
  	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
  	    ((ssize_t)iocb.aio_nbytes < 0)
  	   )) {
  		pr_debug("EINVAL: overflow check
  ");
  		return -EINVAL;
  	}
  
  	req = aio_get_req(ctx);
  	if (unlikely(!req))
  		return -EAGAIN;
  
  	err = __io_submit_one(ctx, &iocb, user_iocb, req, compat);
  
  	/* Done with the synchronous reference */
  	iocb_put(req);
  
  	/*
  	 * If err is 0, we'd either done aio_complete() ourselves or have
  	 * arranged for that to be done asynchronously.  Anything non-zero
  	 * means that we need to destroy req ourselves.
  	 */
  	if (unlikely(err)) {
  		iocb_destroy(req);
  		put_reqs_available(ctx, 1);
  	}
  	return err;
88a6f18b9   Jens Axboe   aio: split out io...
1798
  }
67ba049f9   Al Viro   aio: fold do_io_s...
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
  /* sys_io_submit:
   *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
   *	the number of iocbs queued.  May return -EINVAL if the aio_context
   *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
   *	*iocbpp[0] is not properly initialized, if the operation specified
   *	is invalid for the file descriptor in the iocb.  May fail with
   *	-EFAULT if any of the data structures point to invalid data.  May
   *	fail with -EBADF if the file descriptor specified in the first
   *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
   *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
   *	fail with -ENOSYS if not implemented.
   */
  SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
  		struct iocb __user * __user *, iocbpp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1813
1814
1815
  {
  	struct kioctx *ctx;
  	long ret = 0;
080d676de   Jeff Moyer   aio: allocate kio...
1816
  	int i = 0;
9f5b94254   Shaohua Li   fs: make aio plug
1817
  	struct blk_plug plug;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1818
1819
1820
  
  	if (unlikely(nr < 0))
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1821
1822
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
caf4167aa   Kent Overstreet   aio: dprintk() ->...
1823
1824
  		pr_debug("EINVAL: invalid context id
  ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1825
1826
  		return -EINVAL;
  	}
1da92779e   Al Viro   aio: sanitize the...
1827
1828
  	if (nr > ctx->nr_events)
  		nr = ctx->nr_events;
a79d40e9b   Jens Axboe   aio: only use blk...
1829
1830
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_start_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1831
  	for (i = 0; i < nr; i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1832
  		struct iocb __user *user_iocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1833

67ba049f9   Al Viro   aio: fold do_io_s...
1834
  		if (unlikely(get_user(user_iocb, iocbpp + i))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1835
1836
1837
  			ret = -EFAULT;
  			break;
  		}
67ba049f9   Al Viro   aio: fold do_io_s...
1838
  		ret = io_submit_one(ctx, user_iocb, false);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1839
1840
1841
  		if (ret)
  			break;
  	}
a79d40e9b   Jens Axboe   aio: only use blk...
1842
1843
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_finish_plug(&plug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1844

723be6e39   Kent Overstreet   aio: percpu ioctx...
1845
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1846
1847
  	return i ? i : ret;
  }
c00d2c7e8   Al Viro   move aio compat t...
1848
  #ifdef CONFIG_COMPAT
c00d2c7e8   Al Viro   move aio compat t...
1849
  COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
67ba049f9   Al Viro   aio: fold do_io_s...
1850
  		       int, nr, compat_uptr_t __user *, iocbpp)
c00d2c7e8   Al Viro   move aio compat t...
1851
  {
67ba049f9   Al Viro   aio: fold do_io_s...
1852
1853
1854
1855
  	struct kioctx *ctx;
  	long ret = 0;
  	int i = 0;
  	struct blk_plug plug;
c00d2c7e8   Al Viro   move aio compat t...
1856
1857
1858
  
  	if (unlikely(nr < 0))
  		return -EINVAL;
67ba049f9   Al Viro   aio: fold do_io_s...
1859
1860
1861
1862
1863
1864
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx)) {
  		pr_debug("EINVAL: invalid context id
  ");
  		return -EINVAL;
  	}
1da92779e   Al Viro   aio: sanitize the...
1865
1866
  	if (nr > ctx->nr_events)
  		nr = ctx->nr_events;
a79d40e9b   Jens Axboe   aio: only use blk...
1867
1868
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_start_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
  	for (i = 0; i < nr; i++) {
  		compat_uptr_t user_iocb;
  
  		if (unlikely(get_user(user_iocb, iocbpp + i))) {
  			ret = -EFAULT;
  			break;
  		}
  
  		ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
  		if (ret)
  			break;
  	}
a79d40e9b   Jens Axboe   aio: only use blk...
1881
1882
  	if (nr > AIO_PLUG_THRESHOLD)
  		blk_finish_plug(&plug);
67ba049f9   Al Viro   aio: fold do_io_s...
1883
1884
1885
  
  	percpu_ref_put(&ctx->users);
  	return i ? i : ret;
c00d2c7e8   Al Viro   move aio compat t...
1886
1887
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
  /* sys_io_cancel:
   *	Attempts to cancel an iocb previously passed to io_submit.  If
   *	the operation is successfully cancelled, the resulting event is
   *	copied into the memory pointed to by result without being placed
   *	into the completion queue and 0 is returned.  May fail with
   *	-EFAULT if any of the data structures pointed to are invalid.
   *	May fail with -EINVAL if aio_context specified by ctx_id is
   *	invalid.  May fail with -EAGAIN if the iocb specified was not
   *	cancelled.  Will fail with -ENOSYS if not implemented.
   */
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1898
1899
  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
  		struct io_event __user *, result)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1900
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1901
  	struct kioctx *ctx;
04b2fa9f8   Christoph Hellwig   fs: split generic...
1902
  	struct aio_kiocb *kiocb;
888933f8f   Christoph Hellwig   aio: simplify can...
1903
  	int ret = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1904
  	u32 key;
a9339b785   Al Viro   aio: keep io_even...
1905
  	u64 obj = (u64)(unsigned long)iocb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1906

f3a2752a4   Christoph Hellwig   aio: simplify KIO...
1907
  	if (unlikely(get_user(key, &iocb->aio_key)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1908
  		return -EFAULT;
f3a2752a4   Christoph Hellwig   aio: simplify KIO...
1909
1910
  	if (unlikely(key != KIOCB_KEY))
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1911
1912
1913
1914
1915
1916
  
  	ctx = lookup_ioctx(ctx_id);
  	if (unlikely(!ctx))
  		return -EINVAL;
  
  	spin_lock_irq(&ctx->ctx_lock);
833f4154e   Al Viro   aio: fold lookup_...
1917
1918
  	/* TODO: use a hash or array, this sucks. */
  	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
a9339b785   Al Viro   aio: keep io_even...
1919
  		if (kiocb->ki_res.obj == obj) {
833f4154e   Al Viro   aio: fold lookup_...
1920
1921
1922
1923
  			ret = kiocb->ki_cancel(&kiocb->rw);
  			list_del_init(&kiocb->ki_list);
  			break;
  		}
888933f8f   Christoph Hellwig   aio: simplify can...
1924
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1925
  	spin_unlock_irq(&ctx->ctx_lock);
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1926
  	if (!ret) {
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1927
1928
1929
1930
  		/*
  		 * The result argument is no longer used - the io_event is
  		 * always delivered via the ring buffer. -EINPROGRESS indicates
  		 * cancellation is progress:
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1931
  		 */
bec68faaf   Kent Overstreet   aio: io_cancel() ...
1932
  		ret = -EINPROGRESS;
906b973cf   Kent Overstreet   aio: add kiocb_ca...
1933
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1934

723be6e39   Kent Overstreet   aio: percpu ioctx...
1935
  	percpu_ref_put(&ctx->users);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1936
1937
1938
  
  	return ret;
  }
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
  static long do_io_getevents(aio_context_t ctx_id,
  		long min_nr,
  		long nr,
  		struct io_event __user *events,
  		struct timespec64 *ts)
  {
  	ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX;
  	struct kioctx *ioctx = lookup_ioctx(ctx_id);
  	long ret = -EINVAL;
  
  	if (likely(ioctx)) {
  		if (likely(min_nr <= nr && min_nr >= 0))
  			ret = read_events(ioctx, min_nr, nr, events, until);
  		percpu_ref_put(&ioctx->users);
  	}
  
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1957
1958
  /* io_getevents:
   *	Attempts to read at least min_nr events and up to nr events from
642b5123a   Satoru Takeuchi   aio: fix wrong su...
1959
1960
1961
1962
1963
1964
1965
1966
   *	the completion queue for the aio_context specified by ctx_id. If
   *	it succeeds, the number of read events is returned. May fail with
   *	-EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
   *	out of range, if timeout is out of range.  May fail with -EFAULT
   *	if any of the memory specified is invalid.  May return 0 or
   *	< min_nr if the timeout specified by timeout has elapsed
   *	before sufficient events are available, where timeout == NULL
   *	specifies an infinite timeout. Note that the timeout pointed to by
6900807c6   Jeff Moyer   aio: fix io_getev...
1967
   *	timeout is relative.  Will fail with -ENOSYS if not implemented.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1968
   */
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1969
  #if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1970
1971
1972
1973
  SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1974
  		struct __kernel_timespec __user *, timeout)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1975
  {
fa2e62a54   Deepa Dinamani   io_getevents: Use...
1976
  	struct timespec64	ts;
7a074e96d   Christoph Hellwig   aio: implement io...
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
  	int			ret;
  
  	if (timeout && unlikely(get_timespec64(&ts, timeout)))
  		return -EFAULT;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
  	if (!ret && signal_pending(current))
  		ret = -EINTR;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1987

7a35397f8   Deepa Dinamani   io_pgetevents: us...
1988
  #endif
9ba546c01   Christoph Hellwig   aio: don't expose...
1989
1990
1991
1992
/* Userspace layout of the sigmask argument to io_pgetevents(). */
struct __aio_sigset {
	const sigset_t __user	*sigmask;
	size_t		sigsetsize;
};
7a074e96d   Christoph Hellwig   aio: implement io...
1993
1994
1995
1996
1997
  SYSCALL_DEFINE6(io_pgetevents,
  		aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
7a35397f8   Deepa Dinamani   io_pgetevents: us...
1998
  		struct __kernel_timespec __user *, timeout,
7a074e96d   Christoph Hellwig   aio: implement io...
1999
2000
2001
  		const struct __aio_sigset __user *, usig)
  {
  	struct __aio_sigset	ksig = { NULL, };
7a074e96d   Christoph Hellwig   aio: implement io...
2002
  	struct timespec64	ts;
97abc889e   Oleg Nesterov   signal: remove th...
2003
  	bool interrupted;
7a074e96d   Christoph Hellwig   aio: implement io...
2004
2005
2006
2007
2008
2009
2010
  	int ret;
  
  	if (timeout && unlikely(get_timespec64(&ts, timeout)))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
b772434be   Oleg Nesterov   signal: simplify ...
2011
  	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2012
2013
  	if (ret)
  		return ret;
7a074e96d   Christoph Hellwig   aio: implement io...
2014
2015
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2016
2017
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2018
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2019
  	if (interrupted && !ret)
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2020
  		ret = -ERESTARTNOHAND;
7a074e96d   Christoph Hellwig   aio: implement io...
2021

7a35397f8   Deepa Dinamani   io_pgetevents: us...
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
  	return ret;
  }
  
  #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
  
  SYSCALL_DEFINE6(io_pgetevents_time32,
  		aio_context_t, ctx_id,
  		long, min_nr,
  		long, nr,
  		struct io_event __user *, events,
  		struct old_timespec32 __user *, timeout,
  		const struct __aio_sigset __user *, usig)
  {
  	struct __aio_sigset	ksig = { NULL, };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2036
  	struct timespec64	ts;
97abc889e   Oleg Nesterov   signal: remove th...
2037
  	bool interrupted;
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2038
2039
2040
2041
2042
2043
2044
  	int ret;
  
  	if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
ded653ccb   Deepa Dinamani   signal: Add set_u...
2045

b772434be   Oleg Nesterov   signal: simplify ...
2046
  	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
ded653ccb   Deepa Dinamani   signal: Add set_u...
2047
2048
  	if (ret)
  		return ret;
7a074e96d   Christoph Hellwig   aio: implement io...
2049
2050
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2051
2052
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2053
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2054
  	if (interrupted && !ret)
854a6ed56   Deepa Dinamani   signal: Add resto...
2055
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2056

7a074e96d   Christoph Hellwig   aio: implement io...
2057
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2058
  }
c00d2c7e8   Al Viro   move aio compat t...
2059

7a35397f8   Deepa Dinamani   io_pgetevents: us...
2060
2061
2062
  #endif
  
  #if defined(CONFIG_COMPAT_32BIT_TIME)
8dabe7245   Arnd Bergmann   y2038: syscalls: ...
2063
2064
2065
2066
2067
  SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
  		__s32, min_nr,
  		__s32, nr,
  		struct io_event __user *, events,
  		struct old_timespec32 __user *, timeout)
c00d2c7e8   Al Viro   move aio compat t...
2068
  {
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2069
  	struct timespec64 t;
7a074e96d   Christoph Hellwig   aio: implement io...
2070
  	int ret;
9afc5eee6   Arnd Bergmann   y2038: globally r...
2071
  	if (timeout && get_old_timespec32(&t, timeout))
7a074e96d   Christoph Hellwig   aio: implement io...
2072
2073
2074
2075
2076
2077
2078
  		return -EFAULT;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
  	if (!ret && signal_pending(current))
  		ret = -EINTR;
  	return ret;
  }
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2079
2080
2081
  #endif
  
  #ifdef CONFIG_COMPAT
c00d2c7e8   Al Viro   move aio compat t...
2082

7a074e96d   Christoph Hellwig   aio: implement io...
2083
  struct __compat_aio_sigset {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2084
  	compat_uptr_t		sigmask;
7a074e96d   Christoph Hellwig   aio: implement io...
2085
2086
  	compat_size_t		sigsetsize;
  };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2087
  #if defined(CONFIG_COMPAT_32BIT_TIME)
7a074e96d   Christoph Hellwig   aio: implement io...
2088
2089
2090
2091
2092
  COMPAT_SYSCALL_DEFINE6(io_pgetevents,
  		compat_aio_context_t, ctx_id,
  		compat_long_t, min_nr,
  		compat_long_t, nr,
  		struct io_event __user *, events,
9afc5eee6   Arnd Bergmann   y2038: globally r...
2093
  		struct old_timespec32 __user *, timeout,
7a074e96d   Christoph Hellwig   aio: implement io...
2094
2095
  		const struct __compat_aio_sigset __user *, usig)
  {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2096
  	struct __compat_aio_sigset ksig = { 0, };
7a074e96d   Christoph Hellwig   aio: implement io...
2097
  	struct timespec64 t;
97abc889e   Oleg Nesterov   signal: remove th...
2098
  	bool interrupted;
7a074e96d   Christoph Hellwig   aio: implement io...
2099
  	int ret;
9afc5eee6   Arnd Bergmann   y2038: globally r...
2100
  	if (timeout && get_old_timespec32(&t, timeout))
7a074e96d   Christoph Hellwig   aio: implement io...
2101
2102
2103
2104
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2105
  	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
ded653ccb   Deepa Dinamani   signal: Add set_u...
2106
2107
  	if (ret)
  		return ret;
c00d2c7e8   Al Viro   move aio compat t...
2108

7a074e96d   Christoph Hellwig   aio: implement io...
2109
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2110
2111
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2112
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2113
  	if (interrupted && !ret)
854a6ed56   Deepa Dinamani   signal: Add resto...
2114
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2115

7a074e96d   Christoph Hellwig   aio: implement io...
2116
  	return ret;
c00d2c7e8   Al Viro   move aio compat t...
2117
  }
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
  
  #endif
  
  COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
  		compat_aio_context_t, ctx_id,
  		compat_long_t, min_nr,
  		compat_long_t, nr,
  		struct io_event __user *, events,
  		struct __kernel_timespec __user *, timeout,
  		const struct __compat_aio_sigset __user *, usig)
  {
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2129
  	struct __compat_aio_sigset ksig = { 0, };
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2130
  	struct timespec64 t;
97abc889e   Oleg Nesterov   signal: remove th...
2131
  	bool interrupted;
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2132
2133
2134
2135
2136
2137
2138
  	int ret;
  
  	if (timeout && get_timespec64(&t, timeout))
  		return -EFAULT;
  
  	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
  		return -EFAULT;
97eba80fc   Guillem Jover   aio: Fix io_pgete...
2139
  	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2140
2141
2142
2143
  	if (ret)
  		return ret;
  
  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
97abc889e   Oleg Nesterov   signal: remove th...
2144
2145
  
  	interrupted = signal_pending(current);
b772434be   Oleg Nesterov   signal: simplify ...
2146
  	restore_saved_sigmask_unless(interrupted);
97abc889e   Oleg Nesterov   signal: remove th...
2147
  	if (interrupted && !ret)
7a35397f8   Deepa Dinamani   io_pgetevents: us...
2148
  		ret = -ERESTARTNOHAND;
fa2e62a54   Deepa Dinamani   io_getevents: Use...
2149

7a074e96d   Christoph Hellwig   aio: implement io...
2150
  	return ret;
c00d2c7e8   Al Viro   move aio compat t...
2151
2152
  }
  #endif