Blame view

fs/read_write.c 47.5 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
  /*
   *  linux/fs/read_write.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
b12fb7f46   Ingo Molnar   sched/headers: Pr...
7
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
  #include <linux/stat.h>
b12fb7f46   Ingo Molnar   sched/headers: Pr...
9
  #include <linux/sched/xacct.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
12
  #include <linux/fcntl.h>
  #include <linux/file.h>
  #include <linux/uio.h>
0eeca2830   Robert Love   [PATCH] inotify
13
  #include <linux/fsnotify.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
  #include <linux/security.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
15
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/syscalls.h>
e28cc7157   Linus Torvalds   Relax the rw_veri...
17
  #include <linux/pagemap.h>
d6b29d7ce   Jens Axboe   splice: divorce t...
18
  #include <linux/splice.h>
561c67319   Al Viro   switch lseek to C...
19
  #include <linux/compat.h>
29732938a   Zach Brown   vfs: add copy_fil...
20
  #include <linux/mount.h>
2feb55f89   Wouter van Kesteren   fs: allow no_seek...
21
  #include <linux/fs.h>
06ae43f34   Al Viro   Don't bother with...
22
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
24
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
  #include <asm/unistd.h>
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
26
  const struct file_operations generic_ro_fops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
  	.llseek		= generic_file_llseek,
aad4f8bb4   Al Viro   switch simple gen...
28
  	.read_iter	= generic_file_read_iter,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  	.mmap		= generic_file_readonly_mmap,
534f2aaa6   Jens Axboe   sys_sendfile: swi...
30
  	.splice_read	= generic_file_splice_read,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
32
33
  };
  
  EXPORT_SYMBOL(generic_ro_fops);
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
34
  static inline bool unsigned_offsets(struct file *file)
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
35
  {
cccb5a1e6   Al Viro   fix signedness me...
36
  	return file->f_mode & FMODE_UNSIGNED_OFFSET;
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
37
  }
46a1c2c7a   Jie Liu   vfs: export lseek...
38
39
40
41
42
43
44
45
46
47
48
49
50
  /**
   * vfs_setpos - update the file offset for lseek
   * @file:	file structure in question
   * @offset:	file offset to seek to
   * @maxsize:	maximum file size
   *
   * This is a low-level filesystem helper for updating the file offset to
   * the value specified by @offset if the given offset is valid and it is
   * not equal to the current file offset.
   *
   * Return the specified offset on success and -EINVAL on invalid offset.
   */
  loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
51
52
53
54
55
56
57
58
59
60
61
62
  {
  	if (offset < 0 && !unsigned_offsets(file))
  		return -EINVAL;
  	if (offset > maxsize)
  		return -EINVAL;
  
  	if (offset != file->f_pos) {
  		file->f_pos = offset;
  		file->f_version = 0;
  	}
  	return offset;
  }
46a1c2c7a   Jie Liu   vfs: export lseek...
63
  EXPORT_SYMBOL(vfs_setpos);
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
64

3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
65
  /**
5760495a8   Andi Kleen   vfs: add generic_...
66
   * generic_file_llseek_size - generic llseek implementation for regular files
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
67
68
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
69
   * @whence:	type of seek
e8b96eb50   Eric Sandeen   vfs: allow custom...
70
71
   * @maxsize:	max size of this file in file system
   * @eof:	offset used for SEEK_END position
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
72
   *
5760495a8   Andi Kleen   vfs: add generic_...
73
   * This is a variant of generic_file_llseek that allows passing in a custom
e8b96eb50   Eric Sandeen   vfs: allow custom...
74
   * maximum file size and a custom EOF position, for e.g. hashed directories
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
75
76
   *
   * Synchronization:
5760495a8   Andi Kleen   vfs: add generic_...
77
   * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
78
79
   * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
   * read/writes behave like SEEK_SET against seeks.
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
80
   */
9465efc9e   Andi Kleen   Remove BKL from r...
81
  loff_t
965c8e59c   Andrew Morton   lseek: the "whenc...
82
  generic_file_llseek_size(struct file *file, loff_t offset, int whence,
e8b96eb50   Eric Sandeen   vfs: allow custom...
83
  		loff_t maxsize, loff_t eof)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
84
  {
965c8e59c   Andrew Morton   lseek: the "whenc...
85
  	switch (whence) {
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
86
  	case SEEK_END:
e8b96eb50   Eric Sandeen   vfs: allow custom...
87
  		offset += eof;
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
88
89
  		break;
  	case SEEK_CUR:
5b6f1eb97   Alain Knaff   vfs: lseek(fd, 0,...
90
91
92
93
94
95
96
97
  		/*
  		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  		 * position-querying operation.  Avoid rewriting the "same"
  		 * f_pos value back to the file because a concurrent read(),
  		 * write() or lseek() might have altered it
  		 */
  		if (offset == 0)
  			return file->f_pos;
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
98
99
100
101
102
103
  		/*
  		 * f_lock protects against read/modify/write race with other
  		 * SEEK_CURs. Note that parallel writes and reads behave
  		 * like SEEK_SET.
  		 */
  		spin_lock(&file->f_lock);
46a1c2c7a   Jie Liu   vfs: export lseek...
104
  		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
105
106
  		spin_unlock(&file->f_lock);
  		return offset;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
107
108
109
110
111
  	case SEEK_DATA:
  		/*
  		 * In the generic case the entire file is data, so as long as
  		 * offset isn't at the end of the file then the offset is data.
  		 */
fc46820b2   Andreas Gruenbacher   vfs: Return -ENXI...
112
  		if ((unsigned long long)offset >= eof)
982d81658   Josef Bacik   fs: add SEEK_HOLE...
113
114
115
116
117
118
119
  			return -ENXIO;
  		break;
  	case SEEK_HOLE:
  		/*
  		 * There is a virtual hole at the end of the file, so as long as
  		 * offset isn't i_size or larger, return i_size.
  		 */
fc46820b2   Andreas Gruenbacher   vfs: Return -ENXI...
120
  		if ((unsigned long long)offset >= eof)
982d81658   Josef Bacik   fs: add SEEK_HOLE...
121
  			return -ENXIO;
e8b96eb50   Eric Sandeen   vfs: allow custom...
122
  		offset = eof;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
123
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
124
  	}
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
125

46a1c2c7a   Jie Liu   vfs: export lseek...
126
  	return vfs_setpos(file, offset, maxsize);
5760495a8   Andi Kleen   vfs: add generic_...
127
128
129
130
131
132
133
  }
  EXPORT_SYMBOL(generic_file_llseek_size);
  
  /**
   * generic_file_llseek - generic llseek implementation for regular files
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
134
   * @whence:	type of seek
5760495a8   Andi Kleen   vfs: add generic_...
135
136
137
   *
   * This is a generic implementation of ->llseek useable for all normal local
   * filesystems.  It just updates the file offset to the value specified by
546ae2d2f   Ming Lei   fs/read_write.c: ...
138
   * @offset and @whence.
5760495a8   Andi Kleen   vfs: add generic_...
139
   */
965c8e59c   Andrew Morton   lseek: the "whenc...
140
  loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
5760495a8   Andi Kleen   vfs: add generic_...
141
142
  {
  	struct inode *inode = file->f_mapping->host;
965c8e59c   Andrew Morton   lseek: the "whenc...
143
  	return generic_file_llseek_size(file, offset, whence,
e8b96eb50   Eric Sandeen   vfs: allow custom...
144
145
  					inode->i_sb->s_maxbytes,
  					i_size_read(inode));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146
  }
9465efc9e   Andi Kleen   Remove BKL from r...
147
  EXPORT_SYMBOL(generic_file_llseek);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148

ae6afc3f5   jan Blunck   vfs: introduce no...
149
  /**
1bf9d14df   Al Viro   new helper: fixed...
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
   * fixed_size_llseek - llseek implementation for fixed-sized devices
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   * @size:	size of the file
   *
   */
  loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
  {
  	/* Anything other than the three classic seek modes is rejected. */
  	if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END)
  		return -EINVAL;
  
  	/* @size serves as both the maximum offset and the SEEK_END position. */
  	return generic_file_llseek_size(file, offset, whence, size, size);
  }
  EXPORT_SYMBOL(fixed_size_llseek);
  
  /**
b25472f9b   Al Viro   new helpers: no_s...
170
171
172
173
174
175
176
177
178
179
180
   * no_seek_end_llseek - llseek implementation without SEEK_END support
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   *
   */
  loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
  {
  	switch (whence) {
  	case SEEK_SET: case SEEK_CUR:
  		return generic_file_llseek_size(file, offset, whence,
2feb55f89   Wouter van Kesteren   fs: allow no_seek...
181
  						OFFSET_MAX, 0);
b25472f9b   Al Viro   new helpers: no_s...
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
  	default:
  		return -EINVAL;
  	}
  }
  EXPORT_SYMBOL(no_seek_end_llseek);
  
  /**
   * no_seek_end_llseek_size - llseek implementation without SEEK_END support
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   * @size:	maximal offset allowed
   *
   * Only SEEK_SET and SEEK_CUR are accepted; any other @whence yields -EINVAL.
   * Accepted seeks are handed to generic_file_llseek_size() with @size as the
   * maximum offset and 0 as the EOF position.
   */
  loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
  {
  	if (whence != SEEK_SET && whence != SEEK_CUR)
  		return -EINVAL;
  
  	return generic_file_llseek_size(file, offset, whence, size, 0);
  }
  EXPORT_SYMBOL(no_seek_end_llseek_size);
  
  /**
ae6afc3f5   jan Blunck   vfs: introduce no...
209
210
211
   * noop_llseek - No Operation Performed llseek implementation
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
212
   * @whence:	type of seek
ae6afc3f5   jan Blunck   vfs: introduce no...
213
214
215
216
217
218
   *
   * This is an implementation of ->llseek useable for the rare special case when
   * userspace expects the seek to succeed but the (device) file is actually not
   * able to perform the seek. In this case you use noop_llseek() instead of
   * falling back to the default implementation of ->llseek.
   */
965c8e59c   Andrew Morton   lseek: the "whenc...
219
  loff_t noop_llseek(struct file *file, loff_t offset, int whence)
ae6afc3f5   jan Blunck   vfs: introduce no...
220
221
222
223
  {
  	return file->f_pos;
  }
  EXPORT_SYMBOL(noop_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
224
  loff_t no_llseek(struct file *file, loff_t offset, int whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
225
226
227
228
  {
  	return -ESPIPE;
  }
  EXPORT_SYMBOL(no_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
229
  loff_t default_llseek(struct file *file, loff_t offset, int whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230
  {
496ad9aa8   Al Viro   new helper: file_...
231
  	struct inode *inode = file_inode(file);
16abef0e9   David Sterba   fs: use loff_t ty...
232
  	loff_t retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233

5955102c9   Al Viro   wrappers for ->i_...
234
  	inode_lock(inode);
965c8e59c   Andrew Morton   lseek: the "whenc...
235
  	switch (whence) {
7b8e89249   Chris Snook   use symbolic cons...
236
  		case SEEK_END:
982d81658   Josef Bacik   fs: add SEEK_HOLE...
237
  			offset += i_size_read(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238
  			break;
7b8e89249   Chris Snook   use symbolic cons...
239
  		case SEEK_CUR:
5b6f1eb97   Alain Knaff   vfs: lseek(fd, 0,...
240
241
242
243
  			if (offset == 0) {
  				retval = file->f_pos;
  				goto out;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244
  			offset += file->f_pos;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
245
246
247
248
249
250
251
  			break;
  		case SEEK_DATA:
  			/*
  			 * In the generic case the entire file is data, so as
  			 * long as offset isn't at the end of the file then the
  			 * offset is data.
  			 */
bacb2d816   Dan Carpenter   fs: add missing u...
252
253
254
255
  			if (offset >= inode->i_size) {
  				retval = -ENXIO;
  				goto out;
  			}
982d81658   Josef Bacik   fs: add SEEK_HOLE...
256
257
258
259
260
261
262
  			break;
  		case SEEK_HOLE:
  			/*
  			 * There is a virtual hole at the end of the file, so
  			 * as long as offset isn't i_size or larger, return
  			 * i_size.
  			 */
bacb2d816   Dan Carpenter   fs: add missing u...
263
264
265
266
  			if (offset >= inode->i_size) {
  				retval = -ENXIO;
  				goto out;
  			}
982d81658   Josef Bacik   fs: add SEEK_HOLE...
267
268
  			offset = inode->i_size;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
269
270
  	}
  	retval = -EINVAL;
cccb5a1e6   Al Viro   fix signedness me...
271
  	if (offset >= 0 || unsigned_offsets(file)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
272
273
274
275
276
277
  		if (offset != file->f_pos) {
  			file->f_pos = offset;
  			file->f_version = 0;
  		}
  		retval = offset;
  	}
5b6f1eb97   Alain Knaff   vfs: lseek(fd, 0,...
278
  out:
5955102c9   Al Viro   wrappers for ->i_...
279
  	inode_unlock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
282
  	return retval;
  }
  EXPORT_SYMBOL(default_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
283
  loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
284
285
286
287
288
  {
  	loff_t (*fn)(struct file *, loff_t, int);
  
  	fn = no_llseek;
  	if (file->f_mode & FMODE_LSEEK) {
72c2d5319   Al Viro   file->f_op is nev...
289
  		if (file->f_op->llseek)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
290
291
  			fn = file->f_op->llseek;
  	}
965c8e59c   Andrew Morton   lseek: the "whenc...
292
  	return fn(file, offset, whence);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
293
294
  }
  EXPORT_SYMBOL(vfs_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
295
  SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296
297
  {
  	off_t retval;
9c225f265   Linus Torvalds   vfs: atomic f_pos...
298
  	struct fd f = fdget_pos(fd);
2903ff019   Al Viro   switch simple cas...
299
300
  	if (!f.file)
  		return -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
301
302
  
  	retval = -EINVAL;
965c8e59c   Andrew Morton   lseek: the "whenc...
303
304
  	if (whence <= SEEK_MAX) {
  		loff_t res = vfs_llseek(f.file, offset, whence);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
306
307
308
  		retval = res;
  		if (res != (loff_t)retval)
  			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
  	}
9c225f265   Linus Torvalds   vfs: atomic f_pos...
309
  	fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
310
311
  	return retval;
  }
561c67319   Al Viro   switch lseek to C...
312
313
314
315
316
317
  #ifdef CONFIG_COMPAT
  /*
   * Compat-ABI lseek entry point: receives the offset as a compat_off_t and
   * forwards fd, offset and whence unchanged to sys_lseek(), returning
   * whatever that call returns.
   */
  COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
  {
  	return sys_lseek(fd, offset, whence);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
318
  #ifdef __ARCH_WANT_SYS_LLSEEK
003d7ab47   Heiko Carstens   [CVE-2009-0029] S...
319
320
  SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
  		unsigned long, offset_low, loff_t __user *, result,
965c8e59c   Andrew Morton   lseek: the "whenc...
321
  		unsigned int, whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
323
  {
  	int retval;
d7a15f8d0   Eric Biggers   vfs: atomic f_pos...
324
  	struct fd f = fdget_pos(fd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
  	loff_t offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326

2903ff019   Al Viro   switch simple cas...
327
328
  	if (!f.file)
  		return -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
329
330
  
  	retval = -EINVAL;
965c8e59c   Andrew Morton   lseek: the "whenc...
331
  	if (whence > SEEK_MAX)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
332
  		goto out_putf;
2903ff019   Al Viro   switch simple cas...
333
  	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
965c8e59c   Andrew Morton   lseek: the "whenc...
334
  			whence);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
335
336
337
338
339
340
341
342
  
  	retval = (int)offset;
  	if (offset >= 0) {
  		retval = -EFAULT;
  		if (!copy_to_user(result, &offset, sizeof(offset)))
  			retval = 0;
  	}
  out_putf:
d7a15f8d0   Eric Biggers   vfs: atomic f_pos...
343
  	fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
345
346
  	return retval;
  }
  #endif
68d70d03f   Al Viro   constify rw_verif...
347
  int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
348
349
350
  {
  	struct inode *inode;
  	loff_t pos;
c43e259cc   James Morris   security: call se...
351
  	int retval = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352

496ad9aa8   Al Viro   new helper: file_...
353
  	inode = file_inode(file);
e28cc7157   Linus Torvalds   Relax the rw_veri...
354
  	if (unlikely((ssize_t) count < 0))
c43e259cc   James Morris   security: call se...
355
  		return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
356
  	pos = *ppos;
cccb5a1e6   Al Viro   fix signedness me...
357
358
359
360
361
362
363
  	if (unlikely(pos < 0)) {
  		if (!unsigned_offsets(file))
  			return retval;
  		if (count >= -pos) /* both values are in 0..LLONG_MAX */
  			return -EOVERFLOW;
  	} else if (unlikely((loff_t) (pos + count) < 0)) {
  		if (!unsigned_offsets(file))
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
364
365
  			return retval;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
366

bd61e0a9c   Jeff Layton   locks: convert po...
367
  	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
acc15575e   Christoph Hellwig   locks: new locks_...
368
369
  		retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
  				read_write == READ ? F_RDLCK : F_WRLCK);
e28cc7157   Linus Torvalds   Relax the rw_veri...
370
371
372
  		if (retval < 0)
  			return retval;
  	}
bc61384dc   Al Viro   rw_verify_area():...
373
  	return security_file_permission(file,
c43e259cc   James Morris   security: call se...
374
  				read_write == READ ? MAY_READ : MAY_WRITE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
375
  }
5d5d56897   Al Viro   make new_sync_{re...
376
  static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
293bc9822   Al Viro   new methods: ->re...
377
378
379
380
381
382
383
384
  {
  	struct iovec iov = { .iov_base = buf, .iov_len = len };
  	struct kiocb kiocb;
  	struct iov_iter iter;
  	ssize_t ret;
  
  	init_sync_kiocb(&kiocb, filp);
  	kiocb.ki_pos = *ppos;
293bc9822   Al Viro   new methods: ->re...
385
  	iov_iter_init(&iter, READ, &iov, 1, len);
bb7462b6f   Miklos Szeredi   vfs: use helpers ...
386
  	ret = call_read_iter(filp, &kiocb, &iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
387
  	BUG_ON(ret == -EIOCBQUEUED);
293bc9822   Al Viro   new methods: ->re...
388
389
390
  	*ppos = kiocb.ki_pos;
  	return ret;
  }
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
391
392
393
  ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
  		   loff_t *pos)
  {
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
394
  	if (file->f_op->read)
3d04c8a17   Al Viro   export __vfs_read()
395
  		return file->f_op->read(file, buf, count, pos);
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
396
  	else if (file->f_op->read_iter)
3d04c8a17   Al Viro   export __vfs_read()
397
  		return new_sync_read(file, buf, count, pos);
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
398
  	else
3d04c8a17   Al Viro   export __vfs_read()
399
  		return -EINVAL;
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
400
  }
bdd1d2d3d   Christoph Hellwig   fs: fix kernel_re...
401
  ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
c41fbad01   Christoph Hellwig   fs: move kernel_r...
402
403
  {
  	mm_segment_t old_fs;
bdd1d2d3d   Christoph Hellwig   fs: fix kernel_re...
404
  	ssize_t result;
c41fbad01   Christoph Hellwig   fs: move kernel_r...
405
406
407
408
  
  	old_fs = get_fs();
  	set_fs(get_ds());
  	/* The cast to a user pointer is valid due to the set_fs() */
bdd1d2d3d   Christoph Hellwig   fs: fix kernel_re...
409
  	result = vfs_read(file, (void __user *)buf, count, pos);
c41fbad01   Christoph Hellwig   fs: move kernel_r...
410
411
412
413
  	set_fs(old_fs);
  	return result;
  }
  EXPORT_SYMBOL(kernel_read);
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
414

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415
416
417
418
419
420
  ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;
  
  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
7f7f25e82   Al Viro   replace checking ...
421
  	if (!(file->f_mode & FMODE_CAN_READ))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
422
423
424
425
426
  		return -EINVAL;
  	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
  		return -EFAULT;
  
  	ret = rw_verify_area(READ, file, pos, count);
bc61384dc   Al Viro   rw_verify_area():...
427
428
429
  	if (!ret) {
  		if (count > MAX_RW_COUNT)
  			count =  MAX_RW_COUNT;
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
430
  		ret = __vfs_read(file, buf, count, pos);
c43e259cc   James Morris   security: call se...
431
  		if (ret > 0) {
2a12a9d78   Eric Paris   fsnotify: pass a ...
432
  			fsnotify_access(file);
c43e259cc   James Morris   security: call se...
433
  			add_rchar(current, ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
434
  		}
c43e259cc   James Morris   security: call se...
435
  		inc_syscr(current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
436
437
438
439
  	}
  
  	return ret;
  }
5d5d56897   Al Viro   make new_sync_{re...
440
  static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
293bc9822   Al Viro   new methods: ->re...
441
442
443
444
445
446
447
448
  {
  	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
  	struct kiocb kiocb;
  	struct iov_iter iter;
  	ssize_t ret;
  
  	init_sync_kiocb(&kiocb, filp);
  	kiocb.ki_pos = *ppos;
293bc9822   Al Viro   new methods: ->re...
449
  	iov_iter_init(&iter, WRITE, &iov, 1, len);
bb7462b6f   Miklos Szeredi   vfs: use helpers ...
450
  	ret = call_write_iter(filp, &kiocb, &iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
451
  	BUG_ON(ret == -EIOCBQUEUED);
f765b134c   Al Viro   new_sync_write():...
452
453
  	if (ret > 0)
  		*ppos = kiocb.ki_pos;
293bc9822   Al Viro   new methods: ->re...
454
455
  	return ret;
  }
493c84c07   Al Viro   new helper: __vfs...
456
457
458
459
460
  ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
  		    loff_t *pos)
  {
  	if (file->f_op->write)
  		return file->f_op->write(file, p, count, pos);
493c84c07   Al Viro   new helper: __vfs...
461
462
463
464
465
  	else if (file->f_op->write_iter)
  		return new_sync_write(file, p, count, pos);
  	else
  		return -EINVAL;
  }
493c84c07   Al Viro   new helper: __vfs...
466

73e18f7c0   Christoph Hellwig   fs: make the buf ...
467
  ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
06ae43f34   Al Viro   Don't bother with...
468
469
470
471
  {
  	mm_segment_t old_fs;
  	const char __user *p;
  	ssize_t ret;
7f7f25e82   Al Viro   replace checking ...
472
  	if (!(file->f_mode & FMODE_CAN_WRITE))
3e84f48ed   Al Viro   vfs/splice: Fix m...
473
  		return -EINVAL;
06ae43f34   Al Viro   Don't bother with...
474
475
476
477
478
  	old_fs = get_fs();
  	set_fs(get_ds());
  	p = (__force const char __user *)buf;
  	if (count > MAX_RW_COUNT)
  		count =  MAX_RW_COUNT;
493c84c07   Al Viro   new helper: __vfs...
479
  	ret = __vfs_write(file, p, count, pos);
06ae43f34   Al Viro   Don't bother with...
480
481
482
483
484
485
486
487
  	set_fs(old_fs);
  	if (ret > 0) {
  		fsnotify_modify(file);
  		add_wchar(current, ret);
  	}
  	inc_syscw(current);
  	return ret;
  }
2ec3a12a6   Al Viro   cachefiles_write_...
488
  EXPORT_SYMBOL(__kernel_write);
e13ec939e   Christoph Hellwig   fs: fix kernel_wr...
489
490
  ssize_t kernel_write(struct file *file, const void *buf, size_t count,
  			    loff_t *pos)
ac452acae   Christoph Hellwig   fs: move kernel_w...
491
492
493
494
495
496
497
  {
  	mm_segment_t old_fs;
  	ssize_t res;
  
  	old_fs = get_fs();
  	set_fs(get_ds());
  	/* The cast to a user pointer is valid due to the set_fs() */
e13ec939e   Christoph Hellwig   fs: fix kernel_wr...
498
  	res = vfs_write(file, (__force const char __user *)buf, count, pos);
ac452acae   Christoph Hellwig   fs: move kernel_w...
499
500
501
502
503
  	set_fs(old_fs);
  
  	return res;
  }
  EXPORT_SYMBOL(kernel_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
504
505
506
507
508
509
  ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;
  
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
7f7f25e82   Al Viro   replace checking ...
510
  	if (!(file->f_mode & FMODE_CAN_WRITE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
511
512
513
514
515
  		return -EINVAL;
  	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
  		return -EFAULT;
  
  	ret = rw_verify_area(WRITE, file, pos, count);
bc61384dc   Al Viro   rw_verify_area():...
516
517
518
  	if (!ret) {
  		if (count > MAX_RW_COUNT)
  			count =  MAX_RW_COUNT;
03d95eb2f   Al Viro   lift sb_start_wri...
519
  		file_start_write(file);
493c84c07   Al Viro   new helper: __vfs...
520
  		ret = __vfs_write(file, buf, count, pos);
c43e259cc   James Morris   security: call se...
521
  		if (ret > 0) {
2a12a9d78   Eric Paris   fsnotify: pass a ...
522
  			fsnotify_modify(file);
c43e259cc   James Morris   security: call se...
523
  			add_wchar(current, ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
524
  		}
c43e259cc   James Morris   security: call se...
525
  		inc_syscw(current);
03d95eb2f   Al Viro   lift sb_start_wri...
526
  		file_end_write(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
527
528
529
530
  	}
  
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
531
532
533
534
535
536
537
538
539
  /*
   * Return the position currently stored in @file (file->f_pos).
   * No locking is performed here.
   */
  static inline loff_t file_pos_read(struct file *file)
  {
  	return file->f_pos;
  }
  
  /*
   * Store @pos as the position of @file (file->f_pos).
   * No validation or locking is performed here.
   */
  static inline void file_pos_write(struct file *file, loff_t pos)
  {
  	file->f_pos = pos;
  }
3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
540
  SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
541
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
542
  	struct fd f = fdget_pos(fd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
543
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
544

2903ff019   Al Viro   switch simple cas...
545
546
547
  	if (f.file) {
  		loff_t pos = file_pos_read(f.file);
  		ret = vfs_read(f.file, buf, count, &pos);
5faf153eb   Al Viro   don't call file_p...
548
549
  		if (ret >= 0)
  			file_pos_write(f.file, pos);
9c225f265   Linus Torvalds   vfs: atomic f_pos...
550
  		fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
551
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
552
553
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
554

3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
555
556
  SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
  		size_t, count)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
557
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
558
  	struct fd f = fdget_pos(fd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
559
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
560

2903ff019   Al Viro   switch simple cas...
561
562
563
  	if (f.file) {
  		loff_t pos = file_pos_read(f.file);
  		ret = vfs_write(f.file, buf, count, &pos);
5faf153eb   Al Viro   don't call file_p...
564
565
  		if (ret >= 0)
  			file_pos_write(f.file, pos);
9c225f265   Linus Torvalds   vfs: atomic f_pos...
566
  		fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567
568
569
570
  	}
  
  	return ret;
  }
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
571
572
  SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
  			size_t, count, loff_t, pos)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
  {
2903ff019   Al Viro   switch simple cas...
574
  	struct fd f;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
576
577
578
  
  	if (pos < 0)
  		return -EINVAL;
2903ff019   Al Viro   switch simple cas...
579
580
  	f = fdget(fd);
  	if (f.file) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
581
  		ret = -ESPIPE;
2903ff019   Al Viro   switch simple cas...
582
583
584
  		if (f.file->f_mode & FMODE_PREAD)
  			ret = vfs_read(f.file, buf, count, &pos);
  		fdput(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
585
586
587
588
  	}
  
  	return ret;
  }
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
589
590
  SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
  			 size_t, count, loff_t, pos)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
591
  {
2903ff019   Al Viro   switch simple cas...
592
  	struct fd f;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
593
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
594
595
596
  
  	if (pos < 0)
  		return -EINVAL;
2903ff019   Al Viro   switch simple cas...
597
598
  	f = fdget(fd);
  	if (f.file) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
599
  		ret = -ESPIPE;
2903ff019   Al Viro   switch simple cas...
600
601
602
  		if (f.file->f_mode & FMODE_PWRITE)  
  			ret = vfs_write(f.file, buf, count, &pos);
  		fdput(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
  	}
  
  	return ret;
  }
  
  /*
   * Trim an iovec array in place so that it describes at most @to bytes.
   *
   * Segments are walked in order; the first segment at which the running
   * total reaches @to has its iov_len cut down to the remaining byte count,
   * and later segments are left untouched.  Returns the number of segments
   * still in use: the index of the trimmed segment plus one, or @nr_segs when
   * the whole array holds fewer than @to bytes.
   */
  unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
  {
  	size_t consumed = 0;
  	unsigned long i;
  
  	for (i = 0; i < nr_segs; i++) {
  		struct iovec *cur = &iov[i];
  
  		if (consumed + cur->iov_len >= to) {
  			/* This segment reaches the limit: keep only what fits. */
  			cur->iov_len = to - consumed;
  			return i + 1;
  		}
  		consumed += cur->iov_len;
  	}
  	return nr_segs;
  }
19295529d   Eric Sandeen   ext4: export iov_...
627
  EXPORT_SYMBOL(iov_shorten);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
628

ac15ac066   Al Viro   lift iov_iter int...
629
  static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
630
  		loff_t *ppos, int type, rwf_t flags)
293bc9822   Al Viro   new methods: ->re...
631
632
  {
  	struct kiocb kiocb;
293bc9822   Al Viro   new methods: ->re...
633
634
635
  	ssize_t ret;
  
  	init_sync_kiocb(&kiocb, filp);
fdd2f5b7d   Goldwyn Rodrigues   fs: Separate out ...
636
637
638
  	ret = kiocb_set_rw_flags(&kiocb, flags);
  	if (ret)
  		return ret;
293bc9822   Al Viro   new methods: ->re...
639
  	kiocb.ki_pos = *ppos;
293bc9822   Al Viro   new methods: ->re...
640

0f78d06ac   Miklos Szeredi   vfs: pass type in...
641
  	if (type == READ)
bb7462b6f   Miklos Szeredi   vfs: use helpers ...
642
  		ret = call_read_iter(filp, &kiocb, iter);
0f78d06ac   Miklos Szeredi   vfs: pass type in...
643
  	else
bb7462b6f   Miklos Szeredi   vfs: use helpers ...
644
  		ret = call_write_iter(filp, &kiocb, iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
645
  	BUG_ON(ret == -EIOCBQUEUED);
293bc9822   Al Viro   new methods: ->re...
646
647
648
  	*ppos = kiocb.ki_pos;
  	return ret;
  }
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
649
  /* Do it by hand, with file-ops */
ac15ac066   Al Viro   lift iov_iter int...
650
  static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
651
  		loff_t *ppos, int type, rwf_t flags)
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
652
  {
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
653
  	ssize_t ret = 0;
97be7ebe5   Christoph Hellwig   vfs: add the RWF_...
654
  	if (flags & ~RWF_HIPRI)
793b80ef1   Christoph Hellwig   vfs: pass a flags...
655
  		return -EOPNOTSUPP;
ac15ac066   Al Viro   lift iov_iter int...
656
657
  	while (iov_iter_count(iter)) {
  		struct iovec iovec = iov_iter_iovec(iter);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
658
  		ssize_t nr;
0f78d06ac   Miklos Szeredi   vfs: pass type in...
659
660
661
662
663
664
665
  		if (type == READ) {
  			nr = filp->f_op->read(filp, iovec.iov_base,
  					      iovec.iov_len, ppos);
  		} else {
  			nr = filp->f_op->write(filp, iovec.iov_base,
  					       iovec.iov_len, ppos);
  		}
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
666
667
668
669
670
671
672
  
  		if (nr < 0) {
  			if (!ret)
  				ret = nr;
  			break;
  		}
  		ret += nr;
ac15ac066   Al Viro   lift iov_iter int...
673
  		if (nr != iovec.iov_len)
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
674
  			break;
ac15ac066   Al Viro   lift iov_iter int...
675
  		iov_iter_advance(iter, nr);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
676
677
678
679
  	}
  
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
680
681
  /*
   * Map a transfer direction to the access_ok() mode for the user buffer.
   * A write operation does a read from user space and vice versa, so READ
   * yields VERIFY_WRITE and every other value yields VERIFY_READ.
   */
  #define vrfy_dir(type) ((type) != READ ? VERIFY_READ : VERIFY_WRITE)
ffecee4f2   Vegard Nossum   iov_iter: kernel-...
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
  /**
   * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
   *     into the kernel and check that it is valid.
   *
   * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
   * @uvector: Pointer to the userspace array.
   * @nr_segs: Number of elements in userspace array.
   * @fast_segs: Number of elements in @fast_pointer.
   * @fast_pointer: Pointer to (usually small on-stack) kernel array.
   * @ret_pointer: (output parameter) Pointer to a variable that will point to
   *     either @fast_pointer, a newly allocated kernel array, or NULL,
   *     depending on which array was used.
   *
   * This function copies an array of &struct iovec of @nr_segs from
   * userspace into the kernel and checks that each element is valid (e.g.
   * it does not point to a kernel address or cause overflow by being too
   * large, etc.).
   *
   * As an optimization, the caller may provide a pointer to a small
   * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
   * (the size of this array, or 0 if unused, should be given in @fast_segs).
   *
   * @ret_pointer will always point to the array that was used, so the
   * caller must take care not to call kfree() on it e.g. in case the
   * @fast_pointer array was used and it was allocated on the stack.
   *
   * Return: The total number of bytes covered by the iovec array on success
   *   or a negative error code on error.
   */
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
711
712
713
  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
  			      unsigned long nr_segs, unsigned long fast_segs,
  			      struct iovec *fast_pointer,
ac34ebb3a   Christopher Yeoh   aio/vfs: cleanup ...
714
  			      struct iovec **ret_pointer)
435f49a51   Linus Torvalds   readv/writev: do ...
715
  {
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
716
  	unsigned long seg;
435f49a51   Linus Torvalds   readv/writev: do ...
717
  	ssize_t ret;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
718
  	struct iovec *iov = fast_pointer;
435f49a51   Linus Torvalds   readv/writev: do ...
719
720
721
722
723
  	/*
  	 * SuS says "The readv() function *may* fail if the iovcnt argument
  	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
  	 * traditionally returned zero for zero segments, so...
  	 */
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
724
725
  	if (nr_segs == 0) {
  		ret = 0;
435f49a51   Linus Torvalds   readv/writev: do ...
726
  		goto out;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
727
  	}
435f49a51   Linus Torvalds   readv/writev: do ...
728
729
730
731
  	/*
  	 * First get the "struct iovec" from user memory and
  	 * verify all the pointers
  	 */
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
732
733
  	if (nr_segs > UIO_MAXIOV) {
  		ret = -EINVAL;
435f49a51   Linus Torvalds   readv/writev: do ...
734
  		goto out;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
735
736
  	}
  	if (nr_segs > fast_segs) {
435f49a51   Linus Torvalds   readv/writev: do ...
737
  		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
738
739
  		if (iov == NULL) {
  			ret = -ENOMEM;
435f49a51   Linus Torvalds   readv/writev: do ...
740
  			goto out;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
741
  		}
435f49a51   Linus Torvalds   readv/writev: do ...
742
  	}
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
743
744
  	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
  		ret = -EFAULT;
435f49a51   Linus Torvalds   readv/writev: do ...
745
  		goto out;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
746
  	}
435f49a51   Linus Torvalds   readv/writev: do ...
747
  	/*
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
748
749
750
751
  	 * According to the Single Unix Specification we should return EINVAL
  	 * if an element length is < 0 when cast to ssize_t or if the
  	 * total length would overflow the ssize_t return value of the
  	 * system call.
435f49a51   Linus Torvalds   readv/writev: do ...
752
753
754
755
  	 *
  	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
  	 * overflow case.
  	 */
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
756
  	ret = 0;
435f49a51   Linus Torvalds   readv/writev: do ...
757
758
759
  	for (seg = 0; seg < nr_segs; seg++) {
  		void __user *buf = iov[seg].iov_base;
  		ssize_t len = (ssize_t)iov[seg].iov_len;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
760
761
762
  
  		/* see if we we're about to use an invalid len or if
  		 * it's about to overflow ssize_t */
435f49a51   Linus Torvalds   readv/writev: do ...
763
  		if (len < 0) {
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
764
  			ret = -EINVAL;
435f49a51   Linus Torvalds   readv/writev: do ...
765
  			goto out;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
766
  		}
ac34ebb3a   Christopher Yeoh   aio/vfs: cleanup ...
767
  		if (type >= 0
fcf634098   Christopher Yeoh   Cross Memory Attach
768
  		    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
769
  			ret = -EFAULT;
435f49a51   Linus Torvalds   readv/writev: do ...
770
771
772
773
774
  			goto out;
  		}
  		if (len > MAX_RW_COUNT - ret) {
  			len = MAX_RW_COUNT - ret;
  			iov[seg].iov_len = len;
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
775
  		}
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
776
  		ret += len;
435f49a51   Linus Torvalds   readv/writev: do ...
777
  	}
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
778
779
780
781
  out:
  	*ret_pointer = iov;
  	return ret;
  }
f50298556   Al Viro   move compat_rw_co...
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
  #ifdef CONFIG_COMPAT
  /*
   * Compat counterpart of rw_copy_check_uvector(): read @nr_segs
   * &struct compat_iovec entries from @uvector, validate them and store them
   * as native &struct iovec entries.
   *
   * *@ret_pointer starts out as @fast_pointer and is switched to a heap
   * array only after that array has been allocated successfully (needed when
   * @nr_segs exceeds @fast_segs).  The per-element access_ok() check is
   * skipped when @type is negative.
   *
   * Returns the total byte count (capped at MAX_RW_COUNT), 0 for zero
   * segments, or -EINVAL / -ENOMEM / -EFAULT.
   */
  ssize_t compat_rw_copy_check_uvector(int type,
  		const struct compat_iovec __user *uvector, unsigned long nr_segs,
  		unsigned long fast_segs, struct iovec *fast_pointer,
  		struct iovec **ret_pointer)
  {
  	compat_ssize_t tot_len;
  	struct iovec *iov = *ret_pointer = fast_pointer;
  	ssize_t ret = 0;
  	unsigned long seg;	/* same type as nr_segs, which bounds it */

  	/*
  	 * SuS says "The readv() function *may* fail if the iovcnt argument
  	 * was less than or equal to 0, or greater than {IOV_MAX}."  Linux has
  	 * traditionally returned zero for zero segments, so...
  	 */
  	if (nr_segs == 0)
  		goto out;

  	ret = -EINVAL;
  	if (nr_segs > UIO_MAXIOV)
  		goto out;
  	if (nr_segs > fast_segs) {
  		ret = -ENOMEM;
  		/* kmalloc_array() checks the count * size multiplication. */
  		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
  		if (iov == NULL)
  			goto out;
  	}
  	*ret_pointer = iov;

  	ret = -EFAULT;
  	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
  		goto out;

  	/*
  	 * Single unix specification:
  	 * We should -EINVAL if an element length is not >= 0 and fitting an
  	 * ssize_t.
  	 *
  	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
  	 * no overflow possibility.
  	 */
  	tot_len = 0;
  	ret = -EINVAL;
  	for (seg = 0; seg < nr_segs; seg++) {
  		compat_uptr_t buf;
  		compat_ssize_t len;

  		if (__get_user(len, &uvector->iov_len) ||
  		   __get_user(buf, &uvector->iov_base)) {
  			ret = -EFAULT;
  			goto out;
  		}
  		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
  			goto out;
  		if (type >= 0 &&
  		    !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
  			ret = -EFAULT;
  			goto out;
  		}
  		/* Clamp so the running total stays <= MAX_RW_COUNT. */
  		if (len > MAX_RW_COUNT - tot_len)
  			len = MAX_RW_COUNT - tot_len;
  		tot_len += len;
  		iov->iov_base = compat_ptr(buf);
  		iov->iov_len = (compat_size_t) len;
  		uvector++;
  		iov++;
  	}
  	ret = tot_len;

  out:
  	return ret;
  }
  #endif
19c735868   Christoph Hellwig   fs: remove __do_r...
856
  static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
857
  		loff_t *pos, rwf_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
858
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
859
  	size_t tot_len;
7687a7a44   Miklos Szeredi   vfs: extract comm...
860
  	ssize_t ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
861

edab5fe38   Christoph Hellwig   fs: move more cod...
862
863
864
865
  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_READ))
  		return -EINVAL;
7687a7a44   Miklos Szeredi   vfs: extract comm...
866
  	tot_len = iov_iter_count(iter);
0504c074b   Al Viro   switch {compat_,}...
867
868
  	if (!tot_len)
  		goto out;
19c735868   Christoph Hellwig   fs: remove __do_r...
869
  	ret = rw_verify_area(READ, file, pos, tot_len);
e28cc7157   Linus Torvalds   Relax the rw_veri...
870
  	if (ret < 0)
19c735868   Christoph Hellwig   fs: remove __do_r...
871
  		return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
872

19c735868   Christoph Hellwig   fs: remove __do_r...
873
874
  	if (file->f_op->read_iter)
  		ret = do_iter_readv_writev(file, iter, pos, READ, flags);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
875
  	else
19c735868   Christoph Hellwig   fs: remove __do_r...
876
  		ret = do_loop_readv_writev(file, iter, pos, READ, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
877
  out:
19c735868   Christoph Hellwig   fs: remove __do_r...
878
879
  	if (ret >= 0)
  		fsnotify_access(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
880
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
881
  }
18e9710ee   Christoph Hellwig   fs: implement vfs...
882
  ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
883
  		rwf_t flags)
7687a7a44   Miklos Szeredi   vfs: extract comm...
884
  {
18e9710ee   Christoph Hellwig   fs: implement vfs...
885
886
887
888
889
  	if (!file->f_op->read_iter)
  		return -EINVAL;
  	return do_iter_read(file, iter, ppos, flags);
  }
  EXPORT_SYMBOL(vfs_iter_read);
7687a7a44   Miklos Szeredi   vfs: extract comm...
890

19c735868   Christoph Hellwig   fs: remove __do_r...
891
  static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
892
  		loff_t *pos, rwf_t flags)
19c735868   Christoph Hellwig   fs: remove __do_r...
893
894
895
  {
  	size_t tot_len;
  	ssize_t ret = 0;
03d95eb2f   Al Viro   lift sb_start_wri...
896

edab5fe38   Christoph Hellwig   fs: move more cod...
897
898
899
900
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_WRITE))
  		return -EINVAL;
19c735868   Christoph Hellwig   fs: remove __do_r...
901
902
903
904
  	tot_len = iov_iter_count(iter);
  	if (!tot_len)
  		return 0;
  	ret = rw_verify_area(WRITE, file, pos, tot_len);
7687a7a44   Miklos Szeredi   vfs: extract comm...
905
906
  	if (ret < 0)
  		return ret;
19c735868   Christoph Hellwig   fs: remove __do_r...
907
908
909
910
  	if (file->f_op->write_iter)
  		ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
  	else
  		ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
19c735868   Christoph Hellwig   fs: remove __do_r...
911
912
  	if (ret > 0)
  		fsnotify_modify(file);
7687a7a44   Miklos Szeredi   vfs: extract comm...
913
914
  	return ret;
  }
abbb65899   Christoph Hellwig   fs: implement vfs...
915
  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
916
  		rwf_t flags)
abbb65899   Christoph Hellwig   fs: implement vfs...
917
918
919
920
921
922
  {
  	if (!file->f_op->write_iter)
  		return -EINVAL;
  	return do_iter_write(file, iter, ppos, flags);
  }
  EXPORT_SYMBOL(vfs_iter_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
923
  /*
   * Vectored read from a userspace iovec array: import @vlen entries from
   * @vec (using an on-stack array of UIO_FASTIOV entries as the fast path)
   * and run do_iter_read() at *@pos.
   *
   * Returns the import_iovec() error if the import fails, otherwise the
   * do_iter_read() result.
   */
  ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  		  unsigned long vlen, loff_t *pos, rwf_t flags)
  {
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
  	ssize_t ret;

  	ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
  	if (ret < 0)
  		return ret;

  	ret = do_iter_read(file, &iter, pos, flags);

  	/*
  	 * NOTE(review): presumably import_iovec() leaves iov NULL when the
  	 * on-stack array was sufficient, making this kfree() a no-op in that
  	 * case - confirm against import_iovec().
  	 */
  	kfree(iov);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
939

9725d4cef   Christoph Hellwig   fs: unexport vfs_...
940
  /*
   * Vectored write from a userspace iovec array: import @vlen entries from
   * @vec and run do_iter_write() at *@pos, bracketed by
   * file_start_write()/file_end_write().
   *
   * Returns the import_iovec() error if the import fails, otherwise the
   * do_iter_write() result.
   */
  static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  		   unsigned long vlen, loff_t *pos, rwf_t flags)
  {
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
  	ssize_t ret;

  	ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
  	if (ret < 0)
  		return ret;

  	file_start_write(file);
  	ret = do_iter_write(file, &iter, pos, flags);
  	file_end_write(file);

  	/*
  	 * NOTE(review): presumably import_iovec() leaves iov NULL when the
  	 * on-stack array was sufficient - confirm against import_iovec().
  	 */
  	kfree(iov);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
957

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
958
  /*
   * Backend for readv()/preadv2(pos == -1): vectored read at the file's
   * current position.  The position is read with file_pos_read(), and
   * written back with file_pos_write() only when vfs_readv() did not fail.
   *
   * Returns -EBADF when @fd does not resolve to a file.  A positive result
   * is added to the task's rchar count; the syscr count is bumped on every
   * call, including failed ones.
   */
  static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
  			unsigned long vlen, rwf_t flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t nread;
  	loff_t pos;

  	if (!f.file) {
  		nread = -EBADF;
  		goto account;
  	}

  	pos = file_pos_read(f.file);
  	nread = vfs_readv(f.file, vec, vlen, &pos, flags);
  	if (nread >= 0)
  		file_pos_write(f.file, pos);
  	fdput_pos(f);

  account:
  	if (nread > 0)
  		add_rchar(current, nread);
  	inc_syscr(current);
  	return nread;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
977
  /*
   * Backend for writev()/pwritev2(pos == -1): vectored write at the file's
   * current position.  The position is read with file_pos_read(), and
   * written back with file_pos_write() only when vfs_writev() did not fail.
   *
   * Returns -EBADF when @fd does not resolve to a file.  A positive result
   * is added to the task's wchar count; the syscw count is bumped on every
   * call, including failed ones.
   */
  static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
  			 unsigned long vlen, rwf_t flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t nwritten;
  	loff_t pos;

  	if (!f.file) {
  		nwritten = -EBADF;
  		goto account;
  	}

  	pos = file_pos_read(f.file);
  	nwritten = vfs_writev(f.file, vec, vlen, &pos, flags);
  	if (nwritten >= 0)
  		file_pos_write(f.file, pos);
  	fdput_pos(f);

  account:
  	if (nwritten > 0)
  		add_wchar(current, nwritten);
  	inc_syscw(current);
  	return nwritten;
  }
601cc11d0   Linus Torvalds   Make non-compat p...
996
997
998
999
1000
  static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
  {
  #define HALF_LONG_BITS (BITS_PER_LONG / 2)
  	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1001
  /*
   * Backend for preadv()/preadv2(): vectored read at the explicit offset
   * @pos.  Only the local copy of @pos is passed to vfs_readv().
   *
   * Returns -EINVAL for a negative @pos (before any accounting), -EBADF
   * when @fd does not resolve to a file, -ESPIPE when the file lacks
   * FMODE_PREAD, otherwise the vfs_readv() result.  Once past the offset
   * check, syscr is bumped on every path and rchar on a positive result.
   */
  static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
  			 unsigned long vlen, loff_t pos, rwf_t flags)
  {
  	struct fd f;
  	ssize_t nread;

  	if (pos < 0)
  		return -EINVAL;

  	f = fdget(fd);
  	if (!f.file) {
  		nread = -EBADF;
  	} else {
  		if (f.file->f_mode & FMODE_PREAD)
  			nread = vfs_readv(f.file, vec, vlen, &pos, flags);
  		else
  			nread = -ESPIPE;
  		fdput(f);
  	}

  	if (nread > 0)
  		add_rchar(current, nread);
  	inc_syscr(current);
  	return nread;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1022
  /*
   * Backend for pwritev()/pwritev2(): vectored write at the explicit offset
   * @pos.  Only the local copy of @pos is passed to vfs_writev().
   *
   * Returns -EINVAL for a negative @pos (before any accounting), -EBADF
   * when @fd does not resolve to a file, -ESPIPE when the file lacks
   * FMODE_PWRITE, otherwise the vfs_writev() result.  Once past the offset
   * check, syscw is bumped on every path and wchar on a positive result.
   */
  static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
  			  unsigned long vlen, loff_t pos, rwf_t flags)
  {
  	struct fd f;
  	ssize_t nwritten;

  	if (pos < 0)
  		return -EINVAL;

  	f = fdget(fd);
  	if (!f.file) {
  		nwritten = -EBADF;
  	} else {
  		if (f.file->f_mode & FMODE_PWRITE)
  			nwritten = vfs_writev(f.file, vec, vlen, &pos, flags);
  		else
  			nwritten = -ESPIPE;
  		fdput(f);
  	}

  	if (nwritten > 0)
  		add_wchar(current, nwritten);
  	inc_syscw(current);
  	return nwritten;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
  /* readv(): vectored read at the current file position, no RWF_* flags. */
  SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	const rwf_t no_flags = 0;

  	return do_readv(fd, vec, vlen, no_flags);
  }
  
  /* writev(): vectored write at the current file position, no RWF_* flags. */
  SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	const rwf_t no_flags = 0;

  	return do_writev(fd, vec, vlen, no_flags);
  }
  
  /*
   * preadv(): vectored read at the offset assembled from @pos_h/@pos_l by
   * pos_from_hilo(), with no RWF_* flags.
   */
  SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	return do_preadv(fd, vec, vlen, pos_from_hilo(pos_h, pos_l), 0);
  }
  
  /*
   * preadv2(): like preadv() but with caller-supplied RWF_* @flags.  An
   * assembled offset of -1 selects the current file position (do_readv());
   * any other offset goes through do_preadv().
   */
  SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
  		rwf_t, flags)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);

  	if (pos != -1)
  		return do_preadv(fd, vec, vlen, pos, flags);

  	return do_readv(fd, vec, vlen, flags);
  }
  
  /*
   * pwritev(): vectored write at the offset assembled from @pos_h/@pos_l by
   * pos_from_hilo(), with no RWF_* flags.
   */
  SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	return do_pwritev(fd, vec, vlen, pos_from_hilo(pos_h, pos_l), 0);
  }
  
  /*
   * pwritev2(): like pwritev() but with caller-supplied RWF_* @flags.  An
   * assembled offset of -1 selects the current file position (do_writev());
   * any other offset goes through do_pwritev().
   */
  SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
  		rwf_t, flags)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);

  	if (pos != -1)
  		return do_pwritev(fd, vec, vlen, pos, flags);

  	return do_writev(fd, vec, vlen, flags);
  }
72ec35163   Al Viro   switch compat rea...
1094
  #ifdef CONFIG_COMPAT
72ec35163   Al Viro   switch compat rea...
1095
1096
  static size_t compat_readv(struct file *file,
  			   const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1097
  			   unsigned long vlen, loff_t *pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1098
  {
72ec35163   Al Viro   switch compat rea...
1099
1100
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
ac15ac066   Al Viro   lift iov_iter int...
1101
  	struct iov_iter iter;
72ec35163   Al Viro   switch compat rea...
1102
  	ssize_t ret;
72ec35163   Al Viro   switch compat rea...
1103

26c87fb7d   Christoph Hellwig   fs: remove do_com...
1104
  	ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
edab5fe38   Christoph Hellwig   fs: move more cod...
1105
1106
1107
1108
  	if (ret >= 0) {
  		ret = do_iter_read(file, &iter, pos, flags);
  		kfree(iov);
  	}
72ec35163   Al Viro   switch compat rea...
1109
1110
1111
1112
1113
  	if (ret > 0)
  		add_rchar(current, ret);
  	inc_syscr(current);
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1114
1115
  static size_t do_compat_readv(compat_ulong_t fd,
  				 const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1116
  				 compat_ulong_t vlen, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1117
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1118
  	struct fd f = fdget_pos(fd);
72ec35163   Al Viro   switch compat rea...
1119
1120
1121
1122
1123
1124
  	ssize_t ret;
  	loff_t pos;
  
  	if (!f.file)
  		return -EBADF;
  	pos = f.file->f_pos;
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1125
  	ret = compat_readv(f.file, vec, vlen, &pos, flags);
5faf153eb   Al Viro   don't call file_p...
1126
1127
  	if (ret >= 0)
  		f.file->f_pos = pos;
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1128
  	fdput_pos(f);
72ec35163   Al Viro   switch compat rea...
1129
  	return ret;
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1130

72ec35163   Al Viro   switch compat rea...
1131
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1132
1133
1134
1135
1136
1137
1138
1139
  /* Compat readv(): vectored read at the current position, no RWF_* flags. */
  COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
  		const struct compat_iovec __user *,vec,
  		compat_ulong_t, vlen)
  {
  	const rwf_t no_flags = 0;

  	return do_compat_readv(fd, vec, vlen, no_flags);
  }
  
  static long do_compat_preadv64(unsigned long fd,
378a10f3a   Heiko Carstens   fs/compat: option...
1140
  				  const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1141
  				  unsigned long vlen, loff_t pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
  {
  	struct fd f;
  	ssize_t ret;
  
  	if (pos < 0)
  		return -EINVAL;
  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;
  	ret = -ESPIPE;
  	if (f.file->f_mode & FMODE_PREAD)
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1153
  		ret = compat_readv(f.file, vec, vlen, &pos, flags);
72ec35163   Al Viro   switch compat rea...
1154
1155
1156
  	fdput(f);
  	return ret;
  }
378a10f3a   Heiko Carstens   fs/compat: option...
1157
1158
1159
1160
1161
  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
  /* Compat preadv64(): offset arrives as a single loff_t; no RWF_* flags. */
  COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos)
  {
  	const rwf_t no_flags = 0;

  	return do_compat_preadv64(fd, vec, vlen, pos, no_flags);
  }
  #endif
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1165
  COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
72ec35163   Al Viro   switch compat rea...
1166
  		const struct compat_iovec __user *,vec,
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1167
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
72ec35163   Al Viro   switch compat rea...
1168
1169
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
378a10f3a   Heiko Carstens   fs/compat: option...
1170

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1171
1172
  	return do_compat_preadv64(fd, vec, vlen, pos, 0);
  }
3ebfd81f7   H.J. Lu   x86/syscalls: Add...
1173
1174
1175
  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
  /*
   * Compat preadv64v2(): offset arrives as a single loff_t and @flags is
   * forwarded unchanged to do_compat_preadv64().
   */
  COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos, rwf_t, flags)
  {
  	long ret = do_compat_preadv64(fd, vec, vlen, pos, flags);

  	return ret;
  }
  #endif
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1181
1182
1183
  /*
   * Compat preadv2(): the offset is assembled from two u32 halves.  An
   * offset of -1 selects the current file position (do_compat_readv());
   * any other offset goes through do_compat_preadv64().
   */
  COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
  		const struct compat_iovec __user *,vec,
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
  		rwf_t, flags)
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

  	if (pos != -1)
  		return do_compat_preadv64(fd, vec, vlen, pos, flags);

  	return do_compat_readv(fd, vec, vlen, flags);
  }
  
  static size_t compat_writev(struct file *file,
  			    const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1196
  			    unsigned long vlen, loff_t *pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1197
  {
26c87fb7d   Christoph Hellwig   fs: remove do_com...
1198
1199
1200
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
edab5fe38   Christoph Hellwig   fs: move more cod...
1201
  	ssize_t ret;
72ec35163   Al Viro   switch compat rea...
1202

26c87fb7d   Christoph Hellwig   fs: remove do_com...
1203
  	ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
edab5fe38   Christoph Hellwig   fs: move more cod...
1204
  	if (ret >= 0) {
62473a2d6   Al Viro   move file_{start,...
1205
  		file_start_write(file);
edab5fe38   Christoph Hellwig   fs: move more cod...
1206
  		ret = do_iter_write(file, &iter, pos, flags);
62473a2d6   Al Viro   move file_{start,...
1207
  		file_end_write(file);
edab5fe38   Christoph Hellwig   fs: move more cod...
1208
1209
  		kfree(iov);
  	}
72ec35163   Al Viro   switch compat rea...
1210
1211
1212
1213
1214
  	if (ret > 0)
  		add_wchar(current, ret);
  	inc_syscw(current);
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1215
1216
  static size_t do_compat_writev(compat_ulong_t fd,
  				  const struct compat_iovec __user* vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1217
  				  compat_ulong_t vlen, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1218
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1219
  	struct fd f = fdget_pos(fd);
72ec35163   Al Viro   switch compat rea...
1220
1221
1222
1223
1224
1225
  	ssize_t ret;
  	loff_t pos;
  
  	if (!f.file)
  		return -EBADF;
  	pos = f.file->f_pos;
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1226
  	ret = compat_writev(f.file, vec, vlen, &pos, flags);
5faf153eb   Al Viro   don't call file_p...
1227
1228
  	if (ret >= 0)
  		f.file->f_pos = pos;
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1229
  	fdput_pos(f);
72ec35163   Al Viro   switch compat rea...
1230
1231
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1232
1233
1234
1235
1236
1237
1238
1239
  /* Compat writev(): vectored write at the current position, no RWF_* flags. */
  COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
  		const struct compat_iovec __user *, vec,
  		compat_ulong_t, vlen)
  {
  	const rwf_t no_flags = 0;

  	return do_compat_writev(fd, vec, vlen, no_flags);
  }
  
  static long do_compat_pwritev64(unsigned long fd,
378a10f3a   Heiko Carstens   fs/compat: option...
1240
  				   const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1241
  				   unsigned long vlen, loff_t pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
  {
  	struct fd f;
  	ssize_t ret;
  
  	if (pos < 0)
  		return -EINVAL;
  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;
  	ret = -ESPIPE;
  	if (f.file->f_mode & FMODE_PWRITE)
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1253
  		ret = compat_writev(f.file, vec, vlen, &pos, flags);
72ec35163   Al Viro   switch compat rea...
1254
1255
1256
  	fdput(f);
  	return ret;
  }
378a10f3a   Heiko Carstens   fs/compat: option...
1257
1258
1259
1260
1261
  #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
  /*
   * compat pwritev64(): positional vectored write taking the offset as a
   * single loff_t argument; no RWF_* flags are passed (flags == 0).
   */
  COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
  		const struct compat_iovec __user *, vec,
  		unsigned long, vlen, loff_t, pos)
  {
  	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
  }
  #endif
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1265
  COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
72ec35163   Al Viro   switch compat rea...
1266
  		const struct compat_iovec __user *,vec,
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1267
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
72ec35163   Al Viro   switch compat rea...
1268
1269
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
378a10f3a   Heiko Carstens   fs/compat: option...
1270

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1271
  	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
72ec35163   Al Viro   switch compat rea...
1272
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1273

3ebfd81f7   H.J. Lu   x86/syscalls: Add...
1274
1275
1276
  #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
  /*
   * compat pwritev64v2(): positional vectored write with a single loff_t
   * offset argument and caller-supplied @flags.
   */
  COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
  		const struct compat_iovec __user *, vec,
  		unsigned long, vlen, loff_t, pos, rwf_t, flags)
  {
  	return do_compat_pwritev64(fd, vec, vlen, pos, flags);
  }
  #endif
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1282
1283
  /*
   * compat pwritev2(): reassemble the offset from its 32-bit halves.  An
   * offset of -1 selects the current-position path (do_compat_writev());
   * any other value is used as an explicit position.
   */
  COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
  		const struct compat_iovec __user *, vec,
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

  	if (pos != -1)
  		return do_compat_pwritev64(fd, vec, vlen, pos, flags);

  	return do_compat_writev(fd, vec, vlen, flags);
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1293

72ec35163   Al Viro   switch compat rea...
1294
  #endif
19f4fc3ae   Al Viro   convert sendfile{...
1295
1296
  /*
   * Common implementation behind the sendfile() entry points: splice up to
   * @count bytes from @in_fd to @out_fd.
   *
   * @ppos: when non-NULL the read offset is taken from *@ppos (the input file
   *	  must then have FMODE_PREAD, else -ESPIPE); when NULL the input
   *	  file's f_pos is used.  Whichever one was used is updated only when
   *	  data was actually transferred.
   * @max:  upper bound for the read offset; 0 selects the smaller s_maxbytes
   *	  of the two superblocks involved.
   *
   * The output file is written at its own f_pos, which is advanced when data
   * was transferred.  Returns the result of do_splice_direct() or a negative
   * errno.
   */
  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
  			   size_t count, loff_t max)
  {
  	struct fd src, dst;
  	struct inode *src_inode, *dst_inode;
  	loff_t src_pos;
  	loff_t dst_pos;
  	ssize_t retval;
  	int splice_flags;

  	/*
  	 * Get input file, and verify that it is ok..
  	 */
  	src = fdget(in_fd);
  	if (!src.file) {
  		retval = -EBADF;
  		goto done;
  	}
  	if (!(src.file->f_mode & FMODE_READ)) {
  		retval = -EBADF;
  		goto put_src;
  	}

  	if (ppos) {
  		src_pos = *ppos;
  		if (!(src.file->f_mode & FMODE_PREAD)) {
  			retval = -ESPIPE;
  			goto put_src;
  		}
  	} else {
  		src_pos = src.file->f_pos;
  	}

  	retval = rw_verify_area(READ, src.file, &src_pos, count);
  	if (retval < 0)
  		goto put_src;
  	if (count > MAX_RW_COUNT)
  		count = MAX_RW_COUNT;

  	/*
  	 * Get output file, and verify that it is ok..
  	 */
  	dst = fdget(out_fd);
  	if (!dst.file) {
  		retval = -EBADF;
  		goto put_src;
  	}
  	if (!(dst.file->f_mode & FMODE_WRITE)) {
  		retval = -EBADF;
  		goto put_dst;
  	}

  	src_inode = file_inode(src.file);
  	dst_inode = file_inode(dst.file);
  	dst_pos = dst.file->f_pos;
  	retval = rw_verify_area(WRITE, dst.file, &dst_pos, count);
  	if (retval < 0)
  		goto put_dst;

  	if (!max)
  		max = min(src_inode->i_sb->s_maxbytes, dst_inode->i_sb->s_maxbytes);

  	/* Trim the request to @max; fail if the offset is already past it. */
  	if (unlikely(src_pos + count > max)) {
  		if (src_pos >= max) {
  			retval = -EOVERFLOW;
  			goto put_dst;
  		}
  		count = max - src_pos;
  	}

  	splice_flags = 0;
  #if 0
  	/*
  	 * We need to debate whether we can enable this or not. The
  	 * man page documents EAGAIN return for the output at least,
  	 * and the application is arguably buggy if it doesn't expect
  	 * EAGAIN on a non-blocking file descriptor.
  	 */
  	if (src.file->f_flags & O_NONBLOCK)
  		splice_flags = SPLICE_F_NONBLOCK;
  #endif
  	file_start_write(dst.file);
  	retval = do_splice_direct(src.file, &src_pos, dst.file, &dst_pos,
  				  count, splice_flags);
  	file_end_write(dst.file);

  	if (retval > 0) {
  		add_rchar(current, retval);
  		add_wchar(current, retval);
  		fsnotify_access(src.file);
  		fsnotify_modify(dst.file);
  		dst.file->f_pos = dst_pos;
  		if (ppos)
  			*ppos = src_pos;
  		else
  			src.file->f_pos = src_pos;
  	}

  	inc_syscr(current);
  	inc_syscw(current);
  	if (src_pos > max)
  		retval = -EOVERFLOW;

  put_dst:
  	fdput(dst);
  put_src:
  	fdput(src);
  done:
  	return retval;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1391
  /*
   * sendfile(): when @offset is NULL the input file's own position is used
   * and no limit is passed (max == 0).  Otherwise the off_t at @offset is read,
   * the transfer is bounded by MAX_NON_LFS, and the resulting position is
   * written back to @offset.  A fault on either user access yields -EFAULT.
   */
  SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	off_t off;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(get_user(off, offset)))
  		return -EFAULT;

  	pos = off;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1409
  /*
   * sendfile64(): like sendfile() but with a loff_t user offset and no
   * explicit limit (max == 0 in both paths).  A fault while reading or
   * writing back @offset yields -EFAULT.
   */
  SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  		return -EFAULT;

  	ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
19f4fc3ae   Al Viro   convert sendfile{...
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
  
  #ifdef CONFIG_COMPAT
  /*
   * compat sendfile(): same flow as the native entry point, with the user
   * offset stored as a compat_off_t.  With an explicit @offset the transfer
   * is bounded by MAX_NON_LFS and the new position is written back.
   */
  COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
  		compat_off_t __user *, offset, compat_size_t, count)
  {
  	loff_t pos;
  	off_t off;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(get_user(off, offset)))
  		return -EFAULT;

  	pos = off;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
  
  /*
   * compat sendfile64(): the user offset is a compat_loff_t; no explicit
   * limit is passed to do_sendfile() (max == 0).  The updated position is
   * written back to @offset when one was supplied.
   */
  COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
  		compat_loff_t __user *, offset, compat_size_t, count)
  {
  	loff_t pos;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  		return -EFAULT;

  	ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
  #endif
29732938a   Zach Brown   vfs: add copy_fil...
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
  
  /*
   * copy_file_range() differs from regular file read and write in that it
   * specifically allows return partial success.  When it does so is up to
   * the copy_file_range method.
   *
   * Only regular files on the same superblock are accepted, @flags must be 0,
   * and the output file must be writable and not opened with O_APPEND.
   * Returns a negative errno, 0 for a zero-length request, @len when the
   * clone method succeeded, or otherwise the result of the copy_file_range
   * method / do_splice_direct() fallback.
   */
  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
  			    struct file *file_out, loff_t pos_out,
  			    size_t len, unsigned int flags)
  {
  	struct inode *inode_in = file_inode(file_in);
  	struct inode *inode_out = file_inode(file_out);
  	size_t splice_len;
  	ssize_t ret;

  	if (flags != 0)
  		return -EINVAL;

  	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
  		return -EISDIR;
  	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
  		return -EINVAL;

  	ret = rw_verify_area(READ, file_in, &pos_in, len);
  	if (unlikely(ret))
  		return ret;

  	ret = rw_verify_area(WRITE, file_out, &pos_out, len);
  	if (unlikely(ret))
  		return ret;

  	if (!(file_in->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file_out->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (file_out->f_flags & O_APPEND)
  		return -EBADF;

  	/* this could be relaxed once a method supports cross-fs copies */
  	if (inode_in->i_sb != inode_out->i_sb)
  		return -EXDEV;

  	if (len == 0)
  		return 0;

  	file_start_write(file_out);

  	/*
  	 * Try cloning first, this is supported by more file systems, and
  	 * more efficient if both clone and copy are supported (e.g. NFS).
  	 */
  	if (file_in->f_op->clone_file_range) {
  		ret = file_in->f_op->clone_file_range(file_in, pos_in,
  				file_out, pos_out, len);
  		if (ret == 0) {
  			ret = len;
  			goto account;
  		}
  	}

  	if (file_out->f_op->copy_file_range) {
  		ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
  						      pos_out, len, flags);
  		if (ret != -EOPNOTSUPP)
  			goto account;
  	}

  	/* Last resort: splice, capped at MAX_RW_COUNT bytes per call. */
  	splice_len = len;
  	if (splice_len > MAX_RW_COUNT)
  		splice_len = MAX_RW_COUNT;
  	ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
  			       splice_len, 0);

  account:
  	if (ret > 0) {
  		fsnotify_access(file_in);
  		add_rchar(current, ret);
  		fsnotify_modify(file_out);
  		add_wchar(current, ret);
  	}

  	inc_syscr(current);
  	inc_syscw(current);

  	file_end_write(file_out);

  	return ret;
  }
  EXPORT_SYMBOL(vfs_copy_file_range);
  
  /*
   * copy_file_range(): for each side, a non-NULL user offset pointer supplies
   * the starting position and receives the updated one; a NULL pointer means
   * the file's own f_pos is used and advanced.  Positions are only updated
   * when vfs_copy_file_range() reports that bytes were copied.
   *
   * Returns the result of vfs_copy_file_range(), -EBADF for a bad descriptor,
   * or -EFAULT when a user offset cannot be read or written back.
   */
  SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
  		int, fd_out, loff_t __user *, off_out,
  		size_t, len, unsigned int, flags)
  {
  	struct fd f_in, f_out;
  	loff_t pos_in, pos_out;
  	ssize_t ret;

  	f_in = fdget(fd_in);
  	if (!f_in.file)
  		return -EBADF;

  	f_out = fdget(fd_out);
  	if (!f_out.file) {
  		ret = -EBADF;
  		goto put_in;
  	}

  	if (!off_in) {
  		pos_in = f_in.file->f_pos;
  	} else if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) {
  		ret = -EFAULT;
  		goto put_out;
  	}

  	if (!off_out) {
  		pos_out = f_out.file->f_pos;
  	} else if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) {
  		ret = -EFAULT;
  		goto put_out;
  	}

  	ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
  				  flags);
  	if (ret <= 0)
  		goto put_out;

  	pos_in += ret;
  	pos_out += ret;

  	if (!off_in)
  		f_in.file->f_pos = pos_in;
  	else if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
  		ret = -EFAULT;

  	if (!off_out)
  		f_out.file->f_pos = pos_out;
  	else if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
  		ret = -EFAULT;

  put_out:
  	fdput(f_out);
  put_in:
  	fdput(f_in);
  	return ret;
  }
04b38d601   Christoph Hellwig   vfs: pull btrfs c...
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
  
  /*
   * Validate a clone/dedupe range on @file: reject a negative @pos or a
   * range whose end is negative when viewed as loff_t, honour mandatory
   * locks when the inode has them, and finish with the security hook for
   * read or write access (chosen by @write).
   *
   * Returns 0 or a negative errno.
   */
  static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  {
  	struct inode *inode = file_inode(file);

  	if (unlikely(pos < 0) || unlikely((loff_t) (pos + len) < 0))
  		return -EINVAL;

  	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
  		/* A zero length means "to the end", so lock up to OFFSET_MAX. */
  		loff_t end = len ? pos + len - 1 : OFFSET_MAX;
  		int lock_err = locks_mandatory_area(inode, file, pos, end,
  				write ? F_WRLCK : F_RDLCK);

  		if (lock_err < 0)
  			return lock_err;
  	}

  	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
  }
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1629
1630
1631
1632
  /*
   * Check that the two inodes are eligible for cloning, the ranges make
   * sense, and then flush all dirty data.  Caller must ensure that the
   * inodes have been locked against any other modifications.
   *
   * A zero *@len on a non-dedupe request is expanded to "from @pos_in to the
   * end of the source file" and written back through @len.
   *
   * Returns: 0 for "nothing to clone", 1 for "something to clone", or
   * the usual negative error code.
   */
  int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
  			       struct inode *inode_out, loff_t pos_out,
  			       u64 *len, bool is_dedupe)
  {
  	loff_t blksize = inode_out->i_sb->s_blocksize;
  	loff_t blen;
  	loff_t isize;
  	bool same_inode = (inode_in == inode_out);
  	int error;

  	/* Don't touch certain kinds of inodes */
  	if (IS_IMMUTABLE(inode_out))
  		return -EPERM;

  	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
  		return -ETXTBSY;

  	/* Don't reflink dirs, pipes, sockets... */
  	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
  		return -EISDIR;
  	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
  		return -EINVAL;

  	/* Are we going all the way to the end? */
  	isize = i_size_read(inode_in);
  	if (isize == 0)
  		return 0;

  	/* Zero length dedupe exits immediately; reflink goes to EOF. */
  	if (*len == 0) {
  		if (is_dedupe || pos_in == isize)
  			return 0;
  		if (pos_in > isize)
  			return -EINVAL;
  		*len = isize - pos_in;
  	}

  	/* Ensure offsets don't wrap and the input is inside i_size */
  	if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
  	    pos_in + *len > isize)
  		return -EINVAL;

  	/* Don't allow dedupe past EOF in the dest file */
  	if (is_dedupe) {
  		loff_t disize = i_size_read(inode_out);

  		if (pos_out >= disize || pos_out + *len > disize)
  			return -EINVAL;
  	}

  	/* If we're linking to EOF, continue to the block boundary. */
  	if (pos_in + *len == isize)
  		blen = ALIGN(isize, blksize) - pos_in;
  	else
  		blen = *len;

  	/* Only reflink if we're aligned to block boundaries */
  	if (!IS_ALIGNED(pos_in, blksize) ||
  	    !IS_ALIGNED(pos_in + blen, blksize) ||
  	    !IS_ALIGNED(pos_out, blksize) ||
  	    !IS_ALIGNED(pos_out + blen, blksize))
  		return -EINVAL;

  	/* Don't allow overlapped reflink within the same file */
  	if (same_inode &&
  	    pos_out + blen > pos_in && pos_out < pos_in + blen)
  		return -EINVAL;

  	/* Wait for the completion of any pending IOs on both files */
  	inode_dio_wait(inode_in);
  	if (!same_inode)
  		inode_dio_wait(inode_out);

  	/* Write back dirty pages covering both ranges before comparing/cloning. */
  	error = filemap_write_and_wait_range(inode_in->i_mapping,
  			pos_in, pos_in + *len - 1);
  	if (error)
  		return error;

  	error = filemap_write_and_wait_range(inode_out->i_mapping,
  			pos_out, pos_out + *len - 1);
  	if (error)
  		return error;

  	/*
  	 * Check that the extents are the same.
  	 */
  	if (is_dedupe) {
  		bool is_same = false;

  		error = vfs_dedupe_file_range_compare(inode_in, pos_in,
  				inode_out, pos_out, *len, &is_same);
  		if (error)
  			return error;
  		if (!is_same)
  			return -EBADE;
  	}

  	return 1;
  }
  EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
8a6cee344   Amir Goldstein   vfs: swap names o...
1736
1737
  /*
   * Clone @len bytes from @file_in at @pos_in into @file_out at @pos_out
   * using the source file's ->clone_file_range method.  This helper does not
   * call file_start_write()/file_end_write() itself.
   *
   * Returns 0 on success (after sending fsnotify access/modify events) or a
   * negative errno: -EISDIR/-EINVAL for non-regular files, -EXDEV across
   * superblocks, -EBADF for wrong open modes or an O_APPEND destination,
   * -EOPNOTSUPP without a clone method, -EINVAL when the source range
   * extends past i_size, or whatever the range checks or the method return.
   */
  int do_clone_file_range(struct file *file_in, loff_t pos_in,
  			struct file *file_out, loff_t pos_out, u64 len)
  {
  	struct inode *inode_in = file_inode(file_in);
  	struct inode *inode_out = file_inode(file_out);
  	int error;

  	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
  		return -EISDIR;
  	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
  		return -EINVAL;

  	/*
  	 * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
  	 * the same mount. Practically, they only need to be on the same file
  	 * system.
  	 */
  	if (inode_in->i_sb != inode_out->i_sb)
  		return -EXDEV;

  	if (!(file_in->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file_out->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (file_out->f_flags & O_APPEND)
  		return -EBADF;

  	if (!file_in->f_op->clone_file_range)
  		return -EOPNOTSUPP;

  	error = clone_verify_area(file_in, pos_in, len, false);
  	if (!error)
  		error = clone_verify_area(file_out, pos_out, len, true);
  	if (error)
  		return error;

  	if (pos_in + len > i_size_read(inode_in))
  		return -EINVAL;

  	error = file_in->f_op->clone_file_range(file_in, pos_in,
  			file_out, pos_out, len);
  	if (error)
  		return error;

  	fsnotify_access(file_in);
  	fsnotify_modify(file_out);
  	return 0;
  }
8a6cee344   Amir Goldstein   vfs: swap names o...
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
  EXPORT_SYMBOL(do_clone_file_range);
  
  /*
   * Wrapper around do_clone_file_range() that brackets the operation with
   * file_start_write()/file_end_write() on the destination file and passes
   * the helper's return value through.
   */
  int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
  			 struct file *file_out, loff_t pos_out, u64 len)
  {
  	int error;

  	file_start_write(file_out);
  	error = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
  	file_end_write(file_out);

  	return error;
  }
04b38d601   Christoph Hellwig   vfs: pull btrfs c...
1790
  EXPORT_SYMBOL(vfs_clone_file_range);
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
1791

876bec6f9   Darrick J. Wong   vfs: refactor clo...
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
  /*
   * Read the page of @inode's mapping that contains byte @offset and return
   * it locked.  Returns the ERR_PTR from read_mapping_page() on failure, or
   * ERR_PTR(-EIO) when the page came back not up to date (its reference is
   * dropped in that case).
   */
  static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
  {
  	struct page *page;

  	page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
  	if (IS_ERR(page))
  		return page;

  	if (PageUptodate(page)) {
  		lock_page(page);
  		return page;
  	}

  	put_page(page);
  	return ERR_PTR(-EIO);
  }
  
  /*
   * Compare extents of two files to see if they are the same.
   * Caller must have locked both inodes to prevent write races.
   *
   * The ranges are walked in chunks that never cross a page boundary in
   * either file.  On success 0 is returned and *@is_same says whether every
   * compared byte matched; on failure a negative errno is returned and
   * *@is_same is left untouched.
   */
  int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
  				  struct inode *dest, loff_t destoff,
  				  loff_t len, bool *is_same)
  {
  	bool same = true;

  	while (len) {
  		loff_t src_poff = srcoff & (PAGE_SIZE - 1);
  		loff_t dest_poff = destoff & (PAGE_SIZE - 1);
  		struct page *src_page;
  		struct page *dest_page;
  		void *src_addr;
  		void *dest_addr;
  		loff_t cmp_len;

  		/* Largest chunk that stays inside one page on both sides. */
  		cmp_len = min(PAGE_SIZE - src_poff,
  			      PAGE_SIZE - dest_poff);
  		cmp_len = min(cmp_len, len);
  		if (cmp_len <= 0)
  			return -EINVAL;

  		src_page = vfs_dedupe_get_page(src, srcoff);
  		if (IS_ERR(src_page))
  			return PTR_ERR(src_page);

  		dest_page = vfs_dedupe_get_page(dest, destoff);
  		if (IS_ERR(dest_page)) {
  			unlock_page(src_page);
  			put_page(src_page);
  			return PTR_ERR(dest_page);
  		}

  		src_addr = kmap_atomic(src_page);
  		dest_addr = kmap_atomic(dest_page);

  		flush_dcache_page(src_page);
  		flush_dcache_page(dest_page);

  		same = memcmp(src_addr + src_poff, dest_addr + dest_poff,
  			      cmp_len) == 0;

  		/* Release in the reverse order of acquisition. */
  		kunmap_atomic(dest_addr);
  		kunmap_atomic(src_addr);
  		unlock_page(dest_page);
  		unlock_page(src_page);
  		put_page(dest_page);
  		put_page(src_page);

  		if (!same)
  			break;

  		srcoff += cmp_len;
  		destoff += cmp_len;
  		len -= cmp_len;
  	}

  	*is_same = same;
  	return 0;
  }
  EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
  int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
  {
  	struct file_dedupe_range_info *info;
  	struct inode *src = file_inode(file);
  	u64 off;
  	u64 len;
  	int i;
  	int ret;
  	bool is_admin = capable(CAP_SYS_ADMIN);
  	u16 count = same->dest_count;
  	struct file *dst_file;
  	loff_t dst_off;
  	ssize_t deduped;
  
  	if (!(file->f_mode & FMODE_READ))
  		return -EINVAL;
  
  	if (same->reserved1 || same->reserved2)
  		return -EINVAL;
  
  	off = same->src_offset;
  	len = same->src_length;
  
  	ret = -EISDIR;
  	if (S_ISDIR(src->i_mode))
  		goto out;
  
  	ret = -EINVAL;
  	if (!S_ISREG(src->i_mode))
  		goto out;
  
  	ret = clone_verify_area(file, off, len, false);
  	if (ret < 0)
  		goto out;
  	ret = 0;
22725ce4e   Darrick J. Wong   vfs: fix isize/po...
1922
1923
  	if (off + len > i_size_read(src))
  		return -EINVAL;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
  	/* pre-format output fields to sane values */
  	for (i = 0; i < count; i++) {
  		same->info[i].bytes_deduped = 0ULL;
  		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
  	}
  
  	for (i = 0, info = same->info; i < count; i++, info++) {
  		struct inode *dst;
  		struct fd dst_fd = fdget(info->dest_fd);
  
  		dst_file = dst_fd.file;
  		if (!dst_file) {
  			info->status = -EBADF;
  			goto next_loop;
  		}
  		dst = file_inode(dst_file);
  
  		ret = mnt_want_write_file(dst_file);
  		if (ret) {
  			info->status = ret;
  			goto next_loop;
  		}
  
  		dst_off = info->dest_offset;
  		ret = clone_verify_area(dst_file, dst_off, len, true);
  		if (ret < 0) {
  			info->status = ret;
  			goto next_file;
  		}
  		ret = 0;
  
  		if (info->reserved) {
  			info->status = -EINVAL;
  		} else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
  			info->status = -EINVAL;
  		} else if (file->f_path.mnt != dst_file->f_path.mnt) {
  			info->status = -EXDEV;
  		} else if (S_ISDIR(dst->i_mode)) {
  			info->status = -EISDIR;
  		} else if (dst_file->f_op->dedupe_file_range == NULL) {
  			info->status = -EINVAL;
  		} else {
  			deduped = dst_file->f_op->dedupe_file_range(file, off,
  							len, dst_file,
  							info->dest_offset);
  			if (deduped == -EBADE)
  				info->status = FILE_DEDUPE_RANGE_DIFFERS;
  			else if (deduped < 0)
  				info->status = deduped;
  			else
  				info->bytes_deduped += deduped;
  		}
  
  next_file:
  		mnt_drop_write_file(dst_file);
  next_loop:
  		fdput(dst_fd);
e62e560fc   Darrick J. Wong   vfs: abort dedupe...
1981
1982
1983
  
  		if (fatal_signal_pending(current))
  			goto out;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
1984
1985
1986
1987
1988
1989
  	}
  
  out:
  	return ret;
  }
  EXPORT_SYMBOL(vfs_dedupe_file_range);