Blame view

fs/read_write.c 52.7 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
  /*
   *  linux/fs/read_write.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
b12fb7f46   Ingo Molnar   sched/headers: Pr...
7
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
  #include <linux/stat.h>
b12fb7f46   Ingo Molnar   sched/headers: Pr...
9
  #include <linux/sched/xacct.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
11
12
  #include <linux/fcntl.h>
  #include <linux/file.h>
  #include <linux/uio.h>
0eeca2830   Robert Love   [PATCH] inotify
13
  #include <linux/fsnotify.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
  #include <linux/security.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
15
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/syscalls.h>
e28cc7157   Linus Torvalds   Relax the rw_veri...
17
  #include <linux/pagemap.h>
d6b29d7ce   Jens Axboe   splice: divorce t...
18
  #include <linux/splice.h>
561c67319   Al Viro   switch lseek to C...
19
  #include <linux/compat.h>
29732938a   Zach Brown   vfs: add copy_fil...
20
  #include <linux/mount.h>
2feb55f89   Wouter van Kesteren   fs: allow no_seek...
21
  #include <linux/fs.h>
06ae43f34   Al Viro   Don't bother with...
22
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
24
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
  #include <asm/unistd.h>
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
26
  const struct file_operations generic_ro_fops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
  	.llseek		= generic_file_llseek,
aad4f8bb4   Al Viro   switch simple gen...
28
  	.read_iter	= generic_file_read_iter,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  	.mmap		= generic_file_readonly_mmap,
534f2aaa6   Jens Axboe   sys_sendfile: swi...
30
  	.splice_read	= generic_file_splice_read,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
32
33
  };
  
  EXPORT_SYMBOL(generic_ro_fops);
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
34
  static inline bool unsigned_offsets(struct file *file)
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
35
  {
cccb5a1e6   Al Viro   fix signedness me...
36
  	return file->f_mode & FMODE_UNSIGNED_OFFSET;
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
37
  }
46a1c2c7a   Jie Liu   vfs: export lseek...
38
39
40
41
42
43
44
45
46
47
48
49
50
  /**
   * vfs_setpos - update the file offset for lseek
   * @file:	file structure in question
   * @offset:	file offset to seek to
   * @maxsize:	maximum file size
   *
   * Low-level filesystem helper that validates @offset and, when it differs
   * from the current position, stores it in @file->f_pos and resets
   * @file->f_version to zero.
   *
   * A negative @offset is accepted only for files using unsigned offsets;
   * an @offset larger than @maxsize is never accepted.
   *
   * Return the specified offset on success and -EINVAL on invalid offset.
   */
  loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
  {
  	bool bad_sign = offset < 0 && !unsigned_offsets(file);

  	if (bad_sign || offset > maxsize)
  		return -EINVAL;

  	/* Nothing to rewrite when the position is already correct. */
  	if (file->f_pos == offset)
  		return offset;

  	file->f_pos = offset;
  	file->f_version = 0;
  	return offset;
  }
46a1c2c7a   Jie Liu   vfs: export lseek...
63
  EXPORT_SYMBOL(vfs_setpos);
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
64

3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
65
  /**
5760495a8   Andi Kleen   vfs: add generic_...
66
   * generic_file_llseek_size - generic llseek implementation for regular files
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
67
68
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
69
   * @whence:	type of seek
e8b96eb50   Eric Sandeen   vfs: allow custom...
70
71
   * @size:	max size of this file in file system
   * @eof:	offset used for SEEK_END position
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
72
   *
5760495a8   Andi Kleen   vfs: add generic_...
73
   * This is a variant of generic_file_llseek that allows passing in a custom
e8b96eb50   Eric Sandeen   vfs: allow custom...
74
   * maximum file size and a custom EOF position, for e.g. hashed directories
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
75
76
   *
   * Synchronization:
5760495a8   Andi Kleen   vfs: add generic_...
77
   * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
78
79
   * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
   * read/writes behave like SEEK_SET against seeks.
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
80
   */
9465efc9e   Andi Kleen   Remove BKL from r...
81
  loff_t
965c8e59c   Andrew Morton   lseek: the "whenc...
82
  generic_file_llseek_size(struct file *file, loff_t offset, int whence,
e8b96eb50   Eric Sandeen   vfs: allow custom...
83
  		loff_t maxsize, loff_t eof)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
84
  {
965c8e59c   Andrew Morton   lseek: the "whenc...
85
  	switch (whence) {
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
86
  	case SEEK_END:
e8b96eb50   Eric Sandeen   vfs: allow custom...
87
  		offset += eof;
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
88
89
  		break;
  	case SEEK_CUR:
5b6f1eb97   Alain Knaff   vfs: lseek(fd, 0,...
90
91
92
93
94
95
96
97
  		/*
  		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  		 * position-querying operation.  Avoid rewriting the "same"
  		 * f_pos value back to the file because a concurrent read(),
  		 * write() or lseek() might have altered it
  		 */
  		if (offset == 0)
  			return file->f_pos;
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
98
99
100
101
102
103
  		/*
  		 * f_lock protects against read/modify/write race with other
  		 * SEEK_CURs. Note that parallel writes and reads behave
  		 * like SEEK_SET.
  		 */
  		spin_lock(&file->f_lock);
46a1c2c7a   Jie Liu   vfs: export lseek...
104
  		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
105
106
  		spin_unlock(&file->f_lock);
  		return offset;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
107
108
109
110
111
  	case SEEK_DATA:
  		/*
  		 * In the generic case the entire file is data, so as long as
  		 * offset isn't at the end of the file then the offset is data.
  		 */
fc46820b2   Andreas Gruenbacher   vfs: Return -ENXI...
112
  		if ((unsigned long long)offset >= eof)
982d81658   Josef Bacik   fs: add SEEK_HOLE...
113
114
115
116
117
118
119
  			return -ENXIO;
  		break;
  	case SEEK_HOLE:
  		/*
  		 * There is a virtual hole at the end of the file, so as long as
  		 * offset isn't i_size or larger, return i_size.
  		 */
fc46820b2   Andreas Gruenbacher   vfs: Return -ENXI...
120
  		if ((unsigned long long)offset >= eof)
982d81658   Josef Bacik   fs: add SEEK_HOLE...
121
  			return -ENXIO;
e8b96eb50   Eric Sandeen   vfs: allow custom...
122
  		offset = eof;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
123
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
124
  	}
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
125

46a1c2c7a   Jie Liu   vfs: export lseek...
126
  	return vfs_setpos(file, offset, maxsize);
5760495a8   Andi Kleen   vfs: add generic_...
127
128
129
130
131
132
133
  }
  EXPORT_SYMBOL(generic_file_llseek_size);
  
  /**
   * generic_file_llseek - generic llseek implementation for regular files
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
134
   * @whence:	type of seek
5760495a8   Andi Kleen   vfs: add generic_...
135
136
137
   *
   * This is a generic implemenation of ->llseek useable for all normal local
   * filesystems.  It just updates the file offset to the value specified by
546ae2d2f   Ming Lei   fs/read_write.c: ...
138
   * @offset and @whence.
5760495a8   Andi Kleen   vfs: add generic_...
139
   */
965c8e59c   Andrew Morton   lseek: the "whenc...
140
  loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
5760495a8   Andi Kleen   vfs: add generic_...
141
142
  {
  	struct inode *inode = file->f_mapping->host;
965c8e59c   Andrew Morton   lseek: the "whenc...
143
  	return generic_file_llseek_size(file, offset, whence,
e8b96eb50   Eric Sandeen   vfs: allow custom...
144
145
  					inode->i_sb->s_maxbytes,
  					i_size_read(inode));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146
  }
9465efc9e   Andi Kleen   Remove BKL from r...
147
  EXPORT_SYMBOL(generic_file_llseek);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148

ae6afc3f5   jan Blunck   vfs: introduce no...
149
  /**
   * fixed_size_llseek - llseek implementation for fixed-sized devices
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   * @size:	size of the file
   *
   * Only SEEK_SET, SEEK_CUR and SEEK_END are accepted; any other @whence
   * yields -EINVAL.  @size is used both as the maximum offset and as the
   * end-of-file position.
   */
  loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
  {
  	if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END)
  		return -EINVAL;

  	return generic_file_llseek_size(file, offset, whence, size, size);
  }
  EXPORT_SYMBOL(fixed_size_llseek);
  
  /**
   * no_seek_end_llseek - llseek implementation accepting only SEEK_SET/SEEK_CUR
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   *
   * SEEK_SET and SEEK_CUR are handed to generic_file_llseek_size() with
   * OFFSET_MAX as the maximum offset; every other @whence, including
   * SEEK_END, yields -EINVAL.
   */
  loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
  {
  	if (whence != SEEK_SET && whence != SEEK_CUR)
  		return -EINVAL;

  	return generic_file_llseek_size(file, offset, whence, OFFSET_MAX, 0);
  }
  EXPORT_SYMBOL(no_seek_end_llseek);
  
  /**
   * no_seek_end_llseek_size - size-limited llseek accepting only SEEK_SET/SEEK_CUR
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   * @size:	maximal offset allowed
   *
   * SEEK_SET and SEEK_CUR are handed to generic_file_llseek_size() with
   * @size as the maximum offset; every other @whence, including SEEK_END,
   * yields -EINVAL.
   */
  loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
  {
  	if (whence != SEEK_SET && whence != SEEK_CUR)
  		return -EINVAL;

  	return generic_file_llseek_size(file, offset, whence, size, 0);
  }
  EXPORT_SYMBOL(no_seek_end_llseek_size);
  
  /**
ae6afc3f5   jan Blunck   vfs: introduce no...
209
210
211
   * noop_llseek - No Operation Performed llseek implementation
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
212
   * @whence:	type of seek
ae6afc3f5   jan Blunck   vfs: introduce no...
213
214
215
216
217
218
   *
   * This is an implementation of ->llseek useable for the rare special case when
   * userspace expects the seek to succeed but the (device) file is actually not
   * able to perform the seek. In this case you use noop_llseek() instead of
   * falling back to the default implementation of ->llseek.
   */
965c8e59c   Andrew Morton   lseek: the "whenc...
219
  loff_t noop_llseek(struct file *file, loff_t offset, int whence)
ae6afc3f5   jan Blunck   vfs: introduce no...
220
221
222
223
  {
  	return file->f_pos;
  }
  EXPORT_SYMBOL(noop_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
224
  /*
   * llseek implementation that refuses every seek: all arguments are
   * ignored and -ESPIPE is returned.
   */
  loff_t no_llseek(struct file *file, loff_t offset, int whence)
  {
  	const loff_t err = -ESPIPE;

  	return err;
  }
  EXPORT_SYMBOL(no_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
229
  /**
   * default_llseek - llseek implementation serialized by the inode lock
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   *
   * Computes the new position from @offset and @whence while holding the
   * inode lock and stores it in @file->f_pos (resetting @file->f_version)
   * when it differs from the current position.  A @whence value without a
   * case below leaves @offset untouched, i.e. it is treated as an absolute
   * position.
   *
   * SEEK_DATA and SEEK_HOLE treat the whole file as data followed by a
   * virtual hole at i_size.  The offset is compared as an unsigned value so
   * that negative offsets are rejected with -ENXIO, matching
   * generic_file_llseek_size(); with a signed compare a negative SEEK_HOLE
   * offset would be answered with i_size and a negative SEEK_DATA offset
   * with -EINVAL.
   *
   * Return: the resulting offset on success, -ENXIO for a SEEK_DATA or
   * SEEK_HOLE offset that is negative or not below i_size, -EINVAL when the
   * resulting offset is negative and the file does not use unsigned offsets.
   */
  loff_t default_llseek(struct file *file, loff_t offset, int whence)
  {
  	struct inode *inode = file_inode(file);
  	loff_t retval;
  	loff_t eof;

  	inode_lock(inode);
  	switch (whence) {
  	case SEEK_END:
  		offset += i_size_read(inode);
  		break;
  	case SEEK_CUR:
  		/* lseek(fd, 0, SEEK_CUR) only queries the position. */
  		if (offset == 0) {
  			retval = file->f_pos;
  			goto out;
  		}
  		offset += file->f_pos;
  		break;
  	case SEEK_DATA:
  		/*
  		 * In the generic case the entire file is data, so as
  		 * long as offset isn't at the end of the file then the
  		 * offset is data.
  		 */
  		eof = i_size_read(inode);
  		if ((unsigned long long)offset >= eof) {
  			retval = -ENXIO;
  			goto out;
  		}
  		break;
  	case SEEK_HOLE:
  		/*
  		 * There is a virtual hole at the end of the file, so
  		 * as long as offset isn't i_size or larger, return
  		 * i_size.
  		 */
  		eof = i_size_read(inode);
  		if ((unsigned long long)offset >= eof) {
  			retval = -ENXIO;
  			goto out;
  		}
  		offset = eof;
  		break;
  	}

  	retval = -EINVAL;
  	if (offset >= 0 || unsigned_offsets(file)) {
  		if (offset != file->f_pos) {
  			file->f_pos = offset;
  			file->f_version = 0;
  		}
  		retval = offset;
  	}
  out:
  	inode_unlock(inode);
  	return retval;
  }
  EXPORT_SYMBOL(default_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
283
  /*
   * Dispatch a seek request: the file's own ->llseek is used when the file
   * has FMODE_LSEEK set and provides that method, otherwise no_llseek()
   * handles the request.
   */
  loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
  {
  	if ((file->f_mode & FMODE_LSEEK) && file->f_op->llseek)
  		return file->f_op->llseek(file, offset, whence);

  	return no_llseek(file, offset, whence);
  }
  EXPORT_SYMBOL(vfs_llseek);
76847e434   Dominik Brodowski   fs: add ksys_lsee...
295
  /*
   * Seek on the file behind descriptor @fd.
   *
   * Returns the new position, -EBADF when @fd has no file, -EINVAL when
   * @whence is larger than SEEK_MAX, -EOVERFLOW when the resulting position
   * does not fit into off_t, or whatever error vfs_llseek() reports.
   */
  off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
  {
  	struct fd f = fdget_pos(fd);
  	off_t retval = -EINVAL;
  	loff_t newpos;

  	if (!f.file)
  		return -EBADF;

  	if (whence > SEEK_MAX)
  		goto out;

  	newpos = vfs_llseek(f.file, offset, whence);
  	retval = newpos;
  	/* LFS: truncation should only happen on 32 bit platforms */
  	if ((loff_t)retval != newpos)
  		retval = -EOVERFLOW;
  out:
  	fdput_pos(f);
  	return retval;
  }
76847e434   Dominik Brodowski   fs: add ksys_lsee...
312
313
314
315
  /* lseek system call: forwards fd, offset and whence unchanged to ksys_lseek(). */
  SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
  {
  	return ksys_lseek(fd, offset, whence);
  }
561c67319   Al Viro   switch lseek to C...
316
317
318
  #ifdef CONFIG_COMPAT
  /* Compat lseek system call: takes a compat_off_t offset and forwards to ksys_lseek(). */
  COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
  {
76847e434   Dominik Brodowski   fs: add ksys_lsee...
319
  	return ksys_lseek(fd, offset, whence);
561c67319   Al Viro   switch lseek to C...
320
321
  }
  #endif
caf6f9c8a   Arnd Bergmann   asm-generic: Remo...
322
  #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT)
003d7ab47   Heiko Carstens   [CVE-2009-0029] S...
323
324
  SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
  		unsigned long, offset_low, loff_t __user *, result,
965c8e59c   Andrew Morton   lseek: the "whenc...
325
  		unsigned int, whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
327
  {
  	int retval;
d7a15f8d0   Eric Biggers   vfs: atomic f_pos...
328
  	struct fd f = fdget_pos(fd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
329
  	loff_t offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330

2903ff019   Al Viro   switch simple cas...
331
332
  	if (!f.file)
  		return -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
333
334
  
  	retval = -EINVAL;
965c8e59c   Andrew Morton   lseek: the "whenc...
335
  	if (whence > SEEK_MAX)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
336
  		goto out_putf;
2903ff019   Al Viro   switch simple cas...
337
  	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
965c8e59c   Andrew Morton   lseek: the "whenc...
338
  			whence);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
339
340
341
342
343
344
345
346
  
  	retval = (int)offset;
  	if (offset >= 0) {
  		retval = -EFAULT;
  		if (!copy_to_user(result, &offset, sizeof(offset)))
  			retval = 0;
  	}
  out_putf:
d7a15f8d0   Eric Biggers   vfs: atomic f_pos...
347
  	fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
348
349
350
  	return retval;
  }
  #endif
68d70d03f   Al Viro   constify rw_verif...
351
  /*
   * Validate a read or write of @count bytes on @file before it is issued.
   *
   * @read_write is compared against READ to pick the lock type and the
   * permission mask.  @ppos may be NULL, in which case the position and
   * mandatory-lock checks are skipped.
   *
   * Returns -EINVAL when @count is negative as ssize_t or when the range
   * needs unsigned offsets the file does not use, -EOVERFLOW when a negative
   * position plus @count would wrap, a negative error from
   * locks_mandatory_area(), or the result of security_file_permission().
   */
  int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
  {
  	struct inode *inode = file_inode(file);
  	bool reading = (read_write == READ);

  	if (unlikely((ssize_t) count < 0))
  		return -EINVAL;

  	/*
  	 * ranged mandatory locking does not apply to streams - it makes sense
  	 * only for files where position has a meaning.
  	 */
  	if (ppos) {
  		loff_t pos = *ppos;
  		int err;

  		if (unlikely(pos < 0)) {
  			if (!unsigned_offsets(file))
  				return -EINVAL;
  			if (count >= -pos) /* both values are in 0..LLONG_MAX */
  				return -EOVERFLOW;
  		} else if (unlikely((loff_t) (pos + count) < 0)) {
  			if (!unsigned_offsets(file))
  				return -EINVAL;
  		}

  		if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
  			err = locks_mandatory_area(inode, file, pos, pos + count - 1,
  					reading ? F_RDLCK : F_WRLCK);
  			if (err < 0)
  				return err;
  		}
  	}

  	return security_file_permission(file, reading ? MAY_READ : MAY_WRITE);
  }
5d5d56897   Al Viro   make new_sync_{re...
388
  /*
   * Perform a synchronous read through the file's ->read_iter method by
   * wrapping the user buffer in a single-segment iov_iter.
   *
   * The kiocb starts at *@ppos (or 0 when @ppos is NULL) and, when @ppos is
   * given, its final position is always written back.
   */
  static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  {
  	struct iovec vec = { .iov_base = buf, .iov_len = len };
  	struct iov_iter to;
  	struct kiocb req;
  	ssize_t nread;

  	init_sync_kiocb(&req, filp);
  	if (ppos)
  		req.ki_pos = *ppos;
  	else
  		req.ki_pos = 0;
  	iov_iter_init(&to, READ, &vec, 1, len);

  	nread = call_read_iter(filp, &req, &to);
  	/* A synchronous kiocb must never be left queued. */
  	BUG_ON(nread == -EIOCBQUEUED);

  	if (ppos)
  		*ppos = req.ki_pos;
  	return nread;
  }
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
404
405
406
  /*
   * Read from @file using whichever method it provides: ->read is preferred,
   * ->read_iter is used through new_sync_read() otherwise, and -EINVAL is
   * returned when the file has neither.
   */
  ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
  		   loff_t *pos)
  {
  	if (file->f_op->read)
  		return file->f_op->read(file, buf, count, pos);

  	if (file->f_op->read_iter)
  		return new_sync_read(file, buf, count, pos);

  	return -EINVAL;
  }
bdd1d2d3d   Christoph Hellwig   fs: fix kernel_re...
414
  /*
   * Read from @file into the kernel buffer @buf via vfs_read().
   *
   * The address limit is switched to KERNEL_DS for the duration of the call
   * and restored afterwards.
   */
  ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
  {
  	mm_segment_t saved_fs = get_fs();
  	ssize_t nread;

  	set_fs(KERNEL_DS);
  	/* The cast to a user pointer is valid due to the set_fs() */
  	nread = vfs_read(file, (void __user *)buf, count, pos);
  	set_fs(saved_fs);

  	return nread;
  }
  EXPORT_SYMBOL(kernel_read);
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
427

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
428
429
430
431
432
433
  /*
   * Read up to @count bytes from @file into the user buffer @buf.
   *
   * Returns -EBADF when FMODE_READ is not set, -EINVAL when FMODE_CAN_READ
   * is not set, -EFAULT when @buf fails access_ok(), a non-zero result of
   * rw_verify_area(), or the result of __vfs_read().  @count is capped at
   * MAX_RW_COUNT.  A positive result triggers the fsnotify access event and
   * read-char accounting; the read syscall counter is bumped for every
   * request that reaches __vfs_read().
   */
  ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;

  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_READ))
  		return -EINVAL;
  	if (unlikely(!access_ok(buf, count)))
  		return -EFAULT;

  	ret = rw_verify_area(READ, file, pos, count);
  	if (ret)
  		return ret;

  	if (count > MAX_RW_COUNT)
  		count = MAX_RW_COUNT;

  	ret = __vfs_read(file, buf, count, pos);
  	if (ret > 0) {
  		fsnotify_access(file);
  		add_rchar(current, ret);
  	}
  	inc_syscr(current);

  	return ret;
  }
5d5d56897   Al Viro   make new_sync_{re...
453
  /*
   * Perform a synchronous write through the file's ->write_iter method by
   * wrapping the user buffer in a single-segment iov_iter.
   *
   * The kiocb starts at *@ppos (or 0 when @ppos is NULL).  The final
   * position is written back only when something was written (result > 0)
   * and @ppos is given.
   */
  static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
  {
  	struct iovec vec = { .iov_base = (void __user *)buf, .iov_len = len };
  	struct iov_iter from;
  	struct kiocb req;
  	ssize_t nwritten;

  	init_sync_kiocb(&req, filp);
  	if (ppos)
  		req.ki_pos = *ppos;
  	else
  		req.ki_pos = 0;
  	iov_iter_init(&from, WRITE, &vec, 1, len);

  	nwritten = call_write_iter(filp, &req, &from);
  	/* A synchronous kiocb must never be left queued. */
  	BUG_ON(nwritten == -EIOCBQUEUED);

  	if (ppos && nwritten > 0)
  		*ppos = req.ki_pos;
  	return nwritten;
  }
12e1e7af1   Geert Uytterhoeven   vfs: Make __vfs_w...
469
470
  /*
   * Write to @file using whichever method it provides: ->write is preferred,
   * ->write_iter is used through new_sync_write() otherwise, and -EINVAL is
   * returned when the file has neither.
   */
  static ssize_t __vfs_write(struct file *file, const char __user *p,
  			   size_t count, loff_t *pos)
  {
  	if (file->f_op->write)
  		return file->f_op->write(file, p, count, pos);

  	if (file->f_op->write_iter)
  		return new_sync_write(file, p, count, pos);

  	return -EINVAL;
  }
493c84c07   Al Viro   new helper: __vfs...
479

73e18f7c0   Christoph Hellwig   fs: make the buf ...
480
  /*
   * Write the kernel buffer @buf to @file by calling __vfs_write() directly
   * with the address limit switched to KERNEL_DS.
   *
   * Returns -EINVAL when FMODE_CAN_WRITE is not set, otherwise the result
   * of __vfs_write().  @count is capped at MAX_RW_COUNT.  A positive result
   * triggers the fsnotify modify event and write-char accounting; the write
   * syscall counter is bumped once the write has been attempted.
   */
  ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
  {
  	mm_segment_t saved_fs;
  	ssize_t nwritten;

  	if (!(file->f_mode & FMODE_CAN_WRITE))
  		return -EINVAL;

  	if (count > MAX_RW_COUNT)
  		count = MAX_RW_COUNT;

  	saved_fs = get_fs();
  	set_fs(KERNEL_DS);
  	nwritten = __vfs_write(file, (__force const char __user *)buf,
  			       count, pos);
  	set_fs(saved_fs);

  	if (nwritten > 0) {
  		fsnotify_modify(file);
  		add_wchar(current, nwritten);
  	}
  	inc_syscw(current);

  	return nwritten;
  }
2ec3a12a6   Al Viro   cachefiles_write_...
501
  EXPORT_SYMBOL(__kernel_write);
e13ec939e   Christoph Hellwig   fs: fix kernel_wr...
502
503
  /*
   * Write the kernel buffer @buf to @file via vfs_write().
   *
   * The address limit is switched to KERNEL_DS for the duration of the call
   * and restored afterwards.
   */
  ssize_t kernel_write(struct file *file, const void *buf, size_t count,
  			    loff_t *pos)
  {
  	mm_segment_t saved_fs = get_fs();
  	ssize_t nwritten;

  	set_fs(KERNEL_DS);
  	/* The cast to a user pointer is valid due to the set_fs() */
  	nwritten = vfs_write(file, (__force const char __user *)buf, count, pos);
  	set_fs(saved_fs);

  	return nwritten;
  }
  EXPORT_SYMBOL(kernel_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
517
518
519
520
521
522
  /*
   * Write up to @count bytes from the user buffer @buf to @file.
   *
   * Returns -EBADF when FMODE_WRITE is not set, -EINVAL when FMODE_CAN_WRITE
   * is not set, -EFAULT when @buf fails access_ok(), a non-zero result of
   * rw_verify_area(), or the result of __vfs_write().  @count is capped at
   * MAX_RW_COUNT.  The write itself runs between file_start_write() and
   * file_end_write().  A positive result triggers the fsnotify modify event
   * and write-char accounting; the write syscall counter is bumped for
   * every request that reaches __vfs_write().
   */
  ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;

  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_WRITE))
  		return -EINVAL;
  	if (unlikely(!access_ok(buf, count)))
  		return -EFAULT;

  	ret = rw_verify_area(WRITE, file, pos, count);
  	if (ret)
  		return ret;

  	if (count > MAX_RW_COUNT)
  		count = MAX_RW_COUNT;

  	file_start_write(file);
  	ret = __vfs_write(file, buf, count, pos);
  	if (ret > 0) {
  		fsnotify_modify(file);
  		add_wchar(current, ret);
  	}
  	inc_syscw(current);
  	file_end_write(file);

  	return ret;
  }
438ab720c   Kirill Smelkov   vfs: pass ppos=NU...
544
545
  /* file_ppos returns &file->f_pos or NULL if file is stream */
  static inline loff_t *file_ppos(struct file *file)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
546
  {
438ab720c   Kirill Smelkov   vfs: pass ppos=NU...
547
  	return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
548
  }
3ce4a7bf6   Dominik Brodowski   fs: add ksys_read...
549
  /*
   * Read from the file behind descriptor @fd at its current position.
   *
   * For files with a position, the read works on a local copy of f_pos that
   * is stored back only when vfs_read() did not fail; for streams a NULL
   * position is passed.  Returns -EBADF when @fd has no file, otherwise the
   * result of vfs_read().
   */
  ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
  {
  	struct fd f = fdget_pos(fd);
  	loff_t local_pos;
  	loff_t *ppos;
  	ssize_t ret;

  	if (!f.file)
  		return -EBADF;

  	ppos = file_ppos(f.file);
  	if (ppos) {
  		local_pos = *ppos;
  		ppos = &local_pos;
  	}

  	ret = vfs_read(f.file, buf, count, ppos);
  	if (ppos && ret >= 0)
  		f.file->f_pos = local_pos;

  	fdput_pos(f);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567

3ce4a7bf6   Dominik Brodowski   fs: add ksys_read...
568
569
570
571
  /* read system call: forwards fd, buf and count unchanged to ksys_read(). */
  SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
  {
  	return ksys_read(fd, buf, count);
  }
e7a3e8b2e   Dominik Brodowski   fs: add ksys_writ...
572
  /*
   * Write to the file behind descriptor @fd at its current position.
   *
   * For files with a position, the write works on a local copy of f_pos
   * that is stored back only when vfs_write() did not fail; for streams a
   * NULL position is passed.  Returns -EBADF when @fd has no file, otherwise
   * the result of vfs_write().
   */
  ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
  {
  	struct fd f = fdget_pos(fd);
  	loff_t local_pos;
  	loff_t *ppos;
  	ssize_t ret;

  	if (!f.file)
  		return -EBADF;

  	ppos = file_ppos(f.file);
  	if (ppos) {
  		local_pos = *ppos;
  		ppos = &local_pos;
  	}

  	ret = vfs_write(f.file, buf, count, ppos);
  	if (ppos && ret >= 0)
  		f.file->f_pos = local_pos;

  	fdput_pos(f);
  	return ret;
  }
e7a3e8b2e   Dominik Brodowski   fs: add ksys_writ...
591
592
593
594
595
  /* write system call: forwards fd, buf and count unchanged to ksys_write(). */
  SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
  		size_t, count)
  {
  	return ksys_write(fd, buf, count);
  }
36028d5dd   Dominik Brodowski   fs: add ksys_p{re...
596
597
  /*
   * Read from the file behind descriptor @fd at the explicit position @pos;
   * the file's own f_pos is not touched here.
   *
   * Returns -EINVAL for a negative @pos, -EBADF when @fd has no file,
   * -ESPIPE when the file lacks FMODE_PREAD, otherwise the result of
   * vfs_read().
   */
  ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
  		     loff_t pos)
  {
  	struct fd f;
  	ssize_t ret;

  	if (pos < 0)
  		return -EINVAL;

  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;

  	if (f.file->f_mode & FMODE_PREAD)
  		ret = vfs_read(f.file, buf, count, &pos);
  	else
  		ret = -ESPIPE;

  	fdput(f);
  	return ret;
  }
36028d5dd   Dominik Brodowski   fs: add ksys_p{re...
614
615
616
617
618
619
620
621
  /* pread64(2) entry point: forwards all four arguments to ksys_pread64(). */
  SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
  			size_t, count, loff_t, pos)
  {
  	return ksys_pread64(fd, buf, count, pos);
  }
  
  /*
   * Positional write: write @count bytes from @buf at offset @pos of the file
   * behind @fd.  The offset lives in the by-value @pos argument, so the file's
   * own f_pos is not written by this function.
   *
   * Returns -EINVAL for a negative @pos, -EBADF when @fd has no file, -ESPIPE
   * when the file lacks FMODE_PWRITE, otherwise the vfs_write() result.
   */
  ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
  		      size_t count, loff_t pos)
  {
  	struct fd f;
  	ssize_t written;
  
  	if (pos < 0)
  		return -EINVAL;
  
  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;
  
  	if (f.file->f_mode & FMODE_PWRITE)
  		written = vfs_write(f.file, buf, count, &pos);
  	else
  		written = -ESPIPE;
  
  	fdput(f);
  	return written;
  }
36028d5dd   Dominik Brodowski   fs: add ksys_p{re...
638
639
640
641
642
  /* pwrite64(2) entry point: forwards all four arguments to ksys_pwrite64(). */
  SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
  			 size_t, count, loff_t, pos)
  {
  	return ksys_pwrite64(fd, buf, count, pos);
  }
ac15ac066   Al Viro   lift iov_iter int...
643
  static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
644
  		loff_t *ppos, int type, rwf_t flags)
293bc9822   Al Viro   new methods: ->re...
645
646
  {
  	struct kiocb kiocb;
293bc9822   Al Viro   new methods: ->re...
647
648
649
  	ssize_t ret;
  
  	init_sync_kiocb(&kiocb, filp);
fdd2f5b7d   Goldwyn Rodrigues   fs: Separate out ...
650
651
652
  	ret = kiocb_set_rw_flags(&kiocb, flags);
  	if (ret)
  		return ret;
438ab720c   Kirill Smelkov   vfs: pass ppos=NU...
653
  	kiocb.ki_pos = (ppos ? *ppos : 0);
293bc9822   Al Viro   new methods: ->re...
654

0f78d06ac   Miklos Szeredi   vfs: pass type in...
655
  	if (type == READ)
bb7462b6f   Miklos Szeredi   vfs: use helpers ...
656
  		ret = call_read_iter(filp, &kiocb, iter);
0f78d06ac   Miklos Szeredi   vfs: pass type in...
657
  	else
bb7462b6f   Miklos Szeredi   vfs: use helpers ...
658
  		ret = call_write_iter(filp, &kiocb, iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
659
  	BUG_ON(ret == -EIOCBQUEUED);
438ab720c   Kirill Smelkov   vfs: pass ppos=NU...
660
661
  	if (ppos)
  		*ppos = kiocb.ki_pos;
293bc9822   Al Viro   new methods: ->re...
662
663
  	return ret;
  }
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
664
  /* Do it by hand, with file-ops */
ac15ac066   Al Viro   lift iov_iter int...
665
  static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
666
  		loff_t *ppos, int type, rwf_t flags)
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
667
  {
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
668
  	ssize_t ret = 0;
97be7ebe5   Christoph Hellwig   vfs: add the RWF_...
669
  	if (flags & ~RWF_HIPRI)
793b80ef1   Christoph Hellwig   vfs: pass a flags...
670
  		return -EOPNOTSUPP;
ac15ac066   Al Viro   lift iov_iter int...
671
672
  	while (iov_iter_count(iter)) {
  		struct iovec iovec = iov_iter_iovec(iter);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
673
  		ssize_t nr;
0f78d06ac   Miklos Szeredi   vfs: pass type in...
674
675
676
677
678
679
680
  		if (type == READ) {
  			nr = filp->f_op->read(filp, iovec.iov_base,
  					      iovec.iov_len, ppos);
  		} else {
  			nr = filp->f_op->write(filp, iovec.iov_base,
  					       iovec.iov_len, ppos);
  		}
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
681
682
683
684
685
686
687
  
  		if (nr < 0) {
  			if (!ret)
  				ret = nr;
  			break;
  		}
  		ret += nr;
ac15ac066   Al Viro   lift iov_iter int...
688
  		if (nr != iovec.iov_len)
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
689
  			break;
ac15ac066   Al Viro   lift iov_iter int...
690
  		iov_iter_advance(iter, nr);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
691
692
693
694
  	}
  
  	return ret;
  }
ffecee4f2   Vegard Nossum   iov_iter: kernel-...
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
  /**
   * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
   *     into the kernel and check that it is valid.
   *
   * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
   * @uvector: Pointer to the userspace array.
   * @nr_segs: Number of elements in userspace array.
   * @fast_segs: Number of elements in @fast_pointer.
   * @fast_pointer: Pointer to (usually small on-stack) kernel array.
   * @ret_pointer: (output parameter) Pointer to a variable that will point to
   *     either @fast_pointer, a newly allocated kernel array, or NULL,
   *     depending on which array was used.
   *
   * This function copies an array of &struct iovec of @nr_segs from
   * userspace into the kernel and checks that each element is valid (e.g.
   * it does not point to a kernel address or cause overflow by being too
   * large, etc.).
   *
   * As an optimization, the caller may provide a pointer to a small
   * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
   * (the size of this array, or 0 if unused, should be given in @fast_segs).
   *
   * @ret_pointer will always point to the array that was used, so the
   * caller must take care not to call kfree() on it e.g. in case the
   * @fast_pointer array was used and it was allocated on the stack.
   *
   * Return: The total number of bytes covered by the iovec array on success
   *   or a negative error code on error.
   */
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
724
725
726
  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
  			      unsigned long nr_segs, unsigned long fast_segs,
  			      struct iovec *fast_pointer,
  			      struct iovec **ret_pointer)
  {
  	struct iovec *iov = fast_pointer;
  	ssize_t total = 0;
  	unsigned long i;
  
  	/*
  	 * SuS says "The readv() function *may* fail if the iovcnt argument
  	 * was less than or equal to 0, or greater than {IOV_MAX}".  Linux has
  	 * traditionally returned zero for zero segments, so keep doing that.
  	 */
  	if (!nr_segs)
  		goto out;
  
  	if (nr_segs > UIO_MAXIOV) {
  		total = -EINVAL;
  		goto out;
  	}
  
  	/* Too many segments for the caller's array: allocate a bigger one. */
  	if (nr_segs > fast_segs) {
  		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
  		if (!iov) {
  			total = -ENOMEM;
  			goto out;
  		}
  	}
  
  	/* Fetch the whole "struct iovec" array from user memory. */
  	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
  		total = -EFAULT;
  		goto out;
  	}
  
  	/*
  	 * According to the Single Unix Specification we should return EINVAL
  	 * if an element length is < 0 when cast to ssize_t or if the total
  	 * length would overflow the ssize_t return value of the system call.
  	 *
  	 * Linux caps all read/write calls to MAX_RW_COUNT, and so avoids the
  	 * overflow case by trimming the segment that crosses the cap.
  	 */
  	for (i = 0; i < nr_segs; i++) {
  		struct iovec *cur = &iov[i];
  		void __user *base = cur->iov_base;
  		ssize_t len = (ssize_t)cur->iov_len;
  
  		/* A length that is negative as ssize_t is invalid. */
  		if (len < 0) {
  			total = -EINVAL;
  			goto out;
  		}
  		/* A negative @type skips the user-range check. */
  		if (type >= 0
  		    && unlikely(!access_ok(base, len))) {
  			total = -EFAULT;
  			goto out;
  		}
  		if (len > MAX_RW_COUNT - total) {
  			len = MAX_RW_COUNT - total;
  			cur->iov_len = len;
  		}
  		total += len;
  	}
  
  out:
  	/* Always report which array was used, even on failure. */
  	*ret_pointer = iov;
  	return total;
  }
f50298556   Al Viro   move compat_rw_co...
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
  #ifdef CONFIG_COMPAT
  /*
   * Compat counterpart of rw_copy_check_uvector(): read an array of
   * struct compat_iovec from user space element by element and store the
   * converted entries in a kernel struct iovec array.
   *
   * *@ret_pointer is set to @fast_pointer on entry and replaced by the
   * allocated array once an allocation has succeeded, so it stays at
   * @fast_pointer for zero segments, -EINVAL on the segment count, and
   * -ENOMEM.
   *
   * Returns the total byte count (capped at MAX_RW_COUNT) or a negative
   * error code.
   */
  ssize_t compat_rw_copy_check_uvector(int type,
  		const struct compat_iovec __user *uvector, unsigned long nr_segs,
  		unsigned long fast_segs, struct iovec *fast_pointer,
  		struct iovec **ret_pointer)
  {
  	struct iovec *iov = fast_pointer;
  	compat_ssize_t tot_len = 0;
  	int seg;
  
  	*ret_pointer = fast_pointer;
  
  	/*
  	 * SuS says "The readv() function *may* fail if the iovcnt argument
  	 * was less than or equal to 0, or greater than {IOV_MAX}".  Linux has
  	 * traditionally returned zero for zero segments, so keep doing that.
  	 */
  	if (nr_segs == 0)
  		return 0;
  
  	if (nr_segs > UIO_MAXIOV)
  		return -EINVAL;
  
  	if (nr_segs > fast_segs) {
  		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
  		if (!iov)
  			return -ENOMEM;
  	}
  	*ret_pointer = iov;
  
  	/* Check the whole user array once before the __get_user() calls. */
  	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
  		return -EFAULT;
  
  	/*
  	 * Single unix specification:
  	 * We should -EINVAL if an element length is not >= 0 and fitting an
  	 * ssize_t.
  	 *
  	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
  	 * no overflow possibility.
  	 */
  	for (seg = 0; seg < nr_segs; seg++, uvector++, iov++) {
  		compat_uptr_t buf;
  		compat_ssize_t len;
  
  		if (__get_user(len, &uvector->iov_len))
  			return -EFAULT;
  		if (__get_user(buf, &uvector->iov_base))
  			return -EFAULT;
  
  		/* size_t not fitting in compat_ssize_t .. */
  		if (len < 0)
  			return -EINVAL;
  
  		/* A negative @type skips the per-buffer user-range check. */
  		if (type >= 0 &&
  		    !access_ok(compat_ptr(buf), len))
  			return -EFAULT;
  
  		if (len > MAX_RW_COUNT - tot_len)
  			len = MAX_RW_COUNT - tot_len;
  		tot_len += len;
  
  		iov->iov_base = compat_ptr(buf);
  		iov->iov_len = (compat_size_t) len;
  	}
  
  	return tot_len;
  }
  #endif
19c735868   Christoph Hellwig   fs: remove __do_r...
869
  static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
870
  		loff_t *pos, rwf_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
871
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
872
  	size_t tot_len;
7687a7a44   Miklos Szeredi   vfs: extract comm...
873
  	ssize_t ret = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
874

edab5fe38   Christoph Hellwig   fs: move more cod...
875
876
877
878
  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_READ))
  		return -EINVAL;
7687a7a44   Miklos Szeredi   vfs: extract comm...
879
  	tot_len = iov_iter_count(iter);
0504c074b   Al Viro   switch {compat_,}...
880
881
  	if (!tot_len)
  		goto out;
19c735868   Christoph Hellwig   fs: remove __do_r...
882
  	ret = rw_verify_area(READ, file, pos, tot_len);
e28cc7157   Linus Torvalds   Relax the rw_veri...
883
  	if (ret < 0)
19c735868   Christoph Hellwig   fs: remove __do_r...
884
  		return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885

19c735868   Christoph Hellwig   fs: remove __do_r...
886
887
  	if (file->f_op->read_iter)
  		ret = do_iter_readv_writev(file, iter, pos, READ, flags);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
888
  	else
19c735868   Christoph Hellwig   fs: remove __do_r...
889
  		ret = do_loop_readv_writev(file, iter, pos, READ, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
890
  out:
19c735868   Christoph Hellwig   fs: remove __do_r...
891
892
  	if (ret >= 0)
  		fsnotify_access(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
893
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
894
  }
5dcdc43e2   Jiufei Xue   vfs: add vfs_iocb...
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
  /*
   * Read into @iter using a kiocb supplied by the caller; the offset used is
   * @iocb->ki_pos.
   *
   * Returns -EINVAL when the file has no ->read_iter or lacks FMODE_CAN_READ,
   * -EBADF when it lacks FMODE_READ, a negative rw_verify_area() result, or
   * whatever call_read_iter() returns.  fsnotify_access() is raised for every
   * non-negative result, including an empty request.
   */
  ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
  			   struct iov_iter *iter)
  {
  	ssize_t ret = 0;
  	size_t len;
  
  	if (!file->f_op->read_iter)
  		return -EINVAL;
  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_READ))
  		return -EINVAL;
  
  	len = iov_iter_count(iter);
  	if (len) {
  		ret = rw_verify_area(READ, file, &iocb->ki_pos, len);
  		if (ret < 0)
  			return ret;
  
  		ret = call_read_iter(file, iocb, iter);
  	}
  
  	if (ret >= 0)
  		fsnotify_access(file);
  	return ret;
  }
  EXPORT_SYMBOL(vfs_iocb_iter_read);
18e9710ee   Christoph Hellwig   fs: implement vfs...
922
  ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
923
  		rwf_t flags)
7687a7a44   Miklos Szeredi   vfs: extract comm...
924
  {
18e9710ee   Christoph Hellwig   fs: implement vfs...
925
926
927
928
929
  	if (!file->f_op->read_iter)
  		return -EINVAL;
  	return do_iter_read(file, iter, ppos, flags);
  }
  EXPORT_SYMBOL(vfs_iter_read);
7687a7a44   Miklos Szeredi   vfs: extract comm...
930

19c735868   Christoph Hellwig   fs: remove __do_r...
931
  static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
932
  		loff_t *pos, rwf_t flags)
19c735868   Christoph Hellwig   fs: remove __do_r...
933
934
935
  {
  	size_t tot_len;
  	ssize_t ret = 0;
03d95eb2f   Al Viro   lift sb_start_wri...
936

edab5fe38   Christoph Hellwig   fs: move more cod...
937
938
939
940
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_WRITE))
  		return -EINVAL;
19c735868   Christoph Hellwig   fs: remove __do_r...
941
942
943
944
  	tot_len = iov_iter_count(iter);
  	if (!tot_len)
  		return 0;
  	ret = rw_verify_area(WRITE, file, pos, tot_len);
7687a7a44   Miklos Szeredi   vfs: extract comm...
945
946
  	if (ret < 0)
  		return ret;
19c735868   Christoph Hellwig   fs: remove __do_r...
947
948
949
950
  	if (file->f_op->write_iter)
  		ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
  	else
  		ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
19c735868   Christoph Hellwig   fs: remove __do_r...
951
952
  	if (ret > 0)
  		fsnotify_modify(file);
7687a7a44   Miklos Szeredi   vfs: extract comm...
953
954
  	return ret;
  }
5dcdc43e2   Jiufei Xue   vfs: add vfs_iocb...
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
  /*
   * Write from @iter using a kiocb supplied by the caller; the offset used is
   * @iocb->ki_pos.
   *
   * Returns -EINVAL when the file has no ->write_iter or lacks
   * FMODE_CAN_WRITE, -EBADF when it lacks FMODE_WRITE, 0 for an empty
   * request, a negative rw_verify_area() result, or whatever
   * call_write_iter() returns.  fsnotify_modify() is raised only for a
   * positive result.
   */
  ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
  			    struct iov_iter *iter)
  {
  	ssize_t ret;
  	size_t len;
  
  	if (!file->f_op->write_iter)
  		return -EINVAL;
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (!(file->f_mode & FMODE_CAN_WRITE))
  		return -EINVAL;
  
  	len = iov_iter_count(iter);
  	if (len == 0)
  		return 0;
  
  	ret = rw_verify_area(WRITE, file, &iocb->ki_pos, len);
  	if (ret < 0)
  		return ret;
  
  	ret = call_write_iter(file, iocb, iter);
  	if (ret > 0)
  		fsnotify_modify(file);
  	return ret;
  }
  EXPORT_SYMBOL(vfs_iocb_iter_write);
abbb65899   Christoph Hellwig   fs: implement vfs...
982
  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
983
  		rwf_t flags)
abbb65899   Christoph Hellwig   fs: implement vfs...
984
985
986
987
988
989
  {
  	if (!file->f_op->write_iter)
  		return -EINVAL;
  	return do_iter_write(file, iter, ppos, flags);
  }
  EXPORT_SYMBOL(vfs_iter_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
990
  /*
   * Vectored read from a user-space iovec array.
   *
   * The array is imported with import_iovec(), using an on-stack array of
   * UIO_FASTIOV entries as the fast buffer; a negative import result is
   * returned unchanged.  Otherwise the read is done by do_iter_read() and
   * the pointer left in iov by import_iovec() is passed to kfree().
   */
  ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  		  unsigned long vlen, loff_t *pos, rwf_t flags)
  {
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
  	ssize_t ret;
  
  	ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
  	if (ret < 0)
  		return ret;
  
  	ret = do_iter_read(file, &iter, pos, flags);
  	kfree(iov);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1006

9725d4cef   Christoph Hellwig   fs: unexport vfs_...
1007
  /*
   * Vectored write from a user-space iovec array.
   *
   * The array is imported with import_iovec(); a negative import result is
   * returned unchanged.  Otherwise do_iter_write() runs between
   * file_start_write() and file_end_write(), and the pointer left in iov by
   * import_iovec() is passed to kfree().
   */
  static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  		   unsigned long vlen, loff_t *pos, rwf_t flags)
  {
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
  	ssize_t ret;
  
  	ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
  	if (ret < 0)
  		return ret;
  
  	file_start_write(file);
  	ret = do_iter_write(file, &iter, pos, flags);
  	file_end_write(file);
  
  	kfree(iov);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1024

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1025
  /*
   * readv()-style read at the file's current position.
   *
   * The transfer runs against a local copy of the offset obtained through
   * file_ppos(); f_pos is written back only when vfs_readv() did not fail.
   * If file_ppos() yields NULL, NULL is handed on and f_pos is not touched.
   *
   * Accounting runs on every path, including -EBADF: add_rchar() for a
   * positive byte count, inc_syscr() unconditionally.
   */
  static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
  			unsigned long vlen, rwf_t flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t nread = -EBADF;
  
  	if (f.file) {
  		loff_t *ppos = file_ppos(f.file);
  		loff_t offset;
  
  		if (ppos) {
  			offset = *ppos;
  			ppos = &offset;
  		}
  
  		nread = vfs_readv(f.file, vec, vlen, ppos, flags);
  		if (ppos && nread >= 0)
  			f.file->f_pos = offset;
  
  		fdput_pos(f);
  	}
  
  	if (nread > 0)
  		add_rchar(current, nread);
  	inc_syscr(current);
  	return nread;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1048
  /*
   * writev()-style write at the file's current position.
   *
   * The transfer runs against a local copy of the offset obtained through
   * file_ppos(); f_pos is written back only when vfs_writev() did not fail.
   * If file_ppos() yields NULL, NULL is handed on and f_pos is not touched.
   *
   * Accounting runs on every path, including -EBADF: add_wchar() for a
   * positive byte count, inc_syscw() unconditionally.
   */
  static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
  			 unsigned long vlen, rwf_t flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t written = -EBADF;
  
  	if (f.file) {
  		loff_t *ppos = file_ppos(f.file);
  		loff_t offset;
  
  		if (ppos) {
  			offset = *ppos;
  			ppos = &offset;
  		}
  
  		written = vfs_writev(f.file, vec, vlen, ppos, flags);
  		if (ppos && written >= 0)
  			f.file->f_pos = offset;
  
  		fdput_pos(f);
  	}
  
  	if (written > 0)
  		add_wchar(current, written);
  	inc_syscw(current);
  	return written;
  }
601cc11d0   Linus Torvalds   Make non-compat p...
1071
1072
1073
1074
1075
  static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
  {
  #define HALF_LONG_BITS (BITS_PER_LONG / 2)
  	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1076
  /*
   * preadv()-style read at the explicit offset @pos; the file's own f_pos is
   * not written by this function.
   *
   * A negative @pos returns -EINVAL before any accounting.  On all other
   * paths (-EBADF, -ESPIPE without FMODE_PREAD, or the vfs_readv() result)
   * add_rchar() runs for a positive byte count and inc_syscr() always runs.
   */
  static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
  			 unsigned long vlen, loff_t pos, rwf_t flags)
  {
  	struct fd f;
  	ssize_t nread = -EBADF;
  
  	if (pos < 0)
  		return -EINVAL;
  
  	f = fdget(fd);
  	if (f.file) {
  		if (f.file->f_mode & FMODE_PREAD)
  			nread = vfs_readv(f.file, vec, vlen, &pos, flags);
  		else
  			nread = -ESPIPE;
  		fdput(f);
  	}
  
  	if (nread > 0)
  		add_rchar(current, nread);
  	inc_syscr(current);
  	return nread;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1097
  /*
   * pwritev()-style write at the explicit offset @pos; the file's own f_pos
   * is not written by this function.
   *
   * A negative @pos returns -EINVAL before any accounting.  On all other
   * paths (-EBADF, -ESPIPE without FMODE_PWRITE, or the vfs_writev() result)
   * add_wchar() runs for a positive byte count and inc_syscw() always runs.
   */
  static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
  			  unsigned long vlen, loff_t pos, rwf_t flags)
  {
  	struct fd f;
  	ssize_t written = -EBADF;
  
  	if (pos < 0)
  		return -EINVAL;
  
  	f = fdget(fd);
  	if (f.file) {
  		if (f.file->f_mode & FMODE_PWRITE)
  			written = vfs_writev(f.file, vec, vlen, &pos, flags);
  		else
  			written = -ESPIPE;
  		fdput(f);
  	}
  
  	if (written > 0)
  		add_wchar(current, written);
  	inc_syscw(current);
  	return written;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
  /* readv(2) entry point: calls do_readv() with no per-call flags (0). */
  SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	return do_readv(fd, vec, vlen, 0);
  }
  
  /* writev(2) entry point: calls do_writev() with no per-call flags (0). */
  SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	return do_writev(fd, vec, vlen, 0);
  }
  
  /*
   * preadv(2) entry point: the offset arrives split into pos_l/pos_h, is
   * reassembled with pos_from_hilo(), and do_preadv() is called with flags 0.
   */
  SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);
  
  	return do_preadv(fd, vec, vlen, pos, 0);
  }
  
  /*
   * preadv2(2) entry point.  The offset is reassembled from pos_l/pos_h; an
   * offset of -1 selects the current-position path (do_readv()), any other
   * value goes to do_preadv().  @flags is passed through on both paths.
   */
  SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
  		rwf_t, flags)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);
  
  	if (pos != -1)
  		return do_preadv(fd, vec, vlen, pos, flags);
  
  	return do_readv(fd, vec, vlen, flags);
  }
  
  /*
   * pwritev(2) entry point: the offset arrives split into pos_l/pos_h, is
   * reassembled with pos_from_hilo(), and do_pwritev() is called with flags 0.
   */
  SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);
  
  	return do_pwritev(fd, vec, vlen, pos, 0);
  }
  
  /*
   * pwritev2(2) entry point.  The offset is reassembled from pos_l/pos_h; an
   * offset of -1 selects the current-position path (do_writev()), any other
   * value goes to do_pwritev().  @flags is passed through on both paths.
   */
  SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
  		rwf_t, flags)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);
  
  	if (pos != -1)
  		return do_pwritev(fd, vec, vlen, pos, flags);
  
  	return do_writev(fd, vec, vlen, flags);
  }
72ec35163   Al Viro   switch compat rea...
1169
  #ifdef CONFIG_COMPAT
72ec35163   Al Viro   switch compat rea...
1170
1171
  static size_t compat_readv(struct file *file,
  			   const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1172
  			   unsigned long vlen, loff_t *pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1173
  {
72ec35163   Al Viro   switch compat rea...
1174
1175
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
ac15ac066   Al Viro   lift iov_iter int...
1176
  	struct iov_iter iter;
72ec35163   Al Viro   switch compat rea...
1177
  	ssize_t ret;
72ec35163   Al Viro   switch compat rea...
1178

26c87fb7d   Christoph Hellwig   fs: remove do_com...
1179
  	ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
edab5fe38   Christoph Hellwig   fs: move more cod...
1180
1181
1182
1183
  	if (ret >= 0) {
  		ret = do_iter_read(file, &iter, pos, flags);
  		kfree(iov);
  	}
72ec35163   Al Viro   switch compat rea...
1184
1185
1186
1187
1188
  	if (ret > 0)
  		add_rchar(current, ret);
  	inc_syscr(current);
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1189
1190
  static size_t do_compat_readv(compat_ulong_t fd,
  				 const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1191
  				 compat_ulong_t vlen, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1192
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1193
  	struct fd f = fdget_pos(fd);
72ec35163   Al Viro   switch compat rea...
1194
1195
1196
1197
1198
1199
  	ssize_t ret;
  	loff_t pos;
  
  	if (!f.file)
  		return -EBADF;
  	pos = f.file->f_pos;
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1200
  	ret = compat_readv(f.file, vec, vlen, &pos, flags);
5faf153eb   Al Viro   don't call file_p...
1201
1202
  	if (ret >= 0)
  		f.file->f_pos = pos;
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1203
  	fdput_pos(f);
72ec35163   Al Viro   switch compat rea...
1204
  	return ret;
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1205

72ec35163   Al Viro   switch compat rea...
1206
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1207
1208
1209
1210
1211
1212
1213
1214
  /* Compat readv(2) entry point: calls do_compat_readv() with flags 0. */
  COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
  		const struct compat_iovec __user *,vec,
  		compat_ulong_t, vlen)
  {
  	return do_compat_readv(fd, vec, vlen, 0);
  }
  
  static long do_compat_preadv64(unsigned long fd,
378a10f3a   Heiko Carstens   fs/compat: option...
1215
  				  const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1216
  				  unsigned long vlen, loff_t pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
  {
  	struct fd f;
  	ssize_t ret;
  
  	if (pos < 0)
  		return -EINVAL;
  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;
  	ret = -ESPIPE;
  	if (f.file->f_mode & FMODE_PREAD)
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1228
  		ret = compat_readv(f.file, vec, vlen, &pos, flags);
72ec35163   Al Viro   switch compat rea...
1229
1230
1231
  	fdput(f);
  	return ret;
  }
378a10f3a   Heiko Carstens   fs/compat: option...
1232
1233
1234
1235
1236
  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
  /*
   * Compat preadv64 entry point, built only when the architecture defines
   * __ARCH_WANT_COMPAT_SYS_PREADV64.  The offset arrives as a single loff_t
   * and is passed to do_compat_preadv64() with flags 0.
   */
  COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos)
  {
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1237
  	return do_compat_preadv64(fd, vec, vlen, pos, 0);
378a10f3a   Heiko Carstens   fs/compat: option...
1238
1239
  }
  #endif
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1240
  COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
72ec35163   Al Viro   switch compat rea...
1241
  		const struct compat_iovec __user *,vec,
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1242
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
72ec35163   Al Viro   switch compat rea...
1243
1244
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
378a10f3a   Heiko Carstens   fs/compat: option...
1245

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1246
1247
  	return do_compat_preadv64(fd, vec, vlen, pos, 0);
  }
3ebfd81f7   H.J. Lu   x86/syscalls: Add...
1248
1249
1250
  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
  /*
   * Compat preadv64v2 entry point, built only when the architecture defines
   * __ARCH_WANT_COMPAT_SYS_PREADV64V2.  An offset of -1 selects the
   * current-position path (do_compat_readv()); any other value goes to
   * do_compat_preadv64().  @flags is passed through on both paths.
   */
  COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos, rwf_t, flags)
  {
  	if (pos != -1)
  		return do_compat_preadv64(fd, vec, vlen, pos, flags);
  
  	return do_compat_readv(fd, vec, vlen, flags);
  }
  #endif
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1258
1259
1260
  /*
   * Compat preadv2(): the position arrives as two 32-bit halves.  Once
   * joined, a value of -1 is routed to do_compat_readv(), which takes no
   * position argument; every other value goes to do_compat_preadv64().
   */
  COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
  		const struct compat_iovec __user *, vec,
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
  		rwf_t, flags)
  {
  	loff_t pos = pos_high;
  
  	pos = (pos << 32) | pos_low;
  
  	if (pos != -1)
  		return do_compat_preadv64(fd, vec, vlen, pos, flags);
  
  	return do_compat_readv(fd, vec, vlen, flags);
  }
  
  static size_t compat_writev(struct file *file,
  			    const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1273
  			    unsigned long vlen, loff_t *pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1274
  {
26c87fb7d   Christoph Hellwig   fs: remove do_com...
1275
1276
1277
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
edab5fe38   Christoph Hellwig   fs: move more cod...
1278
  	ssize_t ret;
72ec35163   Al Viro   switch compat rea...
1279

26c87fb7d   Christoph Hellwig   fs: remove do_com...
1280
  	ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
edab5fe38   Christoph Hellwig   fs: move more cod...
1281
  	if (ret >= 0) {
62473a2d6   Al Viro   move file_{start,...
1282
  		file_start_write(file);
edab5fe38   Christoph Hellwig   fs: move more cod...
1283
  		ret = do_iter_write(file, &iter, pos, flags);
62473a2d6   Al Viro   move file_{start,...
1284
  		file_end_write(file);
edab5fe38   Christoph Hellwig   fs: move more cod...
1285
1286
  		kfree(iov);
  	}
72ec35163   Al Viro   switch compat rea...
1287
1288
1289
1290
1291
  	if (ret > 0)
  		add_wchar(current, ret);
  	inc_syscw(current);
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1292
1293
  static size_t do_compat_writev(compat_ulong_t fd,
  				  const struct compat_iovec __user* vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1294
  				  compat_ulong_t vlen, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1295
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1296
  	struct fd f = fdget_pos(fd);
72ec35163   Al Viro   switch compat rea...
1297
1298
1299
1300
1301
1302
  	ssize_t ret;
  	loff_t pos;
  
  	if (!f.file)
  		return -EBADF;
  	pos = f.file->f_pos;
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1303
  	ret = compat_writev(f.file, vec, vlen, &pos, flags);
5faf153eb   Al Viro   don't call file_p...
1304
1305
  	if (ret >= 0)
  		f.file->f_pos = pos;
9c225f265   Linus Torvalds   vfs: atomic f_pos...
1306
  	fdput_pos(f);
72ec35163   Al Viro   switch compat rea...
1307
1308
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1309
1310
1311
1312
1313
1314
1315
1316
  /*
   * Compat writev(): writes at the file's current position via
   * do_compat_writev() with no RWF_* flags set.
   */
  COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
  		const struct compat_iovec __user *, vec,
  		compat_ulong_t, vlen)
  {
  	return do_compat_writev(fd, vec, vlen, 0);
  }
  
  static long do_compat_pwritev64(unsigned long fd,
378a10f3a   Heiko Carstens   fs/compat: option...
1317
  				   const struct compat_iovec __user *vec,
ddef7ed2b   Christoph Hellwig   annotate RWF_... ...
1318
  				   unsigned long vlen, loff_t pos, rwf_t flags)
72ec35163   Al Viro   switch compat rea...
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
  {
  	struct fd f;
  	ssize_t ret;
  
  	if (pos < 0)
  		return -EINVAL;
  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;
  	ret = -ESPIPE;
  	if (f.file->f_mode & FMODE_PWRITE)
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1330
  		ret = compat_writev(f.file, vec, vlen, &pos, flags);
72ec35163   Al Viro   switch compat rea...
1331
1332
1333
  	fdput(f);
  	return ret;
  }
378a10f3a   Heiko Carstens   fs/compat: option...
1334
1335
1336
1337
1338
  #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
  /*
   * Compat pwritev64(): the file position arrives as one loff_t argument and
   * is handed straight to do_compat_pwritev64() with no RWF_* flags set.
   */
  COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
  		const struct compat_iovec __user *, vec,
  		unsigned long, vlen, loff_t, pos)
  {
  	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
  }
  #endif
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1342
  COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
72ec35163   Al Viro   switch compat rea...
1343
  		const struct compat_iovec __user *,vec,
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1344
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
72ec35163   Al Viro   switch compat rea...
1345
1346
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
378a10f3a   Heiko Carstens   fs/compat: option...
1347

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1348
  	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
72ec35163   Al Viro   switch compat rea...
1349
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1350

3ebfd81f7   H.J. Lu   x86/syscalls: Add...
1351
1352
1353
  #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
  /*
   * Compat pwritev64v2(): like pwritev64 but with RWF_* flags.  A position
   * of -1 is routed to do_compat_writev(), which writes at the file's
   * current position; every other position goes to do_compat_pwritev64().
   */
  COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
  		const struct compat_iovec __user *, vec,
  		unsigned long, vlen, loff_t, pos, rwf_t, flags)
  {
  	if (pos != -1)
  		return do_compat_pwritev64(fd, vec, vlen, pos, flags);
  
  	return do_compat_writev(fd, vec, vlen, flags);
  }
  #endif
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1361
1362
  /*
   * Compat pwritev2(): the position arrives as two 32-bit halves.  Once
   * joined, a value of -1 is routed to do_compat_writev(), which writes at
   * the file's current position; every other value goes to
   * do_compat_pwritev64().
   */
  COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
  		const struct compat_iovec __user *, vec,
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
  		rwf_t, flags)
  {
  	loff_t pos = pos_high;
  
  	pos = (pos << 32) | pos_low;
  
  	if (pos != -1)
  		return do_compat_pwritev64(fd, vec, vlen, pos, flags);
  
  	return do_compat_writev(fd, vec, vlen, flags);
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1372

72ec35163   Al Viro   switch compat rea...
1373
  #endif
19f4fc3ae   Al Viro   convert sendfile{...
1374
1375
  /*
   * do_sendfile - common implementation behind the sendfile() entry points.
   * @out_fd: descriptor that receives the data; must be open for writing
   * @in_fd:  descriptor the data is read from; must be open for reading
   * @ppos:   optional explicit read position.  When NULL, the input file's
   *          f_pos is used and updated.  Otherwise *ppos is used and updated,
   *          and the input file must have FMODE_PREAD set (else -ESPIPE).
   * @count:  number of bytes requested; clamped to MAX_RW_COUNT
   * @max:    upper bound for the read position; 0 selects the smaller
   *          s_maxbytes of the two superblocks involved
   *
   * Returns the result of do_splice_direct(), or a negative error code.
   * -EOVERFLOW is returned when the read position starts at or beyond @max,
   * or ends up beyond it after the transfer.
   */
  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
  			   size_t count, loff_t max)
  {
  	struct fd src, dst;
  	struct inode *src_inode, *dst_inode;
  	loff_t src_pos;
  	loff_t dst_pos;
  	ssize_t res;
  	int splice_flags = 0;
  
  	/*
  	 * The input file must exist and be readable.
  	 */
  	src = fdget(in_fd);
  	if (!src.file)
  		return -EBADF;
  	res = -EBADF;
  	if (!(src.file->f_mode & FMODE_READ))
  		goto put_src;
  
  	res = -ESPIPE;
  	if (ppos) {
  		src_pos = *ppos;
  		/* An explicit offset is only usable on pread-capable files. */
  		if (!(src.file->f_mode & FMODE_PREAD))
  			goto put_src;
  	} else {
  		src_pos = src.file->f_pos;
  	}
  
  	res = rw_verify_area(READ, src.file, &src_pos, count);
  	if (res < 0)
  		goto put_src;
  	count = min_t(size_t, count, MAX_RW_COUNT);
  
  	/*
  	 * The output file must exist and be writable.
  	 */
  	res = -EBADF;
  	dst = fdget(out_fd);
  	if (!dst.file)
  		goto put_src;
  	if (!(dst.file->f_mode & FMODE_WRITE))
  		goto put_dst;
  
  	src_inode = file_inode(src.file);
  	dst_inode = file_inode(dst.file);
  	dst_pos = dst.file->f_pos;
  	res = rw_verify_area(WRITE, dst.file, &dst_pos, count);
  	if (res < 0)
  		goto put_dst;
  
  	if (!max)
  		max = min(src_inode->i_sb->s_maxbytes,
  			  dst_inode->i_sb->s_maxbytes);
  
  	/* Trim the request so the read never runs past @max. */
  	if (unlikely(src_pos + count > max)) {
  		res = -EOVERFLOW;
  		if (src_pos >= max)
  			goto put_dst;
  		count = max - src_pos;
  	}
  
  #if 0
  	/*
  	 * We need to debate whether we can enable this or not. The
  	 * man page documents EAGAIN return for the output at least,
  	 * and the application is arguably buggy if it doesn't expect
  	 * EAGAIN on a non-blocking file descriptor.
  	 */
  	if (src.file->f_flags & O_NONBLOCK)
  		splice_flags = SPLICE_F_NONBLOCK;
  #endif
  	file_start_write(dst.file);
  	res = do_splice_direct(src.file, &src_pos, dst.file, &dst_pos,
  			       count, splice_flags);
  	file_end_write(dst.file);
  
  	if (res > 0) {
  		add_rchar(current, res);
  		add_wchar(current, res);
  		fsnotify_access(src.file);
  		fsnotify_modify(dst.file);
  		/* Publish the advanced positions only after a transfer. */
  		dst.file->f_pos = dst_pos;
  		if (ppos)
  			*ppos = src_pos;
  		else
  			src.file->f_pos = src_pos;
  	}
  
  	inc_syscr(current);
  	inc_syscw(current);
  	if (src_pos > max)
  		res = -EOVERFLOW;
  
  put_dst:
  	fdput(dst);
  put_src:
  	fdput(src);
  	return res;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1469
  /*
   * sendfile(): when @offset is NULL the transfer uses the input file's own
   * position.  Otherwise the off_t at @offset is read, used as the starting
   * position with the transfer bounded by MAX_NON_LFS, and the resulting
   * position is written back to @offset.  A fault on either user access
   * yields -EFAULT.
   */
  SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	off_t start;
  	ssize_t ret;
  
  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);
  
  	if (unlikely(get_user(start, offset)))
  		return -EFAULT;
  
  	pos = start;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  
  	/* The position is written back even when the transfer failed. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;
  
  	return ret;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1487
  /*
   * sendfile64(): when @offset is NULL the transfer uses the input file's own
   * position.  Otherwise the loff_t at @offset is copied in, used as the
   * starting position, and the resulting position is written back to
   * @offset.  A fault on either user access yields -EFAULT.
   */
  SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	ssize_t ret;
  
  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);
  
  	if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  		return -EFAULT;
  
  	ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  
  	/* The position is written back even when the transfer failed. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;
  
  	return ret;
  }
19f4fc3ae   Al Viro   convert sendfile{...
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
  
  #ifdef CONFIG_COMPAT
  /*
   * Compat sendfile(): when @offset is NULL the transfer uses the input
   * file's own position.  Otherwise the compat_off_t at @offset is read,
   * used as the starting position with the transfer bounded by MAX_NON_LFS,
   * and the resulting position is written back to @offset.  A fault on
   * either user access yields -EFAULT.
   */
  COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
  		compat_off_t __user *, offset, compat_size_t, count)
  {
  	loff_t pos;
  	off_t start;
  	ssize_t ret;
  
  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);
  
  	if (unlikely(get_user(start, offset)))
  		return -EFAULT;
  
  	pos = start;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  
  	/* The position is written back even when the transfer failed. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;
  
  	return ret;
  }
  
  /*
   * Compat sendfile64(): when @offset is NULL the transfer uses the input
   * file's own position.  Otherwise sizeof(loff_t) bytes are copied in from
   * @offset, used as the starting position, and the resulting position is
   * written back to @offset.  A fault on either user access yields -EFAULT.
   */
  COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
  		compat_loff_t __user *, offset, compat_size_t, count)
  {
  	loff_t pos;
  	ssize_t ret;
  
  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);
  
  	if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  		return -EFAULT;
  
  	ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  
  	/* The position is written back even when the transfer failed. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;
  
  	return ret;
  }
  #endif
29732938a   Zach Brown   vfs: add copy_fil...
1543

f16acc9d9   Dave Chinner   vfs: introduce ge...
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
  /**
   * generic_copy_file_range - copy data between two files
   * @file_in:	file structure to read from
   * @pos_in:	file offset to read from
   * @file_out:	file structure to write data to
   * @pos_out:	file offset to write data to
   * @len:	amount of data to copy
   * @flags:	copy flags (not consulted by this helper)
   *
   * Generic filesystem helper that copies data from one file to another by
   * way of do_splice_direct().  It places no constraints on the owners of the
   * source or destination file: they may live on different superblocks and on
   * different filesystem types.  Short copies are allowed, and a single call
   * never transfers more than MAX_RW_COUNT bytes.
   *
   * This should be called from the @file_out filesystem, as per the
   * ->copy_file_range() method.
   *
   * Returns the number of bytes copied or a negative error indicating the
   * failure.
   */
  ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
  				struct file *file_out, loff_t pos_out,
  				size_t len, unsigned int flags)
  {
  	size_t chunk = min_t(size_t, len, MAX_RW_COUNT);
  
  	return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
  				chunk, 0);
  }
  EXPORT_SYMBOL(generic_copy_file_range);
64bf5ff58   Dave Chinner   vfs: no fallback ...
1573
1574
1575
1576
  /*
   * Pick the copy implementation: the output file's ->copy_file_range()
   * method when both files share that very method, otherwise
   * generic_copy_file_range().
   */
  static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
  				  struct file *file_out, loff_t pos_out,
  				  size_t len, unsigned int flags)
  {
  	/*
  	 * Filesystems are now allowed to handle cross-sb copies, but handing a
  	 * file of the wrong filesystem type to a filesystem driver can make it
  	 * dereference the wrong kind of ->private_data.  Avoid that until
  	 * there is a good reason to do otherwise.  NFS defines several
  	 * file_system_type structures, yet they all share one
  	 * ->copy_file_range() function pointer, so comparing the method
  	 * pointers covers that case.
  	 */
  	if (!file_out->f_op->copy_file_range ||
  	    file_out->f_op->copy_file_range != file_in->f_op->copy_file_range)
  		return generic_copy_file_range(file_in, pos_in,
  					       file_out, pos_out,
  					       len, flags);
  
  	return file_out->f_op->copy_file_range(file_in, pos_in,
  					       file_out, pos_out,
  					       len, flags);
  }
29732938a   Zach Brown   vfs: add copy_fil...
1594
1595
1596
1597
1598
1599
1600
1601
1602
  /*
   * Unlike a regular file read or write, copy_file_range() explicitly allows
   * a partial success to be returned.  Whether and when that happens is
   * decided by the copy_file_range method that ends up doing the work.
   *
   * Non-zero @flags are rejected with -EINVAL.  After the argument and area
   * checks, a remap (clone) through ->remap_file_range() is tried first when
   * both files are on the same superblock; if that does not report a
   * positive byte count, the data is copied via do_copy_file_range().
   */
  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
  			    struct file *file_out, loff_t pos_out,
  			    size_t len, unsigned int flags)
  {
  	loff_t cloned = 0;
  	ssize_t ret;
  
  	if (flags != 0)
  		return -EINVAL;
  
  	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out,
  				       &len, flags);
  	if (unlikely(ret))
  		return ret;
  
  	ret = rw_verify_area(READ, file_in, &pos_in, len);
  	if (unlikely(ret))
  		return ret;
  
  	ret = rw_verify_area(WRITE, file_out, &pos_out, len);
  	if (unlikely(ret))
  		return ret;
  
  	if (!len)
  		return 0;
  
  	file_start_write(file_out);
  
  	/*
  	 * Try cloning first, this is supported by more file systems, and
  	 * more efficient if both clone and copy are supported (e.g. NFS).
  	 */
  	if (file_in->f_op->remap_file_range &&
  	    file_inode(file_in)->i_sb == file_inode(file_out)->i_sb)
  		cloned = file_in->f_op->remap_file_range(file_in, pos_in,
  				file_out, pos_out,
  				min_t(loff_t, MAX_RW_COUNT, len),
  				REMAP_FILE_CAN_SHORTEN);
  
  	if (cloned > 0) {
  		ret = cloned;
  	} else {
  		/* No clone happened (or it failed): fall back to copying. */
  		ret = do_copy_file_range(file_in, pos_in, file_out, pos_out,
  					 len, flags);
  		WARN_ON_ONCE(ret == -EOPNOTSUPP);
  	}
  
  	if (ret > 0) {
  		fsnotify_access(file_in);
  		add_rchar(current, ret);
  		fsnotify_modify(file_out);
  		add_wchar(current, ret);
  	}
  
  	inc_syscr(current);
  	inc_syscw(current);
  
  	file_end_write(file_out);
  
  	return ret;
  }
  EXPORT_SYMBOL(vfs_copy_file_range);
  
  /*
   * copy_file_range(): for each side, a NULL offset pointer means the file's
   * own f_pos is used as the start and advanced afterwards; a non-NULL
   * pointer means the loff_t it points at is used and updated instead.
   * Positions are only advanced when vfs_copy_file_range() reports a
   * positive byte count.  A fault while reading or writing a user offset
   * yields -EFAULT.
   */
  SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
  		int, fd_out, loff_t __user *, off_out,
  		size_t, len, unsigned int, flags)
  {
  	struct fd in, out;
  	loff_t pos_in, pos_out;
  	ssize_t ret;
  
  	in = fdget(fd_in);
  	if (!in.file)
  		return -EBADF;
  
  	out = fdget(fd_out);
  	if (!out.file) {
  		ret = -EBADF;
  		goto put_in;
  	}
  
  	/* Any failure while fetching the start offsets is a fault. */
  	ret = -EFAULT;
  	if (!off_in)
  		pos_in = in.file->f_pos;
  	else if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
  		goto put_out;
  
  	if (!off_out)
  		pos_out = out.file->f_pos;
  	else if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
  		goto put_out;
  
  	ret = vfs_copy_file_range(in.file, pos_in, out.file, pos_out, len,
  				  flags);
  	if (ret <= 0)
  		goto put_out;
  
  	pos_in += ret;
  	pos_out += ret;
  
  	if (!off_in)
  		in.file->f_pos = pos_in;
  	else if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
  		ret = -EFAULT;
  
  	if (!off_out)
  		out.file->f_pos = pos_out;
  	else if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
  		ret = -EFAULT;
  
  put_out:
  	fdput(out);
  put_in:
  	fdput(in);
  	return ret;
  }
04b38d601   Christoph Hellwig   vfs: pull btrfs c...
1720

42ec3d4c0   Darrick J. Wong   vfs: make remap_f...
1721
1722
  /*
   * remap_verify_area - sanity and permission checks for one side of a remap.
   * @file:  file the range belongs to
   * @pos:   start of the range
   * @len:   length of the range; 0 makes the mandatory-lock check extend to
   *         OFFSET_MAX
   * @write: true when the range is going to be written, false when read
   *
   * Returns -EINVAL when @pos or @len is negative or their sum is negative,
   * a negative value from locks_mandatory_area() when that check fails, and
   * otherwise the result of security_file_permission().
   */
  static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
  			     bool write)
  {
  	struct inode *inode = file_inode(file);
  
  	if (unlikely(pos < 0 || len < 0 || (loff_t) (pos + len) < 0))
  		return -EINVAL;
  
  	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
  		loff_t last;
  		int err;
  
  		if (len)
  			last = pos + len - 1;
  		else
  			last = OFFSET_MAX;
  
  		err = locks_mandatory_area(inode, file, pos, last,
  				write ? F_WRLCK : F_RDLCK);
  		if (err < 0)
  			return err;
  	}
  
  	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
  }
07d19dc9f   Darrick J. Wong   vfs: avoid proble...
1743
1744
1745
1746
1747
  /*
   * Make sure a partial EOF block is never remapped into the middle of
   * something else.  The offsets are assumed to have been checked for block
   * alignment already.
   *
   * For clone, a partial EOF block is only linked above or at the destination
   * file's EOF.  For deduplication, a partial EOF block is accepted only if
   * it ends at the destination file's EOF (it can not be linked into the
   * middle of a file).
   *
   * If the caller allows it (REMAP_FILE_CAN_SHORTEN), the request is
   * shortened to a whole number of blocks instead of being rejected.
   * Otherwise the rejection is -EBADE for dedup and -EINVAL for clone.
   */
  static int generic_remap_check_len(struct inode *inode_in,
  				   struct inode *inode_out,
  				   loff_t pos_out,
  				   loff_t *len,
  				   unsigned int remap_flags)
  {
  	u64 blkmask = i_blocksize(inode_in) - 1;
  
  	/* A block-aligned length never ends in a partial block. */
  	if (!(*len & blkmask))
  		return 0;
  
  	/* A partial block ending at or past the destination's EOF is fine. */
  	if (pos_out + *len >= i_size_read(inode_out))
  		return 0;
  
  	/* The partial block would land inside the file: trim it or refuse. */
  	if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
  		*len &= ~blkmask;
  		return 0;
  	}
  
  	if (remap_flags & REMAP_FILE_DEDUP)
  		return -EBADE;
  	return -EINVAL;
  }
04b38d601   Christoph Hellwig   vfs: pull btrfs c...
1778

edc58dd01   Darrick J. Wong   vfs: fix page loc...
1779
  /* Read a page's worth of file data into the page cache. */
c32e5f399   Darrick J. Wong   vfs: hide file ra...
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
  static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
  {
  	struct page *page;
  
  	page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
  	if (IS_ERR(page))
  		return page;
  	if (!PageUptodate(page)) {
  		put_page(page);
  		return ERR_PTR(-EIO);
  	}
c32e5f399   Darrick J. Wong   vfs: hide file ra...
1791
1792
1793
1794
  	return page;
  }
  
  /*
   * Lock a pair of pages.  The page with the lower index is always locked
   * first, so pages from the same file are locked in offset order.  When both
   * arguments are the same page it is locked only once.
   */
  static void vfs_lock_two_pages(struct page *page1, struct page *page2)
  {
  	struct page *first = page1;
  	struct page *second = page2;
  
  	/* Order the pair by increasing index. */
  	if (page1->index > page2->index) {
  		first = page2;
  		second = page1;
  	}
  
  	lock_page(first);
  	if (first != second)
  		lock_page(second);
  }
  
  /* Unlock two pages, being careful not to unlock the same page twice. */
  static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
  {
  	unlock_page(page1);
  	if (page1 != page2)
  		unlock_page(page2);
  }
  
  /*
   * Compare extents of two files to see if they are the same.
   * Caller must have locked both inodes to prevent write races.
   *
   * The ranges are walked in pieces that never cross a page boundary in
   * either file.  On success 0 is returned and *@is_same tells whether all
   * bytes matched; on error a negative code is returned and *@is_same is
   * left untouched.
   */
  static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
  					 struct inode *dest, loff_t destoff,
  					 loff_t len, bool *is_same)
  {
  	bool same = true;
  
  	while (len) {
  		loff_t src_poff = srcoff & (PAGE_SIZE - 1);
  		loff_t dest_poff = destoff & (PAGE_SIZE - 1);
  		struct page *src_page;
  		struct page *dest_page;
  		loff_t cmp_len;
  
  		/* Largest piece that stays inside one page on both sides. */
  		cmp_len = min(PAGE_SIZE - src_poff,
  			      PAGE_SIZE - dest_poff);
  		cmp_len = min(cmp_len, len);
  		if (cmp_len <= 0)
  			return -EINVAL;
  
  		src_page = vfs_dedupe_get_page(src, srcoff);
  		if (IS_ERR(src_page))
  			return PTR_ERR(src_page);
  
  		dest_page = vfs_dedupe_get_page(dest, destoff);
  		if (IS_ERR(dest_page)) {
  			put_page(src_page);
  			return PTR_ERR(dest_page);
  		}
  
  		vfs_lock_two_pages(src_page, dest_page);
  
  		/*
  		 * Now that we've locked both pages, make sure they're still
  		 * mapped to the file data we're interested in.  If not,
  		 * someone is invalidating pages on us and we lose.
  		 */
  		if (PageUptodate(src_page) && PageUptodate(dest_page) &&
  		    src_page->mapping == src->i_mapping &&
  		    dest_page->mapping == dest->i_mapping) {
  			void *src_addr = kmap_atomic(src_page);
  			void *dest_addr = kmap_atomic(dest_page);
  
  			flush_dcache_page(src_page);
  			flush_dcache_page(dest_page);
  
  			if (memcmp(src_addr + src_poff, dest_addr + dest_poff,
  				   cmp_len))
  				same = false;
  
  			/* Unmap in the reverse order of the mappings above. */
  			kunmap_atomic(dest_addr);
  			kunmap_atomic(src_addr);
  		} else {
  			same = false;
  		}
  
  		vfs_unlock_two_pages(src_page, dest_page);
  		put_page(dest_page);
  		put_page(src_page);
  
  		if (!same)
  			break;
  
  		srcoff += cmp_len;
  		destoff += cmp_len;
  		len -= cmp_len;
  	}
  
  	*is_same = same;
  	return 0;
  }
04b38d601   Christoph Hellwig   vfs: pull btrfs c...
1901

876bec6f9   Darrick J. Wong   vfs: refactor clo...
1902
1903
1904
1905
  /*
   * Check that the two inodes are eligible for cloning, the ranges make
   * sense, and then flush all dirty data.  Caller must ensure that the
   * inodes have been locked against any other modifications.
22725ce4e   Darrick J. Wong   vfs: fix isize/po...
1906
   *
8c5c836bd   Darrick J. Wong   vfs: clean up gen...
1907
1908
   * If there's an error, then the usual negative error code is returned.
   * Otherwise returns 0 with *len set to the request length.
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1909
   */
a83ab01a6   Darrick J. Wong   vfs: rename vfs_c...
1910
1911
  int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
  				  struct file *file_out, loff_t pos_out,
42ec3d4c0   Darrick J. Wong   vfs: make remap_f...
1912
  				  loff_t *len, unsigned int remap_flags)
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1913
  {
1383a7ed6   Darrick J. Wong   vfs: check file r...
1914
1915
  	struct inode *inode_in = file_inode(file_in);
  	struct inode *inode_out = file_inode(file_out);
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
  	bool same_inode = (inode_in == inode_out);
  	int ret;
  
  	/* Don't touch certain kinds of inodes */
  	if (IS_IMMUTABLE(inode_out))
  		return -EPERM;
  
  	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
  		return -ETXTBSY;
  
  	/* Don't reflink dirs, pipes, sockets... */
  	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
  		return -EISDIR;
  	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
  		return -EINVAL;
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1931
1932
  	/* Zero length dedupe exits immediately; reflink goes to EOF. */
  	if (*len == 0) {
1383a7ed6   Darrick J. Wong   vfs: check file r...
1933
  		loff_t isize = i_size_read(inode_in);
a91ae49bb   Darrick J. Wong   vfs: pass remap f...
1934
  		if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1935
  			return 0;
22725ce4e   Darrick J. Wong   vfs: fix isize/po...
1936
1937
  		if (pos_in > isize)
  			return -EINVAL;
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1938
  		*len = isize - pos_in;
2c5773f10   Darrick J. Wong   vfs: exit early f...
1939
1940
  		if (*len == 0)
  			return 0;
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1941
  	}
1383a7ed6   Darrick J. Wong   vfs: check file r...
1942
1943
  	/* Check that we don't violate system file offset limits. */
  	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
3d28193e1   Darrick J. Wong   vfs: pass remap f...
1944
  			remap_flags);
1383a7ed6   Darrick J. Wong   vfs: check file r...
1945
1946
  	if (ret)
  		return ret;
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
  
  	/* Wait for the completion of any pending IOs on both files */
  	inode_dio_wait(inode_in);
  	if (!same_inode)
  		inode_dio_wait(inode_out);
  
  	ret = filemap_write_and_wait_range(inode_in->i_mapping,
  			pos_in, pos_in + *len - 1);
  	if (ret)
  		return ret;
  
  	ret = filemap_write_and_wait_range(inode_out->i_mapping,
  			pos_out, pos_out + *len - 1);
  	if (ret)
  		return ret;
  
  	/*
  	 * Check that the extents are the same.
  	 */
a91ae49bb   Darrick J. Wong   vfs: pass remap f...
1966
  	if (remap_flags & REMAP_FILE_DEDUP) {
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1967
1968
1969
1970
1971
1972
1973
1974
1975
  		bool		is_same = false;
  
  		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
  				inode_out, pos_out, *len, &is_same);
  		if (ret)
  			return ret;
  		if (!is_same)
  			return -EBADE;
  	}
07d19dc9f   Darrick J. Wong   vfs: avoid proble...
1976
  	ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
a91ae49bb   Darrick J. Wong   vfs: pass remap f...
1977
  			remap_flags);
07d19dc9f   Darrick J. Wong   vfs: avoid proble...
1978
1979
  	if (ret)
  		return ret;
8dde90bca   Darrick J. Wong   vfs: remap helper...
1980
  	/* If can't alter the file contents, we're done. */
e38f7f53c   Amir Goldstein   vfs: introduce fi...
1981
1982
  	if (!(remap_flags & REMAP_FILE_DEDUP))
  		ret = file_modified(file_out);
8dde90bca   Darrick J. Wong   vfs: remap helper...
1983

e38f7f53c   Amir Goldstein   vfs: introduce fi...
1984
  	return ret;
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1985
  }
a83ab01a6   Darrick J. Wong   vfs: rename vfs_c...
1986
  EXPORT_SYMBOL(generic_remap_file_range_prep);
876bec6f9   Darrick J. Wong   vfs: refactor clo...
1987

42ec3d4c0   Darrick J. Wong   vfs: make remap_f...
1988
  /*
   * Clone (reflink) a byte range from file_in into file_out through the
   * filesystem's ->remap_file_range method.
   *
   * This helper does not call file_start_write() on file_out itself;
   * vfs_clone_file_range() is the wrapper in this file that does so.
   *
   * REMAP_FILE_DEDUP must not be set in remap_flags; a one-time warning is
   * raised if it is.
   *
   * Returns a negative errno on failure:
   *   -EXDEV       the two files live on different superblocks
   *   -EOPNOTSUPP  file_in has no ->remap_file_range method
   *   other        whatever generic_file_rw_checks(), remap_verify_area()
   *                or the filesystem method reported
   * Otherwise returns the non-negative value handed back by
   * ->remap_file_range (presumably the number of bytes remapped).
   */
  loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
  			   struct file *file_out, loff_t pos_out,
  			   loff_t len, unsigned int remap_flags)
  {
  	struct inode *inode_in = file_inode(file_in);
  	struct inode *inode_out = file_inode(file_out);
  	loff_t cloned;
  	int error;

  	WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);

  	/*
  	 * The FICLONE/FICLONERANGE ioctls insist on a common mount for the
  	 * source and destination.  All that is really required is a common
  	 * filesystem, so compare superblocks here.
  	 */
  	if (inode_in->i_sb != inode_out->i_sb)
  		return -EXDEV;

  	error = generic_file_rw_checks(file_in, file_out);
  	if (error < 0)
  		return error;

  	if (!file_in->f_op->remap_file_range)
  		return -EOPNOTSUPP;

  	/* Validate the source range for reading ... */
  	error = remap_verify_area(file_in, pos_in, len, false);
  	if (error)
  		return error;

  	/* ... and the destination range for writing. */
  	error = remap_verify_area(file_out, pos_out, len, true);
  	if (error)
  		return error;

  	cloned = file_in->f_op->remap_file_range(file_in, pos_in,
  						 file_out, pos_out,
  						 len, remap_flags);
  	if (cloned < 0)
  		return cloned;

  	/* Success: the source was read and the destination was changed. */
  	fsnotify_access(file_in);
  	fsnotify_modify(file_out);

  	return cloned;
  }
  EXPORT_SYMBOL(do_clone_file_range);
42ec3d4c0   Darrick J. Wong   vfs: make remap_f...
2025
  /*
   * Clone a byte range from file_in into file_out.
   *
   * Thin wrapper around do_clone_file_range() that brackets the operation
   * with file_start_write()/file_end_write() on the destination file.  The
   * return value is passed through unchanged from do_clone_file_range().
   */
  loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
  			    struct file *file_out, loff_t pos_out,
  			    loff_t len, unsigned int remap_flags)
  {
  	loff_t cloned;

  	file_start_write(file_out);
  	cloned = do_clone_file_range(file_in, pos_in, file_out, pos_out,
  				     len, remap_flags);
  	file_end_write(file_out);

  	return cloned;
  }
  EXPORT_SYMBOL(vfs_clone_file_range);
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2039

5de4480ae   Mark Fasheh   vfs: allow dedupe...
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
  /* Check whether we are allowed to dedupe the destination file */
  static bool allow_file_dedupe(struct file *file)
  {
  	if (capable(CAP_SYS_ADMIN))
  		return true;
  	if (file->f_mode & FMODE_WRITE)
  		return true;
  	if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
  		return true;
  	if (!inode_permission(file_inode(file), MAY_WRITE))
  		return true;
  	return false;
  }
42ec3d4c0   Darrick J. Wong   vfs: make remap_f...
2053
2054
  /*
   * Deduplicate one range of src_file against one range of dst_file.
   *
   * Only REMAP_FILE_DEDUP and REMAP_FILE_CAN_SHORTEN are expected in
   * remap_flags (anything else triggers a one-time warning);
   * REMAP_FILE_DEDUP is always OR'd in before the filesystem is called.
   *
   * Write access to dst_file's mount is held via mnt_want_write_file()
   * for the duration of the checks and the remap call, and dropped on
   * every exit path after it was obtained.
   *
   * Returns a negative errno on failure:
   *   -EPERM   the caller may not dedupe into dst_file
   *   -EXDEV   src_file and dst_file are on different mounts
   *   -EISDIR  dst_file is a directory
   *   -EINVAL  dst_file has no ->remap_file_range method
   *   other    from mnt_want_write_file(), remap_verify_area() or the
   *            filesystem method
   * Returns 0 for a zero-length request without calling the filesystem,
   * otherwise the value returned by ->remap_file_range.
   */
  loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
  				 struct file *dst_file, loff_t dst_pos,
  				 loff_t len, unsigned int remap_flags)
  {
  	loff_t ret;

  	WARN_ON_ONCE(remap_flags &
  		     ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN));

  	ret = mnt_want_write_file(dst_file);
  	if (ret)
  		return ret;

  	ret = remap_verify_area(dst_file, dst_pos, len, true);
  	if (ret < 0)
  		goto out_drop_write;

  	if (!allow_file_dedupe(dst_file)) {
  		ret = -EPERM;
  		goto out_drop_write;
  	}

  	if (src_file->f_path.mnt != dst_file->f_path.mnt) {
  		ret = -EXDEV;
  		goto out_drop_write;
  	}

  	if (S_ISDIR(file_inode(dst_file)->i_mode)) {
  		ret = -EISDIR;
  		goto out_drop_write;
  	}

  	if (!dst_file->f_op->remap_file_range) {
  		ret = -EINVAL;
  		goto out_drop_write;
  	}

  	/* Nothing to compare or share: succeed without bothering the fs. */
  	if (len == 0) {
  		ret = 0;
  		goto out_drop_write;
  	}

  	ret = dst_file->f_op->remap_file_range(src_file, src_pos,
  					       dst_file, dst_pos, len,
  					       remap_flags | REMAP_FILE_DEDUP);

  out_drop_write:
  	mnt_drop_write_file(dst_file);

  	return ret;
  }
  EXPORT_SYMBOL(vfs_dedupe_file_range_one);
1b4f42a1e   Miklos Szeredi   vfs: dedupe: extr...
2095

54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2096
2097
2098
2099
2100
2101
2102
2103
  int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
  {
  	struct file_dedupe_range_info *info;
  	struct inode *src = file_inode(file);
  	u64 off;
  	u64 len;
  	int i;
  	int ret;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2104
  	u16 count = same->dest_count;
42ec3d4c0   Darrick J. Wong   vfs: make remap_f...
2105
  	loff_t deduped;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2106
2107
2108
2109
2110
2111
2112
2113
2114
  
  	if (!(file->f_mode & FMODE_READ))
  		return -EINVAL;
  
  	if (same->reserved1 || same->reserved2)
  		return -EINVAL;
  
  	off = same->src_offset;
  	len = same->src_length;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2115
  	if (S_ISDIR(src->i_mode))
494633fac   Dave Chinner   vfs: vfs_dedupe_f...
2116
  		return -EISDIR;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2117

54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2118
  	if (!S_ISREG(src->i_mode))
494633fac   Dave Chinner   vfs: vfs_dedupe_f...
2119
2120
2121
2122
  		return -EINVAL;
  
  	if (!file->f_op->remap_file_range)
  		return -EOPNOTSUPP;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2123

6095028b4   Darrick J. Wong   vfs: rename clone...
2124
  	ret = remap_verify_area(file, off, len, false);
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2125
  	if (ret < 0)
494633fac   Dave Chinner   vfs: vfs_dedupe_f...
2126
  		return ret;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2127
  	ret = 0;
22725ce4e   Darrick J. Wong   vfs: fix isize/po...
2128
2129
  	if (off + len > i_size_read(src))
  		return -EINVAL;
92b66d2cd   Miklos Szeredi   vfs: limit size o...
2130
2131
  	/* Arbitrary 1G limit on a single dedupe request, can be raised. */
  	len = min_t(u64, len, 1 << 30);
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2132
2133
2134
2135
2136
2137
2138
  	/* pre-format output fields to sane values */
  	for (i = 0; i < count; i++) {
  		same->info[i].bytes_deduped = 0ULL;
  		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
  	}
  
  	for (i = 0, info = same->info; i < count; i++, info++) {
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2139
  		struct fd dst_fd = fdget(info->dest_fd);
1b4f42a1e   Miklos Szeredi   vfs: dedupe: extr...
2140
  		struct file *dst_file = dst_fd.file;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2141

54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2142
2143
2144
2145
  		if (!dst_file) {
  			info->status = -EBADF;
  			goto next_loop;
  		}
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2146
2147
2148
  
  		if (info->reserved) {
  			info->status = -EINVAL;
1b4f42a1e   Miklos Szeredi   vfs: dedupe: extr...
2149
  			goto next_fdput;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2150
  		}
1b4f42a1e   Miklos Szeredi   vfs: dedupe: extr...
2151
  		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
df3658361   Darrick J. Wong   vfs: plumb remap ...
2152
  						    info->dest_offset, len,
eca3654e3   Darrick J. Wong   vfs: enable remap...
2153
  						    REMAP_FILE_CAN_SHORTEN);
1b4f42a1e   Miklos Szeredi   vfs: dedupe: extr...
2154
2155
2156
2157
2158
2159
  		if (deduped == -EBADE)
  			info->status = FILE_DEDUPE_RANGE_DIFFERS;
  		else if (deduped < 0)
  			info->status = deduped;
  		else
  			info->bytes_deduped = len;
227627114   Zev Weiss   fs: avoid fdput()...
2160
  next_fdput:
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2161
  		fdput(dst_fd);
227627114   Zev Weiss   fs: avoid fdput()...
2162
  next_loop:
e62e560fc   Darrick J. Wong   vfs: abort dedupe...
2163
  		if (fatal_signal_pending(current))
494633fac   Dave Chinner   vfs: vfs_dedupe_f...
2164
  			break;
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2165
  	}
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
2166
2167
2168
  	return ret;
  }
  EXPORT_SYMBOL(vfs_dedupe_file_range);