Blame view

fs/read_write.c 40.6 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
  /*
   *  linux/fs/read_write.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  #include <linux/slab.h> 
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/file.h>
  #include <linux/uio.h>
0eeca2830   Robert Love   [PATCH] inotify
12
  #include <linux/fsnotify.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
  #include <linux/security.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
14
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
  #include <linux/syscalls.h>
e28cc7157   Linus Torvalds   Relax the rw_veri...
16
  #include <linux/pagemap.h>
d6b29d7ce   Jens Axboe   splice: divorce t...
17
  #include <linux/splice.h>
561c67319   Al Viro   switch lseek to C...
18
  #include <linux/compat.h>
29732938a   Zach Brown   vfs: add copy_fil...
19
  #include <linux/mount.h>
2feb55f89   Wouter van Kesteren   fs: allow no_seek...
20
  #include <linux/fs.h>
06ae43f34   Al Viro   Don't bother with...
21
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
23
24
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
c0bd14af5   Al Viro   kill fs/read_write.h
25
  typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
293bc9822   Al Viro   new methods: ->re...
26
  typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
c0bd14af5   Al Viro   kill fs/read_write.h
27

4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
28
  const struct file_operations generic_ro_fops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  	.llseek		= generic_file_llseek,
aad4f8bb4   Al Viro   switch simple gen...
30
  	.read_iter	= generic_file_read_iter,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
  	.mmap		= generic_file_readonly_mmap,
534f2aaa6   Jens Axboe   sys_sendfile: swi...
32
  	.splice_read	= generic_file_splice_read,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33
34
35
  };
  
  EXPORT_SYMBOL(generic_ro_fops);
cccb5a1e6   Al Viro   fix signedness me...
36
  static inline int unsigned_offsets(struct file *file)
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
37
  {
cccb5a1e6   Al Viro   fix signedness me...
38
  	return file->f_mode & FMODE_UNSIGNED_OFFSET;
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
39
  }
46a1c2c7a   Jie Liu   vfs: export lseek...
40
41
42
43
44
45
46
47
48
49
50
51
52
  /**
   * vfs_setpos - update the file offset for lseek
   * @file:	file structure in question
   * @offset:	file offset to seek to
   * @maxsize:	maximum file size
   *
   * This is a low-level filesystem helper for updating the file offset to
   * the value specified by @offset if the given offset is valid and it is
   * not equal to the current file offset.
   *
   * Return the specified offset on success and -EINVAL on invalid offset.
   */
  loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
53
54
55
56
57
58
59
60
61
62
63
64
  {
  	if (offset < 0 && !unsigned_offsets(file))
  		return -EINVAL;
  	if (offset > maxsize)
  		return -EINVAL;
  
  	if (offset != file->f_pos) {
  		file->f_pos = offset;
  		file->f_version = 0;
  	}
  	return offset;
  }
46a1c2c7a   Jie Liu   vfs: export lseek...
65
  EXPORT_SYMBOL(vfs_setpos);
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
66

3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
67
  /**
5760495a8   Andi Kleen   vfs: add generic_...
68
   * generic_file_llseek_size - generic llseek implementation for regular files
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
69
70
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
71
   * @whence:	type of seek
e8b96eb50   Eric Sandeen   vfs: allow custom...
72
73
   * @maxsize:	max size of this file in file system
   * @eof:	offset used for SEEK_END position
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
74
   *
5760495a8   Andi Kleen   vfs: add generic_...
75
   * This is a variant of generic_file_llseek that allows passing in a custom
e8b96eb50   Eric Sandeen   vfs: allow custom...
76
   * maximum file size and a custom EOF position, for e.g. hashed directories
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
77
78
   *
   * Synchronization:
5760495a8   Andi Kleen   vfs: add generic_...
79
   * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
80
81
   * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
   * read/writes behave like SEEK_SET against seeks.
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
82
   */
9465efc9e   Andi Kleen   Remove BKL from r...
83
  loff_t
965c8e59c   Andrew Morton   lseek: the "whenc...
84
  generic_file_llseek_size(struct file *file, loff_t offset, int whence,
e8b96eb50   Eric Sandeen   vfs: allow custom...
85
  		loff_t maxsize, loff_t eof)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
86
  {
965c8e59c   Andrew Morton   lseek: the "whenc...
87
  	switch (whence) {
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
88
  	case SEEK_END:
e8b96eb50   Eric Sandeen   vfs: allow custom...
89
  		offset += eof;
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
90
91
  		break;
  	case SEEK_CUR:
5b6f1eb97   Alain Knaff   vfs: lseek(fd, 0,...
92
93
94
95
96
97
98
99
  		/*
  		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  		 * position-querying operation.  Avoid rewriting the "same"
  		 * f_pos value back to the file because a concurrent read(),
  		 * write() or lseek() might have altered it
  		 */
  		if (offset == 0)
  			return file->f_pos;
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
100
101
102
103
104
105
  		/*
  		 * f_lock protects against read/modify/write race with other
  		 * SEEK_CURs. Note that parallel writes and reads behave
  		 * like SEEK_SET.
  		 */
  		spin_lock(&file->f_lock);
46a1c2c7a   Jie Liu   vfs: export lseek...
106
  		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
ef3d0fd27   Andi Kleen   vfs: do (nearly) ...
107
108
  		spin_unlock(&file->f_lock);
  		return offset;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
109
110
111
112
113
  	case SEEK_DATA:
  		/*
  		 * In the generic case the entire file is data, so as long as
  		 * offset isn't at the end of the file then the offset is data.
  		 */
e8b96eb50   Eric Sandeen   vfs: allow custom...
114
  		if (offset >= eof)
982d81658   Josef Bacik   fs: add SEEK_HOLE...
115
116
117
118
119
120
121
  			return -ENXIO;
  		break;
  	case SEEK_HOLE:
  		/*
  		 * There is a virtual hole at the end of the file, so as long as
  		 * offset isn't i_size or larger, return i_size.
  		 */
e8b96eb50   Eric Sandeen   vfs: allow custom...
122
  		if (offset >= eof)
982d81658   Josef Bacik   fs: add SEEK_HOLE...
123
  			return -ENXIO;
e8b96eb50   Eric Sandeen   vfs: allow custom...
124
  		offset = eof;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
125
  		break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
126
  	}
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
127

46a1c2c7a   Jie Liu   vfs: export lseek...
128
  	return vfs_setpos(file, offset, maxsize);
5760495a8   Andi Kleen   vfs: add generic_...
129
130
131
132
133
134
135
  }
  EXPORT_SYMBOL(generic_file_llseek_size);
  
  /**
   * generic_file_llseek - generic llseek implementation for regular files
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
136
   * @whence:	type of seek
5760495a8   Andi Kleen   vfs: add generic_...
137
138
139
   *
   * This is a generic implementation of ->llseek useable for all normal local
   * filesystems.  It just updates the file offset to the value specified by
546ae2d2f   Ming Lei   fs/read_write.c: ...
140
   * @offset and @whence.
5760495a8   Andi Kleen   vfs: add generic_...
141
   */
965c8e59c   Andrew Morton   lseek: the "whenc...
142
  loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
5760495a8   Andi Kleen   vfs: add generic_...
143
144
  {
  	struct inode *inode = file->f_mapping->host;
965c8e59c   Andrew Morton   lseek: the "whenc...
145
  	return generic_file_llseek_size(file, offset, whence,
e8b96eb50   Eric Sandeen   vfs: allow custom...
146
147
  					inode->i_sb->s_maxbytes,
  					i_size_read(inode));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
  }
9465efc9e   Andi Kleen   Remove BKL from r...
149
  EXPORT_SYMBOL(generic_file_llseek);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150

ae6afc3f5   jan Blunck   vfs: introduce no...
151
  /**
1bf9d14df   Al Viro   new helper: fixed...
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
   * fixed_size_llseek - llseek implementation for fixed-sized devices
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   * @size:	size of the file
   *
   */
  loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
  {
  	/*
  	 * Anything other than the three classic whence values is refused
  	 * here, so SEEK_DATA/SEEK_HOLE never reach the generic helper.
  	 */
  	if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END)
  		return -EINVAL;
  
  	/* @size serves both as the upper bound and as the SEEK_END base. */
  	return generic_file_llseek_size(file, offset, whence, size, size);
  }
  EXPORT_SYMBOL(fixed_size_llseek);
  
  /**
b25472f9b   Al Viro   new helpers: no_s...
172
173
174
175
176
177
178
179
180
181
182
   * no_seek_end_llseek - llseek implementation that accepts only SEEK_SET and SEEK_CUR
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   *
   */
  loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
  {
  	switch (whence) {
  	case SEEK_SET: case SEEK_CUR:
  		return generic_file_llseek_size(file, offset, whence,
2feb55f89   Wouter van Kesteren   fs: allow no_seek...
183
  						OFFSET_MAX, 0);
b25472f9b   Al Viro   new helpers: no_s...
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
  	default:
  		return -EINVAL;
  	}
  }
  EXPORT_SYMBOL(no_seek_end_llseek);
  
  /**
   * no_seek_end_llseek_size - llseek restricted to SEEK_SET and SEEK_CUR
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @whence:	type of seek
   * @size:	maximal offset allowed
   *
   * Every other @whence value is rejected with -EINVAL.  Accepted requests
   * are handed to generic_file_llseek_size() with @size as the maximum
   * offset and 0 as the end-of-file position.
   */
  loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
  {
  	if (whence != SEEK_SET && whence != SEEK_CUR)
  		return -EINVAL;
  
  	return generic_file_llseek_size(file, offset, whence, size, 0);
  }
  EXPORT_SYMBOL(no_seek_end_llseek_size);
  
  /**
ae6afc3f5   jan Blunck   vfs: introduce no...
211
212
213
   * noop_llseek - No Operation Performed llseek implementation
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
965c8e59c   Andrew Morton   lseek: the "whenc...
214
   * @whence:	type of seek
ae6afc3f5   jan Blunck   vfs: introduce no...
215
216
217
218
219
220
   *
   * This is an implementation of ->llseek useable for the rare special case when
   * userspace expects the seek to succeed but the (device) file is actually not
   * able to perform the seek. In this case you use noop_llseek() instead of
   * falling back to the default implementation of ->llseek.
   */
965c8e59c   Andrew Morton   lseek: the "whenc...
221
  loff_t noop_llseek(struct file *file, loff_t offset, int whence)
ae6afc3f5   jan Blunck   vfs: introduce no...
222
223
224
225
  {
  	return file->f_pos;
  }
  EXPORT_SYMBOL(noop_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
226
  loff_t no_llseek(struct file *file, loff_t offset, int whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
227
228
229
230
  {
  	return -ESPIPE;
  }
  EXPORT_SYMBOL(no_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
231
  loff_t default_llseek(struct file *file, loff_t offset, int whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232
  {
496ad9aa8   Al Viro   new helper: file_...
233
  	struct inode *inode = file_inode(file);
16abef0e9   David Sterba   fs: use loff_t ty...
234
  	loff_t retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235

5955102c9   Al Viro   wrappers for ->i_...
236
  	inode_lock(inode);
965c8e59c   Andrew Morton   lseek: the "whenc...
237
  	switch (whence) {
7b8e89249   Chris Snook   use symbolic cons...
238
  		case SEEK_END:
982d81658   Josef Bacik   fs: add SEEK_HOLE...
239
  			offset += i_size_read(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
  			break;
7b8e89249   Chris Snook   use symbolic cons...
241
  		case SEEK_CUR:
5b6f1eb97   Alain Knaff   vfs: lseek(fd, 0,...
242
243
244
245
  			if (offset == 0) {
  				retval = file->f_pos;
  				goto out;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246
  			offset += file->f_pos;
982d81658   Josef Bacik   fs: add SEEK_HOLE...
247
248
249
250
251
252
253
  			break;
  		case SEEK_DATA:
  			/*
  			 * In the generic case the entire file is data, so as
  			 * long as offset isn't at the end of the file then the
  			 * offset is data.
  			 */
bacb2d816   Dan Carpenter   fs: add missing u...
254
255
256
257
  			if (offset >= inode->i_size) {
  				retval = -ENXIO;
  				goto out;
  			}
982d81658   Josef Bacik   fs: add SEEK_HOLE...
258
259
260
261
262
263
264
  			break;
  		case SEEK_HOLE:
  			/*
  			 * There is a virtual hole at the end of the file, so
  			 * as long as offset isn't i_size or larger, return
  			 * i_size.
  			 */
bacb2d816   Dan Carpenter   fs: add missing u...
265
266
267
268
  			if (offset >= inode->i_size) {
  				retval = -ENXIO;
  				goto out;
  			}
982d81658   Josef Bacik   fs: add SEEK_HOLE...
269
270
  			offset = inode->i_size;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
271
272
  	}
  	retval = -EINVAL;
cccb5a1e6   Al Viro   fix signedness me...
273
  	if (offset >= 0 || unsigned_offsets(file)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
274
275
276
277
278
279
  		if (offset != file->f_pos) {
  			file->f_pos = offset;
  			file->f_version = 0;
  		}
  		retval = offset;
  	}
5b6f1eb97   Alain Knaff   vfs: lseek(fd, 0,...
280
  out:
5955102c9   Al Viro   wrappers for ->i_...
281
  	inode_unlock(inode);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
282
283
284
  	return retval;
  }
  EXPORT_SYMBOL(default_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
285
  loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
286
287
288
289
290
  {
  	loff_t (*fn)(struct file *, loff_t, int);
  
  	fn = no_llseek;
  	if (file->f_mode & FMODE_LSEEK) {
72c2d5319   Al Viro   file->f_op is nev...
291
  		if (file->f_op->llseek)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
293
  			fn = file->f_op->llseek;
  	}
965c8e59c   Andrew Morton   lseek: the "whenc...
294
  	return fn(file, offset, whence);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
295
296
  }
  EXPORT_SYMBOL(vfs_llseek);
965c8e59c   Andrew Morton   lseek: the "whenc...
297
  SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
298
299
  {
  	off_t retval;
9c225f265   Linus Torvalds   vfs: atomic f_pos...
300
  	struct fd f = fdget_pos(fd);
2903ff019   Al Viro   switch simple cas...
301
302
  	if (!f.file)
  		return -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
303
304
  
  	retval = -EINVAL;
965c8e59c   Andrew Morton   lseek: the "whenc...
305
306
  	if (whence <= SEEK_MAX) {
  		loff_t res = vfs_llseek(f.file, offset, whence);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
308
309
310
  		retval = res;
  		if (res != (loff_t)retval)
  			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
  	}
9c225f265   Linus Torvalds   vfs: atomic f_pos...
311
  	fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
313
  	return retval;
  }
561c67319   Al Viro   switch lseek to C...
314
315
316
317
318
319
  #ifdef CONFIG_COMPAT
  /*
   * Compat flavour of lseek(): the offset arrives as compat_off_t and is
   * handed to the native sys_lseek() as an off_t; fd and whence are passed
   * through unchanged.
   */
  COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
  {
  	off_t native_offset = offset;
  
  	return sys_lseek(fd, native_offset, whence);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
320
  #ifdef __ARCH_WANT_SYS_LLSEEK
003d7ab47   Heiko Carstens   [CVE-2009-0029] S...
321
322
  SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
  		unsigned long, offset_low, loff_t __user *, result,
965c8e59c   Andrew Morton   lseek: the "whenc...
323
  		unsigned int, whence)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
324
325
  {
  	int retval;
d7a15f8d0   Eric Biggers   vfs: atomic f_pos...
326
  	struct fd f = fdget_pos(fd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
327
  	loff_t offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
328

2903ff019   Al Viro   switch simple cas...
329
330
  	if (!f.file)
  		return -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
331
332
  
  	retval = -EINVAL;
965c8e59c   Andrew Morton   lseek: the "whenc...
333
  	if (whence > SEEK_MAX)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
334
  		goto out_putf;
2903ff019   Al Viro   switch simple cas...
335
  	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
965c8e59c   Andrew Morton   lseek: the "whenc...
336
  			whence);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337
338
339
340
341
342
343
344
  
  	retval = (int)offset;
  	if (offset >= 0) {
  		retval = -EFAULT;
  		if (!copy_to_user(result, &offset, sizeof(offset)))
  			retval = 0;
  	}
  out_putf:
d7a15f8d0   Eric Biggers   vfs: atomic f_pos...
345
  	fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
346
347
348
  	return retval;
  }
  #endif
dbe4e192a   Christoph Hellwig   fs: add vfs_iter_...
349
350
351
352
353
354
355
356
357
358
  ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
  {
  	struct kiocb kiocb;
  	ssize_t ret;
  
  	if (!file->f_op->read_iter)
  		return -EINVAL;
  
  	init_sync_kiocb(&kiocb, file);
  	kiocb.ki_pos = *ppos;
dbe4e192a   Christoph Hellwig   fs: add vfs_iter_...
359
360
361
  
  	iter->type |= READ;
  	ret = file->f_op->read_iter(&kiocb, iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
362
  	BUG_ON(ret == -EIOCBQUEUED);
dbe4e192a   Christoph Hellwig   fs: add vfs_iter_...
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
  	if (ret > 0)
  		*ppos = kiocb.ki_pos;
  	return ret;
  }
  EXPORT_SYMBOL(vfs_iter_read);
  
  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
  {
  	struct kiocb kiocb;
  	ssize_t ret;
  
  	if (!file->f_op->write_iter)
  		return -EINVAL;
  
  	init_sync_kiocb(&kiocb, file);
  	kiocb.ki_pos = *ppos;
dbe4e192a   Christoph Hellwig   fs: add vfs_iter_...
379
380
381
  
  	iter->type |= WRITE;
  	ret = file->f_op->write_iter(&kiocb, iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
382
  	BUG_ON(ret == -EIOCBQUEUED);
dbe4e192a   Christoph Hellwig   fs: add vfs_iter_...
383
384
385
386
387
  	if (ret > 0)
  		*ppos = kiocb.ki_pos;
  	return ret;
  }
  EXPORT_SYMBOL(vfs_iter_write);
68d70d03f   Al Viro   constify rw_verif...
388
  int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
389
390
391
  {
  	struct inode *inode;
  	loff_t pos;
c43e259cc   James Morris   security: call se...
392
  	int retval = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
393

496ad9aa8   Al Viro   new helper: file_...
394
  	inode = file_inode(file);
e28cc7157   Linus Torvalds   Relax the rw_veri...
395
  	if (unlikely((ssize_t) count < 0))
c43e259cc   James Morris   security: call se...
396
  		return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
397
  	pos = *ppos;
cccb5a1e6   Al Viro   fix signedness me...
398
399
400
401
402
403
404
  	if (unlikely(pos < 0)) {
  		if (!unsigned_offsets(file))
  			return retval;
  		if (count >= -pos) /* both values are in 0..LLONG_MAX */
  			return -EOVERFLOW;
  	} else if (unlikely((loff_t) (pos + count) < 0)) {
  		if (!unsigned_offsets(file))
4a3956c79   KAMEZAWA Hiroyuki   vfs: introduce FM...
405
406
  			return retval;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
407

bd61e0a9c   Jeff Layton   locks: convert po...
408
  	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
acc15575e   Christoph Hellwig   locks: new locks_...
409
410
  		retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
  				read_write == READ ? F_RDLCK : F_WRLCK);
e28cc7157   Linus Torvalds   Relax the rw_veri...
411
412
413
  		if (retval < 0)
  			return retval;
  	}
bc61384dc   Al Viro   rw_verify_area():...
414
  	return security_file_permission(file,
c43e259cc   James Morris   security: call se...
415
  				read_write == READ ? MAY_READ : MAY_WRITE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
416
  }
5d5d56897   Al Viro   make new_sync_{re...
417
  static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
293bc9822   Al Viro   new methods: ->re...
418
419
420
421
422
423
424
425
  {
  	struct iovec iov = { .iov_base = buf, .iov_len = len };
  	struct kiocb kiocb;
  	struct iov_iter iter;
  	ssize_t ret;
  
  	init_sync_kiocb(&kiocb, filp);
  	kiocb.ki_pos = *ppos;
293bc9822   Al Viro   new methods: ->re...
426
427
428
  	iov_iter_init(&iter, READ, &iov, 1, len);
  
  	ret = filp->f_op->read_iter(&kiocb, &iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
429
  	BUG_ON(ret == -EIOCBQUEUED);
293bc9822   Al Viro   new methods: ->re...
430
431
432
  	*ppos = kiocb.ki_pos;
  	return ret;
  }
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
433
434
435
  ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
  		   loff_t *pos)
  {
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
436
  	if (file->f_op->read)
3d04c8a17   Al Viro   export __vfs_read()
437
  		return file->f_op->read(file, buf, count, pos);
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
438
  	else if (file->f_op->read_iter)
3d04c8a17   Al Viro   export __vfs_read()
439
  		return new_sync_read(file, buf, count, pos);
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
440
  	else
3d04c8a17   Al Viro   export __vfs_read()
441
  		return -EINVAL;
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
442
  }
3d04c8a17   Al Viro   export __vfs_read()
443
  EXPORT_SYMBOL(__vfs_read);
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
444

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
445
446
447
448
449
450
  ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;
  
  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
7f7f25e82   Al Viro   replace checking ...
451
  	if (!(file->f_mode & FMODE_CAN_READ))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
453
454
455
456
  		return -EINVAL;
  	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
  		return -EFAULT;
  
  	ret = rw_verify_area(READ, file, pos, count);
bc61384dc   Al Viro   rw_verify_area():...
457
458
459
  	if (!ret) {
  		if (count > MAX_RW_COUNT)
  			count =  MAX_RW_COUNT;
6fb5032eb   Dmitry Kasatkin   VFS: refactor vfs...
460
  		ret = __vfs_read(file, buf, count, pos);
c43e259cc   James Morris   security: call se...
461
  		if (ret > 0) {
2a12a9d78   Eric Paris   fsnotify: pass a ...
462
  			fsnotify_access(file);
c43e259cc   James Morris   security: call se...
463
  			add_rchar(current, ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
464
  		}
c43e259cc   James Morris   security: call se...
465
  		inc_syscr(current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
466
467
468
469
470
471
  	}
  
  	return ret;
  }
  
  EXPORT_SYMBOL(vfs_read);
5d5d56897   Al Viro   make new_sync_{re...
472
  static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
293bc9822   Al Viro   new methods: ->re...
473
474
475
476
477
478
479
480
  {
  	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
  	struct kiocb kiocb;
  	struct iov_iter iter;
  	ssize_t ret;
  
  	init_sync_kiocb(&kiocb, filp);
  	kiocb.ki_pos = *ppos;
293bc9822   Al Viro   new methods: ->re...
481
482
483
  	iov_iter_init(&iter, WRITE, &iov, 1, len);
  
  	ret = filp->f_op->write_iter(&kiocb, &iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
484
  	BUG_ON(ret == -EIOCBQUEUED);
f765b134c   Al Viro   new_sync_write():...
485
486
  	if (ret > 0)
  		*ppos = kiocb.ki_pos;
293bc9822   Al Viro   new methods: ->re...
487
488
  	return ret;
  }
493c84c07   Al Viro   new helper: __vfs...
489
490
491
492
493
  ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
  		    loff_t *pos)
  {
  	if (file->f_op->write)
  		return file->f_op->write(file, p, count, pos);
493c84c07   Al Viro   new helper: __vfs...
494
495
496
497
498
499
  	else if (file->f_op->write_iter)
  		return new_sync_write(file, p, count, pos);
  	else
  		return -EINVAL;
  }
  EXPORT_SYMBOL(__vfs_write);
06ae43f34   Al Viro   Don't bother with...
500
501
502
503
504
  ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
  {
  	mm_segment_t old_fs;
  	const char __user *p;
  	ssize_t ret;
7f7f25e82   Al Viro   replace checking ...
505
  	if (!(file->f_mode & FMODE_CAN_WRITE))
3e84f48ed   Al Viro   vfs/splice: Fix m...
506
  		return -EINVAL;
06ae43f34   Al Viro   Don't bother with...
507
508
509
510
511
  	old_fs = get_fs();
  	set_fs(get_ds());
  	p = (__force const char __user *)buf;
  	if (count > MAX_RW_COUNT)
  		count =  MAX_RW_COUNT;
493c84c07   Al Viro   new helper: __vfs...
512
  	ret = __vfs_write(file, p, count, pos);
06ae43f34   Al Viro   Don't bother with...
513
514
515
516
517
518
519
520
  	set_fs(old_fs);
  	if (ret > 0) {
  		fsnotify_modify(file);
  		add_wchar(current, ret);
  	}
  	inc_syscw(current);
  	return ret;
  }
2ec3a12a6   Al Viro   cachefiles_write_...
521
  EXPORT_SYMBOL(__kernel_write);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
522
523
524
525
526
527
  ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;
  
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
7f7f25e82   Al Viro   replace checking ...
528
  	if (!(file->f_mode & FMODE_CAN_WRITE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
529
530
531
532
533
  		return -EINVAL;
  	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
  		return -EFAULT;
  
  	ret = rw_verify_area(WRITE, file, pos, count);
bc61384dc   Al Viro   rw_verify_area():...
534
535
536
  	if (!ret) {
  		if (count > MAX_RW_COUNT)
  			count =  MAX_RW_COUNT;
03d95eb2f   Al Viro   lift sb_start_wri...
537
  		file_start_write(file);
493c84c07   Al Viro   new helper: __vfs...
538
  		ret = __vfs_write(file, buf, count, pos);
c43e259cc   James Morris   security: call se...
539
  		if (ret > 0) {
2a12a9d78   Eric Paris   fsnotify: pass a ...
540
  			fsnotify_modify(file);
c43e259cc   James Morris   security: call se...
541
  			add_wchar(current, ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
542
  		}
c43e259cc   James Morris   security: call se...
543
  		inc_syscw(current);
03d95eb2f   Al Viro   lift sb_start_wri...
544
  		file_end_write(file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
  	}
  
  	return ret;
  }
  
  EXPORT_SYMBOL(vfs_write);
  
  /* Return the position currently recorded in @file->f_pos. */
  static inline loff_t file_pos_read(struct file *file)
  {
  	const loff_t cur = file->f_pos;
  
  	return cur;
  }
  
  /* Record @pos as the new position in @file->f_pos. */
  static inline void file_pos_write(struct file *file, loff_t pos)
  {
  	loff_t *slot = &file->f_pos;
  
  	*slot = pos;
  }
3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
561
  SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
562
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
563
  	struct fd f = fdget_pos(fd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
564
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
565

2903ff019   Al Viro   switch simple cas...
566
567
568
  	if (f.file) {
  		loff_t pos = file_pos_read(f.file);
  		ret = vfs_read(f.file, buf, count, &pos);
5faf153eb   Al Viro   don't call file_p...
569
570
  		if (ret >= 0)
  			file_pos_write(f.file, pos);
9c225f265   Linus Torvalds   vfs: atomic f_pos...
571
  		fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
572
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
574
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575

3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
576
577
  SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
  		size_t, count)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578
  {
9c225f265   Linus Torvalds   vfs: atomic f_pos...
579
  	struct fd f = fdget_pos(fd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
580
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
581

2903ff019   Al Viro   switch simple cas...
582
583
584
  	if (f.file) {
  		loff_t pos = file_pos_read(f.file);
  		ret = vfs_write(f.file, buf, count, &pos);
5faf153eb   Al Viro   don't call file_p...
585
586
  		if (ret >= 0)
  			file_pos_write(f.file, pos);
9c225f265   Linus Torvalds   vfs: atomic f_pos...
587
  		fdput_pos(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
588
589
590
591
  	}
  
  	return ret;
  }
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
592
593
  SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
  			size_t, count, loff_t, pos)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
594
  {
2903ff019   Al Viro   switch simple cas...
595
  	struct fd f;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
596
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
597
598
599
  
  	if (pos < 0)
  		return -EINVAL;
2903ff019   Al Viro   switch simple cas...
600
601
  	f = fdget(fd);
  	if (f.file) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602
  		ret = -ESPIPE;
2903ff019   Al Viro   switch simple cas...
603
604
605
  		if (f.file->f_mode & FMODE_PREAD)
  			ret = vfs_read(f.file, buf, count, &pos);
  		fdput(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
606
607
608
609
  	}
  
  	return ret;
  }
4a0fd5bf0   Al Viro   teach SYSCALL_DEF...
610
611
  SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
  			 size_t, count, loff_t, pos)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
612
  {
2903ff019   Al Viro   switch simple cas...
613
  	struct fd f;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
614
  	ssize_t ret = -EBADF;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
615
616
617
  
  	if (pos < 0)
  		return -EINVAL;
2903ff019   Al Viro   switch simple cas...
618
619
  	f = fdget(fd);
  	if (f.file) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
620
  		ret = -ESPIPE;
2903ff019   Al Viro   switch simple cas...
621
622
623
  		if (f.file->f_mode & FMODE_PWRITE)  
  			ret = vfs_write(f.file, buf, count, &pos);
  		fdput(f);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
  	}
  
  	return ret;
  }
  
  /*
   * Trim an iovec array in place so that it describes at most @to bytes.
   * The segment in which the running total reaches @to has its length cut
   * to the remainder; later segments are left untouched.  Returns the
   * number of segments that are still in use.
   */
  unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
  {
  	size_t consumed = 0;
  	unsigned long idx;
  
  	for (idx = 0; idx < nr_segs; idx++) {
  		struct iovec *cur = &iov[idx];
  
  		if (consumed + cur->iov_len >= to) {
  			cur->iov_len = to - consumed;
  			return idx + 1;
  		}
  		consumed += cur->iov_len;
  	}
  	return nr_segs;
  }
19295529d   Eric Sandeen   ext4: export iov_...
648
  EXPORT_SYMBOL(iov_shorten);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
649

ac15ac066   Al Viro   lift iov_iter int...
650
  static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
793b80ef1   Christoph Hellwig   vfs: pass a flags...
651
  		loff_t *ppos, iter_fn_t fn, int flags)
293bc9822   Al Viro   new methods: ->re...
652
653
  {
  	struct kiocb kiocb;
293bc9822   Al Viro   new methods: ->re...
654
  	ssize_t ret;
e864f3956   Christoph Hellwig   fs: add RWF_DSYNC...
655
  	if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
793b80ef1   Christoph Hellwig   vfs: pass a flags...
656
  		return -EOPNOTSUPP;
293bc9822   Al Viro   new methods: ->re...
657
  	init_sync_kiocb(&kiocb, filp);
97be7ebe5   Christoph Hellwig   vfs: add the RWF_...
658
659
  	if (flags & RWF_HIPRI)
  		kiocb.ki_flags |= IOCB_HIPRI;
e864f3956   Christoph Hellwig   fs: add RWF_DSYNC...
660
661
662
663
  	if (flags & RWF_DSYNC)
  		kiocb.ki_flags |= IOCB_DSYNC;
  	if (flags & RWF_SYNC)
  		kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
293bc9822   Al Viro   new methods: ->re...
664
  	kiocb.ki_pos = *ppos;
293bc9822   Al Viro   new methods: ->re...
665

ac15ac066   Al Viro   lift iov_iter int...
666
  	ret = fn(&kiocb, iter);
599bd19bd   Christoph Hellwig   fs: don't allow t...
667
  	BUG_ON(ret == -EIOCBQUEUED);
293bc9822   Al Viro   new methods: ->re...
668
669
670
  	*ppos = kiocb.ki_pos;
  	return ret;
  }
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
671
  /* Do it by hand, with file-ops */
ac15ac066   Al Viro   lift iov_iter int...
672
  static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
793b80ef1   Christoph Hellwig   vfs: pass a flags...
673
  		loff_t *ppos, io_fn_t fn, int flags)
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
674
  {
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
675
  	ssize_t ret = 0;
97be7ebe5   Christoph Hellwig   vfs: add the RWF_...
676
  	if (flags & ~RWF_HIPRI)
793b80ef1   Christoph Hellwig   vfs: pass a flags...
677
  		return -EOPNOTSUPP;
ac15ac066   Al Viro   lift iov_iter int...
678
679
  	while (iov_iter_count(iter)) {
  		struct iovec iovec = iov_iter_iovec(iter);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
680
  		ssize_t nr;
ac15ac066   Al Viro   lift iov_iter int...
681
  		nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
682
683
684
685
686
687
688
  
  		if (nr < 0) {
  			if (!ret)
  				ret = nr;
  			break;
  		}
  		ret += nr;
ac15ac066   Al Viro   lift iov_iter int...
689
  		if (nr != iovec.iov_len)
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
690
  			break;
ac15ac066   Al Viro   lift iov_iter int...
691
  		iov_iter_advance(iter, nr);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
692
693
694
695
  	}
  
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
696
697
  /*
   * A write operation does a read from user space and vice versa, so the
   * access check direction is the opposite of the I/O direction:
   * READ maps to VERIFY_WRITE, every other type maps to VERIFY_READ.
   */
  #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
ffecee4f2   Vegard Nossum   iov_iter: kernel-...
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
  /**
   * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
   *     into the kernel and check that it is valid.
   *
   * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
   * @uvector: Pointer to the userspace array.
   * @nr_segs: Number of elements in userspace array.
   * @fast_segs: Number of elements in @fast_pointer.
   * @fast_pointer: Pointer to (usually small on-stack) kernel array.
   * @ret_pointer: (output parameter) Pointer to a variable that will point to
   *     either @fast_pointer, a newly allocated kernel array, or NULL,
   *     depending on which array was used.
   *
   * This function copies an array of &struct iovec of @nr_segs from
   * userspace into the kernel and checks that each element is valid (e.g.
   * it does not point to a kernel address or cause overflow by being too
   * large, etc.).
   *
   * As an optimization, the caller may provide a pointer to a small
   * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
   * (the size of this array, or 0 if unused, should be given in @fast_segs).
   *
   * @ret_pointer will always point to the array that was used, so the
   * caller must take care not to call kfree() on it e.g. in case the
   * @fast_pointer array was used and it was allocated on the stack.
   *
   * Return: The total number of bytes covered by the iovec array on success
   *   or a negative error code on error.
   */
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
727
728
729
  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
  			      unsigned long nr_segs, unsigned long fast_segs,
  			      struct iovec *fast_pointer,
  			      struct iovec **ret_pointer)
  {
  	struct iovec *kvec = fast_pointer;
  	struct iovec *cur;
  	ssize_t total = 0;

  	/*
  	 * SuS says "The readv() function *may* fail if the iovcnt argument
  	 * was less than or equal to 0, or greater than {IOV_MAX}".  Linux has
  	 * traditionally returned zero for zero segments, so keep doing that.
  	 */
  	if (!nr_segs)
  		goto done;

  	if (nr_segs > UIO_MAXIOV) {
  		total = -EINVAL;
  		goto done;
  	}

  	/* Fall back to a heap array when the caller's fast array is too small. */
  	if (nr_segs > fast_segs) {
  		kvec = kmalloc(nr_segs * sizeof(struct iovec), GFP_KERNEL);
  		if (!kvec) {
  			total = -ENOMEM;
  			goto done;
  		}
  	}

  	/* Fetch the "struct iovec" array from user memory. */
  	if (copy_from_user(kvec, uvector, nr_segs * sizeof(*uvector))) {
  		total = -EFAULT;
  		goto done;
  	}

  	/*
  	 * According to the Single Unix Specification we should return EINVAL
  	 * if an element length is < 0 when cast to ssize_t or if the total
  	 * length would overflow the ssize_t return value of the system call.
  	 *
  	 * Linux caps all read/write calls to MAX_RW_COUNT, and so avoids the
  	 * overflow case: the segment that would cross the cap is truncated.
  	 */
  	total = 0;
  	for (cur = kvec; cur < kvec + nr_segs; cur++) {
  		void __user *base = cur->iov_base;
  		ssize_t len = (ssize_t)cur->iov_len;

  		/* A length that is negative as ssize_t is invalid. */
  		if (len < 0) {
  			total = -EINVAL;
  			goto done;
  		}
  		/* A negative @type skips the user-address check. */
  		if (type >= 0
  		    && unlikely(!access_ok(vrfy_dir(type), base, len))) {
  			total = -EFAULT;
  			goto done;
  		}
  		if (len > MAX_RW_COUNT - total) {
  			len = MAX_RW_COUNT - total;
  			cur->iov_len = len;
  		}
  		total += len;
  	}

  done:
  	/* Always report which array was used, on the error paths too. */
  	*ret_pointer = kvec;
  	return total;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
798
799
  /*
   * Common back end of vfs_readv() and vfs_writev().
   *
   * Imports the user iovec array into an iov_iter, checks the range with
   * rw_verify_area() and then uses the file's ->read_iter/->write_iter
   * method when one is set, or a per-segment loop over ->read/->write
   * otherwise.  Writes are bracketed by file_start_write()/file_end_write().
   *
   * Returns the helper's result, or the value left by import_iovec() /
   * rw_verify_area() when the transfer is not attempted.
   */
  static ssize_t do_readv_writev(int type, struct file *file,
  			       const struct iovec __user * uvector,
  			       unsigned long nr_segs, loff_t *pos,
  			       int flags)
  {
  	const int is_read = (type == READ);
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
  	iter_fn_t iter_fn;
  	io_fn_t fn;
  	size_t tot_len;
  	ssize_t ret;

  	ret = import_iovec(type, uvector, nr_segs,
  			   ARRAY_SIZE(iovstack), &iov, &iter);
  	if (ret < 0)
  		return ret;

  	tot_len = iov_iter_count(&iter);
  	if (!tot_len)
  		goto done;

  	ret = rw_verify_area(type, file, pos, tot_len);
  	if (ret < 0)
  		goto done;

  	fn = is_read ? file->f_op->read : (io_fn_t)file->f_op->write;
  	iter_fn = is_read ? file->f_op->read_iter : file->f_op->write_iter;
  	if (!is_read)
  		file_start_write(file);

  	ret = iter_fn ? do_iter_readv_writev(file, &iter, pos, iter_fn, flags)
  		      : do_loop_readv_writev(file, &iter, pos, fn, flags);

  	if (!is_read)
  		file_end_write(file);

  done:
  	kfree(iov);

  	/* Reads notify on any non-negative result, writes only on progress. */
  	if (is_read) {
  		if (ret >= 0)
  			fsnotify_access(file);
  	} else if (ret > 0) {
  		fsnotify_modify(file);
  	}
  	return ret;
  }
  
  /*
   * vfs_readv() - vectored read from @file starting at *@pos.
   *
   * Returns -EBADF when the file lacks FMODE_READ and -EINVAL when it lacks
   * FMODE_CAN_READ; otherwise the result of do_readv_writev(READ, ...).
   */
  ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  		  unsigned long vlen, loff_t *pos, int flags)
  {
  	ssize_t ret;

  	if (!(file->f_mode & FMODE_READ))
  		ret = -EBADF;
  	else if (!(file->f_mode & FMODE_CAN_READ))
  		ret = -EINVAL;
  	else
  		ret = do_readv_writev(READ, file, vec, vlen, pos, flags);

  	return ret;
  }
  
  EXPORT_SYMBOL(vfs_readv);
  
  /*
   * vfs_writev() - vectored write to @file starting at *@pos.
   *
   * Returns -EBADF when the file lacks FMODE_WRITE and -EINVAL when it lacks
   * FMODE_CAN_WRITE; otherwise the result of do_readv_writev(WRITE, ...).
   */
  ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  		   unsigned long vlen, loff_t *pos, int flags)
  {
  	ssize_t ret;

  	if (!(file->f_mode & FMODE_WRITE))
  		ret = -EBADF;
  	else if (!(file->f_mode & FMODE_CAN_WRITE))
  		ret = -EINVAL;
  	else
  		ret = do_readv_writev(WRITE, file, vec, vlen, pos, flags);

  	return ret;
  }
  
  EXPORT_SYMBOL(vfs_writev);
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
871
872
  /*
   * Vectored read at the file's current position.
   *
   * The position is read with file_pos_read(), handed to vfs_readv() and
   * written back only when the read did not fail.  The read syscall counter
   * is bumped on every call; the byte counter only for a positive result.
   * Returns -EBADF when @fd does not resolve to a file.
   */
  static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
  			unsigned long vlen, int flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t nread = -EBADF;
  	loff_t offset;

  	if (!f.file)
  		goto account;

  	offset = file_pos_read(f.file);
  	nread = vfs_readv(f.file, vec, vlen, &offset, flags);
  	if (nread >= 0)
  		file_pos_write(f.file, offset);
  	fdput_pos(f);

  	if (nread > 0)
  		add_rchar(current, nread);
  account:
  	inc_syscr(current);
  	return nread;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
890
891
  /*
   * Vectored write at the file's current position.
   *
   * The position is read with file_pos_read(), handed to vfs_writev() and
   * written back only when the write did not fail.  The write syscall
   * counter is bumped on every call; the byte counter only for a positive
   * result.  Returns -EBADF when @fd does not resolve to a file.
   */
  static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
  			 unsigned long vlen, int flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t nwritten = -EBADF;
  	loff_t offset;

  	if (!f.file)
  		goto account;

  	offset = file_pos_read(f.file);
  	nwritten = vfs_writev(f.file, vec, vlen, &offset, flags);
  	if (nwritten >= 0)
  		file_pos_write(f.file, offset);
  	fdput_pos(f);

  	if (nwritten > 0)
  		add_wchar(current, nwritten);
  account:
  	inc_syscw(current);
  	return nwritten;
  }
601cc11d0   Linus Torvalds   Make non-compat p...
909
910
911
912
913
  static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
  {
  #define HALF_LONG_BITS (BITS_PER_LONG / 2)
  	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
914
915
  /*
   * Vectored read at the explicit offset @pos; the file's own position is
   * not touched here.
   *
   * Returns -EINVAL for a negative @pos (before any accounting), -EBADF when
   * @fd does not resolve to a file, -ESPIPE when the file lacks FMODE_PREAD,
   * and otherwise the result of vfs_readv().
   */
  static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
  			 unsigned long vlen, loff_t pos, int flags)
  {
  	struct fd f;
  	ssize_t nread;

  	if (pos < 0)
  		return -EINVAL;

  	f = fdget(fd);
  	if (!f.file) {
  		nread = -EBADF;
  	} else {
  		if (f.file->f_mode & FMODE_PREAD)
  			nread = vfs_readv(f.file, vec, vlen, &pos, flags);
  		else
  			nread = -ESPIPE;
  		fdput(f);
  	}

  	if (nread > 0)
  		add_rchar(current, nread);
  	inc_syscr(current);
  	return nread;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
935
936
  /*
   * Vectored write at the explicit offset @pos; the file's own position is
   * not touched here.
   *
   * Returns -EINVAL for a negative @pos (before any accounting), -EBADF when
   * @fd does not resolve to a file, -ESPIPE when the file lacks
   * FMODE_PWRITE, and otherwise the result of vfs_writev().
   */
  static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
  			  unsigned long vlen, loff_t pos, int flags)
  {
  	struct fd f;
  	ssize_t nwritten;

  	if (pos < 0)
  		return -EINVAL;

  	f = fdget(fd);
  	if (!f.file) {
  		nwritten = -EBADF;
  	} else {
  		if (f.file->f_mode & FMODE_PWRITE)
  			nwritten = vfs_writev(f.file, vec, vlen, &pos, flags);
  		else
  			nwritten = -ESPIPE;
  		fdput(f);
  	}

  	if (nwritten > 0)
  		add_wchar(current, nwritten);
  	inc_syscw(current);
  	return nwritten;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
  /* readv(): vectored read at the current file position, with no RWF_* flags. */
  SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	const int no_flags = 0;

  	return do_readv(fd, vec, vlen, no_flags);
  }
  
  /* writev(): vectored write at the current file position, with no RWF_* flags. */
  SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	const int no_flags = 0;

  	return do_writev(fd, vec, vlen, no_flags);
  }
  
  /*
   * preadv(): vectored read at the offset assembled from @pos_h and @pos_l,
   * with no RWF_* flags.
   */
  SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	return do_preadv(fd, vec, vlen, pos_from_hilo(pos_h, pos_l), 0);
  }
  
  /*
   * preadv2(): like preadv() but takes @flags.  An assembled offset of -1
   * selects the current file position via do_readv(); any other offset goes
   * to do_preadv().
   */
  SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
  		int, flags)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);

  	if (pos != -1)
  		return do_preadv(fd, vec, vlen, pos, flags);

  	return do_readv(fd, vec, vlen, flags);
  }
  
  /*
   * pwritev(): vectored write at the offset assembled from @pos_h and
   * @pos_l, with no RWF_* flags.
   */
  SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	return do_pwritev(fd, vec, vlen, pos_from_hilo(pos_h, pos_l), 0);
  }
  
  /*
   * pwritev2(): like pwritev() but takes @flags.  An assembled offset of -1
   * selects the current file position via do_writev(); any other offset goes
   * to do_pwritev().
   */
  SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
  		int, flags)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);

  	if (pos != -1)
  		return do_pwritev(fd, vec, vlen, pos, flags);

  	return do_writev(fd, vec, vlen, flags);
  }
72ec35163   Al Viro   switch compat rea...
1007
1008
1009
1010
  #ifdef CONFIG_COMPAT
  
  /*
   * Compat counterpart of do_readv_writev(): same flow, but the user array
   * holds struct compat_iovec entries and is imported with
   * compat_import_iovec().
   *
   * After rw_verify_area() the file's ->read_iter/->write_iter method is used
   * when set, otherwise a per-segment loop over ->read/->write.  Writes are
   * bracketed by file_start_write()/file_end_write().
   */
  static ssize_t compat_do_readv_writev(int type, struct file *file,
  			       const struct compat_iovec __user *uvector,
  			       unsigned long nr_segs, loff_t *pos,
  			       int flags)
  {
  	const int is_read = (type == READ);
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	struct iov_iter iter;
  	iter_fn_t iter_fn;
  	io_fn_t fn;
  	compat_ssize_t tot_len;
  	ssize_t ret;

  	ret = compat_import_iovec(type, uvector, nr_segs,
  				  UIO_FASTIOV, &iov, &iter);
  	if (ret < 0)
  		return ret;

  	tot_len = iov_iter_count(&iter);
  	if (!tot_len)
  		goto done;

  	ret = rw_verify_area(type, file, pos, tot_len);
  	if (ret < 0)
  		goto done;

  	fn = is_read ? file->f_op->read : (io_fn_t)file->f_op->write;
  	iter_fn = is_read ? file->f_op->read_iter : file->f_op->write_iter;
  	if (!is_read)
  		file_start_write(file);

  	ret = iter_fn ? do_iter_readv_writev(file, &iter, pos, iter_fn, flags)
  		      : do_loop_readv_writev(file, &iter, pos, fn, flags);

  	if (!is_read)
  		file_end_write(file);

  done:
  	kfree(iov);

  	/* Reads notify on any non-negative result, writes only on progress. */
  	if (is_read) {
  		if (ret >= 0)
  			fsnotify_access(file);
  	} else if (ret > 0) {
  		fsnotify_modify(file);
  	}
  	return ret;
  }
  
  /*
   * Mode-checked compat vectored read with read accounting.
   *
   * Yields -EBADF without FMODE_READ and -EINVAL without FMODE_CAN_READ;
   * otherwise the result of compat_do_readv_writev(READ, ...).  The read
   * syscall counter is bumped on every call, the byte counter only for a
   * positive result.
   */
  static size_t compat_readv(struct file *file,
  			   const struct compat_iovec __user *vec,
  			   unsigned long vlen, loff_t *pos, int flags)
  {
  	ssize_t ret;

  	if (!(file->f_mode & FMODE_READ))
  		ret = -EBADF;
  	else if (!(file->f_mode & FMODE_CAN_READ))
  		ret = -EINVAL;
  	else
  		ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);

  	if (ret > 0)
  		add_rchar(current, ret);
  	inc_syscr(current);
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1079
1080
1081
  /*
   * Compat vectored read at the file's current position.  f_pos is copied
   * into a local, passed to compat_readv() and stored back only when the
   * read did not fail.  Returns -EBADF when @fd does not resolve to a file.
   */
  static size_t do_compat_readv(compat_ulong_t fd,
  				 const struct compat_iovec __user *vec,
  				 compat_ulong_t vlen, int flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t ret = -EBADF;

  	if (f.file) {
  		loff_t offset = f.file->f_pos;

  		ret = compat_readv(f.file, vec, vlen, &offset, flags);
  		if (ret >= 0)
  			f.file->f_pos = offset;
  		fdput_pos(f);
  	}
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1097
1098
1099
1100
1101
1102
1103
1104
  /* Compat readv(): current file position, no RWF_* flags. */
  COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
  		const struct compat_iovec __user *,vec,
  		compat_ulong_t, vlen)
  {
  	const int no_flags = 0;

  	return do_compat_readv(fd, vec, vlen, no_flags);
  }
  
  static long do_compat_preadv64(unsigned long fd,
378a10f3a   Heiko Carstens   fs/compat: option...
1105
  				  const struct compat_iovec __user *vec,
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1106
  				  unsigned long vlen, loff_t pos, int flags)
72ec35163   Al Viro   switch compat rea...
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
  {
  	struct fd f;
  	ssize_t ret;
  
  	if (pos < 0)
  		return -EINVAL;
  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;
  	ret = -ESPIPE;
  	if (f.file->f_mode & FMODE_PREAD)
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1118
  		ret = compat_readv(f.file, vec, vlen, &pos, flags);
72ec35163   Al Viro   switch compat rea...
1119
1120
1121
  	fdput(f);
  	return ret;
  }
378a10f3a   Heiko Carstens   fs/compat: option...
1122
1123
1124
1125
1126
  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
  /* Compat preadv64(): the offset arrives as one loff_t; no RWF_* flags. */
  COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos)
  {
  	const int no_flags = 0;

  	return do_compat_preadv64(fd, vec, vlen, pos, no_flags);
  }
  #endif /* __ARCH_WANT_COMPAT_SYS_PREADV64 */
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1130
  COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
72ec35163   Al Viro   switch compat rea...
1131
  		const struct compat_iovec __user *,vec,
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1132
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
72ec35163   Al Viro   switch compat rea...
1133
1134
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
378a10f3a   Heiko Carstens   fs/compat: option...
1135

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1136
1137
  	return do_compat_preadv64(fd, vec, vlen, pos, 0);
  }
3ebfd81f7   H.J. Lu   x86/syscalls: Add...
1138
1139
1140
1141
1142
1143
1144
1145
  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
  /* Compat preadv64v2(): one loff_t offset plus caller-supplied @flags. */
  COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos, int, flags)
  {
  	long ret = do_compat_preadv64(fd, vec, vlen, pos, flags);

  	return ret;
  }
  #endif /* __ARCH_WANT_COMPAT_SYS_PREADV64V2 */
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
  /*
   * Compat preadv2(): offset passed as two 32-bit halves, plus @flags.  A
   * reassembled offset of -1 selects the current file position via
   * do_compat_readv(); any other offset goes to do_compat_preadv64().
   */
  COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
  		const struct compat_iovec __user *,vec,
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
  		int, flags)
  {
  	loff_t pos = (loff_t)pos_high;

  	pos <<= 32;
  	pos |= pos_low;

  	if (pos != -1)
  		return do_compat_preadv64(fd, vec, vlen, pos, flags);

  	return do_compat_readv(fd, vec, vlen, flags);
  }
  
  /*
   * Mode-checked compat vectored write with write accounting.
   *
   * @file:  file to write to
   * @vec:   user array of struct compat_iovec
   * @vlen:  number of entries in @vec
   * @pos:   in/out file position
   * @flags: RWF_* flags supplied by the caller (0 for the flag-less syscalls)
   *
   * Yields -EBADF without FMODE_WRITE and -EINVAL without FMODE_CAN_WRITE;
   * otherwise the result of compat_do_readv_writev(WRITE, ...).  The write
   * syscall counter is bumped on every call, the byte counter only for a
   * positive result.
   */
  static size_t compat_writev(struct file *file,
  			    const struct compat_iovec __user *vec,
  			    unsigned long vlen, loff_t *pos, int flags)
  {
  	ssize_t ret = -EBADF;

  	if (!(file->f_mode & FMODE_WRITE))
  		goto out;

  	ret = -EINVAL;
  	if (!(file->f_mode & FMODE_CAN_WRITE))
  		goto out;

  	/*
  	 * Forward the caller's flags, as compat_readv() does.  Passing a
  	 * literal 0 here dropped RWF_DSYNC/RWF_SYNC/RWF_HIPRI for compat
  	 * pwritev2()/pwritev64v2() and let unsupported flag bits through
  	 * without -EOPNOTSUPP.
  	 */
  	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);

  out:
  	if (ret > 0)
  		add_wchar(current, ret);
  	inc_syscw(current);
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1179
1180
1181
  /*
   * Compat vectored write at the file's current position.  f_pos is copied
   * into a local, passed to compat_writev() and stored back only when the
   * write did not fail.  Returns -EBADF when @fd does not resolve to a file.
   */
  static size_t do_compat_writev(compat_ulong_t fd,
  				  const struct compat_iovec __user* vec,
  				  compat_ulong_t vlen, int flags)
  {
  	struct fd f = fdget_pos(fd);
  	ssize_t ret = -EBADF;

  	if (f.file) {
  		loff_t offset = f.file->f_pos;

  		ret = compat_writev(f.file, vec, vlen, &offset, flags);
  		if (ret >= 0)
  			f.file->f_pos = offset;
  		fdput_pos(f);
  	}
  	return ret;
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1196
1197
1198
1199
1200
1201
1202
1203
  /* Compat writev(): current file position, no RWF_* flags. */
  COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
  		const struct compat_iovec __user *, vec,
  		compat_ulong_t, vlen)
  {
  	const int no_flags = 0;

  	return do_compat_writev(fd, vec, vlen, no_flags);
  }
  
  static long do_compat_pwritev64(unsigned long fd,
378a10f3a   Heiko Carstens   fs/compat: option...
1204
  				   const struct compat_iovec __user *vec,
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1205
  				   unsigned long vlen, loff_t pos, int flags)
72ec35163   Al Viro   switch compat rea...
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
  {
  	struct fd f;
  	ssize_t ret;
  
  	if (pos < 0)
  		return -EINVAL;
  	f = fdget(fd);
  	if (!f.file)
  		return -EBADF;
  	ret = -ESPIPE;
  	if (f.file->f_mode & FMODE_PWRITE)
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1217
  		ret = compat_writev(f.file, vec, vlen, &pos, flags);
72ec35163   Al Viro   switch compat rea...
1218
1219
1220
  	fdput(f);
  	return ret;
  }
378a10f3a   Heiko Carstens   fs/compat: option...
1221
1222
1223
1224
1225
  #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
  /* Compat pwritev64(): the offset arrives as one loff_t; no RWF_* flags. */
  COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos)
  {
  	const int no_flags = 0;

  	return do_compat_pwritev64(fd, vec, vlen, pos, no_flags);
  }
  #endif /* __ARCH_WANT_COMPAT_SYS_PWRITEV64 */
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1229
  COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
72ec35163   Al Viro   switch compat rea...
1230
  		const struct compat_iovec __user *,vec,
dfd948e32   Heiko Carstens   fs/compat: fix pa...
1231
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
72ec35163   Al Viro   switch compat rea...
1232
1233
  {
  	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
378a10f3a   Heiko Carstens   fs/compat: option...
1234

f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1235
  	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
72ec35163   Al Viro   switch compat rea...
1236
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1237

3ebfd81f7   H.J. Lu   x86/syscalls: Add...
1238
1239
1240
1241
1242
1243
1244
1245
  #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
  /* Compat pwritev64v2(): one loff_t offset plus caller-supplied @flags. */
  COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
  		const struct compat_iovec __user *,vec,
  		unsigned long, vlen, loff_t, pos, int, flags)
  {
  	long ret = do_compat_pwritev64(fd, vec, vlen, pos, flags);

  	return ret;
  }
  #endif /* __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 */
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
  /*
   * Compat pwritev2(): offset passed as two 32-bit halves, plus @flags.  A
   * reassembled offset of -1 selects the current file position via
   * do_compat_writev(); any other offset goes to do_compat_pwritev64().
   */
  COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
  		const struct compat_iovec __user *,vec,
  		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
  {
  	loff_t pos = (loff_t)pos_high;

  	pos <<= 32;
  	pos |= pos_low;

  	if (pos != -1)
  		return do_compat_pwritev64(fd, vec, vlen, pos, flags);

  	return do_compat_writev(fd, vec, vlen, flags);
  }
f17d8b354   Milosz Tanski   vfs: vfs: Define ...
1257

72ec35163   Al Viro   switch compat rea...
1258
  #endif
19f4fc3ae   Al Viro   convert sendfile{...
1259
1260
  /*
   * Common worker for the sendfile() family of system calls.
   *
   * @out_fd: descriptor written to; must be open with FMODE_WRITE
   * @in_fd:  descriptor read from; must be open with FMODE_READ
   * @ppos:   NULL to read from (and, on success, advance) the input file's
   *          f_pos; otherwise the starting input position, which requires
   *          FMODE_PREAD on the input and fails with -ESPIPE without it
   * @count:  number of bytes requested; clamped to MAX_RW_COUNT
   * @max:    upper bound for the input position; 0 selects the smaller
   *          s_maxbytes of the two superblocks
   *
   * The output file always starts at its own f_pos.  Positions are written
   * back (to *@ppos or the input f_pos, and to the output f_pos) only when
   * do_splice_direct() moved at least one byte.
   *
   * Returns the do_splice_direct() result, or a negative errno; -EOVERFLOW
   * is returned when the input position is at or beyond @max before the
   * transfer, or ends up beyond it afterwards.
   */
  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
  			   size_t count, loff_t max)
  {
  	struct fd src, dst;
  	struct inode *src_inode, *dst_inode;
  	loff_t src_pos;
  	loff_t dst_pos;
  	ssize_t retval;
  	int splice_flags;

  	/*
  	 * Get input file, and verify that it is ok..
  	 */
  	src = fdget(in_fd);
  	if (!src.file) {
  		retval = -EBADF;
  		goto out;
  	}
  	if (!(src.file->f_mode & FMODE_READ)) {
  		retval = -EBADF;
  		goto put_src;
  	}

  	if (ppos) {
  		src_pos = *ppos;
  		/* An explicit offset needs positional-read support. */
  		if (!(src.file->f_mode & FMODE_PREAD)) {
  			retval = -ESPIPE;
  			goto put_src;
  		}
  	} else {
  		src_pos = src.file->f_pos;
  	}

  	retval = rw_verify_area(READ, src.file, &src_pos, count);
  	if (retval < 0)
  		goto put_src;
  	if (count > MAX_RW_COUNT)
  		count = MAX_RW_COUNT;

  	/*
  	 * Get output file, and verify that it is ok..
  	 */
  	dst = fdget(out_fd);
  	if (!dst.file) {
  		retval = -EBADF;
  		goto put_src;
  	}
  	if (!(dst.file->f_mode & FMODE_WRITE)) {
  		retval = -EBADF;
  		goto put_dst;
  	}

  	src_inode = file_inode(src.file);
  	dst_inode = file_inode(dst.file);
  	dst_pos = dst.file->f_pos;
  	retval = rw_verify_area(WRITE, dst.file, &dst_pos, count);
  	if (retval < 0)
  		goto put_dst;

  	if (!max)
  		max = min(src_inode->i_sb->s_maxbytes, dst_inode->i_sb->s_maxbytes);

  	/* Trim the request so the input position never passes @max. */
  	if (unlikely(src_pos + count > max)) {
  		if (src_pos >= max) {
  			retval = -EOVERFLOW;
  			goto put_dst;
  		}
  		count = max - src_pos;
  	}

  	splice_flags = 0;
  #if 0
  	/*
  	 * We need to debate whether we can enable this or not. The
  	 * man page documents EAGAIN return for the output at least,
  	 * and the application is arguably buggy if it doesn't expect
  	 * EAGAIN on a non-blocking file descriptor.
  	 */
  	if (src.file->f_flags & O_NONBLOCK)
  		splice_flags = SPLICE_F_NONBLOCK;
  #endif
  	file_start_write(dst.file);
  	retval = do_splice_direct(src.file, &src_pos, dst.file, &dst_pos,
  				  count, splice_flags);
  	file_end_write(dst.file);

  	if (retval > 0) {
  		/* Account the I/O, notify watchers, then publish positions. */
  		add_rchar(current, retval);
  		add_wchar(current, retval);
  		fsnotify_access(src.file);
  		fsnotify_modify(dst.file);
  		dst.file->f_pos = dst_pos;
  		if (ppos)
  			*ppos = src_pos;
  		else
  			src.file->f_pos = src_pos;
  	}

  	/* The syscall counters are bumped whether or not data moved. */
  	inc_syscr(current);
  	inc_syscw(current);
  	if (src_pos > max)
  		retval = -EOVERFLOW;

  put_dst:
  	fdput(dst);
  put_src:
  	fdput(src);
  out:
  	return retval;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1355
  /*
   * sendfile() entry point taking an off_t-sized user offset.
   *
   * With a NULL @offset the transfer is driven by the input file's own
   * position and no position limit is passed (max == 0).  Otherwise the
   * starting position is read from user space, the transfer is limited to
   * MAX_NON_LFS, and the resulting position is stored back to @offset.
   */
  SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	off_t start;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(get_user(start, offset)))
  		return -EFAULT;

  	pos = start;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);

  	/* The position is stored back regardless of do_sendfile()'s result. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
1373
  /*
   * sendfile64() entry point taking a loff_t user offset.
   *
   * With a NULL @offset the input file's own position is used.  Otherwise the
   * starting position is copied in from user space and the resulting position
   * is stored back to @offset.  No explicit position limit is passed in
   * either case (max == 0).
   */
  SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  		return -EFAULT;

  	ret = do_sendfile(out_fd, in_fd, &pos, count, 0);

  	/* The position is stored back regardless of do_sendfile()'s result. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
19f4fc3ae   Al Viro   convert sendfile{...
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
  
  #ifdef CONFIG_COMPAT
  /*
   * Compat sendfile() entry point taking a compat_off_t user offset.
   *
   * With a NULL @offset the input file's own position is used and no position
   * limit is passed (max == 0).  Otherwise the starting position is read from
   * user space, the transfer is limited to MAX_NON_LFS, and the resulting
   * position is stored back to @offset.
   */
  COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
  		compat_off_t __user *, offset, compat_size_t, count)
  {
  	loff_t pos;
  	off_t start;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(get_user(start, offset)))
  		return -EFAULT;

  	pos = start;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);

  	/* The position is stored back regardless of do_sendfile()'s result. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
  
  /*
   * Compat sendfile64() entry point taking a compat_loff_t user offset.
   *
   * With a NULL @offset the input file's own position is used.  Otherwise
   * sizeof(loff_t) bytes are copied in as the starting position and the
   * resulting position is stored back to @offset.  No explicit position
   * limit is passed in either case (max == 0).
   */
  COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
  		compat_loff_t __user *, offset, compat_size_t, count)
  {
  	loff_t pos;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  		return -EFAULT;

  	ret = do_sendfile(out_fd, in_fd, &pos, count, 0);

  	/* The position is stored back regardless of do_sendfile()'s result. */
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;

  	return ret;
  }
  #endif
29732938a   Zach Brown   vfs: add copy_fil...
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
  
  /*
   * copy_file_range() differs from regular file read and write in that it
   * is specifically allowed to return partial success.  When it does so is
   * up to the ->copy_file_range() method.
   *
   * @flags must be zero.  Both ranges are checked with rw_verify_area(),
   * @file_in must be readable, @file_out must be writable and not opened
   * with O_APPEND, and both files must live on the same superblock.  If the
   * output file has no ->copy_file_range() method, or the method returns
   * -EOPNOTSUPP, the copy falls back to do_splice_direct() limited to
   * MAX_RW_COUNT bytes.
   *
   * Returns the value produced by the method or the splice fallback, 0 for a
   * zero-length request, or a negative errno.
   */
  ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
  			    struct file *file_out, loff_t pos_out,
  			    size_t len, unsigned int flags)
  {
  	struct inode *src_inode = file_inode(file_in);
  	struct inode *dst_inode = file_inode(file_out);
  	size_t splice_len;
  	ssize_t ret;

  	if (flags)
  		return -EINVAL;

  	ret = rw_verify_area(READ, file_in, &pos_in, len);
  	if (unlikely(ret))
  		return ret;

  	ret = rw_verify_area(WRITE, file_out, &pos_out, len);
  	if (unlikely(ret))
  		return ret;

  	if (!(file_in->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file_out->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (file_out->f_flags & O_APPEND)
  		return -EBADF;

  	/* this could be relaxed once a method supports cross-fs copies */
  	if (src_inode->i_sb != dst_inode->i_sb)
  		return -EXDEV;

  	if (!len)
  		return 0;

  	ret = mnt_want_write_file(file_out);
  	if (ret)
  		return ret;

  	if (file_out->f_op->copy_file_range)
  		ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
  						      pos_out, len, flags);
  	else
  		ret = -EOPNOTSUPP;

  	if (ret == -EOPNOTSUPP) {
  		/* No usable method: fall back to a bounded in-kernel splice. */
  		splice_len = len;
  		if (splice_len > MAX_RW_COUNT)
  			splice_len = MAX_RW_COUNT;
  		ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
  				       splice_len, 0);
  	}

  	if (ret > 0) {
  		fsnotify_access(file_in);
  		add_rchar(current, ret);
  		fsnotify_modify(file_out);
  		add_wchar(current, ret);
  	}
  	/* The syscall counters are bumped whether or not data was copied. */
  	inc_syscr(current);
  	inc_syscw(current);

  	mnt_drop_write_file(file_out);

  	return ret;
  }
  EXPORT_SYMBOL(vfs_copy_file_range);
  
  /*
   * copy_file_range() system call.
   *
   * For each side, a NULL offset pointer means "use and update the file's
   * f_pos"; a non-NULL pointer supplies the starting position and receives
   * the advanced position.  Positions are only advanced when
   * vfs_copy_file_range() reports a positive byte count.
   *
   * Returns the vfs_copy_file_range() result, -EBADF for an invalid
   * descriptor, or -EFAULT when an offset cannot be copied from or back to
   * user space.
   */
  SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
  		int, fd_out, loff_t __user *, off_out,
  		size_t, len, unsigned int, flags)
  {
  	struct fd src;
  	struct fd dst;
  	loff_t src_pos;
  	loff_t dst_pos;
  	ssize_t ret;

  	src = fdget(fd_in);
  	if (!src.file)
  		return -EBADF;

  	dst = fdget(fd_out);
  	if (!dst.file) {
  		ret = -EBADF;
  		goto put_src;
  	}

  	if (!off_in) {
  		src_pos = src.file->f_pos;
  	} else if (copy_from_user(&src_pos, off_in, sizeof(loff_t))) {
  		ret = -EFAULT;
  		goto put_dst;
  	}

  	if (!off_out) {
  		dst_pos = dst.file->f_pos;
  	} else if (copy_from_user(&dst_pos, off_out, sizeof(loff_t))) {
  		ret = -EFAULT;
  		goto put_dst;
  	}

  	ret = vfs_copy_file_range(src.file, src_pos, dst.file, dst_pos, len,
  				  flags);
  	if (ret <= 0)
  		goto put_dst;

  	/* Both positions advance by the number of bytes copied. */
  	src_pos += ret;
  	dst_pos += ret;

  	if (!off_in)
  		src.file->f_pos = src_pos;
  	else if (copy_to_user(off_in, &src_pos, sizeof(loff_t)))
  		ret = -EFAULT;

  	if (!off_out)
  		dst.file->f_pos = dst_pos;
  	else if (copy_to_user(off_out, &dst_pos, sizeof(loff_t)))
  		ret = -EFAULT;

  put_dst:
  	fdput(dst);
  put_src:
  	fdput(src);
  	return ret;
  }
04b38d601   Christoph Hellwig   vfs: pull btrfs c...
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
  
  /*
   * clone_verify_area - validate a byte range for a clone/dedupe operation
   * @file:  file the range belongs to
   * @pos:   start of the range; must not be negative
   * @len:   length of the range in bytes; zero makes the mandatory-lock check
   *         cover everything from @pos up to OFFSET_MAX
   * @write: true when the range will be written, false when it is only read
   *
   * Rejects ranges whose start is negative or whose end would lie beyond
   * OFFSET_MAX, honours mandatory locks on the inode, and finally asks the
   * security layer for MAY_WRITE or MAY_READ permission.
   *
   * Returns 0 on success or a negative errno.
   */
  static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  {
  	struct inode *inode = file_inode(file);

  	if (unlikely(pos < 0))
  		return -EINVAL;

  	/*
  	 * pos + len is evaluated in u64, so a large @len can wrap past 2^64
  	 * and land on a small value that a sign test of the sum would accept.
  	 * Compare @len against the remaining room instead; @pos is known to
  	 * be non-negative here, so the subtraction cannot overflow.
  	 */
  	if (unlikely(len > (u64)(OFFSET_MAX - pos)))
  		return -EINVAL;

  	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
  		loff_t end = len ? pos + len - 1 : OFFSET_MAX;
  		int retval;

  		retval = locks_mandatory_area(inode, file, pos, end,
  				write ? F_WRLCK : F_RDLCK);
  		if (retval < 0)
  			return retval;
  	}

  	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
  }
  
  /*
   * vfs_clone_file_range - hand a clone request to the filesystem
   * @file_in:  source file; must be readable
   * @pos_in:   source offset
   * @file_out: destination file; must be writable and not opened O_APPEND
   * @pos_out:  destination offset
   * @len:      number of bytes to clone
   *
   * Both files must be regular files on the same superblock and the same
   * mount, the source must provide ->clone_file_range(), both ranges must
   * pass clone_verify_area(), and the source range must not extend past the
   * source's i_size.  Access/modify notifications are sent only when the
   * method returns 0.
   *
   * Returns the method's result, or a negative errno from one of the checks.
   */
  int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
  		struct file *file_out, loff_t pos_out, u64 len)
  {
  	struct inode *src_inode = file_inode(file_in);
  	struct inode *dst_inode = file_inode(file_out);
  	int ret;

  	if (src_inode->i_sb != dst_inode->i_sb)
  		return -EXDEV;
  	if (file_in->f_path.mnt != file_out->f_path.mnt)
  		return -EXDEV;

  	if (S_ISDIR(src_inode->i_mode) || S_ISDIR(dst_inode->i_mode))
  		return -EISDIR;
  	if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode))
  		return -EINVAL;

  	if (!(file_in->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!(file_out->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (file_out->f_flags & O_APPEND)
  		return -EBADF;

  	if (!file_in->f_op->clone_file_range)
  		return -EOPNOTSUPP;

  	ret = clone_verify_area(file_in, pos_in, len, false);
  	if (ret)
  		return ret;

  	ret = clone_verify_area(file_out, pos_out, len, true);
  	if (ret)
  		return ret;

  	/* The source range has to lie entirely inside the source file. */
  	if (pos_in + len > i_size_read(src_inode))
  		return -EINVAL;

  	ret = mnt_want_write_file(file_out);
  	if (ret)
  		return ret;

  	ret = file_in->f_op->clone_file_range(file_in, pos_in,
  			file_out, pos_out, len);
  	if (ret == 0) {
  		fsnotify_access(file_in);
  		fsnotify_modify(file_out);
  	}

  	mnt_drop_write_file(file_out);
  	return ret;
  }
  EXPORT_SYMBOL(vfs_clone_file_range);
54dbc1517   Darrick J. Wong   vfs: hoist the bt...
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
  
  /*
   * vfs_dedupe_file_range - dedupe one source range against several files
   * @file: source file; must be open for reading and be a regular file
   * @same: request block: source offset and length, plus dest_count entries
   *        in info[] naming the destination descriptors and offsets
   *
   * The source is validated first; a failure there is returned directly.
   * After that every destination is processed independently and its outcome
   * is recorded only in its own info[] entry: status is a negative errno,
   * FILE_DEDUPE_RANGE_DIFFERS when the method returns -EBADE, or
   * FILE_DEDUPE_RANGE_SAME with the byte count added to bytes_deduped.
   * Processing stops early when a fatal signal is pending.
   *
   * Returns 0 once the source has been validated, or a negative errno for an
   * invalid source.  A failure on one destination never changes the return
   * value, so the result does not depend on the order of the destinations.
   */
  int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
  {
  	struct file_dedupe_range_info *info;
  	struct inode *src = file_inode(file);
  	u64 off;
  	u64 len;
  	int i;
  	int ret;
  	bool is_admin = capable(CAP_SYS_ADMIN);
  	u16 count = same->dest_count;

  	if (!(file->f_mode & FMODE_READ))
  		return -EINVAL;

  	if (same->reserved1 || same->reserved2)
  		return -EINVAL;

  	off = same->src_offset;
  	len = same->src_length;

  	ret = -EISDIR;
  	if (S_ISDIR(src->i_mode))
  		goto out;

  	ret = -EINVAL;
  	if (!S_ISREG(src->i_mode))
  		goto out;

  	ret = clone_verify_area(file, off, len, false);
  	if (ret < 0)
  		goto out;
  	ret = 0;

  	/* pre-format output fields to sane values */
  	for (i = 0; i < count; i++) {
  		same->info[i].bytes_deduped = 0ULL;
  		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
  	}

  	for (i = 0, info = same->info; i < count; i++, info++) {
  		struct fd dst_fd = fdget(info->dest_fd);
  		struct file *dst_file = dst_fd.file;
  		struct inode *dst;
  		loff_t dst_off;
  		ssize_t deduped;
  		int err;	/* per-destination result; never leaks into ret */

  		if (!dst_file) {
  			info->status = -EBADF;
  			goto next_loop;
  		}
  		dst = file_inode(dst_file);

  		err = mnt_want_write_file(dst_file);
  		if (err) {
  			info->status = err;
  			goto next_loop;
  		}

  		dst_off = info->dest_offset;
  		err = clone_verify_area(dst_file, dst_off, len, true);
  		if (err < 0) {
  			info->status = err;
  			goto next_file;
  		}

  		if (info->reserved) {
  			info->status = -EINVAL;
  		} else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
  			/* Writable destination required unless CAP_SYS_ADMIN. */
  			info->status = -EINVAL;
  		} else if (file->f_path.mnt != dst_file->f_path.mnt) {
  			info->status = -EXDEV;
  		} else if (S_ISDIR(dst->i_mode)) {
  			info->status = -EISDIR;
  		} else if (dst_file->f_op->dedupe_file_range == NULL) {
  			info->status = -EINVAL;
  		} else {
  			deduped = dst_file->f_op->dedupe_file_range(file, off,
  							len, dst_file,
  							info->dest_offset);
  			if (deduped == -EBADE)
  				info->status = FILE_DEDUPE_RANGE_DIFFERS;
  			else if (deduped < 0)
  				info->status = deduped;
  			else
  				info->bytes_deduped += deduped;
  		}

  next_file:
  		mnt_drop_write_file(dst_file);
  next_loop:
  		fdput(dst_fd);

  		if (fatal_signal_pending(current))
  			goto out;
  	}

  out:
  	return ret;
  }
  EXPORT_SYMBOL(vfs_dedupe_file_range);