Blame view

fs/read_write.c 20 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/fs/read_write.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  #include <linux/slab.h> 
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/file.h>
  #include <linux/uio.h>
  #include <linux/smp_lock.h>
0eeca2830   Robert Love   [PATCH] inotify
13
  #include <linux/fsnotify.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
  #include <linux/security.h>
  #include <linux/module.h>
  #include <linux/syscalls.h>
e28cc7157   Linus Torvalds   Relax the rw_veri...
17
  #include <linux/pagemap.h>
d6b29d7ce   Jens Axboe   splice: divorce t...
18
  #include <linux/splice.h>
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
19
  #include "read_write.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
4b6f5d20b   Arjan van de Ven   [PATCH] Make most...
23
  /*
   * File operations table wiring up only the generic read-side helpers:
   * llseek, synchronous read, AIO read, read-only mmap and splice read.
   * No write method is set in this table.
   */
  const struct file_operations generic_ro_fops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
  	.llseek		= generic_file_llseek,
543ade1fc   Badari Pulavarty   [PATCH] Streamlin...
25
26
  	.read		= do_sync_read,
  	.aio_read	= generic_file_aio_read,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
  	.mmap		= generic_file_readonly_mmap,
534f2aaa6   Jens Axboe   sys_sendfile: swi...
28
  	.splice_read	= generic_file_splice_read,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
31
  };
  
  EXPORT_SYMBOL(generic_ro_fops);
3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
32
33
34
35
36
37
38
39
40
  /**
   * generic_file_llseek_unlocked - lockless generic llseek implementation
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @origin:	type of seek
   *
   * Updates the file offset to the value specified by @offset and @origin.
   * Locking must be provided by the caller.
   */
9465efc9e   Andi Kleen   Remove BKL from r...
41
42
  loff_t
  generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
  {
  	struct inode *inode = file->f_mapping->host;

  	switch (origin) {
  	case SEEK_END:
  		/*
  		 * Go through the i_size_read() accessor, as default_llseek()
  		 * does, instead of reading inode->i_size directly.
  		 */
  		offset += i_size_read(inode);
  		break;
  	case SEEK_CUR:
  		/*
  		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  		 * position-querying operation.  Avoid rewriting the "same"
  		 * f_pos value back to the file because a concurrent read(),
  		 * write() or lseek() might have altered it
  		 */
  		if (offset == 0)
  			return file->f_pos;
  		offset += file->f_pos;
  		break;
  	}

  	/* Reject positions before the start or beyond the sb limit. */
  	if (offset < 0 || offset > inode->i_sb->s_maxbytes)
  		return -EINVAL;

  	/* Special lock needed here? */
  	if (offset != file->f_pos) {
  		/* Only touch the file when the position really changes. */
  		file->f_pos = offset;
  		file->f_version = 0;
  	}

  	return offset;
  }
9465efc9e   Andi Kleen   Remove BKL from r...
73
  EXPORT_SYMBOL(generic_file_llseek_unlocked);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
74

3a8cff4f0   Christoph Hellwig   [PATCH] generic_f...
75
76
77
78
79
80
81
82
83
84
  /**
   * generic_file_llseek - generic llseek implementation for regular files
   * @file:	file structure to seek on
   * @offset:	file offset to seek to
   * @origin:	type of seek
   *
   * This is a generic implementation of ->llseek usable for all normal local
   * filesystems.  It just updates the file offset to the value specified by
   * @offset and @origin under i_mutex.
   */
9465efc9e   Andi Kleen   Remove BKL from r...
85
  loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
  {
  	struct inode *inode = file->f_dentry->d_inode;
  	loff_t pos;

  	/* Run the unlocked helper with the inode's i_mutex held. */
  	mutex_lock(&inode->i_mutex);
  	pos = generic_file_llseek_unlocked(file, offset, origin);
  	mutex_unlock(&inode->i_mutex);

  	return pos;
  }
9465efc9e   Andi Kleen   Remove BKL from r...
94
  EXPORT_SYMBOL(generic_file_llseek);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
95
96
97
98
99
100
101
102
103
  
  /*
   * llseek method that refuses every request: all three arguments are
   * ignored and -ESPIPE is returned unconditionally.
   */
  loff_t no_llseek(struct file *file, loff_t offset, int origin)
  {
  	return -ESPIPE;
  }
  EXPORT_SYMBOL(no_llseek);
  
  /*
   * Fallback llseek: resolves @offset against @origin under the big kernel
   * lock and stores the result in file->f_pos.  Returns the new position,
   * or -EINVAL when the resulting position would be negative.
   */
  loff_t default_llseek(struct file *file, loff_t offset, int origin)
  {
  	loff_t retval = -EINVAL;

  	lock_kernel();

  	if (origin == SEEK_END) {
  		offset += i_size_read(file->f_path.dentry->d_inode);
  	} else if (origin == SEEK_CUR) {
  		/* lseek(fd, 0, SEEK_CUR) only queries; never write f_pos back. */
  		if (offset == 0) {
  			retval = file->f_pos;
  			goto out;
  		}
  		offset += file->f_pos;
  	}

  	if (offset < 0)
  		goto out;

  	if (offset != file->f_pos) {
  		file->f_pos = offset;
  		file->f_version = 0;
  	}
  	retval = offset;
  out:
  	unlock_kernel();
  	return retval;
  }
  EXPORT_SYMBOL(default_llseek);
  
  /*
   * Dispatch a seek: files without FMODE_LSEEK get no_llseek(), files with
   * their own ->llseek method use it, everything else uses default_llseek().
   */
  loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
  {
  	if (!(file->f_mode & FMODE_LSEEK))
  		return no_llseek(file, offset, origin);

  	if (file->f_op && file->f_op->llseek)
  		return file->f_op->llseek(file, offset, origin);

  	return default_llseek(file, offset, origin);
  }
  EXPORT_SYMBOL(vfs_llseek);
003d7ab47   Heiko Carstens   [CVE-2009-0029] S...
145
  /*
   * lseek(2): returns the new offset, -EBADF for a bad descriptor, -EINVAL
   * for an origin above SEEK_MAX, or -EOVERFLOW when the result does not
   * fit in off_t.
   */
  SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
  {
  	int fput_needed;
  	struct file *file = fget_light(fd, &fput_needed);
  	off_t retval;
  	loff_t res;

  	if (!file)
  		return -EBADF;

  	if (origin > SEEK_MAX) {
  		retval = -EINVAL;
  	} else {
  		res = vfs_llseek(file, offset, origin);
  		retval = res;
  		/* LFS: truncation should only happen on 32 bit platforms */
  		if (res != (loff_t)retval)
  			retval = -EOVERFLOW;
  	}

  	fput_light(file, fput_needed);
  	return retval;
  }
  
  #ifdef __ARCH_WANT_SYS_LLSEEK
003d7ab47   Heiko Carstens   [CVE-2009-0029] S...
169
170
171
  /*
   * llseek: seek to the 64-bit offset assembled from @offset_high and
   * @offset_low and copy the resulting position to @result.  Returns 0 on
   * success, the negative seek error, -EFAULT if @result cannot be written,
   * -EINVAL for an origin above SEEK_MAX, or -EBADF for a bad descriptor.
   */
  SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
  		unsigned long, offset_low, loff_t __user *, result,
  		unsigned int, origin)
  {
  	struct file *file;
  	loff_t pos;
  	int fput_needed;
  	int retval;

  	file = fget_light(fd, &fput_needed);
  	if (!file)
  		return -EBADF;

  	if (origin > SEEK_MAX) {
  		retval = -EINVAL;
  		goto out_putf;
  	}

  	pos = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
  			origin);

  	if (pos < 0)
  		retval = (int)pos;
  	else if (copy_to_user(result, &pos, sizeof(pos)))
  		retval = -EFAULT;
  	else
  		retval = 0;

  out_putf:
  	fput_light(file, fput_needed);
  	return retval;
  }
  #endif
e28cc7157   Linus Torvalds   Relax the rw_veri...
202
203
204
205
206
207
  /*
   * rw_verify_area doesn't like huge counts. We limit
   * them to something that fits in "int" so that others
   * won't have to do range checks all the time.
   */
  #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
209
210
211
212
  
  /*
   * Validate a read or write of @count bytes at *@ppos on @file.
   *
   * Returns -EINVAL when @count or the position range is negative, the
   * error from the mandatory-lock or security check when one fails, and
   * otherwise the byte count to use, capped at MAX_RW_COUNT.
   */
  int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
  {
  	struct inode *inode = file->f_path.dentry->d_inode;
  	loff_t pos;
  	int err;

  	if (unlikely((ssize_t) count < 0))
  		return -EINVAL;

  	pos = *ppos;
  	if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
  		return -EINVAL;

  	if (unlikely(inode->i_flock && mandatory_lock(inode))) {
  		int lock_type = (read_write == READ) ? FLOCK_VERIFY_READ
  						     : FLOCK_VERIFY_WRITE;

  		err = locks_mandatory_area(lock_type, inode, file, pos, count);
  		if (err < 0)
  			return err;
  	}

  	err = security_file_permission(file,
  			(read_write == READ) ? MAY_READ : MAY_WRITE);
  	if (err)
  		return err;

  	if (count > MAX_RW_COUNT)
  		return MAX_RW_COUNT;
  	return count;
  }
63e688091   Benjamin LaHaise   [PATCH] aio: fix ...
235
236
237
238
239
240
241
242
243
  /*
   * Wait before retrying a synchronous kiocb: if the iocb has already been
   * kicked just clear the flag, otherwise schedule() away.  The task state
   * is set before the kicked test and reset to TASK_RUNNING afterwards.
   */
  static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
  {
  	set_current_state(TASK_UNINTERRUPTIBLE);
  	if (kiocbIsKicked(iocb))
  		kiocbClearKicked(iocb);
  	else
  		schedule();
  	__set_current_state(TASK_RUNNING);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244
245
  /*
   * Synchronous read built on ->aio_read: wraps @buf/@len in a one-element
   * iovec, retries while the method returns -EIOCBRETRY, waits for
   * completion on -EIOCBQUEUED, and writes the final position to *@ppos.
   */
  ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  {
  	struct iovec iov = { .iov_base = buf, .iov_len = len };
  	struct kiocb kiocb;
  	ssize_t ret;

  	init_sync_kiocb(&kiocb, filp);
  	kiocb.ki_pos = *ppos;
  	kiocb.ki_left = len;

  	while ((ret = filp->f_op->aio_read(&kiocb, &iov, 1,
  					   kiocb.ki_pos)) == -EIOCBRETRY)
  		wait_on_retry_sync_kiocb(&kiocb);

  	if (ret == -EIOCBQUEUED)
  		ret = wait_on_sync_kiocb(&kiocb);

  	*ppos = kiocb.ki_pos;
  	return ret;
  }
  
  EXPORT_SYMBOL(do_sync_read);
  
  /*
   * Read up to @count bytes from @file into the user buffer @buf at *@pos.
   *
   * Returns -EBADF if the file is not open for reading, -EINVAL if it has
   * neither ->read nor ->aio_read, -EFAULT if @buf fails access_ok(), the
   * rw_verify_area() error if that check fails, else the method's result.
   */
  ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;

  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!file->f_op || !(file->f_op->read || file->f_op->aio_read))
  		return -EINVAL;
  	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
  		return -EFAULT;

  	ret = rw_verify_area(READ, file, pos, count);
  	if (ret < 0)
  		return ret;

  	/* rw_verify_area() may have shortened the request. */
  	count = ret;
  	if (file->f_op->read)
  		ret = file->f_op->read(file, buf, count, pos);
  	else
  		ret = do_sync_read(file, buf, count, pos);

  	if (ret > 0) {
  		fsnotify_access(file->f_path.dentry);
  		add_rchar(current, ret);
  	}
  	inc_syscr(current);

  	return ret;
  }
  
  EXPORT_SYMBOL(vfs_read);
  
  /*
   * Synchronous write built on ->aio_write: wraps @buf/@len in a one-element
   * iovec, retries while the method returns -EIOCBRETRY, waits for
   * completion on -EIOCBQUEUED, and writes the final position to *@ppos.
   */
  ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
  {
  	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
  	struct kiocb kiocb;
  	ssize_t ret;

  	init_sync_kiocb(&kiocb, filp);
  	kiocb.ki_pos = *ppos;
  	kiocb.ki_left = len;

  	while ((ret = filp->f_op->aio_write(&kiocb, &iov, 1,
  					    kiocb.ki_pos)) == -EIOCBRETRY)
  		wait_on_retry_sync_kiocb(&kiocb);

  	if (ret == -EIOCBQUEUED)
  		ret = wait_on_sync_kiocb(&kiocb);

  	*ppos = kiocb.ki_pos;
  	return ret;
  }
  
  EXPORT_SYMBOL(do_sync_write);
  
  /*
   * Write up to @count bytes from the user buffer @buf to @file at *@pos.
   *
   * Returns -EBADF if the file is not open for writing, -EINVAL if it has
   * neither ->write nor ->aio_write, -EFAULT if @buf fails access_ok(), the
   * rw_verify_area() error if that check fails, else the method's result.
   */
  ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
  {
  	ssize_t ret;

  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (!file->f_op || !(file->f_op->write || file->f_op->aio_write))
  		return -EINVAL;
  	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
  		return -EFAULT;

  	ret = rw_verify_area(WRITE, file, pos, count);
  	if (ret < 0)
  		return ret;

  	/* rw_verify_area() may have shortened the request. */
  	count = ret;
  	if (file->f_op->write)
  		ret = file->f_op->write(file, buf, count, pos);
  	else
  		ret = do_sync_write(file, buf, count, pos);

  	if (ret > 0) {
  		fsnotify_modify(file->f_path.dentry);
  		add_wchar(current, ret);
  	}
  	inc_syscw(current);

  	return ret;
  }
  
  EXPORT_SYMBOL(vfs_write);
  
  /* Return the current position (f_pos) stored in @file. */
  static inline loff_t file_pos_read(struct file *file)
  {
  	return file->f_pos;
  }
  
  /* Store @pos as the current position (f_pos) of @file. */
  static inline void file_pos_write(struct file *file, loff_t pos)
  {
  	file->f_pos = pos;
  }
3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
363
  /*
   * read(2): read from the file's current position and store the updated
   * position back.  Returns -EBADF when @fd does not resolve to a file.
   */
  SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
  {
  	int fput_needed;
  	struct file *file = fget_light(fd, &fput_needed);
  	loff_t pos;
  	ssize_t ret;

  	if (!file)
  		return -EBADF;

  	pos = file_pos_read(file);
  	ret = vfs_read(file, buf, count, &pos);
  	file_pos_write(file, pos);
  	fput_light(file, fput_needed);

  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
379

3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
380
381
  /*
   * write(2): write at the file's current position and store the updated
   * position back.  Returns -EBADF when @fd does not resolve to a file.
   */
  SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
  		size_t, count)
  {
  	int fput_needed;
  	struct file *file = fget_light(fd, &fput_needed);
  	loff_t pos;
  	ssize_t ret;

  	if (!file)
  		return -EBADF;

  	pos = file_pos_read(file);
  	ret = vfs_write(file, buf, count, &pos);
  	file_pos_write(file, pos);
  	fput_light(file, fput_needed);

  	return ret;
  }
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
397
398
  /*
   * pread64: read at the explicit offset @pos; the file's own f_pos is not
   * written here.  Returns -EINVAL for a negative @pos, -EBADF for a bad
   * descriptor and -ESPIPE when the file lacks FMODE_PREAD.
   */
  SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
  			size_t count, loff_t pos)
  {
  	struct file *file;
  	ssize_t ret;
  	int fput_needed;

  	if (pos < 0)
  		return -EINVAL;

  	file = fget_light(fd, &fput_needed);
  	if (!file)
  		return -EBADF;

  	if (file->f_mode & FMODE_PREAD)
  		ret = vfs_read(file, buf, count, &pos);
  	else
  		ret = -ESPIPE;
  	fput_light(file, fput_needed);

  	return ret;
  }
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
417
418
419
420
421
422
423
424
  #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  /*
   * Syscall-wrapper entry for pread64 (built only under
   * CONFIG_HAVE_SYSCALL_WRAPPERS): takes fd, buf and count as long, casts
   * them to their real types and forwards to SYSC_pread64().
   */
  asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
  {
  	return SYSC_pread64((unsigned int) fd, (char __user *) buf,
  			    (size_t) count, pos);
  }
  SYSCALL_ALIAS(sys_pread64, SyS_pread64);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425

6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
426
427
  /*
   * pwrite64: write at the explicit offset @pos; the file's own f_pos is
   * not written here.  Returns -EINVAL for a negative @pos, -EBADF for a
   * bad descriptor and -ESPIPE when the file lacks FMODE_PWRITE.
   */
  SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
  			 size_t count, loff_t pos)
  {
  	struct file *file;
  	ssize_t ret;
  	int fput_needed;

  	if (pos < 0)
  		return -EINVAL;

  	file = fget_light(fd, &fput_needed);
  	if (!file)
  		return -EBADF;

  	if (file->f_mode & FMODE_PWRITE)
  		ret = vfs_write(file, buf, count, &pos);
  	else
  		ret = -ESPIPE;
  	fput_light(file, fput_needed);

  	return ret;
  }
6673e0c3f   Heiko Carstens   [CVE-2009-0029] S...
446
447
448
449
450
451
452
453
  #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  /*
   * Syscall-wrapper entry for pwrite64 (built only under
   * CONFIG_HAVE_SYSCALL_WRAPPERS): takes fd, buf and count as long, casts
   * them to their real types and forwards to SYSC_pwrite64().
   */
  asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
  {
  	return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
  			     (size_t) count, pos);
  }
  SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
  
  /*
   * Reduce an iovec's length in-place.  Return the resulting number of segments
   *
   * @iov:	array of segments to trim
   * @nr_segs:	number of entries in @iov
   * @to:	maximum total byte count to keep
   *
   * Walks the segments until their cumulative length reaches @to, truncates
   * the segment that crosses the limit, and returns how many segments remain
   * in use.  If the segments sum to less than @to nothing is changed and
   * @nr_segs is returned.
   */
  unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
  {
  	unsigned long seg = 0;
  	size_t len = 0;

  	while (seg < nr_segs) {
  		seg++;
  		/*
  		 * len never exceeds @to here, so "to - len" is the budget
  		 * still available.  Comparing against it, rather than
  		 * computing len + iov_len, cannot wrap for huge iov_len.
  		 */
  		if (iov->iov_len >= to - len) {
  			iov->iov_len = to - len;
  			break;
  		}
  		len += iov->iov_len;
  		iov++;
  	}
  	return seg;
  }
19295529d   Eric Sandeen   ext4: export iov_...
474
  EXPORT_SYMBOL(iov_shorten);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475

ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
  /*
   * Run the vectored AIO method @fn synchronously over @iov: retry while it
   * returns -EIOCBRETRY, wait for completion on -EIOCBQUEUED, and write the
   * final position to *@ppos.
   */
  ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
  		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
  {
  	struct kiocb kiocb;
  	ssize_t ret;

  	init_sync_kiocb(&kiocb, filp);
  	kiocb.ki_pos = *ppos;
  	kiocb.ki_left = len;
  	kiocb.ki_nbytes = len;

  	while ((ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos)) == -EIOCBRETRY)
  		wait_on_retry_sync_kiocb(&kiocb);

  	if (ret == -EIOCBQUEUED)
  		ret = wait_on_sync_kiocb(&kiocb);

  	*ppos = kiocb.ki_pos;
  	return ret;
  }
  
  /* Do it by hand, with file-ops */
  /*
   * Emulate vectored I/O by calling the single-buffer method @fn once per
   * segment.  Stops at the first error or short transfer.  Returns the
   * bytes transferred so far, or the error if nothing was transferred.
   */
  ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
  		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
  {
  	struct iovec *cur;
  	ssize_t total = 0;

  	for (cur = iov; nr_segs > 0; cur++, nr_segs--) {
  		void __user *base = cur->iov_base;
  		size_t len = cur->iov_len;
  		ssize_t nr = fn(filp, base, len, ppos);

  		if (nr < 0) {
  			/* Report the error only if no data has moved yet. */
  			if (!total)
  				total = nr;
  			break;
  		}
  		total += nr;
  		if (nr != len)
  			break;
  	}

  	return total;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
531
532
  /* A write operation does a read from user space and vice versa */
  #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
eed4e51fb   Badari Pulavarty   [PATCH] Add vecto...
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
  /*
   * Copy a user iovec array into kernel memory and validate it.
   *
   * Uses @fast_pointer when @nr_segs fits in @fast_segs, otherwise a
   * kmalloc()ed array.  *@ret_pointer always receives the array in use
   * (possibly NULL after a failed allocation), including on error paths.
   *
   * Returns the total byte length of all segments, 0 for zero segments,
   * -EINVAL for too many segments, a negative segment length or a total
   * that overflows ssize_t, -ENOMEM on allocation failure, and -EFAULT
   * when the array or a segment fails the user-access checks.
   */
  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
  			      unsigned long nr_segs, unsigned long fast_segs,
  			      struct iovec *fast_pointer,
  			      struct iovec **ret_pointer)
  {
  	struct iovec *iov = fast_pointer;
  	ssize_t total = 0;
  	unsigned long i;

  	/*
  	 * SuS says "The readv() function *may* fail if the iovcnt argument
  	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
  	 * traditionally returned zero for zero segments, so...
  	 */
  	if (nr_segs == 0)
  		goto done;

  	if (nr_segs > UIO_MAXIOV) {
  		total = -EINVAL;
  		goto done;
  	}

  	/* Fall back to a heap array when the caller's one is too small. */
  	if (nr_segs > fast_segs) {
  		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
  		if (iov == NULL) {
  			total = -ENOMEM;
  			goto done;
  		}
  	}

  	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
  		total = -EFAULT;
  		goto done;
  	}

  	/*
  	 * According to the Single Unix Specification we should return EINVAL
  	 * if an element length is < 0 when cast to ssize_t or if the
  	 * total length would overflow the ssize_t return value of the
  	 * system call.
  	 */
  	for (i = 0; i < nr_segs; i++) {
  		void __user *buf = iov[i].iov_base;
  		ssize_t len = (ssize_t)iov[i].iov_len;

  		/* Invalid length, or the running total is about to overflow. */
  		if (len < 0 || (total + len < total)) {
  			total = -EINVAL;
  			goto done;
  		}
  		if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
  			total = -EFAULT;
  			goto done;
  		}

  		total += len;
  	}
  done:
  	*ret_pointer = iov;
  	return total;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
600
601
602
603
  /*
   * Common body of vectored read and write: copy and check the user iovec,
   * verify the area, then use the file's AIO method when it has one or fall
   * back to one ->read/->write call per segment.  Any heap iovec is freed
   * and the matching fsnotify hook is issued before returning.
   */
  static ssize_t do_readv_writev(int type, struct file *file,
  			       const struct iovec __user * uvector,
  			       unsigned long nr_segs, loff_t *pos)
  {
  	struct iovec iovstack[UIO_FASTIOV];
  	struct iovec *iov = iovstack;
  	size_t tot_len;
  	ssize_t ret;
  	io_fn_t fn;
  	iov_fn_t fnv;

  	if (!file->f_op) {
  		ret = -EINVAL;
  		goto out;
  	}

  	ret = rw_copy_check_uvector(type, uvector, nr_segs,
  			ARRAY_SIZE(iovstack), iovstack, &iov);
  	if (ret <= 0)
  		goto out;

  	tot_len = ret;
  	ret = rw_verify_area(type, file, pos, tot_len);
  	if (ret < 0)
  		goto out;

  	if (type == READ) {
  		fn = file->f_op->read;
  		fnv = file->f_op->aio_read;
  	} else {
  		fn = (io_fn_t)file->f_op->write;
  		fnv = file->f_op->aio_write;
  	}

  	ret = fnv ? do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv)
  		  : do_loop_readv_writev(file, iov, nr_segs, pos, fn);

  out:
  	if (iov != iovstack)
  		kfree(iov);

  	/* The "+ (type == READ)" term makes a zero-byte read notify too. */
  	if ((ret + (type == READ)) > 0) {
  		if (type == READ)
  			fsnotify_access(file->f_path.dentry);
  		else
  			fsnotify_modify(file->f_path.dentry);
  	}
  	return ret;
  }
  
  /*
   * Vectored read entry point.  Returns -EBADF if @file is not open for
   * reading and -EINVAL if it has neither ->aio_read nor ->read; otherwise
   * hands the request to do_readv_writev().
   */
  ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  		  unsigned long vlen, loff_t *pos)
  {
  	if (!(file->f_mode & FMODE_READ))
  		return -EBADF;
  	if (!file->f_op)
  		return -EINVAL;
  	if (!(file->f_op->aio_read || file->f_op->read))
  		return -EINVAL;

  	return do_readv_writev(READ, file, vec, vlen, pos);
  }
  
  EXPORT_SYMBOL(vfs_readv);
  
  /*
   * Vectored write entry point.  Returns -EBADF if @file is not open for
   * writing and -EINVAL if it has neither ->aio_write nor ->write;
   * otherwise hands the request to do_readv_writev().
   */
  ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  		   unsigned long vlen, loff_t *pos)
  {
  	if (!(file->f_mode & FMODE_WRITE))
  		return -EBADF;
  	if (!file->f_op)
  		return -EINVAL;
  	if (!(file->f_op->aio_write || file->f_op->write))
  		return -EINVAL;

  	return do_readv_writev(WRITE, file, vec, vlen, pos);
  }
  
  EXPORT_SYMBOL(vfs_writev);
3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
676
677
  /*
   * readv(2): vectored read at the file's current position.  The read
   * syscall counter is bumped on every call, including a bad descriptor;
   * the byte counter only when data was read.
   */
  SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	ssize_t ret = -EBADF;
  	int fput_needed;
  	struct file *file = fget_light(fd, &fput_needed);

  	if (file) {
  		loff_t pos = file_pos_read(file);

  		ret = vfs_readv(file, vec, vlen, &pos);
  		file_pos_write(file, pos);
  		fput_light(file, fput_needed);
  	}

  	if (ret > 0)
  		add_rchar(current, ret);
  	inc_syscr(current);

  	return ret;
  }
3cdad4288   Heiko Carstens   [CVE-2009-0029] S...
696
697
  /*
   * writev(2): vectored write at the file's current position.  The write
   * syscall counter is bumped on every call, including a bad descriptor;
   * the byte counter only when data was written.
   */
  SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen)
  {
  	ssize_t ret = -EBADF;
  	int fput_needed;
  	struct file *file = fget_light(fd, &fput_needed);

  	if (file) {
  		loff_t pos = file_pos_read(file);

  		ret = vfs_writev(file, vec, vlen, &pos);
  		file_pos_write(file, pos);
  		fput_light(file, fput_needed);
  	}

  	if (ret > 0)
  		add_wchar(current, ret);
  	inc_syscw(current);

  	return ret;
  }
601cc11d0   Linus Torvalds   Make non-compat p...
716
717
718
719
720
  static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
  {
  #define HALF_LONG_BITS (BITS_PER_LONG / 2)
  	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
  }
f3554f4bc   Gerd Hoffmann   preadv/pwritev: A...
721
  /*
   * preadv: vectored read at the offset built from @pos_h/@pos_l.  Returns
   * -EINVAL (without touching the accounting) for a negative offset, -EBADF
   * for a bad descriptor and -ESPIPE when the file lacks FMODE_PREAD.
   */
  SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);
  	ssize_t ret = -EBADF;
  	struct file *file;
  	int fput_needed;

  	if (pos < 0)
  		return -EINVAL;

  	file = fget_light(fd, &fput_needed);
  	if (file) {
  		if (file->f_mode & FMODE_PREAD)
  			ret = vfs_readv(file, vec, vlen, &pos);
  		else
  			ret = -ESPIPE;
  		fput_light(file, fput_needed);
  	}

  	if (ret > 0)
  		add_rchar(current, ret);
  	inc_syscr(current);
  	return ret;
  }
  
  /*
   * pwritev: vectored write at the offset built from @pos_h/@pos_l.  Returns
   * -EINVAL (without touching the accounting) for a negative offset, -EBADF
   * for a bad descriptor and -ESPIPE when the file lacks FMODE_PWRITE.
   */
  SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
  		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  {
  	loff_t pos = pos_from_hilo(pos_h, pos_l);
  	ssize_t ret = -EBADF;
  	struct file *file;
  	int fput_needed;

  	if (pos < 0)
  		return -EINVAL;

  	file = fget_light(fd, &fput_needed);
  	if (file) {
  		if (file->f_mode & FMODE_PWRITE)
  			ret = vfs_writev(file, vec, vlen, &pos);
  		else
  			ret = -ESPIPE;
  		fput_light(file, fput_needed);
  	}

  	if (ret > 0)
  		add_wchar(current, ret);
  	inc_syscw(current);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
770
771
772
773
774
775
776
  /*
   * Common body of sendfile/sendfile64: validate both descriptors, clamp
   * the transfer so it does not pass @max (0 means "use the smaller of the
   * two filesystems' s_maxbytes"), and move the data with
   * do_splice_direct().  A NULL @ppos means the input file's own f_pos.
   */
  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
  			   size_t count, loff_t max)
  {
  	struct file *in_file, *out_file;
  	struct inode *in_inode, *out_inode;
  	int fput_needed_in, fput_needed_out;
  	int fl = 0;
  	loff_t pos;
  	ssize_t retval;

  	/*
  	 * Get input file, and verify that it is ok..
  	 */
  	in_file = fget_light(in_fd, &fput_needed_in);
  	if (!in_file)
  		return -EBADF;
  	if (!(in_file->f_mode & FMODE_READ)) {
  		retval = -EBADF;
  		goto fput_in;
  	}
  	if (!ppos) {
  		ppos = &in_file->f_pos;
  	} else if (!(in_file->f_mode & FMODE_PREAD)) {
  		retval = -ESPIPE;
  		goto fput_in;
  	}
  	retval = rw_verify_area(READ, in_file, ppos, count);
  	if (retval < 0)
  		goto fput_in;
  	count = retval;

  	/*
  	 * Get output file, and verify that it is ok..
  	 */
  	out_file = fget_light(out_fd, &fput_needed_out);
  	if (!out_file) {
  		retval = -EBADF;
  		goto fput_in;
  	}
  	if (!(out_file->f_mode & FMODE_WRITE)) {
  		retval = -EBADF;
  		goto fput_out;
  	}
  	if (!out_file->f_op || !out_file->f_op->sendpage) {
  		retval = -EINVAL;
  		goto fput_out;
  	}
  	in_inode = in_file->f_path.dentry->d_inode;
  	out_inode = out_file->f_path.dentry->d_inode;
  	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
  	if (retval < 0)
  		goto fput_out;
  	count = retval;

  	if (!max)
  		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

  	pos = *ppos;
  	if (unlikely(pos < 0)) {
  		retval = -EINVAL;
  		goto fput_out;
  	}
  	if (unlikely(pos + count > max)) {
  		/* Already at or past the limit: nothing can be sent. */
  		if (pos >= max) {
  			retval = -EOVERFLOW;
  			goto fput_out;
  		}
  		count = max - pos;
  	}

  #if 0
  	/*
  	 * We need to debate whether we can enable this or not. The
  	 * man page documents EAGAIN return for the output at least,
  	 * and the application is arguably buggy if it doesn't expect
  	 * EAGAIN on a non-blocking file descriptor.
  	 */
  	if (in_file->f_flags & O_NONBLOCK)
  		fl = SPLICE_F_NONBLOCK;
  #endif
  	retval = do_splice_direct(in_file, ppos, out_file, count, fl);

  	if (retval > 0) {
  		add_rchar(current, retval);
  		add_wchar(current, retval);
  	}

  	inc_syscr(current);
  	inc_syscw(current);
  	if (*ppos > max)
  		retval = -EOVERFLOW;

  fput_out:
  	fput_light(out_file, fput_needed_out);
  fput_in:
  	fput_light(in_file, fput_needed_in);
  	return retval;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
861
  /*
   * sendfile(2) with an off_t offset.  With a non-NULL @offset the value is
   * read from user space, the transfer is limited to MAX_NON_LFS, and the
   * updated position is written back; -EFAULT is returned if either user
   * access fails.  With a NULL @offset the input file's own position is used.
   */
  SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	off_t off;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(get_user(off, offset)))
  		return -EFAULT;
  	pos = off;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;
  	return ret;
  }
002c8976e   Heiko Carstens   [CVE-2009-0029] S...
879
  /*
   * sendfile64: like sendfile but with a loff_t offset and no MAX_NON_LFS
   * limit.  With a non-NULL @offset the value is copied in from user space
   * and the updated position is written back; -EFAULT is returned if either
   * user access fails.  With a NULL @offset the input file's own position
   * is used.
   */
  SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
  {
  	loff_t pos;
  	ssize_t ret;

  	if (!offset)
  		return do_sendfile(out_fd, in_fd, NULL, count, 0);

  	if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  		return -EFAULT;
  	ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  	if (unlikely(put_user(pos, offset)))
  		return -EFAULT;
  	return ret;
  }