Commit 293bc9822fa9b3c9d4b7893bcb241e085580771a
1 parent
7f7f25e82d
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
new methods: ->read_iter() and ->write_iter()
Beginning to introduce those. Just the callers for now, and it's clumsier than it'll eventually become; once we finish converting the aio_read and aio_write instances, things will get nicer. For now, these methods exist in parallel with ->aio_read() and ->aio_write(); they take an iocb and an iov_iter, with everything in the iov_iter already validated. The file offset is passed in iocb->ki_pos; iov/nr_segs are carried in the iov_iter. The main concerns in this series are stack footprint and the ability to split the damn thing cleanly. [fix from Peter Ujfalusi <peter.ujfalusi@ti.com> folded] Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Showing 7 changed files with 121 additions and 13 deletions Side-by-side Diff
Documentation/filesystems/Locking
... | ... | @@ -430,6 +430,8 @@ |
430 | 430 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); |
431 | 431 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
432 | 432 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
433 | + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); | |
434 | + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); | |
433 | 435 | int (*iterate) (struct file *, struct dir_context *); |
434 | 436 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
435 | 437 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
Documentation/filesystems/vfs.txt
... | ... | @@ -806,6 +806,8 @@ |
806 | 806 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); |
807 | 807 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
808 | 808 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
809 | + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); | |
810 | + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); | |
809 | 811 | int (*iterate) (struct file *, struct dir_context *); |
810 | 812 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
811 | 813 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
812 | 814 | |
813 | 815 | |
... | ... | @@ -836,11 +838,15 @@ |
836 | 838 | |
837 | 839 | read: called by read(2) and related system calls |
838 | 840 | |
839 | - aio_read: called by io_submit(2) and other asynchronous I/O operations | |
841 | + aio_read: vectored, possibly asynchronous read | |
840 | 842 | |
843 | + read_iter: possibly asynchronous read with iov_iter as destination | |
844 | + | |
841 | 845 | write: called by write(2) and related system calls |
842 | 846 | |
843 | - aio_write: called by io_submit(2) and other asynchronous I/O operations | |
847 | + aio_write: vectored, possibly asynchronous write | |
848 | + | |
849 | + write_iter: possibly asynchronous write with iov_iter as source | |
844 | 850 | |
845 | 851 | iterate: called when the VFS needs to read the directory contents |
846 | 852 |
fs/aio.c
... | ... | @@ -1241,6 +1241,7 @@ |
1241 | 1241 | |
1242 | 1242 | typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, |
1243 | 1243 | unsigned long, loff_t); |
1244 | +typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); | |
1244 | 1245 | |
1245 | 1246 | static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, |
1246 | 1247 | int rw, char __user *buf, |
1247 | 1248 | |
... | ... | @@ -1298,7 +1299,9 @@ |
1298 | 1299 | int rw; |
1299 | 1300 | fmode_t mode; |
1300 | 1301 | aio_rw_op *rw_op; |
1302 | + rw_iter_op *iter_op; | |
1301 | 1303 | struct iovec inline_vec, *iovec = &inline_vec; |
1304 | + struct iov_iter iter; | |
1302 | 1305 | |
1303 | 1306 | switch (opcode) { |
1304 | 1307 | case IOCB_CMD_PREAD: |
... | ... | @@ -1306,6 +1309,7 @@ |
1306 | 1309 | mode = FMODE_READ; |
1307 | 1310 | rw = READ; |
1308 | 1311 | rw_op = file->f_op->aio_read; |
1312 | + iter_op = file->f_op->read_iter; | |
1309 | 1313 | goto rw_common; |
1310 | 1314 | |
1311 | 1315 | case IOCB_CMD_PWRITE: |
1312 | 1316 | |
... | ... | @@ -1313,12 +1317,13 @@ |
1313 | 1317 | mode = FMODE_WRITE; |
1314 | 1318 | rw = WRITE; |
1315 | 1319 | rw_op = file->f_op->aio_write; |
1320 | + iter_op = file->f_op->write_iter; | |
1316 | 1321 | goto rw_common; |
1317 | 1322 | rw_common: |
1318 | 1323 | if (unlikely(!(file->f_mode & mode))) |
1319 | 1324 | return -EBADF; |
1320 | 1325 | |
1321 | - if (!rw_op) | |
1326 | + if (!rw_op && !iter_op) | |
1322 | 1327 | return -EINVAL; |
1323 | 1328 | |
1324 | 1329 | ret = (opcode == IOCB_CMD_PREADV || |
... | ... | @@ -1347,7 +1352,12 @@ |
1347 | 1352 | if (rw == WRITE) |
1348 | 1353 | file_start_write(file); |
1349 | 1354 | |
1350 | - ret = rw_op(req, iovec, nr_segs, req->ki_pos); | |
1355 | + if (iter_op) { | |
1356 | + iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); | |
1357 | + ret = iter_op(req, &iter); | |
1358 | + } else { | |
1359 | + ret = rw_op(req, iovec, nr_segs, req->ki_pos); | |
1360 | + } | |
1351 | 1361 | |
1352 | 1362 | if (rw == WRITE) |
1353 | 1363 | file_end_write(file); |
fs/file_table.c
... | ... | @@ -175,9 +175,11 @@ |
175 | 175 | file->f_path = *path; |
176 | 176 | file->f_inode = path->dentry->d_inode; |
177 | 177 | file->f_mapping = path->dentry->d_inode->i_mapping; |
178 | - if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read)) | |
178 | + if ((mode & FMODE_READ) && | |
179 | + likely(fop->read || fop->aio_read || fop->read_iter)) | |
179 | 180 | mode |= FMODE_CAN_READ; |
180 | - if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write)) | |
181 | + if ((mode & FMODE_WRITE) && | |
182 | + likely(fop->write || fop->aio_write || fop->write_iter)) | |
181 | 183 | mode |= FMODE_CAN_WRITE; |
182 | 184 | file->f_mode = mode; |
183 | 185 | file->f_op = fop; |
fs/open.c
... | ... | @@ -725,9 +725,11 @@ |
725 | 725 | } |
726 | 726 | if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) |
727 | 727 | i_readcount_inc(inode); |
728 | - if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read)) | |
728 | + if ((f->f_mode & FMODE_READ) && | |
729 | + likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter)) | |
729 | 730 | f->f_mode |= FMODE_CAN_READ; |
730 | - if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write)) | |
731 | + if ((f->f_mode & FMODE_WRITE) && | |
732 | + likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter)) | |
731 | 733 | f->f_mode |= FMODE_CAN_WRITE; |
732 | 734 | |
733 | 735 | f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); |
fs/read_write.c
... | ... | @@ -25,6 +25,7 @@ |
25 | 25 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); |
26 | 26 | typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, |
27 | 27 | unsigned long, loff_t); |
28 | +typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); | |
28 | 29 | |
29 | 30 | const struct file_operations generic_ro_fops = { |
30 | 31 | .llseek = generic_file_llseek, |
... | ... | @@ -390,6 +391,27 @@ |
390 | 391 | |
391 | 392 | EXPORT_SYMBOL(do_sync_read); |
392 | 393 | |
394 | +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) | |
395 | +{ | |
396 | + struct iovec iov = { .iov_base = buf, .iov_len = len }; | |
397 | + struct kiocb kiocb; | |
398 | + struct iov_iter iter; | |
399 | + ssize_t ret; | |
400 | + | |
401 | + init_sync_kiocb(&kiocb, filp); | |
402 | + kiocb.ki_pos = *ppos; | |
403 | + kiocb.ki_nbytes = len; | |
404 | + iov_iter_init(&iter, READ, &iov, 1, len); | |
405 | + | |
406 | + ret = filp->f_op->read_iter(&kiocb, &iter); | |
407 | + if (-EIOCBQUEUED == ret) | |
408 | + ret = wait_on_sync_kiocb(&kiocb); | |
409 | + *ppos = kiocb.ki_pos; | |
410 | + return ret; | |
411 | +} | |
412 | + | |
413 | +EXPORT_SYMBOL(new_sync_read); | |
414 | + | |
393 | 415 | ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) |
394 | 416 | { |
395 | 417 | ssize_t ret; |
396 | 418 | |
... | ... | @@ -406,8 +428,10 @@ |
406 | 428 | count = ret; |
407 | 429 | if (file->f_op->read) |
408 | 430 | ret = file->f_op->read(file, buf, count, pos); |
409 | - else | |
431 | + else if (file->f_op->aio_read) | |
410 | 432 | ret = do_sync_read(file, buf, count, pos); |
433 | + else | |
434 | + ret = new_sync_read(file, buf, count, pos); | |
411 | 435 | if (ret > 0) { |
412 | 436 | fsnotify_access(file); |
413 | 437 | add_rchar(current, ret); |
... | ... | @@ -439,6 +463,27 @@ |
439 | 463 | |
440 | 464 | EXPORT_SYMBOL(do_sync_write); |
441 | 465 | |
466 | +ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) | |
467 | +{ | |
468 | + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; | |
469 | + struct kiocb kiocb; | |
470 | + struct iov_iter iter; | |
471 | + ssize_t ret; | |
472 | + | |
473 | + init_sync_kiocb(&kiocb, filp); | |
474 | + kiocb.ki_pos = *ppos; | |
475 | + kiocb.ki_nbytes = len; | |
476 | + iov_iter_init(&iter, WRITE, &iov, 1, len); | |
477 | + | |
478 | + ret = filp->f_op->write_iter(&kiocb, &iter); | |
479 | + if (-EIOCBQUEUED == ret) | |
480 | + ret = wait_on_sync_kiocb(&kiocb); | |
481 | + *ppos = kiocb.ki_pos; | |
482 | + return ret; | |
483 | +} | |
484 | + | |
485 | +EXPORT_SYMBOL(new_sync_write); | |
486 | + | |
442 | 487 | ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) |
443 | 488 | { |
444 | 489 | mm_segment_t old_fs; |
445 | 490 | |
... | ... | @@ -455,8 +500,10 @@ |
455 | 500 | count = MAX_RW_COUNT; |
456 | 501 | if (file->f_op->write) |
457 | 502 | ret = file->f_op->write(file, p, count, pos); |
458 | - else | |
503 | + else if (file->f_op->aio_write) | |
459 | 504 | ret = do_sync_write(file, p, count, pos); |
505 | + else | |
506 | + ret = new_sync_write(file, p, count, pos); | |
460 | 507 | set_fs(old_fs); |
461 | 508 | if (ret > 0) { |
462 | 509 | fsnotify_modify(file); |
463 | 510 | |
... | ... | @@ -483,8 +530,10 @@ |
483 | 530 | file_start_write(file); |
484 | 531 | if (file->f_op->write) |
485 | 532 | ret = file->f_op->write(file, buf, count, pos); |
486 | - else | |
533 | + else if (file->f_op->aio_write) | |
487 | 534 | ret = do_sync_write(file, buf, count, pos); |
535 | + else | |
536 | + ret = new_sync_write(file, buf, count, pos); | |
488 | 537 | if (ret > 0) { |
489 | 538 | fsnotify_modify(file); |
490 | 539 | add_wchar(current, ret); |
... | ... | @@ -601,6 +650,25 @@ |
601 | 650 | } |
602 | 651 | EXPORT_SYMBOL(iov_shorten); |
603 | 652 | |
653 | +static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, | |
654 | + unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) | |
655 | +{ | |
656 | + struct kiocb kiocb; | |
657 | + struct iov_iter iter; | |
658 | + ssize_t ret; | |
659 | + | |
660 | + init_sync_kiocb(&kiocb, filp); | |
661 | + kiocb.ki_pos = *ppos; | |
662 | + kiocb.ki_nbytes = len; | |
663 | + | |
664 | + iov_iter_init(&iter, rw, iov, nr_segs, len); | |
665 | + ret = fn(&kiocb, &iter); | |
666 | + if (ret == -EIOCBQUEUED) | |
667 | + ret = wait_on_sync_kiocb(&kiocb); | |
668 | + *ppos = kiocb.ki_pos; | |
669 | + return ret; | |
670 | +} | |
671 | + | |
604 | 672 | static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, |
605 | 673 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) |
606 | 674 | { |
... | ... | @@ -738,6 +806,7 @@ |
738 | 806 | ssize_t ret; |
739 | 807 | io_fn_t fn; |
740 | 808 | iov_fn_t fnv; |
809 | + iter_fn_t iter_fn; | |
741 | 810 | |
742 | 811 | ret = rw_copy_check_uvector(type, uvector, nr_segs, |
743 | 812 | ARRAY_SIZE(iovstack), iovstack, &iov); |
744 | 813 | |
745 | 814 | |
... | ... | @@ -753,13 +822,18 @@ |
753 | 822 | if (type == READ) { |
754 | 823 | fn = file->f_op->read; |
755 | 824 | fnv = file->f_op->aio_read; |
825 | + iter_fn = file->f_op->read_iter; | |
756 | 826 | } else { |
757 | 827 | fn = (io_fn_t)file->f_op->write; |
758 | 828 | fnv = file->f_op->aio_write; |
829 | + iter_fn = file->f_op->write_iter; | |
759 | 830 | file_start_write(file); |
760 | 831 | } |
761 | 832 | |
762 | - if (fnv) | |
833 | + if (iter_fn) | |
834 | + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, | |
835 | + pos, iter_fn); | |
836 | + else if (fnv) | |
763 | 837 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, |
764 | 838 | pos, fnv); |
765 | 839 | else |
... | ... | @@ -912,6 +986,7 @@ |
912 | 986 | ssize_t ret; |
913 | 987 | io_fn_t fn; |
914 | 988 | iov_fn_t fnv; |
989 | + iter_fn_t iter_fn; | |
915 | 990 | |
916 | 991 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, |
917 | 992 | UIO_FASTIOV, iovstack, &iov); |
918 | 993 | |
919 | 994 | |
... | ... | @@ -927,13 +1002,18 @@ |
927 | 1002 | if (type == READ) { |
928 | 1003 | fn = file->f_op->read; |
929 | 1004 | fnv = file->f_op->aio_read; |
1005 | + iter_fn = file->f_op->read_iter; | |
930 | 1006 | } else { |
931 | 1007 | fn = (io_fn_t)file->f_op->write; |
932 | 1008 | fnv = file->f_op->aio_write; |
1009 | + iter_fn = file->f_op->write_iter; | |
933 | 1010 | file_start_write(file); |
934 | 1011 | } |
935 | 1012 | |
936 | - if (fnv) | |
1013 | + if (iter_fn) | |
1014 | + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, | |
1015 | + pos, iter_fn); | |
1016 | + else if (fnv) | |
937 | 1017 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, |
938 | 1018 | pos, fnv); |
939 | 1019 | else |
include/linux/fs.h
... | ... | @@ -1451,6 +1451,8 @@ |
1451 | 1451 | #define HAVE_COMPAT_IOCTL 1 |
1452 | 1452 | #define HAVE_UNLOCKED_IOCTL 1 |
1453 | 1453 | |
1454 | +struct iov_iter; | |
1455 | + | |
1454 | 1456 | struct file_operations { |
1455 | 1457 | struct module *owner; |
1456 | 1458 | loff_t (*llseek) (struct file *, loff_t, int); |
... | ... | @@ -1458,6 +1460,8 @@ |
1458 | 1460 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); |
1459 | 1461 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
1460 | 1462 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
1463 | + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); | |
1464 | + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); | |
1461 | 1465 | int (*iterate) (struct file *, struct dir_context *); |
1462 | 1466 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
1463 | 1467 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
... | ... | @@ -2415,6 +2419,8 @@ |
2415 | 2419 | extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); |
2416 | 2420 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); |
2417 | 2421 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); |
2422 | +extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); | |
2423 | +extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); | |
2418 | 2424 | |
2419 | 2425 | /* fs/block_dev.c */ |
2420 | 2426 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, |