Commit 293bc9822fa9b3c9d4b7893bcb241e085580771a

Authored by Al Viro
1 parent 7f7f25e82d

new methods: ->read_iter() and ->write_iter()

This begins introducing them.  Only the callers are added for now, and
the result is clumsier than it will eventually be; once we finish
converting the aio_read and aio_write instances, things will get nicer.

For now, these methods exist in parallel with ->aio_read() and
->aio_write(); they take an iocb and an iov_iter, with everything in the
iov_iter already validated.  The file offset is passed in iocb->ki_pos;
iov and nr_segs are carried in the iov_iter.

The main concerns in that series are the stack footprint and the ability
to split the damn thing cleanly.

[fix from Peter Ujfalusi <peter.ujfalusi@ti.com> folded]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 7 changed files with 121 additions and 13 deletions Side-by-side Diff

Documentation/filesystems/Locking
... ... @@ -430,6 +430,8 @@
430 430 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
431 431 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
432 432 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
  433 + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
  434 + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
433 435 int (*iterate) (struct file *, struct dir_context *);
434 436 unsigned int (*poll) (struct file *, struct poll_table_struct *);
435 437 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
Documentation/filesystems/vfs.txt
... ... @@ -806,6 +806,8 @@
806 806 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
807 807 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
808 808 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
  809 + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
  810 + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
809 811 int (*iterate) (struct file *, struct dir_context *);
810 812 unsigned int (*poll) (struct file *, struct poll_table_struct *);
811 813 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
812 814  
813 815  
... ... @@ -836,11 +838,15 @@
836 838  
837 839 read: called by read(2) and related system calls
838 840  
839   - aio_read: called by io_submit(2) and other asynchronous I/O operations
  841 + aio_read: vectored, possibly asynchronous read
840 842  
  843 + read_iter: possibly asynchronous read with iov_iter as destination
  844 +
841 845 write: called by write(2) and related system calls
842 846  
843   - aio_write: called by io_submit(2) and other asynchronous I/O operations
  847 + aio_write: vectored, possibly asynchronous write
  848 +
  849 + write_iter: possibly asynchronous write with iov_iter as source
844 850  
845 851 iterate: called when the VFS needs to read the directory contents
846 852  
fs/aio.c
... ... @@ -1241,6 +1241,7 @@
1241 1241  
1242 1242 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
1243 1243 unsigned long, loff_t);
  1244 +typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
1244 1245  
1245 1246 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
1246 1247 int rw, char __user *buf,
1247 1248  
... ... @@ -1298,7 +1299,9 @@
1298 1299 int rw;
1299 1300 fmode_t mode;
1300 1301 aio_rw_op *rw_op;
  1302 + rw_iter_op *iter_op;
1301 1303 struct iovec inline_vec, *iovec = &inline_vec;
  1304 + struct iov_iter iter;
1302 1305  
1303 1306 switch (opcode) {
1304 1307 case IOCB_CMD_PREAD:
... ... @@ -1306,6 +1309,7 @@
1306 1309 mode = FMODE_READ;
1307 1310 rw = READ;
1308 1311 rw_op = file->f_op->aio_read;
  1312 + iter_op = file->f_op->read_iter;
1309 1313 goto rw_common;
1310 1314  
1311 1315 case IOCB_CMD_PWRITE:
1312 1316  
... ... @@ -1313,12 +1317,13 @@
1313 1317 mode = FMODE_WRITE;
1314 1318 rw = WRITE;
1315 1319 rw_op = file->f_op->aio_write;
  1320 + iter_op = file->f_op->write_iter;
1316 1321 goto rw_common;
1317 1322 rw_common:
1318 1323 if (unlikely(!(file->f_mode & mode)))
1319 1324 return -EBADF;
1320 1325  
1321   - if (!rw_op)
  1326 + if (!rw_op && !iter_op)
1322 1327 return -EINVAL;
1323 1328  
1324 1329 ret = (opcode == IOCB_CMD_PREADV ||
... ... @@ -1347,7 +1352,12 @@
1347 1352 if (rw == WRITE)
1348 1353 file_start_write(file);
1349 1354  
1350   - ret = rw_op(req, iovec, nr_segs, req->ki_pos);
  1355 + if (iter_op) {
  1356 + iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
  1357 + ret = iter_op(req, &iter);
  1358 + } else {
  1359 + ret = rw_op(req, iovec, nr_segs, req->ki_pos);
  1360 + }
1351 1361  
1352 1362 if (rw == WRITE)
1353 1363 file_end_write(file);
fs/file_table.c
... ... @@ -175,9 +175,11 @@
175 175 file->f_path = *path;
176 176 file->f_inode = path->dentry->d_inode;
177 177 file->f_mapping = path->dentry->d_inode->i_mapping;
178   - if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read))
  178 + if ((mode & FMODE_READ) &&
  179 + likely(fop->read || fop->aio_read || fop->read_iter))
179 180 mode |= FMODE_CAN_READ;
180   - if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write))
  181 + if ((mode & FMODE_WRITE) &&
  182 + likely(fop->write || fop->aio_write || fop->write_iter))
181 183 mode |= FMODE_CAN_WRITE;
182 184 file->f_mode = mode;
183 185 file->f_op = fop;
fs/open.c
... ... @@ -725,9 +725,11 @@
725 725 }
726 726 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
727 727 i_readcount_inc(inode);
728   - if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read))
  728 + if ((f->f_mode & FMODE_READ) &&
  729 + likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
729 730 f->f_mode |= FMODE_CAN_READ;
730   - if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write))
  731 + if ((f->f_mode & FMODE_WRITE) &&
  732 + likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
731 733 f->f_mode |= FMODE_CAN_WRITE;
732 734  
733 735 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
fs/read_write.c
... ... @@ -25,6 +25,7 @@
25 25 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
26 26 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
27 27 unsigned long, loff_t);
  28 +typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
28 29  
29 30 const struct file_operations generic_ro_fops = {
30 31 .llseek = generic_file_llseek,
... ... @@ -390,6 +391,27 @@
390 391  
391 392 EXPORT_SYMBOL(do_sync_read);
392 393  
  394 +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  395 +{
  396 + struct iovec iov = { .iov_base = buf, .iov_len = len };
  397 + struct kiocb kiocb;
  398 + struct iov_iter iter;
  399 + ssize_t ret;
  400 +
  401 + init_sync_kiocb(&kiocb, filp);
  402 + kiocb.ki_pos = *ppos;
  403 + kiocb.ki_nbytes = len;
  404 + iov_iter_init(&iter, READ, &iov, 1, len);
  405 +
  406 + ret = filp->f_op->read_iter(&kiocb, &iter);
  407 + if (-EIOCBQUEUED == ret)
  408 + ret = wait_on_sync_kiocb(&kiocb);
  409 + *ppos = kiocb.ki_pos;
  410 + return ret;
  411 +}
  412 +
  413 +EXPORT_SYMBOL(new_sync_read);
  414 +
393 415 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
394 416 {
395 417 ssize_t ret;
396 418  
... ... @@ -406,8 +428,10 @@
406 428 count = ret;
407 429 if (file->f_op->read)
408 430 ret = file->f_op->read(file, buf, count, pos);
409   - else
  431 + else if (file->f_op->aio_read)
410 432 ret = do_sync_read(file, buf, count, pos);
  433 + else
  434 + ret = new_sync_read(file, buf, count, pos);
411 435 if (ret > 0) {
412 436 fsnotify_access(file);
413 437 add_rchar(current, ret);
... ... @@ -439,6 +463,27 @@
439 463  
440 464 EXPORT_SYMBOL(do_sync_write);
441 465  
  466 +ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
  467 +{
  468 + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
  469 + struct kiocb kiocb;
  470 + struct iov_iter iter;
  471 + ssize_t ret;
  472 +
  473 + init_sync_kiocb(&kiocb, filp);
  474 + kiocb.ki_pos = *ppos;
  475 + kiocb.ki_nbytes = len;
  476 + iov_iter_init(&iter, WRITE, &iov, 1, len);
  477 +
  478 + ret = filp->f_op->write_iter(&kiocb, &iter);
  479 + if (-EIOCBQUEUED == ret)
  480 + ret = wait_on_sync_kiocb(&kiocb);
  481 + *ppos = kiocb.ki_pos;
  482 + return ret;
  483 +}
  484 +
  485 +EXPORT_SYMBOL(new_sync_write);
  486 +
442 487 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
443 488 {
444 489 mm_segment_t old_fs;
445 490  
... ... @@ -455,8 +500,10 @@
455 500 count = MAX_RW_COUNT;
456 501 if (file->f_op->write)
457 502 ret = file->f_op->write(file, p, count, pos);
458   - else
  503 + else if (file->f_op->aio_write)
459 504 ret = do_sync_write(file, p, count, pos);
  505 + else
  506 + ret = new_sync_write(file, p, count, pos);
460 507 set_fs(old_fs);
461 508 if (ret > 0) {
462 509 fsnotify_modify(file);
463 510  
... ... @@ -483,8 +530,10 @@
483 530 file_start_write(file);
484 531 if (file->f_op->write)
485 532 ret = file->f_op->write(file, buf, count, pos);
486   - else
  533 + else if (file->f_op->aio_write)
487 534 ret = do_sync_write(file, buf, count, pos);
  535 + else
  536 + ret = new_sync_write(file, buf, count, pos);
488 537 if (ret > 0) {
489 538 fsnotify_modify(file);
490 539 add_wchar(current, ret);
... ... @@ -601,6 +650,25 @@
601 650 }
602 651 EXPORT_SYMBOL(iov_shorten);
603 652  
  653 +static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
  654 + unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
  655 +{
  656 + struct kiocb kiocb;
  657 + struct iov_iter iter;
  658 + ssize_t ret;
  659 +
  660 + init_sync_kiocb(&kiocb, filp);
  661 + kiocb.ki_pos = *ppos;
  662 + kiocb.ki_nbytes = len;
  663 +
  664 + iov_iter_init(&iter, rw, iov, nr_segs, len);
  665 + ret = fn(&kiocb, &iter);
  666 + if (ret == -EIOCBQUEUED)
  667 + ret = wait_on_sync_kiocb(&kiocb);
  668 + *ppos = kiocb.ki_pos;
  669 + return ret;
  670 +}
  671 +
604 672 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
605 673 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
606 674 {
... ... @@ -738,6 +806,7 @@
738 806 ssize_t ret;
739 807 io_fn_t fn;
740 808 iov_fn_t fnv;
  809 + iter_fn_t iter_fn;
741 810  
742 811 ret = rw_copy_check_uvector(type, uvector, nr_segs,
743 812 ARRAY_SIZE(iovstack), iovstack, &iov);
744 813  
745 814  
... ... @@ -753,13 +822,18 @@
753 822 if (type == READ) {
754 823 fn = file->f_op->read;
755 824 fnv = file->f_op->aio_read;
  825 + iter_fn = file->f_op->read_iter;
756 826 } else {
757 827 fn = (io_fn_t)file->f_op->write;
758 828 fnv = file->f_op->aio_write;
  829 + iter_fn = file->f_op->write_iter;
759 830 file_start_write(file);
760 831 }
761 832  
762   - if (fnv)
  833 + if (iter_fn)
  834 + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
  835 + pos, iter_fn);
  836 + else if (fnv)
763 837 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
764 838 pos, fnv);
765 839 else
... ... @@ -912,6 +986,7 @@
912 986 ssize_t ret;
913 987 io_fn_t fn;
914 988 iov_fn_t fnv;
  989 + iter_fn_t iter_fn;
915 990  
916 991 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
917 992 UIO_FASTIOV, iovstack, &iov);
918 993  
919 994  
... ... @@ -927,13 +1002,18 @@
927 1002 if (type == READ) {
928 1003 fn = file->f_op->read;
929 1004 fnv = file->f_op->aio_read;
  1005 + iter_fn = file->f_op->read_iter;
930 1006 } else {
931 1007 fn = (io_fn_t)file->f_op->write;
932 1008 fnv = file->f_op->aio_write;
  1009 + iter_fn = file->f_op->write_iter;
933 1010 file_start_write(file);
934 1011 }
935 1012  
936   - if (fnv)
  1013 + if (iter_fn)
  1014 + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
  1015 + pos, iter_fn);
  1016 + else if (fnv)
937 1017 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
938 1018 pos, fnv);
939 1019 else
include/linux/fs.h
... ... @@ -1451,6 +1451,8 @@
1451 1451 #define HAVE_COMPAT_IOCTL 1
1452 1452 #define HAVE_UNLOCKED_IOCTL 1
1453 1453  
  1454 +struct iov_iter;
  1455 +
1454 1456 struct file_operations {
1455 1457 struct module *owner;
1456 1458 loff_t (*llseek) (struct file *, loff_t, int);
... ... @@ -1458,6 +1460,8 @@
1458 1460 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
1459 1461 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1460 1462 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
  1463 + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
  1464 + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
1461 1465 int (*iterate) (struct file *, struct dir_context *);
1462 1466 unsigned int (*poll) (struct file *, struct poll_table_struct *);
1463 1467 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
... ... @@ -2415,6 +2419,8 @@
2415 2419 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
2416 2420 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
2417 2421 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
  2422 +extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
  2423 +extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
2418 2424  
2419 2425 /* fs/block_dev.c */
2420 2426 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,