Commit b2858d7d1639c04ca3c54988d76c5f7300b76f1c

Authored by Miklos Szeredi
Committed by Jens Axboe
1 parent 4fc981ef9e

splice: fix kmaps in default_file_splice_write()

Unfortunately, multiple kmap() calls within a single thread can
deadlock, so writing out multiple buffers with writev() isn't possible.

Change the implementation so that it does a separate write() for each
buffer.  This actually simplifies the code a lot since the
splice_from_pipe() helper can be used.

This limitation is caused by HIGHMEM pages, and so only affects a
subset of architectures and configurations.  In the future it may be
worth implementing default_file_splice_write() in a more efficient way
on configs that allow it.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 1 changed file with 22 additions and 108 deletions Side-by-side Diff

... ... @@ -535,8 +535,8 @@
535 535 return res;
536 536 }
537 537  
538   -static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
539   - unsigned long vlen, loff_t *ppos)
  538 +static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
  539 + loff_t pos)
540 540 {
541 541 mm_segment_t old_fs;
542 542 ssize_t res;
... ... @@ -544,7 +544,7 @@
544 544 old_fs = get_fs();
545 545 set_fs(get_ds());
546 546 /* The cast to a user pointer is valid due to the set_fs() */
547   - res = vfs_writev(file, (const struct iovec __user *)vec, vlen, ppos);
  547 + res = vfs_write(file, (const char __user *)buf, count, &pos);
548 548 set_fs(old_fs);
549 549  
550 550 return res;
551 551  
552 552  
553 553  
554 554  
... ... @@ -1003,120 +1003,34 @@
1003 1003  
1004 1004 EXPORT_SYMBOL(generic_file_splice_write);
1005 1005  
1006   -static struct pipe_buffer *nth_pipe_buf(struct pipe_inode_info *pipe, int n)
  1006 +static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
  1007 + struct splice_desc *sd)
1007 1008 {
1008   - return &pipe->bufs[(pipe->curbuf + n) % PIPE_BUFFERS];
  1009 + int ret;
  1010 + void *data;
  1011 +
  1012 + ret = buf->ops->confirm(pipe, buf);
  1013 + if (ret)
  1014 + return ret;
  1015 +
  1016 + data = buf->ops->map(pipe, buf, 0);
  1017 + ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
  1018 + buf->ops->unmap(pipe, buf, data);
  1019 +
  1020 + return ret;
1009 1021 }
1010 1022  
1011 1023 static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
1012 1024 struct file *out, loff_t *ppos,
1013 1025 size_t len, unsigned int flags)
1014 1026 {
1015   - ssize_t ret = 0;
1016   - ssize_t total_len = 0;
1017   - int do_wakeup = 0;
  1027 + ssize_t ret;
1018 1028  
1019   - pipe_lock(pipe);
1020   - while (len) {
1021   - struct pipe_buffer *buf;
1022   - void *data[PIPE_BUFFERS];
1023   - struct iovec vec[PIPE_BUFFERS];
1024   - unsigned int nr_pages = 0;
1025   - unsigned int write_len = 0;
1026   - unsigned int now_len = len;
1027   - unsigned int this_len;
1028   - int i;
  1029 + ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
  1030 + if (ret > 0)
  1031 + *ppos += ret;
1029 1032  
1030   - BUG_ON(pipe->nrbufs > PIPE_BUFFERS);
1031   - for (i = 0; i < pipe->nrbufs && now_len; i++) {
1032   - buf = nth_pipe_buf(pipe, i);
1033   -
1034   - ret = buf->ops->confirm(pipe, buf);
1035   - if (ret)
1036   - break;
1037   -
1038   - data[i] = buf->ops->map(pipe, buf, 0);
1039   - this_len = min(buf->len, now_len);
1040   - vec[i].iov_base = (void __user *) data[i] + buf->offset;
1041   - vec[i].iov_len = this_len;
1042   - now_len -= this_len;
1043   - write_len += this_len;
1044   - nr_pages++;
1045   - }
1046   -
1047   - if (nr_pages) {
1048   - ret = kernel_writev(out, vec, nr_pages, ppos);
1049   - if (ret == 0)
1050   - ret = -EIO;
1051   - if (ret > 0) {
1052   - len -= ret;
1053   - total_len += ret;
1054   - }
1055   - }
1056   -
1057   - for (i = 0; i < nr_pages; i++) {
1058   - buf = nth_pipe_buf(pipe, i);
1059   - buf->ops->unmap(pipe, buf, data[i]);
1060   -
1061   - if (ret > 0) {
1062   - this_len = min_t(unsigned, vec[i].iov_len, ret);
1063   - buf->offset += this_len;
1064   - buf->len -= this_len;
1065   - ret -= this_len;
1066   - }
1067   - }
1068   -
1069   - if (ret < 0)
1070   - break;
1071   -
1072   - while (pipe->nrbufs) {
1073   - const struct pipe_buf_operations *ops;
1074   -
1075   - buf = nth_pipe_buf(pipe, 0);
1076   - if (buf->len)
1077   - break;
1078   -
1079   - ops = buf->ops;
1080   - buf->ops = NULL;
1081   - ops->release(pipe, buf);
1082   - pipe->curbuf = (pipe->curbuf + 1) % PIPE_BUFFERS;
1083   - pipe->nrbufs--;
1084   - if (pipe->inode)
1085   - do_wakeup = 1;
1086   - }
1087   -
1088   - if (pipe->nrbufs)
1089   - continue;
1090   - if (!pipe->writers)
1091   - break;
1092   - if (!pipe->waiting_writers) {
1093   - if (total_len)
1094   - break;
1095   - }
1096   -
1097   - if (flags & SPLICE_F_NONBLOCK) {
1098   - ret = -EAGAIN;
1099   - break;
1100   - }
1101   -
1102   - if (signal_pending(current)) {
1103   - ret = -ERESTARTSYS;
1104   - break;
1105   - }
1106   -
1107   - if (do_wakeup) {
1108   - wakeup_pipe_writers(pipe);
1109   - do_wakeup = 0;
1110   - }
1111   -
1112   - pipe_wait(pipe);
1113   - }
1114   - pipe_unlock(pipe);
1115   -
1116   - if (do_wakeup)
1117   - wakeup_pipe_writers(pipe);
1118   -
1119   - return total_len ? total_len : ret;
  1033 + return ret;
1120 1034 }
1121 1035  
1122 1036 /**