Blame view
fs/splice.c
39.7 KB
5274f052e
|
1 2 3 4 5 6 7 8 9 10 11 |
/* * "splice": joining two ropes together by interweaving their strands. * * This is the "extended pipe" functionality, where a pipe is used as * an arbitrary in-memory buffer. Think of a pipe as a small kernel * buffer that you can use to transfer data from one end to the other. * * The traditional unix read/write is extended with a "splice()" operation * that transfers data buffers to or from a pipe buffer. * * Named by Larry McVoy, original implementation from Linus, extended by |
c2058e061
|
12 13 |
* Jens to support splicing to files, network, direct splicing, etc and * fixing lots of bugs. |
5274f052e
|
14 |
* |
0fe234795
|
15 |
* Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk> |
c2058e061
|
16 17 |
* Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> |
5274f052e
|
18 19 20 21 22 |
* */ #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> |
d6b29d7ce
|
23 |
#include <linux/splice.h> |
08e552c69
|
24 |
#include <linux/memcontrol.h> |
5274f052e
|
25 |
#include <linux/mm_inline.h> |
5abc97aa2
|
26 |
#include <linux/swap.h> |
4f6f0bd2f
|
27 28 |
#include <linux/writeback.h> #include <linux/buffer_head.h> |
a0f067802
|
29 |
#include <linux/module.h> |
4f6f0bd2f
|
30 |
#include <linux/syscalls.h> |
912d35f86
|
31 |
#include <linux/uio.h> |
29ce20586
|
32 |
#include <linux/security.h> |
5274f052e
|
33 |
|
83f9135bd
|
34 35 36 37 38 39 |
/* * Attempt to steal a page from a pipe buffer. This should perhaps go into * a vm helper function, it's already simplified quite a bit by the * addition of remove_mapping(). If success is returned, the caller may * attempt to reuse this page for another destination. */ |
76ad4d111
|
40 |
static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, |
5abc97aa2
|
41 42 43 |
struct pipe_buffer *buf) { struct page *page = buf->page; |
9e94cd4fd
|
44 |
struct address_space *mapping; |
5abc97aa2
|
45 |
|
9e0267c26
|
46 |
lock_page(page); |
9e94cd4fd
|
47 48 49 |
mapping = page_mapping(page); if (mapping) { WARN_ON(!PageUptodate(page)); |
5abc97aa2
|
50 |
|
9e94cd4fd
|
51 52 53 54 55 56 57 58 59 |
/* * At least for ext2 with nobh option, we need to wait on * writeback completing on this page, since we'll remove it * from the pagecache. Otherwise truncate wont wait on the * page, allowing the disk blocks to be reused by someone else * before we actually wrote our data to them. fs corruption * ensues. */ wait_on_page_writeback(page); |
ad8d6f0a7
|
60 |
|
266cf658e
|
61 62 |
if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) |
ca39d651d
|
63 |
goto out_unlock; |
4f6f0bd2f
|
64 |
|
9e94cd4fd
|
65 66 67 68 69 70 71 72 |
/* * If we succeeded in removing the mapping, set LRU flag * and return good. */ if (remove_mapping(mapping, page)) { buf->flags |= PIPE_BUF_FLAG_LRU; return 0; } |
9e0267c26
|
73 |
} |
5abc97aa2
|
74 |
|
9e94cd4fd
|
75 76 77 78 |
/* * Raced with truncate or failed to remove page from current * address space, unlock and return failure. */ |
ca39d651d
|
79 |
out_unlock: |
9e94cd4fd
|
80 81 |
unlock_page(page); return 1; |
5abc97aa2
|
82 |
} |
76ad4d111
|
83 |
static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, |
5274f052e
|
84 85 86 |
struct pipe_buffer *buf) { page_cache_release(buf->page); |
1432873af
|
87 |
buf->flags &= ~PIPE_BUF_FLAG_LRU; |
5274f052e
|
88 |
} |
0845718da
|
89 90 91 92 |
/* * Check whether the contents of buf is OK to access. Since the content * is a page cache page, IO may be in flight. */ |
cac36bb06
|
93 94 |
static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) |
5274f052e
|
95 96 |
{ struct page *page = buf->page; |
49d0b21be
|
97 |
int err; |
5274f052e
|
98 99 |
if (!PageUptodate(page)) { |
49d0b21be
|
100 101 102 103 |
lock_page(page); /* * Page got truncated/unhashed. This will cause a 0-byte |
73d62d83e
|
104 |
* splice, if this is the first page. |
49d0b21be
|
105 106 107 108 109 |
*/ if (!page->mapping) { err = -ENODATA; goto error; } |
5274f052e
|
110 |
|
49d0b21be
|
111 |
/* |
73d62d83e
|
112 |
* Uh oh, read-error from disk. |
49d0b21be
|
113 114 115 116 117 118 119 |
*/ if (!PageUptodate(page)) { err = -EIO; goto error; } /* |
f84d75199
|
120 |
* Page is ok afterall, we are done. |
49d0b21be
|
121 |
*/ |
5274f052e
|
122 |
unlock_page(page); |
5274f052e
|
123 |
} |
f84d75199
|
124 |
return 0; |
49d0b21be
|
125 126 |
error: unlock_page(page); |
f84d75199
|
127 |
return err; |
70524490e
|
128 |
} |
d4c3cca94
|
129 |
static const struct pipe_buf_operations page_cache_pipe_buf_ops = { |
5274f052e
|
130 |
.can_merge = 0, |
f84d75199
|
131 132 |
.map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, |
cac36bb06
|
133 |
.confirm = page_cache_pipe_buf_confirm, |
5274f052e
|
134 |
.release = page_cache_pipe_buf_release, |
5abc97aa2
|
135 |
.steal = page_cache_pipe_buf_steal, |
f84d75199
|
136 |
.get = generic_pipe_buf_get, |
5274f052e
|
137 |
}; |
912d35f86
|
138 139 140 |
static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { |
7afa6fd03
|
141 142 |
if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) return 1; |
1432873af
|
143 |
buf->flags |= PIPE_BUF_FLAG_LRU; |
330ab7161
|
144 |
return generic_pipe_buf_steal(pipe, buf); |
912d35f86
|
145 |
} |
d4c3cca94
|
146 |
static const struct pipe_buf_operations user_page_pipe_buf_ops = { |
912d35f86
|
147 |
.can_merge = 0, |
f84d75199
|
148 149 |
.map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, |
cac36bb06
|
150 |
.confirm = generic_pipe_buf_confirm, |
912d35f86
|
151 152 |
.release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, |
f84d75199
|
153 |
.get = generic_pipe_buf_get, |
912d35f86
|
154 |
}; |
932cc6d4f
|
155 156 157 158 159 160 |
/** * splice_to_pipe - fill passed data into a pipe * @pipe: pipe to fill * @spd: data to fill * * Description: |
79685b8de
|
161 |
* @spd contains a map of pages and len/offset tuples, along with |
932cc6d4f
|
162 163 164 |
* the struct pipe_buf_operations associated with these pages. This * function will link that data to the pipe. * |
83f9135bd
|
165 |
*/ |
d6b29d7ce
|
166 167 |
ssize_t splice_to_pipe(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) |
5274f052e
|
168 |
{ |
00de00bda
|
169 |
unsigned int spd_pages = spd->nr_pages; |
912d35f86
|
170 |
int ret, do_wakeup, page_nr; |
5274f052e
|
171 172 173 |
ret = 0; do_wakeup = 0; |
912d35f86
|
174 |
page_nr = 0; |
5274f052e
|
175 |
|
61e0d47c3
|
176 |
pipe_lock(pipe); |
5274f052e
|
177 |
|
5274f052e
|
178 |
for (;;) { |
3a326a2ce
|
179 |
if (!pipe->readers) { |
5274f052e
|
180 181 182 183 184 |
send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } |
6f767b042
|
185 186 |
if (pipe->nrbufs < PIPE_BUFFERS) { int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); |
3a326a2ce
|
187 |
struct pipe_buffer *buf = pipe->bufs + newbuf; |
5274f052e
|
188 |
|
912d35f86
|
189 190 191 |
buf->page = spd->pages[page_nr]; buf->offset = spd->partial[page_nr].offset; buf->len = spd->partial[page_nr].len; |
497f9625c
|
192 |
buf->private = spd->partial[page_nr].private; |
912d35f86
|
193 |
buf->ops = spd->ops; |
7afa6fd03
|
194 195 |
if (spd->flags & SPLICE_F_GIFT) buf->flags |= PIPE_BUF_FLAG_GIFT; |
6f767b042
|
196 |
pipe->nrbufs++; |
912d35f86
|
197 198 |
page_nr++; ret += buf->len; |
6f767b042
|
199 200 |
if (pipe->inode) do_wakeup = 1; |
5274f052e
|
201 |
|
912d35f86
|
202 |
if (!--spd->nr_pages) |
5274f052e
|
203 |
break; |
6f767b042
|
204 |
if (pipe->nrbufs < PIPE_BUFFERS) |
5274f052e
|
205 206 207 208 |
continue; break; } |
912d35f86
|
209 |
if (spd->flags & SPLICE_F_NONBLOCK) { |
29e350944
|
210 211 212 213 |
if (!ret) ret = -EAGAIN; break; } |
5274f052e
|
214 215 216 217 218 219 220 |
if (signal_pending(current)) { if (!ret) ret = -ERESTARTSYS; break; } if (do_wakeup) { |
c0bd1f650
|
221 |
smp_mb(); |
3a326a2ce
|
222 223 224 |
if (waitqueue_active(&pipe->wait)) wake_up_interruptible_sync(&pipe->wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
5274f052e
|
225 226 |
do_wakeup = 0; } |
3a326a2ce
|
227 228 229 |
pipe->waiting_writers++; pipe_wait(pipe); pipe->waiting_writers--; |
5274f052e
|
230 |
} |
61e0d47c3
|
231 |
pipe_unlock(pipe); |
5274f052e
|
232 |
|
61e0d47c3
|
233 234 235 236 237 |
if (do_wakeup) { smp_mb(); if (waitqueue_active(&pipe->wait)) wake_up_interruptible(&pipe->wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
5274f052e
|
238 |
} |
00de00bda
|
239 |
while (page_nr < spd_pages) |
bbdfc2f70
|
240 |
spd->spd_release(spd, page_nr++); |
5274f052e
|
241 242 243 |
return ret; } |
bbdfc2f70
|
244 245 246 247 |
/* Default spd_release callback: drop the page-cache reference on page i. */
static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
	page_cache_release(spd->pages[i]);
}
3a326a2ce
|
248 |
static int |
cbb7e577e
|
249 250 251 |
__generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) |
5274f052e
|
252 253 |
{ struct address_space *mapping = in->f_mapping; |
d8983910a
|
254 |
unsigned int loff, nr_pages, req_pages; |
16c523dda
|
255 |
struct page *pages[PIPE_BUFFERS]; |
912d35f86
|
256 |
struct partial_page partial[PIPE_BUFFERS]; |
5274f052e
|
257 |
struct page *page; |
91ad66ef4
|
258 259 |
pgoff_t index, end_index; loff_t isize; |
eb20796bf
|
260 |
int error, page_nr; |
912d35f86
|
261 262 263 264 265 |
struct splice_pipe_desc spd = { .pages = pages, .partial = partial, .flags = flags, .ops = &page_cache_pipe_buf_ops, |
bbdfc2f70
|
266 |
.spd_release = spd_release_page, |
912d35f86
|
267 |
}; |
5274f052e
|
268 |
|
cbb7e577e
|
269 |
index = *ppos >> PAGE_CACHE_SHIFT; |
912d35f86
|
270 |
loff = *ppos & ~PAGE_CACHE_MASK; |
d8983910a
|
271 272 |
req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); |
5274f052e
|
273 274 |
/* |
eb20796bf
|
275 276 277 |
* Lookup the (hopefully) full range of pages we need. */ spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); |
431a4820b
|
278 |
index += spd.nr_pages; |
82aa5d618
|
279 |
|
eb20796bf
|
280 281 |
/* * If find_get_pages_contig() returned fewer pages than we needed, |
431a4820b
|
282 |
* readahead/allocate the rest and fill in the holes. |
eb20796bf
|
283 |
*/ |
431a4820b
|
284 |
if (spd.nr_pages < nr_pages) |
cf914a7d6
|
285 286 |
page_cache_sync_readahead(mapping, &in->f_ra, in, index, req_pages - spd.nr_pages); |
431a4820b
|
287 |
|
932cc6d4f
|
288 |
error = 0; |
eb20796bf
|
289 |
while (spd.nr_pages < nr_pages) { |
82aa5d618
|
290 |
/* |
eb20796bf
|
291 292 |
* Page could be there, find_get_pages_contig() breaks on * the first hole. |
5274f052e
|
293 |
*/ |
7480a9043
|
294 295 |
page = find_get_page(mapping, index); if (!page) { |
e27dedd84
|
296 |
/* |
eb20796bf
|
297 |
* page didn't exist, allocate one. |
7480a9043
|
298 299 300 301 302 303 |
*/ page = page_cache_alloc_cold(mapping); if (!page) break; error = add_to_page_cache_lru(page, mapping, index, |
4cd135046
|
304 |
mapping_gfp_mask(mapping)); |
7480a9043
|
305 306 |
if (unlikely(error)) { page_cache_release(page); |
a0548871e
|
307 308 |
if (error == -EEXIST) continue; |
7480a9043
|
309 310 |
break; } |
eb20796bf
|
311 312 313 314 315 |
/* * add_to_page_cache() locks the page, unlock it * to avoid convoluting the logic below even more. */ unlock_page(page); |
7480a9043
|
316 |
} |
eb20796bf
|
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
pages[spd.nr_pages++] = page; index++; } /* * Now loop over the map and see if we need to start IO on any * pages, fill in the partial map, etc. */ index = *ppos >> PAGE_CACHE_SHIFT; nr_pages = spd.nr_pages; spd.nr_pages = 0; for (page_nr = 0; page_nr < nr_pages; page_nr++) { unsigned int this_len; if (!len) break; /* * this_len is the max we'll use from this page */ this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); page = pages[page_nr]; |
a08a166fe
|
339 |
if (PageReadahead(page)) |
cf914a7d6
|
340 |
page_cache_async_readahead(mapping, &in->f_ra, in, |
d8983910a
|
341 |
page, index, req_pages - page_nr); |
a08a166fe
|
342 |
|
7480a9043
|
343 344 345 346 |
/* * If the page isn't uptodate, we may need to start io on it */ if (!PageUptodate(page)) { |
c4f895cbe
|
347 348 349 350 |
/* * If in nonblock mode then dont block on waiting * for an in-flight io page */ |
9ae9d68cb
|
351 |
if (flags & SPLICE_F_NONBLOCK) { |
529ae9aaa
|
352 |
if (!trylock_page(page)) { |
8191ecd1d
|
353 |
error = -EAGAIN; |
9ae9d68cb
|
354 |
break; |
8191ecd1d
|
355 |
} |
9ae9d68cb
|
356 357 |
} else lock_page(page); |
7480a9043
|
358 359 |
/* |
32502b841
|
360 361 362 363 |
* Page was truncated, or invalidated by the * filesystem. Redo the find/create, but this time the * page is kept locked, so there's no chance of another * race with truncate/invalidate. |
7480a9043
|
364 365 366 |
*/ if (!page->mapping) { unlock_page(page); |
32502b841
|
367 368 369 370 371 372 373 374 375 |
page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); if (!page) { error = -ENOMEM; break; } page_cache_release(pages[page_nr]); pages[page_nr] = page; |
7480a9043
|
376 377 378 379 380 381 382 383 |
} /* * page was already under io and is now done, great */ if (PageUptodate(page)) { unlock_page(page); goto fill_it; } |
5274f052e
|
384 |
|
7480a9043
|
385 386 387 388 |
/* * need to read in the page */ error = mapping->a_ops->readpage(in, page); |
5274f052e
|
389 |
if (unlikely(error)) { |
eb20796bf
|
390 391 392 393 394 395 |
/* * We really should re-lookup the page here, * but it complicates things a lot. Instead * lets just do what we already stored, and * we'll get it the next time we are called. */ |
7480a9043
|
396 |
if (error == AOP_TRUNCATED_PAGE) |
eb20796bf
|
397 |
error = 0; |
5274f052e
|
398 399 |
break; } |
620a324b7
|
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 |
} fill_it: /* * i_size must be checked after PageUptodate. */ isize = i_size_read(mapping->host); end_index = (isize - 1) >> PAGE_CACHE_SHIFT; if (unlikely(!isize || index > end_index)) break; /* * if this is the last page, see if we need to shrink * the length and stop */ if (end_index == index) { unsigned int plen; |
91ad66ef4
|
416 417 |
/* |
620a324b7
|
418 |
* max good bytes in this page |
91ad66ef4
|
419 |
*/ |
620a324b7
|
420 421 |
plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; if (plen <= loff) |
91ad66ef4
|
422 |
break; |
91ad66ef4
|
423 424 |
/* |
620a324b7
|
425 |
* force quit after adding this page |
91ad66ef4
|
426 |
*/ |
620a324b7
|
427 428 |
this_len = min(this_len, plen - loff); len = this_len; |
5274f052e
|
429 |
} |
620a324b7
|
430 |
|
eb20796bf
|
431 432 |
partial[page_nr].offset = loff; partial[page_nr].len = this_len; |
82aa5d618
|
433 |
len -= this_len; |
91ad66ef4
|
434 |
loff = 0; |
eb20796bf
|
435 436 |
spd.nr_pages++; index++; |
5274f052e
|
437 |
} |
eb20796bf
|
438 |
/* |
475ecade6
|
439 |
* Release any pages at the end, if we quit early. 'page_nr' is how far |
eb20796bf
|
440 441 442 443 |
* we got, 'nr_pages' is how many pages are in the map. */ while (page_nr < nr_pages) page_cache_release(pages[page_nr++]); |
f4e6b498d
|
444 |
in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
eb20796bf
|
445 |
|
912d35f86
|
446 |
if (spd.nr_pages) |
00522fb41
|
447 |
return splice_to_pipe(pipe, &spd); |
5274f052e
|
448 |
|
7480a9043
|
449 |
return error; |
5274f052e
|
450 |
} |
83f9135bd
|
451 452 453 |
/** * generic_file_splice_read - splice data from file to a pipe * @in: file to splice from |
932cc6d4f
|
454 |
* @ppos: position in @in |
83f9135bd
|
455 456 457 458 |
* @pipe: pipe to splice to * @len: number of bytes to splice * @flags: splice modifier flags * |
932cc6d4f
|
459 460 461 462 463 |
* Description: * Will read pages from given file and fill them into a pipe. Can be * used as long as the address_space operations for the source implements * a readpage() hook. * |
83f9135bd
|
464 |
*/ |
cbb7e577e
|
465 466 467 |
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) |
5274f052e
|
468 |
{ |
d366d3988
|
469 |
loff_t isize, left; |
8191ecd1d
|
470 |
int ret; |
d366d3988
|
471 472 473 474 475 476 477 478 |
isize = i_size_read(in->f_mapping->host); if (unlikely(*ppos >= isize)) return 0; left = isize - *ppos; if (unlikely(left < len)) len = left; |
5274f052e
|
479 |
|
8191ecd1d
|
480 481 |
ret = __generic_file_splice_read(in, ppos, pipe, len, flags); if (ret > 0) |
cbb7e577e
|
482 |
*ppos += ret; |
5274f052e
|
483 484 485 |
return ret; } |
059a8f373
|
486 |
EXPORT_SYMBOL(generic_file_splice_read); |
5274f052e
|
487 |
/* |
4f6f0bd2f
|
488 |
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' |
016b661e2
|
489 |
* using sendpage(). Return the number of bytes sent. |
5274f052e
|
490 |
*/ |
76ad4d111
|
491 |
static int pipe_to_sendpage(struct pipe_inode_info *pipe, |
5274f052e
|
492 493 |
struct pipe_buffer *buf, struct splice_desc *sd) { |
6a14b90bb
|
494 |
struct file *file = sd->u.file; |
5274f052e
|
495 |
loff_t pos = sd->pos; |
f84d75199
|
496 |
int ret, more; |
5274f052e
|
497 |
|
cac36bb06
|
498 |
ret = buf->ops->confirm(pipe, buf); |
f84d75199
|
499 500 |
if (!ret) { more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; |
5274f052e
|
501 |
|
f84d75199
|
502 503 504 |
ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len, &pos, more); } |
5274f052e
|
505 |
|
016b661e2
|
506 |
return ret; |
5274f052e
|
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 |
} /* * This is a little more tricky than the file -> pipe splicing. There are * basically three cases: * * - Destination page already exists in the address space and there * are users of it. For that case we have no other option that * copying the data. Tough luck. * - Destination page already exists in the address space, but there * are no users of it. Make sure it's uptodate, then drop it. Fall * through to last case. * - Destination page does not exist, we can add the pipe page to * the page cache and avoid the copy. * |
83f9135bd
|
522 523 524 525 526 527 |
* If asked to move pages to the output file (SPLICE_F_MOVE is set in * sd->flags), we attempt to migrate pages from the pipe to the output * file address space page cache. This is possible if no one else has * the pipe page referenced outside of the pipe and page cache. If * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. |
5274f052e
|
528 |
*/ |
328eaaba4
|
529 530 |
int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) |
5274f052e
|
531 |
{ |
6a14b90bb
|
532 |
struct file *file = sd->u.file; |
5274f052e
|
533 |
struct address_space *mapping = file->f_mapping; |
016b661e2
|
534 |
unsigned int offset, this_len; |
5274f052e
|
535 |
struct page *page; |
afddba49d
|
536 |
void *fsdata; |
3e7ee3e7b
|
537 |
int ret; |
5274f052e
|
538 539 |
/* |
49d0b21be
|
540 |
* make sure the data in this buffer is uptodate |
5274f052e
|
541 |
*/ |
cac36bb06
|
542 |
ret = buf->ops->confirm(pipe, buf); |
f84d75199
|
543 544 |
if (unlikely(ret)) return ret; |
5274f052e
|
545 |
|
5274f052e
|
546 |
offset = sd->pos & ~PAGE_CACHE_MASK; |
016b661e2
|
547 548 549 |
this_len = sd->len; if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; |
afddba49d
|
550 551 552 553 |
ret = pagecache_write_begin(file, mapping, sd->pos, this_len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); if (unlikely(ret)) goto out; |
5274f052e
|
554 |
|
0568b409c
|
555 |
if (buf->page != page) { |
f84d75199
|
556 557 558 |
/* * Careful, ->map() uses KM_USER0! */ |
76ad4d111
|
559 |
char *src = buf->ops->map(pipe, buf, 1); |
f84d75199
|
560 |
char *dst = kmap_atomic(page, KM_USER1); |
5abc97aa2
|
561 |
|
016b661e2
|
562 |
memcpy(dst + offset, src + buf->offset, this_len); |
5abc97aa2
|
563 |
flush_dcache_page(page); |
f84d75199
|
564 |
kunmap_atomic(dst, KM_USER1); |
76ad4d111
|
565 |
buf->ops->unmap(pipe, buf, src); |
5abc97aa2
|
566 |
} |
afddba49d
|
567 568 |
ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, page, fsdata); |
5274f052e
|
569 |
out: |
5274f052e
|
570 571 |
return ret; } |
328eaaba4
|
572 |
EXPORT_SYMBOL(pipe_to_file); |
5274f052e
|
573 |
|
b3c2d2ddd
|
574 575 576 577 578 579 580 |
/* Wake any writers blocked on the pipe and deliver SIGIO to async waiters. */
static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
{
	smp_mb();
	if (waitqueue_active(&pipe->wait))
		wake_up_interruptible(&pipe->wait);
	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
932cc6d4f
|
581 |
/** |
b3c2d2ddd
|
582 |
* splice_from_pipe_feed - feed available data from a pipe to a file |
932cc6d4f
|
583 584 585 586 587 |
* @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: |
b3c2d2ddd
|
588 589 590 591 592 593 594 |
* This function loops over the pipe and calls @actor to do the * actual moving of a single struct pipe_buffer to the desired * destination. It returns when there's no more buffers left in * the pipe or if the requested number of bytes (@sd->total_len) * have been copied. It returns a positive number (one) if the * pipe needs to be filled with more data, zero if the required * number of bytes have been copied and -errno on error. |
932cc6d4f
|
595 |
* |
b3c2d2ddd
|
596 597 598 599 |
* This, together with splice_from_pipe_{begin,end,next}, may be * used to implement the functionality of __splice_from_pipe() when * locking is required around copying the pipe buffers to the * destination. |
83f9135bd
|
600 |
*/ |
b3c2d2ddd
|
601 602 |
int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_actor *actor) |
5274f052e
|
603 |
{ |
b3c2d2ddd
|
604 |
int ret; |
5274f052e
|
605 |
|
b3c2d2ddd
|
606 607 608 |
while (pipe->nrbufs) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; const struct pipe_buf_operations *ops = buf->ops; |
5274f052e
|
609 |
|
b3c2d2ddd
|
610 611 612 |
sd->len = buf->len; if (sd->len > sd->total_len) sd->len = sd->total_len; |
5274f052e
|
613 |
|
b3c2d2ddd
|
614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 |
ret = actor(pipe, buf, sd); if (ret <= 0) { if (ret == -ENODATA) ret = 0; return ret; } buf->offset += ret; buf->len -= ret; sd->num_spliced += ret; sd->len -= ret; sd->pos += ret; sd->total_len -= ret; if (!buf->len) { buf->ops = NULL; ops->release(pipe, buf); pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); pipe->nrbufs--; if (pipe->inode) sd->need_wakeup = true; } |
5274f052e
|
636 |
|
b3c2d2ddd
|
637 638 639 |
if (!sd->total_len) return 0; } |
5274f052e
|
640 |
|
b3c2d2ddd
|
641 642 643 |
return 1; } EXPORT_SYMBOL(splice_from_pipe_feed); |
5274f052e
|
644 |
|
b3c2d2ddd
|
645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 |
/** * splice_from_pipe_next - wait for some data to splice from * @pipe: pipe to splice from * @sd: information about the splice operation * * Description: * This function will wait for some data and return a positive * value (one) if pipe buffers are available. It will return zero * or -errno if no more data needs to be spliced. */ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) { while (!pipe->nrbufs) { if (!pipe->writers) return 0; |
016b661e2
|
660 |
|
b3c2d2ddd
|
661 662 |
if (!pipe->waiting_writers && sd->num_spliced) return 0; |
73d62d83e
|
663 |
|
b3c2d2ddd
|
664 665 |
if (sd->flags & SPLICE_F_NONBLOCK) return -EAGAIN; |
5274f052e
|
666 |
|
b3c2d2ddd
|
667 668 |
if (signal_pending(current)) return -ERESTARTSYS; |
5274f052e
|
669 |
|
b3c2d2ddd
|
670 671 672 |
if (sd->need_wakeup) { wakeup_pipe_writers(pipe); sd->need_wakeup = false; |
5274f052e
|
673 |
} |
b3c2d2ddd
|
674 675 |
pipe_wait(pipe); } |
29e350944
|
676 |
|
b3c2d2ddd
|
677 678 679 |
return 1; } EXPORT_SYMBOL(splice_from_pipe_next); |
5274f052e
|
680 |
|
b3c2d2ddd
|
681 682 |
/**
 * splice_from_pipe_begin - start splicing from pipe
 * @sd:		information about the splice operation
 *
 * Description:
 *    This function should be called before a loop containing
 *    splice_from_pipe_next() and splice_from_pipe_feed() to
 *    initialize the necessary fields of @sd.
 */
void splice_from_pipe_begin(struct splice_desc *sd)
{
	sd->num_spliced = 0;
	sd->need_wakeup = false;
}
EXPORT_SYMBOL(splice_from_pipe_begin);
5274f052e
|
696 |
|
b3c2d2ddd
|
697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 |
/**
 * splice_from_pipe_end - finish splicing from pipe
 * @pipe:	pipe to splice from
 * @sd:		information about the splice operation
 *
 * Description:
 *    This function will wake up pipe writers if necessary.  It should
 *    be called after a loop containing splice_from_pipe_next() and
 *    splice_from_pipe_feed().
 */
void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
	if (sd->need_wakeup)
		wakeup_pipe_writers(pipe);
}
EXPORT_SYMBOL(splice_from_pipe_end);
5274f052e
|
713 |
|
b3c2d2ddd
|
714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 |
/**
 * __splice_from_pipe - splice data from a pipe to given actor
 * @pipe:	pipe to splice from
 * @sd:		information to @actor
 * @actor:	handler that splices the data
 *
 * Description:
 *    This function does little more than loop over the pipe and call
 *    @actor to do the actual moving of a single struct pipe_buffer to
 *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
 *    pipe_to_user.
 *
 */
ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
			   splice_actor *actor)
{
	int ret;

	splice_from_pipe_begin(sd);
	do {
		ret = splice_from_pipe_next(pipe, sd);
		if (ret > 0)
			ret = splice_from_pipe_feed(pipe, sd, actor);
	} while (ret > 0);
	splice_from_pipe_end(pipe, sd);

	return sd->num_spliced ? sd->num_spliced : ret;
}
EXPORT_SYMBOL(__splice_from_pipe);
5274f052e
|
743 |
|
932cc6d4f
|
744 745 746 747 748 749 750 751 752 753 |
/** * splice_from_pipe - splice data from a pipe to a file * @pipe: pipe to splice from * @out: file to splice to * @ppos: position in @out * @len: how many bytes to splice * @flags: splice modifier flags * @actor: handler that splices the data * * Description: |
2933970b9
|
754 |
* See __splice_from_pipe. This function locks the pipe inode, |
932cc6d4f
|
755 756 757 |
* otherwise it's identical to __splice_from_pipe(). * */ |
6da618098
|
758 759 760 761 762 |
ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags, splice_actor *actor) { ssize_t ret; |
c66ab6fa7
|
763 764 765 766 |
struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, |
6a14b90bb
|
767 |
.u.file = out, |
c66ab6fa7
|
768 |
}; |
6da618098
|
769 |
|
61e0d47c3
|
770 |
pipe_lock(pipe); |
c66ab6fa7
|
771 |
ret = __splice_from_pipe(pipe, &sd, actor); |
61e0d47c3
|
772 |
pipe_unlock(pipe); |
6da618098
|
773 774 775 776 777 |
return ret; } /** |
83f9135bd
|
778 |
* generic_file_splice_write - splice data from a pipe to a file |
3a326a2ce
|
779 |
* @pipe: pipe info |
83f9135bd
|
780 |
* @out: file to write to |
932cc6d4f
|
781 |
* @ppos: position in @out |
83f9135bd
|
782 783 784 |
* @len: number of bytes to splice * @flags: splice modifier flags * |
932cc6d4f
|
785 786 787 |
* Description: * Will either move or copy pages (determined by @flags options) from * the given pipe inode to the given file. |
83f9135bd
|
788 789 |
* */ |
3a326a2ce
|
790 791 |
ssize_t generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, |
cbb7e577e
|
792 |
loff_t *ppos, size_t len, unsigned int flags) |
5274f052e
|
793 |
{ |
4f6f0bd2f
|
794 |
struct address_space *mapping = out->f_mapping; |
8c34e2d63
|
795 |
struct inode *inode = mapping->host; |
7f3d4ee10
|
796 797 798 799 800 801 |
struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, }; |
3a326a2ce
|
802 |
ssize_t ret; |
61e0d47c3
|
803 |
pipe_lock(pipe); |
eb443e5a2
|
804 805 806 807 808 809 810 811 812 813 814 815 816 817 |
splice_from_pipe_begin(&sd); do { ret = splice_from_pipe_next(pipe, &sd); if (ret <= 0) break; mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); if (!ret) ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); mutex_unlock(&inode->i_mutex); } while (ret > 0); splice_from_pipe_end(pipe, &sd); |
61e0d47c3
|
818 |
pipe_unlock(pipe); |
eb443e5a2
|
819 820 821 |
if (sd.num_spliced) ret = sd.num_spliced; |
a4514ebd8
|
822 |
if (ret > 0) { |
17ee4f49a
|
823 |
unsigned long nr_pages; |
a4514ebd8
|
824 |
*ppos += ret; |
17ee4f49a
|
825 |
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
a4514ebd8
|
826 827 828 829 830 831 |
/* * If file or inode is SYNC and we actually wrote some data, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { |
7f3d4ee10
|
832 |
int err; |
a4514ebd8
|
833 834 835 836 |
mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); mutex_unlock(&inode->i_mutex); |
4f6f0bd2f
|
837 |
|
a4514ebd8
|
838 839 840 |
if (err) ret = err; } |
17ee4f49a
|
841 |
balance_dirty_pages_ratelimited_nr(mapping, nr_pages); |
4f6f0bd2f
|
842 843 844 |
} return ret; |
5274f052e
|
845 |
} |
059a8f373
|
846 |
EXPORT_SYMBOL(generic_file_splice_write); |
83f9135bd
|
847 848 |
/** * generic_splice_sendpage - splice data from a pipe to a socket |
932cc6d4f
|
849 |
* @pipe: pipe to splice from |
83f9135bd
|
850 |
* @out: socket to write to |
932cc6d4f
|
851 |
* @ppos: position in @out |
83f9135bd
|
852 853 854 |
* @len: number of bytes to splice * @flags: splice modifier flags * |
932cc6d4f
|
855 856 857 |
* Description: * Will send @len bytes from the pipe to a network socket. No data copying * is involved. |
83f9135bd
|
858 859 |
* */ |
3a326a2ce
|
860 |
ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, |
cbb7e577e
|
861 |
loff_t *ppos, size_t len, unsigned int flags) |
5274f052e
|
862 |
{ |
00522fb41
|
863 |
return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); |
5274f052e
|
864 |
} |
059a8f373
|
865 |
EXPORT_SYMBOL(generic_splice_sendpage); |
a0f067802
|
866 |
|
83f9135bd
|
867 868 869 |
/* * Attempt to initiate a splice from pipe to file. */ |
3a326a2ce
|
870 |
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, |
cbb7e577e
|
871 |
loff_t *ppos, size_t len, unsigned int flags) |
5274f052e
|
872 |
{ |
5274f052e
|
873 |
int ret; |
49570e9b2
|
874 |
if (unlikely(!out->f_op || !out->f_op->splice_write)) |
5274f052e
|
875 |
return -EINVAL; |
49570e9b2
|
876 |
if (unlikely(!(out->f_mode & FMODE_WRITE))) |
5274f052e
|
877 |
return -EBADF; |
efc968d45
|
878 879 |
if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; |
cbb7e577e
|
880 |
ret = rw_verify_area(WRITE, out, ppos, len); |
5274f052e
|
881 882 |
if (unlikely(ret < 0)) return ret; |
cbb7e577e
|
883 |
return out->f_op->splice_write(pipe, out, ppos, len, flags); |
5274f052e
|
884 |
} |
83f9135bd
|
885 886 887 |
/* * Attempt to initiate a splice from a file to a pipe. */ |
cbb7e577e
|
888 889 890 |
static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) |
5274f052e
|
891 |
{ |
5274f052e
|
892 |
int ret; |
49570e9b2
|
893 |
if (unlikely(!in->f_op || !in->f_op->splice_read)) |
5274f052e
|
894 |
return -EINVAL; |
49570e9b2
|
895 |
if (unlikely(!(in->f_mode & FMODE_READ))) |
5274f052e
|
896 |
return -EBADF; |
cbb7e577e
|
897 |
ret = rw_verify_area(READ, in, ppos, len); |
5274f052e
|
898 899 |
if (unlikely(ret < 0)) return ret; |
cbb7e577e
|
900 |
return in->f_op->splice_read(in, ppos, pipe, len, flags); |
5274f052e
|
901 |
} |
932cc6d4f
|
902 903 904 905 906 907 908 909 910 |
/** * splice_direct_to_actor - splices data directly between two non-pipes * @in: file to splice from * @sd: actor information on where to splice to * @actor: handles the data splicing * * Description: * This is a special case helper to splice directly between two * points, without requiring an explicit pipe. Internally an allocated |
79685b8de
|
911 |
* pipe is cached in the process, and reused during the lifetime of |
932cc6d4f
|
912 913 |
* that process. * |
c66ab6fa7
|
914 915 916 |
*/ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, splice_direct_actor *actor) |
b92ce5589
|
917 918 919 920 |
{ struct pipe_inode_info *pipe; long ret, bytes; umode_t i_mode; |
c66ab6fa7
|
921 922 |
size_t len; int i, flags; |
b92ce5589
|
923 924 925 926 927 928 |
/* * We require the input being a regular file, as we don't want to * randomly drop data for eg socket -> socket splicing. Use the * piped splicing for that! */ |
0f7fc9e4d
|
929 |
i_mode = in->f_path.dentry->d_inode->i_mode; |
b92ce5589
|
930 931 932 933 934 935 936 937 |
if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) return -EINVAL; /* * neither in nor out is a pipe, setup an internal pipe attached to * 'out' and transfer the wanted data from 'in' to 'out' through that */ pipe = current->splice_pipe; |
49570e9b2
|
938 |
if (unlikely(!pipe)) { |
b92ce5589
|
939 940 941 942 943 944 |
pipe = alloc_pipe_info(NULL); if (!pipe) return -ENOMEM; /* * We don't have an immediate reader, but we'll read the stuff |
00522fb41
|
945 |
* out of the pipe right after the splice_to_pipe(). So set |
b92ce5589
|
946 947 948 949 950 951 952 953 |
* PIPE_READERS appropriately. */ pipe->readers = 1; current->splice_pipe = pipe; } /* |
73d62d83e
|
954 |
* Do the splice. |
b92ce5589
|
955 956 957 |
*/ ret = 0; bytes = 0; |
c66ab6fa7
|
958 959 960 961 962 963 964 |
len = sd->total_len; flags = sd->flags; /* * Don't block on output, we have to drain the direct pipe. */ sd->flags &= ~SPLICE_F_NONBLOCK; |
b92ce5589
|
965 966 |
while (len) { |
51a92c0f6
|
967 |
size_t read_len; |
a82c53a0e
|
968 |
loff_t pos = sd->pos, prev_pos = pos; |
b92ce5589
|
969 |
|
bcd4f3acb
|
970 |
ret = do_splice_to(in, &pos, pipe, len, flags); |
51a92c0f6
|
971 |
if (unlikely(ret <= 0)) |
b92ce5589
|
972 973 974 |
goto out_release; read_len = ret; |
c66ab6fa7
|
975 |
sd->total_len = read_len; |
b92ce5589
|
976 977 978 979 980 981 |
/* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: */ |
c66ab6fa7
|
982 |
ret = actor(pipe, sd); |
a82c53a0e
|
983 984 |
if (unlikely(ret <= 0)) { sd->pos = prev_pos; |
b92ce5589
|
985 |
goto out_release; |
a82c53a0e
|
986 |
} |
b92ce5589
|
987 988 989 |
bytes += ret; len -= ret; |
bcd4f3acb
|
990 |
sd->pos = pos; |
b92ce5589
|
991 |
|
a82c53a0e
|
992 993 |
if (ret < read_len) { sd->pos = prev_pos + ret; |
51a92c0f6
|
994 |
goto out_release; |
a82c53a0e
|
995 |
} |
b92ce5589
|
996 |
} |
9e97198db
|
997 |
done: |
b92ce5589
|
998 |
pipe->nrbufs = pipe->curbuf = 0; |
808487085
|
999 |
file_accessed(in); |
b92ce5589
|
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 |
return bytes; out_release: /* * If we did an incomplete transfer we must release * the pipe buffers in question: */ for (i = 0; i < PIPE_BUFFERS; i++) { struct pipe_buffer *buf = pipe->bufs + i; if (buf->ops) { buf->ops->release(pipe, buf); buf->ops = NULL; } } |
b92ce5589
|
1015 |
|
9e97198db
|
1016 1017 |
if (!bytes) bytes = ret; |
c66ab6fa7
|
1018 |
|
9e97198db
|
1019 |
goto done; |
c66ab6fa7
|
1020 1021 1022 1023 1024 1025 |
} EXPORT_SYMBOL(splice_direct_to_actor); static int direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { |
6a14b90bb
|
1026 |
struct file *file = sd->u.file; |
c66ab6fa7
|
1027 1028 1029 |
return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); } |
932cc6d4f
|
1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 |
/** * do_splice_direct - splices data directly between two files * @in: file to splice from * @ppos: input file offset * @out: file to splice to * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * For use by do_sendfile(). splice can easily emulate sendfile, but * doing it in the application would incur an extra system call * (splice in + splice out, as compared to just sendfile()). So this helper * can splice directly through a process-private pipe. * */ |
c66ab6fa7
|
1045 1046 1047 1048 1049 1050 1051 1052 |
long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, size_t len, unsigned int flags) { struct splice_desc sd = { .len = len, .total_len = len, .flags = flags, .pos = *ppos, |
6a14b90bb
|
1053 |
.u.file = out, |
c66ab6fa7
|
1054 |
}; |
51a92c0f6
|
1055 |
long ret; |
c66ab6fa7
|
1056 1057 |
ret = splice_direct_to_actor(in, &sd, direct_splice_actor); |
51a92c0f6
|
1058 |
if (ret > 0) |
a82c53a0e
|
1059 |
*ppos = sd.pos; |
51a92c0f6
|
1060 |
|
c66ab6fa7
|
1061 |
return ret; |
b92ce5589
|
1062 |
} |
83f9135bd
|
1063 |
/* |
ddac0d39c
|
1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 |
* After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same * location, so checking ->i_pipe is not enough to verify that this is a * pipe. */ static inline struct pipe_inode_info *pipe_info(struct inode *inode) { if (S_ISFIFO(inode->i_mode)) return inode->i_pipe; return NULL; } /* |
83f9135bd
|
1077 1078 |
* Determine where to splice to/from. */ |
529565dcb
|
1079 1080 1081 |
static long do_splice(struct file *in, loff_t __user *off_in, struct file *out, loff_t __user *off_out, size_t len, unsigned int flags) |
5274f052e
|
1082 |
{ |
3a326a2ce
|
1083 |
struct pipe_inode_info *pipe; |
cbb7e577e
|
1084 |
loff_t offset, *off; |
a4514ebd8
|
1085 |
long ret; |
5274f052e
|
1086 |
|
0f7fc9e4d
|
1087 |
pipe = pipe_info(in->f_path.dentry->d_inode); |
529565dcb
|
1088 1089 1090 |
if (pipe) { if (off_in) return -ESPIPE; |
b92ce5589
|
1091 1092 1093 |
if (off_out) { if (out->f_op->llseek == no_llseek) return -EINVAL; |
cbb7e577e
|
1094 |
if (copy_from_user(&offset, off_out, sizeof(loff_t))) |
b92ce5589
|
1095 |
return -EFAULT; |
cbb7e577e
|
1096 1097 1098 |
off = &offset; } else off = &out->f_pos; |
529565dcb
|
1099 |
|
a4514ebd8
|
1100 1101 1102 1103 1104 1105 |
ret = do_splice_from(pipe, out, off, len, flags); if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) ret = -EFAULT; return ret; |
529565dcb
|
1106 |
} |
5274f052e
|
1107 |
|
0f7fc9e4d
|
1108 |
pipe = pipe_info(out->f_path.dentry->d_inode); |
529565dcb
|
1109 1110 1111 |
if (pipe) { if (off_out) return -ESPIPE; |
b92ce5589
|
1112 1113 1114 |
if (off_in) { if (in->f_op->llseek == no_llseek) return -EINVAL; |
cbb7e577e
|
1115 |
if (copy_from_user(&offset, off_in, sizeof(loff_t))) |
b92ce5589
|
1116 |
return -EFAULT; |
cbb7e577e
|
1117 1118 1119 |
off = &offset; } else off = &in->f_pos; |
529565dcb
|
1120 |
|
a4514ebd8
|
1121 1122 1123 1124 1125 1126 |
ret = do_splice_to(in, off, pipe, len, flags); if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) ret = -EFAULT; return ret; |
529565dcb
|
1127 |
} |
5274f052e
|
1128 1129 1130 |
return -EINVAL; } |
912d35f86
|
1131 1132 1133 1134 1135 1136 1137 1138 1139 |
/* * Map an iov into an array of pages and offset/length tupples. With the * partial_page structure, we can map several non-contiguous ranges into * our ones pages[] map instead of splitting that operation into pieces. * Could easily be exported as a generic helper for other users, in which * case one would probably want to add a 'max_nr_pages' parameter as well. */ static int get_iovec_page_array(const struct iovec __user *iov, unsigned int nr_vecs, struct page **pages, |
7afa6fd03
|
1140 |
struct partial_page *partial, int aligned) |
912d35f86
|
1141 1142 |
{ int buffers = 0, error = 0; |
912d35f86
|
1143 1144 |
while (nr_vecs) { unsigned long off, npages; |
757239576
|
1145 |
struct iovec entry; |
912d35f86
|
1146 1147 1148 |
void __user *base; size_t len; int i; |
757239576
|
1149 |
error = -EFAULT; |
bc40d73c9
|
1150 |
if (copy_from_user(&entry, iov, sizeof(entry))) |
912d35f86
|
1151 |
break; |
757239576
|
1152 1153 |
base = entry.iov_base; len = entry.iov_len; |
912d35f86
|
1154 1155 1156 |
/* * Sanity check this iovec. 0 read succeeds. */ |
757239576
|
1157 |
error = 0; |
912d35f86
|
1158 1159 1160 |
if (unlikely(!len)) break; error = -EFAULT; |
712a30e63
|
1161 |
if (!access_ok(VERIFY_READ, base, len)) |
912d35f86
|
1162 1163 1164 1165 1166 1167 1168 |
break; /* * Get this base offset and number of pages, then map * in the user pages. */ off = (unsigned long) base & ~PAGE_MASK; |
7afa6fd03
|
1169 1170 1171 1172 1173 1174 1175 1176 |
/* * If asked for alignment, the offset must be zero and the * length a multiple of the PAGE_SIZE. */ error = -EINVAL; if (aligned && (off || len & ~PAGE_MASK)) break; |
912d35f86
|
1177 1178 1179 |
npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; if (npages > PIPE_BUFFERS - buffers) npages = PIPE_BUFFERS - buffers; |
bc40d73c9
|
1180 1181 |
error = get_user_pages_fast((unsigned long)base, npages, 0, &pages[buffers]); |
912d35f86
|
1182 1183 1184 1185 1186 1187 1188 1189 |
if (unlikely(error <= 0)) break; /* * Fill this contiguous range into the partial page map. */ for (i = 0; i < error; i++) { |
7591489a8
|
1190 |
const int plen = min_t(size_t, len, PAGE_SIZE - off); |
912d35f86
|
1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 |
partial[buffers].offset = off; partial[buffers].len = plen; off = 0; len -= plen; buffers++; } /* * We didn't complete this iov, stop here since it probably * means we have to move some of this into a pipe to * be able to continue. */ if (len) break; /* * Don't continue if we mapped fewer pages than we asked for, * or if we mapped the max number of pages that we have * room for. */ if (error < npages || buffers == PIPE_BUFFERS) break; nr_vecs--; iov++; } |
912d35f86
|
1219 1220 1221 1222 1223 |
if (buffers) return buffers; return error; } |
6a14b90bb
|
1224 1225 1226 1227 1228 |
static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { char *src; int ret; |
cac36bb06
|
1229 |
ret = buf->ops->confirm(pipe, buf); |
6a14b90bb
|
1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 |
if (unlikely(ret)) return ret; /* * See if we can use the atomic maps, by prefaulting in the * pages and doing an atomic copy */ if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { src = buf->ops->map(pipe, buf, 1); ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, sd->len); buf->ops->unmap(pipe, buf, src); if (!ret) { ret = sd->len; goto out; } } /* * No dice, use slow non-atomic map and copy */ src = buf->ops->map(pipe, buf, 0); ret = sd->len; if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) ret = -EFAULT; |
6866bef40
|
1256 |
buf->ops->unmap(pipe, buf, src); |
6a14b90bb
|
1257 1258 1259 |
out: if (ret > 0) sd->u.userptr += ret; |
6a14b90bb
|
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 |
return ret; } /* * For lack of a better implementation, implement vmsplice() to userspace * as a simple copy of the pipes pages to the user iov. */ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { struct pipe_inode_info *pipe; struct splice_desc sd; ssize_t size; int error; long ret; pipe = pipe_info(file->f_path.dentry->d_inode); if (!pipe) return -EBADF; |
61e0d47c3
|
1279 |
pipe_lock(pipe); |
6a14b90bb
|
1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 |
error = ret = 0; while (nr_segs) { void __user *base; size_t len; /* * Get user address base and length for this iovec. */ error = get_user(base, &iov->iov_base); if (unlikely(error)) break; error = get_user(len, &iov->iov_len); if (unlikely(error)) break; /* * Sanity check this iovec. 0 read succeeds. */ if (unlikely(!len)) break; if (unlikely(!base)) { error = -EFAULT; break; } |
8811930dc
|
1305 1306 1307 1308 |
if (unlikely(!access_ok(VERIFY_WRITE, base, len))) { error = -EFAULT; break; } |
6a14b90bb
|
1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 |
sd.len = 0; sd.total_len = len; sd.flags = flags; sd.u.userptr = base; sd.pos = 0; size = __splice_from_pipe(pipe, &sd, pipe_to_user); if (size < 0) { if (!ret) ret = size; break; } ret += size; if (size < len) break; nr_segs--; iov++; } |
61e0d47c3
|
1331 |
pipe_unlock(pipe); |
6a14b90bb
|
1332 1333 1334 1335 1336 1337 |
if (!ret) ret = error; return ret; } |
912d35f86
|
1338 1339 1340 1341 |
/* * vmsplice splices a user address range into a pipe. It can be thought of * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. |
912d35f86
|
1342 |
*/ |
6a14b90bb
|
1343 1344 |
static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) |
912d35f86
|
1345 |
{ |
ddac0d39c
|
1346 |
struct pipe_inode_info *pipe; |
912d35f86
|
1347 1348 1349 1350 1351 1352 1353 |
struct page *pages[PIPE_BUFFERS]; struct partial_page partial[PIPE_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, .flags = flags, .ops = &user_page_pipe_buf_ops, |
bbdfc2f70
|
1354 |
.spd_release = spd_release_page, |
912d35f86
|
1355 |
}; |
0f7fc9e4d
|
1356 |
pipe = pipe_info(file->f_path.dentry->d_inode); |
ddac0d39c
|
1357 |
if (!pipe) |
912d35f86
|
1358 |
return -EBADF; |
912d35f86
|
1359 |
|
7afa6fd03
|
1360 1361 |
spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, flags & SPLICE_F_GIFT); |
912d35f86
|
1362 1363 |
if (spd.nr_pages <= 0) return spd.nr_pages; |
00522fb41
|
1364 |
return splice_to_pipe(pipe, &spd); |
912d35f86
|
1365 |
} |
6a14b90bb
|
1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 |
/* * Note that vmsplice only really supports true splicing _from_ user memory * to a pipe, not the other way around. Splicing from user memory is a simple * operation that can be supported without any funky alignment restrictions * or nasty vm tricks. We simply map in the user memory and fill them into * a pipe. The reverse isn't quite as easy, though. There are two possible * solutions for that: * * - memcpy() the data internally, at which point we might as well just * do a regular read() on the buffer anyway. * - Lots of nasty vm tricks, that are neither fast nor flexible (it * has restriction limitations on both ends of the pipe). * * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ |
836f92adf
|
1382 1383 |
SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, unsigned long, nr_segs, unsigned int, flags) |
912d35f86
|
1384 1385 1386 1387 |
{ struct file *file; long error; int fput; |
6a14b90bb
|
1388 1389 1390 1391 |
if (unlikely(nr_segs > UIO_MAXIOV)) return -EINVAL; else if (unlikely(!nr_segs)) return 0; |
912d35f86
|
1392 1393 1394 1395 |
error = -EBADF; file = fget_light(fd, &fput); if (file) { if (file->f_mode & FMODE_WRITE) |
6a14b90bb
|
1396 1397 1398 |
error = vmsplice_to_pipe(file, iov, nr_segs, flags); else if (file->f_mode & FMODE_READ) error = vmsplice_to_user(file, iov, nr_segs, flags); |
912d35f86
|
1399 1400 1401 1402 1403 1404 |
fput_light(file, fput); } return error; } |
836f92adf
|
1405 1406 1407 |
SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) |
5274f052e
|
1408 1409 1410 1411 1412 1413 1414 1415 1416 |
{ long error; struct file *in, *out; int fput_in, fput_out; if (unlikely(!len)) return 0; error = -EBADF; |
529565dcb
|
1417 |
in = fget_light(fd_in, &fput_in); |
5274f052e
|
1418 1419 |
if (in) { if (in->f_mode & FMODE_READ) { |
529565dcb
|
1420 |
out = fget_light(fd_out, &fput_out); |
5274f052e
|
1421 1422 |
if (out) { if (out->f_mode & FMODE_WRITE) |
529565dcb
|
1423 1424 1425 |
error = do_splice(in, off_in, out, off_out, len, flags); |
5274f052e
|
1426 1427 1428 1429 1430 1431 1432 1433 1434 |
fput_light(out, fput_out); } } fput_light(in, fput_in); } return error; } |
70524490e
|
1435 1436 |
/* |
aadd06e5c
|
1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 |
* Make sure there's data to read. Wait for input if we can, otherwise * return an appropriate error. */ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; /* * Check ->nrbufs without the inode lock first. This function * is speculative anyways, so missing one is ok. */ if (pipe->nrbufs) return 0; ret = 0; |
61e0d47c3
|
1452 |
pipe_lock(pipe); |
aadd06e5c
|
1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 |
while (!pipe->nrbufs) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!pipe->writers) break; if (!pipe->waiting_writers) { if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } } pipe_wait(pipe); } |
61e0d47c3
|
1469 |
pipe_unlock(pipe); |
aadd06e5c
|
1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 |
return ret; } /* * Make sure there's writeable room. Wait for room if we can, otherwise * return an appropriate error. */ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; /* * Check ->nrbufs without the inode lock first. This function * is speculative anyways, so missing one is ok. */ if (pipe->nrbufs < PIPE_BUFFERS) return 0; ret = 0; |
61e0d47c3
|
1489 |
pipe_lock(pipe); |
aadd06e5c
|
1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 |
while (pipe->nrbufs >= PIPE_BUFFERS) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; break; } if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } pipe->waiting_writers++; pipe_wait(pipe); pipe->waiting_writers--; } |
61e0d47c3
|
1509 |
pipe_unlock(pipe); |
aadd06e5c
|
1510 1511 1512 1513 |
return ret; } /* |
/*
 * Link contents of ipipe to opipe.
 */
static int link_pipe(struct pipe_inode_info *ipipe,
		     struct pipe_inode_info *opipe,
		     size_t len, unsigned int flags)
{
	struct pipe_buffer *ibuf, *obuf;
	int ret = 0, i = 0, nbuf;

	/*
	 * Potential ABBA deadlock, work around it by ordering lock
	 * grabbing by pipe info address. Otherwise two different processes
	 * could deadlock (one doing tee from A -> B, the other from B -> A).
	 */
	pipe_double_lock(ipipe, opipe);

	do {
		if (!opipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		/*
		 * If we have iterated all input buffers or ran out of
		 * output room, break.
		 */
		if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
			break;

		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
		nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);

		/*
		 * Get a reference to this pipe buffer,
		 * so we can copy the contents over.
		 */
		ibuf->ops->get(ipipe, ibuf);

		obuf = opipe->bufs + nbuf;
		*obuf = *ibuf;

		/*
		 * Don't inherit the gift flag, we need to
		 * prevent multiple steals of this page.
		 */
		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;

		if (obuf->len > len)
			obuf->len = len;

		opipe->nrbufs++;
		ret += obuf->len;
		len -= obuf->len;
		i++;
	} while (len);

	/*
	 * return EAGAIN if we have the potential of some data in the
	 * future, otherwise just return 0
	 */
	if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
		ret = -EAGAIN;

	pipe_unlock(ipipe);
	pipe_unlock(opipe);

	/*
	 * If we put data in the output pipe, wakeup any potential readers.
	 */
	if (ret > 0) {
		smp_mb();
		if (waitqueue_active(&opipe->wait))
			wake_up_interruptible(&opipe->wait);
		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
	}

	return ret;
}

/*
 * This is a tee(1) implementation that works on pipes. It doesn't copy
 * any data, it simply references the 'in' pages on the 'out' pipe.
 * The 'flags' used are the SPLICE_F_* variants, currently the only
 * applicable one is SPLICE_F_NONBLOCK.
 */
static long do_tee(struct file *in, struct file *out, size_t len,
		   unsigned int flags)
{
	struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
	struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
	int ret = -EINVAL;

	/*
	 * Duplicate the contents of ipipe to opipe without actually
	 * copying the data.
	 */
	if (ipipe && opipe && ipipe != opipe) {
		/*
		 * Keep going, unless we encounter an error. The ipipe/opipe
		 * ordering doesn't really matter.
		 */
		ret = link_ipipe_prep(ipipe, flags);
		if (!ret) {
			ret = link_opipe_prep(opipe, flags);
			if (!ret)
				ret = link_pipe(ipipe, opipe, len, flags);
		}
	}

	return ret;
}
836f92adf
|
1625 |
SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) |
70524490e
|
1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 |
{ struct file *in; int error, fput_in; if (unlikely(!len)) return 0; error = -EBADF; in = fget_light(fdin, &fput_in); if (in) { if (in->f_mode & FMODE_READ) { int fput_out; struct file *out = fget_light(fdout, &fput_out); if (out) { if (out->f_mode & FMODE_WRITE) error = do_tee(in, out, len, flags); fput_light(out, fput_out); } } fput_light(in, fput_in); } return error; } |