Blame view

fs/splice.c 34.4 KB
5274f052e   Jens Axboe   [PATCH] Introduce...
1
2
3
4
5
6
7
8
9
10
11
  /*
   * "splice": joining two ropes together by interweaving their strands.
   *
   * This is the "extended pipe" functionality, where a pipe is used as
   * an arbitrary in-memory buffer. Think of a pipe as a small kernel
   * buffer that you can use to transfer data from one end to the other.
   *
   * The traditional unix read/write is extended with a "splice()" operation
   * that transfers data buffers to or from a pipe buffer.
   *
   * Named by Larry McVoy, original implementation from Linus, extended by
c2058e061   Jens Axboe   [PATCH] splice: a...
12
13
   * Jens to support splicing to files, network, direct splicing, etc and
   * fixing lots of bugs.
5274f052e   Jens Axboe   [PATCH] Introduce...
14
   *
0fe234795   Jens Axboe   [PATCH] Update ax...
15
   * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
c2058e061   Jens Axboe   [PATCH] splice: a...
16
17
   * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
   * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
5274f052e   Jens Axboe   [PATCH] Introduce...
18
19
20
21
22
23
24
   *
   */
  #include <linux/fs.h>
  #include <linux/file.h>
  #include <linux/pagemap.h>
  #include <linux/pipe_fs_i.h>
  #include <linux/mm_inline.h>
5abc97aa2   Jens Axboe   [PATCH] splice: a...
25
  #include <linux/swap.h>
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
26
27
  #include <linux/writeback.h>
  #include <linux/buffer_head.h>
a0f067802   Jeff Garzik   [PATCH] splice ex...
28
  #include <linux/module.h>
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
29
  #include <linux/syscalls.h>
912d35f86   Jens Axboe   [PATCH] Add suppo...
30
  #include <linux/uio.h>
5274f052e   Jens Axboe   [PATCH] Introduce...
31

912d35f86   Jens Axboe   [PATCH] Add suppo...
32
33
34
35
36
37
  /*
   * Describes the part of one page in a splice page map that carries data.
   * splice_to_pipe() copies these values into the pipe buffer's offset/len.
   */
  struct partial_page {
  	unsigned int offset;	/* byte offset of the data within the page */
  	unsigned int len;	/* number of bytes of data at that offset */
  };
  
  /*
   * Passed to splice_to_pipe(): the set of pages to link into a pipe,
   * the used range of each page, and the flags and buffer operations that
   * should be attached to the resulting pipe buffers.
   */
  struct splice_pipe_desc {
  	struct page **pages;		/* page map */
  	struct partial_page *partial;	/* pages[] may not be contig */
  	int nr_pages;			/* number of pages in map */
  	unsigned int flags;		/* splice flags */
  	struct pipe_buf_operations *ops;/* ops associated with output pipe */
  };
83f9135bd   Jens Axboe   [PATCH] splice: a...
47
48
49
50
51
52
  /*
   * Attempt to steal a page from a pipe buffer. This should perhaps go into
   * a vm helper function, it's already simplified quite a bit by the
   * addition of remove_mapping(). If success is returned, the caller may
   * attempt to reuse this page for another destination.
   */
76ad4d111   Jens Axboe   [PATCH] splice: r...
53
  static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
5abc97aa2   Jens Axboe   [PATCH] splice: a...
54
55
56
  				     struct pipe_buffer *buf)
  {
  	struct page *page = buf->page;
9e94cd4fd   Jens Axboe   [PATCH] splice: r...
57
  	struct address_space *mapping;
5abc97aa2   Jens Axboe   [PATCH] splice: a...
58

9e0267c26   Jens Axboe   [PATCH] splice: f...
59
  	lock_page(page);
9e94cd4fd   Jens Axboe   [PATCH] splice: r...
60
61
62
  	mapping = page_mapping(page);
  	if (mapping) {
  		WARN_ON(!PageUptodate(page));
5abc97aa2   Jens Axboe   [PATCH] splice: a...
63

9e94cd4fd   Jens Axboe   [PATCH] splice: r...
64
65
66
67
68
69
70
71
72
  		/*
  		 * At least for ext2 with nobh option, we need to wait on
  		 * writeback completing on this page, since we'll remove it
  		 * from the pagecache.  Otherwise truncate wont wait on the
  		 * page, allowing the disk blocks to be reused by someone else
  		 * before we actually wrote our data to them. fs corruption
  		 * ensues.
  		 */
  		wait_on_page_writeback(page);
ad8d6f0a7   Jens Axboe   [PATCH] splice: p...
73

9e94cd4fd   Jens Axboe   [PATCH] splice: r...
74
75
  		if (PagePrivate(page))
  			try_to_release_page(page, mapping_gfp_mask(mapping));
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
76

9e94cd4fd   Jens Axboe   [PATCH] splice: r...
77
78
79
80
81
82
83
84
  		/*
  		 * If we succeeded in removing the mapping, set LRU flag
  		 * and return good.
  		 */
  		if (remove_mapping(mapping, page)) {
  			buf->flags |= PIPE_BUF_FLAG_LRU;
  			return 0;
  		}
9e0267c26   Jens Axboe   [PATCH] splice: f...
85
  	}
5abc97aa2   Jens Axboe   [PATCH] splice: a...
86

9e94cd4fd   Jens Axboe   [PATCH] splice: r...
87
88
89
90
91
92
  	/*
  	 * Raced with truncate or failed to remove page from current
  	 * address space, unlock and return failure.
  	 */
  	unlock_page(page);
  	return 1;
5abc97aa2   Jens Axboe   [PATCH] splice: a...
93
  }
76ad4d111   Jens Axboe   [PATCH] splice: r...
94
  static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
5274f052e   Jens Axboe   [PATCH] Introduce...
95
96
97
  					struct pipe_buffer *buf)
  {
  	page_cache_release(buf->page);
1432873af   Jens Axboe   [PATCH] splice: L...
98
  	buf->flags &= ~PIPE_BUF_FLAG_LRU;
5274f052e   Jens Axboe   [PATCH] Introduce...
99
  }
76ad4d111   Jens Axboe   [PATCH] splice: r...
100
  static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
f84d75199   Jens Axboe   [PATCH] pipe: int...
101
  				   struct pipe_buffer *buf)
5274f052e   Jens Axboe   [PATCH] Introduce...
102
103
  {
  	struct page *page = buf->page;
49d0b21be   Jens Axboe   [PATCH] splice: o...
104
  	int err;
5274f052e   Jens Axboe   [PATCH] Introduce...
105
106
  
  	if (!PageUptodate(page)) {
49d0b21be   Jens Axboe   [PATCH] splice: o...
107
108
109
110
  		lock_page(page);
  
  		/*
  		 * Page got truncated/unhashed. This will cause a 0-byte
73d62d83e   Ingo Molnar   [PATCH] splice: c...
111
  		 * splice, if this is the first page.
49d0b21be   Jens Axboe   [PATCH] splice: o...
112
113
114
115
116
  		 */
  		if (!page->mapping) {
  			err = -ENODATA;
  			goto error;
  		}
5274f052e   Jens Axboe   [PATCH] Introduce...
117

49d0b21be   Jens Axboe   [PATCH] splice: o...
118
  		/*
73d62d83e   Ingo Molnar   [PATCH] splice: c...
119
  		 * Uh oh, read-error from disk.
49d0b21be   Jens Axboe   [PATCH] splice: o...
120
121
122
123
124
125
126
  		 */
  		if (!PageUptodate(page)) {
  			err = -EIO;
  			goto error;
  		}
  
  		/*
f84d75199   Jens Axboe   [PATCH] pipe: int...
127
  		 * Page is ok afterall, we are done.
49d0b21be   Jens Axboe   [PATCH] splice: o...
128
  		 */
5274f052e   Jens Axboe   [PATCH] Introduce...
129
  		unlock_page(page);
5274f052e   Jens Axboe   [PATCH] Introduce...
130
  	}
f84d75199   Jens Axboe   [PATCH] pipe: int...
131
  	return 0;
49d0b21be   Jens Axboe   [PATCH] splice: o...
132
133
  error:
  	unlock_page(page);
f84d75199   Jens Axboe   [PATCH] pipe: int...
134
  	return err;
70524490e   Jens Axboe   [PATCH] splice: a...
135
  }
5274f052e   Jens Axboe   [PATCH] Introduce...
136
137
  static struct pipe_buf_operations page_cache_pipe_buf_ops = {
  	.can_merge = 0,
f84d75199   Jens Axboe   [PATCH] pipe: int...
138
139
140
  	.map = generic_pipe_buf_map,
  	.unmap = generic_pipe_buf_unmap,
  	.pin = page_cache_pipe_buf_pin,
5274f052e   Jens Axboe   [PATCH] Introduce...
141
  	.release = page_cache_pipe_buf_release,
5abc97aa2   Jens Axboe   [PATCH] splice: a...
142
  	.steal = page_cache_pipe_buf_steal,
f84d75199   Jens Axboe   [PATCH] pipe: int...
143
  	.get = generic_pipe_buf_get,
5274f052e   Jens Axboe   [PATCH] Introduce...
144
  };
912d35f86   Jens Axboe   [PATCH] Add suppo...
145
146
147
  static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
  				    struct pipe_buffer *buf)
  {
7afa6fd03   Jens Axboe   [PATCH] vmsplice:...
148
149
  	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
  		return 1;
1432873af   Jens Axboe   [PATCH] splice: L...
150
  	buf->flags |= PIPE_BUF_FLAG_LRU;
330ab7161   Jens Axboe   [PATCH] vmsplice:...
151
  	return generic_pipe_buf_steal(pipe, buf);
912d35f86   Jens Axboe   [PATCH] Add suppo...
152
153
154
155
  }
  
  static struct pipe_buf_operations user_page_pipe_buf_ops = {
  	.can_merge = 0,
f84d75199   Jens Axboe   [PATCH] pipe: int...
156
157
158
  	.map = generic_pipe_buf_map,
  	.unmap = generic_pipe_buf_unmap,
  	.pin = generic_pipe_buf_pin,
912d35f86   Jens Axboe   [PATCH] Add suppo...
159
160
  	.release = page_cache_pipe_buf_release,
  	.steal = user_page_pipe_buf_steal,
f84d75199   Jens Axboe   [PATCH] pipe: int...
161
  	.get = generic_pipe_buf_get,
912d35f86   Jens Axboe   [PATCH] Add suppo...
162
  };
83f9135bd   Jens Axboe   [PATCH] splice: a...
163
164
165
166
  /*
   * Pipe output worker. This sets up our pipe format with the page cache
   * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
   */
00522fb41   Jens Axboe   [PATCH] splice: r...
167
168
  static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
  			      struct splice_pipe_desc *spd)
5274f052e   Jens Axboe   [PATCH] Introduce...
169
  {
912d35f86   Jens Axboe   [PATCH] Add suppo...
170
  	int ret, do_wakeup, page_nr;
5274f052e   Jens Axboe   [PATCH] Introduce...
171
172
173
  
  	ret = 0;
  	do_wakeup = 0;
912d35f86   Jens Axboe   [PATCH] Add suppo...
174
  	page_nr = 0;
5274f052e   Jens Axboe   [PATCH] Introduce...
175

3a326a2ce   Ingo Molnar   [PATCH] introduce...
176
177
  	if (pipe->inode)
  		mutex_lock(&pipe->inode->i_mutex);
5274f052e   Jens Axboe   [PATCH] Introduce...
178

5274f052e   Jens Axboe   [PATCH] Introduce...
179
  	for (;;) {
3a326a2ce   Ingo Molnar   [PATCH] introduce...
180
  		if (!pipe->readers) {
5274f052e   Jens Axboe   [PATCH] Introduce...
181
182
183
184
185
  			send_sig(SIGPIPE, current, 0);
  			if (!ret)
  				ret = -EPIPE;
  			break;
  		}
6f767b042   Jens Axboe   [PATCH] splice: s...
186
187
  		if (pipe->nrbufs < PIPE_BUFFERS) {
  			int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
3a326a2ce   Ingo Molnar   [PATCH] introduce...
188
  			struct pipe_buffer *buf = pipe->bufs + newbuf;
5274f052e   Jens Axboe   [PATCH] Introduce...
189

912d35f86   Jens Axboe   [PATCH] Add suppo...
190
191
192
193
  			buf->page = spd->pages[page_nr];
  			buf->offset = spd->partial[page_nr].offset;
  			buf->len = spd->partial[page_nr].len;
  			buf->ops = spd->ops;
7afa6fd03   Jens Axboe   [PATCH] vmsplice:...
194
195
  			if (spd->flags & SPLICE_F_GIFT)
  				buf->flags |= PIPE_BUF_FLAG_GIFT;
6f767b042   Jens Axboe   [PATCH] splice: s...
196
  			pipe->nrbufs++;
912d35f86   Jens Axboe   [PATCH] Add suppo...
197
198
  			page_nr++;
  			ret += buf->len;
6f767b042   Jens Axboe   [PATCH] splice: s...
199
200
  			if (pipe->inode)
  				do_wakeup = 1;
5274f052e   Jens Axboe   [PATCH] Introduce...
201

912d35f86   Jens Axboe   [PATCH] Add suppo...
202
  			if (!--spd->nr_pages)
5274f052e   Jens Axboe   [PATCH] Introduce...
203
  				break;
6f767b042   Jens Axboe   [PATCH] splice: s...
204
  			if (pipe->nrbufs < PIPE_BUFFERS)
5274f052e   Jens Axboe   [PATCH] Introduce...
205
206
207
208
  				continue;
  
  			break;
  		}
912d35f86   Jens Axboe   [PATCH] Add suppo...
209
  		if (spd->flags & SPLICE_F_NONBLOCK) {
29e350944   Linus Torvalds   splice: add SPLIC...
210
211
212
213
  			if (!ret)
  				ret = -EAGAIN;
  			break;
  		}
5274f052e   Jens Axboe   [PATCH] Introduce...
214
215
216
217
218
219
220
  		if (signal_pending(current)) {
  			if (!ret)
  				ret = -ERESTARTSYS;
  			break;
  		}
  
  		if (do_wakeup) {
c0bd1f650   Jens Axboe   [PATCH] splice: o...
221
  			smp_mb();
3a326a2ce   Ingo Molnar   [PATCH] introduce...
222
223
224
  			if (waitqueue_active(&pipe->wait))
  				wake_up_interruptible_sync(&pipe->wait);
  			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
5274f052e   Jens Axboe   [PATCH] Introduce...
225
226
  			do_wakeup = 0;
  		}
3a326a2ce   Ingo Molnar   [PATCH] introduce...
227
228
229
  		pipe->waiting_writers++;
  		pipe_wait(pipe);
  		pipe->waiting_writers--;
5274f052e   Jens Axboe   [PATCH] Introduce...
230
  	}
3a326a2ce   Ingo Molnar   [PATCH] introduce...
231
232
  	if (pipe->inode)
  		mutex_unlock(&pipe->inode->i_mutex);
5274f052e   Jens Axboe   [PATCH] Introduce...
233
234
  
  	if (do_wakeup) {
c0bd1f650   Jens Axboe   [PATCH] splice: o...
235
  		smp_mb();
3a326a2ce   Ingo Molnar   [PATCH] introduce...
236
237
238
  		if (waitqueue_active(&pipe->wait))
  			wake_up_interruptible(&pipe->wait);
  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
5274f052e   Jens Axboe   [PATCH] Introduce...
239
  	}
912d35f86   Jens Axboe   [PATCH] Add suppo...
240
241
  	while (page_nr < spd->nr_pages)
  		page_cache_release(spd->pages[page_nr++]);
5274f052e   Jens Axboe   [PATCH] Introduce...
242
243
244
  
  	return ret;
  }
3a326a2ce   Ingo Molnar   [PATCH] introduce...
245
  static int
cbb7e577e   Jens Axboe   [PATCH] splice: p...
246
247
248
  __generic_file_splice_read(struct file *in, loff_t *ppos,
  			   struct pipe_inode_info *pipe, size_t len,
  			   unsigned int flags)
5274f052e   Jens Axboe   [PATCH] Introduce...
249
250
  {
  	struct address_space *mapping = in->f_mapping;
912d35f86   Jens Axboe   [PATCH] Add suppo...
251
  	unsigned int loff, nr_pages;
16c523dda   Jens Axboe   [PATCH] splice: c...
252
  	struct page *pages[PIPE_BUFFERS];
912d35f86   Jens Axboe   [PATCH] Add suppo...
253
  	struct partial_page partial[PIPE_BUFFERS];
5274f052e   Jens Axboe   [PATCH] Introduce...
254
  	struct page *page;
91ad66ef4   Jens Axboe   [PATCH] splice: c...
255
256
  	pgoff_t index, end_index;
  	loff_t isize;
912d35f86   Jens Axboe   [PATCH] Add suppo...
257
  	size_t total_len;
eb20796bf   Jens Axboe   [PATCH] splice: m...
258
  	int error, page_nr;
912d35f86   Jens Axboe   [PATCH] Add suppo...
259
260
261
262
263
264
  	struct splice_pipe_desc spd = {
  		.pages = pages,
  		.partial = partial,
  		.flags = flags,
  		.ops = &page_cache_pipe_buf_ops,
  	};
5274f052e   Jens Axboe   [PATCH] Introduce...
265

cbb7e577e   Jens Axboe   [PATCH] splice: p...
266
  	index = *ppos >> PAGE_CACHE_SHIFT;
912d35f86   Jens Axboe   [PATCH] Add suppo...
267
268
  	loff = *ppos & ~PAGE_CACHE_MASK;
  	nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
5274f052e   Jens Axboe   [PATCH] Introduce...
269
270
271
272
273
  
  	if (nr_pages > PIPE_BUFFERS)
  		nr_pages = PIPE_BUFFERS;
  
  	/*
73d62d83e   Ingo Molnar   [PATCH] splice: c...
274
  	 * Initiate read-ahead on this page range. however, don't call into
0b749ce38   Jens Axboe   [PATCH] splice: b...
275
276
  	 * read-ahead if this is a non-zero offset (we are likely doing small
  	 * chunk splice and the page is already there) for a single page.
5274f052e   Jens Axboe   [PATCH] Introduce...
277
  	 */
eb645a24d   Jens Axboe   [PATCH] splice: s...
278
279
  	if (!loff || nr_pages > 1)
  		page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
5274f052e   Jens Axboe   [PATCH] Introduce...
280
281
  
  	/*
73d62d83e   Ingo Molnar   [PATCH] splice: c...
282
  	 * Now fill in the holes:
5274f052e   Jens Axboe   [PATCH] Introduce...
283
  	 */
7480a9043   Jens Axboe   [PATCH] splice: s...
284
  	error = 0;
912d35f86   Jens Axboe   [PATCH] Add suppo...
285
  	total_len = 0;
82aa5d618   Jens Axboe   [PATCH] splice: f...
286

eb20796bf   Jens Axboe   [PATCH] splice: m...
287
288
289
290
  	/*
  	 * Lookup the (hopefully) full range of pages we need.
  	 */
  	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
82aa5d618   Jens Axboe   [PATCH] splice: f...
291

eb20796bf   Jens Axboe   [PATCH] splice: m...
292
293
294
295
296
297
  	/*
  	 * If find_get_pages_contig() returned fewer pages than we needed,
  	 * allocate the rest.
  	 */
  	index += spd.nr_pages;
  	while (spd.nr_pages < nr_pages) {
82aa5d618   Jens Axboe   [PATCH] splice: f...
298
  		/*
eb20796bf   Jens Axboe   [PATCH] splice: m...
299
300
  		 * Page could be there, find_get_pages_contig() breaks on
  		 * the first hole.
5274f052e   Jens Axboe   [PATCH] Introduce...
301
  		 */
7480a9043   Jens Axboe   [PATCH] splice: s...
302
303
304
  		page = find_get_page(mapping, index);
  		if (!page) {
  			/*
e27dedd84   Jens Axboe   [PATCH] splice: c...
305
306
307
308
309
310
  			 * Make sure the read-ahead engine is notified
  			 * about this failure.
  			 */
  			handle_ra_miss(mapping, &in->f_ra, index);
  
  			/*
eb20796bf   Jens Axboe   [PATCH] splice: m...
311
  			 * page didn't exist, allocate one.
7480a9043   Jens Axboe   [PATCH] splice: s...
312
313
314
315
316
317
  			 */
  			page = page_cache_alloc_cold(mapping);
  			if (!page)
  				break;
  
  			error = add_to_page_cache_lru(page, mapping, index,
eb20796bf   Jens Axboe   [PATCH] splice: m...
318
  					      mapping_gfp_mask(mapping));
7480a9043   Jens Axboe   [PATCH] splice: s...
319
320
  			if (unlikely(error)) {
  				page_cache_release(page);
a0548871e   Jens Axboe   [PATCH] splice: r...
321
322
  				if (error == -EEXIST)
  					continue;
7480a9043   Jens Axboe   [PATCH] splice: s...
323
324
  				break;
  			}
eb20796bf   Jens Axboe   [PATCH] splice: m...
325
326
327
328
329
  			/*
  			 * add_to_page_cache() locks the page, unlock it
  			 * to avoid convoluting the logic below even more.
  			 */
  			unlock_page(page);
7480a9043   Jens Axboe   [PATCH] splice: s...
330
  		}
eb20796bf   Jens Axboe   [PATCH] splice: m...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
  		pages[spd.nr_pages++] = page;
  		index++;
  	}
  
  	/*
  	 * Now loop over the map and see if we need to start IO on any
  	 * pages, fill in the partial map, etc.
  	 */
  	index = *ppos >> PAGE_CACHE_SHIFT;
  	nr_pages = spd.nr_pages;
  	spd.nr_pages = 0;
  	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
  		unsigned int this_len;
  
  		if (!len)
  			break;
  
  		/*
  		 * this_len is the max we'll use from this page
  		 */
  		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
  		page = pages[page_nr];
7480a9043   Jens Axboe   [PATCH] splice: s...
353
354
355
356
  		/*
  		 * If the page isn't uptodate, we may need to start io on it
  		 */
  		if (!PageUptodate(page)) {
c4f895cbe   Jens Axboe   [PATCH] splice: c...
357
358
359
360
361
362
  			/*
  			 * If in nonblock mode then dont block on waiting
  			 * for an in-flight io page
  			 */
  			if (flags & SPLICE_F_NONBLOCK)
  				break;
7480a9043   Jens Axboe   [PATCH] splice: s...
363
364
365
366
367
368
369
370
371
  			lock_page(page);
  
  			/*
  			 * page was truncated, stop here. if this isn't the
  			 * first page, we'll just complete what we already
  			 * added
  			 */
  			if (!page->mapping) {
  				unlock_page(page);
7480a9043   Jens Axboe   [PATCH] splice: s...
372
373
374
375
376
377
378
379
380
  				break;
  			}
  			/*
  			 * page was already under io and is now done, great
  			 */
  			if (PageUptodate(page)) {
  				unlock_page(page);
  				goto fill_it;
  			}
5274f052e   Jens Axboe   [PATCH] Introduce...
381

7480a9043   Jens Axboe   [PATCH] splice: s...
382
383
384
385
  			/*
  			 * need to read in the page
  			 */
  			error = mapping->a_ops->readpage(in, page);
5274f052e   Jens Axboe   [PATCH] Introduce...
386
  			if (unlikely(error)) {
eb20796bf   Jens Axboe   [PATCH] splice: m...
387
388
389
390
391
392
  				/*
  				 * We really should re-lookup the page here,
  				 * but it complicates things a lot. Instead
  				 * lets just do what we already stored, and
  				 * we'll get it the next time we are called.
  				 */
7480a9043   Jens Axboe   [PATCH] splice: s...
393
  				if (error == AOP_TRUNCATED_PAGE)
eb20796bf   Jens Axboe   [PATCH] splice: m...
394
  					error = 0;
5274f052e   Jens Axboe   [PATCH] Introduce...
395
396
  				break;
  			}
91ad66ef4   Jens Axboe   [PATCH] splice: c...
397
398
399
400
401
402
  
  			/*
  			 * i_size must be checked after ->readpage().
  			 */
  			isize = i_size_read(mapping->host);
  			end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
eb20796bf   Jens Axboe   [PATCH] splice: m...
403
  			if (unlikely(!isize || index > end_index))
91ad66ef4   Jens Axboe   [PATCH] splice: c...
404
  				break;
91ad66ef4   Jens Axboe   [PATCH] splice: c...
405
406
407
408
409
410
411
  
  			/*
  			 * if this is the last page, see if we need to shrink
  			 * the length and stop
  			 */
  			if (end_index == index) {
  				loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
eb20796bf   Jens Axboe   [PATCH] splice: m...
412
  				if (total_len + loff > isize)
91ad66ef4   Jens Axboe   [PATCH] splice: c...
413
  					break;
91ad66ef4   Jens Axboe   [PATCH] splice: c...
414
415
416
  				/*
  				 * force quit after adding this page
  				 */
eb20796bf   Jens Axboe   [PATCH] splice: m...
417
  				len = this_len;
82aa5d618   Jens Axboe   [PATCH] splice: f...
418
  				this_len = min(this_len, loff);
912d35f86   Jens Axboe   [PATCH] Add suppo...
419
  				loff = 0;
91ad66ef4   Jens Axboe   [PATCH] splice: c...
420
  			}
5274f052e   Jens Axboe   [PATCH] Introduce...
421
  		}
7480a9043   Jens Axboe   [PATCH] splice: s...
422
  fill_it:
eb20796bf   Jens Axboe   [PATCH] splice: m...
423
424
  		partial[page_nr].offset = loff;
  		partial[page_nr].len = this_len;
82aa5d618   Jens Axboe   [PATCH] splice: f...
425
  		len -= this_len;
912d35f86   Jens Axboe   [PATCH] Add suppo...
426
  		total_len += this_len;
91ad66ef4   Jens Axboe   [PATCH] splice: c...
427
  		loff = 0;
eb20796bf   Jens Axboe   [PATCH] splice: m...
428
429
  		spd.nr_pages++;
  		index++;
5274f052e   Jens Axboe   [PATCH] Introduce...
430
  	}
eb20796bf   Jens Axboe   [PATCH] splice: m...
431
432
433
434
435
436
  	/*
  	 * Release any pages at the end, if we quit early. 'i' is how far
  	 * we got, 'nr_pages' is how many pages are in the map.
  	 */
  	while (page_nr < nr_pages)
  		page_cache_release(pages[page_nr++]);
912d35f86   Jens Axboe   [PATCH] Add suppo...
437
  	if (spd.nr_pages)
00522fb41   Jens Axboe   [PATCH] splice: r...
438
  		return splice_to_pipe(pipe, &spd);
5274f052e   Jens Axboe   [PATCH] Introduce...
439

7480a9043   Jens Axboe   [PATCH] splice: s...
440
  	return error;
5274f052e   Jens Axboe   [PATCH] Introduce...
441
  }
83f9135bd   Jens Axboe   [PATCH] splice: a...
442
443
444
445
446
447
448
449
  /**
   * generic_file_splice_read - splice data from file to a pipe
   * @in:		file to splice from
   * @pipe:	pipe to splice to
   * @len:	number of bytes to splice
   * @flags:	splice modifier flags
   *
   * Will read pages from given file and fill them into a pipe.
83f9135bd   Jens Axboe   [PATCH] splice: a...
450
   */
cbb7e577e   Jens Axboe   [PATCH] splice: p...
451
452
453
  ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
  				 struct pipe_inode_info *pipe, size_t len,
  				 unsigned int flags)
5274f052e   Jens Axboe   [PATCH] Introduce...
454
455
456
457
458
459
  {
  	ssize_t spliced;
  	int ret;
  
  	ret = 0;
  	spliced = 0;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
460

5274f052e   Jens Axboe   [PATCH] Introduce...
461
  	while (len) {
cbb7e577e   Jens Axboe   [PATCH] splice: p...
462
  		ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
5274f052e   Jens Axboe   [PATCH] Introduce...
463

c4f895cbe   Jens Axboe   [PATCH] splice: c...
464
  		if (ret < 0)
5274f052e   Jens Axboe   [PATCH] Introduce...
465
  			break;
c4f895cbe   Jens Axboe   [PATCH] splice: c...
466
467
468
469
470
471
472
473
  		else if (!ret) {
  			if (spliced)
  				break;
  			if (flags & SPLICE_F_NONBLOCK) {
  				ret = -EAGAIN;
  				break;
  			}
  		}
5274f052e   Jens Axboe   [PATCH] Introduce...
474

cbb7e577e   Jens Axboe   [PATCH] splice: p...
475
  		*ppos += ret;
5274f052e   Jens Axboe   [PATCH] Introduce...
476
477
478
479
480
481
482
483
484
  		len -= ret;
  		spliced += ret;
  	}
  
  	if (spliced)
  		return spliced;
  
  	return ret;
  }
059a8f373   Jens Axboe   [PATCH] splice: e...
485
  EXPORT_SYMBOL(generic_file_splice_read);
5274f052e   Jens Axboe   [PATCH] Introduce...
486
  /*
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
487
   * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
016b661e2   Jens Axboe   [PATCH] splice: f...
488
   * using sendpage(). Return the number of bytes sent.
5274f052e   Jens Axboe   [PATCH] Introduce...
489
   */
76ad4d111   Jens Axboe   [PATCH] splice: r...
490
  static int pipe_to_sendpage(struct pipe_inode_info *pipe,
5274f052e   Jens Axboe   [PATCH] Introduce...
491
492
493
494
  			    struct pipe_buffer *buf, struct splice_desc *sd)
  {
  	struct file *file = sd->file;
  	loff_t pos = sd->pos;
f84d75199   Jens Axboe   [PATCH] pipe: int...
495
  	int ret, more;
5274f052e   Jens Axboe   [PATCH] Introduce...
496

76ad4d111   Jens Axboe   [PATCH] splice: r...
497
  	ret = buf->ops->pin(pipe, buf);
f84d75199   Jens Axboe   [PATCH] pipe: int...
498
499
  	if (!ret) {
  		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
5274f052e   Jens Axboe   [PATCH] Introduce...
500

f84d75199   Jens Axboe   [PATCH] pipe: int...
501
502
503
  		ret = file->f_op->sendpage(file, buf->page, buf->offset,
  					   sd->len, &pos, more);
  	}
5274f052e   Jens Axboe   [PATCH] Introduce...
504

016b661e2   Jens Axboe   [PATCH] splice: f...
505
  	return ret;
5274f052e   Jens Axboe   [PATCH] Introduce...
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
  }
  
  /*
   * This is a little more tricky than the file -> pipe splicing. There are
   * basically three cases:
   *
   *	- Destination page already exists in the address space and there
   *	  are users of it. For that case we have no other option that
   *	  copying the data. Tough luck.
   *	- Destination page already exists in the address space, but there
   *	  are no users of it. Make sure it's uptodate, then drop it. Fall
   *	  through to last case.
   *	- Destination page does not exist, we can add the pipe page to
   *	  the page cache and avoid the copy.
   *
83f9135bd   Jens Axboe   [PATCH] splice: a...
521
522
523
524
525
526
   * If asked to move pages to the output file (SPLICE_F_MOVE is set in
   * sd->flags), we attempt to migrate pages from the pipe to the output
   * file address space page cache. This is possible if no one else has
   * the pipe page referenced outside of the pipe and page cache. If
   * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
   * a new page in the output file page cache and fill/dirty that.
5274f052e   Jens Axboe   [PATCH] Introduce...
527
   */
76ad4d111   Jens Axboe   [PATCH] splice: r...
528
  static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
5274f052e   Jens Axboe   [PATCH] Introduce...
529
530
531
532
  			struct splice_desc *sd)
  {
  	struct file *file = sd->file;
  	struct address_space *mapping = file->f_mapping;
3e7ee3e7b   Jens Axboe   [PATCH] splice: f...
533
  	gfp_t gfp_mask = mapping_gfp_mask(mapping);
016b661e2   Jens Axboe   [PATCH] splice: f...
534
  	unsigned int offset, this_len;
5274f052e   Jens Axboe   [PATCH] Introduce...
535
  	struct page *page;
5274f052e   Jens Axboe   [PATCH] Introduce...
536
  	pgoff_t index;
3e7ee3e7b   Jens Axboe   [PATCH] splice: f...
537
  	int ret;
5274f052e   Jens Axboe   [PATCH] Introduce...
538
539
  
  	/*
49d0b21be   Jens Axboe   [PATCH] splice: o...
540
  	 * make sure the data in this buffer is uptodate
5274f052e   Jens Axboe   [PATCH] Introduce...
541
  	 */
76ad4d111   Jens Axboe   [PATCH] splice: r...
542
  	ret = buf->ops->pin(pipe, buf);
f84d75199   Jens Axboe   [PATCH] pipe: int...
543
544
  	if (unlikely(ret))
  		return ret;
5274f052e   Jens Axboe   [PATCH] Introduce...
545
546
547
  
  	index = sd->pos >> PAGE_CACHE_SHIFT;
  	offset = sd->pos & ~PAGE_CACHE_MASK;
016b661e2   Jens Axboe   [PATCH] splice: f...
548
549
550
  	this_len = sd->len;
  	if (this_len + offset > PAGE_CACHE_SIZE)
  		this_len = PAGE_CACHE_SIZE - offset;
5274f052e   Jens Axboe   [PATCH] Introduce...
551
  	/*
0568b409c   Jens Axboe   [PATCH] splice: f...
552
553
  	 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
  	 * page.
5274f052e   Jens Axboe   [PATCH] Introduce...
554
  	 */
0568b409c   Jens Axboe   [PATCH] splice: f...
555
  	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
83f9135bd   Jens Axboe   [PATCH] splice: a...
556
  		/*
1432873af   Jens Axboe   [PATCH] splice: L...
557
558
559
  		 * If steal succeeds, buf->page is now pruned from the
  		 * pagecache and we can reuse it. The page will also be
  		 * locked on successful return.
83f9135bd   Jens Axboe   [PATCH] splice: a...
560
  		 */
76ad4d111   Jens Axboe   [PATCH] splice: r...
561
  		if (buf->ops->steal(pipe, buf))
5abc97aa2   Jens Axboe   [PATCH] splice: a...
562
563
564
  			goto find_page;
  
  		page = buf->page;
46e678c96   Jens Axboe   [PATCH] splice: f...
565
566
  		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
  			unlock_page(page);
5abc97aa2   Jens Axboe   [PATCH] splice: a...
567
  			goto find_page;
46e678c96   Jens Axboe   [PATCH] splice: f...
568
  		}
1432873af   Jens Axboe   [PATCH] splice: L...
569
570
571
572
573
  
  		page_cache_get(page);
  
  		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
  			lru_cache_add(page);
5abc97aa2   Jens Axboe   [PATCH] splice: a...
574
575
  	} else {
  find_page:
9e0267c26   Jens Axboe   [PATCH] splice: f...
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
  		page = find_lock_page(mapping, index);
  		if (!page) {
  			ret = -ENOMEM;
  			page = page_cache_alloc_cold(mapping);
  			if (unlikely(!page))
  				goto out_nomem;
  
  			/*
  			 * This will also lock the page
  			 */
  			ret = add_to_page_cache_lru(page, mapping, index,
  						    gfp_mask);
  			if (unlikely(ret))
  				goto out;
  		}
5abc97aa2   Jens Axboe   [PATCH] splice: a...
591
592
  
  		/*
9e0267c26   Jens Axboe   [PATCH] splice: f...
593
594
595
596
  		 * We get here with the page locked. If the page is also
  		 * uptodate, we don't need to do more. If it isn't, we
  		 * may need to bring it in if we are not going to overwrite
  		 * the full page.
5abc97aa2   Jens Axboe   [PATCH] splice: a...
597
598
  		 */
  		if (!PageUptodate(page)) {
016b661e2   Jens Axboe   [PATCH] splice: f...
599
  			if (this_len < PAGE_CACHE_SIZE) {
5abc97aa2   Jens Axboe   [PATCH] splice: a...
600
601
602
603
604
605
606
607
  				ret = mapping->a_ops->readpage(file, page);
  				if (unlikely(ret))
  					goto out;
  
  				lock_page(page);
  
  				if (!PageUptodate(page)) {
  					/*
73d62d83e   Ingo Molnar   [PATCH] splice: c...
608
  					 * Page got invalidated, repeat.
5abc97aa2   Jens Axboe   [PATCH] splice: a...
609
610
611
612
613
614
615
616
  					 */
  					if (!page->mapping) {
  						unlock_page(page);
  						page_cache_release(page);
  						goto find_page;
  					}
  					ret = -EIO;
  					goto out;
5274f052e   Jens Axboe   [PATCH] Introduce...
617
  				}
9e0267c26   Jens Axboe   [PATCH] splice: f...
618
  			} else
5abc97aa2   Jens Axboe   [PATCH] splice: a...
619
  				SetPageUptodate(page);
5274f052e   Jens Axboe   [PATCH] Introduce...
620
621
  		}
  	}
016b661e2   Jens Axboe   [PATCH] splice: f...
622
  	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
bfc4ee39f   Jens Axboe   [PATCH] splice: f...
623
624
625
626
627
  	if (unlikely(ret)) {
  		loff_t isize = i_size_read(mapping->host);
  
  		if (ret != AOP_TRUNCATED_PAGE)
  			unlock_page(page);
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
628
  		page_cache_release(page);
bfc4ee39f   Jens Axboe   [PATCH] splice: f...
629
630
631
632
633
634
635
636
637
  		if (ret == AOP_TRUNCATED_PAGE)
  			goto find_page;
  
  		/*
  		 * prepare_write() may have instantiated a few blocks
  		 * outside i_size.  Trim these off again.
  		 */
  		if (sd->pos + this_len > isize)
  			vmtruncate(mapping->host, isize);
5274f052e   Jens Axboe   [PATCH] Introduce...
638
  		goto out;
bfc4ee39f   Jens Axboe   [PATCH] splice: f...
639
  	}
5274f052e   Jens Axboe   [PATCH] Introduce...
640

0568b409c   Jens Axboe   [PATCH] splice: f...
641
  	if (buf->page != page) {
f84d75199   Jens Axboe   [PATCH] pipe: int...
642
643
644
  		/*
  		 * Careful, ->map() uses KM_USER0!
  		 */
76ad4d111   Jens Axboe   [PATCH] splice: r...
645
  		char *src = buf->ops->map(pipe, buf, 1);
f84d75199   Jens Axboe   [PATCH] pipe: int...
646
  		char *dst = kmap_atomic(page, KM_USER1);
5abc97aa2   Jens Axboe   [PATCH] splice: a...
647

016b661e2   Jens Axboe   [PATCH] splice: f...
648
  		memcpy(dst + offset, src + buf->offset, this_len);
5abc97aa2   Jens Axboe   [PATCH] splice: a...
649
  		flush_dcache_page(page);
f84d75199   Jens Axboe   [PATCH] pipe: int...
650
  		kunmap_atomic(dst, KM_USER1);
76ad4d111   Jens Axboe   [PATCH] splice: r...
651
  		buf->ops->unmap(pipe, buf, src);
5abc97aa2   Jens Axboe   [PATCH] splice: a...
652
  	}
5274f052e   Jens Axboe   [PATCH] Introduce...
653

016b661e2   Jens Axboe   [PATCH] splice: f...
654
  	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
0568b409c   Jens Axboe   [PATCH] splice: f...
655
656
657
658
659
660
661
662
663
  	if (!ret) {
  		/*
  		 * Return the number of bytes written and mark page as
  		 * accessed, we are now done!
  		 */
  		ret = this_len;
  		mark_page_accessed(page);
  		balance_dirty_pages_ratelimited(mapping);
  	} else if (ret == AOP_TRUNCATED_PAGE) {
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
664
665
  		page_cache_release(page);
  		goto find_page;
0568b409c   Jens Axboe   [PATCH] splice: f...
666
  	}
5274f052e   Jens Axboe   [PATCH] Introduce...
667
  out:
0568b409c   Jens Axboe   [PATCH] splice: f...
668
  	page_cache_release(page);
9e0267c26   Jens Axboe   [PATCH] splice: f...
669
  	unlock_page(page);
9aefe431f   Dave Jones   [PATCH] splice: p...
670
  out_nomem:
5274f052e   Jens Axboe   [PATCH] Introduce...
671
672
  	return ret;
  }
83f9135bd   Jens Axboe   [PATCH] splice: a...
673
674
675
676
677
  /*
   * Pipe input worker. Most of this logic works like a regular pipe, the
   * key here is the 'actor' worker passed in that actually moves the data
   * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
   */
00522fb41   Jens Axboe   [PATCH] splice: r...
678
679
680
  ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
  			 loff_t *ppos, size_t len, unsigned int flags,
  			 splice_actor *actor)
  {
  	struct splice_desc sd;
  	int spliced = 0;	/* bytes consumed so far, or an error code */
  	int wake_writers = 0;	/* set once a pipe slot has been freed */
  	int moved;		/* result of the most recent actor call */

  	sd.file = out;
  	sd.flags = flags;
  	sd.pos = *ppos;
  	sd.total_len = len;

  	/*
  	 * Only pipes that have an inode are locked here.
  	 */
  	if (pipe->inode)
  		mutex_lock(&pipe->inode->i_mutex);

  	for (;;) {
  		if (pipe->nrbufs) {
  			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
  			struct pipe_buf_operations *ops = buf->ops;

  			/*
  			 * Offer the actor the current buffer, capped to
  			 * what the caller still wants.
  			 */
  			sd.len = buf->len;
  			if (sd.len > sd.total_len)
  				sd.len = sd.total_len;

  			moved = actor(pipe, buf, &sd);
  			if (moved <= 0) {
  				/*
  				 * Report the actor's result only if nothing
  				 * was moved yet; -ENODATA is never reported.
  				 */
  				if (!spliced && moved != -ENODATA)
  					spliced = moved;

  				break;
  			}

  			spliced += moved;
  			buf->offset += moved;
  			buf->len -= moved;

  			sd.pos += moved;
  			sd.len -= moved;
  			sd.total_len -= moved;

  			/*
  			 * The actor took less than offered, go again on
  			 * the same buffer.
  			 */
  			if (sd.len)
  				continue;

  			if (!buf->len) {
  				/*
  				 * Buffer fully drained: release it and step
  				 * to the next slot of the ring.
  				 */
  				buf->ops = NULL;
  				ops->release(pipe, buf);
  				pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
  				pipe->nrbufs--;
  				if (pipe->inode)
  					wake_writers = 1;
  			}

  			if (!sd.total_len)
  				break;
  		}

  		if (pipe->nrbufs)
  			continue;
  		if (!pipe->writers)
  			break;
  		/*
  		 * Pipe is empty: stop if we already moved something and no
  		 * writer is waiting to add more.
  		 */
  		if (spliced && !pipe->waiting_writers)
  			break;

  		if (flags & SPLICE_F_NONBLOCK) {
  			if (!spliced)
  				spliced = -EAGAIN;
  			break;
  		}

  		if (signal_pending(current)) {
  			if (!spliced)
  				spliced = -ERESTARTSYS;
  			break;
  		}

  		/*
  		 * Tell writers about the freed slots before sleeping.
  		 */
  		if (wake_writers) {
  			smp_mb();
  			if (waitqueue_active(&pipe->wait))
  				wake_up_interruptible_sync(&pipe->wait);
  			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
  			wake_writers = 0;
  		}

  		pipe_wait(pipe);
  	}

  	if (pipe->inode)
  		mutex_unlock(&pipe->inode->i_mutex);

  	/*
  	 * Deliver any wakeup still pending from the last freed slot.
  	 */
  	if (wake_writers) {
  		smp_mb();
  		if (waitqueue_active(&pipe->wait))
  			wake_up_interruptible(&pipe->wait);
  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
  	}

  	return spliced;
  }
83f9135bd   Jens Axboe   [PATCH] splice: a...
771
772
  /**
   * generic_file_splice_write - splice data from a pipe to a file
3a326a2ce   Ingo Molnar   [PATCH] introduce...
773
   * @pipe:	pipe info
83f9135bd   Jens Axboe   [PATCH] splice: a...
774
775
776
777
778
779
780
781
   * @out:	file to write to
   * @ppos:	position in @out; advanced by the number of bytes written
   * @len:	number of bytes to splice
   * @flags:	splice modifier flags
   *
   * Will either move or copy pages (determined by @flags options) from
   * the given pipe inode to the given file.
   *
   */
3a326a2ce   Ingo Molnar   [PATCH] introduce...
782
783
  ssize_t
  generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
cbb7e577e   Jens Axboe   [PATCH] splice: p...
784
  			  loff_t *ppos, size_t len, unsigned int flags)
5274f052e   Jens Axboe   [PATCH] Introduce...
785
  {
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
786
  	struct address_space *mapping = out->f_mapping;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
787
  	ssize_t ret;
00522fb41   Jens Axboe   [PATCH] splice: r...
788
  	ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
a4514ebd8   Jens Axboe   [PATCH] splice: o...
789
  	if (ret > 0) {
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
790
  		struct inode *inode = mapping->host;
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
791

a4514ebd8   Jens Axboe   [PATCH] splice: o...
792
793
794
795
796
797
798
799
800
801
802
803
804
  		*ppos += ret;
  
  		/*
  		 * If file or inode is SYNC and we actually wrote some data,
  		 * sync it.
  		 */
  		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
  			int err;
  
  			mutex_lock(&inode->i_mutex);
  			err = generic_osync_inode(inode, mapping,
  						  OSYNC_METADATA|OSYNC_DATA);
  			mutex_unlock(&inode->i_mutex);
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
805

a4514ebd8   Jens Axboe   [PATCH] splice: o...
806
807
808
  			if (err)
  				ret = err;
  		}
4f6f0bd2f   Jens Axboe   [PATCH] splice: i...
809
810
811
  	}
  
  	return ret;
5274f052e   Jens Axboe   [PATCH] Introduce...
812
  }
059a8f373   Jens Axboe   [PATCH] splice: e...
813
  EXPORT_SYMBOL(generic_file_splice_write);
83f9135bd   Jens Axboe   [PATCH] splice: a...
814
815
816
817
818
819
820
821
822
823
824
  /**
   * generic_splice_sendpage - splice data from a pipe to a socket
   * @pipe:	pipe info
   * @out:	socket to write to
   * @ppos:	position pointer, handed on to splice_from_pipe()
   * @len:	number of bytes to splice
   * @flags:	splice modifier flags
   *
   * Will send @len bytes from the pipe to a network socket. No data copying
   * is involved.
   *
   */
3a326a2ce   Ingo Molnar   [PATCH] introduce...
825
  ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
cbb7e577e   Jens Axboe   [PATCH] splice: p...
826
  				loff_t *ppos, size_t len, unsigned int flags)
5274f052e   Jens Axboe   [PATCH] Introduce...
827
  {
00522fb41   Jens Axboe   [PATCH] splice: r...
828
  	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
5274f052e   Jens Axboe   [PATCH] Introduce...
829
  }
059a8f373   Jens Axboe   [PATCH] splice: e...
830
  EXPORT_SYMBOL(generic_splice_sendpage);
a0f067802   Jeff Garzik   [PATCH] splice ex...
831

83f9135bd   Jens Axboe   [PATCH] splice: a...
832
833
834
  /*
   * Attempt to initiate a splice from pipe to file.
   */
3a326a2ce   Ingo Molnar   [PATCH] introduce...
835
  static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
cbb7e577e   Jens Axboe   [PATCH] splice: p...
836
  			   loff_t *ppos, size_t len, unsigned int flags)
5274f052e   Jens Axboe   [PATCH] Introduce...
837
  {
5274f052e   Jens Axboe   [PATCH] Introduce...
838
  	int ret;
49570e9b2   Jens Axboe   [PATCH] splice: u...
839
  	if (unlikely(!out->f_op || !out->f_op->splice_write))
5274f052e   Jens Axboe   [PATCH] Introduce...
840
  		return -EINVAL;
49570e9b2   Jens Axboe   [PATCH] splice: u...
841
  	if (unlikely(!(out->f_mode & FMODE_WRITE)))
5274f052e   Jens Axboe   [PATCH] Introduce...
842
  		return -EBADF;
cbb7e577e   Jens Axboe   [PATCH] splice: p...
843
  	ret = rw_verify_area(WRITE, out, ppos, len);
5274f052e   Jens Axboe   [PATCH] Introduce...
844
845
  	if (unlikely(ret < 0))
  		return ret;
cbb7e577e   Jens Axboe   [PATCH] splice: p...
846
  	return out->f_op->splice_write(pipe, out, ppos, len, flags);
5274f052e   Jens Axboe   [PATCH] Introduce...
847
  }
83f9135bd   Jens Axboe   [PATCH] splice: a...
848
849
850
  /*
   * Attempt to initiate a splice from a file to a pipe.
   */
cbb7e577e   Jens Axboe   [PATCH] splice: p...
851
852
853
  static long do_splice_to(struct file *in, loff_t *ppos,
  			 struct pipe_inode_info *pipe, size_t len,
  			 unsigned int flags)
  {
  	loff_t size, remaining;
  	int err;

  	/*
  	 * The input file must provide a ->splice_read method ...
  	 */
  	if (unlikely(!in->f_op))
  		return -EINVAL;
  	if (unlikely(!in->f_op->splice_read))
  		return -EINVAL;

  	/*
  	 * ... and must have been opened for reading.
  	 */
  	if (unlikely(!(in->f_mode & FMODE_READ)))
  		return -EBADF;

  	err = rw_verify_area(READ, in, ppos, len);
  	if (unlikely(err < 0))
  		return err;

  	/*
  	 * Starting at or beyond i_size yields 0 bytes.
  	 */
  	size = i_size_read(in->f_mapping->host);
  	if (unlikely(*ppos >= size))
  		return 0;

  	/*
  	 * Never ask ->splice_read for more than is left before i_size.
  	 */
  	remaining = size - *ppos;
  	if (unlikely(remaining < len))
  		len = remaining;

  	return in->f_op->splice_read(in, ppos, pipe, len, flags);
  }
cbb7e577e   Jens Axboe   [PATCH] splice: p...
874
875
  /*
   * Splice @len bytes from @in (starting at *@ppos) to @out, using a pipe
   * cached in current->splice_pipe as the intermediate buffer.
   *
   * Returns the number of bytes transferred if any were, otherwise 0 or
   * the negative error from the failing step. Data is written to @out
   * through a local offset that starts at 0 on every call.
   */
  long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
  		      size_t len, unsigned int flags)
  {
  	struct pipe_inode_info *pipe;
  	long ret, bytes;
  	loff_t out_off;
  	umode_t i_mode;
  	int i;

  	/*
  	 * We require the input to be a regular file or a block device, as
  	 * we don't want to randomly drop data for eg socket -> socket
  	 * splicing. Use the piped splicing for that!
  	 */
  	i_mode = in->f_dentry->d_inode->i_mode;
  	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
  		return -EINVAL;

  	/*
  	 * neither in nor out is a pipe, setup an internal pipe attached to
  	 * 'out' and transfer the wanted data from 'in' to 'out' through that
  	 */
  	pipe = current->splice_pipe;
  	if (unlikely(!pipe)) {
  		pipe = alloc_pipe_info(NULL);
  		if (!pipe)
  			return -ENOMEM;

  		/*
  		 * We don't have an immediate reader, but we'll read the stuff
  		 * out of the pipe right after the splice_to_pipe(). So set
  		 * PIPE_READERS appropriately.
  		 */
  		pipe->readers = 1;

  		current->splice_pipe = pipe;
  	}

  	/*
  	 * Do the splice.
  	 */
  	ret = 0;
  	bytes = 0;
  	out_off = 0;

  	while (len) {
  		size_t read_len, max_read_len;

  		/*
  		 * Do at most PIPE_BUFFERS pages worth of transfer:
  		 */
  		max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));

  		/*
  		 * A return of 0 means no more input (do_splice_to() returns
  		 * 0 at or past i_size). Stop here: 'len' would never shrink
  		 * and the loop would spin forever otherwise.
  		 */
  		ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
  		if (unlikely(ret <= 0))
  			break;

  		read_len = ret;

  		/*
  		 * NOTE: nonblocking mode only applies to the input. We
  		 * must not do the output in nonblocking mode as then we
  		 * could get stuck data in the internal pipe:
  		 */
  		ret = do_splice_from(pipe, out, &out_off, read_len,
  				     flags & ~SPLICE_F_NONBLOCK);
  		/*
  		 * Likewise, an output side that makes no progress must end
  		 * the loop rather than be retried indefinitely.
  		 */
  		if (unlikely(ret <= 0))
  			break;

  		bytes += ret;
  		len -= ret;

  		/*
  		 * In nonblocking mode, if we got back a short read then
  		 * that was due to either an IO error or due to the
  		 * pagecache entry not being there. In the IO error case
  		 * the _next_ splice attempt will produce a clean IO error
  		 * return value (not a short read), so in both cases it's
  		 * correct to break out of the loop here:
  		 */
  		if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
  			break;
  	}

  	/*
  	 * Release whatever is still sitting in the internal pipe, on every
  	 * exit path. Buffers that were fully consumed already have ->ops
  	 * cleared, so after a complete transfer this loop finds nothing;
  	 * after an error or a short write it drops the leftovers instead
  	 * of leaking them when the ring indices are reset below.
  	 */
  	for (i = 0; i < PIPE_BUFFERS; i++) {
  		struct pipe_buffer *buf = pipe->bufs + i;

  		if (buf->ops) {
  			buf->ops->release(pipe, buf);
  			buf->ops = NULL;
  		}
  	}
  	pipe->nrbufs = pipe->curbuf = 0;

  	/*
  	 * If we transferred some data, return the number of bytes:
  	 */
  	if (bytes > 0)
  		return bytes;

  	return ret;
  }
  
  EXPORT_SYMBOL(do_splice_direct);
83f9135bd   Jens Axboe   [PATCH] splice: a...
986
987
988
  /*
   * Determine where to splice to/from.
   */
529565dcb   Ingo Molnar   [PATCH] splice: a...
989
990
991
  static long do_splice(struct file *in, loff_t __user *off_in,
  		      struct file *out, loff_t __user *off_out,
  		      size_t len, unsigned int flags)
  {
  	struct pipe_inode_info *pipe;
  	loff_t user_off;	/* kernel copy of a user supplied offset */
  	loff_t *pos;
  	long ret;

  	/*
  	 * Input is a pipe: splice from the pipe to 'out'. A pipe has no
  	 * position, so an input offset is refused.
  	 */
  	pipe = in->f_dentry->d_inode->i_pipe;
  	if (pipe) {
  		if (off_in)
  			return -ESPIPE;

  		pos = &out->f_pos;
  		if (off_out) {
  			if (out->f_op->llseek == no_llseek)
  				return -EINVAL;
  			if (copy_from_user(&user_off, off_out, sizeof(loff_t)))
  				return -EFAULT;
  			pos = &user_off;
  		}

  		ret = do_splice_from(pipe, out, pos, len, flags);

  		/*
  		 * Hand the (possibly advanced) offset back to user space.
  		 */
  		if (off_out && copy_to_user(off_out, pos, sizeof(loff_t)))
  			ret = -EFAULT;

  		return ret;
  	}

  	/*
  	 * Output is a pipe: splice from 'in' to the pipe. Same rules,
  	 * mirrored.
  	 */
  	pipe = out->f_dentry->d_inode->i_pipe;
  	if (pipe) {
  		if (off_out)
  			return -ESPIPE;

  		pos = &in->f_pos;
  		if (off_in) {
  			if (in->f_op->llseek == no_llseek)
  				return -EINVAL;
  			if (copy_from_user(&user_off, off_in, sizeof(loff_t)))
  				return -EFAULT;
  			pos = &user_off;
  		}

  		ret = do_splice_to(in, pos, pipe, len, flags);

  		if (off_in && copy_to_user(off_in, pos, sizeof(loff_t)))
  			ret = -EFAULT;

  		return ret;
  	}

  	/*
  	 * Neither end is a pipe.
  	 */
  	return -EINVAL;
  }
912d35f86   Jens Axboe   [PATCH] Add suppo...
1041
1042
1043
1044
1045
1046
1047
1048
1049
  /*
   * Map an iov into an array of pages and offset/length tuples. With the
   * partial_page structure, we can map several non-contiguous ranges into
   * our one pages[] map instead of splitting that operation into pieces.
   * Could easily be exported as a generic helper for other users, in which
   * case one would probably want to add a 'max_nr_pages' parameter as well.
   */
  static int get_iovec_page_array(const struct iovec __user *iov,
  				unsigned int nr_vecs, struct page **pages,
  				struct partial_page *partial, int aligned)
  {
  	int filled = 0;		/* slots of pages[]/partial[] used so far */
  	int status = 0;		/* last error, or last get_user_pages() count */

  	/*
  	 * It's ok to take the mmap_sem for reading, even
  	 * across a "get_user()".
  	 */
  	down_read(&current->mm->mmap_sem);

  	for (; nr_vecs; nr_vecs--, iov++) {
  		unsigned long page_off, want;
  		void __user *base;
  		size_t len;
  		int k;

  		/*
  		 * Fetch the user address base and length of this iovec.
  		 */
  		status = get_user(base, &iov->iov_base);
  		if (unlikely(status))
  			break;
  		status = get_user(len, &iov->iov_len);
  		if (unlikely(status))
  			break;

  		/*
  		 * A zero length entry ends the walk without an error,
  		 * a NULL base with a non-zero length is a fault.
  		 */
  		if (unlikely(!len))
  			break;
  		status = -EFAULT;
  		if (unlikely(!base))
  			break;

  		/*
  		 * Offset of the data within its first page.
  		 */
  		page_off = (unsigned long) base & ~PAGE_MASK;

  		/*
  		 * If asked for alignment, the offset must be zero and the
  		 * length a multiple of the PAGE_SIZE.
  		 */
  		status = -EINVAL;
  		if (aligned && (page_off || (len & ~PAGE_MASK)))
  			break;

  		/*
  		 * Number of pages this iovec touches, capped to the room
  		 * left in the arrays.
  		 */
  		want = (page_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
  		if (want > PIPE_BUFFERS - filled)
  			want = PIPE_BUFFERS - filled;

  		status = get_user_pages(current, current->mm,
  					(unsigned long) base, want, 0, 0,
  					&pages[filled], NULL);

  		if (unlikely(status <= 0))
  			break;

  		/*
  		 * Describe each mapped page in the partial page map. Only
  		 * the first page can start at a non-zero offset.
  		 */
  		k = 0;
  		while (k < status) {
  			const int chunk = min_t(size_t, len, PAGE_SIZE - page_off);

  			partial[filled].offset = page_off;
  			partial[filled].len = chunk;

  			page_off = 0;
  			len -= chunk;
  			filled++;
  			k++;
  		}

  		/*
  		 * We didn't complete this iov, stop here since it probably
  		 * means we have to move some of this into a pipe to
  		 * be able to continue.
  		 */
  		if (len)
  			break;

  		/*
  		 * Don't continue if we mapped fewer pages than we asked for,
  		 * or if we mapped the max number of pages that we have
  		 * room for.
  		 */
  		if (status < want || filled == PIPE_BUFFERS)
  			break;
  	}

  	up_read(&current->mm->mmap_sem);

  	/*
  	 * Any mapped page wins over a later error.
  	 */
  	if (filled)
  		return filled;

  	return status;
  }
  
  /*
   * vmsplice splices a user address range into a pipe. It can be thought of
   * as splice-from-memory, where the regular splice is splice-from-file (or
   * to file). In both cases the output is a pipe, naturally.
   *
   * Note that vmsplice only supports splicing _from_ user memory to a pipe,
   * not the other way around. Splicing from user memory is a simple operation
   * that can be supported without any funky alignment restrictions or nasty
   * vm tricks. We simply map in the user memory and fill them into a pipe.
   * The reverse isn't quite as easy, though. There are two possible solutions
   * for that:
   *
   *	- memcpy() the data internally, at which point we might as well just
   *	  do a regular read() on the buffer anyway.
   *	- Lots of nasty vm tricks, that are neither fast nor flexible (they
   *	  impose restrictions on both ends of the pipe).
   *
   * Alas, it isn't here.
   *
   */
  static long do_vmsplice(struct file *file, const struct iovec __user *iov,
  			unsigned long nr_segs, unsigned int flags)
  {
  	struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
  	struct page *pages[PIPE_BUFFERS];
  	struct partial_page partial[PIPE_BUFFERS];
  	struct splice_pipe_desc spd = {
  		.pages = pages,
  		.partial = partial,
  		.flags = flags,
  		.ops = &user_page_pipe_buf_ops,
  	};

  	/*
  	 * The target must be a pipe.
  	 */
  	if (unlikely(!pipe))
  		return -EBADF;

  	/*
  	 * Too many segments is an error, none at all is a no-op.
  	 */
  	if (unlikely(nr_segs > UIO_MAXIOV))
  		return -EINVAL;
  	if (unlikely(!nr_segs))
  		return 0;

  	/*
  	 * Map the user pages. SPLICE_F_GIFT turns on the alignment check
  	 * in get_iovec_page_array().
  	 */
  	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
  					    flags & SPLICE_F_GIFT);
  	if (spd.nr_pages <= 0)
  		return spd.nr_pages;

  	return splice_to_pipe(pipe, &spd);
  }
  
  /*
   * vmsplice system call: look up @fd and, if it is open for writing,
   * hand the iovec over to do_vmsplice(). Returns -EBADF for an invalid
   * or non-writable descriptor.
   */
  asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
  			     unsigned long nr_segs, unsigned int flags)
  {
  	struct file *file;
  	long error = -EBADF;
  	int fput_needed;

  	file = fget_light(fd, &fput_needed);
  	if (!file)
  		return error;

  	if (file->f_mode & FMODE_WRITE)
  		error = do_vmsplice(file, iov, nr_segs, flags);

  	fput_light(file, fput_needed);

  	return error;
  }
529565dcb   Ingo Molnar   [PATCH] splice: a...
1215
1216
1217
  /*
   * splice system call: look up both descriptors, check that the input is
   * readable and the output writable, then let do_splice() work out the
   * direction. A zero @len returns 0 right away; bad or wrongly opened
   * descriptors give -EBADF.
   */
  asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
  			   int fd_out, loff_t __user *off_out,
  			   size_t len, unsigned int flags)
  {
  	struct file *in, *out;
  	int fput_in, fput_out;
  	long error = -EBADF;

  	if (unlikely(!len))
  		return 0;

  	in = fget_light(fd_in, &fput_in);
  	if (!in)
  		return error;

  	if (!(in->f_mode & FMODE_READ))
  		goto out_put_in;

  	out = fget_light(fd_out, &fput_out);
  	if (!out)
  		goto out_put_in;

  	if (out->f_mode & FMODE_WRITE)
  		error = do_splice(in, off_in, out, off_out, len, flags);

  	fput_light(out, fput_out);
  out_put_in:
  	fput_light(in, fput_in);

  	return error;
  }
70524490e   Jens Axboe   [PATCH] splice: a...
1245
1246
  
  /*
aadd06e5c   Jens Axboe   [PATCH] splice: f...
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
   * Make sure there's data to read. Wait for input if we can, otherwise
   * return an appropriate error.
   */
  static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
  {
  	int ret = 0;

  	/*
  	 * Unlocked peek at ->nrbufs. This function is speculative
  	 * anyways, so a stale value does no harm.
  	 */
  	if (pipe->nrbufs)
  		return 0;

  	mutex_lock(&pipe->inode->i_mutex);

  	for (;;) {
  		/* data showed up, we are done */
  		if (pipe->nrbufs)
  			break;

  		if (signal_pending(current)) {
  			ret = -ERESTARTSYS;
  			break;
  		}

  		/* no writers left: nothing will arrive, not an error */
  		if (!pipe->writers)
  			break;

  		/* nonblocking callers only wait if a writer is queued up */
  		if (!pipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) {
  			ret = -EAGAIN;
  			break;
  		}

  		pipe_wait(pipe);
  	}

  	mutex_unlock(&pipe->inode->i_mutex);
  	return ret;
  }
  
  /*
   * Make sure there's writeable room. Wait for room if we can, otherwise
   * return an appropriate error.
   */
  static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
  {
  	int ret = 0;

  	/*
  	 * Unlocked peek at ->nrbufs. This function is speculative
  	 * anyways, so a stale value does no harm.
  	 */
  	if (pipe->nrbufs < PIPE_BUFFERS)
  		return 0;

  	mutex_lock(&pipe->inode->i_mutex);

  	for (;;) {
  		/* a slot opened up, we are done */
  		if (pipe->nrbufs < PIPE_BUFFERS)
  			break;

  		/* nobody will ever drain the pipe */
  		if (!pipe->readers) {
  			send_sig(SIGPIPE, current, 0);
  			ret = -EPIPE;
  			break;
  		}

  		if (flags & SPLICE_F_NONBLOCK) {
  			ret = -EAGAIN;
  			break;
  		}

  		if (signal_pending(current)) {
  			ret = -ERESTARTSYS;
  			break;
  		}

  		/* count ourselves as a waiting writer while we sleep */
  		pipe->waiting_writers++;
  		pipe_wait(pipe);
  		pipe->waiting_writers--;
  	}

  	mutex_unlock(&pipe->inode->i_mutex);
  	return ret;
  }
  
  /*
70524490e   Jens Axboe   [PATCH] splice: a...
1326
1327
1328
1329
1330
1331
1332
   * Link contents of ipipe to opipe.
   */
  static int link_pipe(struct pipe_inode_info *ipipe,
  		     struct pipe_inode_info *opipe,
  		     size_t len, unsigned int flags)
  {
  	struct pipe_buffer *src, *dst;
  	int linked = 0;		/* bytes made visible in opipe, or an error */
  	int seen = 0;		/* input buffers walked so far */

  	/*
  	 * Potential ABBA deadlock, work around it by ordering lock
  	 * grabbing by inode address. Otherwise two different processes
  	 * could deadlock (one doing tee from A -> B, the other from B -> A).
  	 */
  	if (ipipe->inode < opipe->inode) {
  		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
  		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
  	} else {
  		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
  		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
  	}

  	for (;;) {
  		/*
  		 * No reader on the output side: signal the caller, and
  		 * report -EPIPE unless something was linked already.
  		 */
  		if (!opipe->readers) {
  			send_sig(SIGPIPE, current, 0);
  			if (!linked)
  				linked = -EPIPE;
  			break;
  		}

  		/*
  		 * If we have iterated all input buffers or ran out of
  		 * output room, break.
  		 */
  		if (seen >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
  			break;

  		/*
  		 * Next unvisited input buffer, and the first free slot
  		 * behind the output pipe's current contents.
  		 */
  		src = ipipe->bufs + ((ipipe->curbuf + seen) & (PIPE_BUFFERS - 1));
  		dst = opipe->bufs +
  			((opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1));

  		/*
  		 * Get a reference to this pipe buffer,
  		 * so we can copy the contents over.
  		 */
  		src->ops->get(ipipe, src);
  		*dst = *src;

  		/*
  		 * Don't inherit the gift flag, we need to
  		 * prevent multiple steals of this page.
  		 */
  		dst->flags &= ~PIPE_BUF_FLAG_GIFT;

  		/*
  		 * Trim the copy to what the caller still wants.
  		 */
  		if (dst->len > len)
  			dst->len = len;

  		opipe->nrbufs++;
  		linked += dst->len;
  		len -= dst->len;
  		seen++;

  		if (!len)
  			break;
  	}

  	mutex_unlock(&ipipe->inode->i_mutex);
  	mutex_unlock(&opipe->inode->i_mutex);

  	/*
  	 * If we put data in the output pipe, wakeup any potential readers.
  	 */
  	if (linked > 0) {
  		smp_mb();
  		if (waitqueue_active(&opipe->wait))
  			wake_up_interruptible(&opipe->wait);
  		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
  	}

  	return linked;
  }
  
  /*
   * This is a tee(1) implementation that works on pipes. It doesn't copy
   * any data, it simply references the 'in' pages on the 'out' pipe.
   * The 'flags' used are the SPLICE_F_* variants, currently the only
   * applicable one is SPLICE_F_NONBLOCK.
   */
  static long do_tee(struct file *in, struct file *out, size_t len,
  		   unsigned int flags)
  {
  	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
  	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
aadd06e5c   Jens Axboe   [PATCH] splice: f...
1414
  	int ret = -EINVAL;
70524490e   Jens Axboe   [PATCH] splice: a...
1415
1416
  
  	/*
aadd06e5c   Jens Axboe   [PATCH] splice: f...
1417
1418
  	 * Duplicate the contents of ipipe to opipe without actually
  	 * copying the data.
70524490e   Jens Axboe   [PATCH] splice: a...
1419
  	 */
aadd06e5c   Jens Axboe   [PATCH] splice: f...
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
  	if (ipipe && opipe && ipipe != opipe) {
  		/*
  		 * Keep going, unless we encounter an error. The ipipe/opipe
  		 * ordering doesn't really matter.
  		 */
  		ret = link_ipipe_prep(ipipe, flags);
  		if (!ret) {
  			ret = link_opipe_prep(opipe, flags);
  			if (!ret) {
  				ret = link_pipe(ipipe, opipe, len, flags);
  				if (!ret && (flags & SPLICE_F_NONBLOCK))
  					ret = -EAGAIN;
  			}
  		}
  	}
70524490e   Jens Axboe   [PATCH] splice: a...
1435

aadd06e5c   Jens Axboe   [PATCH] splice: f...
1436
  	return ret;
70524490e   Jens Axboe   [PATCH] splice: a...
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
  }
  
  /*
   * tee() system call entry point: resolve both descriptors, verify that
   * 'fdin' is open for reading and 'fdout' is open for writing, and hand
   * the files to do_tee().
   *
   * Returns 0 immediately for a zero-length request, -EBADF when either
   * descriptor cannot be looked up or lacks the required access mode, and
   * the result of do_tee() otherwise.
   */
  asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
  {
  	struct file *src, *dst;
  	int src_put, dst_put;
  	int ret = -EBADF;
  
  	/* Nothing to duplicate. */
  	if (unlikely(!len))
  		return 0;
  
  	src = fget_light(fdin, &src_put);
  	if (!src)
  		return ret;
  
  	/* The source must be readable before we bother with the target. */
  	if (!(src->f_mode & FMODE_READ))
  		goto out_put_src;
  
  	dst = fget_light(fdout, &dst_put);
  	if (!dst)
  		goto out_put_src;
  
  	if (dst->f_mode & FMODE_WRITE)
  		ret = do_tee(src, dst, len, flags);
  	fput_light(dst, dst_put);
  
  out_put_src:
  	fput_light(src, src_put);
  	return ret;
  }