Blame view

fs/pipe.c 34.8 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
  /*
   *  linux/fs/pipe.c
   *
   *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
   */
  
  #include <linux/mm.h>
  #include <linux/file.h>
  #include <linux/poll.h>
  #include <linux/slab.h>
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/fs.h>
35f3d14db   Jens Axboe   pipe: add support...
15
  #include <linux/log2.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/mount.h>
4fa7ec5db   David Howells   vfs: Convert pipe...
17
  #include <linux/pseudo_fs.h>
b502bd115   Muthu Kumar   magic.h: move som...
18
  #include <linux/magic.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
  #include <linux/pipe_fs_i.h>
  #include <linux/uio.h>
  #include <linux/highmem.h>
5274f052e   Jens Axboe   [PATCH] Introduce...
22
  #include <linux/pagemap.h>
db3495099   Al Viro   [PATCH] AUDIT_FD_...
23
  #include <linux/audit.h>
ba719baea   Ulrich Drepper   sys_pipe(): fix f...
24
  #include <linux/syscalls.h>
b492e95be   Jens Axboe   pipe: set lower a...
25
  #include <linux/fcntl.h>
d86133bd3   Vladimir Davydov   pipe: account to ...
26
  #include <linux/memcontrol.h>
c73be61ce   David Howells   pipe: Add general...
27
  #include <linux/watch_queue.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
29
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
  #include <asm/ioctls.h>
599a0ac14   Al Viro   pipe: fold file_o...
31
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  /*
46c4c9d1b   Alex Xu (Hello71)   pipe: increase mi...
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
   * New pipe buffers will be restricted to this size while the user is exceeding
   * their pipe buffer quota. The general pipe use case needs at least two
   * buffers: one for data yet to be read, and one for new data. If this is less
   * than two, then a write to a non-empty pipe may block even if the pipe is not
   * full. This can occur with GNU make jobserver or similar uses of pipes as
   * semaphores: multiple processes may be waiting to write tokens back to the
   * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
   *
   * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
   * own risk, namely: pipe writes to non-full pipes may block until the pipe is
   * emptied.
   */
  #define PIPE_MIN_DEF_BUFFERS 2
  
  /*
b492e95be   Jens Axboe   pipe: set lower a...
48
   * The max size that a non-root user is allowed to grow the pipe. Can
ff9da691c   Jens Axboe   pipe: change /pro...
49
   * be set by root in /proc/sys/fs/pipe-max-size
b492e95be   Jens Axboe   pipe: set lower a...
50
   */
ff9da691c   Jens Axboe   pipe: change /pro...
51
  unsigned int pipe_max_size = 1024 * 1024;	/* 1048576, i.e. 2^20 */
759c01142   Willy Tarreau   pipe: limit the p...
52
53
54
55
56
  /* Maximum allocatable pages per user. Hard limit is unset by default, soft
   * matches default values.
   */
  /* No initializer: as a file-scope object it starts out as 0. */
  unsigned long pipe_user_pages_hard;
  /* Default soft limit, computed from PIPE_DEF_BUFFERS and INR_OPEN_CUR. */
  unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
b492e95be   Jens Axboe   pipe: set lower a...
57
  /*
8cefc107c   David Howells   pipe: Use head an...
58
59
60
61
62
63
   * We use head and tail indices that aren't masked off, except at the point of
   * dereference, but rather they're allowed to wrap naturally.  This means there
   * isn't a dead spot in the buffer, but the ring has to be a power of two and
   * <= 2^31.
   * -- David Howells 2019-09-23.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64
65
66
67
68
69
70
71
72
   * Reads with count = 0 should always return 0.
   * -- Julian Bradfield 1999-06-07.
   *
   * FIFOs and Pipes now generate SIGIO for both readers and writers.
   * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
   *
   * pipe_read & write cleanup
   * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
   */
61e0d47c3   Miklos Szeredi   splice: add helpe...
73
74
  /*
   * Take pipe->mutex using the lockdep @subclass supplied by the caller.
   * The mutex is only taken when pipe->files is non-zero; otherwise this
   * function does nothing.
   */
  static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
  {
6447a3cf1   Al Viro   get rid of pipe->...
75
  	if (pipe->files)
72b0d9aac   Al Viro   pipe: don't use -...
76
  		mutex_lock_nested(&pipe->mutex, subclass);
61e0d47c3   Miklos Szeredi   splice: add helpe...
77
78
79
80
81
82
83
84
85
86
87
88
89
  }
  
  /*
   * Lock @pipe through pipe_lock_nested().
   *
   * The I_MUTEX_PARENT subclass is used because pipe_lock() nests non-pipe
   * inode locks (for writing to a file).
   */
  void pipe_lock(struct pipe_inode_info *pipe)
  {
  	const int subclass = I_MUTEX_PARENT;
  
  	pipe_lock_nested(pipe, subclass);
  }
  EXPORT_SYMBOL(pipe_lock);
  
  /*
   * Release pipe->mutex.  As in pipe_lock_nested(), the mutex is only
   * touched when pipe->files is non-zero.
   */
  void pipe_unlock(struct pipe_inode_info *pipe)
  {
6447a3cf1   Al Viro   get rid of pipe->...
90
  	if (pipe->files)
72b0d9aac   Al Viro   pipe: don't use -...
91
  		mutex_unlock(&pipe->mutex);
61e0d47c3   Miklos Szeredi   splice: add helpe...
92
93
  }
  EXPORT_SYMBOL(pipe_unlock);
ebec73f47   Al Viro   introduce variant...
94
95
96
97
98
99
100
101
102
  /* Take pipe->mutex unconditionally, with lockdep subclass I_MUTEX_PARENT. */
  static inline void __pipe_lock(struct pipe_inode_info *pipe)
  {
  	struct mutex *lock = &pipe->mutex;
  
  	mutex_lock_nested(lock, I_MUTEX_PARENT);
  }
  
  /* Release pipe->mutex unconditionally; counterpart of __pipe_lock(). */
  static inline void __pipe_unlock(struct pipe_inode_info *pipe)
  {
  	struct mutex *lock = &pipe->mutex;
  
  	mutex_unlock(lock);
  }
61e0d47c3   Miklos Szeredi   splice: add helpe...
103
104
105
106
107
108
109
110
111
  /*
   * Lock two distinct pipes.
   *
   * The pipe at the lower address is always locked first (subclass
   * I_MUTEX_PARENT) and the other one second (subclass I_MUTEX_CHILD), so
   * the acquisition order does not depend on the order of the arguments.
   * Passing the same pipe twice triggers BUG_ON().
   */
  void pipe_double_lock(struct pipe_inode_info *pipe1,
  		      struct pipe_inode_info *pipe2)
  {
  	BUG_ON(pipe1 == pipe2);
  
  	if (pipe1 < pipe2) {
  		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
  		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
  	} else {
023d43c7b   Peter Zijlstra   lockdep: Fix lock...
112
113
  		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
  		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
61e0d47c3   Miklos Szeredi   splice: add helpe...
114
115
  	}
  }
341b446bc   Ingo Molnar   [PATCH] another r...
116
117
  /*
   * ->release handler of anon_pipe_buf_ops.
   *
   * Either parks buf->page in pipe->tmp_page (when page_count() is 1 and no
   * tmp_page is cached yet) or drops the reference with put_page().
   */
  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
  				  struct pipe_buffer *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
  {
  	struct page *page = buf->page;
5274f052e   Jens Axboe   [PATCH] Introduce...
120
121
122
  	/*
  	 * If nobody else uses this page, and we don't already have a
  	 * temporary page, let's keep track of it as a one-deep
341b446bc   Ingo Molnar   [PATCH] another r...
123
  	 * allocation cache. (Otherwise just release our reference to it)
5274f052e   Jens Axboe   [PATCH] Introduce...
124
  	 */
341b446bc   Ingo Molnar   [PATCH] another r...
125
  	if (page_count(page) == 1 && !pipe->tmp_page)
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
126
  		pipe->tmp_page = page;
341b446bc   Ingo Molnar   [PATCH] another r...
127
  	else
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
128
  		put_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129
  }
c928f642c   Christoph Hellwig   fs: rename pipe_b...
130
131
  /*
   * ->try_steal handler of anon_pipe_buf_ops.
   *
   * Returns false when page_count() of buf->page is not 1.  Otherwise the
   * page is uncharged with memcg_kmem_uncharge_page(), marked locked with
   * __SetPageLocked(), and true is returned.
   */
  static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
  		struct pipe_buffer *buf)
d86133bd3   Vladimir Davydov   pipe: account to ...
132
133
  {
  	struct page *page = buf->page;
c928f642c   Christoph Hellwig   fs: rename pipe_b...
134
135
136
137
138
  	if (page_count(page) != 1)
  		return false;
  	memcg_kmem_uncharge_page(page, 0);
  	__SetPageLocked(page);
  	return true;
d86133bd3   Vladimir Davydov   pipe: account to ...
139
  }
0845718da   Jens Axboe   pipe: add documen...
140
  /**
c928f642c   Christoph Hellwig   fs: rename pipe_b...
141
   * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
0845718da   Jens Axboe   pipe: add documen...
142
143
144
145
   * @pipe:	the pipe that the buffer belongs to
   * @buf:	the buffer to attempt to steal
   *
   * Description:
b51d63c6d   Randy Dunlap   kernel-doc: fix f...
146
   *	This function attempts to steal the &struct page attached to
0845718da   Jens Axboe   pipe: add documen...
147
148
   *	@buf. If successful, this function returns true and returns with
   *	the page locked. The caller may then reuse the page for whatever
b51d63c6d   Randy Dunlap   kernel-doc: fix f...
149
   *	he wishes; the typical use is insertion into a different file
0845718da   Jens Axboe   pipe: add documen...
150
151
   *	page cache.
   */
c928f642c   Christoph Hellwig   fs: rename pipe_b...
152
153
  /*
   * Return value: true, with the page locked via lock_page(), when
   * page_count() of buf->page is exactly 1; false otherwise.
   */
  bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
  		struct pipe_buffer *buf)
5abc97aa2   Jens Axboe   [PATCH] splice: a...
154
  {
46e678c96   Jens Axboe   [PATCH] splice: f...
155
  	struct page *page = buf->page;
0845718da   Jens Axboe   pipe: add documen...
156
157
158
159
160
  	/*
  	 * A reference of one is golden, that means that the owner of this
  	 * page is the only one holding a reference to it. lock the page
  	 * and return OK.
  	 */
46e678c96   Jens Axboe   [PATCH] splice: f...
161
  	if (page_count(page) == 1) {
46e678c96   Jens Axboe   [PATCH] splice: f...
162
  		lock_page(page);
c928f642c   Christoph Hellwig   fs: rename pipe_b...
163
  		return true;
46e678c96   Jens Axboe   [PATCH] splice: f...
164
  	}
c928f642c   Christoph Hellwig   fs: rename pipe_b...
165
  	return false;
5abc97aa2   Jens Axboe   [PATCH] splice: a...
166
  }
c928f642c   Christoph Hellwig   fs: rename pipe_b...
167
  EXPORT_SYMBOL(generic_pipe_buf_try_steal);
5abc97aa2   Jens Axboe   [PATCH] splice: a...
168

0845718da   Jens Axboe   pipe: add documen...
169
  /**
b51d63c6d   Randy Dunlap   kernel-doc: fix f...
170
   * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
0845718da   Jens Axboe   pipe: add documen...
171
172
173
174
175
   * @pipe:	the pipe that the buffer belongs to
   * @buf:	the buffer to get a reference to
   *
   * Description:
   *	This function grabs an extra reference to @buf. It's used in
3d742d4b6   Randy Dunlap   fs: delete repeat...
176
   *	the tee() system call, when we duplicate the buffers in one
0845718da   Jens Axboe   pipe: add documen...
177
178
   *	pipe into another.
   */
15fab63e1   Matthew Wilcox   fs: prevent page ...
179
  /*
   * Return value: whatever try_get_page() reports for buf->page.
   * @pipe is not used by this implementation.
   */
  bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
70524490e   Jens Axboe   [PATCH] splice: a...
180
  {
cd1adf1b6   Linus Torvalds   Revert "mm/gup: r...
181
  	return try_get_page(buf->page);
70524490e   Jens Axboe   [PATCH] splice: a...
182
  }
51921cb74   Miklos Szeredi   mm: export generi...
183
  EXPORT_SYMBOL(generic_pipe_buf_get);
70524490e   Jens Axboe   [PATCH] splice: a...
184

0845718da   Jens Axboe   pipe: add documen...
185
  /**
6818173bd   Miklos Szeredi   splice: implement...
186
187
188
189
190
191
192
193
194
195
   * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
   * @pipe:	the pipe that the buffer belongs to
   * @buf:	the buffer to put a reference to
   *
   * Description:
   *	This function releases a reference to @buf.
   */
  /*
   * Drops one reference on buf->page with put_page().
   * @pipe is not used by this implementation.
   */
  void generic_pipe_buf_release(struct pipe_inode_info *pipe,
  			      struct pipe_buffer *buf)
  {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
196
  	put_page(buf->page);
6818173bd   Miklos Szeredi   splice: implement...
197
  }
51921cb74   Miklos Szeredi   mm: export generi...
198
  EXPORT_SYMBOL(generic_pipe_buf_release);
6818173bd   Miklos Szeredi   splice: implement...
199

d4c3cca94   Eric Dumazet   [PATCH] constify ...
200
  /*
   * Buffer operations that pipe_write() installs (buf->ops) on every buffer
   * it inserts into the ring.
   */
  static const struct pipe_buf_operations anon_pipe_buf_ops = {
c928f642c   Christoph Hellwig   fs: rename pipe_b...
201
202
203
  	.release	= anon_pipe_buf_release,
  	.try_steal	= anon_pipe_buf_try_steal,
  	.get		= generic_pipe_buf_get,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
204
  };
85190d15f   Linus Torvalds   pipe: don't use '...
205
206
207
208
209
210
211
212
213
  /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
  static inline bool pipe_readable(const struct pipe_inode_info *pipe)
  {
  	unsigned int head = READ_ONCE(pipe->head);
  	unsigned int tail = READ_ONCE(pipe->tail);
  	unsigned int writers = READ_ONCE(pipe->writers);
  
  	return !pipe_empty(head, tail) || !writers;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
214
  /*
   * Read from the pipe behind iocb->ki_filp into the iterator @to.
   *
   * Returns the number of bytes copied; 0 for a zero-length request or when
   * the pipe is empty and has no writers; or, when nothing was copied, a
   * negative errno: -EAGAIN (empty pipe, O_NONBLOCK), -ERESTARTSYS
   * (interrupted while waiting), -EFAULT (copy to @to failed), -ENOBUFS (a
   * PIPE_BUF_FLAG_WHOLE buffer or a loss notification does not fit in the
   * request), or the error reported by pipe_buf_confirm().
   */
  static ssize_t
fb9096a34   Al Viro   pipe: switch to -...
215
  pipe_read(struct kiocb *iocb, struct iov_iter *to)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
216
  {
fb9096a34   Al Viro   pipe: switch to -...
217
  	size_t total_len = iov_iter_count(to);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
218
  	struct file *filp = iocb->ki_filp;
de32ec4cf   Al Viro   pipe: set file->p...
219
  	struct pipe_inode_info *pipe = filp->private_data;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
220
  	bool was_full, wake_next_reader = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221
  	ssize_t ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
222

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223
224
225
  	/* Null read succeeds. */
  	if (unlikely(total_len == 0))
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
226
  	ret = 0;
ebec73f47   Al Viro   introduce variant...
227
  	__pipe_lock(pipe);
f467a6a66   Linus Torvalds   pipe: fix and cla...
228
229
230
231
232
233
234
235
236
237
  
  	/*
  	 * We only wake up writers if the pipe was full when we started
  	 * reading in order to avoid unnecessary wakeups.
  	 *
  	 * But when we do wake up writers, we do so using a sync wakeup
  	 * (WF_SYNC), because we want them to get going and generate more
  	 * data for us.
  	 */
  	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238
  	for (;;) {
eb38c2e9f   David Howells   watch_queue: Fix ...
239
240
  		/* Read ->head with a barrier vs post_one_notification() */
  		unsigned int head = smp_load_acquire(&pipe->head);
8cefc107c   David Howells   pipe: Use head an...
241
242
  		unsigned int tail = pipe->tail;
  		unsigned int mask = pipe->ring_size - 1;
e7d553d69   David Howells   pipe: Add notific...
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
  #ifdef CONFIG_WATCH_QUEUE
  		if (pipe->note_loss) {
  			struct watch_notification n;
  
  			if (total_len < 8) {
  				if (ret == 0)
  					ret = -ENOBUFS;
  				break;
  			}
  
  			n.type = WATCH_TYPE_META;
  			n.subtype = WATCH_META_LOSS_NOTIFICATION;
  			n.info = watch_sizeof(n);
  			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
  				if (ret == 0)
  					ret = -EFAULT;
  				break;
  			}
  			ret += sizeof(n);
  			total_len -= sizeof(n);
  			pipe->note_loss = false;
  		}
  #endif
8cefc107c   David Howells   pipe: Use head an...
266
267
  		if (!pipe_empty(head, tail)) {
  			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268
  			size_t chars = buf->len;
637b58c28   Al Viro   switch pipe_read(...
269
270
  			size_t written;
  			int error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
271

8cfba7638   David Howells   pipe: Allow buffe...
272
273
274
275
276
277
  			if (chars > total_len) {
  				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
  					if (ret == 0)
  						ret = -ENOBUFS;
  					break;
  				}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
  				chars = total_len;
8cfba7638   David Howells   pipe: Allow buffe...
279
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280

fba597db4   Miklos Szeredi   pipe: add pipe_bu...
281
  			error = pipe_buf_confirm(pipe, buf);
f84d75199   Jens Axboe   [PATCH] pipe: int...
282
  			if (error) {
5274f052e   Jens Axboe   [PATCH] Introduce...
283
  				if (!ret)
e5953cbdf   Nicolas Kaiser   pipe: fix failure...
284
  					ret = error;
5274f052e   Jens Axboe   [PATCH] Introduce...
285
286
  				break;
  			}
f84d75199   Jens Axboe   [PATCH] pipe: int...
287

fb9096a34   Al Viro   pipe: switch to -...
288
  			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
637b58c28   Al Viro   switch pipe_read(...
289
  			if (unlikely(written < chars)) {
341b446bc   Ingo Molnar   [PATCH] another r...
290
  				if (!ret)
637b58c28   Al Viro   switch pipe_read(...
291
  					ret = -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
293
294
295
296
  				break;
  			}
  			ret += chars;
  			buf->offset += chars;
  			buf->len -= chars;
9883035ae   Linus Torvalds   pipes: add a "pac...
297
298
299
300
301
302
  
  			/* Was it a packet buffer? Clean up and exit */
  			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
  				total_len = chars;
  				buf->len = 0;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
303
  			if (!buf->len) {
a779638cf   Miklos Szeredi   pipe: add pipe_bu...
304
  				pipe_buf_release(pipe, buf);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
305
  				spin_lock_irq(&pipe->rd_wait.lock);
e7d553d69   David Howells   pipe: Add notific...
306
307
308
309
  #ifdef CONFIG_WATCH_QUEUE
  				if (buf->flags & PIPE_BUF_FLAG_LOSS)
  					pipe->note_loss = true;
  #endif
8cefc107c   David Howells   pipe: Use head an...
310
311
  				tail++;
  				pipe->tail = tail;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
312
  				spin_unlock_irq(&pipe->rd_wait.lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
313
314
315
316
  			}
  			total_len -= chars;
  			if (!total_len)
  				break;	/* common path: read succeeded */
8cefc107c   David Howells   pipe: Use head an...
317
318
  			if (!pipe_empty(head, tail))	/* More to do? */
  				continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
319
  		}
8cefc107c   David Howells   pipe: Use head an...
320

923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
321
  		if (!pipe->writers)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
  			break;
a28c8b9db   Linus Torvalds   pipe: remove 'wai...
323
324
325
326
327
  		if (ret)
  			break;
  		if (filp->f_flags & O_NONBLOCK) {
  			ret = -EAGAIN;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
328
  		}
85190d15f   Linus Torvalds   pipe: don't use '...
329
  		__pipe_unlock(pipe);
d1c6a2aa0   Linus Torvalds   pipe: simplify si...
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
  
  		/*
  		 * We only get here if we didn't actually read anything.
  		 *
  		 * However, we could have seen (and removed) a zero-sized
  		 * pipe buffer, and might have made space in the buffers
  		 * that way.
  		 *
  		 * You can't make zero-sized pipe buffers by doing an empty
  		 * write (not even in packet mode), but they can happen if
  		 * the writer gets an EFAULT when trying to fill a buffer
  		 * that already got allocated and inserted in the buffer
  		 * array.
  		 *
  		 * So we still need to wake up any pending writers in the
  		 * _very_ unlikely case that the pipe was full, but we got
  		 * no data.
  		 */
fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
348
  		if (unlikely(was_full))
0ddad21d3   Linus Torvalds   pipe: use exclusi...
349
  			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
350
  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
d1c6a2aa0   Linus Torvalds   pipe: simplify si...
351
352
353
354
355
356
357
  
  		/*
  		 * But because we didn't read anything, at this point we can
  		 * just return directly with -ERESTARTSYS if we're interrupted,
  		 * since we've done any required wakeups and there's no need
  		 * to mark anything accessed. And we've dropped the lock.
  		 */
0ddad21d3   Linus Torvalds   pipe: use exclusi...
358
  		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
d1c6a2aa0   Linus Torvalds   pipe: simplify si...
359
  			return -ERESTARTSYS;
85190d15f   Linus Torvalds   pipe: don't use '...
360
  		__pipe_lock(pipe);
f467a6a66   Linus Torvalds   pipe: fix and cla...
361
  		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
362
  		/*
  		 * We slept as an exclusive waiter on rd_wait; unless the pipe
  		 * is empty when we finish, another reader is woken on exit.
  		 */
  		wake_next_reader = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
363
  	}
0ddad21d3   Linus Torvalds   pipe: use exclusi...
364
365
  	if (pipe_empty(pipe->head, pipe->tail))
  		wake_next_reader = false;
ebec73f47   Al Viro   introduce variant...
366
  	__pipe_unlock(pipe);
341b446bc   Ingo Molnar   [PATCH] another r...
367

fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
368
  	if (was_full)
0ddad21d3   Linus Torvalds   pipe: use exclusi...
369
  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
370
371
  	if (wake_next_reader)
  		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
372
  	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
373
374
375
376
  	if (ret > 0)
  		file_accessed(filp);
  	return ret;
  }
9883035ae   Linus Torvalds   pipes: add a "pac...
377
378
379
380
  /*
   * Returns 1 when @file has O_DIRECT set in f_flags, 0 otherwise.
   * pipe_write() uses the result to pick PIPE_BUF_FLAG_PACKET over
   * PIPE_BUF_FLAG_CAN_MERGE for the buffers it inserts.
   */
  static inline int is_packetized(struct file *file)
  {
  	if (file->f_flags & O_DIRECT)
  		return 1;
  	return 0;
  }
85190d15f   Linus Torvalds   pipe: don't use '...
381
382
383
384
385
386
387
388
389
390
  /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
  static inline bool pipe_writable(const struct pipe_inode_info *pipe)
  {
  	unsigned int head = READ_ONCE(pipe->head);
  	unsigned int tail = READ_ONCE(pipe->tail);
  	unsigned int max_usage = READ_ONCE(pipe->max_usage);
  
  	return !pipe_full(head, tail, max_usage) ||
  		!READ_ONCE(pipe->readers);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
391
  /*
   * Write the data described by @from into the pipe behind iocb->ki_filp.
   *
   * A sub-page remainder (total_len & (PAGE_SIZE-1)) is first merged into
   * the last buffer when the pipe is not empty, that buffer has
   * PIPE_BUF_FLAG_CAN_MERGE set and the data fits in its page; whatever is
   * left goes into newly inserted buffers of up to PAGE_SIZE bytes each.
   *
   * Returns the number of bytes written, 0 for a zero-length request, or a
   * negative errno: -EPIPE (no readers; SIGPIPE is sent as well), -EXDEV
   * (the pipe has a watch_queue), -EFAULT, -ENOMEM, -EAGAIN, -ERESTARTSYS,
   * or an error reported by pipe_buf_confirm() or file_update_time().
   */
  static ssize_t
f0d1bec9d   Al Viro   new helper: copy_...
392
  pipe_write(struct kiocb *iocb, struct iov_iter *from)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
393
  {
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
394
  	struct file *filp = iocb->ki_filp;
de32ec4cf   Al Viro   pipe: set file->p...
395
  	struct pipe_inode_info *pipe = filp->private_data;
8f868d68d   David Howells   pipe: Fix missing...
396
  	unsigned int head;
f0d1bec9d   Al Viro   new helper: copy_...
397
  	ssize_t ret = 0;
f0d1bec9d   Al Viro   new helper: copy_...
398
  	size_t total_len = iov_iter_count(from);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
399
  	ssize_t chars;
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
400
  	bool was_empty = false;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
401
  	bool wake_next_writer = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
402

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
403
404
405
  	/* Null write succeeds. */
  	if (unlikely(total_len == 0))
  		return 0;
ebec73f47   Al Viro   introduce variant...
406
  	__pipe_lock(pipe);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
407

923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
408
  	if (!pipe->readers) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
409
410
411
412
  		send_sig(SIGPIPE, current, 0);
  		ret = -EPIPE;
  		goto out;
  	}
c73be61ce   David Howells   pipe: Add general...
413
414
415
416
417
418
  #ifdef CONFIG_WATCH_QUEUE
  	if (pipe->watch_queue) {
  		ret = -EXDEV;
  		goto out;
  	}
  #endif
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
419
  	/*
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
420
421
422
423
  	 * If it wasn't empty we try to merge new data into
  	 * the last buffer.
  	 *
  	 * That naturally merges small writes, but it also
3a34b13a8   Linus Torvalds   pipe: make pipe w...
424
  	 * page-aligns the rest of the writes for large writes
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
425
426
  	 * spanning multiple pages.
  	 */
8cefc107c   David Howells   pipe: Use head an...
427
  	head = pipe->head;
3b844826b   Linus Torvalds   pipe: avoid unnec...
428
  	was_empty = pipe_empty(head, pipe->tail);
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
429
  	chars = total_len & (PAGE_SIZE-1);
3b844826b   Linus Torvalds   pipe: avoid unnec...
430
  	if (chars && !was_empty) {
8f868d68d   David Howells   pipe: Fix missing...
431
  		unsigned int mask = pipe->ring_size - 1;
8cefc107c   David Howells   pipe: Use head an...
432
  		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
433
  		int offset = buf->offset + buf->len;
341b446bc   Ingo Molnar   [PATCH] another r...
434

f6dd97558   Christoph Hellwig   pipe: merge anon_...
435
436
  		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
  		    offset + chars <= PAGE_SIZE) {
fba597db4   Miklos Szeredi   pipe: add pipe_bu...
437
  			ret = pipe_buf_confirm(pipe, buf);
6ae080699   Eric Biggers   fs/pipe.c: return...
438
  			if (ret)
5274f052e   Jens Axboe   [PATCH] Introduce...
439
  				goto out;
f84d75199   Jens Axboe   [PATCH] pipe: int...
440

f0d1bec9d   Al Viro   new helper: copy_...
441
442
  			ret = copy_page_from_iter(buf->page, offset, chars, from);
  			if (unlikely(ret < chars)) {
6ae080699   Eric Biggers   fs/pipe.c: return...
443
  				ret = -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
  				goto out;
f6762b7ad   Jens Axboe   [PATCH] pipe: ena...
445
  			}
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
446

6ae080699   Eric Biggers   fs/pipe.c: return...
447
  			buf->len += ret;
f0d1bec9d   Al Viro   new helper: copy_...
448
  			if (!iov_iter_count(from))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
449
450
451
452
453
  				goto out;
  		}
  	}
  
  	for (;;) {
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
454
  		if (!pipe->readers) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455
  			send_sig(SIGPIPE, current, 0);
341b446bc   Ingo Molnar   [PATCH] another r...
456
457
  			if (!ret)
  				ret = -EPIPE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
458
459
  			break;
  		}
8cefc107c   David Howells   pipe: Use head an...
460

a194dfe6e   David Howells   pipe: Rearrange s...
461
  		head = pipe->head;
8f868d68d   David Howells   pipe: Fix missing...
462
463
  		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
  			unsigned int mask = pipe->ring_size - 1;
8cefc107c   David Howells   pipe: Use head an...
464
  			struct pipe_buffer *buf = &pipe->bufs[head & mask];
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
465
  			struct page *page = pipe->tmp_page;
f0d1bec9d   Al Viro   new helper: copy_...
466
  			int copied;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
467
468
  
  			if (!page) {
d86133bd3   Vladimir Davydov   pipe: account to ...
469
  				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470
471
472
473
  				if (unlikely(!page)) {
  					ret = ret ? : -ENOMEM;
  					break;
  				}
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
474
  				pipe->tmp_page = page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475
  			}
a194dfe6e   David Howells   pipe: Rearrange s...
476
477
478
479
480
481
  
  			/* Allocate a slot in the ring in advance and attach an
  			 * empty buffer.  If we fault or otherwise fail to use
  			 * it, either the reader will consume it or it'll still
  			 * be there for the next write.
  			 */
0ddad21d3   Linus Torvalds   pipe: use exclusi...
482
  			spin_lock_irq(&pipe->rd_wait.lock);
a194dfe6e   David Howells   pipe: Rearrange s...
483
484
  
  			head = pipe->head;
8f868d68d   David Howells   pipe: Fix missing...
485
  			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
486
  				spin_unlock_irq(&pipe->rd_wait.lock);
8df441294   David Howells   pipe: Check for r...
487
488
  				continue;
  			}
a194dfe6e   David Howells   pipe: Rearrange s...
489
  			pipe->head = head + 1;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
490
  			spin_unlock_irq(&pipe->rd_wait.lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
491
492
  
  			/* Insert it into the buffer array */
a194dfe6e   David Howells   pipe: Rearrange s...
493
  			buf = &pipe->bufs[head & mask];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
494
495
496
  			buf->page = page;
  			buf->ops = &anon_pipe_buf_ops;
  			buf->offset = 0;
a194dfe6e   David Howells   pipe: Rearrange s...
497
  			buf->len = 0;
f6dd97558   Christoph Hellwig   pipe: merge anon_...
498
  			if (is_packetized(filp))
9883035ae   Linus Torvalds   pipes: add a "pac...
499
  				buf->flags = PIPE_BUF_FLAG_PACKET;
f6dd97558   Christoph Hellwig   pipe: merge anon_...
500
501
  			else
  				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
502
  			pipe->tmp_page = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
503

a194dfe6e   David Howells   pipe: Rearrange s...
504
505
506
507
508
509
510
511
512
  			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
  			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
  				if (!ret)
  					ret = -EFAULT;
  				break;
  			}
  			ret += copied;
  			buf->offset = 0;
  			buf->len = copied;
f0d1bec9d   Al Viro   new helper: copy_...
513
  			if (!iov_iter_count(from))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
514
515
  				break;
  		}
8cefc107c   David Howells   pipe: Use head an...
516

8f868d68d   David Howells   pipe: Fix missing...
517
  		if (!pipe_full(head, pipe->tail, pipe->max_usage))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
518
  			continue;
8cefc107c   David Howells   pipe: Use head an...
519
520
  
  		/* Wait for buffer space to become available. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
521
  		if (filp->f_flags & O_NONBLOCK) {
341b446bc   Ingo Molnar   [PATCH] another r...
522
523
  			if (!ret)
  				ret = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
524
525
526
  			break;
  		}
  		if (signal_pending(current)) {
341b446bc   Ingo Molnar   [PATCH] another r...
527
528
  			if (!ret)
  				ret = -ERESTARTSYS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
529
530
  			break;
  		}
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
531
532
533
534
535
536
537
  
  		/*
  		 * We're going to release the pipe lock and wait for more
  		 * space. We wake up any readers if necessary, and then
  		 * after waiting we need to re-check whether the pipe
  		 * became empty while we dropped the lock.
  		 */
85190d15f   Linus Torvalds   pipe: don't use '...
538
  		__pipe_unlock(pipe);
fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
539
  		if (was_empty)
0ddad21d3   Linus Torvalds   pipe: use exclusi...
540
  			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
541
  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
542
  		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
85190d15f   Linus Torvalds   pipe: don't use '...
543
  		__pipe_lock(pipe);
0dd1e3773   Jan Stancek   pipe: fix empty p...
544
  		was_empty = pipe_empty(pipe->head, pipe->tail);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
545
  		/*
  		 * We slept as an exclusive waiter on wr_wait; unless the pipe
  		 * is full when we finish, another writer is woken on exit.
  		 */
  		wake_next_writer = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
546
547
  	}
  out:
0ddad21d3   Linus Torvalds   pipe: use exclusi...
548
549
  	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
  		wake_next_writer = false;
ebec73f47   Al Viro   introduce variant...
550
  	__pipe_unlock(pipe);
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
551
552
553
554
555
556
557
558
559
  
  	/*
  	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
  	 * want the reader to start processing things asap, rather than
  	 * leave the data pending.
  	 *
  	 * This is particularly important for small writes, because of
  	 * how (for example) the GNU make jobserver uses small writes to
  	 * wake up pending jobs
3b844826b   Linus Torvalds   pipe: avoid unnec...
560
561
562
  	 *
  	 * Epoll nonsensically wants a wakeup whether the pipe
  	 * was already empty or not.
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
563
  	 */
fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
564
  	if (was_empty || pipe->poll_usage)
0ddad21d3   Linus Torvalds   pipe: use exclusi...
565
  		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
fe67f4dd8   Linus Torvalds   pipe: do FASYNC n...
566
  	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
567
568
  	if (wake_next_writer)
  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
7e775f46a   Dmitry Monakhov   fs/pipe.c: skip f...
569
  	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
c3b2da314   Josef Bacik   fs: introduce ino...
570
571
572
  		int err = file_update_time(filp);
  		if (err)
  			ret = err;
7e775f46a   Dmitry Monakhov   fs/pipe.c: skip f...
573
  		sb_end_write(file_inode(filp)->i_sb);
c3b2da314   Josef Bacik   fs: introduce ino...
574
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
576
  	return ret;
  }
d59d0b1b8   Andi Kleen   BKL-Removal: conv...
577
  static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578
  {
de32ec4cf   Al Viro   pipe: set file->p...
579
  	struct pipe_inode_info *pipe = filp->private_data;
8cefc107c   David Howells   pipe: Use head an...
580
  	int count, head, tail, mask;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
581
582
  
  	switch (cmd) {
c73be61ce   David Howells   pipe: Add general...
583
584
585
586
587
588
  	case FIONREAD:
  		__pipe_lock(pipe);
  		count = 0;
  		head = pipe->head;
  		tail = pipe->tail;
  		mask = pipe->ring_size - 1;
8cefc107c   David Howells   pipe: Use head an...
589

c73be61ce   David Howells   pipe: Add general...
590
591
592
593
594
  		while (tail != head) {
  			count += pipe->bufs[tail & mask].len;
  			tail++;
  		}
  		__pipe_unlock(pipe);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
595

c73be61ce   David Howells   pipe: Add general...
596
  		return put_user(count, (int __user *)arg);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
597

c73be61ce   David Howells   pipe: Add general...
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
  #ifdef CONFIG_WATCH_QUEUE
  	case IOC_WATCH_QUEUE_SET_SIZE: {
  		int ret;
  		__pipe_lock(pipe);
  		ret = watch_queue_set_size(pipe, arg);
  		__pipe_unlock(pipe);
  		return ret;
  	}
  
  	case IOC_WATCH_QUEUE_SET_FILTER:
  		return watch_queue_set_filter(
  			pipe, (struct watch_notification_filter __user *)arg);
  #endif
  
  	default:
  		return -ENOIOCTLCMD;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
614
615
  	}
  }
dd67081b3   Christoph Hellwig   pipe: convert to ...
616
  /* No kernel lock held - fine */
a11e1d432   Linus Torvalds   Revert changes to...
617
618
  static __poll_t
  pipe_poll(struct file *filp, poll_table *wait)
dd67081b3   Christoph Hellwig   pipe: convert to ...
619
  {
a11e1d432   Linus Torvalds   Revert changes to...
620
  	__poll_t mask;
dd67081b3   Christoph Hellwig   pipe: convert to ...
621
  	struct pipe_inode_info *pipe = filp->private_data;
ad910e36d   Linus Torvalds   pipe: fix poll/se...
622
  	unsigned int head, tail;
a11e1d432   Linus Torvalds   Revert changes to...
623

3b844826b   Linus Torvalds   pipe: avoid unnec...
624
  	/* Epoll has some historical nasty semantics, this enables them */
e6acf868f   Kuniyuki Iwashima   pipe: make poll_u...
625
  	WRITE_ONCE(pipe->poll_usage, true);
3b844826b   Linus Torvalds   pipe: avoid unnec...
626

ad910e36d   Linus Torvalds   pipe: fix poll/se...
627
  	/*
0ddad21d3   Linus Torvalds   pipe: use exclusi...
628
  	 * Reading pipe state only -- no need for acquiring the semaphore.
ad910e36d   Linus Torvalds   pipe: fix poll/se...
629
630
631
632
  	 *
  	 * But because this is racy, the code has to add the
  	 * entry to the poll table _first_ ..
  	 */
0ddad21d3   Linus Torvalds   pipe: use exclusi...
633
634
635
636
  	if (filp->f_mode & FMODE_READ)
  		poll_wait(filp, &pipe->rd_wait, wait);
  	if (filp->f_mode & FMODE_WRITE)
  		poll_wait(filp, &pipe->wr_wait, wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
637

ad910e36d   Linus Torvalds   pipe: fix poll/se...
638
639
640
641
642
643
644
  	/*
  	 * .. and only then can you do the racy tests. That way,
  	 * if something changes and you got it wrong, the poll
  	 * table entry will wake you up and fix it.
  	 */
  	head = READ_ONCE(pipe->head);
  	tail = READ_ONCE(pipe->tail);
a11e1d432   Linus Torvalds   Revert changes to...
645
  	mask = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
646
  	if (filp->f_mode & FMODE_READ) {
8cefc107c   David Howells   pipe: Use head an...
647
648
  		if (!pipe_empty(head, tail))
  			mask |= EPOLLIN | EPOLLRDNORM;
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
649
  		if (!pipe->writers && filp->f_version != pipe->w_counter)
a9a08845e   Linus Torvalds   vfs: do bulk POLL...
650
  			mask |= EPOLLHUP;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
651
652
653
  	}
  
  	if (filp->f_mode & FMODE_WRITE) {
6718b6f85   David Howells   pipe: Allow pipes...
654
  		if (!pipe_full(head, tail, pipe->max_usage))
8cefc107c   David Howells   pipe: Use head an...
655
  			mask |= EPOLLOUT | EPOLLWRNORM;
5e5d7a222   Pekka Enberg   [PATCH] pipe: rem...
656
  		/*
a9a08845e   Linus Torvalds   vfs: do bulk POLL...
657
  		 * Most Unices do not set EPOLLERR for FIFOs but on Linux they
5e5d7a222   Pekka Enberg   [PATCH] pipe: rem...
658
659
  		 * behave exactly like pipes for poll().
  		 */
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
660
  		if (!pipe->readers)
a9a08845e   Linus Torvalds   vfs: do bulk POLL...
661
  			mask |= EPOLLERR;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
662
663
664
665
  	}
  
  	return mask;
  }
b0d8d2292   Linus Torvalds   vfs: fix subtle u...
666
667
668
669
670
671
672
673
674
675
676
677
678
679
  static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
  {
  	int kill = 0;
  
  	spin_lock(&inode->i_lock);
  	if (!--pipe->files) {
  		inode->i_pipe = NULL;
  		kill = 1;
  	}
  	spin_unlock(&inode->i_lock);
  
  	if (kill)
  		free_pipe_info(pipe);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
680
  static int
599a0ac14   Al Viro   pipe: fold file_o...
681
  pipe_release(struct inode *inode, struct file *file)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
682
  {
b0d8d2292   Linus Torvalds   vfs: fix subtle u...
683
  	struct pipe_inode_info *pipe = file->private_data;
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
684

ebec73f47   Al Viro   introduce variant...
685
  	__pipe_lock(pipe);
599a0ac14   Al Viro   pipe: fold file_o...
686
687
688
689
  	if (file->f_mode & FMODE_READ)
  		pipe->readers--;
  	if (file->f_mode & FMODE_WRITE)
  		pipe->writers--;
341b446bc   Ingo Molnar   [PATCH] another r...
690

6551d5c56   Linus Torvalds   pipe: make sure t...
691
692
693
694
  	/* Was that the last reader or writer, but not the other side? */
  	if (!pipe->readers != !pipe->writers) {
  		wake_up_interruptible_all(&pipe->rd_wait);
  		wake_up_interruptible_all(&pipe->wr_wait);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
695
696
  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
  	}
ebec73f47   Al Viro   introduce variant...
698
  	__pipe_unlock(pipe);
ba5bb1473   Al Viro   pipe: take alloca...
699

b0d8d2292   Linus Torvalds   vfs: fix subtle u...
700
  	put_pipe_info(inode, pipe);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
701
702
703
704
  	return 0;
  }
  
  static int
599a0ac14   Al Viro   pipe: fold file_o...
705
  pipe_fasync(int fd, struct file *filp, int on)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
706
  {
de32ec4cf   Al Viro   pipe: set file->p...
707
  	struct pipe_inode_info *pipe = filp->private_data;
599a0ac14   Al Viro   pipe: fold file_o...
708
  	int retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
709

ebec73f47   Al Viro   introduce variant...
710
  	__pipe_lock(pipe);
599a0ac14   Al Viro   pipe: fold file_o...
711
712
713
  	if (filp->f_mode & FMODE_READ)
  		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
  	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
341b446bc   Ingo Molnar   [PATCH] another r...
714
  		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
599a0ac14   Al Viro   pipe: fold file_o...
715
716
  		if (retval < 0 && (filp->f_mode & FMODE_READ))
  			/* this can happen only if on == T */
e5bc49ba7   Oleg Nesterov   pipe_rdwr_fasync:...
717
718
  			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
  	}
ebec73f47   Al Viro   introduce variant...
719
  	__pipe_unlock(pipe);
60aa49243   Jonathan Corbet   Rationalize fasyn...
720
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
721
  }
c73be61ce   David Howells   pipe: Add general...
722
723
  unsigned long account_pipe_buffers(struct user_struct *user,
  				   unsigned long old, unsigned long new)
759c01142   Willy Tarreau   pipe: limit the p...
724
  {
9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
725
  	return atomic_long_add_return(new - old, &user->pipe_bufs);
759c01142   Willy Tarreau   pipe: limit the p...
726
  }
c73be61ce   David Howells   pipe: Add general...
727
  bool too_many_pipe_buffers_soft(unsigned long user_bufs)
759c01142   Willy Tarreau   pipe: limit the p...
728
  {
f73407618   Eric Biggers   pipe: read buffer...
729
730
731
  	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
  
  	return soft_limit && user_bufs > soft_limit;
759c01142   Willy Tarreau   pipe: limit the p...
732
  }
c73be61ce   David Howells   pipe: Add general...
733
  bool too_many_pipe_buffers_hard(unsigned long user_bufs)
759c01142   Willy Tarreau   pipe: limit the p...
734
  {
f73407618   Eric Biggers   pipe: read buffer...
735
736
737
  	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
  
  	return hard_limit && user_bufs > hard_limit;
759c01142   Willy Tarreau   pipe: limit the p...
738
  }
c73be61ce   David Howells   pipe: Add general...
739
  bool pipe_is_unprivileged_user(void)
85c2dd547   Eric Biggers   pipe: actually al...
740
741
742
  {
  	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
  }
7bee130e2   Al Viro   get rid of alloc_...
743
  struct pipe_inode_info *alloc_pipe_info(void)
3a326a2ce   Ingo Molnar   [PATCH] introduce...
744
  {
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
745
  	struct pipe_inode_info *pipe;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
746
747
  	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
  	struct user_struct *user = get_current_user();
9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
748
  	unsigned long user_bufs;
f73407618   Eric Biggers   pipe: read buffer...
749
  	unsigned int max_size = READ_ONCE(pipe_max_size);
3a326a2ce   Ingo Molnar   [PATCH] introduce...
750

d86133bd3   Vladimir Davydov   pipe: account to ...
751
  	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
752
753
  	if (pipe == NULL)
  		goto out_free_uid;
f73407618   Eric Biggers   pipe: read buffer...
754
755
  	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
  		pipe_bufs = max_size >> PAGE_SHIFT;
086e774a5   Michael Kerrisk (man-pages)   pipe: cap initial...
756

9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
757
  	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
a005ca0e6   Michael Kerrisk (man-pages)   pipe: fix limit c...
758

c73be61ce   David Howells   pipe: Add general...
759
  	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
46c4c9d1b   Alex Xu (Hello71)   pipe: increase mi...
760
761
  		user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
  		pipe_bufs = PIPE_MIN_DEF_BUFFERS;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
762
  	}
759c01142   Willy Tarreau   pipe: limit the p...
763

c73be61ce   David Howells   pipe: Add general...
764
  	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
a005ca0e6   Michael Kerrisk (man-pages)   pipe: fix limit c...
765
766
767
768
  		goto out_revert_acct;
  
  	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
  			     GFP_KERNEL_ACCOUNT);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
769
  	if (pipe->bufs) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
770
771
  		init_waitqueue_head(&pipe->rd_wait);
  		init_waitqueue_head(&pipe->wr_wait);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
772
  		pipe->r_counter = pipe->w_counter = 1;
6718b6f85   David Howells   pipe: Allow pipes...
773
  		pipe->max_usage = pipe_bufs;
8cefc107c   David Howells   pipe: Use head an...
774
  		pipe->ring_size = pipe_bufs;
c73be61ce   David Howells   pipe: Add general...
775
  		pipe->nr_accounted = pipe_bufs;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
776
  		pipe->user = user;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
777
778
  		mutex_init(&pipe->mutex);
  		return pipe;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
779
  	}
a005ca0e6   Michael Kerrisk (man-pages)   pipe: fix limit c...
780
  out_revert_acct:
9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
781
  	(void) account_pipe_buffers(user, pipe_bufs, 0);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
782
783
784
  	kfree(pipe);
  out_free_uid:
  	free_uid(user);
35f3d14db   Jens Axboe   pipe: add support...
785
  	return NULL;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
786
  }
4b8a8f1e4   Al Viro   get rid of the la...
787
  void free_pipe_info(struct pipe_inode_info *pipe)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
788
789
  {
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
790

c73be61ce   David Howells   pipe: Add general...
791
  #ifdef CONFIG_WATCH_QUEUE
8275b6699   David Howells   watch_queue, pipe...
792
  	if (pipe->watch_queue)
c73be61ce   David Howells   pipe: Add general...
793
  		watch_queue_clear(pipe->watch_queue);
c73be61ce   David Howells   pipe: Add general...
794
795
796
  #endif
  
  	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
759c01142   Willy Tarreau   pipe: limit the p...
797
  	free_uid(pipe->user);
8cefc107c   David Howells   pipe: Use head an...
798
  	for (i = 0; i < pipe->ring_size; i++) {
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
799
  		struct pipe_buffer *buf = pipe->bufs + i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
800
  		if (buf->ops)
a779638cf   Miklos Szeredi   pipe: add pipe_bu...
801
  			pipe_buf_release(pipe, buf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
802
  	}
8275b6699   David Howells   watch_queue, pipe...
803
804
805
806
  #ifdef CONFIG_WATCH_QUEUE
  	if (pipe->watch_queue)
  		put_watch_queue(pipe->watch_queue);
  #endif
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
807
808
  	if (pipe->tmp_page)
  		__free_page(pipe->tmp_page);
35f3d14db   Jens Axboe   pipe: add support...
809
  	kfree(pipe->bufs);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
810
  	kfree(pipe);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
811
  }
fa3536cc1   Eric Dumazet   [PATCH] Use __rea...
812
  /*
   * Mount used for anonymous pipes: get_pipe_inode() allocates inodes on its
   * superblock and create_pipe_files() creates the pipe files on it.
   */
  static struct vfsmount *pipe_mnt __read_mostly;
341b446bc   Ingo Molnar   [PATCH] another r...
813

c23fbb6bc   Eric Dumazet   VFS: delay the de...
814
815
816
817
818
819
  /*
   * pipefs_dname() is called from d_path().
   */
  static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
  {
  	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
75c3cfa85   David Howells   VFS: assorted wei...
820
  				d_inode(dentry)->i_ino);
c23fbb6bc   Eric Dumazet   VFS: delay the de...
821
  }
3ba13d179   Al Viro   constify dentry_o...
822
  /* Dentry operations for pipe dentries: only the d_dname callback is set. */
  static const struct dentry_operations pipefs_dentry_operations = {
c23fbb6bc   Eric Dumazet   VFS: delay the de...
823
  	.d_dname	= pipefs_dname,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
824
825
826
827
  };
  
  static struct inode * get_pipe_inode(void)
  {
a209dfc7b   Eric Dumazet   vfs: dont chain p...
828
  	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
829
  	struct pipe_inode_info *pipe;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
830
831
832
  
  	if (!inode)
  		goto fail_inode;
85fe4025c   Christoph Hellwig   fs: do not assign...
833
  	inode->i_ino = get_next_ino();
7bee130e2   Al Viro   get rid of alloc_...
834
  	pipe = alloc_pipe_info();
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
835
  	if (!pipe)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
836
  		goto fail_iput;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
837

ba5bb1473   Al Viro   pipe: take alloca...
838
839
  	inode->i_pipe = pipe;
  	pipe->files = 2;
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
840
  	pipe->readers = pipe->writers = 1;
599a0ac14   Al Viro   pipe: fold file_o...
841
  	inode->i_fop = &pipefifo_fops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
842
843
844
845
846
847
848
849
850
  
  	/*
  	 * Mark the inode dirty from the very beginning,
  	 * that way it will never be moved to the dirty
  	 * list because "mark_inode_dirty()" will think
  	 * that it already _is_ on the dirty list.
  	 */
  	inode->i_state = I_DIRTY;
  	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
da9592ede   David Howells   CRED: Wrap task c...
851
852
  	inode->i_uid = current_fsuid();
  	inode->i_gid = current_fsgid();
078cd8279   Deepa Dinamani   fs: Replace CURRE...
853
  	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
854

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
855
856
857
858
  	return inode;
  
  fail_iput:
  	iput(inode);
341b446bc   Ingo Molnar   [PATCH] another r...
859

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
860
861
862
  fail_inode:
  	return NULL;
  }
e4fad8e5d   Al Viro   consolidate pipe ...
863
  int create_pipe_files(struct file **res, int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
864
  {
e4fad8e5d   Al Viro   consolidate pipe ...
865
  	struct inode *inode = get_pipe_inode();
d6cbd281d   Andi Kleen   [PATCH] Some clea...
866
  	struct file *f;
8a018eb55   Qian Cai   pipe: Fix memory ...
867
  	int error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
868

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
869
  	if (!inode)
e4fad8e5d   Al Viro   consolidate pipe ...
870
  		return -ENFILE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
871

c73be61ce   David Howells   pipe: Add general...
872
  	if (flags & O_NOTIFICATION_PIPE) {
8a018eb55   Qian Cai   pipe: Fix memory ...
873
874
875
  		error = watch_queue_init(inode->i_pipe);
  		if (error) {
  			free_pipe_info(inode->i_pipe);
c73be61ce   David Howells   pipe: Add general...
876
  			iput(inode);
8a018eb55   Qian Cai   pipe: Fix memory ...
877
  			return error;
c73be61ce   David Howells   pipe: Add general...
878
  		}
c73be61ce   David Howells   pipe: Add general...
879
  	}
152b6372c   Al Viro   create_pipe_files...
880
881
882
  	f = alloc_file_pseudo(inode, pipe_mnt, "",
  				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
  				&pipefifo_fops);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
883
  	if (IS_ERR(f)) {
152b6372c   Al Viro   create_pipe_files...
884
885
886
  		free_pipe_info(inode->i_pipe);
  		iput(inode);
  		return PTR_ERR(f);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
887
  	}
341b446bc   Ingo Molnar   [PATCH] another r...
888

de32ec4cf   Al Viro   pipe: set file->p...
889
  	f->private_data = inode->i_pipe;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
890

183266f26   Al Viro   new helper: alloc...
891
892
  	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
  				  &pipefifo_fops);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
893
  	if (IS_ERR(res[0])) {
b10a4a9f7   Al Viro   create_pipe_files...
894
895
896
  		put_pipe_info(inode, inode->i_pipe);
  		fput(f);
  		return PTR_ERR(res[0]);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
897
  	}
de32ec4cf   Al Viro   pipe: set file->p...
898
  	res[0]->private_data = inode->i_pipe;
e4fad8e5d   Al Viro   consolidate pipe ...
899
  	res[1] = f;
d8e464ecc   Linus Torvalds   vfs: mark pipes a...
900
901
  	stream_open(inode, res[0]);
  	stream_open(inode, res[1]);
e4fad8e5d   Al Viro   consolidate pipe ...
902
  	return 0;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
903
  }
5b249b1b0   Al Viro   pipe(2) - race-fr...
904
  static int __do_pipe_flags(int *fd, struct file **files, int flags)
d6cbd281d   Andi Kleen   [PATCH] Some clea...
905
  {
d6cbd281d   Andi Kleen   [PATCH] Some clea...
906
907
  	int error;
  	int fdw, fdr;
c73be61ce   David Howells   pipe: Add general...
908
  	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
ed8cae8ba   Ulrich Drepper   flag parameters: ...
909
  		return -EINVAL;
e4fad8e5d   Al Viro   consolidate pipe ...
910
911
912
  	error = create_pipe_files(files, flags);
  	if (error)
  		return error;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
913

ed8cae8ba   Ulrich Drepper   flag parameters: ...
914
  	error = get_unused_fd_flags(flags);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
915
916
917
  	if (error < 0)
  		goto err_read_pipe;
  	fdr = error;
ed8cae8ba   Ulrich Drepper   flag parameters: ...
918
  	error = get_unused_fd_flags(flags);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
919
920
921
  	if (error < 0)
  		goto err_fdr;
  	fdw = error;
157cf649a   Al Viro   sanitize audit_fd...
922
  	audit_fd_pair(fdr, fdw);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
923
924
  	fd[0] = fdr;
  	fd[1] = fdw;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
925
926
927
928
929
  	return 0;
  
   err_fdr:
  	put_unused_fd(fdr);
   err_read_pipe:
e4fad8e5d   Al Viro   consolidate pipe ...
930
931
  	fput(files[0]);
  	fput(files[1]);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
932
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
933
  }
5b249b1b0   Al Viro   pipe(2) - race-fr...
934
935
936
937
938
939
940
941
942
943
  /*
   * do_pipe_flags - create a pipe and install both descriptors.
   * @fd:    receives the read (fd[0]) and write (fd[1]) descriptors
   * @flags: flags passed on to __do_pipe_flags()
   *
   * Returns 0 on success or the error from __do_pipe_flags().
   */
  int do_pipe_flags(int *fd, int flags)
  {
  	struct file *files[2];
  	int error;

  	error = __do_pipe_flags(fd, files, flags);
  	if (error)
  		return error;

  	fd_install(fd[0], files[0]);
  	fd_install(fd[1], files[1]);
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
944
  /*
d35c7b0e5   Ulrich Drepper   unified (weak) sy...
945
946
947
   * sys_pipe() is the normal C calling standard for creating
   * a pipe. It's not the way Unix traditionally does this, though.
   */
0a216dd1c   Dominik Brodowski   fs: add do_pipe2(...
948
  /*
   * do_pipe2 - common implementation behind the pipe() and pipe2() syscalls.
   * @fildes: user array that receives the two descriptor numbers
   * @flags:  flags passed on to __do_pipe_flags()
   *
   * The descriptors are only installed after the copy to user space has
   * succeeded; if the copy fails, both files and both reserved descriptors
   * are released and -EFAULT is returned.
   */
  static int do_pipe2(int __user *fildes, int flags)
  {
  	struct file *files[2];
  	int fd[2];
  	int error;

  	error = __do_pipe_flags(fd, files, flags);
  	if (error)
  		return error;

  	if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
  		fput(files[0]);
  		fput(files[1]);
  		put_unused_fd(fd[0]);
  		put_unused_fd(fd[1]);
  		return -EFAULT;
  	}

  	fd_install(fd[0], files[0]);
  	fd_install(fd[1], files[1]);
  	return 0;
  }
0a216dd1c   Dominik Brodowski   fs: add do_pipe2(...
968
969
970
971
  /* pipe2() system call: hands the user array and flags to do_pipe2(). */
  SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
  {
  	return do_pipe2(fildes, flags);
  }
2b6642199   Heiko Carstens   [CVE-2009-0029] S...
972
  /* pipe() system call: same as pipe2() with no flags set. */
  SYSCALL_DEFINE1(pipe, int __user *, fildes)
ed8cae8ba   Ulrich Drepper   flag parameters: ...
973
  {
0a216dd1c   Dominik Brodowski   fs: add do_pipe2(...
974
  	return do_pipe2(fildes, 0);
ed8cae8ba   Ulrich Drepper   flag parameters: ...
975
  }
472e5b056   Linus Torvalds   pipe: remove pipe...
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
  /*
   * This is the stupid "wait for pipe to be readable or writable"
   * model.
   *
   * See pipe_read/write() for the proper kind of exclusive wait,
   * but that requires that we wake up any other readers/writers
   * if we then do not end up reading everything (ie the whole
   * "wake_next_reader/writer" logic in pipe_read/write()).
   */
  /*
   * Drop the pipe lock, sleep interruptibly on rd_wait until
   * pipe_readable() holds, then retake the lock.  The result of the wait is
   * not reported to the caller.
   */
  void pipe_wait_readable(struct pipe_inode_info *pipe)
  {
  	pipe_unlock(pipe);
  	wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
  	pipe_lock(pipe);
  }
  
  /*
   * Drop the pipe lock, sleep interruptibly on wr_wait until
   * pipe_writable() holds, then retake the lock.  The result of the wait is
   * not reported to the caller.
   */
  void pipe_wait_writable(struct pipe_inode_info *pipe)
  {
  	pipe_unlock(pipe);
  	wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
  	pipe_lock(pipe);
  }
  
  /*
   * This depends on both the wait (here) and the wakeup (wake_up_partner)
   * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
   * race with the count check and waitqueue prep.
   *
   * Normally in order to avoid races, you'd do the prepare_to_wait() first,
   * then check the condition you're waiting for, and only then sleep. But
   * because of the pipe lock, we can check the condition before being on
   * the wait queue.
   *
   * We use the 'rd_wait' waitqueue for pipe partner waiting.
   */
fc7478a2b   Al Viro   pipe: switch wait...
1011
  static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
f776c7388   Al Viro   fold fifo.c into ...
1012
  {
472e5b056   Linus Torvalds   pipe: remove pipe...
1013
  	DEFINE_WAIT(rdwait);
8cefc107c   David Howells   pipe: Use head an...
1014
  	int cur = *cnt;
f776c7388   Al Viro   fold fifo.c into ...
1015
1016
  
  	while (cur == *cnt) {
472e5b056   Linus Torvalds   pipe: remove pipe...
1017
1018
1019
1020
1021
  		prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
  		pipe_unlock(pipe);
  		schedule();
  		finish_wait(&pipe->rd_wait, &rdwait);
  		pipe_lock(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1022
1023
1024
1025
1026
  		if (signal_pending(current))
  			break;
  	}
  	return cur == *cnt ? -ERESTARTSYS : 0;
  }
fc7478a2b   Al Viro   pipe: switch wait...
1027
  /*
   * Wake every interruptible sleeper on rd_wait, the queue that
   * wait_for_partner() sleeps on.
   */
  static void wake_up_partner(struct pipe_inode_info *pipe)
f776c7388   Al Viro   fold fifo.c into ...
1028
  {
6551d5c56   Linus Torvalds   pipe: make sure t...
1029
  	wake_up_interruptible_all(&pipe->rd_wait);
f776c7388   Al Viro   fold fifo.c into ...
1030
1031
1032
1033
1034
  }
  
  static int fifo_open(struct inode *inode, struct file *filp)
  {
  	struct pipe_inode_info *pipe;
599a0ac14   Al Viro   pipe: fold file_o...
1035
  	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
f776c7388   Al Viro   fold fifo.c into ...
1036
  	int ret;
ba5bb1473   Al Viro   pipe: take alloca...
1037
1038
1039
1040
1041
1042
1043
1044
1045
  	filp->f_version = 0;
  
  	spin_lock(&inode->i_lock);
  	if (inode->i_pipe) {
  		pipe = inode->i_pipe;
  		pipe->files++;
  		spin_unlock(&inode->i_lock);
  	} else {
  		spin_unlock(&inode->i_lock);
7bee130e2   Al Viro   get rid of alloc_...
1046
  		pipe = alloc_pipe_info();
f776c7388   Al Viro   fold fifo.c into ...
1047
  		if (!pipe)
ba5bb1473   Al Viro   pipe: take alloca...
1048
1049
1050
1051
1052
1053
  			return -ENOMEM;
  		pipe->files = 1;
  		spin_lock(&inode->i_lock);
  		if (unlikely(inode->i_pipe)) {
  			inode->i_pipe->files++;
  			spin_unlock(&inode->i_lock);
4b8a8f1e4   Al Viro   get rid of the la...
1054
  			free_pipe_info(pipe);
ba5bb1473   Al Viro   pipe: take alloca...
1055
1056
1057
1058
1059
  			pipe = inode->i_pipe;
  		} else {
  			inode->i_pipe = pipe;
  			spin_unlock(&inode->i_lock);
  		}
f776c7388   Al Viro   fold fifo.c into ...
1060
  	}
de32ec4cf   Al Viro   pipe: set file->p...
1061
  	filp->private_data = pipe;
ba5bb1473   Al Viro   pipe: take alloca...
1062
  	/* OK, we have a pipe and it's pinned down */
ebec73f47   Al Viro   introduce variant...
1063
  	__pipe_lock(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1064
1065
  
  	/* We can only do regular read/write on fifos */
d8e464ecc   Linus Torvalds   vfs: mark pipes a...
1066
  	stream_open(inode, filp);
f776c7388   Al Viro   fold fifo.c into ...
1067

d8e464ecc   Linus Torvalds   vfs: mark pipes a...
1068
  	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
f776c7388   Al Viro   fold fifo.c into ...
1069
1070
1071
1072
1073
1074
  	case FMODE_READ:
  	/*
  	 *  O_RDONLY
  	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
  	 *  opened, even when there is no process writing the FIFO.
  	 */
f776c7388   Al Viro   fold fifo.c into ...
1075
1076
  		pipe->r_counter++;
  		if (pipe->readers++ == 0)
fc7478a2b   Al Viro   pipe: switch wait...
1077
  			wake_up_partner(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1078

599a0ac14   Al Viro   pipe: fold file_o...
1079
  		if (!is_pipe && !pipe->writers) {
f776c7388   Al Viro   fold fifo.c into ...
1080
  			if ((filp->f_flags & O_NONBLOCK)) {
a9a08845e   Linus Torvalds   vfs: do bulk POLL...
1081
  				/* suppress EPOLLHUP until we have
f776c7388   Al Viro   fold fifo.c into ...
1082
1083
1084
  				 * seen a writer */
  				filp->f_version = pipe->w_counter;
  			} else {
fc7478a2b   Al Viro   pipe: switch wait...
1085
  				if (wait_for_partner(pipe, &pipe->w_counter))
f776c7388   Al Viro   fold fifo.c into ...
1086
1087
1088
1089
  					goto err_rd;
  			}
  		}
  		break;
8cefc107c   David Howells   pipe: Use head an...
1090

f776c7388   Al Viro   fold fifo.c into ...
1091
1092
1093
1094
1095
1096
1097
  	case FMODE_WRITE:
  	/*
  	 *  O_WRONLY
  	 *  POSIX.1 says that O_NONBLOCK means return -1 with
  	 *  errno=ENXIO when there is no process reading the FIFO.
  	 */
  		ret = -ENXIO;
599a0ac14   Al Viro   pipe: fold file_o...
1098
  		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
f776c7388   Al Viro   fold fifo.c into ...
1099
  			goto err;
f776c7388   Al Viro   fold fifo.c into ...
1100
1101
  		pipe->w_counter++;
  		if (!pipe->writers++)
fc7478a2b   Al Viro   pipe: switch wait...
1102
  			wake_up_partner(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1103

599a0ac14   Al Viro   pipe: fold file_o...
1104
  		if (!is_pipe && !pipe->readers) {
fc7478a2b   Al Viro   pipe: switch wait...
1105
  			if (wait_for_partner(pipe, &pipe->r_counter))
f776c7388   Al Viro   fold fifo.c into ...
1106
1107
1108
  				goto err_wr;
  		}
  		break;
8cefc107c   David Howells   pipe: Use head an...
1109

f776c7388   Al Viro   fold fifo.c into ...
1110
1111
1112
1113
1114
1115
1116
  	case FMODE_READ | FMODE_WRITE:
  	/*
  	 *  O_RDWR
  	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
  	 *  This implementation will NEVER block on a O_RDWR open, since
  	 *  the process can at least talk to itself.
  	 */
f776c7388   Al Viro   fold fifo.c into ...
1117
1118
1119
1120
1121
1122
  
  		pipe->readers++;
  		pipe->writers++;
  		pipe->r_counter++;
  		pipe->w_counter++;
  		if (pipe->readers == 1 || pipe->writers == 1)
fc7478a2b   Al Viro   pipe: switch wait...
1123
  			wake_up_partner(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1124
1125
1126
1127
1128
1129
1130
1131
  		break;
  
  	default:
  		ret = -EINVAL;
  		goto err;
  	}
  
  	/* Ok! */
ebec73f47   Al Viro   introduce variant...
1132
  	__pipe_unlock(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1133
1134
1135
1136
  	return 0;
  
  err_rd:
  	if (!--pipe->readers)
0ddad21d3   Linus Torvalds   pipe: use exclusi...
1137
  		wake_up_interruptible(&pipe->wr_wait);
f776c7388   Al Viro   fold fifo.c into ...
1138
1139
1140
1141
1142
  	ret = -ERESTARTSYS;
  	goto err;
  
  err_wr:
  	if (!--pipe->writers)
6551d5c56   Linus Torvalds   pipe: make sure t...
1143
  		wake_up_interruptible_all(&pipe->rd_wait);
f776c7388   Al Viro   fold fifo.c into ...
1144
1145
1146
1147
  	ret = -ERESTARTSYS;
  	goto err;
  
  err:
ebec73f47   Al Viro   introduce variant...
1148
  	__pipe_unlock(pipe);
b0d8d2292   Linus Torvalds   vfs: fix subtle u...
1149
1150
  
  	put_pipe_info(inode, pipe);
f776c7388   Al Viro   fold fifo.c into ...
1151
1152
  	return ret;
  }
599a0ac14   Al Viro   pipe: fold file_o...
1153
1154
1155
  const struct file_operations pipefifo_fops = {
  	.open		= fifo_open,
  	.llseek		= no_llseek,
fb9096a34   Al Viro   pipe: switch to -...
1156
  	.read_iter	= pipe_read,
f0d1bec9d   Al Viro   new helper: copy_...
1157
  	.write_iter	= pipe_write,
a11e1d432   Linus Torvalds   Revert changes to...
1158
  	.poll		= pipe_poll,
599a0ac14   Al Viro   pipe: fold file_o...
1159
1160
1161
  	.unlocked_ioctl	= pipe_ioctl,
  	.release	= pipe_release,
  	.fasync		= pipe_fasync,
f8ad8187c   Johannes Berg   fs/pipe: allow se...
1162
  	.splice_write	= iter_file_splice_write,
f776c7388   Al Viro   fold fifo.c into ...
1163
  };
d35c7b0e5   Ulrich Drepper   unified (weak) sy...
1164
  /*
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1165
   * Currently we rely on the pipe array holding a power-of-2 number
d3f14c485   Joe Lawrence   pipe: avoid round...
1166
   * of pages. Returns 0 on error.
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1167
   */
96e99be40   Eric Biggers   pipe: reject F_SE...
1168
  unsigned int round_pipe_size(unsigned long size)
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1169
  {
c4fed5a91   Eric Biggers   pipe: simplify ro...
1170
  	if (size > (1U << 31))
96e99be40   Eric Biggers   pipe: reject F_SE...
1171
  		return 0;
4c2e4befb   Eric Biggers   pipe, sysctl: dro...
1172
1173
  	/* Minimum pipe size, as required by POSIX */
  	if (size < PAGE_SIZE)
c4fed5a91   Eric Biggers   pipe: simplify ro...
1174
  		return PAGE_SIZE;
d3f14c485   Joe Lawrence   pipe: avoid round...
1175

c4fed5a91   Eric Biggers   pipe: simplify ro...
1176
  	return roundup_pow_of_two(size);
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1177
1178
1179
  }
  
  /*
c73be61ce   David Howells   pipe: Add general...
1180
   * Resize the pipe ring to a number of slots.
cf2fbc56c   David Howells   pipe: Fix missing...
1181
1182
1183
1184
   *
   * Note the pipe can be reduced in capacity, but only if the current
   * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
   * returned instead.
35f3d14db   Jens Axboe   pipe: add support...
1185
   */
c73be61ce   David Howells   pipe: Add general...
1186
  int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
35f3d14db   Jens Axboe   pipe: add support...
1187
1188
  {
  	struct pipe_buffer *bufs;
c73be61ce   David Howells   pipe: Add general...
1189
  	unsigned int head, tail, mask, n;
35f3d14db   Jens Axboe   pipe: add support...
1190

8cefc107c   David Howells   pipe: Use head an...
1191
  	bufs = kcalloc(nr_slots, sizeof(*bufs),
d86133bd3   Vladimir Davydov   pipe: account to ...
1192
  		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
c73be61ce   David Howells   pipe: Add general...
1193
1194
  	if (unlikely(!bufs))
  		return -ENOMEM;
35f3d14db   Jens Axboe   pipe: add support...
1195

cf2fbc56c   David Howells   pipe: Fix missing...
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
  	spin_lock_irq(&pipe->rd_wait.lock);
  	mask = pipe->ring_size - 1;
  	head = pipe->head;
  	tail = pipe->tail;
  
  	n = pipe_occupancy(head, tail);
  	if (nr_slots < n) {
  		spin_unlock_irq(&pipe->rd_wait.lock);
  		kfree(bufs);
  		return -EBUSY;
  	}
35f3d14db   Jens Axboe   pipe: add support...
1207
1208
  	/*
  	 * The pipe array wraps around, so just start the new one at zero
8cefc107c   David Howells   pipe: Use head an...
1209
  	 * and adjust the indices.
35f3d14db   Jens Axboe   pipe: add support...
1210
  	 */
8cefc107c   David Howells   pipe: Use head an...
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
  	if (n > 0) {
  		unsigned int h = head & mask;
  		unsigned int t = tail & mask;
  		if (h > t) {
  			memcpy(bufs, pipe->bufs + t,
  			       n * sizeof(struct pipe_buffer));
  		} else {
  			unsigned int tsize = pipe->ring_size - t;
  			if (h > 0)
  				memcpy(bufs + tsize, pipe->bufs,
  				       h * sizeof(struct pipe_buffer));
  			memcpy(bufs, pipe->bufs + t,
  			       tsize * sizeof(struct pipe_buffer));
  		}
35f3d14db   Jens Axboe   pipe: add support...
1225
  	}
8cefc107c   David Howells   pipe: Use head an...
1226
1227
  	head = n;
  	tail = 0;
35f3d14db   Jens Axboe   pipe: add support...
1228
1229
  	kfree(pipe->bufs);
  	pipe->bufs = bufs;
8cefc107c   David Howells   pipe: Use head an...
1230
  	pipe->ring_size = nr_slots;
c73be61ce   David Howells   pipe: Add general...
1231
1232
  	if (pipe->max_usage > nr_slots)
  		pipe->max_usage = nr_slots;
8cefc107c   David Howells   pipe: Use head an...
1233
1234
  	pipe->tail = tail;
  	pipe->head = head;
6551d5c56   Linus Torvalds   pipe: make sure t...
1235

cf2fbc56c   David Howells   pipe: Fix missing...
1236
  	spin_unlock_irq(&pipe->rd_wait.lock);
6551d5c56   Linus Torvalds   pipe: make sure t...
1237
1238
  	/* This might have made more room for writers */
  	wake_up_interruptible(&pipe->wr_wait);
c73be61ce   David Howells   pipe: Add general...
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
  	return 0;
  }
  
  /*
   * Allocate a new array of pipe buffers and copy the info over. Returns the
   * pipe size if successful, or return -ERROR on error.
   */
  static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
  {
  	unsigned long user_bufs;
  	unsigned int nr_slots, size;
  	long ret = 0;
  
  #ifdef CONFIG_WATCH_QUEUE
  	if (pipe->watch_queue)
  		return -EBUSY;
  #endif
  
  	size = round_pipe_size(arg);
  	nr_slots = size >> PAGE_SHIFT;
  
  	if (!nr_slots)
  		return -EINVAL;
  
  	/*
  	 * If trying to increase the pipe capacity, check that an
  	 * unprivileged user is not trying to exceed various limits
  	 * (soft limit check here, hard limit check just below).
  	 * Decreasing the pipe capacity is always permitted, even
  	 * if the user is currently over a limit.
  	 */
  	if (nr_slots > pipe->max_usage &&
  			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
  		return -EPERM;
  
  	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
  
  	if (nr_slots > pipe->max_usage &&
  			(too_many_pipe_buffers_hard(user_bufs) ||
  			 too_many_pipe_buffers_soft(user_bufs)) &&
  			pipe_is_unprivileged_user()) {
  		ret = -EPERM;
  		goto out_revert_acct;
  	}
  
  	ret = pipe_resize_ring(pipe, nr_slots);
  	if (ret < 0)
  		goto out_revert_acct;
  
  	pipe->max_usage = nr_slots;
  	pipe->nr_accounted = nr_slots;
6718b6f85   David Howells   pipe: Allow pipes...
1290
  	return pipe->max_usage * PAGE_SIZE;
b0b91d18e   Michael Kerrisk (man-pages)   pipe: fix limit c...
1291
1292
  
  out_revert_acct:
c73be61ce   David Howells   pipe: Add general...
1293
  	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
b0b91d18e   Michael Kerrisk (man-pages)   pipe: fix limit c...
1294
  	return ret;
35f3d14db   Jens Axboe   pipe: add support...
1295
  }
ff9da691c   Jens Axboe   pipe: change /pro...
1296
  /*
4e7b5671c   Christoph Hellwig   block: remove i_bdev
1297
1298
   * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
   * not enough to verify that this is a pipe.
720836465   Linus Torvalds   Un-inline get_pip...
1299
   */
c73be61ce   David Howells   pipe: Add general...
1300
  struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
720836465   Linus Torvalds   Un-inline get_pip...
1301
  {
c73be61ce   David Howells   pipe: Add general...
1302
1303
1304
1305
1306
1307
1308
1309
1310
  	struct pipe_inode_info *pipe = file->private_data;
  
  	if (file->f_op != &pipefifo_fops || !pipe)
  		return NULL;
  #ifdef CONFIG_WATCH_QUEUE
  	if (for_splice && pipe->watch_queue)
  		return NULL;
  #endif
  	return pipe;
720836465   Linus Torvalds   Un-inline get_pip...
1311
  }
35f3d14db   Jens Axboe   pipe: add support...
1312
1313
1314
1315
  long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
  {
  	struct pipe_inode_info *pipe;
  	long ret;
c73be61ce   David Howells   pipe: Add general...
1316
  	pipe = get_pipe_info(file, false);
35f3d14db   Jens Axboe   pipe: add support...
1317
1318
  	if (!pipe)
  		return -EBADF;
ebec73f47   Al Viro   introduce variant...
1319
  	__pipe_lock(pipe);
35f3d14db   Jens Axboe   pipe: add support...
1320
1321
  
  	switch (cmd) {
d37d41666   Michael Kerrisk (man-pages)   pipe: move limit ...
1322
1323
  	case F_SETPIPE_SZ:
  		ret = pipe_set_size(pipe, arg);
35f3d14db   Jens Axboe   pipe: add support...
1324
1325
  		break;
  	case F_GETPIPE_SZ:
6718b6f85   David Howells   pipe: Allow pipes...
1326
  		ret = pipe->max_usage * PAGE_SIZE;
35f3d14db   Jens Axboe   pipe: add support...
1327
1328
1329
1330
1331
  		break;
  	default:
  		ret = -EINVAL;
  		break;
  	}
ebec73f47   Al Viro   introduce variant...
1332
  	__pipe_unlock(pipe);
35f3d14db   Jens Axboe   pipe: add support...
1333
1334
  	return ret;
  }
ff0c7d15f   Nick Piggin   fs: avoid inode R...
1335
1336
  static const struct super_operations pipefs_ops = {
  	.destroy_inode = free_inode_nonrcu,
d70ef97ba   Pavel Emelyanov   fs/pipe.c: add ->...
1337
  	.statfs = simple_statfs,
ff0c7d15f   Nick Piggin   fs: avoid inode R...
1338
  };
35f3d14db   Jens Axboe   pipe: add support...
1339
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1340
1341
1342
1343
1344
   * pipefs should _never_ be mounted by userland - too much of security hassle,
   * no real gain from having the whole whorehouse mounted. So we don't need
   * any operations on the root directory. However, we need a non-trivial
   * d_name - pipe: will go nicely and kill the special-casing in procfs.
   */
4fa7ec5db   David Howells   vfs: Convert pipe...
1345
1346
  
  static int pipefs_init_fs_context(struct fs_context *fc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1347
  {
4fa7ec5db   David Howells   vfs: Convert pipe...
1348
1349
1350
1351
1352
1353
  	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
  	if (!ctx)
  		return -ENOMEM;
  	ctx->ops = &pipefs_ops;
  	ctx->dops = &pipefs_dentry_operations;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1354
1355
1356
1357
  }
  
  static struct file_system_type pipe_fs_type = {
  	.name		= "pipefs",
4fa7ec5db   David Howells   vfs: Convert pipe...
1358
  	.init_fs_context = pipefs_init_fs_context,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1359
1360
1361
1362
1363
1364
  	.kill_sb	= kill_anon_super,
  };
  
  /*
   * Register the pipefs filesystem type and create its kernel-internal
   * mount, stored in pipe_mnt.  If the mount fails, the registration is
   * undone.  Returns 0 on success or a negative error code.
   */
  static int __init init_pipe_fs(void)
  {
  	int err;
  
  	err = register_filesystem(&pipe_fs_type);
  	if (err)
  		return err;
  
  	pipe_mnt = kern_mount(&pipe_fs_type);
  	if (!IS_ERR(pipe_mnt))
  		return 0;
  
  	/* Mounting failed: report the error and drop the registration. */
  	err = PTR_ERR(pipe_mnt);
  	unregister_filesystem(&pipe_fs_type);
  	return err;
  }
  
  fs_initcall(init_pipe_fs);