Blame view

fs/pipe.c 33.8 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
  /*
   *  linux/fs/pipe.c
   *
   *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
   */
  
  #include <linux/mm.h>
  #include <linux/file.h>
  #include <linux/poll.h>
  #include <linux/slab.h>
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/fs.h>
35f3d14db   Jens Axboe   pipe: add support...
15
  #include <linux/log2.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/mount.h>
4fa7ec5db   David Howells   vfs: Convert pipe...
17
  #include <linux/pseudo_fs.h>
b502bd115   Muthu Kumar   magic.h: move som...
18
  #include <linux/magic.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
  #include <linux/pipe_fs_i.h>
  #include <linux/uio.h>
  #include <linux/highmem.h>
5274f052e   Jens Axboe   [PATCH] Introduce...
22
  #include <linux/pagemap.h>
db3495099   Al Viro   [PATCH] AUDIT_FD_...
23
  #include <linux/audit.h>
ba719baea   Ulrich Drepper   sys_pipe(): fix f...
24
  #include <linux/syscalls.h>
b492e95be   Jens Axboe   pipe: set lower a...
25
  #include <linux/fcntl.h>
d86133bd3   Vladimir Davydov   pipe: account to ...
26
  #include <linux/memcontrol.h>
c73be61ce   David Howells   pipe: Add general...
27
  #include <linux/watch_queue.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
29
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
30
  #include <asm/ioctls.h>
599a0ac14   Al Viro   pipe: fold file_o...
31
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  /*
b492e95be   Jens Axboe   pipe: set lower a...
33
   * The max size that a non-root user is allowed to grow the pipe. Can
ff9da691c   Jens Axboe   pipe: change /pro...
34
   * be set by root in /proc/sys/fs/pipe-max-size
b492e95be   Jens Axboe   pipe: set lower a...
35
   */
ff9da691c   Jens Axboe   pipe: change /pro...
36
  unsigned int pipe_max_size = 1048576;
759c01142   Willy Tarreau   pipe: limit the p...
37
38
39
40
41
  /* Maximum allocatable pages per user. Hard limit is unset by default, soft
   * matches default values.
   */
  unsigned long pipe_user_pages_hard;
  unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
b492e95be   Jens Axboe   pipe: set lower a...
42
  /*
8cefc107c   David Howells   pipe: Use head an...
43
44
45
46
47
48
   * We use head and tail indices that aren't masked off, except at the point of
   * dereference, but rather they're allowed to wrap naturally.  This means there
   * isn't a dead spot in the buffer, but the ring has to be a power of two and
   * <= 2^31.
   * -- David Howells 2019-09-23.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
50
51
52
53
54
55
56
57
   * Reads with count = 0 should always return 0.
   * -- Julian Bradfield 1999-06-07.
   *
   * FIFOs and Pipes now generate SIGIO for both readers and writers.
   * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
   *
   * pipe_read & write cleanup
   * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
   */
61e0d47c3   Miklos Szeredi   splice: add helpe...
58
59
  static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
  {
6447a3cf1   Al Viro   get rid of pipe->...
60
  	if (pipe->files)
72b0d9aac   Al Viro   pipe: don't use -...
61
  		mutex_lock_nested(&pipe->mutex, subclass);
61e0d47c3   Miklos Szeredi   splice: add helpe...
62
63
64
65
66
67
68
69
70
71
72
73
74
  }
  
  /*
   * Lock @pipe via pipe_lock_nested().
   *
   * The I_MUTEX_PARENT subclass is used because pipe_lock() nests
   * non-pipe inode locks (for writing to a file).
   */
  void pipe_lock(struct pipe_inode_info *pipe)
  {
  	pipe_lock_nested(pipe, I_MUTEX_PARENT);
  }
  EXPORT_SYMBOL(pipe_lock);
  
  void pipe_unlock(struct pipe_inode_info *pipe)
  {
6447a3cf1   Al Viro   get rid of pipe->...
75
  	if (pipe->files)
72b0d9aac   Al Viro   pipe: don't use -...
76
  		mutex_unlock(&pipe->mutex);
61e0d47c3   Miklos Szeredi   splice: add helpe...
77
78
  }
  EXPORT_SYMBOL(pipe_unlock);
ebec73f47   Al Viro   introduce variant...
79
80
81
82
83
84
85
86
87
  /* Unconditionally take pipe->mutex (no pipe->files check), as I_MUTEX_PARENT. */
  static inline void __pipe_lock(struct pipe_inode_info *pipe)
  {
  	struct mutex *lock = &pipe->mutex;

  	mutex_lock_nested(lock, I_MUTEX_PARENT);
  }
  
  /* Unconditionally release pipe->mutex (no pipe->files check). */
  static inline void __pipe_unlock(struct pipe_inode_info *pipe)
  {
  	struct mutex *lock = &pipe->mutex;

  	mutex_unlock(lock);
  }
61e0d47c3   Miklos Szeredi   splice: add helpe...
88
89
90
91
92
93
94
95
96
  /*
   * Lock two distinct pipes.
   *
   * The pipe with the lower address is always locked first (as
   * I_MUTEX_PARENT) and the other one second (as I_MUTEX_CHILD), so the
   * locking order does not depend on the order of the arguments.
   * Passing the same pipe twice is a BUG.
   */
  void pipe_double_lock(struct pipe_inode_info *pipe1,
  		      struct pipe_inode_info *pipe2)
  {
  	struct pipe_inode_info *first;
  	struct pipe_inode_info *second;

  	BUG_ON(pipe1 == pipe2);

  	if (pipe1 < pipe2) {
  		first = pipe1;
  		second = pipe2;
  	} else {
  		first = pipe2;
  		second = pipe1;
  	}

  	pipe_lock_nested(first, I_MUTEX_PARENT);
  	pipe_lock_nested(second, I_MUTEX_CHILD);
  }
341b446bc   Ingo Molnar   [PATCH] another r...
101
102
  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
  				  struct pipe_buffer *buf)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
  {
  	struct page *page = buf->page;
5274f052e   Jens Axboe   [PATCH] Introduce...
105
106
107
  	/*
  	 * If nobody else uses this page, and we don't already have a
  	 * temporary page, let's keep track of it as a one-deep
341b446bc   Ingo Molnar   [PATCH] another r...
108
  	 * allocation cache. (Otherwise just release our reference to it)
5274f052e   Jens Axboe   [PATCH] Introduce...
109
  	 */
341b446bc   Ingo Molnar   [PATCH] another r...
110
  	if (page_count(page) == 1 && !pipe->tmp_page)
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
111
  		pipe->tmp_page = page;
341b446bc   Ingo Molnar   [PATCH] another r...
112
  	else
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
113
  		put_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
114
  }
c928f642c   Christoph Hellwig   fs: rename pipe_b...
115
116
  static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
  		struct pipe_buffer *buf)
d86133bd3   Vladimir Davydov   pipe: account to ...
117
118
  {
  	struct page *page = buf->page;
c928f642c   Christoph Hellwig   fs: rename pipe_b...
119
120
121
122
123
  	if (page_count(page) != 1)
  		return false;
  	memcg_kmem_uncharge_page(page, 0);
  	__SetPageLocked(page);
  	return true;
d86133bd3   Vladimir Davydov   pipe: account to ...
124
  }
0845718da   Jens Axboe   pipe: add documen...
125
  /**
c928f642c   Christoph Hellwig   fs: rename pipe_b...
126
   * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
0845718da   Jens Axboe   pipe: add documen...
127
128
129
130
   * @pipe:	the pipe that the buffer belongs to
   * @buf:	the buffer to attempt to steal
   *
   * Description:
b51d63c6d   Randy Dunlap   kernel-doc: fix f...
131
   *	This function attempts to steal the &struct page attached to
0845718da   Jens Axboe   pipe: add documen...
132
133
   *	@buf. If successful, this function returns 0 and returns with
   *	the page locked. The caller may then reuse the page for whatever
b51d63c6d   Randy Dunlap   kernel-doc: fix f...
134
   *	he wishes; the typical use is insertion into a different file
0845718da   Jens Axboe   pipe: add documen...
135
136
   *	page cache.
   */
c928f642c   Christoph Hellwig   fs: rename pipe_b...
137
138
  bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
  		struct pipe_buffer *buf)
5abc97aa2   Jens Axboe   [PATCH] splice: a...
139
  {
46e678c96   Jens Axboe   [PATCH] splice: f...
140
  	struct page *page = buf->page;
0845718da   Jens Axboe   pipe: add documen...
141
142
143
144
145
  	/*
  	 * A reference of one is golden, that means that the owner of this
  	 * page is the only one holding a reference to it. lock the page
  	 * and return OK.
  	 */
46e678c96   Jens Axboe   [PATCH] splice: f...
146
  	if (page_count(page) == 1) {
46e678c96   Jens Axboe   [PATCH] splice: f...
147
  		lock_page(page);
c928f642c   Christoph Hellwig   fs: rename pipe_b...
148
  		return true;
46e678c96   Jens Axboe   [PATCH] splice: f...
149
  	}
c928f642c   Christoph Hellwig   fs: rename pipe_b...
150
  	return false;
5abc97aa2   Jens Axboe   [PATCH] splice: a...
151
  }
c928f642c   Christoph Hellwig   fs: rename pipe_b...
152
  EXPORT_SYMBOL(generic_pipe_buf_try_steal);
5abc97aa2   Jens Axboe   [PATCH] splice: a...
153

0845718da   Jens Axboe   pipe: add documen...
154
  /**
b51d63c6d   Randy Dunlap   kernel-doc: fix f...
155
   * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
0845718da   Jens Axboe   pipe: add documen...
156
157
158
159
160
161
162
163
   * @pipe:	the pipe that the buffer belongs to
   * @buf:	the buffer to get a reference to
   *
   * Description:
   *	This function grabs an extra reference to @buf. It's used in
   *	in the tee() system call, when we duplicate the buffers in one
   *	pipe into another.
   */
15fab63e1   Matthew Wilcox   fs: prevent page ...
164
  bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
70524490e   Jens Axboe   [PATCH] splice: a...
165
  {
15fab63e1   Matthew Wilcox   fs: prevent page ...
166
  	return try_get_page(buf->page);
70524490e   Jens Axboe   [PATCH] splice: a...
167
  }
51921cb74   Miklos Szeredi   mm: export generi...
168
  EXPORT_SYMBOL(generic_pipe_buf_get);
70524490e   Jens Axboe   [PATCH] splice: a...
169

0845718da   Jens Axboe   pipe: add documen...
170
  /**
6818173bd   Miklos Szeredi   splice: implement...
171
172
173
174
175
176
177
178
179
180
   * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
   * @pipe:	the pipe that the buffer belongs to
   * @buf:	the buffer to put a reference to
   *
   * Description:
   *	This function releases a reference to @buf.
   */
  void generic_pipe_buf_release(struct pipe_inode_info *pipe,
  			      struct pipe_buffer *buf)
  {
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
181
  	put_page(buf->page);
6818173bd   Miklos Szeredi   splice: implement...
182
  }
51921cb74   Miklos Szeredi   mm: export generi...
183
  EXPORT_SYMBOL(generic_pipe_buf_release);
6818173bd   Miklos Szeredi   splice: implement...
184

d4c3cca94   Eric Dumazet   [PATCH] constify ...
185
  static const struct pipe_buf_operations anon_pipe_buf_ops = {
c928f642c   Christoph Hellwig   fs: rename pipe_b...
186
187
188
  	.release	= anon_pipe_buf_release,
  	.try_steal	= anon_pipe_buf_try_steal,
  	.get		= generic_pipe_buf_get,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
189
  };
85190d15f   Linus Torvalds   pipe: don't use '...
190
191
192
193
194
195
196
197
198
  /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
  static inline bool pipe_readable(const struct pipe_inode_info *pipe)
  {
  	unsigned int head = READ_ONCE(pipe->head);
  	unsigned int tail = READ_ONCE(pipe->tail);
  	unsigned int writers = READ_ONCE(pipe->writers);
  
  	return !pipe_empty(head, tail) || !writers;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
199
  static ssize_t
fb9096a34   Al Viro   pipe: switch to -...
200
  pipe_read(struct kiocb *iocb, struct iov_iter *to)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201
  {
fb9096a34   Al Viro   pipe: switch to -...
202
  	size_t total_len = iov_iter_count(to);
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
203
  	struct file *filp = iocb->ki_filp;
de32ec4cf   Al Viro   pipe: set file->p...
204
  	struct pipe_inode_info *pipe = filp->private_data;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
205
  	bool was_full, wake_next_reader = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
206
  	ssize_t ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
209
210
  	/* Null read succeeds. */
  	if (unlikely(total_len == 0))
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
  	ret = 0;
ebec73f47   Al Viro   introduce variant...
212
  	__pipe_lock(pipe);
f467a6a66   Linus Torvalds   pipe: fix and cla...
213
214
215
216
217
218
219
220
221
222
  
  	/*
  	 * We only wake up writers if the pipe was full when we started
  	 * reading in order to avoid unnecessary wakeups.
  	 *
  	 * But when we do wake up writers, we do so using a sync wakeup
  	 * (WF_SYNC), because we want them to get going and generate more
  	 * data for us.
  	 */
  	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223
  	for (;;) {
8cefc107c   David Howells   pipe: Use head an...
224
225
226
  		unsigned int head = pipe->head;
  		unsigned int tail = pipe->tail;
  		unsigned int mask = pipe->ring_size - 1;
e7d553d69   David Howells   pipe: Add notific...
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
  #ifdef CONFIG_WATCH_QUEUE
  		if (pipe->note_loss) {
  			struct watch_notification n;
  
  			if (total_len < 8) {
  				if (ret == 0)
  					ret = -ENOBUFS;
  				break;
  			}
  
  			n.type = WATCH_TYPE_META;
  			n.subtype = WATCH_META_LOSS_NOTIFICATION;
  			n.info = watch_sizeof(n);
  			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
  				if (ret == 0)
  					ret = -EFAULT;
  				break;
  			}
  			ret += sizeof(n);
  			total_len -= sizeof(n);
  			pipe->note_loss = false;
  		}
  #endif
8cefc107c   David Howells   pipe: Use head an...
250
251
  		if (!pipe_empty(head, tail)) {
  			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252
  			size_t chars = buf->len;
637b58c28   Al Viro   switch pipe_read(...
253
254
  			size_t written;
  			int error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
255

8cfba7638   David Howells   pipe: Allow buffe...
256
257
258
259
260
261
  			if (chars > total_len) {
  				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
  					if (ret == 0)
  						ret = -ENOBUFS;
  					break;
  				}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262
  				chars = total_len;
8cfba7638   David Howells   pipe: Allow buffe...
263
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264

fba597db4   Miklos Szeredi   pipe: add pipe_bu...
265
  			error = pipe_buf_confirm(pipe, buf);
f84d75199   Jens Axboe   [PATCH] pipe: int...
266
  			if (error) {
5274f052e   Jens Axboe   [PATCH] Introduce...
267
  				if (!ret)
e5953cbdf   Nicolas Kaiser   pipe: fix failure...
268
  					ret = error;
5274f052e   Jens Axboe   [PATCH] Introduce...
269
270
  				break;
  			}
f84d75199   Jens Axboe   [PATCH] pipe: int...
271

fb9096a34   Al Viro   pipe: switch to -...
272
  			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
637b58c28   Al Viro   switch pipe_read(...
273
  			if (unlikely(written < chars)) {
341b446bc   Ingo Molnar   [PATCH] another r...
274
  				if (!ret)
637b58c28   Al Viro   switch pipe_read(...
275
  					ret = -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
276
277
278
279
280
  				break;
  			}
  			ret += chars;
  			buf->offset += chars;
  			buf->len -= chars;
9883035ae   Linus Torvalds   pipes: add a "pac...
281
282
283
284
285
286
  
  			/* Was it a packet buffer? Clean up and exit */
  			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
  				total_len = chars;
  				buf->len = 0;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
287
  			if (!buf->len) {
a779638cf   Miklos Szeredi   pipe: add pipe_bu...
288
  				pipe_buf_release(pipe, buf);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
289
  				spin_lock_irq(&pipe->rd_wait.lock);
e7d553d69   David Howells   pipe: Add notific...
290
291
292
293
  #ifdef CONFIG_WATCH_QUEUE
  				if (buf->flags & PIPE_BUF_FLAG_LOSS)
  					pipe->note_loss = true;
  #endif
8cefc107c   David Howells   pipe: Use head an...
294
295
  				tail++;
  				pipe->tail = tail;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
296
  				spin_unlock_irq(&pipe->rd_wait.lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
298
299
300
  			}
  			total_len -= chars;
  			if (!total_len)
  				break;	/* common path: read succeeded */
8cefc107c   David Howells   pipe: Use head an...
301
302
  			if (!pipe_empty(head, tail))	/* More to do? */
  				continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
303
  		}
8cefc107c   David Howells   pipe: Use head an...
304

923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
305
  		if (!pipe->writers)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
306
  			break;
a28c8b9db   Linus Torvalds   pipe: remove 'wai...
307
308
309
310
311
  		if (ret)
  			break;
  		if (filp->f_flags & O_NONBLOCK) {
  			ret = -EAGAIN;
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
  		}
85190d15f   Linus Torvalds   pipe: don't use '...
313
  		__pipe_unlock(pipe);
d1c6a2aa0   Linus Torvalds   pipe: simplify si...
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
  
  		/*
  		 * We only get here if we didn't actually read anything.
  		 *
  		 * However, we could have seen (and removed) a zero-sized
  		 * pipe buffer, and might have made space in the buffers
  		 * that way.
  		 *
  		 * You can't make zero-sized pipe buffers by doing an empty
  		 * write (not even in packet mode), but they can happen if
  		 * the writer gets an EFAULT when trying to fill a buffer
  		 * that already got allocated and inserted in the buffer
  		 * array.
  		 *
  		 * So we still need to wake up any pending writers in the
  		 * _very_ unlikely case that the pipe was full, but we got
  		 * no data.
  		 */
  		if (unlikely(was_full)) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
333
  			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
f467a6a66   Linus Torvalds   pipe: fix and cla...
334
335
  			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
  		}
d1c6a2aa0   Linus Torvalds   pipe: simplify si...
336
337
338
339
340
341
342
  
  		/*
  		 * But because we didn't read anything, at this point we can
  		 * just return directly with -ERESTARTSYS if we're interrupted,
  		 * since we've done any required wakeups and there's no need
  		 * to mark anything accessed. And we've dropped the lock.
  		 */
0ddad21d3   Linus Torvalds   pipe: use exclusi...
343
  		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
d1c6a2aa0   Linus Torvalds   pipe: simplify si...
344
  			return -ERESTARTSYS;
85190d15f   Linus Torvalds   pipe: don't use '...
345
  		__pipe_lock(pipe);
f467a6a66   Linus Torvalds   pipe: fix and cla...
346
  		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
347
  		wake_next_reader = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
348
  	}
0ddad21d3   Linus Torvalds   pipe: use exclusi...
349
350
  	if (pipe_empty(pipe->head, pipe->tail))
  		wake_next_reader = false;
ebec73f47   Al Viro   introduce variant...
351
  	__pipe_unlock(pipe);
341b446bc   Ingo Molnar   [PATCH] another r...
352

f467a6a66   Linus Torvalds   pipe: fix and cla...
353
  	if (was_full) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
354
  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
355
  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
356
  	}
0ddad21d3   Linus Torvalds   pipe: use exclusi...
357
358
  	if (wake_next_reader)
  		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
359
360
361
362
  	if (ret > 0)
  		file_accessed(filp);
  	return ret;
  }
9883035ae   Linus Torvalds   pipes: add a "pac...
363
364
365
366
  /* Returns 1 if @file was opened with O_DIRECT (packet mode), else 0. */
  static inline int is_packetized(struct file *file)
  {
  	return !!(file->f_flags & O_DIRECT);
  }
85190d15f   Linus Torvalds   pipe: don't use '...
367
368
369
370
371
372
373
374
375
376
  /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
  static inline bool pipe_writable(const struct pipe_inode_info *pipe)
  {
  	unsigned int head = READ_ONCE(pipe->head);
  	unsigned int tail = READ_ONCE(pipe->tail);
  	unsigned int max_usage = READ_ONCE(pipe->max_usage);
  
  	return !pipe_full(head, tail, max_usage) ||
  		!READ_ONCE(pipe->readers);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
377
  static ssize_t
f0d1bec9d   Al Viro   new helper: copy_...
378
  pipe_write(struct kiocb *iocb, struct iov_iter *from)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
379
  {
ee0b3e671   Badari Pulavarty   [PATCH] Remove re...
380
  	struct file *filp = iocb->ki_filp;
de32ec4cf   Al Viro   pipe: set file->p...
381
  	struct pipe_inode_info *pipe = filp->private_data;
8f868d68d   David Howells   pipe: Fix missing...
382
  	unsigned int head;
f0d1bec9d   Al Viro   new helper: copy_...
383
  	ssize_t ret = 0;
f0d1bec9d   Al Viro   new helper: copy_...
384
  	size_t total_len = iov_iter_count(from);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
385
  	ssize_t chars;
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
386
  	bool was_empty = false;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
387
  	bool wake_next_writer = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
388

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
389
390
391
  	/* Null write succeeds. */
  	if (unlikely(total_len == 0))
  		return 0;
ebec73f47   Al Viro   introduce variant...
392
  	__pipe_lock(pipe);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
393

923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
394
  	if (!pipe->readers) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
395
396
397
398
  		send_sig(SIGPIPE, current, 0);
  		ret = -EPIPE;
  		goto out;
  	}
c73be61ce   David Howells   pipe: Add general...
399
400
401
402
403
404
  #ifdef CONFIG_WATCH_QUEUE
  	if (pipe->watch_queue) {
  		ret = -EXDEV;
  		goto out;
  	}
  #endif
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
405
406
407
408
409
410
411
412
413
414
415
  	/*
  	 * Only wake up if the pipe started out empty, since
  	 * otherwise there should be no readers waiting.
  	 *
  	 * If it wasn't empty we try to merge new data into
  	 * the last buffer.
  	 *
  	 * That naturally merges small writes, but it also
  	 * page-aligs the rest of the writes for large writes
  	 * spanning multiple pages.
  	 */
8cefc107c   David Howells   pipe: Use head an...
416
  	head = pipe->head;
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
417
418
419
  	was_empty = pipe_empty(head, pipe->tail);
  	chars = total_len & (PAGE_SIZE-1);
  	if (chars && !was_empty) {
8f868d68d   David Howells   pipe: Fix missing...
420
  		unsigned int mask = pipe->ring_size - 1;
8cefc107c   David Howells   pipe: Use head an...
421
  		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
422
  		int offset = buf->offset + buf->len;
341b446bc   Ingo Molnar   [PATCH] another r...
423

f6dd97558   Christoph Hellwig   pipe: merge anon_...
424
425
  		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
  		    offset + chars <= PAGE_SIZE) {
fba597db4   Miklos Szeredi   pipe: add pipe_bu...
426
  			ret = pipe_buf_confirm(pipe, buf);
6ae080699   Eric Biggers   fs/pipe.c: return...
427
  			if (ret)
5274f052e   Jens Axboe   [PATCH] Introduce...
428
  				goto out;
f84d75199   Jens Axboe   [PATCH] pipe: int...
429

f0d1bec9d   Al Viro   new helper: copy_...
430
431
  			ret = copy_page_from_iter(buf->page, offset, chars, from);
  			if (unlikely(ret < chars)) {
6ae080699   Eric Biggers   fs/pipe.c: return...
432
  				ret = -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
433
  				goto out;
f6762b7ad   Jens Axboe   [PATCH] pipe: ena...
434
  			}
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
435

6ae080699   Eric Biggers   fs/pipe.c: return...
436
  			buf->len += ret;
f0d1bec9d   Al Viro   new helper: copy_...
437
  			if (!iov_iter_count(from))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
438
439
440
441
442
  				goto out;
  		}
  	}
  
  	for (;;) {
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
443
  		if (!pipe->readers) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
  			send_sig(SIGPIPE, current, 0);
341b446bc   Ingo Molnar   [PATCH] another r...
445
446
  			if (!ret)
  				ret = -EPIPE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
447
448
  			break;
  		}
8cefc107c   David Howells   pipe: Use head an...
449

a194dfe6e   David Howells   pipe: Rearrange s...
450
  		head = pipe->head;
8f868d68d   David Howells   pipe: Fix missing...
451
452
  		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
  			unsigned int mask = pipe->ring_size - 1;
8cefc107c   David Howells   pipe: Use head an...
453
  			struct pipe_buffer *buf = &pipe->bufs[head & mask];
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
454
  			struct page *page = pipe->tmp_page;
f0d1bec9d   Al Viro   new helper: copy_...
455
  			int copied;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
456
457
  
  			if (!page) {
d86133bd3   Vladimir Davydov   pipe: account to ...
458
  				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
459
460
461
462
  				if (unlikely(!page)) {
  					ret = ret ? : -ENOMEM;
  					break;
  				}
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
463
  				pipe->tmp_page = page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
464
  			}
a194dfe6e   David Howells   pipe: Rearrange s...
465
466
467
468
469
470
  
  			/* Allocate a slot in the ring in advance and attach an
  			 * empty buffer.  If we fault or otherwise fail to use
  			 * it, either the reader will consume it or it'll still
  			 * be there for the next write.
  			 */
0ddad21d3   Linus Torvalds   pipe: use exclusi...
471
  			spin_lock_irq(&pipe->rd_wait.lock);
a194dfe6e   David Howells   pipe: Rearrange s...
472
473
  
  			head = pipe->head;
8f868d68d   David Howells   pipe: Fix missing...
474
  			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
475
  				spin_unlock_irq(&pipe->rd_wait.lock);
8df441294   David Howells   pipe: Check for r...
476
477
  				continue;
  			}
a194dfe6e   David Howells   pipe: Rearrange s...
478
  			pipe->head = head + 1;
0ddad21d3   Linus Torvalds   pipe: use exclusi...
479
  			spin_unlock_irq(&pipe->rd_wait.lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
480
481
  
  			/* Insert it into the buffer array */
a194dfe6e   David Howells   pipe: Rearrange s...
482
  			buf = &pipe->bufs[head & mask];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
483
484
485
  			buf->page = page;
  			buf->ops = &anon_pipe_buf_ops;
  			buf->offset = 0;
a194dfe6e   David Howells   pipe: Rearrange s...
486
  			buf->len = 0;
f6dd97558   Christoph Hellwig   pipe: merge anon_...
487
  			if (is_packetized(filp))
9883035ae   Linus Torvalds   pipes: add a "pac...
488
  				buf->flags = PIPE_BUF_FLAG_PACKET;
f6dd97558   Christoph Hellwig   pipe: merge anon_...
489
490
  			else
  				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
491
  			pipe->tmp_page = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
492

a194dfe6e   David Howells   pipe: Rearrange s...
493
494
495
496
497
498
499
500
501
  			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
  			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
  				if (!ret)
  					ret = -EFAULT;
  				break;
  			}
  			ret += copied;
  			buf->offset = 0;
  			buf->len = copied;
f0d1bec9d   Al Viro   new helper: copy_...
502
  			if (!iov_iter_count(from))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
503
504
  				break;
  		}
8cefc107c   David Howells   pipe: Use head an...
505

8f868d68d   David Howells   pipe: Fix missing...
506
  		if (!pipe_full(head, pipe->tail, pipe->max_usage))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
507
  			continue;
8cefc107c   David Howells   pipe: Use head an...
508
509
  
  		/* Wait for buffer space to become available. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510
  		if (filp->f_flags & O_NONBLOCK) {
341b446bc   Ingo Molnar   [PATCH] another r...
511
512
  			if (!ret)
  				ret = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
513
514
515
  			break;
  		}
  		if (signal_pending(current)) {
341b446bc   Ingo Molnar   [PATCH] another r...
516
517
  			if (!ret)
  				ret = -ERESTARTSYS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
518
519
  			break;
  		}
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
520
521
522
523
524
525
526
  
  		/*
  		 * We're going to release the pipe lock and wait for more
  		 * space. We wake up any readers if necessary, and then
  		 * after waiting we need to re-check whether the pipe
  		 * become empty while we dropped the lock.
  		 */
85190d15f   Linus Torvalds   pipe: don't use '...
527
  		__pipe_unlock(pipe);
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
528
  		if (was_empty) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
529
  			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
530
531
  			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  		}
0ddad21d3   Linus Torvalds   pipe: use exclusi...
532
  		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
85190d15f   Linus Torvalds   pipe: don't use '...
533
  		__pipe_lock(pipe);
0dd1e3773   Jan Stancek   pipe: fix empty p...
534
  		was_empty = pipe_empty(pipe->head, pipe->tail);
0ddad21d3   Linus Torvalds   pipe: use exclusi...
535
  		wake_next_writer = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
536
537
  	}
  out:
0ddad21d3   Linus Torvalds   pipe: use exclusi...
538
539
  	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
  		wake_next_writer = false;
ebec73f47   Al Viro   introduce variant...
540
  	__pipe_unlock(pipe);
1b6b26ae7   Linus Torvalds   pipe: fix and cla...
541
542
543
544
545
546
547
548
549
550
551
  
  	/*
  	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
  	 * want the reader to start processing things asap, rather than
  	 * leave the data pending.
  	 *
  	 * This is particularly important for small writes, because of
  	 * how (for example) the GNU make jobserver uses small writes to
  	 * wake up pending jobs
  	 */
  	if (was_empty) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
552
  		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
553
  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
554
  	}
0ddad21d3   Linus Torvalds   pipe: use exclusi...
555
556
  	if (wake_next_writer)
  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
7e775f46a   Dmitry Monakhov   fs/pipe.c: skip f...
557
  	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
c3b2da314   Josef Bacik   fs: introduce ino...
558
559
560
  		int err = file_update_time(filp);
  		if (err)
  			ret = err;
7e775f46a   Dmitry Monakhov   fs/pipe.c: skip f...
561
  		sb_end_write(file_inode(filp)->i_sb);
c3b2da314   Josef Bacik   fs: introduce ino...
562
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
563
564
  	return ret;
  }
d59d0b1b8   Andi Kleen   BKL-Removal: conv...
565
  static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566
  {
de32ec4cf   Al Viro   pipe: set file->p...
567
  	struct pipe_inode_info *pipe = filp->private_data;
8cefc107c   David Howells   pipe: Use head an...
568
  	int count, head, tail, mask;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
569
570
  
  	switch (cmd) {
c73be61ce   David Howells   pipe: Add general...
571
572
573
574
575
576
  	case FIONREAD:
  		__pipe_lock(pipe);
  		count = 0;
  		head = pipe->head;
  		tail = pipe->tail;
  		mask = pipe->ring_size - 1;
8cefc107c   David Howells   pipe: Use head an...
577

c73be61ce   David Howells   pipe: Add general...
578
579
580
581
582
  		while (tail != head) {
  			count += pipe->bufs[tail & mask].len;
  			tail++;
  		}
  		__pipe_unlock(pipe);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
583

c73be61ce   David Howells   pipe: Add general...
584
  		return put_user(count, (int __user *)arg);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
585

c73be61ce   David Howells   pipe: Add general...
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
  #ifdef CONFIG_WATCH_QUEUE
  	case IOC_WATCH_QUEUE_SET_SIZE: {
  		int ret;
  		__pipe_lock(pipe);
  		ret = watch_queue_set_size(pipe, arg);
  		__pipe_unlock(pipe);
  		return ret;
  	}
  
  	case IOC_WATCH_QUEUE_SET_FILTER:
  		return watch_queue_set_filter(
  			pipe, (struct watch_notification_filter __user *)arg);
  #endif
  
  	default:
  		return -ENOIOCTLCMD;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602
603
  	}
  }
dd67081b3   Christoph Hellwig   pipe: convert to ...
604
  /* No kernel lock held - fine */
a11e1d432   Linus Torvalds   Revert changes to...
605
606
  /*
   * poll() handler.  Returns the set of EPOLL* events currently true for
   * the side(s) of the pipe this file was opened for.
   */
  static __poll_t
  pipe_poll(struct file *filp, poll_table *wait)
  {
  	struct pipe_inode_info *pipe = filp->private_data;
  	const bool rd = filp->f_mode & FMODE_READ;
  	const bool wr = filp->f_mode & FMODE_WRITE;
  	unsigned int head, tail;
  	__poll_t events = 0;
  
  	/*
  	 * Reading pipe state only -- no need for acquiring the semaphore.
  	 *
  	 * But because this is racy, the code has to add the
  	 * entry to the poll table _first_ ..
  	 */
  	if (rd)
  		poll_wait(filp, &pipe->rd_wait, wait);
  	if (wr)
  		poll_wait(filp, &pipe->wr_wait, wait);
  
  	/*
  	 * .. and only then can you do the racy tests. That way,
  	 * if something changes and you got it wrong, the poll
  	 * table entry will wake you up and fix it.
  	 */
  	head = READ_ONCE(pipe->head);
  	tail = READ_ONCE(pipe->tail);
  
  	if (rd) {
  		if (!pipe_empty(head, tail))
  			events |= EPOLLIN | EPOLLRDNORM;
  		/* no writers, unless fifo_open() asked to suppress this */
  		if (!pipe->writers && filp->f_version != pipe->w_counter)
  			events |= EPOLLHUP;
  	}
  
  	if (wr) {
  		if (!pipe_full(head, tail, pipe->max_usage))
  			events |= EPOLLOUT | EPOLLWRNORM;
  		/*
  		 * Most Unices do not set EPOLLERR for FIFOs but on Linux they
  		 * behave exactly like pipes for poll().
  		 */
  		if (!pipe->readers)
  			events |= EPOLLERR;
  	}
  
  	return events;
  }
b0d8d2292   Linus Torvalds   vfs: fix subtle u...
651
652
653
654
655
656
657
658
659
660
661
662
663
664
  static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
  {
  	int kill = 0;
  
  	spin_lock(&inode->i_lock);
  	if (!--pipe->files) {
  		inode->i_pipe = NULL;
  		kill = 1;
  	}
  	spin_unlock(&inode->i_lock);
  
  	if (kill)
  		free_pipe_info(pipe);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
665
  static int
599a0ac14   Al Viro   pipe: fold file_o...
666
  pipe_release(struct inode *inode, struct file *file)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
667
  {
b0d8d2292   Linus Torvalds   vfs: fix subtle u...
668
  	struct pipe_inode_info *pipe = file->private_data;
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
669

ebec73f47   Al Viro   introduce variant...
670
  	__pipe_lock(pipe);
599a0ac14   Al Viro   pipe: fold file_o...
671
672
673
674
  	if (file->f_mode & FMODE_READ)
  		pipe->readers--;
  	if (file->f_mode & FMODE_WRITE)
  		pipe->writers--;
341b446bc   Ingo Molnar   [PATCH] another r...
675

6551d5c56   Linus Torvalds   pipe: make sure t...
676
677
678
679
  	/* Was that the last reader or writer, but not the other side? */
  	if (!pipe->readers != !pipe->writers) {
  		wake_up_interruptible_all(&pipe->rd_wait);
  		wake_up_interruptible_all(&pipe->wr_wait);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
680
681
  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
682
  	}
ebec73f47   Al Viro   introduce variant...
683
  	__pipe_unlock(pipe);
ba5bb1473   Al Viro   pipe: take alloca...
684

b0d8d2292   Linus Torvalds   vfs: fix subtle u...
685
  	put_pipe_info(inode, pipe);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
686
687
688
689
  	return 0;
  }
  
  static int
599a0ac14   Al Viro   pipe: fold file_o...
690
  pipe_fasync(int fd, struct file *filp, int on)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
691
  {
de32ec4cf   Al Viro   pipe: set file->p...
692
  	struct pipe_inode_info *pipe = filp->private_data;
599a0ac14   Al Viro   pipe: fold file_o...
693
  	int retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
694

ebec73f47   Al Viro   introduce variant...
695
  	__pipe_lock(pipe);
599a0ac14   Al Viro   pipe: fold file_o...
696
697
698
  	if (filp->f_mode & FMODE_READ)
  		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
  	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
341b446bc   Ingo Molnar   [PATCH] another r...
699
  		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
599a0ac14   Al Viro   pipe: fold file_o...
700
701
  		if (retval < 0 && (filp->f_mode & FMODE_READ))
  			/* this can happen only if on == T */
e5bc49ba7   Oleg Nesterov   pipe_rdwr_fasync:...
702
703
  			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
  	}
ebec73f47   Al Viro   introduce variant...
704
  	__pipe_unlock(pipe);
60aa49243   Jonathan Corbet   Rationalize fasyn...
705
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
706
  }
c73be61ce   David Howells   pipe: Add general...
707
708
  unsigned long account_pipe_buffers(struct user_struct *user,
  				   unsigned long old, unsigned long new)
759c01142   Willy Tarreau   pipe: limit the p...
709
  {
9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
710
  	return atomic_long_add_return(new - old, &user->pipe_bufs);
759c01142   Willy Tarreau   pipe: limit the p...
711
  }
c73be61ce   David Howells   pipe: Add general...
712
  bool too_many_pipe_buffers_soft(unsigned long user_bufs)
759c01142   Willy Tarreau   pipe: limit the p...
713
  {
f73407618   Eric Biggers   pipe: read buffer...
714
715
716
  	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
  
  	return soft_limit && user_bufs > soft_limit;
759c01142   Willy Tarreau   pipe: limit the p...
717
  }
c73be61ce   David Howells   pipe: Add general...
718
  bool too_many_pipe_buffers_hard(unsigned long user_bufs)
759c01142   Willy Tarreau   pipe: limit the p...
719
  {
f73407618   Eric Biggers   pipe: read buffer...
720
721
722
  	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
  
  	return hard_limit && user_bufs > hard_limit;
759c01142   Willy Tarreau   pipe: limit the p...
723
  }
c73be61ce   David Howells   pipe: Add general...
724
  bool pipe_is_unprivileged_user(void)
85c2dd547   Eric Biggers   pipe: actually al...
725
726
727
  {
  	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
  }
7bee130e2   Al Viro   get rid of alloc_...
728
  struct pipe_inode_info *alloc_pipe_info(void)
3a326a2ce   Ingo Molnar   [PATCH] introduce...
729
  {
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
730
  	struct pipe_inode_info *pipe;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
731
732
  	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
  	struct user_struct *user = get_current_user();
9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
733
  	unsigned long user_bufs;
f73407618   Eric Biggers   pipe: read buffer...
734
  	unsigned int max_size = READ_ONCE(pipe_max_size);
3a326a2ce   Ingo Molnar   [PATCH] introduce...
735

d86133bd3   Vladimir Davydov   pipe: account to ...
736
  	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
737
738
  	if (pipe == NULL)
  		goto out_free_uid;
f73407618   Eric Biggers   pipe: read buffer...
739
740
  	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
  		pipe_bufs = max_size >> PAGE_SHIFT;
086e774a5   Michael Kerrisk (man-pages)   pipe: cap initial...
741

9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
742
  	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
a005ca0e6   Michael Kerrisk (man-pages)   pipe: fix limit c...
743

c73be61ce   David Howells   pipe: Add general...
744
  	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
745
  		user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
a005ca0e6   Michael Kerrisk (man-pages)   pipe: fix limit c...
746
  		pipe_bufs = 1;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
747
  	}
759c01142   Willy Tarreau   pipe: limit the p...
748

c73be61ce   David Howells   pipe: Add general...
749
  	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
a005ca0e6   Michael Kerrisk (man-pages)   pipe: fix limit c...
750
751
752
753
  		goto out_revert_acct;
  
  	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
  			     GFP_KERNEL_ACCOUNT);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
754
  	if (pipe->bufs) {
0ddad21d3   Linus Torvalds   pipe: use exclusi...
755
756
  		init_waitqueue_head(&pipe->rd_wait);
  		init_waitqueue_head(&pipe->wr_wait);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
757
  		pipe->r_counter = pipe->w_counter = 1;
6718b6f85   David Howells   pipe: Allow pipes...
758
  		pipe->max_usage = pipe_bufs;
8cefc107c   David Howells   pipe: Use head an...
759
  		pipe->ring_size = pipe_bufs;
c73be61ce   David Howells   pipe: Add general...
760
  		pipe->nr_accounted = pipe_bufs;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
761
  		pipe->user = user;
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
762
763
  		mutex_init(&pipe->mutex);
  		return pipe;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
764
  	}
a005ca0e6   Michael Kerrisk (man-pages)   pipe: fix limit c...
765
  out_revert_acct:
9c87bcf0a   Michael Kerrisk (man-pages)   pipe: make accoun...
766
  	(void) account_pipe_buffers(user, pipe_bufs, 0);
09b4d1990   Michael Kerrisk (man-pages)   pipe: simplify lo...
767
768
769
  	kfree(pipe);
  out_free_uid:
  	free_uid(user);
35f3d14db   Jens Axboe   pipe: add support...
770
  	return NULL;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
771
  }
4b8a8f1e4   Al Viro   get rid of the la...
772
  void free_pipe_info(struct pipe_inode_info *pipe)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
773
774
  {
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
775

c73be61ce   David Howells   pipe: Add general...
776
777
778
779
780
781
782
783
  #ifdef CONFIG_WATCH_QUEUE
  	if (pipe->watch_queue) {
  		watch_queue_clear(pipe->watch_queue);
  		put_watch_queue(pipe->watch_queue);
  	}
  #endif
  
  	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
759c01142   Willy Tarreau   pipe: limit the p...
784
  	free_uid(pipe->user);
8cefc107c   David Howells   pipe: Use head an...
785
  	for (i = 0; i < pipe->ring_size; i++) {
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
786
  		struct pipe_buffer *buf = pipe->bufs + i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
787
  		if (buf->ops)
a779638cf   Miklos Szeredi   pipe: add pipe_bu...
788
  			pipe_buf_release(pipe, buf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
789
  	}
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
790
791
  	if (pipe->tmp_page)
  		__free_page(pipe->tmp_page);
35f3d14db   Jens Axboe   pipe: add support...
792
  	kfree(pipe->bufs);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
793
  	kfree(pipe);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
794
  }
fa3536cc1   Eric Dumazet   [PATCH] Use __rea...
795
  /*
   * Mount used for anonymous pipes: get_pipe_inode() allocates inodes on
   * its superblock and create_pipe_files() passes it to alloc_file_pseudo().
   */
  static struct vfsmount *pipe_mnt __read_mostly;
341b446bc   Ingo Molnar   [PATCH] another r...
796

c23fbb6bc   Eric Dumazet   VFS: delay the de...
797
798
799
800
801
802
  /*
   * pipefs_dname() is called from d_path().
   */
  static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
  {
  	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
75c3cfa85   David Howells   VFS: assorted wei...
803
  				d_inode(dentry)->i_ino);
c23fbb6bc   Eric Dumazet   VFS: delay the de...
804
  }
3ba13d179   Al Viro   constify dentry_o...
805
  static const struct dentry_operations pipefs_dentry_operations = {
c23fbb6bc   Eric Dumazet   VFS: delay the de...
806
  	.d_dname	= pipefs_dname,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807
808
809
810
  };
  
  static struct inode * get_pipe_inode(void)
  {
a209dfc7b   Eric Dumazet   vfs: dont chain p...
811
  	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
812
  	struct pipe_inode_info *pipe;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
813
814
815
  
  	if (!inode)
  		goto fail_inode;
85fe4025c   Christoph Hellwig   fs: do not assign...
816
  	inode->i_ino = get_next_ino();
7bee130e2   Al Viro   get rid of alloc_...
817
  	pipe = alloc_pipe_info();
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
818
  	if (!pipe)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
819
  		goto fail_iput;
3a326a2ce   Ingo Molnar   [PATCH] introduce...
820

ba5bb1473   Al Viro   pipe: take alloca...
821
822
  	inode->i_pipe = pipe;
  	pipe->files = 2;
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
823
  	pipe->readers = pipe->writers = 1;
599a0ac14   Al Viro   pipe: fold file_o...
824
  	inode->i_fop = &pipefifo_fops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
825
826
827
828
829
830
831
832
833
  
  	/*
  	 * Mark the inode dirty from the very beginning,
  	 * that way it will never be moved to the dirty
  	 * list because "mark_inode_dirty()" will think
  	 * that it already _is_ on the dirty list.
  	 */
  	inode->i_state = I_DIRTY;
  	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
da9592ede   David Howells   CRED: Wrap task c...
834
835
  	inode->i_uid = current_fsuid();
  	inode->i_gid = current_fsgid();
078cd8279   Deepa Dinamani   fs: Replace CURRE...
836
  	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
923f4f239   Ingo Molnar   [PATCH] pipe.c/fi...
837

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
838
839
840
841
  	return inode;
  
  fail_iput:
  	iput(inode);
341b446bc   Ingo Molnar   [PATCH] another r...
842

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
843
844
845
  fail_inode:
  	return NULL;
  }
e4fad8e5d   Al Viro   consolidate pipe ...
846
  int create_pipe_files(struct file **res, int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
847
  {
e4fad8e5d   Al Viro   consolidate pipe ...
848
  	struct inode *inode = get_pipe_inode();
d6cbd281d   Andi Kleen   [PATCH] Some clea...
849
  	struct file *f;
8a018eb55   Qian Cai   pipe: Fix memory ...
850
  	int error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
851

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
852
  	if (!inode)
e4fad8e5d   Al Viro   consolidate pipe ...
853
  		return -ENFILE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
854

c73be61ce   David Howells   pipe: Add general...
855
  	if (flags & O_NOTIFICATION_PIPE) {
8a018eb55   Qian Cai   pipe: Fix memory ...
856
857
858
  		error = watch_queue_init(inode->i_pipe);
  		if (error) {
  			free_pipe_info(inode->i_pipe);
c73be61ce   David Howells   pipe: Add general...
859
  			iput(inode);
8a018eb55   Qian Cai   pipe: Fix memory ...
860
  			return error;
c73be61ce   David Howells   pipe: Add general...
861
  		}
c73be61ce   David Howells   pipe: Add general...
862
  	}
152b6372c   Al Viro   create_pipe_files...
863
864
865
  	f = alloc_file_pseudo(inode, pipe_mnt, "",
  				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
  				&pipefifo_fops);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
866
  	if (IS_ERR(f)) {
152b6372c   Al Viro   create_pipe_files...
867
868
869
  		free_pipe_info(inode->i_pipe);
  		iput(inode);
  		return PTR_ERR(f);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
870
  	}
341b446bc   Ingo Molnar   [PATCH] another r...
871

de32ec4cf   Al Viro   pipe: set file->p...
872
  	f->private_data = inode->i_pipe;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
873

183266f26   Al Viro   new helper: alloc...
874
875
  	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
  				  &pipefifo_fops);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
876
  	if (IS_ERR(res[0])) {
b10a4a9f7   Al Viro   create_pipe_files...
877
878
879
  		put_pipe_info(inode, inode->i_pipe);
  		fput(f);
  		return PTR_ERR(res[0]);
e9bb1f9b1   Eric Biggers   fs/pipe.c: preser...
880
  	}
de32ec4cf   Al Viro   pipe: set file->p...
881
  	res[0]->private_data = inode->i_pipe;
e4fad8e5d   Al Viro   consolidate pipe ...
882
  	res[1] = f;
d8e464ecc   Linus Torvalds   vfs: mark pipes a...
883
884
  	stream_open(inode, res[0]);
  	stream_open(inode, res[1]);
e4fad8e5d   Al Viro   consolidate pipe ...
885
  	return 0;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
886
  }
5b249b1b0   Al Viro   pipe(2) - race-fr...
887
  static int __do_pipe_flags(int *fd, struct file **files, int flags)
d6cbd281d   Andi Kleen   [PATCH] Some clea...
888
  {
d6cbd281d   Andi Kleen   [PATCH] Some clea...
889
890
  	int error;
  	int fdw, fdr;
c73be61ce   David Howells   pipe: Add general...
891
  	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
ed8cae8ba   Ulrich Drepper   flag parameters: ...
892
  		return -EINVAL;
e4fad8e5d   Al Viro   consolidate pipe ...
893
894
895
  	error = create_pipe_files(files, flags);
  	if (error)
  		return error;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
896

ed8cae8ba   Ulrich Drepper   flag parameters: ...
897
  	error = get_unused_fd_flags(flags);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
898
899
900
  	if (error < 0)
  		goto err_read_pipe;
  	fdr = error;
ed8cae8ba   Ulrich Drepper   flag parameters: ...
901
  	error = get_unused_fd_flags(flags);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
902
903
904
  	if (error < 0)
  		goto err_fdr;
  	fdw = error;
157cf649a   Al Viro   sanitize audit_fd...
905
  	audit_fd_pair(fdr, fdw);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
906
907
  	fd[0] = fdr;
  	fd[1] = fdw;
d6cbd281d   Andi Kleen   [PATCH] Some clea...
908
909
910
911
912
  	return 0;
  
   err_fdr:
  	put_unused_fd(fdr);
   err_read_pipe:
e4fad8e5d   Al Viro   consolidate pipe ...
913
914
  	fput(files[0]);
  	fput(files[1]);
d6cbd281d   Andi Kleen   [PATCH] Some clea...
915
  	return error;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
916
  }
5b249b1b0   Al Viro   pipe(2) - race-fr...
917
918
919
920
921
922
923
924
925
926
  /*
   * Create a pipe and install both ends in the caller's descriptor table.
   * On success fd[0] is the read end and fd[1] the write end.
   * Returns 0 or a negative error.
   */
  int do_pipe_flags(int *fd, int flags)
  {
  	struct file *files[2];
  	int error;
  
  	error = __do_pipe_flags(fd, files, flags);
  	if (error)
  		return error;
  
  	fd_install(fd[0], files[0]);
  	fd_install(fd[1], files[1]);
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
927
  /*
d35c7b0e5   Ulrich Drepper   unified (weak) sy...
928
929
930
   * sys_pipe() is the normal C calling standard for creating
   * a pipe. It's not the way Unix traditionally does this, though.
   */
0a216dd1c   Dominik Brodowski   fs: add do_pipe2(...
931
  static int do_pipe2(int __user *fildes, int flags)
  {
  	struct file *files[2];
  	int fd[2];
  	int error;
  
  	error = __do_pipe_flags(fd, files, flags);
  	if (error)
  		return error;
  
  	/*
  	 * Report the descriptor numbers to userspace before installing
  	 * them, so a faulting copy can still be undone completely.
  	 */
  	if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
  		fput(files[0]);
  		fput(files[1]);
  		put_unused_fd(fd[0]);
  		put_unused_fd(fd[1]);
  		return -EFAULT;
  	}
  
  	fd_install(fd[0], files[0]);
  	fd_install(fd[1], files[1]);
  	return 0;
  }
0a216dd1c   Dominik Brodowski   fs: add do_pipe2(...
951
952
953
954
  /* pipe2(2): create a pipe with the given flags; see do_pipe2(). */
  SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
  {
  	return do_pipe2(fildes, flags);
  }
2b6642199   Heiko Carstens   [CVE-2009-0029] S...
955
  /* pipe(2): same as pipe2() with no flags set. */
  SYSCALL_DEFINE1(pipe, int __user *, fildes)
  {
  	return do_pipe2(fildes, 0);
  }
472e5b056   Linus Torvalds   pipe: remove pipe...
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
  /*
   * This is the stupid "wait for pipe to be readable or writable"
   * model.
   *
   * See pipe_read/write() for the proper kind of exclusive wait,
   * but that requires that we wake up any other readers/writers
   * if we then do not end up reading everything (ie the whole
   * "wake_next_reader/writer" logic in pipe_read/write()).
   */
  /*
   * Called with the pipe locked.  Drops the lock, sleeps interruptibly on
   * rd_wait until pipe_readable() holds, then takes the lock again.  The
   * result of the wait is not reported, so the caller has to re-check
   * the pipe state (and pending signals) itself.
   */
  void pipe_wait_readable(struct pipe_inode_info *pipe)
  {
  	pipe_unlock(pipe);
  	wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
  	pipe_lock(pipe);
  }
  
  /*
   * Called with the pipe locked.  Drops the lock, sleeps interruptibly on
   * wr_wait until pipe_writable() holds, then takes the lock again.  The
   * result of the wait is not reported, so the caller has to re-check
   * the pipe state (and pending signals) itself.
   */
  void pipe_wait_writable(struct pipe_inode_info *pipe)
  {
  	pipe_unlock(pipe);
  	wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
  	pipe_lock(pipe);
  }
  
  /*
   * This depends on both the wait (here) and the wakeup (wake_up_partner)
   * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
   * race with the count check and waitqueue prep.
   *
   * Normally in order to avoid races, you'd do the prepare_to_wait() first,
   * then check the condition you're waiting for, and only then sleep. But
   * because of the pipe lock, we can check the condition before being on
   * the wait queue.
   *
   * We use the 'rd_wait' waitqueue for pipe partner waiting.
   */
fc7478a2b   Al Viro   pipe: switch wait...
994
  static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
f776c7388   Al Viro   fold fifo.c into ...
995
  {
472e5b056   Linus Torvalds   pipe: remove pipe...
996
  	DEFINE_WAIT(rdwait);
8cefc107c   David Howells   pipe: Use head an...
997
  	int cur = *cnt;
f776c7388   Al Viro   fold fifo.c into ...
998
999
  
  	while (cur == *cnt) {
472e5b056   Linus Torvalds   pipe: remove pipe...
1000
1001
1002
1003
1004
  		prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
  		pipe_unlock(pipe);
  		schedule();
  		finish_wait(&pipe->rd_wait, &rdwait);
  		pipe_lock(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1005
1006
1007
1008
1009
  		if (signal_pending(current))
  			break;
  	}
  	return cur == *cnt ? -ERESTARTSYS : 0;
  }
fc7478a2b   Al Viro   pipe: switch wait...
1010
  static void wake_up_partner(struct pipe_inode_info *pipe)
f776c7388   Al Viro   fold fifo.c into ...
1011
  {
6551d5c56   Linus Torvalds   pipe: make sure t...
1012
  	wake_up_interruptible_all(&pipe->rd_wait);
f776c7388   Al Viro   fold fifo.c into ...
1013
1014
1015
1016
1017
  }
  
  static int fifo_open(struct inode *inode, struct file *filp)
  {
  	struct pipe_inode_info *pipe;
599a0ac14   Al Viro   pipe: fold file_o...
1018
  	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
f776c7388   Al Viro   fold fifo.c into ...
1019
  	int ret;
ba5bb1473   Al Viro   pipe: take alloca...
1020
1021
1022
1023
1024
1025
1026
1027
1028
  	filp->f_version = 0;
  
  	spin_lock(&inode->i_lock);
  	if (inode->i_pipe) {
  		pipe = inode->i_pipe;
  		pipe->files++;
  		spin_unlock(&inode->i_lock);
  	} else {
  		spin_unlock(&inode->i_lock);
7bee130e2   Al Viro   get rid of alloc_...
1029
  		pipe = alloc_pipe_info();
f776c7388   Al Viro   fold fifo.c into ...
1030
  		if (!pipe)
ba5bb1473   Al Viro   pipe: take alloca...
1031
1032
1033
1034
1035
1036
  			return -ENOMEM;
  		pipe->files = 1;
  		spin_lock(&inode->i_lock);
  		if (unlikely(inode->i_pipe)) {
  			inode->i_pipe->files++;
  			spin_unlock(&inode->i_lock);
4b8a8f1e4   Al Viro   get rid of the la...
1037
  			free_pipe_info(pipe);
ba5bb1473   Al Viro   pipe: take alloca...
1038
1039
1040
1041
1042
  			pipe = inode->i_pipe;
  		} else {
  			inode->i_pipe = pipe;
  			spin_unlock(&inode->i_lock);
  		}
f776c7388   Al Viro   fold fifo.c into ...
1043
  	}
de32ec4cf   Al Viro   pipe: set file->p...
1044
  	filp->private_data = pipe;
ba5bb1473   Al Viro   pipe: take alloca...
1045
  	/* OK, we have a pipe and it's pinned down */
ebec73f47   Al Viro   introduce variant...
1046
  	__pipe_lock(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1047
1048
  
  	/* We can only do regular read/write on fifos */
d8e464ecc   Linus Torvalds   vfs: mark pipes a...
1049
  	stream_open(inode, filp);
f776c7388   Al Viro   fold fifo.c into ...
1050

d8e464ecc   Linus Torvalds   vfs: mark pipes a...
1051
  	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
f776c7388   Al Viro   fold fifo.c into ...
1052
1053
1054
1055
1056
1057
  	case FMODE_READ:
  	/*
  	 *  O_RDONLY
  	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
  	 *  opened, even when there is no process writing the FIFO.
  	 */
f776c7388   Al Viro   fold fifo.c into ...
1058
1059
  		pipe->r_counter++;
  		if (pipe->readers++ == 0)
fc7478a2b   Al Viro   pipe: switch wait...
1060
  			wake_up_partner(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1061

599a0ac14   Al Viro   pipe: fold file_o...
1062
  		if (!is_pipe && !pipe->writers) {
f776c7388   Al Viro   fold fifo.c into ...
1063
  			if ((filp->f_flags & O_NONBLOCK)) {
a9a08845e   Linus Torvalds   vfs: do bulk POLL...
1064
  				/* suppress EPOLLHUP until we have
f776c7388   Al Viro   fold fifo.c into ...
1065
1066
1067
  				 * seen a writer */
  				filp->f_version = pipe->w_counter;
  			} else {
fc7478a2b   Al Viro   pipe: switch wait...
1068
  				if (wait_for_partner(pipe, &pipe->w_counter))
f776c7388   Al Viro   fold fifo.c into ...
1069
1070
1071
1072
  					goto err_rd;
  			}
  		}
  		break;
8cefc107c   David Howells   pipe: Use head an...
1073

f776c7388   Al Viro   fold fifo.c into ...
1074
1075
1076
1077
1078
1079
1080
  	case FMODE_WRITE:
  	/*
  	 *  O_WRONLY
  	 *  POSIX.1 says that O_NONBLOCK means return -1 with
  	 *  errno=ENXIO when there is no process reading the FIFO.
  	 */
  		ret = -ENXIO;
599a0ac14   Al Viro   pipe: fold file_o...
1081
  		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
f776c7388   Al Viro   fold fifo.c into ...
1082
  			goto err;
f776c7388   Al Viro   fold fifo.c into ...
1083
1084
  		pipe->w_counter++;
  		if (!pipe->writers++)
fc7478a2b   Al Viro   pipe: switch wait...
1085
  			wake_up_partner(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1086

599a0ac14   Al Viro   pipe: fold file_o...
1087
  		if (!is_pipe && !pipe->readers) {
fc7478a2b   Al Viro   pipe: switch wait...
1088
  			if (wait_for_partner(pipe, &pipe->r_counter))
f776c7388   Al Viro   fold fifo.c into ...
1089
1090
1091
  				goto err_wr;
  		}
  		break;
8cefc107c   David Howells   pipe: Use head an...
1092

f776c7388   Al Viro   fold fifo.c into ...
1093
1094
1095
1096
1097
1098
1099
  	case FMODE_READ | FMODE_WRITE:
  	/*
  	 *  O_RDWR
  	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
  	 *  This implementation will NEVER block on a O_RDWR open, since
  	 *  the process can at least talk to itself.
  	 */
f776c7388   Al Viro   fold fifo.c into ...
1100
1101
1102
1103
1104
1105
  
  		pipe->readers++;
  		pipe->writers++;
  		pipe->r_counter++;
  		pipe->w_counter++;
  		if (pipe->readers == 1 || pipe->writers == 1)
fc7478a2b   Al Viro   pipe: switch wait...
1106
  			wake_up_partner(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1107
1108
1109
1110
1111
1112
1113
1114
  		break;
  
  	default:
  		ret = -EINVAL;
  		goto err;
  	}
  
  	/* Ok! */
ebec73f47   Al Viro   introduce variant...
1115
  	__pipe_unlock(pipe);
f776c7388   Al Viro   fold fifo.c into ...
1116
1117
1118
1119
  	return 0;
  
  err_rd:
  	if (!--pipe->readers)
0ddad21d3   Linus Torvalds   pipe: use exclusi...
1120
  		wake_up_interruptible(&pipe->wr_wait);
f776c7388   Al Viro   fold fifo.c into ...
1121
1122
1123
1124
1125
  	ret = -ERESTARTSYS;
  	goto err;
  
  err_wr:
  	if (!--pipe->writers)
6551d5c56   Linus Torvalds   pipe: make sure t...
1126
  		wake_up_interruptible_all(&pipe->rd_wait);
f776c7388   Al Viro   fold fifo.c into ...
1127
1128
1129
1130
  	ret = -ERESTARTSYS;
  	goto err;
  
  err:
ebec73f47   Al Viro   introduce variant...
1131
  	__pipe_unlock(pipe);
b0d8d2292   Linus Torvalds   vfs: fix subtle u...
1132
1133
  
  	put_pipe_info(inode, pipe);
f776c7388   Al Viro   fold fifo.c into ...
1134
1135
  	return ret;
  }
599a0ac14   Al Viro   pipe: fold file_o...
1136
1137
1138
  const struct file_operations pipefifo_fops = {
  	.open		= fifo_open,
  	.llseek		= no_llseek,
fb9096a34   Al Viro   pipe: switch to -...
1139
  	.read_iter	= pipe_read,
f0d1bec9d   Al Viro   new helper: copy_...
1140
  	.write_iter	= pipe_write,
a11e1d432   Linus Torvalds   Revert changes to...
1141
  	.poll		= pipe_poll,
599a0ac14   Al Viro   pipe: fold file_o...
1142
1143
1144
  	.unlocked_ioctl	= pipe_ioctl,
  	.release	= pipe_release,
  	.fasync		= pipe_fasync,
f776c7388   Al Viro   fold fifo.c into ...
1145
  };
d35c7b0e5   Ulrich Drepper   unified (weak) sy...
1146
  /*
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1147
   * Currently we rely on the pipe array holding a power-of-2 number
d3f14c485   Joe Lawrence   pipe: avoid round...
1148
   * of pages. Returns 0 on error.
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1149
   */
96e99be40   Eric Biggers   pipe: reject F_SE...
1150
  unsigned int round_pipe_size(unsigned long size)
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1151
  {
c4fed5a91   Eric Biggers   pipe: simplify ro...
1152
  	if (size > (1U << 31))
96e99be40   Eric Biggers   pipe: reject F_SE...
1153
  		return 0;
4c2e4befb   Eric Biggers   pipe, sysctl: dro...
1154
1155
  	/* Minimum pipe size, as required by POSIX */
  	if (size < PAGE_SIZE)
c4fed5a91   Eric Biggers   pipe: simplify ro...
1156
  		return PAGE_SIZE;
d3f14c485   Joe Lawrence   pipe: avoid round...
1157

c4fed5a91   Eric Biggers   pipe: simplify ro...
1158
  	return roundup_pow_of_two(size);
f491bd711   Michael Kerrisk (man-pages)   pipe: relocate ro...
1159
1160
1161
  }
  
  /*
c73be61ce   David Howells   pipe: Add general...
1162
   * Resize the pipe ring to a number of slots.
35f3d14db   Jens Axboe   pipe: add support...
1163
   */
c73be61ce   David Howells   pipe: Add general...
1164
  int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
35f3d14db   Jens Axboe   pipe: add support...
1165
1166
  {
  	struct pipe_buffer *bufs;
c73be61ce   David Howells   pipe: Add general...
1167
  	unsigned int head, tail, mask, n;
35f3d14db   Jens Axboe   pipe: add support...
1168
1169
  
  	/*
8cefc107c   David Howells   pipe: Use head an...
1170
1171
1172
  	 * We can shrink the pipe, if arg is greater than the ring occupancy.
  	 * Since we don't expect a lot of shrink+grow operations, just free and
  	 * allocate again like we would do for growing.  If the pipe currently
35f3d14db   Jens Axboe   pipe: add support...
1173
1174
  	 * contains more buffers than arg, then return busy.
  	 */
8cefc107c   David Howells   pipe: Use head an...
1175
1176
1177
1178
  	mask = pipe->ring_size - 1;
  	head = pipe->head;
  	tail = pipe->tail;
  	n = pipe_occupancy(pipe->head, pipe->tail);
c73be61ce   David Howells   pipe: Add general...
1179
1180
  	if (nr_slots < n)
  		return -EBUSY;
35f3d14db   Jens Axboe   pipe: add support...
1181

8cefc107c   David Howells   pipe: Use head an...
1182
  	bufs = kcalloc(nr_slots, sizeof(*bufs),
d86133bd3   Vladimir Davydov   pipe: account to ...
1183
  		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
c73be61ce   David Howells   pipe: Add general...
1184
1185
  	if (unlikely(!bufs))
  		return -ENOMEM;
35f3d14db   Jens Axboe   pipe: add support...
1186
1187
1188
  
  	/*
  	 * The pipe array wraps around, so just start the new one at zero
8cefc107c   David Howells   pipe: Use head an...
1189
  	 * and adjust the indices.
35f3d14db   Jens Axboe   pipe: add support...
1190
  	 */
8cefc107c   David Howells   pipe: Use head an...
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
  	if (n > 0) {
  		unsigned int h = head & mask;
  		unsigned int t = tail & mask;
  		if (h > t) {
  			memcpy(bufs, pipe->bufs + t,
  			       n * sizeof(struct pipe_buffer));
  		} else {
  			unsigned int tsize = pipe->ring_size - t;
  			if (h > 0)
  				memcpy(bufs + tsize, pipe->bufs,
  				       h * sizeof(struct pipe_buffer));
  			memcpy(bufs, pipe->bufs + t,
  			       tsize * sizeof(struct pipe_buffer));
  		}
35f3d14db   Jens Axboe   pipe: add support...
1205
  	}
8cefc107c   David Howells   pipe: Use head an...
1206
1207
  	head = n;
  	tail = 0;
35f3d14db   Jens Axboe   pipe: add support...
1208
1209
  	kfree(pipe->bufs);
  	pipe->bufs = bufs;
8cefc107c   David Howells   pipe: Use head an...
1210
  	pipe->ring_size = nr_slots;
c73be61ce   David Howells   pipe: Add general...
1211
1212
  	if (pipe->max_usage > nr_slots)
  		pipe->max_usage = nr_slots;
8cefc107c   David Howells   pipe: Use head an...
1213
1214
  	pipe->tail = tail;
  	pipe->head = head;
6551d5c56   Linus Torvalds   pipe: make sure t...
1215
1216
1217
  
  	/* This might have made more room for writers */
  	wake_up_interruptible(&pipe->wr_wait);
c73be61ce   David Howells   pipe: Add general...
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
  	return 0;
  }
  
  /*
   * Allocate a new array of pipe buffers and copy the info over. Returns the
   * pipe size if successful, or return -ERROR on error.
   */
  static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
  {
  	unsigned long user_bufs;
  	unsigned int nr_slots, size;
  	long ret = 0;
  
  #ifdef CONFIG_WATCH_QUEUE
  	if (pipe->watch_queue)
  		return -EBUSY;
  #endif
  
  	size = round_pipe_size(arg);
  	nr_slots = size >> PAGE_SHIFT;
  
  	if (!nr_slots)
  		return -EINVAL;
  
  	/*
  	 * If trying to increase the pipe capacity, check that an
  	 * unprivileged user is not trying to exceed various limits
  	 * (soft limit check here, hard limit check just below).
  	 * Decreasing the pipe capacity is always permitted, even
  	 * if the user is currently over a limit.
  	 */
  	if (nr_slots > pipe->max_usage &&
  			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
  		return -EPERM;
  
  	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
  
  	if (nr_slots > pipe->max_usage &&
  			(too_many_pipe_buffers_hard(user_bufs) ||
  			 too_many_pipe_buffers_soft(user_bufs)) &&
  			pipe_is_unprivileged_user()) {
  		ret = -EPERM;
  		goto out_revert_acct;
  	}
  
  	ret = pipe_resize_ring(pipe, nr_slots);
  	if (ret < 0)
  		goto out_revert_acct;
  
  	pipe->max_usage = nr_slots;
  	pipe->nr_accounted = nr_slots;
6718b6f85   David Howells   pipe: Allow pipes...
1269
  	return pipe->max_usage * PAGE_SIZE;
b0b91d18e   Michael Kerrisk (man-pages)   pipe: fix limit c...
1270
1271
  
  out_revert_acct:
c73be61ce   David Howells   pipe: Add general...
1272
  	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
b0b91d18e   Michael Kerrisk (man-pages)   pipe: fix limit c...
1273
  	return ret;
35f3d14db   Jens Axboe   pipe: add support...
1274
  }
ff9da691c   Jens Axboe   pipe: change /pro...
1275
  /*
720836465   Linus Torvalds   Un-inline get_pip...
1276
1277
1278
1279
   * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
   * location, so checking ->i_pipe is not enough to verify that this is a
   * pipe.
   */
c73be61ce   David Howells   pipe: Add general...
1280
  struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
720836465   Linus Torvalds   Un-inline get_pip...
1281
  {
c73be61ce   David Howells   pipe: Add general...
1282
1283
1284
1285
1286
1287
1288
1289
1290
  	struct pipe_inode_info *pipe = file->private_data;
  
  	if (file->f_op != &pipefifo_fops || !pipe)
  		return NULL;
  #ifdef CONFIG_WATCH_QUEUE
  	if (for_splice && pipe->watch_queue)
  		return NULL;
  #endif
  	return pipe;
720836465   Linus Torvalds   Un-inline get_pip...
1291
  }
35f3d14db   Jens Axboe   pipe: add support...
1292
1293
1294
1295
  long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
  {
  	struct pipe_inode_info *pipe;
  	long ret;
c73be61ce   David Howells   pipe: Add general...
1296
  	pipe = get_pipe_info(file, false);
35f3d14db   Jens Axboe   pipe: add support...
1297
1298
  	if (!pipe)
  		return -EBADF;
ebec73f47   Al Viro   introduce variant...
1299
  	__pipe_lock(pipe);
35f3d14db   Jens Axboe   pipe: add support...
1300
1301
  
  	switch (cmd) {
d37d41666   Michael Kerrisk (man-pages)   pipe: move limit ...
1302
1303
  	case F_SETPIPE_SZ:
  		ret = pipe_set_size(pipe, arg);
35f3d14db   Jens Axboe   pipe: add support...
1304
1305
  		break;
  	case F_GETPIPE_SZ:
6718b6f85   David Howells   pipe: Allow pipes...
1306
  		ret = pipe->max_usage * PAGE_SIZE;
35f3d14db   Jens Axboe   pipe: add support...
1307
1308
1309
1310
1311
  		break;
  	default:
  		ret = -EINVAL;
  		break;
  	}
ebec73f47   Al Viro   introduce variant...
1312
  	__pipe_unlock(pipe);
35f3d14db   Jens Axboe   pipe: add support...
1313
1314
  	return ret;
  }
ff0c7d15f   Nick Piggin   fs: avoid inode R...
1315
1316
  static const struct super_operations pipefs_ops = {
  	.destroy_inode = free_inode_nonrcu,
d70ef97ba   Pavel Emelyanov   fs/pipe.c: add ->...
1317
  	.statfs = simple_statfs,
ff0c7d15f   Nick Piggin   fs: avoid inode R...
1318
  };
35f3d14db   Jens Axboe   pipe: add support...
1319
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1320
1321
1322
1323
1324
   * pipefs should _never_ be mounted by userland - too much of security hassle,
   * no real gain from having the whole whorehouse mounted. So we don't need
   * any operations on the root directory. However, we need a non-trivial
   * d_name - pipe: will go nicely and kill the special-casing in procfs.
   */
4fa7ec5db   David Howells   vfs: Convert pipe...
1325
1326
  
  static int pipefs_init_fs_context(struct fs_context *fc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1327
  {
4fa7ec5db   David Howells   vfs: Convert pipe...
1328
1329
1330
1331
1332
1333
  	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
  	if (!ctx)
  		return -ENOMEM;
  	ctx->ops = &pipefs_ops;
  	ctx->dops = &pipefs_dentry_operations;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1334
1335
1336
1337
  }
  
  static struct file_system_type pipe_fs_type = {
  	.name		= "pipefs",
4fa7ec5db   David Howells   vfs: Convert pipe...
1338
  	.init_fs_context = pipefs_init_fs_context,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1339
1340
1341
1342
1343
1344
  	.kill_sb	= kill_anon_super,
  };
  
  /*
   * Register pipefs and create its kernel-internal mount (pipe_mnt).
   * If mounting fails the filesystem is unregistered again and the mount
   * error is returned; a registration error is returned as-is.
   */
  static int __init init_pipe_fs(void)
  {
  	int err;

  	err = register_filesystem(&pipe_fs_type);
  	if (err)
  		return err;

  	pipe_mnt = kern_mount(&pipe_fs_type);
  	if (!IS_ERR(pipe_mnt))
  		return 0;

  	/* Mount failed: undo the registration before reporting the error. */
  	err = PTR_ERR(pipe_mnt);
  	unregister_filesystem(&pipe_fs_type);
  	return err;
  }

  fs_initcall(init_pipe_fs);