// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>

#include <linux/uaccess.h>
#include <asm/ioctls.h>

#include "internal.h"

/*
 * The max size that a non-root user is allowed to grow the pipe. Can
 * be set by root in /proc/sys/fs/pipe-max-size
 */
unsigned int pipe_max_size = 1048576;

/* Maximum allocatable pages per user. Hard limit is unset by default, soft
 * matches default values.
 */
unsigned long pipe_user_pages_hard;
unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
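
/*
 * Editorial note: these per-user limits are enforced in alloc_pipe_info()
 * and pipe_set_size() below via too_many_pipe_buffers_soft()/_hard(); a
 * limit of zero disables the corresponding check.
 */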

/*
 * We use head and tail indices that aren't masked off, except at the point of
 * dereference, but rather they're allowed to wrap naturally.  This means there
 * isn't a dead spot in the buffer, but the ring has to be a power of two and
 * <= 2^31.
 * -- David Howells 2019-09-23.
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
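
/*
 * Worked example of the unmasked indices (illustrative, not from the
 * original source): with ring_size = 8, head = 0x80000001 and
 * tail = 0x7fffffff, the occupancy head - tail is 2 thanks to unsigned
 * wrap-around, and the slots in use are bufs[tail & 7] and
 * bufs[(tail + 1) & 7].
 */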

static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
	if (pipe->files)
		mutex_lock_nested(&pipe->mutex, subclass);
}

void pipe_lock(struct pipe_inode_info *pipe)
{
	/*
	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
	 */
	pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);

void pipe_unlock(struct pipe_inode_info *pipe)
{
	if (pipe->files)
		mutex_unlock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_unlock);

static inline void __pipe_lock(struct pipe_inode_info *pipe)
{
	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
}

static inline void __pipe_unlock(struct pipe_inode_info *pipe)
{
	mutex_unlock(&pipe->mutex);
}

void pipe_double_lock(struct pipe_inode_info *pipe1,
		      struct pipe_inode_info *pipe2)
{
	BUG_ON(pipe1 == pipe2);

	if (pipe1 < pipe2) {
		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
	} else {
		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
	}
}
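
/*
 * Note: pipe_double_lock() always takes the lower-addressed pipe first,
 * which gives a global lock order and prevents an ABBA deadlock when two
 * pipes must be locked against each other (e.g. for tee(2)).
 */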

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(rdwait);
	DEFINE_WAIT(wrwait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
	pipe_unlock(pipe);
	schedule();
	finish_wait(&pipe->rd_wait, &rdwait);
	finish_wait(&pipe->wr_wait, &wrwait);
	pipe_lock(pipe);
}
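
/*
 * The caller must hold the pipe lock; it is dropped across schedule() and
 * re-taken before returning, so any pipe state observed before the wait
 * must be re-checked afterwards.
 */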

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache. (Otherwise just release our reference to it)
	 */
	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		put_page(page);
}

static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) != 1)
		return false;
	memcg_kmem_uncharge_page(page, 0);
	__SetPageLocked(page);
	return true;
}
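
/*
 * Stealing only succeeds when the pipe holds the sole reference: the page
 * is uncharged from the memcg (it stops being pipe memory) and handed to
 * the caller locked, matching the contract documented for
 * generic_pipe_buf_try_steal() below.
 */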

/**
 * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to attempt to steal
 *
 * Description:
 *	This function attempts to steal the &struct page attached to
 *	@buf. If successful, this function returns 0 and returns with
 *	the page locked. The caller may then reuse the page for whatever
 *	he wishes; the typical use is insertion into a different file
 *	page cache.
 */
bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * A reference of one is golden, that means that the owner of this
	 * page is the only one holding a reference to it. lock the page
	 * and return OK.
	 */
	if (page_count(page) == 1) {
		lock_page(page);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(generic_pipe_buf_try_steal);

/**
 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to get a reference to
 *
 * Description:
 *	This function grabs an extra reference to @buf. It's used in
 *	the tee() system call, when we duplicate the buffers in one
 *	pipe into another.
 */
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
	return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);

/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to put a reference to
 *
 * Description:
 *	This function releases a reference to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	put_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_release);

static const struct pipe_buf_operations anon_pipe_buf_ops = {
	.release	= anon_pipe_buf_release,
	.try_steal	= anon_pipe_buf_try_steal,
	.get		= generic_pipe_buf_get,
};
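
/*
 * Editorial note: no ->confirm hook is needed here, since anonymous pipe
 * pages are always up to date and pipe_buf_confirm() treats a NULL
 * ->confirm as success.
 */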

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int writers = READ_ONCE(pipe->writers);

	return !pipe_empty(head, tail) || !writers;
}
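
/*
 * pipe_readable() is the wait_event() condition used by pipe_read(); the
 * snapshot may be stale, but a spurious wakeup only means another trip
 * around the loop, where the state is re-read under the pipe lock.
 */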

static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
	size_t total_len = iov_iter_count(to);
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	bool was_full, wake_next_reader = false;
	ssize_t ret;

	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	ret = 0;
	__pipe_lock(pipe);

	/*
	 * We only wake up writers if the pipe was full when we started
	 * reading in order to avoid unnecessary wakeups.
	 *
	 * But when we do wake up writers, we do so using a sync wakeup
	 * (WF_SYNC), because we want them to get going and generate more
	 * data for us.
	 */
	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
	for (;;) {
		unsigned int head = pipe->head;
		unsigned int tail = pipe->tail;
		unsigned int mask = pipe->ring_size - 1;

#ifdef CONFIG_WATCH_QUEUE
		if (pipe->note_loss) {
			struct watch_notification n;

			if (total_len < 8) {
				if (ret == 0)
					ret = -ENOBUFS;
				break;
			}

			n.type = WATCH_TYPE_META;
			n.subtype = WATCH_META_LOSS_NOTIFICATION;
			n.info = watch_sizeof(n);
			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
				if (ret == 0)
					ret = -EFAULT;
				break;
			}
			ret += sizeof(n);
			total_len -= sizeof(n);
			pipe->note_loss = false;
		}
#endif

		if (!pipe_empty(head, tail)) {
			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
			size_t chars = buf->len;
			size_t written;
			int error;

			if (chars > total_len) {
				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
					if (ret == 0)
						ret = -ENOBUFS;
					break;
				}
				chars = total_len;
			}

			error = pipe_buf_confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
			if (unlikely(written < chars)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;

			/* Was it a packet buffer? Clean up and exit */
			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
				total_len = chars;
				buf->len = 0;
			}

			if (!buf->len) {
				pipe_buf_release(pipe, buf);
				spin_lock_irq(&pipe->rd_wait.lock);
#ifdef CONFIG_WATCH_QUEUE
				if (buf->flags & PIPE_BUF_FLAG_LOSS)
					pipe->note_loss = true;
#endif
				tail++;
				pipe->tail = tail;
				spin_unlock_irq(&pipe->rd_wait.lock);
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
			if (!pipe_empty(head, tail))	/* More to do? */
				continue;
		}

		if (!pipe->writers)
			break;
		if (ret)
			break;
		if (filp->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			break;
		}
		__pipe_unlock(pipe);

		/*
		 * We only get here if we didn't actually read anything.
		 *
		 * However, we could have seen (and removed) a zero-sized
		 * pipe buffer, and might have made space in the buffers
		 * that way.
		 *
		 * You can't make zero-sized pipe buffers by doing an empty
		 * write (not even in packet mode), but they can happen if
		 * the writer gets an EFAULT when trying to fill a buffer
		 * that already got allocated and inserted in the buffer
		 * array.
		 *
		 * So we still need to wake up any pending writers in the
		 * _very_ unlikely case that the pipe was full, but we got
		 * no data.
		 */
		if (unlikely(was_full)) {
			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}

		/*
		 * But because we didn't read anything, at this point we can
		 * just return directly with -ERESTARTSYS if we're interrupted,
		 * since we've done any required wakeups and there's no need
		 * to mark anything accessed. And we've dropped the lock.
		 */
		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
			return -ERESTARTSYS;

		__pipe_lock(pipe);
		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
		wake_next_reader = true;
	}
	if (pipe_empty(pipe->head, pipe->tail))
		wake_next_reader = false;
	__pipe_unlock(pipe);

	if (was_full) {
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (wake_next_reader)
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

static inline int is_packetized(struct file *file)
{
	return (file->f_flags & O_DIRECT) != 0;
}
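
/*
 * O_DIRECT on a pipe selects "packet" mode: each write becomes one
 * PIPE_BUF_FLAG_PACKET buffer, writes are never merged, and a read
 * consumes at most one packet (see pipe_read() and pipe_write()).
 */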

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int max_usage = READ_ONCE(pipe->max_usage);

	return !pipe_full(head, tail, max_usage) ||
		!READ_ONCE(pipe->readers);
}

static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head;
	ssize_t ret = 0;
	size_t total_len = iov_iter_count(from);
	ssize_t chars;
	bool was_empty = false;
	bool wake_next_writer = false;

	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	__pipe_lock(pipe);

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue) {
		ret = -EXDEV;
		goto out;
	}
#endif

	/*
	 * Only wake up if the pipe started out empty, since
	 * otherwise there should be no readers waiting.
	 *
	 * If it wasn't empty we try to merge new data into
	 * the last buffer.
	 *
	 * That naturally merges small writes, but it also
	 * page-aligns the rest of the writes for large writes
	 * spanning multiple pages.
	 */
	head = pipe->head;
	was_empty = pipe_empty(head, pipe->tail);
	chars = total_len & (PAGE_SIZE-1);
	if (chars && !was_empty) {
		unsigned int mask = pipe->ring_size - 1;
		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
		int offset = buf->offset + buf->len;

		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
		    offset + chars <= PAGE_SIZE) {
			ret = pipe_buf_confirm(pipe, buf);
			if (ret)
				goto out;

			ret = copy_page_from_iter(buf->page, offset, chars, from);
			if (unlikely(ret < chars)) {
				ret = -EFAULT;
				goto out;
			}

			buf->len += ret;
			if (!iov_iter_count(from))
				goto out;
		}
	}

	for (;;) {
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		head = pipe->head;
		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
			unsigned int mask = pipe->ring_size - 1;
			struct pipe_buffer *buf = &pipe->bufs[head & mask];
			struct page *page = pipe->tmp_page;
			int copied;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}

			/* Allocate a slot in the ring in advance and attach an
			 * empty buffer.  If we fault or otherwise fail to use
			 * it, either the reader will consume it or it'll still
			 * be there for the next write.
			 */
			spin_lock_irq(&pipe->rd_wait.lock);

			head = pipe->head;
			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
				spin_unlock_irq(&pipe->rd_wait.lock);
				continue;
			}

			pipe->head = head + 1;
			spin_unlock_irq(&pipe->rd_wait.lock);

			/* Insert it into the buffer array */
			buf = &pipe->bufs[head & mask];
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = 0;
			if (is_packetized(filp))
				buf->flags = PIPE_BUF_FLAG_PACKET;
			else
				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
			pipe->tmp_page = NULL;

			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += copied;
			buf->offset = 0;
			buf->len = copied;

			if (!iov_iter_count(from))
				break;
		}

		if (!pipe_full(head, pipe->tail, pipe->max_usage))
			continue;

		/* Wait for buffer space to become available. */
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}

		/*
		 * We're going to release the pipe lock and wait for more
		 * space. We wake up any readers if necessary, and then
		 * after waiting we need to re-check whether the pipe
		 * became empty while we dropped the lock.
		 */
		__pipe_unlock(pipe);
		if (was_empty) {
			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		}
		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
		__pipe_lock(pipe);
		was_empty = pipe_empty(pipe->head, pipe->tail);
		wake_next_writer = true;
	}
out:
	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
		wake_next_writer = false;
	__pipe_unlock(pipe);

	/*
	 * If we do a wakeup event, we do a 'sync' wakeup, because we
	 * want the reader to start processing things asap, rather than
	 * leave the data pending.
	 *
	 * This is particularly important for small writes, because of
	 * how (for example) the GNU make jobserver uses small writes to
	 * wake up pending jobs.
	 */
	if (was_empty) {
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (wake_next_writer)
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
		int err = file_update_time(filp);
		if (err)
			ret = err;
		sb_end_write(file_inode(filp)->i_sb);
	}
	return ret;
}
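
/*
 * Note that a successful write only updates the file times when the
 * superblock isn't frozen: sb_start_write_trylock() is used instead of a
 * blocking freeze-protection acquire, so a FIFO write never blocks on a
 * filesystem freeze just to bump mtime.
 */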

static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe = filp->private_data;
	int count, head, tail, mask;

	switch (cmd) {
	case FIONREAD:
		__pipe_lock(pipe);
		count = 0;
		head = pipe->head;
		tail = pipe->tail;
		mask = pipe->ring_size - 1;

		while (tail != head) {
			count += pipe->bufs[tail & mask].len;
			tail++;
		}
		__pipe_unlock(pipe);

		return put_user(count, (int __user *)arg);

#ifdef CONFIG_WATCH_QUEUE
	case IOC_WATCH_QUEUE_SET_SIZE: {
		int ret;
		__pipe_lock(pipe);
		ret = watch_queue_set_size(pipe, arg);
		__pipe_unlock(pipe);
		return ret;
	}

	case IOC_WATCH_QUEUE_SET_FILTER:
		return watch_queue_set_filter(
			pipe, (struct watch_notification_filter __user *)arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
}
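
/*
 * Illustrative userspace view of the FIONREAD case:
 *
 *	int unread;
 *	if (ioctl(pipefd[0], FIONREAD, &unread) == 0)
 *		// unread now holds the number of buffered bytes
 */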

/* No kernel lock held - fine */
static __poll_t
pipe_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head, tail;

	/*
	 * Reading pipe state only -- no need for acquiring the semaphore.
	 *
	 * But because this is racy, the code has to add the
	 * entry to the poll table _first_ ..
	 */
	if (filp->f_mode & FMODE_READ)
		poll_wait(filp, &pipe->rd_wait, wait);
	if (filp->f_mode & FMODE_WRITE)
		poll_wait(filp, &pipe->wr_wait, wait);

	/*
	 * .. and only then can you do the racy tests. That way,
	 * if something changes and you got it wrong, the poll
	 * table entry will wake you up and fix it.
	 */
	head = READ_ONCE(pipe->head);
	tail = READ_ONCE(pipe->tail);

	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		if (!pipe_empty(head, tail))
			mask |= EPOLLIN | EPOLLRDNORM;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= EPOLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		if (!pipe_full(head, tail, pipe->max_usage))
			mask |= EPOLLOUT | EPOLLWRNORM;
		/*
		 * Most Unices do not set EPOLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= EPOLLERR;
	}

	return mask;
}
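
/*
 * The f_version check above pairs with fifo_open(): a reader that opened
 * with O_NONBLOCK before any writer existed stashes w_counter in
 * f_version, which suppresses EPOLLHUP until a writer has come and gone.
 */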

static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
{
	int kill = 0;

	spin_lock(&inode->i_lock);
	if (!--pipe->files) {
		inode->i_pipe = NULL;
		kill = 1;
	}
	spin_unlock(&inode->i_lock);

	if (kill)
		free_pipe_info(pipe);
}

static int
pipe_release(struct inode *inode, struct file *file)
{
	struct pipe_inode_info *pipe = file->private_data;

	__pipe_lock(pipe);
	if (file->f_mode & FMODE_READ)
		pipe->readers--;
	if (file->f_mode & FMODE_WRITE)
		pipe->writers--;

	/* Was that the last reader or writer, but not the other side? */
	if (!pipe->readers != !pipe->writers) {
		wake_up_interruptible_all(&pipe->rd_wait);
		wake_up_interruptible_all(&pipe->wr_wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return 0;
}

static int
pipe_fasync(int fd, struct file *filp, int on)
{
	struct pipe_inode_info *pipe = filp->private_data;
	int retval = 0;

	__pipe_lock(pipe);
	if (filp->f_mode & FMODE_READ)
		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
		if (retval < 0 && (filp->f_mode & FMODE_READ))
			/* this can happen only if on == T */
			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
	}
	__pipe_unlock(pipe);
	return retval;
}

unsigned long account_pipe_buffers(struct user_struct *user,
				   unsigned long old, unsigned long new)
{
	return atomic_long_add_return(new - old, &user->pipe_bufs);
}

bool too_many_pipe_buffers_soft(unsigned long user_bufs)
{
	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);

	return soft_limit && user_bufs > soft_limit;
}

bool too_many_pipe_buffers_hard(unsigned long user_bufs)
{
	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);

	return hard_limit && user_bufs > hard_limit;
}

bool pipe_is_unprivileged_user(void)
{
	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
}

struct pipe_inode_info *alloc_pipe_info(void)
{
	struct pipe_inode_info *pipe;
	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
	struct user_struct *user = get_current_user();
	unsigned long user_bufs;
	unsigned int max_size = READ_ONCE(pipe_max_size);

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
	if (pipe == NULL)
		goto out_free_uid;

	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
		pipe_bufs = max_size >> PAGE_SHIFT;

	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);

	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
		user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
		pipe_bufs = 1;
	}

	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
		goto out_revert_acct;

	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
			     GFP_KERNEL_ACCOUNT);
	if (pipe->bufs) {
		init_waitqueue_head(&pipe->rd_wait);
		init_waitqueue_head(&pipe->wr_wait);
		pipe->r_counter = pipe->w_counter = 1;
		pipe->max_usage = pipe_bufs;
		pipe->ring_size = pipe_bufs;
		pipe->nr_accounted = pipe_bufs;
		pipe->user = user;
		mutex_init(&pipe->mutex);
		return pipe;
	}

out_revert_acct:
	(void) account_pipe_buffers(user, pipe_bufs, 0);
	kfree(pipe);
out_free_uid:
	free_uid(user);
	return NULL;
}

void free_pipe_info(struct pipe_inode_info *pipe)
{
	int i;

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue) {
		watch_queue_clear(pipe->watch_queue);
		put_watch_queue(pipe->watch_queue);
	}
#endif

	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
	free_uid(pipe->user);
	for (i = 0; i < pipe->ring_size; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
			pipe_buf_release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe->bufs);
	kfree(pipe);
}

static struct vfsmount *pipe_mnt __read_mostly;

/*
 * pipefs_dname() is called from d_path().
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
				d_inode(dentry)->i_ino);
}

static const struct dentry_operations pipefs_dentry_operations = {
	.d_dname	= pipefs_dname,
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	inode->i_ino = get_next_ino();

	pipe = alloc_pipe_info();
	if (!pipe)
		goto fail_iput;

	inode->i_pipe = pipe;
	pipe->files = 2;
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &pipefifo_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

int create_pipe_files(struct file **res, int flags)
{
	struct inode *inode = get_pipe_inode();
	struct file *f;

	if (!inode)
		return -ENFILE;

	if (flags & O_NOTIFICATION_PIPE) {
#ifdef CONFIG_WATCH_QUEUE
		if (watch_queue_init(inode->i_pipe) < 0) {
			iput(inode);
			return -ENOMEM;
		}
#else
		return -ENOPKG;
#endif
	}

	f = alloc_file_pseudo(inode, pipe_mnt, "",
				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
				&pipefifo_fops);
	if (IS_ERR(f)) {
		free_pipe_info(inode->i_pipe);
		iput(inode);
		return PTR_ERR(f);
	}

	f->private_data = inode->i_pipe;

	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
				  &pipefifo_fops);
	if (IS_ERR(res[0])) {
		put_pipe_info(inode, inode->i_pipe);
		fput(f);
		return PTR_ERR(res[0]);
	}
	res[0]->private_data = inode->i_pipe;
	res[1] = f;
	stream_open(inode, res[0]);
	stream_open(inode, res[1]);
	return 0;
}
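
/*
 * On success, res[0] is the read end and res[1] the write end; both files
 * share the same inode and pipe_inode_info, which starts with files == 2.
 */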

static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
	int error;
	int fdw, fdr;

	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
		return -EINVAL;

	error = create_pipe_files(files, flags);
	if (error)
		return error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_fdr;
	fdw = error;

	audit_fd_pair(fdr, fdw);
	fd[0] = fdr;
	fd[1] = fdw;
	return 0;

 err_fdr:
	put_unused_fd(fdr);
 err_read_pipe:
	fput(files[0]);
	fput(files[1]);
	return error;
}

int do_pipe_flags(int *fd, int flags)
{
	struct file *files[2];
	int error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		fd_install(fd[0], files[0]);
		fd_install(fd[1], files[1]);
	}
	return error;
}

/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
static int do_pipe2(int __user *fildes, int flags)
{
	struct file *files[2];
	int fd[2];
	int error;

	error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
			fput(files[0]);
			fput(files[1]);
			put_unused_fd(fd[0]);
			put_unused_fd(fd[1]);
			error = -EFAULT;
		} else {
			fd_install(fd[0], files[0]);
			fd_install(fd[1], files[1]);
		}
	}
	return error;
}

SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	return do_pipe2(fildes, flags);
}
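
/*
 * Illustrative userspace view of these syscalls (via the libc wrappers):
 *
 *	int fds[2];
 *	if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) == 0) {
 *		// fds[0] is the read end, fds[1] the write end
 *	}
 */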

SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return do_pipe2(fildes, 0);
}

static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{
	int cur = *cnt;

	while (cur == *cnt) {
		pipe_wait(pipe);
		if (signal_pending(current))
			break;
	}
	return cur == *cnt ? -ERESTARTSYS : 0;
}

static void wake_up_partner(struct pipe_inode_info *pipe)
{
	wake_up_interruptible_all(&pipe->rd_wait);
	wake_up_interruptible_all(&pipe->wr_wait);
}

static int fifo_open(struct inode *inode, struct file *filp)
{
	struct pipe_inode_info *pipe;
	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
	int ret;

	filp->f_version = 0;

	spin_lock(&inode->i_lock);
	if (inode->i_pipe) {
		pipe = inode->i_pipe;
		pipe->files++;
		spin_unlock(&inode->i_lock);
	} else {
		spin_unlock(&inode->i_lock);
		pipe = alloc_pipe_info();
		if (!pipe)
			return -ENOMEM;
		pipe->files = 1;
		spin_lock(&inode->i_lock);
		if (unlikely(inode->i_pipe)) {
			inode->i_pipe->files++;
			spin_unlock(&inode->i_lock);
			free_pipe_info(pipe);
			pipe = inode->i_pipe;
		} else {
			inode->i_pipe = pipe;
			spin_unlock(&inode->i_lock);
		}
	}
	filp->private_data = pipe;
	/* OK, we have a pipe and it's pinned down */

	__pipe_lock(pipe);

	/* We can only do regular read/write on fifos */
	stream_open(inode, filp);

	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
	case FMODE_READ:
	/*
	 *  O_RDONLY
	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
	 *  opened, even when there is no process writing the FIFO.
	 */
		pipe->r_counter++;
		if (pipe->readers++ == 0)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->writers) {
			if ((filp->f_flags & O_NONBLOCK)) {
				/* suppress EPOLLHUP until we have
				 * seen a writer */
				filp->f_version = pipe->w_counter;
			} else {
				if (wait_for_partner(pipe, &pipe->w_counter))
					goto err_rd;
			}
		}
		break;

	case FMODE_WRITE:
	/*
	 *  O_WRONLY
	 *  POSIX.1 says that O_NONBLOCK means return -1 with
	 *  errno=ENXIO when there is no process reading the FIFO.
	 */
		ret = -ENXIO;
		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
			goto err;

		pipe->w_counter++;
		if (!pipe->writers++)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->readers) {
			if (wait_for_partner(pipe, &pipe->r_counter))
				goto err_wr;
		}
		break;

	case FMODE_READ | FMODE_WRITE:
	/*
	 *  O_RDWR
	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
	 *  This implementation will NEVER block on a O_RDWR open, since
	 *  the process can at least talk to itself.
	 */

		pipe->readers++;
		pipe->writers++;
		pipe->r_counter++;
		pipe->w_counter++;
		if (pipe->readers == 1 || pipe->writers == 1)
			wake_up_partner(pipe);
		break;

	default:
		ret = -EINVAL;
		goto err;
	}

	/* Ok! */
	__pipe_unlock(pipe);
	return 0;

err_rd:
	if (!--pipe->readers)
		wake_up_interruptible(&pipe->wr_wait);
	ret = -ERESTARTSYS;
	goto err;

err_wr:
	if (!--pipe->writers)
		wake_up_interruptible_all(&pipe->rd_wait);
	ret = -ERESTARTSYS;
	goto err;

err:
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return ret;
}

const struct file_operations pipefifo_fops = {
	.open		= fifo_open,
	.llseek		= no_llseek,
	.read_iter	= pipe_read,
	.write_iter	= pipe_write,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.release	= pipe_release,
	.fasync		= pipe_fasync,
};

/*
 * Currently we rely on the pipe array holding a power-of-2 number
 * of pages. Returns 0 on error.
 */
unsigned int round_pipe_size(unsigned long size)
{
	if (size > (1U << 31))
		return 0;

	/* Minimum pipe size, as required by POSIX */
	if (size < PAGE_SIZE)
		return PAGE_SIZE;

	return roundup_pow_of_two(size);
}
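
/*
 * Example (illustrative): on a 4 KiB page system, a request for 100000
 * bytes rounds up to 131072 (a power of two), i.e. 32 ring slots, while a
 * request for 1 byte is raised to the 4096-byte POSIX minimum.
 */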

/*
 * Resize the pipe ring to a number of slots.
 */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
	struct pipe_buffer *bufs;
	unsigned int head, tail, mask, n;

	/*
	 * We can shrink the pipe, if arg is greater than the ring occupancy.
	 * Since we don't expect a lot of shrink+grow operations, just free and
	 * allocate again like we would do for growing.  If the pipe currently
	 * contains more buffers than arg, then return busy.
	 */
	mask = pipe->ring_size - 1;
	head = pipe->head;
	tail = pipe->tail;
	n = pipe_occupancy(pipe->head, pipe->tail);
	if (nr_slots < n)
		return -EBUSY;

	bufs = kcalloc(nr_slots, sizeof(*bufs),
		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (unlikely(!bufs))
		return -ENOMEM;

	/*
	 * The pipe array wraps around, so just start the new one at zero
	 * and adjust the indices.
	 */
	if (n > 0) {
		unsigned int h = head & mask;
		unsigned int t = tail & mask;
		if (h > t) {
			memcpy(bufs, pipe->bufs + t,
			       n * sizeof(struct pipe_buffer));
		} else {
			unsigned int tsize = pipe->ring_size - t;
			if (h > 0)
				memcpy(bufs + tsize, pipe->bufs,
				       h * sizeof(struct pipe_buffer));
			memcpy(bufs, pipe->bufs + t,
			       tsize * sizeof(struct pipe_buffer));
		}
	}

	head = n;
	tail = 0;

	kfree(pipe->bufs);
	pipe->bufs = bufs;
	pipe->ring_size = nr_slots;
	if (pipe->max_usage > nr_slots)
		pipe->max_usage = nr_slots;
	pipe->tail = tail;
	pipe->head = head;

	/* This might have made more room for writers */
	wake_up_interruptible(&pipe->wr_wait);
	return 0;
}

/*
 * Allocate a new array of pipe buffers and copy the info over. Returns the
 * pipe size if successful, or return -ERROR on error.
 */
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
{
	unsigned long user_bufs;
	unsigned int nr_slots, size;
	long ret = 0;

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue)
		return -EBUSY;
#endif

	size = round_pipe_size(arg);
	nr_slots = size >> PAGE_SHIFT;

	if (!nr_slots)
		return -EINVAL;

	/*
	 * If trying to increase the pipe capacity, check that an
	 * unprivileged user is not trying to exceed various limits
	 * (soft limit check here, hard limit check just below).
	 * Decreasing the pipe capacity is always permitted, even
	 * if the user is currently over a limit.
	 */
	if (nr_slots > pipe->max_usage &&
			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
		return -EPERM;

	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);

	if (nr_slots > pipe->max_usage &&
			(too_many_pipe_buffers_hard(user_bufs) ||
			 too_many_pipe_buffers_soft(user_bufs)) &&
			pipe_is_unprivileged_user()) {
		ret = -EPERM;
		goto out_revert_acct;
	}

	ret = pipe_resize_ring(pipe, nr_slots);
	if (ret < 0)
		goto out_revert_acct;

	pipe->max_usage = nr_slots;
	pipe->nr_accounted = nr_slots;
	return pipe->max_usage * PAGE_SIZE;

out_revert_acct:
	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
	return ret;
}

/*
 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
 * location, so checking ->i_pipe is not enough to verify that this is a
 * pipe.
 */
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
	struct pipe_inode_info *pipe = file->private_data;

	if (file->f_op != &pipefifo_fops || !pipe)
		return NULL;
#ifdef CONFIG_WATCH_QUEUE
	if (for_splice && pipe->watch_queue)
		return NULL;
#endif
	return pipe;
}

long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe;
	long ret;

	pipe = get_pipe_info(file, false);
	if (!pipe)
		return -EBADF;

	__pipe_lock(pipe);

	switch (cmd) {
	case F_SETPIPE_SZ:
		ret = pipe_set_size(pipe, arg);
		break;
	case F_GETPIPE_SZ:
		ret = pipe->max_usage * PAGE_SIZE;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	__pipe_unlock(pipe);
	return ret;
}

static const struct super_operations pipefs_ops = {
	.destroy_inode = free_inode_nonrcu,
	.statfs = simple_statfs,
};

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */

static int pipefs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	ctx->ops = &pipefs_ops;
	ctx->dops = &pipefs_dentry_operations;
	return 0;
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.init_fs_context = pipefs_init_fs_context,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
	return err;
}

fs_initcall(init_pipe_fs);