Blame view
fs/select.c
34.7 KB
b24413180
|
1 |
// SPDX-License-Identifier: GPL-2.0 |
1da177e4c
|
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
/* * This file contains the procedures for the handling of select and poll * * Created for Linux based loosely upon Mathius Lattner's minix * patches by Peter MacDonald. Heavily edited by Linus. * * 4 February 1994 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS * flag set in its personality we do *not* modify the given timeout * parameter to reflect time remaining. * * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). */ |
022a16924
|
17 |
#include <linux/kernel.h> |
3f07c0144
|
18 19 |
#include <linux/sched/signal.h> #include <linux/sched/rt.h> |
1da177e4c
|
20 |
#include <linux/syscalls.h> |
630d9c472
|
21 |
#include <linux/export.h> |
1da177e4c
|
22 |
#include <linux/slab.h> |
1da177e4c
|
23 24 25 |
#include <linux/poll.h> #include <linux/personality.h> /* for STICKY_TIMEOUTS */ #include <linux/file.h> |
9f3acc314
|
26 |
#include <linux/fdtable.h> |
1da177e4c
|
27 |
#include <linux/fs.h> |
b835996f6
|
28 |
#include <linux/rcupdate.h> |
8ff3e8e85
|
29 |
#include <linux/hrtimer.h> |
9745cdb36
|
30 |
#include <linux/freezer.h> |
076bb0c82
|
31 |
#include <net/busy_poll.h> |
2d19309cf
|
32 |
#include <linux/vmalloc.h> |
1da177e4c
|
33 |
|
7c0f6ba68
|
34 |
#include <linux/uaccess.h> |
1da177e4c
|
35 |
|
90d6e24a3
|
36 37 38 39 40 41 42 43 44 45 46 47 |
/* * Estimate expected accuracy in ns from a timeval. * * After quite a bit of churning around, we've settled on * a simple thing of taking 0.1% of the timeout as the * slack, with a cap of 100 msec. * "nice" tasks get a 0.5% slack instead. * * Consider this comment an open invitation to come up with even * better solutions.. */ |
5ae87e79e
|
48 |
#define MAX_SLACK (100 * NSEC_PER_MSEC) |
766b9f928
|
49 |
static long __estimate_accuracy(struct timespec64 *tv) |
90d6e24a3
|
50 |
{ |
96d2ab484
|
51 |
long slack; |
90d6e24a3
|
52 |
int divfactor = 1000; |
5ae87e79e
|
53 54 |
if (tv->tv_sec < 0) return 0; |
4ce105d30
|
55 |
if (task_nice(current) > 0) |
90d6e24a3
|
56 |
divfactor = divfactor / 5; |
5ae87e79e
|
57 58 |
if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor)) return MAX_SLACK; |
90d6e24a3
|
59 60 |
slack = tv->tv_nsec / divfactor; slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); |
5ae87e79e
|
61 62 |
if (slack > MAX_SLACK) return MAX_SLACK; |
96d2ab484
|
63 |
|
90d6e24a3
|
64 65 |
return slack; } |
766b9f928
|
66 |
u64 select_estimate_accuracy(struct timespec64 *tv) |
90d6e24a3
|
67 |
{ |
da8b44d5a
|
68 |
u64 ret; |
766b9f928
|
69 |
struct timespec64 now; |
90d6e24a3
|
70 71 72 73 |
/* * Realtime tasks get a slack of 0 for obvious reasons. */ |
4ce105d30
|
74 |
if (rt_task(current)) |
90d6e24a3
|
75 |
return 0; |
766b9f928
|
76 77 |
ktime_get_ts64(&now); now = timespec64_sub(*tv, now); |
90d6e24a3
|
78 79 80 81 82 |
ret = __estimate_accuracy(&now); if (ret < current->timer_slack_ns) return current->timer_slack_ns; return ret; } |
1da177e4c
|
83 84 85 |
struct poll_table_page { struct poll_table_page * next; struct poll_table_entry * entry; |
5e01fdff0
|
86 |
struct poll_table_entry entries[]; |
1da177e4c
|
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
}; #define POLL_TABLE_FULL(table) \ ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) /* * Ok, Peter made a complicated, but straightforward multiple_wait() function. * I have rewritten this, taking some shortcuts: This code may not be easy to * follow, but it should be free of race-conditions, and it's practical. If you * understand what I'm doing here, then you understand how the linux * sleep/wakeup mechanism works. * * Two very simple procedures, poll_wait() and poll_freewait() make all the * work. poll_wait() is an inline-function defined in <linux/poll.h>, * as all select/poll functions have to call it to add an entry to the * poll table. */ |
75c96f858
|
104 105 |
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p); |
1da177e4c
|
106 107 108 109 |
void poll_initwait(struct poll_wqueues *pwq) { init_poll_funcptr(&pwq->pt, __pollwait); |
5f820f648
|
110 |
pwq->polling_task = current; |
b2add73db
|
111 |
pwq->triggered = 0; |
1da177e4c
|
112 113 |
pwq->error = 0; pwq->table = NULL; |
70674f95c
|
114 |
pwq->inline_index = 0; |
1da177e4c
|
115 |
} |
1da177e4c
|
116 |
EXPORT_SYMBOL(poll_initwait); |
70674f95c
|
117 118 |
static void free_poll_entry(struct poll_table_entry *entry) { |
ccf6780dc
|
119 |
remove_wait_queue(entry->wait_address, &entry->wait); |
70674f95c
|
120 121 |
fput(entry->filp); } |
1da177e4c
|
122 123 124 |
void poll_freewait(struct poll_wqueues *pwq) { struct poll_table_page * p = pwq->table; |
70674f95c
|
125 126 127 |
int i; for (i = 0; i < pwq->inline_index; i++) free_poll_entry(pwq->inline_entries + i); |
1da177e4c
|
128 129 130 131 132 133 134 |
while (p) { struct poll_table_entry * entry; struct poll_table_page *old; entry = p->entry; do { entry--; |
70674f95c
|
135 |
free_poll_entry(entry); |
1da177e4c
|
136 137 138 139 140 141 |
} while (entry > p->entries); old = p; p = p->next; free_page((unsigned long) old); } } |
1da177e4c
|
142 |
EXPORT_SYMBOL(poll_freewait); |
5f820f648
|
143 |
static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) |
1da177e4c
|
144 |
{ |
1da177e4c
|
145 |
struct poll_table_page *table = p->table; |
70674f95c
|
146 147 |
if (p->inline_index < N_INLINE_POLL_ENTRIES) return p->inline_entries + p->inline_index++; |
1da177e4c
|
148 149 150 151 152 153 |
if (!table || POLL_TABLE_FULL(table)) { struct poll_table_page *new_table; new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); if (!new_table) { p->error = -ENOMEM; |
70674f95c
|
154 |
return NULL; |
1da177e4c
|
155 156 157 158 159 160 |
} new_table->entry = new_table->entries; new_table->next = table; p->table = new_table; table = new_table; } |
70674f95c
|
161 162 |
return table->entry++; } |
ac6424b98
|
163 |
/*
 * Wake the task that owns the poll_wqueues stored in wait->private and
 * mark the poll_wqueues as triggered.
 */
static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct poll_wqueues *pwq = wait->private;
	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);

	/*
	 * We run under the waitqueue lock, but LOCK does not imply a write
	 * barrier and users of wakeup functions expect one.  This smp_wmb()
	 * matches the smp_wmb() in try_to_wake_up() and pairs with the
	 * smp_store_mb() in poll_schedule_timeout().
	 */
	smp_wmb();
	pwq->triggered = 1;

	/*
	 * Do the default wake-up through a dummy waitqueue entry that
	 * names the polling task.
	 *
	 * TODO: This is hacky but there currently is no interface to
	 * pass in @sync.  @sync is scheduled to be removed and once
	 * that happens, wake_up_process() can be used directly.
	 */
	return default_wake_function(&dummy_wait, mode, sync, key);
}
ac6424b98
|
188 |
/*
 * Wait-queue callback installed by __pollwait().  A wake-up that carries
 * a key is ignored (returns 0) unless the key overlaps the events this
 * entry was registered for; everything else is forwarded to __pollwake().
 */
static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct poll_table_entry *entry =
		container_of(wait, struct poll_table_entry, wait);

	if (!key || (key_to_poll(key) & entry->key))
		return __pollwake(wait, mode, sync, key);

	return 0;
}
70674f95c
|
197 198 199 200 |
/*
 * Queueing callback: register the polling task on @wait_address on behalf
 * of @filp.  A file reference is taken and kept in the entry until
 * free_poll_entry() drops it.  If no entry can be obtained the function
 * returns without queueing (poll_get_entry() has recorded the error).
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
				poll_table *p)
{
	struct poll_wqueues *owner = container_of(p, struct poll_wqueues, pt);
	struct poll_table_entry *slot;

	slot = poll_get_entry(owner);
	if (!slot)
		return;

	slot->key = p->_key;
	slot->wait_address = wait_address;
	slot->filp = get_file(filp);

	/* ->private is set after the init helper has initialised the entry. */
	init_waitqueue_func_entry(&slot->wait, pollwake);
	slot->wait.private = owner;

	add_wait_queue(wait_address, &slot->wait);
}
8f546ae1f
|
212 |
static int poll_schedule_timeout(struct poll_wqueues *pwq, int state, |
5f820f648
|
213 214 215 216 217 218 |
ktime_t *expires, unsigned long slack) { int rc = -EINTR; set_current_state(state); if (!pwq->triggered) |
59612d187
|
219 |
rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); |
5f820f648
|
220 221 222 223 224 |
__set_current_state(TASK_RUNNING); /* * Prepare for the next iteration. * |
b92b8b35a
|
225 |
* The following smp_store_mb() serves two purposes. First, it's |
5f820f648
|
226 227 228 229 230 231 232 |
* the counterpart rmb of the wmb in pollwake() such that data * written before wake up is always visible after wake up. * Second, the full barrier guarantees that triggered clearing * doesn't pass event check of the next iteration. Note that * this problem doesn't exist for the first iteration as * add_wait_queue() has full barrier semantics. */ |
b92b8b35a
|
233 |
smp_store_mb(pwq->triggered, 0); |
5f820f648
|
234 235 236 |
return rc; } |
5f820f648
|
237 |
|
b773ad40a
|
238 239 |
/** * poll_select_set_timeout - helper function to setup the timeout value |
766b9f928
|
240 |
* @to: pointer to timespec64 variable for the final timeout |
b773ad40a
|
241 242 243 244 245 246 247 248 |
* @sec: seconds (from user space) * @nsec: nanoseconds (from user space) * * Note, we do not use a timespec for the user space value here, That * way we can use the function for timeval and compat interfaces as well. * * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. */ |
766b9f928
|
249 |
int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec) |
b773ad40a
|
250 |
{ |
766b9f928
|
251 |
struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec}; |
b773ad40a
|
252 |
|
766b9f928
|
253 |
if (!timespec64_valid(&ts)) |
b773ad40a
|
254 255 256 257 258 259 |
return -EINVAL; /* Optimize for the zero timeout value here */ if (!sec && !nsec) { to->tv_sec = to->tv_nsec = 0; } else { |
766b9f928
|
260 261 |
ktime_get_ts64(to); *to = timespec64_add_safe(*to, ts); |
b773ad40a
|
262 263 264 |
} return 0; } |
8bd27a300
|
265 266 267 268 269 270 |
enum poll_time_type { PT_TIMEVAL = 0, PT_OLD_TIMEVAL = 1, PT_TIMESPEC = 2, PT_OLD_TIMESPEC = 3, }; |
ac3010206
|
271 272 273 |
/*
 * Common tail of the select/poll family: deal with the saved signal mask
 * and, when @p is non-NULL, write the time remaining until @end_time back
 * to user space in the layout selected by @pt_type.
 *
 * Returns @ret, except that -ERESTARTNOHAND becomes -EINTR when the
 * remaining time was not written back (STICKY_TIMEOUTS personality, or
 * the copy to user space failed).
 */
static int poll_select_finish(struct timespec64 *end_time,
			      void __user *p,
			      enum poll_time_type pt_type, int ret)
{
	struct timespec64 left;

	restore_saved_sigmask_unless(ret == -ERESTARTNOHAND);

	if (!p)
		return ret;

	if (current->personality & STICKY_TIMEOUTS)
		goto sticky;

	/* No update for zero timeout */
	if (!end_time->tv_sec && !end_time->tv_nsec)
		return ret;

	ktime_get_ts64(&left);
	left = timespec64_sub(*end_time, left);
	if (left.tv_sec < 0)
		left.tv_sec = left.tv_nsec = 0;

	switch (pt_type) {
	case PT_TIMEVAL:
		{
			struct __kernel_old_timeval out;

			/* Clear the struct first if it is larger than its two fields. */
			if (sizeof(out) > sizeof(out.tv_sec) + sizeof(out.tv_usec))
				memset(&out, 0, sizeof(out));
			out.tv_sec = left.tv_sec;
			out.tv_usec = left.tv_nsec / NSEC_PER_USEC;
			if (!copy_to_user(p, &out, sizeof(out)))
				return ret;
		}
		break;
	case PT_OLD_TIMEVAL:
		{
			struct old_timeval32 out;

			out.tv_sec = left.tv_sec;
			out.tv_usec = left.tv_nsec / NSEC_PER_USEC;
			if (!copy_to_user(p, &out, sizeof(out)))
				return ret;
		}
		break;
	case PT_TIMESPEC:
		if (!put_timespec64(&left, p))
			return ret;
		break;
	case PT_OLD_TIMESPEC:
		if (!put_old_timespec32(&left, p))
			return ret;
		break;
	default:
		BUG();
	}
	/*
	 * If an application puts its timeval in read-only memory, we
	 * don't want the Linux-specific update to the timeval to
	 * cause a fault after the select has completed
	 * successfully. However, because we're not updating the
	 * timeval, we can't restart the system call.
	 */

sticky:
	if (ret == -ERESTARTNOHAND)
		ret = -EINTR;
	return ret;
}
e99ca56ce
|
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 |
/* * Scalable version of the fd_set. */ typedef struct { unsigned long *in, *out, *ex; unsigned long *res_in, *res_out, *res_ex; } fd_set_bits; /* * How many longwords for "nr" bits? */ #define FDS_BITPERLONG (8*sizeof(long)) #define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) #define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) /* |
e99ca56ce
|
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 |
* Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. */ static inline int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) { nr = FDS_BYTES(nr); if (ufdset) return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0; memset(fdset, 0, nr); return 0; } static inline unsigned long __must_check set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) { if (ufdset) return __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); return 0; } static inline void zero_fd_set(unsigned long nr, unsigned long *fdset) { memset(fdset, 0, FDS_BYTES(nr)); } |
1da177e4c
|
383 384 385 386 387 388 389 390 391 392 393 |
#define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) #define BITS(fds, n) (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n)) static int max_select_fd(unsigned long n, fd_set_bits *fds) { unsigned long *open_fds; unsigned long set; int max; |
badf16621
|
394 |
struct fdtable *fdt; |
1da177e4c
|
395 396 |
/* handle last in-complete long-word first */ |
8ded2bbc1
|
397 398 |
set = ~(~0UL << (n & (BITS_PER_LONG-1))); n /= BITS_PER_LONG; |
badf16621
|
399 |
fdt = files_fdtable(current->files); |
1fd36adcd
|
400 |
open_fds = fdt->open_fds + n; |
1da177e4c
|
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 |
max = 0; if (set) { set &= BITS(fds, n); if (set) { if (!(set & ~*open_fds)) goto get_max; return -EBADF; } } while (n) { open_fds--; n--; set = BITS(fds, n); if (!set) continue; if (set & ~*open_fds) return -EBADF; if (max) continue; get_max: do { max++; set >>= 1; } while (set); |
8ded2bbc1
|
425 |
max += n * BITS_PER_LONG; |
1da177e4c
|
426 427 428 429 |
} return max; } |
a9a08845e
|
430 431 432 |
#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR) #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR) #define POLLEX_SET (EPOLLPRI) |
1da177e4c
|
433 |
|
4938d7e02
|
434 |
static inline void wait_key_set(poll_table *wait, unsigned long in, |
2d48d67fa
|
435 |
unsigned long out, unsigned long bit, |
016994377
|
436 |
__poll_t ll_flag) |
4938d7e02
|
437 |
{ |
2d48d67fa
|
438 |
wait->_key = POLLEX_SET | ll_flag; |
626cf2366
|
439 440 441 442 |
if (in & bit) wait->_key |= POLLIN_SET; if (out & bit) wait->_key |= POLLOUT_SET; |
4938d7e02
|
443 |
} |
e99ca56ce
|
444 |
/*
 * Core of select(): poll every descriptor requested in @fds (below @n)
 * until at least one is ready, the timeout @end_time expires, a signal
 * is pending, or registering a waiter failed.
 *
 * Ready bits are written to the res_in/res_out/res_ex bitmaps of @fds.
 * Returns the number of ready bits, 0 when nothing became ready, or a
 * negative error (-EBADF from max_select_fd(), or the error recorded in
 * the poll table).
 */
static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
	ktime_t expire, *to = NULL;
	struct poll_wqueues table;
	poll_table *wait;
	int retval, i, timed_out = 0;
	u64 slack = 0;
	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
	unsigned long busy_start = 0;

	rcu_read_lock();
	retval = max_select_fd(n, fds);
	rcu_read_unlock();

	if (retval < 0)
		return retval;
	/* Only scan up to the highest descriptor actually requested. */
	n = retval;

	poll_initwait(&table);
	wait = &table.pt;
	if (end_time) {
		if (!end_time->tv_sec && !end_time->tv_nsec) {
			/* Zero timeout: single pass, no waiters registered. */
			wait->_qproc = NULL;
			timed_out = 1;
		} else {
			slack = select_estimate_accuracy(end_time);
		}
	}

	retval = 0;
	for (;;) {
		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
		bool can_busy_loop = false;

		inp = fds->in;
		outp = fds->out;
		exp = fds->ex;
		rinp = fds->res_in;
		routp = fds->res_out;
		rexp = fds->res_ex;

		/* One outer iteration per bitmap word; i is the descriptor number. */
		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
			unsigned long in, out, ex, all_bits, bit = 1, j;
			unsigned long res_in = 0, res_out = 0, res_ex = 0;
			__poll_t mask;

			in = *inp++;
			out = *outp++;
			ex = *exp++;
			all_bits = in | out | ex;
			if (all_bits == 0) {
				/* Nothing requested in this word: skip it whole. */
				i += BITS_PER_LONG;
				continue;
			}

			for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
				struct fd f;

				if (i >= n)
					break;
				if (!(bit & all_bits))
					continue;
				f = fdget(i);
				if (f.file) {
					wait_key_set(wait, in, out, bit,
						     busy_flag);
					mask = vfs_poll(f.file, wait);

					fdput(f);
					/*
					 * After the first hit no further
					 * waiters need to be registered.
					 */
					if ((mask & POLLIN_SET) && (in & bit)) {
						res_in |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLOUT_SET) && (out & bit)) {
						res_out |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLEX_SET) && (ex & bit)) {
						res_ex |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					/* got something, stop busy polling */
					if (retval) {
						can_busy_loop = false;
						busy_flag = 0;

					/*
					 * only remember a returned
					 * POLL_BUSY_LOOP if we asked for it
					 */
					} else if (busy_flag & mask)
						can_busy_loop = true;

				}
			}
			if (res_in)
				*rinp = res_in;
			if (res_out)
				*routp = res_out;
			if (res_ex)
				*rexp = res_ex;
			cond_resched();
		}
		/* Later passes must not register waiters again. */
		wait->_qproc = NULL;
		if (retval || timed_out || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}

		/* only if found POLL_BUSY_LOOP sockets && not out of time */
		if (can_busy_loop && !need_resched()) {
			if (!busy_start) {
				busy_start = busy_loop_current_time();
				continue;
			}
			if (!busy_loop_timeout(busy_start))
				continue;
		}
		busy_flag = 0;

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the to
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec64_to_ktime(*end_time);
			to = &expire;
		}

		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
					   to, slack))
			timed_out = 1;
	}

	poll_freewait(&table);

	return retval;
}
1da177e4c
|
575 576 577 578 579 580 581 582 |
/* * We can actually return ERESTARTSYS instead of EINTR, but I'd * like to be certain this leads to no problems. So I return * EINTR just for safety. * * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. */ |
a2dcb44c3
|
583 |
/*
 * Copy the three user fd_sets into kernel bitmaps, run do_select() on
 * them and copy the result bitmaps back.
 *
 * @n is clamped to the max_fds of the current fdtable.  The six bitmaps
 * live in an on-stack buffer when they fit and in a kvmalloc()ed buffer
 * otherwise.  Returns the do_select() result, -ERESTARTNOHAND when
 * nothing was ready and a signal is pending, or -EINVAL / -ENOMEM /
 * -EFAULT on failure.
 */
int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
			   fd_set __user *exp, struct timespec64 *end_time)
{
	fd_set_bits fds;
	void *bits;
	int ret, max_fds;
	size_t size, alloc_size;
	struct fdtable *fdt;
	/* Allocate small arguments on the stack to save memory and be faster */
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	if (n < 0)
		return -EINVAL;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	if (n > max_fds)
		n = max_fds;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
	 * since we used fdset we need to allocate memory in units of
	 * long-words.
	 */
	size = FDS_BYTES(n);
	bits = stack_fds;
	if (size > sizeof(stack_fds) / 6) {
		/* Not enough space in on-stack array; must use kmalloc */
		if (size > (SIZE_MAX / 6))
			return -ENOMEM;

		alloc_size = 6 * size;
		bits = kvmalloc(alloc_size, GFP_KERNEL);
		if (!bits)
			return -ENOMEM;
	}
	fds.in      = bits;
	fds.out     = bits +   size;
	fds.ex      = bits + 2*size;
	fds.res_in  = bits + 3*size;
	fds.res_out = bits + 4*size;
	fds.res_ex  = bits + 5*size;

	ret = get_fd_set(n, inp, fds.in);
	if (ret)
		goto out;
	ret = get_fd_set(n, outp, fds.out);
	if (ret)
		goto out;
	ret = get_fd_set(n, exp, fds.ex);
	if (ret)
		goto out;

	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, end_time);

	if (ret < 0)
		goto out;
	/* Nothing ready and a signal is pending: skip the copy-out. */
	if (!ret && signal_pending(current)) {
		ret = -ERESTARTNOHAND;
		goto out;
	}

	if (set_fd_set(n, inp, fds.res_in) ||
	    set_fd_set(n, outp, fds.res_out) ||
	    set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;

out:
	if (bits != stack_fds)
		kvfree(bits);
	return ret;
}
4bdb9acab
|
659 |
/*
 * select() with an optional user-space timeval timeout.
 *
 * The timeval is converted to an absolute end time (microseconds beyond
 * one second are carried into the seconds), the select is run, and
 * poll_select_finish() then handles the write-back to @tvp and the final
 * return value.
 */
static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
		       fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
{
	struct timespec64 end_time, *to = NULL;
	struct __kernel_old_timeval tv;
	int ret;

	if (tvp) {
		time64_t sec;
		long nsec;

		if (copy_from_user(&tv, tvp, sizeof(tv)))
			return -EFAULT;

		sec = tv.tv_sec + (tv.tv_usec / USEC_PER_SEC);
		nsec = (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC;

		to = &end_time;
		if (poll_select_set_timeout(to, sec, nsec))
			return -EINVAL;
	}

	ret = core_sys_select(n, inp, outp, exp, to);
	return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret);
}
4bdb9acab
|
678 |
/* select(2) entry point: a thin wrapper around kern_select(). */
SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
		fd_set __user *, exp,
		struct __kernel_old_timeval __user *, tvp)
{
	return kern_select(n, inp, outp, exp, tvp);
}
c9da9f212
|
683 |
/*
 * Common implementation of the pselect6 variants.
 *
 * @tsp, when non-NULL, is read in the layout named by @type and turned
 * into an absolute end time.  The signal mask described by @sigmask and
 * @sigsetsize is installed through set_user_sigmask() before selecting.
 * poll_select_finish() produces the final return value.
 */
static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
		       fd_set __user *exp, void __user *tsp,
		       const sigset_t __user *sigmask, size_t sigsetsize,
		       enum poll_time_type type)
{
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		switch (type) {
		case PT_TIMESPEC:
			if (get_timespec64(&ts, tsp))
				return -EFAULT;
			break;
		case PT_OLD_TIMESPEC:
			if (get_old_timespec32(&ts, tsp))
				return -EFAULT;
			break;
		default:
			BUG();
		}

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	ret = set_user_sigmask(sigmask, sigsetsize);
	if (ret != 0)
		return ret;

	ret = core_sys_select(n, inp, outp, exp, to);
	return poll_select_finish(&end_time, tsp, type, ret);
}

/*
 * Most architectures can't handle 7-argument syscalls. So we provide a
 * 6-argument version where the sixth argument is a pointer to a structure
 * which has a pointer to the sigset_t itself followed by a size_t containing
 * the sigset size.
 */
7e71609f6
|
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 |
/* Sixth pselect6 argument: pointer to the sigset plus its size. */
struct sigset_argpack {
	sigset_t __user *p;
	size_t size;
};

/*
 * Fetch the user-space sigset_argpack at @from into @to.
 *
 * A NULL @from leaves @to untouched and succeeds.  Returns 0 on success
 * and -EFAULT when the user memory cannot be read.
 */
static inline int get_sigset_argpack(struct sigset_argpack *to,
				     struct sigset_argpack __user *from)
{
	// the path is hot enough for overhead of copy_from_user() to matter
	if (from) {
		if (!user_read_access_begin(from, sizeof(*from)))
			return -EFAULT;
		unsafe_get_user(to->p, &from->p, Efault);
		unsafe_get_user(to->size, &from->size, Efault);
		user_read_access_end();
	}
	return 0;
Efault:
	/*
	 * The window was opened with user_read_access_begin(), so it must
	 * be closed with the matching read variant; a plain
	 * user_access_end() here is unbalanced on architectures that
	 * distinguish read-only from read/write user access.
	 */
	user_read_access_end();
	return -EFAULT;
}
d4e82042c
|
744 |
/* pselect6(2) with a 64-bit time __kernel_timespec timeout. */
SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
		fd_set __user *, exp, struct __kernel_timespec __user *, tsp,
		void __user *, sig)
{
	struct sigset_argpack pack = { .p = NULL, .size = 0 };

	if (get_sigset_argpack(&pack, sig))
		return -EFAULT;

	return do_pselect(n, inp, outp, exp, tsp, pack.p, pack.size,
			  PT_TIMESPEC);
}

#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)

/* pselect6 variant taking the 32-bit old_timespec32 timeout layout. */
SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp,
		fd_set __user *, exp, struct old_timespec32 __user *, tsp,
		void __user *, sig)
{
	struct sigset_argpack pack = { .p = NULL, .size = 0 };

	if (get_sigset_argpack(&pack, sig))
		return -EFAULT;

	return do_pselect(n, inp, outp, exp, tsp, pack.p, pack.size,
			  PT_OLD_TIMESPEC);
}

#endif
5d0e52830
|
771 772 773 774 |
#ifdef __ARCH_WANT_SYS_OLD_SELECT
/* All select() arguments packed into one user-space structure. */
struct sel_arg_struct {
	unsigned long n;
	fd_set __user *inp, *outp, *exp;
	struct __kernel_old_timeval __user *tvp;
};

/* Legacy select entry point: unpack the argument block, then kern_select(). */
SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
{
	struct sel_arg_struct args;

	if (copy_from_user(&args, arg, sizeof(args)))
		return -EFAULT;

	return kern_select(args.n, args.inp, args.outp, args.exp, args.tvp);
}
#endif
1da177e4c
|
787 788 789 |
struct poll_list { struct poll_list *next; int len; |
5e01fdff0
|
790 |
struct pollfd entries[]; |
1da177e4c
|
791 792 793 |
}; #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) |
4a4b69f79
|
794 795 796 797 798 |
/* * Fish for pollable events on the pollfd->fd file descriptor. We're only * interested in events matching the pollfd->events mask, and the result * matching that mask is both recorded in pollfd->revents and returned. The * pwait poll_table will be used by the fd-provided poll handler for waiting, |
626cf2366
|
799 |
* if pwait->_qproc is non-NULL. |
4a4b69f79
|
800 |
*/ |
fb3679372
|
801 |
static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, |
cbf55001b
|
802 |
bool *can_busy_poll, |
fb3679372
|
803 |
__poll_t busy_flag) |
1da177e4c
|
804 |
{ |
a0f8dcfc6
|
805 806 807 808 809 810 811 812 813 814 815 816 817 |
int fd = pollfd->fd; __poll_t mask = 0, filter; struct fd f; if (fd < 0) goto out; mask = EPOLLNVAL; f = fdget(fd); if (!f.file) goto out; /* userland u16 ->events contains POLL... bitmap */ filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP; |
9965ed174
|
818 819 820 821 |
pwait->_key = filter | busy_flag; mask = vfs_poll(f.file, pwait); if (mask & busy_flag) *can_busy_poll = true; |
a0f8dcfc6
|
822 823 824 825 |
mask &= filter; /* Mask out unneeded events. */ fdput(f); out: |
fb3679372
|
826 |
/* ... and so does ->revents */ |
c71d227fc
|
827 |
pollfd->revents = mangle_poll(mask); |
4a4b69f79
|
828 |
return mask; |
1da177e4c
|
829 |
} |
ccec5ee30
|
830 |
static int do_poll(struct poll_list *list, struct poll_wqueues *wait, |
766b9f928
|
831 |
struct timespec64 *end_time) |
1da177e4c
|
832 |
{ |
1da177e4c
|
833 |
poll_table* pt = &wait->pt; |
8ff3e8e85
|
834 835 |
ktime_t expire, *to = NULL; int timed_out = 0, count = 0; |
da8b44d5a
|
836 |
u64 slack = 0; |
fb3679372
|
837 |
__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; |
37056719b
|
838 |
unsigned long busy_start = 0; |
1da177e4c
|
839 |
|
9f72949f6
|
840 |
/* Optimise the no-wait case */ |
8ff3e8e85
|
841 |
if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
626cf2366
|
842 |
pt->_qproc = NULL; |
8ff3e8e85
|
843 844 |
timed_out = 1; } |
9bf084f70
|
845 |
|
96d2ab484
|
846 |
if (end_time && !timed_out) |
231f3d393
|
847 |
slack = select_estimate_accuracy(end_time); |
90d6e24a3
|
848 |
|
1da177e4c
|
849 850 |
for (;;) { struct poll_list *walk; |
cbf55001b
|
851 |
bool can_busy_loop = false; |
9f72949f6
|
852 |
|
4a4b69f79
|
853 854 855 856 857 858 859 860 |
for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; pfd = walk->entries; pfd_end = pfd + walk->len; for (; pfd != pfd_end; pfd++) { /* * Fish for events. If we found one, record it |
626cf2366
|
861 |
* and kill poll_table->_qproc, so we don't |
4a4b69f79
|
862 863 864 865 |
* needlessly register any other waiters after * this. They'll get immediately deregistered * when we break out and return. */ |
cbf55001b
|
866 867 |
if (do_pollfd(pfd, pt, &can_busy_loop, busy_flag)) { |
4a4b69f79
|
868 |
count++; |
626cf2366
|
869 |
pt->_qproc = NULL; |
cbf55001b
|
870 871 872 |
/* found something, stop busy polling */ busy_flag = 0; can_busy_loop = false; |
4a4b69f79
|
873 874 |
} } |
1da177e4c
|
875 |
} |
4a4b69f79
|
876 877 |
/* * All waiters have already been registered, so don't provide |
626cf2366
|
878 |
* a poll_table->_qproc to them on the next loop iteration. |
4a4b69f79
|
879 |
*/ |
626cf2366
|
880 |
pt->_qproc = NULL; |
9bf084f70
|
881 882 883 |
if (!count) { count = wait->error; if (signal_pending(current)) |
8cf8b5539
|
884 |
count = -ERESTARTNOHAND; |
9bf084f70
|
885 |
} |
8ff3e8e85
|
886 |
if (count || timed_out) |
1da177e4c
|
887 |
break; |
9f72949f6
|
888 |
|
cbf55001b
|
889 |
/* only if found POLL_BUSY_LOOP sockets && not out of time */ |
76b1e9b98
|
890 |
if (can_busy_loop && !need_resched()) { |
37056719b
|
891 892 |
if (!busy_start) { busy_start = busy_loop_current_time(); |
76b1e9b98
|
893 894 |
continue; } |
37056719b
|
895 |
if (!busy_loop_timeout(busy_start)) |
76b1e9b98
|
896 897 898 |
continue; } busy_flag = 0; |
91e2fd337
|
899 |
|
8ff3e8e85
|
900 901 902 903 904 905 |
/* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to * pointer to the expiry value. */ if (end_time && !to) { |
766b9f928
|
906 |
expire = timespec64_to_ktime(*end_time); |
8ff3e8e85
|
907 |
to = &expire; |
9f72949f6
|
908 |
} |
5f820f648
|
909 |
if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack)) |
8ff3e8e85
|
910 |
timed_out = 1; |
1da177e4c
|
911 |
} |
1da177e4c
|
912 913 |
return count; } |
70674f95c
|
914 915 |
#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ sizeof(struct pollfd)) |
e99ca56ce
|
916 |
/*
 * Copy @nfds pollfd entries from @ufds into a chain of poll_list chunks
 * (the first on the stack, the rest kmalloc()ed), run do_poll() on them
 * and write each revents field back to user space.
 *
 * Returns the do_poll() result, -EINVAL when @nfds exceeds
 * RLIMIT_NOFILE, -ENOMEM when a chunk cannot be allocated, or -EFAULT
 * when user memory cannot be accessed.
 */
static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
		struct timespec64 *end_time)
{
	struct poll_wqueues table;
	int err = -EFAULT, fdcount, len;
	/* Allocate small arguments on the stack to save memory and be
	   faster - use long to make sure the buffer is aligned properly
	   on 64 bit archs to avoid unaligned access */
	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
	struct poll_list *const head = (struct poll_list *)stack_pps;
	struct poll_list *walk = head;
	unsigned long todo = nfds;

	if (nfds > rlimit(RLIMIT_NOFILE))
		return -EINVAL;

	len = min_t(unsigned int, nfds, N_STACK_PPS);
	for (;;) {
		struct poll_list *chunk;

		walk->next = NULL;
		walk->len = len;
		if (!len)
			break;

		/* nfds - todo entries have been copied so far. */
		if (copy_from_user(walk->entries, ufds + nfds-todo,
					sizeof(struct pollfd) * walk->len))
			goto out_fds;

		todo -= walk->len;
		if (!todo)
			break;

		len = min(todo, POLLFD_PER_PAGE);
		chunk = kmalloc(struct_size(walk, entries, len), GFP_KERNEL);
		walk->next = chunk;
		walk = chunk;
		if (!walk) {
			err = -ENOMEM;
			goto out_fds;
		}
	}

	poll_initwait(&table);
	fdcount = do_poll(head, &table, end_time);
	poll_freewait(&table);

	if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
		goto out_fds;

	for (walk = head; walk; walk = walk->next) {
		struct pollfd *kfd = walk->entries;
		int left;

		for (left = walk->len; left; kfd++, ufds++, left--)
			unsafe_put_user(kfd->revents, &ufds->revents, Efault);
	}
	user_write_access_end();

	err = fdcount;
out_fds:
	/* The head chunk lives on the stack; free only the ones after it. */
	walk = head->next;
	while (walk) {
		struct poll_list *victim = walk;

		walk = walk->next;
		kfree(victim);
	}

	return err;

Efault:
	user_write_access_end();
	err = -EFAULT;
	goto out_fds;
}
9f72949f6
|
985 |
|
3075d9da0
|
986 987 |
static long do_restart_poll(struct restart_block *restart_block) { |
8ff3e8e85
|
988 989 |
struct pollfd __user *ufds = restart_block->poll.ufds; int nfds = restart_block->poll.nfds; |
766b9f928
|
990 |
struct timespec64 *to = NULL, end_time; |
3075d9da0
|
991 |
int ret; |
8ff3e8e85
|
992 993 994 995 996 997 998 |
if (restart_block->poll.has_timeout) { end_time.tv_sec = restart_block->poll.tv_sec; end_time.tv_nsec = restart_block->poll.tv_nsec; to = &end_time; } ret = do_sys_poll(ufds, nfds, to); |
8cf8b5539
|
999 |
if (ret == -ERESTARTNOHAND) { |
3075d9da0
|
1000 |
restart_block->fn = do_restart_poll; |
3075d9da0
|
1001 1002 1003 1004 |
ret = -ERESTART_RESTARTBLOCK; } return ret; } |
5a8a82b1d
|
1005 |
/*
 * poll() system call.
 *
 * @ufds:          user array of pollfd entries
 * @nfds:          number of entries in @ufds
 * @timeout_msecs: timeout in milliseconds; a negative value means that no
 *                 timeout is handed to do_sys_poll()
 *
 * Returns the result of do_sys_poll().  When that result is
 * -ERESTARTNOHAND the arguments and, if one was given, the computed
 * end_time are stored in the current task's restart block, with
 * do_restart_poll() as the handler, and -ERESTART_RESTARTBLOCK is
 * returned instead.
 */
SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
		int, timeout_msecs)
{
	struct timespec64 end_time, *to = NULL;
	int ret;

	if (timeout_msecs >= 0) {
		/* split the millisecond count into seconds + nanoseconds */
		to = &end_time;
		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
	}

	ret = do_sys_poll(ufds, nfds, to);

	if (ret == -ERESTARTNOHAND) {
		struct restart_block *restart_block;

		restart_block = &current->restart_block;
		restart_block->fn = do_restart_poll;
		restart_block->poll.ufds = ufds;
		restart_block->poll.nfds = nfds;

		if (timeout_msecs >= 0) {
			restart_block->poll.tv_sec = end_time.tv_sec;
			restart_block->poll.tv_nsec = end_time.tv_nsec;
			restart_block->poll.has_timeout = 1;
		} else
			restart_block->poll.has_timeout = 0;

		ret = -ERESTART_RESTARTBLOCK;
	}
	return ret;
}
d4e82042c
|
1035 |
/*
 * ppoll() system call with a __kernel_timespec timeout.
 *
 * A non-NULL @tsp is read with get_timespec64() (-EFAULT on failure) and
 * converted by poll_select_set_timeout() (-EINVAL on failure).  The
 * signal mask is then installed through set_user_sigmask(), whose
 * non-zero result is returned unchanged.  The result of do_sys_poll() is
 * finally passed through poll_select_finish() with PT_TIMESPEC.
 */
SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
		struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
		size_t, sigsetsize)
{
	struct timespec64 user_ts;
	struct timespec64 end_time;
	struct timespec64 *to = NULL;
	int ret;

	if (tsp != NULL) {
		if (get_timespec64(&user_ts, tsp) != 0)
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, user_ts.tv_sec, user_ts.tv_nsec) != 0)
			return -EINVAL;
	}

	ret = set_user_sigmask(sigmask, sigsetsize);
	if (ret != 0)
		return ret;

	ret = do_sys_poll(ufds, nfds, to);

	return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
}
e99ca56ce
|
1056 |
|
8bd27a300
|
1057 |
#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) |
e99ca56ce
|
1058 |
|
8bd27a300
|
1059 1060 1061 |
/*
 * ppoll() variant taking an old_timespec32 timeout.
 *
 * Same sequence as the __kernel_timespec ppoll(): read and convert the
 * optional timeout (-EFAULT / -EINVAL on failure), install the signal
 * mask, run do_sys_poll() and pass its result through
 * poll_select_finish(), here with PT_OLD_TIMESPEC.
 */
SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
		struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
		size_t, sigsetsize)
{
	struct timespec64 user_ts;
	struct timespec64 end_time;
	struct timespec64 *to = NULL;
	int ret;

	if (tsp != NULL) {
		if (get_old_timespec32(&user_ts, tsp) != 0)
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, user_ts.tv_sec, user_ts.tv_nsec) != 0)
			return -EINVAL;
	}

	ret = set_user_sigmask(sigmask, sigsetsize);
	if (ret != 0)
		return ret;

	ret = do_sys_poll(ufds, nfds, to);

	return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
}
8bd27a300
|
1081 1082 1083 1084 |
#endif #ifdef CONFIG_COMPAT #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) |
e99ca56ce
|
1085 1086 1087 1088 1089 1090 1091 1092 1093 |
/*
 * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
 * 64-bit unsigned longs.
 */

/*
 * Fill @fdset with the first @nr bits of the compat bitmap @ufdset.
 * A NULL @ufdset produces an all-zero set.  Returns the result of
 * compat_get_bitmap(), or 0 for a NULL @ufdset.
 */
static
int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
		      unsigned long *fdset)
{
	if (!ufdset) {
		zero_fd_set(nr, fdset);
		return 0;
	}

	return compat_get_bitmap(fdset, ufdset, nr);
}

/*
 * Write the first @nr bits of @fdset to the compat bitmap @ufdset.
 * A NULL @ufdset is skipped and reported as success (0); otherwise the
 * result of compat_put_bitmap() is returned.
 */
static
int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
		      unsigned long *fdset)
{
	if (ufdset)
		return compat_put_bitmap(ufdset, fdset, nr);

	return 0;
}


/*
 * This is a virtual copy of sys_select from fs/select.c and probably
 * should be compared to it from time to time
 */

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restart only when you want to.
 */
static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
	struct timespec64 *end_time)
{
	fd_set_bits fds;
	void *bits;
	char *cursor;
	int size, max_fds, ret;
	struct fdtable *fdt;
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	if (n < 0)
		return -EINVAL;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	if (n > max_fds)
		n = max_fds;

	/*
	 * Six bitmaps are needed (in/out/ex, each for the request and for
	 * the result); FDS_BYTES() sizes each of them in whole long-words.
	 * The on-stack buffer is used when all six fit into it.
	 */
	size = FDS_BYTES(n);
	bits = stack_fds;
	if (size > sizeof(stack_fds) / 6) {
		bits = kmalloc_array(6, size, GFP_KERNEL);
		if (!bits)
			return -ENOMEM;
	}

	/* carve the buffer into six consecutive bitmaps of size bytes each */
	cursor = bits;
	fds.in      = (unsigned long *) cursor;
	fds.out     = (unsigned long *) (cursor + size);
	fds.ex      = (unsigned long *) (cursor + 2*size);
	fds.res_in  = (unsigned long *) (cursor + 3*size);
	fds.res_out = (unsigned long *) (cursor + 4*size);
	fds.res_ex  = (unsigned long *) (cursor + 5*size);

	/* stop at the first set that cannot be fetched */
	ret = compat_get_fd_set(n, inp, fds.in);
	if (!ret)
		ret = compat_get_fd_set(n, outp, fds.out);
	if (!ret)
		ret = compat_get_fd_set(n, exp, fds.ex);
	if (ret)
		goto out;

	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, end_time);
	if (ret < 0)
		goto out;

	/* nothing ready and a signal is pending: do not write results back */
	if (ret == 0 && signal_pending(current)) {
		ret = -ERESTARTNOHAND;
		goto out;
	}

	if (compat_set_fd_set(n, inp, fds.res_in) ||
	    compat_set_fd_set(n, outp, fds.res_out) ||
	    compat_set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;
out:
	if (bits != stack_fds)
		kfree(bits);
	return ret;
}
05585e449
|
1195 1196 |
static int do_compat_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, |
9afc5eee6
|
1197 |
struct old_timeval32 __user *tvp) |
e99ca56ce
|
1198 |
{ |
36819ad09
|
1199 |
struct timespec64 end_time, *to = NULL; |
9afc5eee6
|
1200 |
struct old_timeval32 tv; |
e99ca56ce
|
1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 |
int ret; if (tvp) { if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) return -EINVAL; } ret = compat_core_sys_select(n, inp, outp, exp, to); |
ac3010206
|
1215 |
return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret); |
e99ca56ce
|
1216 |
} |
05585e449
|
1217 1218 |
/*
 * Compat select() system call: forwards all five arguments unchanged to
 * do_compat_select() and returns its result.
 */
COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
	struct old_timeval32 __user *, tvp)
{
	int ret = do_compat_select(n, inp, outp, exp, tvp);

	return ret;
}
e99ca56ce
|
1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 |
struct compat_sel_arg_struct { compat_ulong_t n; compat_uptr_t inp; compat_uptr_t outp; compat_uptr_t exp; compat_uptr_t tvp; }; COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) { struct compat_sel_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; |
05585e449
|
1237 1238 |
return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), compat_ptr(a.exp), compat_ptr(a.tvp)); |
e99ca56ce
|
1239 1240 1241 1242 |
} static long do_compat_pselect(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, |
e024707bc
|
1243 1244 |
void __user *tsp, compat_sigset_t __user *sigmask, compat_size_t sigsetsize, enum poll_time_type type) |
e99ca56ce
|
1245 |
{ |
36819ad09
|
1246 |
struct timespec64 ts, end_time, *to = NULL; |
e99ca56ce
|
1247 1248 1249 |
int ret; if (tsp) { |
e024707bc
|
1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 |
switch (type) { case PT_OLD_TIMESPEC: if (get_old_timespec32(&ts, tsp)) return -EFAULT; break; case PT_TIMESPEC: if (get_timespec64(&ts, tsp)) return -EFAULT; break; default: BUG(); } |
e99ca56ce
|
1262 1263 1264 1265 1266 |
to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } |
b772434be
|
1267 |
ret = set_compat_user_sigmask(sigmask, sigsetsize); |
ded653ccb
|
1268 1269 |
if (ret) return ret; |
e99ca56ce
|
1270 1271 |
ret = compat_core_sys_select(n, inp, outp, exp, to); |
ac3010206
|
1272 |
return poll_select_finish(&end_time, tsp, type, ret); |
e99ca56ce
|
1273 |
} |
7e71609f6
|
1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 |
/*
 * Signal-mask argument of the compat pselect6 calls as laid out in user
 * space: a compat pointer plus a size.
 */
struct compat_sigset_argpack {
	compat_uptr_t p;
	compat_size_t size;
};

/*
 * Copy a compat_sigset_argpack from user space.
 *
 * @to:   destination; left untouched when @from is NULL
 * @from: user pointer to the argument pack, may be NULL
 *
 * Returns 0 on success (including a NULL @from) and -EFAULT if the user
 * memory cannot be accessed.
 */
static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to,
					    struct compat_sigset_argpack __user *from)
{
	if (from) {
		if (!user_read_access_begin(from, sizeof(*from)))
			return -EFAULT;
		unsafe_get_user(to->p, &from->p, Efault);
		unsafe_get_user(to->size, &from->size, Efault);
		user_read_access_end();
	}
	return 0;
Efault:
	/*
	 * The access window was opened with user_read_access_begin(), so it
	 * has to be closed with the matching read variant on the fault path
	 * as well.
	 */
	user_read_access_end();
	return -EFAULT;
}
e024707bc
|
1293 1294 1295 1296 |
COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct __kernel_timespec __user *, tsp, void __user *, sig) { |
7e71609f6
|
1297 1298 1299 1300 |
struct compat_sigset_argpack x = {0, 0}; if (get_compat_sigset_argpack(&x, sig)) return -EFAULT; |
e024707bc
|
1301 |
|
7e71609f6
|
1302 1303 |
return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p), x.size, PT_TIMESPEC); |
e024707bc
|
1304 1305 1306 |
} #if defined(CONFIG_COMPAT_32BIT_TIME) |
8dabe7245
|
1307 |
/*
 * Compat pselect6() with an old_timespec32 timeout.
 *
 * @sig points to a compat_sigset_argpack carrying the signal mask pointer
 * and its size.  Returns -EFAULT if the pack cannot be read, otherwise
 * the result of do_compat_pselect() called with PT_OLD_TIMESPEC.
 */
COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp,
	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
	struct old_timespec32 __user *, tsp, void __user *, sig)
{
	/* start zeroed: the pack is passed on even when nothing is read into it */
	struct compat_sigset_argpack pack = { .p = 0, .size = 0 };

	if (get_compat_sigset_argpack(&pack, sig) != 0)
		return -EFAULT;

	return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(pack.p),
				 pack.size, PT_OLD_TIMESPEC);
}
e024707bc
|
1319 |
#endif |
8bd27a300
|
1320 |
#if defined(CONFIG_COMPAT_32BIT_TIME) |
8dabe7245
|
1321 |
COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, |
9afc5eee6
|
1322 |
unsigned int, nfds, struct old_timespec32 __user *, tsp, |
e99ca56ce
|
1323 1324 |
const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { |
36819ad09
|
1325 |
struct timespec64 ts, end_time, *to = NULL; |
e99ca56ce
|
1326 1327 1328 |
int ret; if (tsp) { |
9afc5eee6
|
1329 |
if (get_old_timespec32(&ts, tsp)) |
e99ca56ce
|
1330 1331 1332 1333 1334 1335 |
return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } |
b772434be
|
1336 |
ret = set_compat_user_sigmask(sigmask, sigsetsize); |
ded653ccb
|
1337 1338 |
if (ret) return ret; |
e99ca56ce
|
1339 1340 |
ret = do_sys_poll(ufds, nfds, to); |
ac3010206
|
1341 |
return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret); |
e99ca56ce
|
1342 1343 |
} #endif |
8bd27a300
|
1344 1345 1346 1347 1348 1349 |
/* New compat syscall for 64 bit time_t*/ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { |
8bd27a300
|
1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 |
struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { if (get_timespec64(&ts, tsp)) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } |
b772434be
|
1361 |
ret = set_compat_user_sigmask(sigmask, sigsetsize); |
8bd27a300
|
1362 1363 1364 1365 |
if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); |
ac3010206
|
1366 |
return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret); |
8bd27a300
|
1367 1368 1369 |
} #endif |