Blame view

fs/select.c 34.7 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
  /*
   * This file contains the procedures for the handling of select and poll
   *
   * Created for Linux based loosely upon Mathius Lattner's minix
   * patches by Peter MacDonald. Heavily edited by Linus.
   *
   *  4 February 1994
   *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
   *     flag set in its personality we do *not* modify the given timeout
   *     parameter to reflect time remaining.
   *
   *  24 January 2000
   *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
   *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
   */
022a16924   Milind Arun Choudhary   ROUND_UP macro cl...
17
  #include <linux/kernel.h>
3f07c0144   Ingo Molnar   sched/headers: Pr...
18
19
  #include <linux/sched/signal.h>
  #include <linux/sched/rt.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/syscalls.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
21
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
  #include <linux/poll.h>
  #include <linux/personality.h> /* for STICKY_TIMEOUTS */
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
26
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
  #include <linux/fs.h>
b835996f6   Dipankar Sarma   [PATCH] files: lo...
28
  #include <linux/rcupdate.h>
8ff3e8e85   Arjan van de Ven   select: switch se...
29
  #include <linux/hrtimer.h>
9745cdb36   Colin Cross   select: use freez...
30
  #include <linux/freezer.h>
076bb0c82   Eliezer Tamir   net: rename inclu...
31
  #include <net/busy_poll.h>
2d19309cf   Vlastimil Babka   fs/select: add vm...
32
  #include <linux/vmalloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
34
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35

90d6e24a3   Arjan van de Ven   hrtimer: make sel...
36
37
38
39
40
41
42
43
44
45
46
47
  
  /*
   * Estimate expected accuracy in ns from a timespec64 holding the remaining time.
   *
   * After quite a bit of churning around, we've settled on
   * a simple thing of taking 0.1% of the timeout as the
   * slack, with a cap of 100 msec.
   * "nice" tasks get a 0.5% slack instead.
   *
   * Consider this comment an open invitation to come up with even
   * better solutions..
   */
5ae87e79e   Guillaume Knispel   poll/select: avoi...
48
  #define MAX_SLACK	(100 * NSEC_PER_MSEC)
766b9f928   Deepa Dinamani   fs: poll/select/r...
49
  static long __estimate_accuracy(struct timespec64 *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
50
  {
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
51
  	long slack;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
52
  	int divfactor = 1000;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
53
54
  	if (tv->tv_sec < 0)
  		return 0;
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
55
  	if (task_nice(current) > 0)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
56
  		divfactor = divfactor / 5;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
57
58
  	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
  		return MAX_SLACK;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
59
60
  	slack = tv->tv_nsec / divfactor;
  	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
5ae87e79e   Guillaume Knispel   poll/select: avoi...
61
62
  	if (slack > MAX_SLACK)
  		return MAX_SLACK;
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
63

90d6e24a3   Arjan van de Ven   hrtimer: make sel...
64
65
  	return slack;
  }
766b9f928   Deepa Dinamani   fs: poll/select/r...
66
  u64 select_estimate_accuracy(struct timespec64 *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
67
  {
da8b44d5a   John Stultz   timer: convert ti...
68
  	u64 ret;
766b9f928   Deepa Dinamani   fs: poll/select/r...
69
  	struct timespec64 now;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
70
71
72
73
  
  	/*
  	 * Realtime tasks get a slack of 0 for obvious reasons.
  	 */
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
74
  	if (rt_task(current))
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
75
  		return 0;
766b9f928   Deepa Dinamani   fs: poll/select/r...
76
77
  	ktime_get_ts64(&now);
  	now = timespec64_sub(*tv, now);
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
78
79
80
81
82
  	ret = __estimate_accuracy(&now);
  	if (ret < current->timer_slack_ns)
  		return current->timer_slack_ns;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83
84
85
  struct poll_table_page {
  	struct poll_table_page * next;
  	struct poll_table_entry * entry;
5e01fdff0   Gustavo A. R. Silva   fs: Replace zero-...
86
  	struct poll_table_entry entries[];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  };
  
  /* True when one more entry would run past the end of @table's page. */
  #define POLL_TABLE_FULL(table) \
  	((unsigned long)((table)->entry + 1) > (unsigned long)(table) + PAGE_SIZE)
  
  /*
   * Ok, Peter made a complicated, but straightforward multiple_wait() function.
   * I have rewritten this, taking some shortcuts: This code may not be easy to
   * follow, but it should be free of race-conditions, and it's practical. If you
   * understand what I'm doing here, then you understand how the linux
   * sleep/wakeup mechanism works.
   *
   * Two very simple procedures, poll_wait() and poll_freewait(), do all the
   * work.  poll_wait() is an inline function defined in <linux/poll.h>,
   * as all select/poll functions have to call it to add an entry to the
   * poll table.
   */
75c96f858   Adrian Bunk   [PATCH] make some...
104
105
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  		       poll_table *p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
107
108
109
  
  void poll_initwait(struct poll_wqueues *pwq)
  {
  	init_poll_funcptr(&pwq->pt, __pollwait);
5f820f648   Tejun Heo   poll: allow f_op-...
110
  	pwq->polling_task = current;
b2add73db   Guillaume Knispel   poll/select: init...
111
  	pwq->triggered = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
113
  	pwq->error = 0;
  	pwq->table = NULL;
70674f95c   Andi Kleen   [PATCH] Optimize ...
114
  	pwq->inline_index = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
116
  EXPORT_SYMBOL(poll_initwait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
117
118
  static void free_poll_entry(struct poll_table_entry *entry)
  {
ccf6780dc   WANG Cong   Style fix in fs/s...
119
  	remove_wait_queue(entry->wait_address, &entry->wait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
120
121
  	fput(entry->filp);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
123
124
  void poll_freewait(struct poll_wqueues *pwq)
  {
  	struct poll_table_page * p = pwq->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
125
126
127
  	int i;
  	for (i = 0; i < pwq->inline_index; i++)
  		free_poll_entry(pwq->inline_entries + i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
130
131
132
133
134
  	while (p) {
  		struct poll_table_entry * entry;
  		struct poll_table_page *old;
  
  		entry = p->entry;
  		do {
  			entry--;
70674f95c   Andi Kleen   [PATCH] Optimize ...
135
  			free_poll_entry(entry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
137
138
139
140
141
  		} while (entry > p->entries);
  		old = p;
  		p = p->next;
  		free_page((unsigned long) old);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142
  EXPORT_SYMBOL(poll_freewait);
5f820f648   Tejun Heo   poll: allow f_op-...
143
  static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
144
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
145
  	struct poll_table_page *table = p->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
146
147
  	if (p->inline_index < N_INLINE_POLL_ENTRIES)
  		return p->inline_entries + p->inline_index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
149
150
151
152
153
  	if (!table || POLL_TABLE_FULL(table)) {
  		struct poll_table_page *new_table;
  
  		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
  		if (!new_table) {
  			p->error = -ENOMEM;
70674f95c   Andi Kleen   [PATCH] Optimize ...
154
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
155
156
157
158
159
160
  		}
  		new_table->entry = new_table->entries;
  		new_table->next = table;
  		p->table = new_table;
  		table = new_table;
  	}
70674f95c   Andi Kleen   [PATCH] Optimize ...
161
162
  	return table->entry++;
  }
ac6424b98   Ingo Molnar   sched/wait: Renam...
163
  /*
   * Mark the poll_wqueues behind @wait as triggered and wake its polling
   * task.  Returns whatever default_wake_function() returns.
   */
  static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_wqueues *owner = wait->private;
  	DECLARE_WAITQUEUE(proxy_wait, owner->polling_task);

  	/*
  	 * This runs under the waitqueue lock, but LOCK does not imply a
  	 * write barrier, and users expect write barrier semantics from
  	 * wakeup functions.  The smp_wmb() below is equivalent to the
  	 * smp_wmb() in try_to_wake_up() and pairs with the smp_store_mb()
  	 * in poll_schedule_timeout().
  	 */
  	smp_wmb();
  	owner->triggered = 1;

  	/*
  	 * Do the default wake up through a dummy waitqueue entry that
  	 * points at the polling task.
  	 *
  	 * TODO: This is hacky but there currently is no interface to
  	 * pass in @sync.  @sync is scheduled to be removed and once
  	 * that happens, wake_up_process() can be used directly.
  	 */
  	return default_wake_function(&proxy_wait, mode, sync, key);
  }
ac6424b98   Ingo Molnar   sched/wait: Renam...
188
  /*
   * Wake function installed on every poll table entry.  A wakeup carrying a
   * key that shares no event bits with the entry's key is ignored (returns
   * 0); everything else is passed on to __pollwake().
   */
  static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_table_entry *entry =
  		container_of(wait, struct poll_table_entry, wait);

  	if (!key || (key_to_poll(key) & entry->key))
  		return __pollwake(wait, mode, sync, key);

  	return 0;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
197
198
199
200
  /*
   * Queueing callback installed by poll_initwait(): take a poll table entry
   * for @filp and hook it onto @wait_address.  Does nothing when no entry
   * can be obtained (poll_get_entry() has then recorded the error).
   */
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  				poll_table *p)
  {
  	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
  	struct poll_table_entry *slot;

  	slot = poll_get_entry(pwq);
  	if (!slot)
  		return;

  	slot->key = p->_key;
  	slot->wait_address = wait_address;
  	slot->filp = get_file(filp);

  	/* private must be set after the init call and before queueing. */
  	init_waitqueue_func_entry(&slot->wait, pollwake);
  	slot->wait.private = pwq;

  	add_wait_queue(wait_address, &slot->wait);
  }
8f546ae1f   Christoph Hellwig   fs: unexport poll...
212
  static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
5f820f648   Tejun Heo   poll: allow f_op-...
213
214
215
216
217
218
  			  ktime_t *expires, unsigned long slack)
  {
  	int rc = -EINTR;
  
  	set_current_state(state);
  	if (!pwq->triggered)
59612d187   Rafael J. Wysocki   Revert "select: u...
219
  		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
5f820f648   Tejun Heo   poll: allow f_op-...
220
221
222
223
224
  	__set_current_state(TASK_RUNNING);
  
  	/*
  	 * Prepare for the next iteration.
  	 *
b92b8b35a   Peter Zijlstra   locking/arch: Ren...
225
  	 * The following smp_store_mb() serves two purposes.  First, it's
5f820f648   Tejun Heo   poll: allow f_op-...
226
227
228
229
230
231
232
  	 * the counterpart rmb of the wmb in pollwake() such that data
  	 * written before wake up is always visible after wake up.
  	 * Second, the full barrier guarantees that triggered clearing
  	 * doesn't pass event check of the next iteration.  Note that
  	 * this problem doesn't exist for the first iteration as
  	 * add_wait_queue() has full barrier semantics.
  	 */
b92b8b35a   Peter Zijlstra   locking/arch: Ren...
233
  	smp_store_mb(pwq->triggered, 0);
5f820f648   Tejun Heo   poll: allow f_op-...
234
235
236
  
  	return rc;
  }
5f820f648   Tejun Heo   poll: allow f_op-...
237

b773ad40a   Thomas Gleixner   select: add poll_...
238
239
  /**
   * poll_select_set_timeout - helper function to setup the timeout value
766b9f928   Deepa Dinamani   fs: poll/select/r...
240
   * @to:		pointer to timespec64 variable for the final timeout
b773ad40a   Thomas Gleixner   select: add poll_...
241
242
243
244
245
246
247
248
   * @sec:	seconds (from user space)
   * @nsec:	nanoseconds (from user space)
   *
   * Note, we do not use a timespec for the user space value here, That
   * way we can use the function for timeval and compat interfaces as well.
   *
   * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
   */
766b9f928   Deepa Dinamani   fs: poll/select/r...
249
  int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
b773ad40a   Thomas Gleixner   select: add poll_...
250
  {
766b9f928   Deepa Dinamani   fs: poll/select/r...
251
  	struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};
b773ad40a   Thomas Gleixner   select: add poll_...
252

766b9f928   Deepa Dinamani   fs: poll/select/r...
253
  	if (!timespec64_valid(&ts))
b773ad40a   Thomas Gleixner   select: add poll_...
254
255
256
257
258
259
  		return -EINVAL;
  
  	/* Optimize for the zero timeout value here */
  	if (!sec && !nsec) {
  		to->tv_sec = to->tv_nsec = 0;
  	} else {
766b9f928   Deepa Dinamani   fs: poll/select/r...
260
261
  		ktime_get_ts64(to);
  		*to = timespec64_add_safe(*to, ts);
b773ad40a   Thomas Gleixner   select: add poll_...
262
263
264
  	}
  	return 0;
  }
8bd27a300   Deepa Dinamani   ppoll: use __kern...
265
266
267
268
269
270
  /* Layout of the user-space timeout that poll_select_finish() writes back. */
  enum poll_time_type {
  	PT_TIMEVAL = 0,		/* struct __kernel_old_timeval */
  	PT_OLD_TIMEVAL,		/* struct old_timeval32 */
  	PT_TIMESPEC,		/* written with put_timespec64() */
  	PT_OLD_TIMESPEC,	/* written with put_old_timespec32() */
  };
ac3010206   Oleg Nesterov   select: shift res...
271
272
273
  /*
   * Common tail of the select/poll style syscalls: restore the saved signal
   * mask (unless @ret is -ERESTARTNOHAND) and, when the caller supplied a
   * timeout at user address @p, write the remaining time back in the layout
   * named by @pt_type.
   *
   * Returns @ret, except that -ERESTARTNOHAND becomes -EINTR when the
   * timeout was not (or could not be) written back.
   */
  static int poll_select_finish(struct timespec64 *end_time,
  			      void __user *p,
  			      enum poll_time_type pt_type, int ret)
  {
  	struct timespec64 left;
  	bool failed = true;

  	restore_saved_sigmask_unless(ret == -ERESTARTNOHAND);

  	/* No user timeout pointer: nothing to write back. */
  	if (!p)
  		return ret;

  	if (current->personality & STICKY_TIMEOUTS)
  		goto no_update;

  	/* No update for zero timeout */
  	if (!end_time->tv_sec && !end_time->tv_nsec)
  		return ret;

  	/* Time left is the absolute expiry minus now, clamped at zero. */
  	ktime_get_ts64(&left);
  	left = timespec64_sub(*end_time, left);
  	if (left.tv_sec < 0) {
  		left.tv_nsec = 0;
  		left.tv_sec = 0;
  	}

  	switch (pt_type) {
  	case PT_TIMEVAL: {
  		struct __kernel_old_timeval rtv;

  		/*
  		 * Zero the whole struct first when it contains padding,
  		 * since all of it is copied out below.
  		 */
  		if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
  			memset(&rtv, 0, sizeof(rtv));
  		rtv.tv_sec = left.tv_sec;
  		rtv.tv_usec = left.tv_nsec / NSEC_PER_USEC;
  		failed = copy_to_user(p, &rtv, sizeof(rtv)) != 0;
  		break;
  	}
  	case PT_OLD_TIMEVAL: {
  		struct old_timeval32 rtv;

  		rtv.tv_sec = left.tv_sec;
  		rtv.tv_usec = left.tv_nsec / NSEC_PER_USEC;
  		failed = copy_to_user(p, &rtv, sizeof(rtv)) != 0;
  		break;
  	}
  	case PT_TIMESPEC:
  		failed = put_timespec64(&left, p) != 0;
  		break;
  	case PT_OLD_TIMESPEC:
  		failed = put_old_timespec32(&left, p) != 0;
  		break;
  	default:
  		BUG();
  	}

  	if (!failed)
  		return ret;

  	/*
  	 * If an application puts its timeval in read-only memory, we
  	 * don't want the Linux-specific update to the timeval to
  	 * cause a fault after the select has completed
  	 * successfully. However, because we're not updating the
  	 * timeval, we can't restart the system call.
  	 */
  no_update:
  	if (ret == -ERESTARTNOHAND)
  		ret = -EINTR;
  	return ret;
  }
e99ca56ce   Al Viro   move compat selec...
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
  /*
   * Scalable version of the fd_set: three request bitmaps (in/out/ex)
   * followed by the three matching result bitmaps.
   */

  typedef struct {
  	unsigned long *in;
  	unsigned long *out;
  	unsigned long *ex;
  	unsigned long *res_in;
  	unsigned long *res_out;
  	unsigned long *res_ex;
  } fd_set_bits;
  
  /*
   * How many longwords for "nr" bits?
   *
   * FDS_BITPERLONG is the number of bits in one long, FDS_LONGS(nr) rounds
   * nr bits up to whole longs, and FDS_BYTES(nr) is that count in bytes.
   */
  #define FDS_BITPERLONG	(8 * sizeof(long))
  #define FDS_LONGS(nr)	(((nr) + FDS_BITPERLONG - 1) / FDS_BITPERLONG)
  #define FDS_BYTES(nr)	(FDS_LONGS(nr) * sizeof(long))
  
  /*
   * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
   *
   * Fill @fdset with the first @nr bits (rounded up to whole longs) of the
   * user bitmap @ufdset, or with zeroes when @ufdset is NULL.  Returns 0 on
   * success and -EFAULT if the user copy fails.
   */
  static inline
  int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
  {
  	unsigned long bytes = FDS_BYTES(nr);

  	if (!ufdset) {
  		memset(fdset, 0, bytes);
  		return 0;
  	}

  	if (copy_from_user(fdset, ufdset, bytes))
  		return -EFAULT;
  	return 0;
  }
  
  /*
   * Copy the first @nr bits (rounded up to whole longs) of @fdset out to the
   * user bitmap @ufdset.  A NULL @ufdset is skipped.  Returns the value of
   * __copy_to_user(), i.e. non-zero on failure.
   */
  static inline unsigned long __must_check
  set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
  {
  	if (!ufdset)
  		return 0;

  	return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
  }
  
  /* Clear the first @nr bits (rounded up to whole longs) of @fdset. */
  static inline
  void zero_fd_set(unsigned long nr, unsigned long *fdset)
  {
  	unsigned long bytes = FDS_BYTES(nr);

  	memset(fdset, 0, bytes);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
383
384
385
386
387
388
389
390
391
392
393
  /*
   * Address of word @n in each of the three request bitmaps of @fds.
   * Arguments are parenthesized so that any expression can be passed.
   */
  #define FDS_IN(fds, n)		((fds)->in + (n))
  #define FDS_OUT(fds, n)		((fds)->out + (n))
  #define FDS_EX(fds, n)		((fds)->ex + (n))

  /* Union of the in/out/ex request bits held in word @n of @fds. */
  #define BITS(fds, n)	(*FDS_IN(fds, n) | *FDS_OUT(fds, n) | *FDS_EX(fds, n))
  
  /* Position of the highest set bit in non-zero @set, counted from 1. */
  static int select_top_bit(unsigned long set)
  {
  	int width = 0;

  	do {
  		width++;
  		set >>= 1;
  	} while (set);

  	return width;
  }

  /*
   * Check every descriptor requested in the first @n bits of @fds against
   * the open-fd bitmap of the current task.  Returns -EBADF if a requested
   * descriptor is not open, otherwise the highest requested descriptor plus
   * one (0 when nothing is requested).
   */
  static int max_select_fd(unsigned long n, fd_set_bits *fds)
  {
  	unsigned long tail_mask, word, requested;
  	unsigned long *open_fds;
  	struct fdtable *fdt;
  	int max = 0;

  	/* handle last in-complete long-word first */
  	tail_mask = ~(~0UL << (n & (BITS_PER_LONG-1)));
  	word = n / BITS_PER_LONG;
  	fdt = files_fdtable(current->files);
  	open_fds = fdt->open_fds + word;

  	if (tail_mask) {
  		requested = tail_mask & BITS(fds, word);
  		if (requested) {
  			if (requested & ~*open_fds)
  				return -EBADF;
  			max = select_top_bit(requested) + word * BITS_PER_LONG;
  		}
  	}

  	/* Now the complete words, from the highest down to word 0. */
  	while (word) {
  		open_fds--;
  		word--;

  		requested = BITS(fds, word);
  		if (!requested)
  			continue;
  		if (requested & ~*open_fds)
  			return -EBADF;

  		/* Only the first non-empty word seen determines the maximum. */
  		if (max)
  			continue;
  		max = select_top_bit(requested) + word * BITS_PER_LONG;
  	}

  	return max;
  }
a9a08845e   Linus Torvalds   vfs: do bulk POLL...
430
431
432
  /* Event bits that do_select() counts for the in, out and ex sets. */
  #define POLLIN_SET	(EPOLLIN | EPOLLRDNORM | EPOLLRDBAND | EPOLLHUP | EPOLLERR)
  #define POLLOUT_SET	(EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND | EPOLLERR)
  #define POLLEX_SET	(EPOLLPRI)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
433

4938d7e02   Eric Dumazet   poll: avoid extra...
434
  static inline void wait_key_set(poll_table *wait, unsigned long in,
2d48d67fa   Eliezer Tamir   net: poll/select ...
435
  				unsigned long out, unsigned long bit,
016994377   Al Viro   annotate poll_tab...
436
  				__poll_t ll_flag)
4938d7e02   Eric Dumazet   poll: avoid extra...
437
  {
2d48d67fa   Eliezer Tamir   net: poll/select ...
438
  	wait->_key = POLLEX_SET | ll_flag;
626cf2366   Hans Verkuil   poll: add poll_re...
439
440
441
442
  	if (in & bit)
  		wait->_key |= POLLIN_SET;
  	if (out & bit)
  		wait->_key |= POLLOUT_SET;
4938d7e02   Eric Dumazet   poll: avoid extra...
443
  }
e99ca56ce   Al Viro   move compat selec...
444
  /*
   * Core select loop.  Polls every descriptor requested in the first @n bits
   * of @fds, records ready ones in the result bitmaps and sleeps between
   * passes until something is ready, @end_time (absolute; NULL means no
   * timeout) passes, or a signal is pending.
   *
   * Returns the number of ready events counted, 0 if none, or a negative
   * errno (-EBADF from max_select_fd(), or the poll table error).
   */
  static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
  {
  	ktime_t expire, *to = NULL;
  	struct poll_wqueues table;
  	poll_table *wait;
  	int retval, i, timed_out = 0;
  	u64 slack = 0;
  	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
  	unsigned long busy_start = 0;

  	rcu_read_lock();
  	retval = max_select_fd(n, fds);
  	rcu_read_unlock();

  	if (retval < 0)
  		return retval;
  	n = retval;

  	poll_initwait(&table);
  	wait = &table.pt;

  	if (end_time) {
  		if (!end_time->tv_sec && !end_time->tv_nsec) {
  			/* Zero timeout: one pass only, with the queueing callback cleared. */
  			wait->_qproc = NULL;
  			timed_out = 1;
  		} else {
  			slack = select_estimate_accuracy(end_time);
  		}
  	}

  	retval = 0;
  	for (;;) {
  		unsigned long *in_p = fds->in, *out_p = fds->out, *ex_p = fds->ex;
  		unsigned long *res_in_p = fds->res_in;
  		unsigned long *res_out_p = fds->res_out;
  		unsigned long *res_ex_p = fds->res_ex;
  		bool can_busy_loop = false;

  		/* One iteration per long word of the bitmaps; i is the fd number. */
  		for (i = 0; i < n; ++res_in_p, ++res_out_p, ++res_ex_p) {
  			unsigned long want_in = *in_p++;
  			unsigned long want_out = *out_p++;
  			unsigned long want_ex = *ex_p++;
  			unsigned long wanted = want_in | want_out | want_ex;
  			unsigned long got_in = 0, got_out = 0, got_ex = 0;
  			unsigned long bit, pos;

  			if (!wanted) {
  				i += BITS_PER_LONG;
  				continue;
  			}

  			for (pos = 0, bit = 1; pos < BITS_PER_LONG; ++pos, ++i, bit <<= 1) {
  				struct fd f;
  				__poll_t mask;

  				if (i >= n)
  					break;
  				if (!(wanted & bit))
  					continue;

  				f = fdget(i);
  				if (!f.file)
  					continue;

  				wait_key_set(wait, want_in, want_out, bit,
  					     busy_flag);
  				mask = vfs_poll(f.file, wait);
  				fdput(f);

  				/* Clear the queueing callback once anything is ready. */
  				if ((mask & POLLIN_SET) && (want_in & bit)) {
  					got_in |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}
  				if ((mask & POLLOUT_SET) && (want_out & bit)) {
  					got_out |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}
  				if ((mask & POLLEX_SET) && (want_ex & bit)) {
  					got_ex |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}

  				if (retval) {
  					/* got something, stop busy polling */
  					can_busy_loop = false;
  					busy_flag = 0;
  				} else if (busy_flag & mask) {
  					/*
  					 * only remember a returned
  					 * POLL_BUSY_LOOP if we asked for it
  					 */
  					can_busy_loop = true;
  				}
  			}

  			if (got_in)
  				*res_in_p = got_in;
  			if (got_out)
  				*res_out_p = got_out;
  			if (got_ex)
  				*res_ex_p = got_ex;
  			cond_resched();
  		}

  		/* After the first full pass the queueing callback stays cleared. */
  		wait->_qproc = NULL;
  		if (retval || timed_out || signal_pending(current))
  			break;
  		if (table.error) {
  			retval = table.error;
  			break;
  		}

  		/* only if found POLL_BUSY_LOOP sockets && not out of time */
  		if (can_busy_loop && !need_resched()) {
  			if (!busy_start) {
  				busy_start = busy_loop_current_time();
  				continue;
  			}
  			if (!busy_loop_timeout(busy_start))
  				continue;
  		}
  		busy_flag = 0;

  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec64_to_ktime(*end_time);
  			to = &expire;
  		}

  		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
  					   to, slack))
  			timed_out = 1;
  	}

  	poll_freewait(&table);

  	return retval;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
576
577
578
579
580
581
582
  /*
   * We can actually return ERESTARTSYS instead of EINTR, but I'd
   * like to be certain this leads to no problems. So I return
   * EINTR just for safety.
   *
   * Update: ERESTARTSYS breaks at least the xview clock binary, so
   * I'm trying ERESTARTNOHAND which restarts only when you want to.
   */
a2dcb44c3   Al Viro   [PATCH] make osf_...
583
  /*
   * Copy the three user fd sets in, run do_select() on them and copy the
   * result sets back out.  @n is clamped to the current fdtable's max_fds.
   *
   * Returns the do_select() result, or -EINVAL for negative @n, -ENOMEM if
   * the bitmap buffer cannot be allocated, -EFAULT on a failed user copy,
   * and -ERESTARTNOHAND when nothing is ready and a signal is pending.
   */
  int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
  			   fd_set __user *exp, struct timespec64 *end_time)
  {
  	/* Allocate small arguments on the stack to save memory and be faster */
  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
  	void *bits = stack_fds;
  	struct fdtable *fdt;
  	fd_set_bits fds;
  	size_t size;
  	int ret, max_fds;

  	if (n < 0)
  		return -EINVAL;

  	/* max_fds can increase, so grab it once to avoid race */
  	rcu_read_lock();
  	fdt = files_fdtable(current->files);
  	max_fds = fdt->max_fds;
  	rcu_read_unlock();
  	if (n > max_fds)
  		n = max_fds;

  	/*
  	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
  	 * since we used fdset we need to allocate memory in units of
  	 * long-words.
  	 */
  	size = FDS_BYTES(n);
  	if (size > sizeof(stack_fds) / 6) {
  		/* Not enough space in on-stack array; must use kmalloc */
  		if (size > (SIZE_MAX / 6))
  			return -ENOMEM;

  		bits = kvmalloc(6 * size, GFP_KERNEL);
  		if (!bits)
  			return -ENOMEM;
  	}

  	/* Carve the buffer into six consecutive bitmaps of size bytes each. */
  	fds.in      = bits;
  	fds.out     = bits +   size;
  	fds.ex      = bits + 2*size;
  	fds.res_in  = bits + 3*size;
  	fds.res_out = bits + 4*size;
  	fds.res_ex  = bits + 5*size;

  	ret = get_fd_set(n, inp, fds.in);
  	if (ret)
  		goto out;
  	ret = get_fd_set(n, outp, fds.out);
  	if (ret)
  		goto out;
  	ret = get_fd_set(n, exp, fds.ex);
  	if (ret)
  		goto out;

  	zero_fd_set(n, fds.res_in);
  	zero_fd_set(n, fds.res_out);
  	zero_fd_set(n, fds.res_ex);

  	ret = do_select(n, &fds, end_time);
  	if (ret < 0)
  		goto out;

  	/* Nothing ready and a signal is pending: skip the copy-out. */
  	if (!ret && signal_pending(current)) {
  		ret = -ERESTARTNOHAND;
  		goto out;
  	}

  	if (set_fd_set(n, inp, fds.res_in) ||
  	    set_fd_set(n, outp, fds.res_out) ||
  	    set_fd_set(n, exp, fds.res_ex))
  		ret = -EFAULT;

  out:
  	if (bits != stack_fds)
  		kvfree(bits);
  	return ret;
  }
4bdb9acab   Dominik Brodowski   fs: add kern_sele...
659
  /*
   * Shared body of the timeval-based select entry points.
   *
   * Reads the optional timeout from @tvp, hands it to
   * poll_select_set_timeout(), runs core_sys_select() and lets
   * poll_select_finish() produce the final return value.
   *
   * Returns -EFAULT when @tvp cannot be copied in and -EINVAL when
   * poll_select_set_timeout() rejects the value.
   */
  static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
  		       fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
  {
  	struct timespec64 end_time;
  	struct timespec64 *deadline = NULL;
  	int ret;
  
  	if (tvp) {
  		struct __kernel_old_timeval tv;
  
  		if (copy_from_user(&tv, tvp, sizeof(tv)))
  			return -EFAULT;
  
  		/* Fold any whole seconds carried in tv_usec into the seconds part. */
  		if (poll_select_set_timeout(&end_time,
  				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
  				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
  			return -EINVAL;
  
  		deadline = &end_time;
  	}
  
  	ret = core_sys_select(n, inp, outp, exp, deadline);
  
  	return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret);
  }
4bdb9acab   Dominik Brodowski   fs: add kern_sele...
678
  /* select syscall entry point: forwards every argument to kern_select(). */
  SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
  {
  	int ret = kern_select(n, inp, outp, exp, tvp);
  
  	return ret;
  }
c9da9f212   Heiko Carstens   [CVE-2009-0029] M...
683
  /*
   * Shared body of the pselect6 variants.
   *
   * @tsp is read according to @type (PT_TIMESPEC or PT_OLD_TIMESPEC; any
   * other value is a BUG()), the signal mask is installed through
   * set_user_sigmask(), and the result of core_sys_select() is passed to
   * poll_select_finish().
   *
   * Returns -EFAULT when the timespec cannot be read, -EINVAL when
   * poll_select_set_timeout() rejects it, or the error returned by
   * set_user_sigmask().
   */
  static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
  		       fd_set __user *exp, void __user *tsp,
  		       const sigset_t __user *sigmask, size_t sigsetsize,
  		       enum poll_time_type type)
  {
  	struct timespec64 ts, end_time;
  	struct timespec64 *deadline = NULL;
  	int ret;
  
  	if (tsp) {
  		int fault;
  
  		switch (type) {
  		case PT_TIMESPEC:
  			fault = get_timespec64(&ts, tsp);
  			break;
  		case PT_OLD_TIMESPEC:
  			fault = get_old_timespec32(&ts, tsp);
  			break;
  		default:
  			BUG();
  		}
  		if (fault)
  			return -EFAULT;
  
  		if (poll_select_set_timeout(&end_time, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  		deadline = &end_time;
  	}
  
  	ret = set_user_sigmask(sigmask, sigsetsize);
  	if (ret)
  		return ret;
  
  	ret = core_sys_select(n, inp, outp, exp, deadline);
  
  	return poll_select_finish(&end_time, tsp, type, ret);
  }
  
  /*
   * Most architectures can't handle 7-argument syscalls. So we provide a
   * 6-argument version where the sixth argument is a pointer to a structure
   * which has a pointer to the sigset_t itself followed by a size_t containing
   * the sigset size.
   */
7e71609f6   Al Viro   pselect6() and fr...
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
  /*
   * Kernel-side copy of the structure that pselect6's sixth argument points
   * to: the user pointer to the signal mask and the size of that mask.
   */
  struct sigset_argpack {
  	sigset_t __user *p;	/* user pointer to the sigset_t */
  	size_t size;		/* sigset size supplied by the caller */
  };
  
  /*
   * Fetch the pselect6 sigset argument pack from userspace.
   *
   * @to:   destination; left untouched when @from is NULL, so the caller must
   *        have initialised it beforehand.
   * @from: user pointer to the pack, may be NULL.
   *
   * Returns 0 on success (including the NULL case) or -EFAULT when the user
   * memory cannot be accessed.
   */
  static inline int get_sigset_argpack(struct sigset_argpack *to,
  				     struct sigset_argpack __user *from)
  {
  	// the path is hot enough for overhead of copy_from_user() to matter
  	if (from) {
  		if (!user_read_access_begin(from, sizeof(*from)))
  			return -EFAULT;
  		unsafe_get_user(to->p, &from->p, Efault);
  		unsafe_get_user(to->size, &from->size, Efault);
  		user_read_access_end();
  	}
  	return 0;
  Efault:
  	/*
  	 * The window was opened with user_read_access_begin(), so it has to
  	 * be closed with the matching read variant, not user_access_end().
  	 */
  	user_read_access_end();
  	return -EFAULT;
  }
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
744
  /*
   * pselect6 entry point using struct __kernel_timespec: unpack the
   * sigset pointer/size pair behind @sig and defer to do_pselect().
   */
  SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct __kernel_timespec __user *, tsp,
  		void __user *, sig)
  {
  	struct sigset_argpack pack = { .p = NULL, .size = 0 };
  	int err = get_sigset_argpack(&pack, sig);
  
  	if (err)
  		return -EFAULT;
  
  	return do_pselect(n, inp, outp, exp, tsp, pack.p, pack.size,
  			  PT_TIMESPEC);
  }
  
  #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
  
  /*
   * pselect6 entry point using struct old_timespec32: unpack the sigset
   * pointer/size pair behind @sig and defer to do_pselect().
   */
  SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct old_timespec32 __user *, tsp,
  		void __user *, sig)
  {
  	struct sigset_argpack pack = { .p = NULL, .size = 0 };
  	int err = get_sigset_argpack(&pack, sig);
  
  	if (err)
  		return -EFAULT;
  
  	return do_pselect(n, inp, outp, exp, tsp, pack.p, pack.size,
  			  PT_OLD_TIMESPEC);
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
769

e024707bc   Deepa Dinamani   pselect6: use __k...
770
  #endif
5d0e52830   Christoph Hellwig   Add generic sys_o...
771
772
773
774
  #ifdef __ARCH_WANT_SYS_OLD_SELECT
  /* Argument block that old_select copies in from userspace in one go. */
  struct sel_arg_struct {
  	unsigned long n;
  	fd_set __user *inp;
  	fd_set __user *outp;
  	fd_set __user *exp;
  	struct __kernel_old_timeval __user *tvp;
  };
  
  /*
   * old_select entry point: all arguments arrive packed in one user
   * structure, which is copied in and handed to kern_select().
   */
  SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
  {
  	struct sel_arg_struct args;
  	unsigned long uncopied = copy_from_user(&args, arg, sizeof(args));
  
  	if (uncopied)
  		return -EFAULT;
  
  	return kern_select(args.n, args.inp, args.outp, args.exp, args.tvp);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
787
788
789
  struct poll_list {
  	struct poll_list *next;
  	int len;
5e01fdff0   Gustavo A. R. Silva   fs: Replace zero-...
790
  	struct pollfd entries[];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
791
792
793
  };
  
  #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
794
795
796
797
798
  /*
   * Fish for pollable events on the pollfd->fd file descriptor. We're only
   * interested in events matching the pollfd->events mask, and the result
   * matching that mask is both recorded in pollfd->revents and returned. The
   * pwait poll_table will be used by the fd-provided poll handler for waiting,
626cf2366   Hans Verkuil   poll: add poll_re...
799
   * if pwait->_qproc is non-NULL.
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
800
   */
fb3679372   Al Viro   annotate poll(2) ...
801
  static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
cbf55001b   Eliezer Tamir   net: rename low l...
802
  				     bool *can_busy_poll,
fb3679372   Al Viro   annotate poll(2) ...
803
  				     __poll_t busy_flag)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
804
  {
a0f8dcfc6   Christoph Hellwig   fs: cleanup do_po...
805
806
807
808
809
810
811
812
813
814
815
816
817
  	int fd = pollfd->fd;
  	__poll_t mask = 0, filter;
  	struct fd f;
  
  	if (fd < 0)
  		goto out;
  	mask = EPOLLNVAL;
  	f = fdget(fd);
  	if (!f.file)
  		goto out;
  
  	/* userland u16 ->events contains POLL... bitmap */
  	filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
9965ed174   Christoph Hellwig   fs: add new vfs_p...
818
819
820
821
  	pwait->_key = filter | busy_flag;
  	mask = vfs_poll(f.file, pwait);
  	if (mask & busy_flag)
  		*can_busy_poll = true;
a0f8dcfc6   Christoph Hellwig   fs: cleanup do_po...
822
823
824
825
  	mask &= filter;		/* Mask out unneeded events. */
  	fdput(f);
  
  out:
fb3679372   Al Viro   annotate poll(2) ...
826
  	/* ... and so does ->revents */
c71d227fc   Al Viro   make kernel-side ...
827
  	pollfd->revents = mangle_poll(mask);
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
828
  	return mask;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829
  }
ccec5ee30   Mateusz Guzik   poll: plug an unu...
830
  static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
766b9f928   Deepa Dinamani   fs: poll/select/r...
831
  		   struct timespec64 *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
832
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833
  	poll_table* pt = &wait->pt;
8ff3e8e85   Arjan van de Ven   select: switch se...
834
835
  	ktime_t expire, *to = NULL;
  	int timed_out = 0, count = 0;
da8b44d5a   John Stultz   timer: convert ti...
836
  	u64 slack = 0;
fb3679372   Al Viro   annotate poll(2) ...
837
  	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
37056719b   Alexander Duyck   net: Track start ...
838
  	unsigned long busy_start = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
839

9f72949f6   David Woodhouse   [PATCH] Add psele...
840
  	/* Optimise the no-wait case */
8ff3e8e85   Arjan van de Ven   select: switch se...
841
  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
626cf2366   Hans Verkuil   poll: add poll_re...
842
  		pt->_qproc = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
843
844
  		timed_out = 1;
  	}
9bf084f70   Oleg Nesterov   do_poll: return -...
845

96d2ab484   Arjan van de Ven   hrtimer: fix sign...
846
  	if (end_time && !timed_out)
231f3d393   Andrew Morton   select: rename es...
847
  		slack = select_estimate_accuracy(end_time);
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
848

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
849
850
  	for (;;) {
  		struct poll_list *walk;
cbf55001b   Eliezer Tamir   net: rename low l...
851
  		bool can_busy_loop = false;
9f72949f6   David Woodhouse   [PATCH] Add psele...
852

4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
853
854
855
856
857
858
859
860
  		for (walk = list; walk != NULL; walk = walk->next) {
  			struct pollfd * pfd, * pfd_end;
  
  			pfd = walk->entries;
  			pfd_end = pfd + walk->len;
  			for (; pfd != pfd_end; pfd++) {
  				/*
  				 * Fish for events. If we found one, record it
626cf2366   Hans Verkuil   poll: add poll_re...
861
  				 * and kill poll_table->_qproc, so we don't
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
862
863
864
865
  				 * needlessly register any other waiters after
  				 * this. They'll get immediately deregistered
  				 * when we break out and return.
  				 */
cbf55001b   Eliezer Tamir   net: rename low l...
866
867
  				if (do_pollfd(pfd, pt, &can_busy_loop,
  					      busy_flag)) {
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
868
  					count++;
626cf2366   Hans Verkuil   poll: add poll_re...
869
  					pt->_qproc = NULL;
cbf55001b   Eliezer Tamir   net: rename low l...
870
871
872
  					/* found something, stop busy polling */
  					busy_flag = 0;
  					can_busy_loop = false;
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
873
874
  				}
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
875
  		}
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
876
877
  		/*
  		 * All waiters have already been registered, so don't provide
626cf2366   Hans Verkuil   poll: add poll_re...
878
  		 * a poll_table->_qproc to them on the next loop iteration.
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
879
  		 */
626cf2366   Hans Verkuil   poll: add poll_re...
880
  		pt->_qproc = NULL;
9bf084f70   Oleg Nesterov   do_poll: return -...
881
882
883
  		if (!count) {
  			count = wait->error;
  			if (signal_pending(current))
8cf8b5539   Oleg Nesterov   select: change do...
884
  				count = -ERESTARTNOHAND;
9bf084f70   Oleg Nesterov   do_poll: return -...
885
  		}
8ff3e8e85   Arjan van de Ven   select: switch se...
886
  		if (count || timed_out)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
887
  			break;
9f72949f6   David Woodhouse   [PATCH] Add psele...
888

cbf55001b   Eliezer Tamir   net: rename low l...
889
  		/* only if found POLL_BUSY_LOOP sockets && not out of time */
76b1e9b98   Eliezer Tamir   net/fs: change bu...
890
  		if (can_busy_loop && !need_resched()) {
37056719b   Alexander Duyck   net: Track start ...
891
892
  			if (!busy_start) {
  				busy_start = busy_loop_current_time();
76b1e9b98   Eliezer Tamir   net/fs: change bu...
893
894
  				continue;
  			}
37056719b   Alexander Duyck   net: Track start ...
895
  			if (!busy_loop_timeout(busy_start))
76b1e9b98   Eliezer Tamir   net/fs: change bu...
896
897
898
  				continue;
  		}
  		busy_flag = 0;
91e2fd337   Eliezer Tamir   net: avoid callin...
899

8ff3e8e85   Arjan van de Ven   select: switch se...
900
901
902
903
904
905
  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
766b9f928   Deepa Dinamani   fs: poll/select/r...
906
  			expire = timespec64_to_ktime(*end_time);
8ff3e8e85   Arjan van de Ven   select: switch se...
907
  			to = &expire;
9f72949f6   David Woodhouse   [PATCH] Add psele...
908
  		}
5f820f648   Tejun Heo   poll: allow f_op-...
909
  		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
8ff3e8e85   Arjan van de Ven   select: switch se...
910
  			timed_out = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
911
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
912
913
  	return count;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
914
915
  #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
  			sizeof(struct pollfd))
e99ca56ce   Al Viro   move compat selec...
916
  static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
766b9f928   Deepa Dinamani   fs: poll/select/r...
917
  		struct timespec64 *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
918
919
  {
  	struct poll_wqueues table;
43e11fa2d   Gustavo A. R. Silva   fs/select.c: use ...
920
  	int err = -EFAULT, fdcount, len;
30c14e40e   Jes Sorensen   [PATCH] avoid una...
921
922
923
924
  	/* Allocate small arguments on the stack to save memory and be
  	   faster - use long to make sure the buffer is aligned properly
  	   on 64 bit archs to avoid unaligned access */
  	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
252e5725c   Oleg Nesterov   do_sys_poll: simp...
925
926
927
  	struct poll_list *const head = (struct poll_list *)stack_pps;
   	struct poll_list *walk = head;
   	unsigned long todo = nfds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928

d554ed895   Jiri Slaby   fs: use rlimit he...
929
  	if (nfds > rlimit(RLIMIT_NOFILE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
930
  		return -EINVAL;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
931
932
933
934
935
936
  	len = min_t(unsigned int, nfds, N_STACK_PPS);
  	for (;;) {
  		walk->next = NULL;
  		walk->len = len;
  		if (!len)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
937

252e5725c   Oleg Nesterov   do_sys_poll: simp...
938
939
940
941
942
943
944
  		if (copy_from_user(walk->entries, ufds + nfds-todo,
  					sizeof(struct pollfd) * walk->len))
  			goto out_fds;
  
  		todo -= walk->len;
  		if (!todo)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
945

252e5725c   Oleg Nesterov   do_sys_poll: simp...
946
  		len = min(todo, POLLFD_PER_PAGE);
43e11fa2d   Gustavo A. R. Silva   fs/select.c: use ...
947
948
  		walk = walk->next = kmalloc(struct_size(walk, entries, len),
  					    GFP_KERNEL);
252e5725c   Oleg Nesterov   do_sys_poll: simp...
949
950
  		if (!walk) {
  			err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
952
  			goto out_fds;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
953
  	}
9f72949f6   David Woodhouse   [PATCH] Add psele...
954

252e5725c   Oleg Nesterov   do_sys_poll: simp...
955
  	poll_initwait(&table);
ccec5ee30   Mateusz Guzik   poll: plug an unu...
956
  	fdcount = do_poll(head, &table, end_time);
252e5725c   Oleg Nesterov   do_sys_poll: simp...
957
  	poll_freewait(&table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
958

bc880f204   Linus Torvalds   poll: fix perform...
959
960
  	if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
  		goto out_fds;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
961
  	for (walk = head; walk; walk = walk->next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
962
963
  		struct pollfd *fds = walk->entries;
  		int j;
bc880f204   Linus Torvalds   poll: fix perform...
964
965
  		for (j = walk->len; j; fds++, ufds++, j--)
  			unsafe_put_user(fds->revents, &ufds->revents, Efault);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
966
    	}
bc880f204   Linus Torvalds   poll: fix perform...
967
  	user_write_access_end();
252e5725c   Oleg Nesterov   do_sys_poll: simp...
968

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969
  	err = fdcount;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
970
  out_fds:
252e5725c   Oleg Nesterov   do_sys_poll: simp...
971
972
973
974
975
  	walk = head->next;
  	while (walk) {
  		struct poll_list *pos = walk;
  		walk = walk->next;
  		kfree(pos);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
976
  	}
252e5725c   Oleg Nesterov   do_sys_poll: simp...
977

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
978
  	return err;
bc880f204   Linus Torvalds   poll: fix perform...
979
980
981
982
983
  
  Efault:
  	user_write_access_end();
  	err = -EFAULT;
  	goto out_fds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
984
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
985

3075d9da0   Chris Wright   Use ERESTART_REST...
986
987
  static long do_restart_poll(struct restart_block *restart_block)
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
988
989
  	struct pollfd __user *ufds = restart_block->poll.ufds;
  	int nfds = restart_block->poll.nfds;
766b9f928   Deepa Dinamani   fs: poll/select/r...
990
  	struct timespec64 *to = NULL, end_time;
3075d9da0   Chris Wright   Use ERESTART_REST...
991
  	int ret;
8ff3e8e85   Arjan van de Ven   select: switch se...
992
993
994
995
996
997
998
  	if (restart_block->poll.has_timeout) {
  		end_time.tv_sec = restart_block->poll.tv_sec;
  		end_time.tv_nsec = restart_block->poll.tv_nsec;
  		to = &end_time;
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
8cf8b5539   Oleg Nesterov   select: change do...
999
  	if (ret == -ERESTARTNOHAND) {
3075d9da0   Chris Wright   Use ERESTART_REST...
1000
  		restart_block->fn = do_restart_poll;
3075d9da0   Chris Wright   Use ERESTART_REST...
1001
1002
1003
1004
  		ret = -ERESTART_RESTARTBLOCK;
  	}
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
1005
  /*
   * poll entry point.  A negative @timeout_msecs means no timeout is
   * passed on.  When do_sys_poll() returns -ERESTARTNOHAND the arguments
   * and timeout are stored in the task's restart block and
   * -ERESTART_RESTARTBLOCK is returned instead.
   */
  SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
  		int, timeout_msecs)
  {
  	struct timespec64 end_time;
  	struct timespec64 *deadline = NULL;
  	struct restart_block *restart_block;
  	int ret;
  
  	if (timeout_msecs >= 0) {
  		deadline = &end_time;
  		poll_select_set_timeout(deadline, timeout_msecs / MSEC_PER_SEC,
  			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
  	}
  
  	ret = do_sys_poll(ufds, nfds, deadline);
  	if (ret != -ERESTARTNOHAND)
  		return ret;
  
  	restart_block = &current->restart_block;
  	restart_block->fn = do_restart_poll;
  	restart_block->poll.ufds = ufds;
  	restart_block->poll.nfds = nfds;
  
  	if (timeout_msecs >= 0) {
  		restart_block->poll.tv_sec = end_time.tv_sec;
  		restart_block->poll.tv_nsec = end_time.tv_nsec;
  		restart_block->poll.has_timeout = 1;
  	} else {
  		restart_block->poll.has_timeout = 0;
  	}
  
  	return -ERESTART_RESTARTBLOCK;
  }
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
1035
  /*
   * ppoll entry point using struct __kernel_timespec: read the optional
   * timeout, install the signal mask and run do_sys_poll(); the final
   * return value comes from poll_select_finish().
   */
  SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
  		struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
  		size_t, sigsetsize)
  {
  	struct timespec64 ts, end_time;
  	struct timespec64 *deadline = NULL;
  	int ret;
  
  	if (tsp) {
  		if (get_timespec64(&ts, tsp))
  			return -EFAULT;
  
  		if (poll_select_set_timeout(&end_time, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  		deadline = &end_time;
  	}
  
  	ret = set_user_sigmask(sigmask, sigsetsize);
  	if (ret)
  		return ret;
  
  	ret = do_sys_poll(ufds, nfds, deadline);
  
  	return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
  }
e99ca56ce   Al Viro   move compat selec...
1056

8bd27a300   Deepa Dinamani   ppoll: use __kern...
1057
  #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
e99ca56ce   Al Viro   move compat selec...
1058

8bd27a300   Deepa Dinamani   ppoll: use __kern...
1059
1060
1061
  /*
   * ppoll entry point using struct old_timespec32: read the optional
   * timeout, install the signal mask and run do_sys_poll(); the final
   * return value comes from poll_select_finish().
   */
  SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
  		struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
  		size_t, sigsetsize)
  {
  	struct timespec64 ts, end_time;
  	struct timespec64 *deadline = NULL;
  	int ret;
  
  	if (tsp) {
  		if (get_old_timespec32(&ts, tsp))
  			return -EFAULT;
  
  		if (poll_select_set_timeout(&end_time, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  		deadline = &end_time;
  	}
  
  	ret = set_user_sigmask(sigmask, sigsetsize);
  	if (ret)
  		return ret;
  
  	ret = do_sys_poll(ufds, nfds, deadline);
  
  	return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
  }
8bd27a300   Deepa Dinamani   ppoll: use __kern...
1081
1082
1083
1084
  #endif
  
  #ifdef CONFIG_COMPAT
  #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))
e99ca56ce   Al Viro   move compat selec...
1085
1086
1087
1088
1089
1090
1091
1092
1093
  
  /*
   * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
   * 64-bit unsigned longs.
   */
  static
  int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
  			unsigned long *fdset)
  {
  	/* No user set supplied: behave as if it were empty. */
  	if (!ufdset) {
  		zero_fd_set(nr, fdset);
  		return 0;
  	}
  
  	return compat_get_bitmap(fdset, ufdset, nr);
  }
  
  static
  int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
  		      unsigned long *fdset)
  {
  	/* Nothing to write back when the caller passed no set. */
  	return ufdset ? compat_put_bitmap(ufdset, fdset, nr) : 0;
  }
  
  
  /*
   * This is a virtual copy of core_sys_select() earlier in this file and
   * should be compared to it from time to time.
   */
  
  /*
   * We can actually return ERESTARTSYS instead of EINTR, but I'd
   * like to be certain this leads to no problems. So I return
   * EINTR just for safety.
   *
   * Update: ERESTARTSYS breaks at least the xview clock binary, so
   * I'm trying ERESTARTNOHAND which restart only when you want to.
   */
  static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
  	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
  	struct timespec64 *end_time)
  {
  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
  	fd_set_bits fds;
  	void *bits = stack_fds;
  	int size, max_fds, ret;
  
  	if (n < 0)
  		return -EINVAL;
  
  	/* max_fds can increase, so grab it once to avoid race */
  	rcu_read_lock();
  	max_fds = files_fdtable(current->files)->max_fds;
  	rcu_read_unlock();
  	if (n > max_fds)
  		n = max_fds;
  
  	/*
  	 * Six bitmaps are needed (in/out/ex, incoming and outgoing); each
  	 * one is FDS_BYTES(n) long.  Use the on-stack buffer when all six
  	 * fit, otherwise allocate.
  	 */
  	size = FDS_BYTES(n);
  	if (size > sizeof(stack_fds) / 6) {
  		bits = kmalloc_array(6, size, GFP_KERNEL);
  		if (!bits)
  			return -ENOMEM;
  	}
  
  	fds.in      = (unsigned long *)  bits;
  	fds.out     = (unsigned long *) (bits +   size);
  	fds.ex      = (unsigned long *) (bits + 2*size);
  	fds.res_in  = (unsigned long *) (bits + 3*size);
  	fds.res_out = (unsigned long *) (bits + 4*size);
  	fds.res_ex  = (unsigned long *) (bits + 5*size);
  
  	ret = compat_get_fd_set(n, inp, fds.in);
  	if (ret)
  		goto out;
  	ret = compat_get_fd_set(n, outp, fds.out);
  	if (ret)
  		goto out;
  	ret = compat_get_fd_set(n, exp, fds.ex);
  	if (ret)
  		goto out;
  
  	zero_fd_set(n, fds.res_in);
  	zero_fd_set(n, fds.res_out);
  	zero_fd_set(n, fds.res_ex);
  
  	ret = do_select(n, &fds, end_time);
  	if (ret < 0)
  		goto out;
  
  	/* Nothing ready but a signal is pending: skip the copy-out. */
  	if (ret == 0 && signal_pending(current)) {
  		ret = -ERESTARTNOHAND;
  		goto out;
  	}
  
  	if (compat_set_fd_set(n, inp, fds.res_in) ||
  	    compat_set_fd_set(n, outp, fds.res_out) ||
  	    compat_set_fd_set(n, exp, fds.res_ex))
  		ret = -EFAULT;
  
  out:
  	if (bits != stack_fds)
  		kfree(bits);
  	return ret;
  }
05585e449   Dominik Brodowski   fs: add do_compat...
1195
1196
  static int do_compat_select(int n, compat_ulong_t __user *inp,
  	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
9afc5eee6   Arnd Bergmann   y2038: globally r...
1197
  	struct old_timeval32 __user *tvp)
e99ca56ce   Al Viro   move compat selec...
1198
  {
36819ad09   Deepa Dinamani   select: Use get/p...
1199
  	struct timespec64 end_time, *to = NULL;
9afc5eee6   Arnd Bergmann   y2038: globally r...
1200
  	struct old_timeval32 tv;
e99ca56ce   Al Viro   move compat selec...
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
  	int ret;
  
  	if (tvp) {
  		if (copy_from_user(&tv, tvp, sizeof(tv)))
  			return -EFAULT;
  
  		to = &end_time;
  		if (poll_select_set_timeout(to,
  				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
  				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
  			return -EINVAL;
  	}
  
  	ret = compat_core_sys_select(n, inp, outp, exp, to);
ac3010206   Oleg Nesterov   select: shift res...
1215
  	return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret);
e99ca56ce   Al Viro   move compat selec...
1216
  }
05585e449   Dominik Brodowski   fs: add do_compat...
1217
1218
  /* Compat select entry point: forwards every argument to do_compat_select(). */
  COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
  	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
  	struct old_timeval32 __user *, tvp)
  {
  	int ret = do_compat_select(n, inp, outp, exp, tvp);
  
  	return ret;
  }
e99ca56ce   Al Viro   move compat selec...
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
  /*
   * Argument block that the compat old_select copies in from userspace.
   * The pointer members are compat_uptr_t values that the syscall converts
   * with compat_ptr() before use.
   */
  struct compat_sel_arg_struct {
  	compat_ulong_t n;
  	compat_uptr_t inp;
  	compat_uptr_t outp;
  	compat_uptr_t exp;
  	compat_uptr_t tvp;
  };
  
  /*
   * Compat old_select entry point: copy the packed argument structure in,
   * turn its compat pointers into native user pointers and call
   * do_compat_select().
   */
  COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
  {
  	struct compat_sel_arg_struct args;
  	unsigned long uncopied = copy_from_user(&args, arg, sizeof(args));
  
  	if (uncopied)
  		return -EFAULT;
  
  	return do_compat_select(args.n, compat_ptr(args.inp),
  				compat_ptr(args.outp), compat_ptr(args.exp),
  				compat_ptr(args.tvp));
  }
  
  static long do_compat_pselect(int n, compat_ulong_t __user *inp,
  	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
e024707bc   Deepa Dinamani   pselect6: use __k...
1243
1244
  	void __user *tsp, compat_sigset_t __user *sigmask,
  	compat_size_t sigsetsize, enum poll_time_type type)
e99ca56ce   Al Viro   move compat selec...
1245
  {
36819ad09   Deepa Dinamani   select: Use get/p...
1246
  	struct timespec64 ts, end_time, *to = NULL;
e99ca56ce   Al Viro   move compat selec...
1247
1248
1249
  	int ret;
  
  	if (tsp) {
e024707bc   Deepa Dinamani   pselect6: use __k...
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
  		switch (type) {
  		case PT_OLD_TIMESPEC:
  			if (get_old_timespec32(&ts, tsp))
  				return -EFAULT;
  			break;
  		case PT_TIMESPEC:
  			if (get_timespec64(&ts, tsp))
  				return -EFAULT;
  			break;
  		default:
  			BUG();
  		}
e99ca56ce   Al Viro   move compat selec...
1262
1263
1264
1265
1266
  
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  	}
b772434be   Oleg Nesterov   signal: simplify ...
1267
  	ret = set_compat_user_sigmask(sigmask, sigsetsize);
ded653ccb   Deepa Dinamani   signal: Add set_u...
1268
1269
  	if (ret)
  		return ret;
e99ca56ce   Al Viro   move compat selec...
1270
1271
  
  	ret = compat_core_sys_select(n, inp, outp, exp, to);
ac3010206   Oleg Nesterov   select: shift res...
1272
  	return poll_select_finish(&end_time, tsp, type, ret);
e99ca56ce   Al Viro   move compat selec...
1273
  }
7e71609f6   Al Viro   pselect6() and fr...
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
  /*
   * Signal-mask argument pack that the compat pselect6 syscalls receive
   * through their last ('sig') argument.
   */
  struct compat_sigset_argpack {
  	compat_uptr_t p;	/* converted with compat_ptr() and used as the sigmask pointer */
  	compat_size_t size;	/* passed on as the sigsetsize argument */
  };
  /*
   * Read a struct compat_sigset_argpack from user space.
   *
   * @to:   kernel copy to fill in
   * @from: user pointer to the pack; may be NULL, in which case @to is left
   *        untouched and 0 is returned
   *
   * Returns 0 on success and -EFAULT if the user memory cannot be accessed.
   *
   * The access window is opened with user_read_access_begin(), so every
   * exit path, including the fault path, has to close it with the matching
   * user_read_access_end() rather than the generic user_access_end().
   */
  static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to,
  					    struct compat_sigset_argpack __user *from)
  {
  	if (from) {
  		if (!user_read_access_begin(from, sizeof(*from)))
  			return -EFAULT;
  		/* On a fault unsafe_get_user() jumps to the Efault label. */
  		unsafe_get_user(to->p, &from->p, Efault);
  		unsafe_get_user(to->size, &from->size, Efault);
  		user_read_access_end();
  	}
  	return 0;
  Efault:
  	user_read_access_end();
  	return -EFAULT;
  }
e024707bc   Deepa Dinamani   pselect6: use __k...
1293
1294
1295
1296
  /*
   * Compat pselect6 taking a struct __kernel_timespec timeout.  @sig points
   * to a struct compat_sigset_argpack (or is NULL, in which case a zeroed
   * pack is used) that carries the signal mask pointer and its size.
   *
   * Returns -EFAULT if the pack cannot be read, otherwise the result of
   * do_compat_pselect() called with PT_TIMESPEC.
   */
  COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
  	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
  	struct __kernel_timespec __user *, tsp, void __user *, sig)
  {
  	struct compat_sigset_argpack sigpack = { .p = 0, .size = 0 };
  	compat_sigset_t __user *mask;
  
  	if (get_compat_sigset_argpack(&sigpack, sig) != 0)
  		return -EFAULT;
  
  	mask = compat_ptr(sigpack.p);
  
  	return do_compat_pselect(n, inp, outp, exp, tsp, mask, sigpack.size,
  				 PT_TIMESPEC);
  }
  
  #if defined(CONFIG_COMPAT_32BIT_TIME)
8dabe7245   Arnd Bergmann   y2038: syscalls: ...
1307
  /*
   * Compat pselect6 taking a struct old_timespec32 timeout.  @sig points to
   * a struct compat_sigset_argpack (or is NULL, in which case a zeroed pack
   * is used) that carries the signal mask pointer and its size.
   *
   * Returns -EFAULT if the pack cannot be read, otherwise the result of
   * do_compat_pselect() called with PT_OLD_TIMESPEC.
   */
  COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp,
  	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
  	struct old_timespec32 __user *, tsp, void __user *, sig)
  {
  	struct compat_sigset_argpack sigpack = { .p = 0, .size = 0 };
  	compat_sigset_t __user *mask;
  
  	if (get_compat_sigset_argpack(&sigpack, sig) != 0)
  		return -EFAULT;
  
  	mask = compat_ptr(sigpack.p);
  
  	return do_compat_pselect(n, inp, outp, exp, tsp, mask, sigpack.size,
  				 PT_OLD_TIMESPEC);
  }
e024707bc   Deepa Dinamani   pselect6: use __k...
1319
  #endif
8bd27a300   Deepa Dinamani   ppoll: use __kern...
1320
  #if defined(CONFIG_COMPAT_32BIT_TIME)
8dabe7245   Arnd Bergmann   y2038: syscalls: ...
1321
  /*
   * Compat ppoll taking a struct old_timespec32 timeout.
   *
   * Returns -EFAULT if the timeout cannot be read, -EINVAL if
   * poll_select_set_timeout() rejects it, the error from
   * set_compat_user_sigmask() if that fails, and otherwise the value
   * computed by poll_select_finish() from the do_sys_poll() result.
   */
  COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
  	unsigned int,  nfds, struct old_timespec32 __user *, tsp,
  	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
  {
  	struct timespec64 req, deadline, *timeout = NULL;
  	int rc;
  
  	if (tsp != NULL) {
  		if (get_old_timespec32(&req, tsp) != 0)
  			return -EFAULT;
  
  		if (poll_select_set_timeout(&deadline, req.tv_sec, req.tv_nsec))
  			return -EINVAL;
  		timeout = &deadline;
  	}
  
  	rc = set_compat_user_sigmask(sigmask, sigsetsize);
  	if (rc != 0)
  		return rc;
  
  	rc = do_sys_poll(ufds, nfds, timeout);
  
  	return poll_select_finish(&deadline, tsp, PT_OLD_TIMESPEC, rc);
  }
  #endif
8bd27a300   Deepa Dinamani   ppoll: use __kern...
1344
1345
1346
1347
1348
1349
  
  /*
   * New compat syscall for 64-bit time_t: ppoll taking a
   * struct __kernel_timespec timeout.
   *
   * Returns -EFAULT if the timeout cannot be read, -EINVAL if
   * poll_select_set_timeout() rejects it, the error from
   * set_compat_user_sigmask() if that fails, and otherwise the value
   * computed by poll_select_finish() from the do_sys_poll() result.
   */
  COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
  	unsigned int,  nfds, struct __kernel_timespec __user *, tsp,
  	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
  {
  	struct timespec64 req, deadline, *timeout = NULL;
  	int rc;
  
  	if (tsp != NULL) {
  		if (get_timespec64(&req, tsp) != 0)
  			return -EFAULT;
  
  		if (poll_select_set_timeout(&deadline, req.tv_sec, req.tv_nsec))
  			return -EINVAL;
  		timeout = &deadline;
  	}
  
  	rc = set_compat_user_sigmask(sigmask, sigsetsize);
  	if (rc != 0)
  		return rc;
  
  	rc = do_sys_poll(ufds, nfds, timeout);
  
  	return poll_select_finish(&deadline, tsp, PT_TIMESPEC, rc);
  }
  
  #endif