Blame view

fs/select.c 34.6 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
  /*
   * This file contains the procedures for the handling of select and poll
   *
   * Created for Linux based loosely upon Mathius Lattner's minix
   * patches by Peter MacDonald. Heavily edited by Linus.
   *
   *  4 February 1994
   *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
   *     flag set in its personality we do *not* modify the given timeout
   *     parameter to reflect time remaining.
   *
   *  24 January 2000
   *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
   *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
   */
022a16924   Milind Arun Choudhary   ROUND_UP macro cl...
17
  #include <linux/kernel.h>
3f07c0144   Ingo Molnar   sched/headers: Pr...
18
19
  #include <linux/sched/signal.h>
  #include <linux/sched/rt.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/syscalls.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
21
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
  #include <linux/poll.h>
  #include <linux/personality.h> /* for STICKY_TIMEOUTS */
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
26
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
  #include <linux/fs.h>
b835996f6   Dipankar Sarma   [PATCH] files: lo...
28
  #include <linux/rcupdate.h>
8ff3e8e85   Arjan van de Ven   select: switch se...
29
  #include <linux/hrtimer.h>
9745cdb36   Colin Cross   select: use freez...
30
  #include <linux/freezer.h>
076bb0c82   Eliezer Tamir   net: rename inclu...
31
  #include <net/busy_poll.h>
2d19309cf   Vlastimil Babka   fs/select: add vm...
32
  #include <linux/vmalloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
34
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35

90d6e24a3   Arjan van de Ven   hrtimer: make sel...
36
37
38
39
40
41
42
43
44
45
46
47
  
  /*
   * Estimate expected accuracy in ns from a timeval.
   *
   * After quite a bit of churning around, we've settled on
   * a simple thing of taking 0.1% of the timeout as the
   * slack, with a cap of 100 msec.
   * "nice" tasks get a 0.5% slack instead.
   *
   * Consider this comment an open invitation to come up with even
   * better solutions..
   */
5ae87e79e   Guillaume Knispel   poll/select: avoi...
48
  #define MAX_SLACK	(100 * NSEC_PER_MSEC)
766b9f928   Deepa Dinamani   fs: poll/select/r...
49
  static long __estimate_accuracy(struct timespec64 *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
50
  {
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
51
  	long slack;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
52
  	int divfactor = 1000;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
53
54
  	if (tv->tv_sec < 0)
  		return 0;
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
55
  	if (task_nice(current) > 0)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
56
  		divfactor = divfactor / 5;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
57
58
  	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
  		return MAX_SLACK;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
59
60
  	slack = tv->tv_nsec / divfactor;
  	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
5ae87e79e   Guillaume Knispel   poll/select: avoi...
61
62
  	if (slack > MAX_SLACK)
  		return MAX_SLACK;
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
63

90d6e24a3   Arjan van de Ven   hrtimer: make sel...
64
65
  	return slack;
  }
766b9f928   Deepa Dinamani   fs: poll/select/r...
66
  u64 select_estimate_accuracy(struct timespec64 *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
67
  {
da8b44d5a   John Stultz   timer: convert ti...
68
  	u64 ret;
766b9f928   Deepa Dinamani   fs: poll/select/r...
69
  	struct timespec64 now;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
70
71
72
73
  
  	/*
  	 * Realtime tasks get a slack of 0 for obvious reasons.
  	 */
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
74
  	if (rt_task(current))
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
75
  		return 0;
766b9f928   Deepa Dinamani   fs: poll/select/r...
76
77
  	ktime_get_ts64(&now);
  	now = timespec64_sub(*tv, now);
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
78
79
80
81
82
  	ret = __estimate_accuracy(&now);
  	if (ret < current->timer_slack_ns)
  		return current->timer_slack_ns;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  /*
   * One page-sized chunk of poll_table_entry slots; chunks are chained
   * through ->next.  ->entry points at the next unused slot in ->entries,
   * which is a flexible array member filling the rest of the page.
   */
  struct poll_table_page {
  	struct poll_table_page * next;
  	struct poll_table_entry * entry;
  	struct poll_table_entry entries[];
  };
  
  /*
   * True when handing out one more entry would run past PAGE_SIZE bytes
   * counted from the start of @table.
   */
  #define POLL_TABLE_FULL(table) \
  	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
  
  /*
   * Ok, Peter made a complicated, but straightforward multiple_wait() function.
   * I have rewritten this, taking some shortcuts: This code may not be easy to
   * follow, but it should be free of race-conditions, and it's practical. If you
   * understand what I'm doing here, then you understand how the linux
   * sleep/wakeup mechanism works.
   *
   * Two very simple procedures, poll_wait() and poll_freewait(), do all the
   * work.  poll_wait() is an inline function defined in <linux/poll.h>,
   * as all select/poll functions have to call it to add an entry to the
   * poll table.
   */
75c96f858   Adrian Bunk   [PATCH] make some...
104
105
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  		       poll_table *p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
107
108
109
  
  void poll_initwait(struct poll_wqueues *pwq)
  {
  	init_poll_funcptr(&pwq->pt, __pollwait);
5f820f648   Tejun Heo   poll: allow f_op-...
110
  	pwq->polling_task = current;
b2add73db   Guillaume Knispel   poll/select: init...
111
  	pwq->triggered = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
113
  	pwq->error = 0;
  	pwq->table = NULL;
70674f95c   Andi Kleen   [PATCH] Optimize ...
114
  	pwq->inline_index = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
116
  EXPORT_SYMBOL(poll_initwait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
117
118
  static void free_poll_entry(struct poll_table_entry *entry)
  {
ccf6780dc   WANG Cong   Style fix in fs/s...
119
  	remove_wait_queue(entry->wait_address, &entry->wait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
120
121
  	fput(entry->filp);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
123
124
  void poll_freewait(struct poll_wqueues *pwq)
  {
  	struct poll_table_page * p = pwq->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
125
126
127
  	int i;
  	for (i = 0; i < pwq->inline_index; i++)
  		free_poll_entry(pwq->inline_entries + i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
130
131
132
133
134
  	while (p) {
  		struct poll_table_entry * entry;
  		struct poll_table_page *old;
  
  		entry = p->entry;
  		do {
  			entry--;
70674f95c   Andi Kleen   [PATCH] Optimize ...
135
  			free_poll_entry(entry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
137
138
139
140
141
  		} while (entry > p->entries);
  		old = p;
  		p = p->next;
  		free_page((unsigned long) old);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142
  EXPORT_SYMBOL(poll_freewait);
5f820f648   Tejun Heo   poll: allow f_op-...
143
  static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
144
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
145
  	struct poll_table_page *table = p->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
146
147
  	if (p->inline_index < N_INLINE_POLL_ENTRIES)
  		return p->inline_entries + p->inline_index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
149
150
151
152
153
  	if (!table || POLL_TABLE_FULL(table)) {
  		struct poll_table_page *new_table;
  
  		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
  		if (!new_table) {
  			p->error = -ENOMEM;
70674f95c   Andi Kleen   [PATCH] Optimize ...
154
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
155
156
157
158
159
160
  		}
  		new_table->entry = new_table->entries;
  		new_table->next = table;
  		p->table = new_table;
  		table = new_table;
  	}
70674f95c   Andi Kleen   [PATCH] Optimize ...
161
162
  	return table->entry++;
  }
ac6424b98   Ingo Molnar   sched/wait: Renam...
163
  /*
   * Mark the poll_wqueues behind @wait as triggered and wake its polling
   * task.  Returns whatever default_wake_function() returns.
   */
  static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_wqueues *queues = wait->private;
  	DECLARE_WAITQUEUE(waker, queues->polling_task);

  	/*
  	 * We run under the waitqueue lock, but LOCK does not imply a
  	 * write barrier, and users expect write barrier semantics from
  	 * wakeup functions.  This smp_wmb() is equivalent to the one in
  	 * try_to_wake_up() and pairs with the smp_store_mb() in
  	 * poll_schedule_timeout().
  	 */
  	smp_wmb();
  	queues->triggered = 1;

  	/*
  	 * Do the default wake up through a throw-away waitqueue entry.
  	 *
  	 * TODO: This is hacky but there currently is no interface to
  	 * pass in @sync.  @sync is scheduled to be removed and once
  	 * that happens, wake_up_process() can be used directly.
  	 */
  	return default_wake_function(&waker, mode, sync, key);
  }
ac6424b98   Ingo Molnar   sched/wait: Renam...
188
  /*
   * Wait queue callback installed by __pollwait().  When @key is given
   * and shares no event bits with the key stored in the entry, the wakeup
   * is ignored (returns 0); otherwise it is passed on to __pollwake().
   */
  static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_table_entry *slot =
  		container_of(wait, struct poll_table_entry, wait);

  	if (!key || (key_to_poll(key) & slot->key))
  		return __pollwake(wait, mode, sync, key);

  	return 0;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
197
198
199
200
  /*
   * Queueing callback: record @filp / @wait_address in a new poll table
   * entry and add that entry to the wait queue.  Does nothing when no
   * entry can be obtained (poll_get_entry() has then set the error).
   */
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  				poll_table *p)
  {
  	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
  	struct poll_table_entry *slot;

  	slot = poll_get_entry(pwq);
  	if (!slot)
  		return;

  	slot->key = p->_key;
  	slot->wait_address = wait_address;
  	/* hold a reference on the file; free_poll_entry() drops it */
  	slot->filp = get_file(filp);

  	init_waitqueue_func_entry(&slot->wait, pollwake);
  	slot->wait.private = pwq;
  	add_wait_queue(wait_address, &slot->wait);
  }
8f546ae1f   Christoph Hellwig   fs: unexport poll...
212
  static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
5f820f648   Tejun Heo   poll: allow f_op-...
213
214
215
216
217
218
  			  ktime_t *expires, unsigned long slack)
  {
  	int rc = -EINTR;
  
  	set_current_state(state);
  	if (!pwq->triggered)
59612d187   Rafael J. Wysocki   Revert "select: u...
219
  		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
5f820f648   Tejun Heo   poll: allow f_op-...
220
221
222
223
224
  	__set_current_state(TASK_RUNNING);
  
  	/*
  	 * Prepare for the next iteration.
  	 *
b92b8b35a   Peter Zijlstra   locking/arch: Ren...
225
  	 * The following smp_store_mb() serves two purposes.  First, it's
5f820f648   Tejun Heo   poll: allow f_op-...
226
227
228
229
230
231
232
  	 * the counterpart rmb of the wmb in pollwake() such that data
  	 * written before wake up is always visible after wake up.
  	 * Second, the full barrier guarantees that triggered clearing
  	 * doesn't pass event check of the next iteration.  Note that
  	 * this problem doesn't exist for the first iteration as
  	 * add_wait_queue() has full barrier semantics.
  	 */
b92b8b35a   Peter Zijlstra   locking/arch: Ren...
233
  	smp_store_mb(pwq->triggered, 0);
5f820f648   Tejun Heo   poll: allow f_op-...
234
235
236
  
  	return rc;
  }
5f820f648   Tejun Heo   poll: allow f_op-...
237

b773ad40a   Thomas Gleixner   select: add poll_...
238
239
  /**
   * poll_select_set_timeout - helper function to setup the timeout value
766b9f928   Deepa Dinamani   fs: poll/select/r...
240
   * @to:		pointer to timespec64 variable for the final timeout
b773ad40a   Thomas Gleixner   select: add poll_...
241
242
243
244
245
246
247
248
   * @sec:	seconds (from user space)
   * @nsec:	nanoseconds (from user space)
   *
   * Note, we do not use a timespec for the user space value here, That
   * way we can use the function for timeval and compat interfaces as well.
   *
   * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
   */
766b9f928   Deepa Dinamani   fs: poll/select/r...
249
  int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
b773ad40a   Thomas Gleixner   select: add poll_...
250
  {
766b9f928   Deepa Dinamani   fs: poll/select/r...
251
  	struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};
b773ad40a   Thomas Gleixner   select: add poll_...
252

766b9f928   Deepa Dinamani   fs: poll/select/r...
253
  	if (!timespec64_valid(&ts))
b773ad40a   Thomas Gleixner   select: add poll_...
254
255
256
257
258
259
  		return -EINVAL;
  
  	/* Optimize for the zero timeout value here */
  	if (!sec && !nsec) {
  		to->tv_sec = to->tv_nsec = 0;
  	} else {
766b9f928   Deepa Dinamani   fs: poll/select/r...
260
261
  		ktime_get_ts64(to);
  		*to = timespec64_add_safe(*to, ts);
b773ad40a   Thomas Gleixner   select: add poll_...
262
263
264
  	}
  	return 0;
  }
766b9f928   Deepa Dinamani   fs: poll/select/r...
265
266
  /*
   * Write the time left until @end_time back to the user buffer @p, as a
   * struct timeval when @timeval is non-zero and as a timespec otherwise.
   *
   * Nothing is written when @p is NULL, when the task has STICKY_TIMEOUTS
   * set in its personality, or when @end_time is zero.
   *
   * Returns @ret unchanged, except that -ERESTARTNOHAND becomes -EINTR
   * when the remaining time was not written back (sticky timeouts or a
   * failed copy).
   */
  static int poll_select_copy_remaining(struct timespec64 *end_time,
  				      void __user *p,
  				      int timeval, int ret)
  {
  	struct timespec64 rts;
  	struct timeval rtv;

  	if (!p)
  		return ret;

  	if (!(current->personality & STICKY_TIMEOUTS)) {
  		/* No update for zero timeout */
  		if (!end_time->tv_sec && !end_time->tv_nsec)
  			return ret;

  		ktime_get_ts64(&rts);
  		rts = timespec64_sub(*end_time, rts);
  		/* already past the deadline: report nothing left */
  		if (rts.tv_sec < 0) {
  			rts.tv_nsec = 0;
  			rts.tv_sec = 0;
  		}

  		if (timeval) {
  			/*
  			 * If the struct is larger than its two members,
  			 * clear it first so the extra bytes copied out
  			 * below are zero.
  			 */
  			if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
  				memset(&rtv, 0, sizeof(rtv));
  			rtv.tv_sec = rts.tv_sec;
  			rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;

  			if (copy_to_user(p, &rtv, sizeof(rtv)) == 0)
  				return ret;
  		} else {
  			if (put_timespec64(&rts, p) == 0)
  				return ret;
  		}
  	}

  	/*
  	 * If an application puts its timeval in read-only memory, we
  	 * don't want the Linux-specific update to the timeval to
  	 * cause a fault after the select has completed
  	 * successfully. However, because we're not updating the
  	 * timeval, we can't restart the system call.
  	 */
  	return ret == -ERESTARTNOHAND ? -EINTR : ret;
  }
e99ca56ce   Al Viro   move compat selec...
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
  /*
   * Scalable version of the fd_set: three input bitmaps (in/out/ex) and
   * the three matching result bitmaps.
   */

  typedef struct {
  	unsigned long *in, *out, *ex;
  	unsigned long *res_in, *res_out, *res_ex;
  } fd_set_bits;

  /*
   * Sizing helpers: bits per long, and the number of longs / bytes needed
   * to hold "nr" bits (rounded up to whole longs).
   */
  #define FDS_BITPERLONG	(sizeof(long) * 8)
  #define FDS_LONGS(nr)	(((nr) + (FDS_BITPERLONG - 1)) / FDS_BITPERLONG)
  #define FDS_BYTES(nr)	(sizeof(long) * FDS_LONGS(nr))
  
  /*
   * Fetch the first "nr" bits (rounded up to whole longs) of the user
   * fd_set @ufdset into @fdset.  A NULL @ufdset yields an all-zero set.
   *
   * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
   *
   * Returns 0 on success, -EFAULT if the copy from user space fails.
   */
  static inline
  int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
  {
  	unsigned long bytes = FDS_BYTES(nr);

  	if (!ufdset) {
  		memset(fdset, 0, bytes);
  		return 0;
  	}

  	return copy_from_user(fdset, ufdset, bytes) ? -EFAULT : 0;
  }
  
  /*
   * Copy the first "nr" bits (rounded up to whole longs) of @fdset out to
   * the user fd_set @ufdset; a NULL @ufdset is skipped.
   *
   * Returns the value of __copy_to_user(), or 0 when nothing was copied.
   */
  static inline unsigned long __must_check
  set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
  {
  	if (!ufdset)
  		return 0;

  	return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
  }
  
  /* Clear the first "nr" bits (rounded up to whole longs) of @fdset. */
  static inline
  void zero_fd_set(unsigned long nr, unsigned long *fdset)
  {
  	unsigned long bytes = FDS_BYTES(nr);

  	memset(fdset, 0, bytes);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
357
358
359
360
361
362
363
364
365
366
367
  /*
   * Address of the n-th long of the in/out/ex bitmap of @fds.  Both
   * arguments are parenthesized so that any expression may be passed.
   */
  #define FDS_IN(fds, n)		((fds)->in + (n))
  #define FDS_OUT(fds, n)		((fds)->out + (n))
  #define FDS_EX(fds, n)		((fds)->ex + (n))
  
  /* Union of the n-th long of all three input bitmaps of @fds. */
  #define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
  
  /*
   * Check the first @n bits of the three input bitmaps in @fds against the
   * current task's open-fd bitmap.
   *
   * Returns -EBADF if any requested descriptor is not open; otherwise one
   * more than the highest requested descriptor, or 0 if none is requested.
   */
  static int max_select_fd(unsigned long n, fd_set_bits *fds)
  {
  	unsigned long *open_fds;
  	unsigned long set;
  	int max = 0;
  	struct fdtable *fdt;

  	/* mask for the trailing, incomplete long-word (0 if there is none) */
  	set = ~(~0UL << (n & (BITS_PER_LONG-1)));
  	n /= BITS_PER_LONG;
  	fdt = files_fdtable(current->files);
  	open_fds = fdt->open_fds + n;

  	/* handle last in-complete long-word first */
  	if (set) {
  		set &= BITS(fds, n);
  		if (set) {
  			if (set & ~*open_fds)
  				return -EBADF;
  			/* highest requested bit lives in this word */
  			for (; set; set >>= 1)
  				max++;
  			max += n * BITS_PER_LONG;
  		}
  	}

  	/* then walk the complete long-words from the top down */
  	while (n) {
  		open_fds--;
  		n--;
  		set = BITS(fds, n);
  		if (!set)
  			continue;
  		if (set & ~*open_fds)
  			return -EBADF;
  		/* maximum already known; keep going only to validate */
  		if (max)
  			continue;
  		for (; set; set >>= 1)
  			max++;
  		max += n * BITS_PER_LONG;
  	}

  	return max;
  }
a9a08845e   Linus Torvalds   vfs: do bulk POLL...
404
405
406
  #define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
  #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
  #define POLLEX_SET (EPOLLPRI)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
407

4938d7e02   Eric Dumazet   poll: avoid extra...
408
  static inline void wait_key_set(poll_table *wait, unsigned long in,
2d48d67fa   Eliezer Tamir   net: poll/select ...
409
  				unsigned long out, unsigned long bit,
016994377   Al Viro   annotate poll_tab...
410
  				__poll_t ll_flag)
4938d7e02   Eric Dumazet   poll: avoid extra...
411
  {
2d48d67fa   Eliezer Tamir   net: poll/select ...
412
  	wait->_key = POLLEX_SET | ll_flag;
626cf2366   Hans Verkuil   poll: add poll_re...
413
414
415
416
  	if (in & bit)
  		wait->_key |= POLLIN_SET;
  	if (out & bit)
  		wait->_key |= POLLOUT_SET;
4938d7e02   Eric Dumazet   poll: avoid extra...
417
  }
e99ca56ce   Al Viro   move compat selec...
418
  /*
   * Core of select: poll every descriptor requested in @fds until at least
   * one is ready, the timeout @end_time (absolute; NULL means none) has
   * passed, or a signal is pending.
   *
   * Ready descriptors are recorded in the res_in/res_out/res_ex bitmaps.
   * Returns the number of ready events, 0 when there are none, or a
   * negative error (-EBADF from max_select_fd(), or the poll table error).
   */
  static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
  {
  	ktime_t expire, *to = NULL;
  	struct poll_wqueues table;
  	poll_table *wait;
  	int retval, i, timed_out = 0;
  	u64 slack = 0;
  	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
  	unsigned long busy_start = 0;

  	rcu_read_lock();
  	retval = max_select_fd(n, fds);
  	rcu_read_unlock();

  	if (retval < 0)
  		return retval;
  	/* only scan up to the highest descriptor actually requested */
  	n = retval;

  	poll_initwait(&table);
  	wait = &table.pt;
  	if (end_time) {
  		if (!end_time->tv_sec && !end_time->tv_nsec) {
  			/* zero timeout: never sleep, so never queue */
  			wait->_qproc = NULL;
  			timed_out = 1;
  		} else {
  			slack = select_estimate_accuracy(end_time);
  		}
  	}

  	retval = 0;
  	for (;;) {
  		bool can_busy_loop = false;
  		unsigned long word;

  		/* one iteration per long-word of the bitmaps */
  		for (i = 0, word = 0; i < n; word++) {
  			unsigned long in = fds->in[word];
  			unsigned long out = fds->out[word];
  			unsigned long ex = fds->ex[word];
  			unsigned long all_bits = in | out | ex;
  			unsigned long res_in = 0, res_out = 0, res_ex = 0;
  			unsigned long bit, j;

  			/* nothing requested in this word: skip it whole */
  			if (!all_bits) {
  				i += BITS_PER_LONG;
  				continue;
  			}

  			for (j = 0, bit = 1; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
  				struct fd f;
  				__poll_t mask;

  				if (i >= n)
  					break;
  				if (!(all_bits & bit))
  					continue;

  				f = fdget(i);
  				if (!f.file)
  					continue;

  				wait_key_set(wait, in, out, bit, busy_flag);
  				mask = vfs_poll(f.file, wait);
  				fdput(f);

  				/* once something is ready, stop queueing */
  				if ((in & bit) && (mask & POLLIN_SET)) {
  					res_in |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}
  				if ((out & bit) && (mask & POLLOUT_SET)) {
  					res_out |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}
  				if ((ex & bit) && (mask & POLLEX_SET)) {
  					res_ex |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}

  				if (retval) {
  					/* got something, stop busy polling */
  					can_busy_loop = false;
  					busy_flag = 0;
  				} else if (busy_flag & mask) {
  					/*
  					 * only remember a returned
  					 * POLL_BUSY_LOOP if we asked for it
  					 */
  					can_busy_loop = true;
  				}
  			}

  			if (res_in)
  				fds->res_in[word] = res_in;
  			if (res_out)
  				fds->res_out[word] = res_out;
  			if (res_ex)
  				fds->res_ex[word] = res_ex;
  			cond_resched();
  		}

  		/* everything is queued after the first pass */
  		wait->_qproc = NULL;
  		if (retval || timed_out || signal_pending(current))
  			break;
  		if (table.error) {
  			retval = table.error;
  			break;
  		}

  		/* only if found POLL_BUSY_LOOP sockets && not out of time */
  		if (can_busy_loop && !need_resched()) {
  			if (!busy_start) {
  				busy_start = busy_loop_current_time();
  				continue;
  			}
  			if (!busy_loop_timeout(busy_start))
  				continue;
  		}
  		busy_flag = 0;

  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec64_to_ktime(*end_time);
  			to = &expire;
  		}

  		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
  					   to, slack))
  			timed_out = 1;
  	}

  	poll_freewait(&table);

  	return retval;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549
550
551
552
553
554
555
556
  /*
   * We can actually return ERESTARTSYS instead of EINTR, but I'd
   * like to be certain this leads to no problems. So I return
   * EINTR just for safety.
   *
   * Update: ERESTARTSYS breaks at least the xview clock binary, so
   * I'm trying ERESTARTNOHAND which restart only when you want to.
   */
a2dcb44c3   Al Viro   [PATCH] make osf_...
557
  int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
766b9f928   Deepa Dinamani   fs: poll/select/r...
558
  			   fd_set __user *exp, struct timespec64 *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
559
560
  {
  	fd_set_bits fds;
29ff2db55   Andrew Morton   [PATCH] select() ...
561
  	void *bits;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
562
  	int ret, max_fds;
2d19309cf   Vlastimil Babka   fs/select: add vm...
563
  	size_t size, alloc_size;
badf16621   Dipankar Sarma   [PATCH] files: br...
564
  	struct fdtable *fdt;
70674f95c   Andi Kleen   [PATCH] Optimize ...
565
  	/* Allocate small arguments on the stack to save memory and be faster */
30c14e40e   Jes Sorensen   [PATCH] avoid una...
566
  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
568
569
570
  	ret = -EINVAL;
  	if (n < 0)
  		goto out_nofds;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
571
  	/* max_fds can increase, so grab it once to avoid race */
b835996f6   Dipankar Sarma   [PATCH] files: lo...
572
  	rcu_read_lock();
badf16621   Dipankar Sarma   [PATCH] files: br...
573
  	fdt = files_fdtable(current->files);
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
574
  	max_fds = fdt->max_fds;
b835996f6   Dipankar Sarma   [PATCH] files: lo...
575
  	rcu_read_unlock();
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
576
577
  	if (n > max_fds)
  		n = max_fds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578
579
580
581
582
583
  
  	/*
  	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
  	 * since we used fdset we need to allocate memory in units of
  	 * long-words. 
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
584
  	size = FDS_BYTES(n);
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
585
586
587
588
  	bits = stack_fds;
  	if (size > sizeof(stack_fds) / 6) {
  		/* Not enough space in on-stack array; must use kmalloc */
  		ret = -ENOMEM;
2d19309cf   Vlastimil Babka   fs/select: add vm...
589
590
591
592
  		if (size > (SIZE_MAX / 6))
  			goto out_nofds;
  
  		alloc_size = 6 * size;
752ade68c   Michal Hocko   treewide: use kv[...
593
  		bits = kvmalloc(alloc_size, GFP_KERNEL);
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
594
595
596
  		if (!bits)
  			goto out_nofds;
  	}
29ff2db55   Andrew Morton   [PATCH] select() ...
597
598
599
600
601
602
  	fds.in      = bits;
  	fds.out     = bits +   size;
  	fds.ex      = bits + 2*size;
  	fds.res_in  = bits + 3*size;
  	fds.res_out = bits + 4*size;
  	fds.res_ex  = bits + 5*size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
603
604
605
606
607
608
609
610
  
  	if ((ret = get_fd_set(n, inp, fds.in)) ||
  	    (ret = get_fd_set(n, outp, fds.out)) ||
  	    (ret = get_fd_set(n, exp, fds.ex)))
  		goto out;
  	zero_fd_set(n, fds.res_in);
  	zero_fd_set(n, fds.res_out);
  	zero_fd_set(n, fds.res_ex);
8ff3e8e85   Arjan van de Ven   select: switch se...
611
  	ret = do_select(n, &fds, end_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
  
  	if (ret < 0)
  		goto out;
  	if (!ret) {
  		ret = -ERESTARTNOHAND;
  		if (signal_pending(current))
  			goto out;
  		ret = 0;
  	}
  
  	if (set_fd_set(n, inp, fds.res_in) ||
  	    set_fd_set(n, outp, fds.res_out) ||
  	    set_fd_set(n, exp, fds.res_ex))
  		ret = -EFAULT;
  
  out:
70674f95c   Andi Kleen   [PATCH] Optimize ...
628
  	if (bits != stack_fds)
2d19309cf   Vlastimil Babka   fs/select: add vm...
629
  		kvfree(bits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
630
631
632
  out_nofds:
  	return ret;
  }
4bdb9acab   Dominik Brodowski   fs: add kern_sele...
633
634
  /*
   * select() with a user-space struct timeval timeout: convert the
   * optional @tvp into an absolute end time, run core_sys_select(), and
   * let poll_select_copy_remaining() write the time left back to @tvp.
   *
   * Returns -EFAULT if @tvp cannot be read, -EINVAL if
   * poll_select_set_timeout() rejects it, otherwise the value produced by
   * poll_select_copy_remaining().
   */
  static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
  		       fd_set __user *exp, struct timeval __user *tvp)
  {
  	struct timespec64 end_time, *to = NULL;
  	struct timeval tv;
  	int ret;

  	if (tvp) {
  		if (copy_from_user(&tv, tvp, sizeof(tv)))
  			return -EFAULT;

  		/* fold whole seconds in tv_usec into the seconds part */
  		to = &end_time;
  		if (poll_select_set_timeout(to,
  				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
  				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
  			return -EINVAL;
  	}

  	ret = core_sys_select(n, inp, outp, exp, to);

  	/* with a NULL tvp, end_time is not read by the helper */
  	return poll_select_copy_remaining(&end_time, tvp, 1, ret);
  }
4bdb9acab   Dominik Brodowski   fs: add kern_sele...
654
655
656
657
658
  /* select system call: forwards all five arguments unchanged to kern_select(). */
  SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timeval __user *, tvp)
  {
  	return kern_select(n, inp, outp, exp, tvp);
  }
c9da9f212   Heiko Carstens   [CVE-2009-0029] M...
659
660
661
  /*
   * Back end of pselect6: select with a timespec timeout and an optional
   * signal mask that is installed for the duration of the call.
   *
   * @tsp:        optional user timespec; read with get_timespec64()
   * @sigmask:    optional user signal mask; SIGKILL and SIGSTOP are removed
   *              from the copy before it is installed
   * @sigsetsize: must equal sizeof(sigset_t) when @sigmask is given
   *
   * Returns -EFAULT / -EINVAL for unreadable or rejected arguments, otherwise
   * the value produced by poll_select_copy_remaining().
   */
  static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
  		       fd_set __user *exp, struct timespec __user *tsp,
  		       const sigset_t __user *sigmask, size_t sigsetsize)
  {
  	struct timespec64 ts, end_time, *to = NULL;
  	sigset_t ksigmask, sigsaved;
  	int ret;
  
  	if (tsp) {
  		if (get_timespec64(&ts, tsp))
  			return -EFAULT;
  
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  	}
  
  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
  			return -EFAULT;
  
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}
  
  	ret = core_sys_select(n, inp, outp, exp, to);
  	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
  
  	/* Without a caller-supplied mask there is nothing to undo. */
  	if (!sigmask)
  		return ret;
  
  	if (ret != -ERESTARTNOHAND) {
  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  		return ret;
  	}
  
  	/*
  	 * Don't restore the signal mask yet. Let do_signal() deliver
  	 * the signal on the way back to userspace, before the signal
  	 * mask is restored.
  	 */
  	memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
  	set_restore_sigmask();
  
  	return ret;
  }
  
  /*
   * Most architectures can't handle 7-argument syscalls. So we provide a
   * 6-argument version where the sixth argument is a pointer to a structure
   * which has a pointer to the sigset_t itself followed by a size_t containing
   * the sigset size.
   */
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
711
712
713
  SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timespec __user *, tsp,
  		void __user *, sig)
  {
  	sigset_t __user *up = NULL;
  	size_t sigsetsize = 0;
  
  	if (sig) {
  		/* @sig points at a { sigset pointer, size_t size } pair. */
  		if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t)))
  			return -EFAULT;
  		if (__get_user(up, (sigset_t __user * __user *)sig))
  			return -EFAULT;
  		if (__get_user(sigsetsize,
  				(size_t __user *)(sig+sizeof(void *))))
  			return -EFAULT;
  	}
  
  	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
727

5d0e52830   Christoph Hellwig   Add generic sys_o...
728
729
730
731
732
733
734
735
736
737
738
739
740
  #ifdef __ARCH_WANT_SYS_OLD_SELECT
  /*
   * Argument block of the old_select system call: the five select arguments
   * are passed indirectly through one structure in user memory.
   */
  struct sel_arg_struct {
  	unsigned long n;
  	fd_set __user *inp;
  	fd_set __user *outp;
  	fd_set __user *exp;
  	struct timeval __user *tvp;
  };
  
  /* Copy the argument block in and hand its members to kern_select(). */
  SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
  {
  	struct sel_arg_struct args;
  
  	if (copy_from_user(&args, arg, sizeof(args)))
  		return -EFAULT;
  
  	return kern_select(args.n, args.inp, args.outp, args.exp, args.tvp);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
744
745
746
747
748
749
750
  /*
   * One chunk of the pollfd list that do_sys_poll() builds from the user
   * array.
   *
   * @next:    following chunk, or NULL at the end of the chain
   * @len:     number of valid elements in @entries
   * @entries: the pollfd records of this chunk; their storage is allocated
   *           directly behind the header, hence the C99 flexible array
   *           member (replacing the deprecated GNU zero-length array)
   */
  struct poll_list {
  	struct poll_list *next;
  	int len;
  	struct pollfd entries[];
  };
  
  /* Number of pollfd entries that fit into one page together with the header. */
  #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
751
752
753
754
755
  /*
   * Fish for pollable events on the pollfd->fd file descriptor. We're only
   * interested in events matching the pollfd->events mask, and the result
   * matching that mask is both recorded in pollfd->revents and returned. The
   * pwait poll_table will be used by the fd-provided poll handler for waiting,
626cf2366   Hans Verkuil   poll: add poll_re...
756
   * if pwait->_qproc is non-NULL.
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
757
   */
fb3679372   Al Viro   annotate poll(2) ...
758
  static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
cbf55001b   Eliezer Tamir   net: rename low l...
759
  				     bool *can_busy_poll,
fb3679372   Al Viro   annotate poll(2) ...
760
  				     __poll_t busy_flag)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
761
  {
a0f8dcfc6   Christoph Hellwig   fs: cleanup do_po...
762
763
764
765
766
767
768
769
770
771
772
773
774
  	int fd = pollfd->fd;
  	__poll_t mask = 0, filter;
  	struct fd f;
  
  	if (fd < 0)
  		goto out;
  	mask = EPOLLNVAL;
  	f = fdget(fd);
  	if (!f.file)
  		goto out;
  
  	/* userland u16 ->events contains POLL... bitmap */
  	filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
9965ed174   Christoph Hellwig   fs: add new vfs_p...
775
776
777
778
  	pwait->_key = filter | busy_flag;
  	mask = vfs_poll(f.file, pwait);
  	if (mask & busy_flag)
  		*can_busy_poll = true;
a0f8dcfc6   Christoph Hellwig   fs: cleanup do_po...
779
780
781
782
  	mask &= filter;		/* Mask out unneeded events. */
  	fdput(f);
  
  out:
fb3679372   Al Viro   annotate poll(2) ...
783
  	/* ... and so does ->revents */
c71d227fc   Al Viro   make kernel-side ...
784
  	pollfd->revents = mangle_poll(mask);
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
785
  	return mask;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
786
  }
ccec5ee30   Mateusz Guzik   poll: plug an unu...
787
  static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
766b9f928   Deepa Dinamani   fs: poll/select/r...
788
  		   struct timespec64 *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
789
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
790
  	poll_table* pt = &wait->pt;
8ff3e8e85   Arjan van de Ven   select: switch se...
791
792
  	ktime_t expire, *to = NULL;
  	int timed_out = 0, count = 0;
da8b44d5a   John Stultz   timer: convert ti...
793
  	u64 slack = 0;
fb3679372   Al Viro   annotate poll(2) ...
794
  	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
37056719b   Alexander Duyck   net: Track start ...
795
  	unsigned long busy_start = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
796

9f72949f6   David Woodhouse   [PATCH] Add psele...
797
  	/* Optimise the no-wait case */
8ff3e8e85   Arjan van de Ven   select: switch se...
798
  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
626cf2366   Hans Verkuil   poll: add poll_re...
799
  		pt->_qproc = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
800
801
  		timed_out = 1;
  	}
9bf084f70   Oleg Nesterov   do_poll: return -...
802

96d2ab484   Arjan van de Ven   hrtimer: fix sign...
803
  	if (end_time && !timed_out)
231f3d393   Andrew Morton   select: rename es...
804
  		slack = select_estimate_accuracy(end_time);
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
805

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
806
807
  	for (;;) {
  		struct poll_list *walk;
cbf55001b   Eliezer Tamir   net: rename low l...
808
  		bool can_busy_loop = false;
9f72949f6   David Woodhouse   [PATCH] Add psele...
809

4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
810
811
812
813
814
815
816
817
  		for (walk = list; walk != NULL; walk = walk->next) {
  			struct pollfd * pfd, * pfd_end;
  
  			pfd = walk->entries;
  			pfd_end = pfd + walk->len;
  			for (; pfd != pfd_end; pfd++) {
  				/*
  				 * Fish for events. If we found one, record it
626cf2366   Hans Verkuil   poll: add poll_re...
818
  				 * and kill poll_table->_qproc, so we don't
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
819
820
821
822
  				 * needlessly register any other waiters after
  				 * this. They'll get immediately deregistered
  				 * when we break out and return.
  				 */
cbf55001b   Eliezer Tamir   net: rename low l...
823
824
  				if (do_pollfd(pfd, pt, &can_busy_loop,
  					      busy_flag)) {
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
825
  					count++;
626cf2366   Hans Verkuil   poll: add poll_re...
826
  					pt->_qproc = NULL;
cbf55001b   Eliezer Tamir   net: rename low l...
827
828
829
  					/* found something, stop busy polling */
  					busy_flag = 0;
  					can_busy_loop = false;
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
830
831
  				}
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
832
  		}
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
833
834
  		/*
  		 * All waiters have already been registered, so don't provide
626cf2366   Hans Verkuil   poll: add poll_re...
835
  		 * a poll_table->_qproc to them on the next loop iteration.
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
836
  		 */
626cf2366   Hans Verkuil   poll: add poll_re...
837
  		pt->_qproc = NULL;
9bf084f70   Oleg Nesterov   do_poll: return -...
838
839
840
841
842
  		if (!count) {
  			count = wait->error;
  			if (signal_pending(current))
  				count = -EINTR;
  		}
8ff3e8e85   Arjan van de Ven   select: switch se...
843
  		if (count || timed_out)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
844
  			break;
9f72949f6   David Woodhouse   [PATCH] Add psele...
845

cbf55001b   Eliezer Tamir   net: rename low l...
846
  		/* only if found POLL_BUSY_LOOP sockets && not out of time */
76b1e9b98   Eliezer Tamir   net/fs: change bu...
847
  		if (can_busy_loop && !need_resched()) {
37056719b   Alexander Duyck   net: Track start ...
848
849
  			if (!busy_start) {
  				busy_start = busy_loop_current_time();
76b1e9b98   Eliezer Tamir   net/fs: change bu...
850
851
  				continue;
  			}
37056719b   Alexander Duyck   net: Track start ...
852
  			if (!busy_loop_timeout(busy_start))
76b1e9b98   Eliezer Tamir   net/fs: change bu...
853
854
855
  				continue;
  		}
  		busy_flag = 0;
91e2fd337   Eliezer Tamir   net: avoid callin...
856

8ff3e8e85   Arjan van de Ven   select: switch se...
857
858
859
860
861
862
  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
766b9f928   Deepa Dinamani   fs: poll/select/r...
863
  			expire = timespec64_to_ktime(*end_time);
8ff3e8e85   Arjan van de Ven   select: switch se...
864
  			to = &expire;
9f72949f6   David Woodhouse   [PATCH] Add psele...
865
  		}
5f820f648   Tejun Heo   poll: allow f_op-...
866
  		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
8ff3e8e85   Arjan van de Ven   select: switch se...
867
  			timed_out = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
868
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
869
870
  	return count;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
871
872
  #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
  			sizeof(struct pollfd))
e99ca56ce   Al Viro   move compat selec...
873
  static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
766b9f928   Deepa Dinamani   fs: poll/select/r...
874
  		struct timespec64 *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
875
876
  {
  	struct poll_wqueues table;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
877
   	int err = -EFAULT, fdcount, len, size;
30c14e40e   Jes Sorensen   [PATCH] avoid una...
878
879
880
881
  	/* Allocate small arguments on the stack to save memory and be
  	   faster - use long to make sure the buffer is aligned properly
  	   on 64 bit archs to avoid unaligned access */
  	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
252e5725c   Oleg Nesterov   do_sys_poll: simp...
882
883
884
  	struct poll_list *const head = (struct poll_list *)stack_pps;
   	struct poll_list *walk = head;
   	unsigned long todo = nfds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885

d554ed895   Jiri Slaby   fs: use rlimit he...
886
  	if (nfds > rlimit(RLIMIT_NOFILE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
887
  		return -EINVAL;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
888
889
890
891
892
893
  	len = min_t(unsigned int, nfds, N_STACK_PPS);
  	for (;;) {
  		walk->next = NULL;
  		walk->len = len;
  		if (!len)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
894

252e5725c   Oleg Nesterov   do_sys_poll: simp...
895
896
897
898
899
900
901
  		if (copy_from_user(walk->entries, ufds + nfds-todo,
  					sizeof(struct pollfd) * walk->len))
  			goto out_fds;
  
  		todo -= walk->len;
  		if (!todo)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
902

252e5725c   Oleg Nesterov   do_sys_poll: simp...
903
904
905
906
907
  		len = min(todo, POLLFD_PER_PAGE);
  		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
  		walk = walk->next = kmalloc(size, GFP_KERNEL);
  		if (!walk) {
  			err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
908
909
  			goto out_fds;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
910
  	}
9f72949f6   David Woodhouse   [PATCH] Add psele...
911

252e5725c   Oleg Nesterov   do_sys_poll: simp...
912
  	poll_initwait(&table);
ccec5ee30   Mateusz Guzik   poll: plug an unu...
913
  	fdcount = do_poll(head, &table, end_time);
252e5725c   Oleg Nesterov   do_sys_poll: simp...
914
  	poll_freewait(&table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
915

252e5725c   Oleg Nesterov   do_sys_poll: simp...
916
  	for (walk = head; walk; walk = walk->next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
917
918
  		struct pollfd *fds = walk->entries;
  		int j;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
919
920
  		for (j = 0; j < walk->len; j++, ufds++)
  			if (__put_user(fds[j].revents, &ufds->revents))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
921
  				goto out_fds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
922
    	}
252e5725c   Oleg Nesterov   do_sys_poll: simp...
923

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
924
  	err = fdcount;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
925
  out_fds:
252e5725c   Oleg Nesterov   do_sys_poll: simp...
926
927
928
929
930
  	walk = head->next;
  	while (walk) {
  		struct poll_list *pos = walk;
  		walk = walk->next;
  		kfree(pos);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
931
  	}
252e5725c   Oleg Nesterov   do_sys_poll: simp...
932

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
933
934
  	return err;
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
935

3075d9da0   Chris Wright   Use ERESTART_REST...
936
937
  static long do_restart_poll(struct restart_block *restart_block)
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
938
939
  	struct pollfd __user *ufds = restart_block->poll.ufds;
  	int nfds = restart_block->poll.nfds;
766b9f928   Deepa Dinamani   fs: poll/select/r...
940
  	struct timespec64 *to = NULL, end_time;
3075d9da0   Chris Wright   Use ERESTART_REST...
941
  	int ret;
8ff3e8e85   Arjan van de Ven   select: switch se...
942
943
944
945
946
947
948
  	if (restart_block->poll.has_timeout) {
  		end_time.tv_sec = restart_block->poll.tv_sec;
  		end_time.tv_nsec = restart_block->poll.tv_nsec;
  		to = &end_time;
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
3075d9da0   Chris Wright   Use ERESTART_REST...
949
950
  	if (ret == -EINTR) {
  		restart_block->fn = do_restart_poll;
3075d9da0   Chris Wright   Use ERESTART_REST...
951
952
953
954
  		ret = -ERESTART_RESTARTBLOCK;
  	}
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
955
  /*
   * poll system call. A negative @timeout_msecs means no timeout. When
   * do_sys_poll() returns -EINTR the arguments are stored in the task's
   * restart block so that do_restart_poll() can resume the call.
   */
  SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
  		int, timeout_msecs)
  {
  	struct timespec64 end_time, *to = NULL;
  	struct restart_block *restart_block;
  	int ret;
  
  	if (timeout_msecs >= 0) {
  		to = &end_time;
  		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
  			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
  	if (ret != -EINTR)
  		return ret;
  
  	restart_block = &current->restart_block;
  	restart_block->fn = do_restart_poll;
  	restart_block->poll.ufds = ufds;
  	restart_block->poll.nfds = nfds;
  
  	if (timeout_msecs >= 0) {
  		restart_block->poll.tv_sec = end_time.tv_sec;
  		restart_block->poll.tv_nsec = end_time.tv_nsec;
  		restart_block->poll.has_timeout = 1;
  	} else {
  		restart_block->poll.has_timeout = 0;
  	}
  
  	return -ERESTART_RESTARTBLOCK;
  }
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
985
986
987
  /*
   * ppoll system call: poll with a timespec timeout and an optional signal
   * mask that is installed for the duration of the call (SIGKILL and SIGSTOP
   * are always removed from it). -EINTR from do_sys_poll() is turned into
   * -ERESTARTNOHAND before poll_select_copy_remaining() post-processes the
   * result.
   */
  SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
  		struct timespec __user *, tsp, const sigset_t __user *, sigmask,
  		size_t, sigsetsize)
  {
  	struct timespec64 ts, end_time, *to = NULL;
  	sigset_t ksigmask, sigsaved;
  	bool interrupted;
  	int ret;
  
  	if (tsp) {
  		if (get_timespec64(&ts, tsp))
  			return -EFAULT;
  
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  	}
  
  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
  			return -EFAULT;
  
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
  
  	/* We can restart this syscall, usually */
  	interrupted = (ret == -EINTR);
  
  	if (sigmask) {
  		if (interrupted) {
  			/*
  			 * Don't restore the signal mask yet. Let do_signal()
  			 * deliver the signal on the way back to userspace,
  			 * before the signal mask is restored.
  			 */
  			memcpy(&current->saved_sigmask, &sigsaved,
  					sizeof(sigsaved));
  			set_restore_sigmask();
  		} else {
  			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  		}
  	}
  
  	if (interrupted)
  		ret = -ERESTARTNOHAND;
  
  	return poll_select_copy_remaining(&end_time, tsp, 0, ret);
  }
e99ca56ce   Al Viro   move compat selec...
1032
1033
1034
1035
1036
  
  #ifdef CONFIG_COMPAT
  #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))
  
  static
36819ad09   Deepa Dinamani   select: Use get/p...
1037
  int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
e99ca56ce   Al Viro   move compat selec...
1038
1039
  				      int timeval, int ret)
  {
36819ad09   Deepa Dinamani   select: Use get/p...
1040
  	struct timespec64 ts;
e99ca56ce   Al Viro   move compat selec...
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
  
  	if (!p)
  		return ret;
  
  	if (current->personality & STICKY_TIMEOUTS)
  		goto sticky;
  
  	/* No update for zero timeout */
  	if (!end_time->tv_sec && !end_time->tv_nsec)
  		return ret;
36819ad09   Deepa Dinamani   select: Use get/p...
1051
1052
  	ktime_get_ts64(&ts);
  	ts = timespec64_sub(*end_time, ts);
e99ca56ce   Al Viro   move compat selec...
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
  	if (ts.tv_sec < 0)
  		ts.tv_sec = ts.tv_nsec = 0;
  
  	if (timeval) {
  		struct compat_timeval rtv;
  
  		rtv.tv_sec = ts.tv_sec;
  		rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
  
  		if (!copy_to_user(p, &rtv, sizeof(rtv)))
  			return ret;
  	} else {
36819ad09   Deepa Dinamani   select: Use get/p...
1065
  		if (!compat_put_timespec64(&ts, p))
e99ca56ce   Al Viro   move compat selec...
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
  			return ret;
  	}
  	/*
  	 * If an application puts its timeval in read-only memory, we
  	 * don't want the Linux-specific update to the timeval to
  	 * cause a fault after the select has completed
  	 * successfully. However, because we're not updating the
  	 * timeval, we can't restart the system call.
  	 */
  
  sticky:
  	if (ret == -ERESTARTNOHAND)
  		ret = -EINTR;
  	return ret;
  }
  
  /*
   * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
   * 64-bit unsigned longs.
   */
  static
  int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
  			unsigned long *fdset)
  {
  	/* No user set supplied: behave as if an empty one had been passed. */
  	if (!ufdset) {
  		zero_fd_set(nr, fdset);
  		return 0;
  	}
  
  	return compat_get_bitmap(fdset, ufdset, nr);
  }
  
  /*
   * Write @fdset back to the compat user bitmap @ufdset; a NULL @ufdset is
   * skipped and counts as success.
   */
  static
  int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
  		      unsigned long *fdset)
  {
  	return ufdset ? compat_put_bitmap(ufdset, fdset, nr) : 0;
  }
  
  
  /*
   * The compat select code below is a virtual copy of core_sys_select()
   * earlier in this file and should be compared to it from time to time.
   */
  
  /*
   * We can actually return ERESTARTSYS instead of EINTR, but I'd
   * like to be certain this leads to no problems. So I return
   * EINTR just for safety.
   *
   * Update: ERESTARTSYS breaks at least the xview clock binary, so
   * I'm trying ERESTARTNOHAND which restart only when you want to.
   */
  /*
   * Compat counterpart of core_sys_select(): fetch the three compat fd sets
   * from user space, run do_select() on them and write the result sets back.
   *
   * @n:        highest descriptor number plus one; clamped to the current
   *            fdtable's max_fds
   * @inp, @outp, @exp: optional compat bitmaps in user space
   * @end_time: timeout handed through to do_select()
   *
   * Returns -EINVAL for negative @n, -ENOMEM if the bitmap buffer cannot be
   * allocated, the error from fetching a set, -EFAULT if a result set cannot
   * be written back, -ERESTARTNOHAND if do_select() found nothing and a
   * signal is pending, otherwise the value returned by do_select().
   */
  static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
  	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
  	struct timespec64 *end_time)
  {
  	fd_set_bits fds;
  	void *bits;
  	int size, max_fds, ret = -EINVAL;
  	struct fdtable *fdt;
  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
  
  	if (n < 0)
  		goto out_nofds;
  
  	/* max_fds can increase, so grab it once to avoid race */
  	rcu_read_lock();
  	fdt = files_fdtable(current->files);
  	max_fds = fdt->max_fds;
  	rcu_read_unlock();
  	if (n > max_fds)
  		n = max_fds;
  
  	/*
  	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
  	 * since we used fdset we need to allocate memory in units of
  	 * long-words.
  	 */
  	size = FDS_BYTES(n);
  	bits = stack_fds;
  	if (size > sizeof(stack_fds) / 6) {
  		/*
  		 * The buffer grows with the number of descriptors, so use the
  		 * kvmalloc family like core_sys_select() does: it can fall
  		 * back to vmalloc when a large contiguous allocation fails.
  		 */
  		bits = kvmalloc_array(6, size, GFP_KERNEL);
  		ret = -ENOMEM;
  		if (!bits)
  			goto out_nofds;
  	}
  	fds.in      = (unsigned long *)  bits;
  	fds.out     = (unsigned long *) (bits +   size);
  	fds.ex      = (unsigned long *) (bits + 2*size);
  	fds.res_in  = (unsigned long *) (bits + 3*size);
  	fds.res_out = (unsigned long *) (bits + 4*size);
  	fds.res_ex  = (unsigned long *) (bits + 5*size);
  
  	if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
  	    (ret = compat_get_fd_set(n, outp, fds.out)) ||
  	    (ret = compat_get_fd_set(n, exp, fds.ex)))
  		goto out;
  	zero_fd_set(n, fds.res_in);
  	zero_fd_set(n, fds.res_out);
  	zero_fd_set(n, fds.res_ex);
  
  	ret = do_select(n, &fds, end_time);
  
  	if (ret < 0)
  		goto out;
  	if (!ret) {
  		/* Nothing ready: only report a restart if a signal is pending. */
  		ret = -ERESTARTNOHAND;
  		if (signal_pending(current))
  			goto out;
  		ret = 0;
  	}
  
  	if (compat_set_fd_set(n, inp, fds.res_in) ||
  	    compat_set_fd_set(n, outp, fds.res_out) ||
  	    compat_set_fd_set(n, exp, fds.res_ex))
  		ret = -EFAULT;
  out:
  	/* kvfree() releases both kmalloc- and vmalloc-backed buffers. */
  	if (bits != stack_fds)
  		kvfree(bits);
  out_nofds:
  	return ret;
  }
05585e449   Dominik Brodowski   fs: add do_compat...
1191
1192
1193
  /*
   * Shared back end of the compat select and old_select system calls.
   *
   * If @tvp is given, the compat timeval is copied in and handed to
   * poll_select_set_timeout() to fill in end_time. compat_core_sys_select()
   * then does the work and compat_poll_select_copy_remaining() post-processes
   * the result.
   *
   * Returns -EFAULT if @tvp cannot be read, -EINVAL if
   * poll_select_set_timeout() rejects the timeout, otherwise the value
   * produced by compat_poll_select_copy_remaining().
   */
  static int do_compat_select(int n, compat_ulong_t __user *inp,
  	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
  	struct compat_timeval __user *tvp)
  {
  	struct timespec64 end_time;
  	struct timespec64 *to = NULL;
  	int ret;
  
  	if (tvp) {
  		struct compat_timeval tv;
  
  		if (copy_from_user(&tv, tvp, sizeof(tv)))
  			return -EFAULT;
  
  		/* Whole seconds carried in tv_usec are folded into the seconds. */
  		to = &end_time;
  		if (poll_select_set_timeout(to,
  				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
  				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
  			return -EINVAL;
  	}
  
  	ret = compat_core_sys_select(n, inp, outp, exp, to);
  
  	return compat_poll_select_copy_remaining(&end_time, tvp, 1, ret);
  }
05585e449   Dominik Brodowski   fs: add do_compat...
1215
1216
1217
1218
1219
1220
  /* Compat select system call: forwards all arguments to do_compat_select(). */
  COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
  	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
  	struct compat_timeval __user *, tvp)
  {
  	return do_compat_select(n, inp, outp, exp, tvp);
  }
e99ca56ce   Al Viro   move compat selec...
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
  /*
   * Argument block of the compat old_select system call. The pointer members
   * are compat_uptr_t values that the caller converts with compat_ptr().
   */
  struct compat_sel_arg_struct {
  	compat_ulong_t n;
  	compat_uptr_t inp;
  	compat_uptr_t outp;
  	compat_uptr_t exp;
  	compat_uptr_t tvp;
  };
  
  /*
   * Compat old_select: copy the argument block in, convert its compat
   * pointers and hand everything to do_compat_select().
   */
  COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
  {
  	struct compat_sel_arg_struct args;
  
  	if (copy_from_user(&args, arg, sizeof(args)))
  		return -EFAULT;
  
  	return do_compat_select(args.n, compat_ptr(args.inp),
  				compat_ptr(args.outp), compat_ptr(args.exp),
  				compat_ptr(args.tvp));
  }
  
  /*
   * Back end of the compat pselect6 system call: select with a compat
   * timespec timeout and an optional compat signal mask that is installed
   * for the duration of the call.
   *
   * @tsp:        optional user timespec; read with compat_get_timespec64()
   * @sigmask:    optional user signal mask; SIGKILL and SIGSTOP are removed
   *              from the copy before it is installed
   * @sigsetsize: must equal sizeof(compat_sigset_t) when @sigmask is given
   *
   * Returns -EFAULT / -EINVAL for unreadable or rejected arguments, otherwise
   * the value produced by compat_poll_select_copy_remaining().
   */
  static long do_compat_pselect(int n, compat_ulong_t __user *inp,
  	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
  	struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
  	compat_size_t sigsetsize)
  {
  	struct timespec64 ts, end_time, *to = NULL;
  	sigset_t ksigmask, sigsaved;
  	int ret;
  
  	if (tsp) {
  		if (compat_get_timespec64(&ts, tsp))
  			return -EFAULT;
  
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  	}
  
  	if (sigmask) {
  		if (sigsetsize != sizeof(compat_sigset_t))
  			return -EINVAL;
  		if (get_compat_sigset(&ksigmask, sigmask))
  			return -EFAULT;
  
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}
  
  	ret = compat_core_sys_select(n, inp, outp, exp, to);
  	ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
  
  	/* Without a caller-supplied mask there is nothing to undo. */
  	if (!sigmask)
  		return ret;
  
  	if (ret != -ERESTARTNOHAND) {
  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  		return ret;
  	}
  
  	/*
  	 * Don't restore the signal mask yet. Let do_signal() deliver
  	 * the signal on the way back to userspace, before the signal
  	 * mask is restored.
  	 */
  	memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved));
  	set_restore_sigmask();
  
  	return ret;
  }
  
  /*
   * Compat pselect6: @sig, when non-NULL, points at a compat pointer to the
   * signal mask followed by a compat_size_t holding the mask size.
   */
  COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
  	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
  	struct compat_timespec __user *, tsp, void __user *, sig)
  {
  	compat_uptr_t up = 0;
  	compat_size_t sigsetsize = 0;
  
  	if (sig) {
  		if (!access_ok(VERIFY_READ, sig,
  				sizeof(compat_uptr_t)+sizeof(compat_size_t)))
  			return -EFAULT;
  		if (__get_user(up, (compat_uptr_t __user *)sig))
  			return -EFAULT;
  		if (__get_user(sigsetsize,
  				(compat_size_t __user *)(sig+sizeof(up))))
  			return -EFAULT;
  	}
  
  	return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
  				 sigsetsize);
  }
  
  /*
   * Compat ppoll entry point.
   *
   * If @tsp is non-NULL the user timeout is read and handed to
   * poll_select_set_timeout() to fill in end_time; otherwise do_sys_poll()
   * is called with a NULL timeout.  If @sigmask is non-NULL the supplied
   * mask is installed with sigprocmask() for the duration of the poll and
   * the previous mask is restored afterwards (or deferred, see below).
   *
   * Returns:
   *   -EFAULT  if @tsp or @sigmask cannot be read from user space,
   *   -EINVAL  if poll_select_set_timeout() rejects the timeout or
   *            @sigsetsize != sizeof(compat_sigset_t),
   *   otherwise the do_sys_poll() result (with -EINTR rewritten to
   *   -ERESTARTNOHAND) after it has been passed through
   *   compat_poll_select_copy_remaining().
   */
  COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
  	unsigned int, nfds, struct compat_timespec __user *, tsp,
  	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
  {
  	sigset_t ksigmask, sigsaved;
  	struct timespec64 ts, end_time, *to = NULL;
  	int ret;
  
  	if (tsp) {
  		if (compat_get_timespec64(&ts, tsp))
  			return -EFAULT;
  
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  	}
  
  	if (sigmask) {
  		if (sigsetsize != sizeof(compat_sigset_t))
  			return -EINVAL;
  		if (get_compat_sigset(&ksigmask, sigmask))
  			return -EFAULT;
  
  		/* Take SIGKILL and SIGSTOP out of the mask before installing it. */
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
  
  	/* We can restart this syscall, usually */
  	if (ret == -EINTR) {
  		/*
  		 * Don't restore the signal mask yet. Let do_signal() deliver
  		 * the signal on the way back to userspace, before the signal
  		 * mask is restored.
  		 */
  		if (sigmask) {
  			memcpy(&current->saved_sigmask, &sigsaved,
  				sizeof(sigsaved));
  			set_restore_sigmask();
  		}
  		ret = -ERESTARTNOHAND;
  	} else if (sigmask) {
  		/* Not interrupted: put the caller's original mask back now. */
  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  	}
  
  	return compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
  }
  #endif