Blame view

fs/select.c 24 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
  /*
   * This file contains the procedures for the handling of select and poll
   *
   * Created for Linux based loosely upon Mathius Lattner's minix
   * patches by Peter MacDonald. Heavily edited by Linus.
   *
   *  4 February 1994
   *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
   *     flag set in its personality we do *not* modify the given timeout
   *     parameter to reflect time remaining.
   *
   *  24 January 2000
   *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
   *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
   */
022a16924   Milind Arun Choudhary   ROUND_UP macro cl...
16
  #include <linux/kernel.h>
a99bbaf5e   Alexey Dobriyan   headers: remove s...
17
  #include <linux/sched.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/syscalls.h>
630d9c472   Paul Gortmaker   fs: reduce the us...
19
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
22
23
  #include <linux/poll.h>
  #include <linux/personality.h> /* for STICKY_TIMEOUTS */
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
24
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
  #include <linux/fs.h>
b835996f6   Dipankar Sarma   [PATCH] files: lo...
26
  #include <linux/rcupdate.h>
8ff3e8e85   Arjan van de Ven   select: switch se...
27
  #include <linux/hrtimer.h>
8bd75c77b   Clark Williams   sched/rt: Move rt...
28
  #include <linux/sched/rt.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
  
  #include <asm/uaccess.h>
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
31
32
33
34
35
36
37
38
39
40
41
42
  
  /*
   * Estimate expected accuracy in ns from a timespec.
   *
   * After quite a bit of churning around, we've settled on
   * a simple thing of taking 0.1% of the timeout as the
   * slack, with a cap of 100 msec.
   * "nice" tasks get a 0.5% slack instead.
   *
   * Consider this comment an open invitation to come up with even
   * better solutions..
   */
5ae87e79e   Guillaume Knispel   poll/select: avoi...
43
  #define MAX_SLACK	(100 * NSEC_PER_MSEC)
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
44
  static long __estimate_accuracy(struct timespec *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
45
  {
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
46
  	long slack;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
47
  	int divfactor = 1000;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
48
49
  	if (tv->tv_sec < 0)
  		return 0;
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
50
  	if (task_nice(current) > 0)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
51
  		divfactor = divfactor / 5;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
52
53
  	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
  		return MAX_SLACK;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
54
55
  	slack = tv->tv_nsec / divfactor;
  	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
5ae87e79e   Guillaume Knispel   poll/select: avoi...
56
57
  	if (slack > MAX_SLACK)
  		return MAX_SLACK;
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
58

90d6e24a3   Arjan van de Ven   hrtimer: make sel...
59
60
  	return slack;
  }
95aac7b1c   Shawn Bohrer   epoll: make epoll...
61
  long select_estimate_accuracy(struct timespec *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
62
63
64
65
66
67
68
  {
  	unsigned long ret;
  	struct timespec now;
  
  	/*
  	 * Realtime tasks get a slack of 0 for obvious reasons.
  	 */
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
69
  	if (rt_task(current))
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
70
71
72
73
74
75
76
77
78
  		return 0;
  
  	ktime_get_ts(&now);
  	now = timespec_sub(*tv, now);
  	ret = __estimate_accuracy(&now);
  	if (ret < current->timer_slack_ns)
  		return current->timer_slack_ns;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  /*
   * One page worth of poll_table_entry slots, chained through @next.
   *
   * @next:    previously allocated page, or NULL
   * @entry:   first unused slot in @entries
   * @entries: slot storage; it extends to the end of the page, so it is
   *           declared as a C99 flexible array member rather than the
   *           GNU zero-length array extension
   */
  struct poll_table_page {
  	struct poll_table_page * next;
  	struct poll_table_entry * entry;
  	struct poll_table_entry entries[];
  };

  /*
   * True when one more entry after (table)->entry would end beyond
   * PAGE_SIZE bytes from the start of the page.
   */
  #define POLL_TABLE_FULL(table) \
  	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
  
  /*
   * Ok, Peter made a complicated, but straightforward multiple_wait() function.
   * I have rewritten this, taking some shortcuts: This code may not be easy to
   * follow, but it should be free of race-conditions, and it's practical. If you
   * understand what I'm doing here, then you understand how the linux
   * sleep/wakeup mechanism works.
   *
   * Two very simple procedures, poll_wait() and poll_freewait() make all the
   * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
   * as all select/poll functions have to call it to add an entry to the
   * poll table.
   */
75c96f858   Adrian Bunk   [PATCH] make some...
100
101
  /*
   * Forward declaration: poll_initwait() below installs this callback
   * before its definition appears later in the file.
   */
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  		       poll_table *p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
102
103
104
105
  
  void poll_initwait(struct poll_wqueues *pwq)
  {
  	init_poll_funcptr(&pwq->pt, __pollwait);
5f820f648   Tejun Heo   poll: allow f_op-...
106
  	pwq->polling_task = current;
b2add73db   Guillaume Knispel   poll/select: init...
107
  	pwq->triggered = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
108
109
  	pwq->error = 0;
  	pwq->table = NULL;
70674f95c   Andi Kleen   [PATCH] Optimize ...
110
  	pwq->inline_index = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
  EXPORT_SYMBOL(poll_initwait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
113
114
  static void free_poll_entry(struct poll_table_entry *entry)
  {
ccf6780dc   WANG Cong   Style fix in fs/s...
115
  	remove_wait_queue(entry->wait_address, &entry->wait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
116
117
  	fput(entry->filp);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
120
  void poll_freewait(struct poll_wqueues *pwq)
  {
  	struct poll_table_page * p = pwq->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
121
122
123
  	int i;
  	for (i = 0; i < pwq->inline_index; i++)
  		free_poll_entry(pwq->inline_entries + i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
124
125
126
127
128
129
130
  	while (p) {
  		struct poll_table_entry * entry;
  		struct poll_table_page *old;
  
  		entry = p->entry;
  		do {
  			entry--;
70674f95c   Andi Kleen   [PATCH] Optimize ...
131
  			free_poll_entry(entry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132
133
134
135
136
137
  		} while (entry > p->entries);
  		old = p;
  		p = p->next;
  		free_page((unsigned long) old);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
138
  EXPORT_SYMBOL(poll_freewait);
5f820f648   Tejun Heo   poll: allow f_op-...
139
  static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
141
  	struct poll_table_page *table = p->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
142
143
  	if (p->inline_index < N_INLINE_POLL_ENTRIES)
  		return p->inline_entries + p->inline_index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
144
145
146
147
148
149
  	if (!table || POLL_TABLE_FULL(table)) {
  		struct poll_table_page *new_table;
  
  		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
  		if (!new_table) {
  			p->error = -ENOMEM;
70674f95c   Andi Kleen   [PATCH] Optimize ...
150
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
151
152
153
154
155
156
  		}
  		new_table->entry = new_table->entries;
  		new_table->next = table;
  		p->table = new_table;
  		table = new_table;
  	}
70674f95c   Andi Kleen   [PATCH] Optimize ...
157
158
  	return table->entry++;
  }
4938d7e02   Eric Dumazet   poll: avoid extra...
159
  /*
   * Mark the poll_wqueues behind @wait as triggered and wake up its
   * polling task.  Returns whatever default_wake_function() returns.
   */
  static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_wqueues *queues = wait->private;
  	DECLARE_WAITQUEUE(waker, queues->polling_task);

  	/*
  	 * This runs under the waitqueue lock, but taking a lock does
  	 * not imply a write barrier, and users expect write barrier
  	 * semantics from wakeup functions.  This smp_wmb() plays the
  	 * role of the one in try_to_wake_up() and pairs with the
  	 * set_mb() in poll_schedule_timeout().
  	 */
  	smp_wmb();
  	queues->triggered = 1;

  	/*
  	 * Do the default wakeup through a throwaway waitqueue entry
  	 * that points at the polling task.
  	 *
  	 * TODO: This is hacky, but there is currently no interface to
  	 * pass in @sync.  Once @sync is removed, wake_up_process() can
  	 * be called directly.
  	 */
  	return default_wake_function(&waker, mode, sync, key);
  }
4938d7e02   Eric Dumazet   poll: avoid extra...
184
185
186
187
188
189
190
191
192
  /*
   * Wakeup callback installed by __pollwait().
   *
   * A non-NULL @key is treated as an event mask: if it shares no bit
   * with the events recorded in the entry, the wakeup is ignored and 0
   * is returned.  Otherwise the polling task is woken via __pollwake().
   */
  static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_table_entry *slot =
  		container_of(wait, struct poll_table_entry, wait);
  	unsigned long events = (unsigned long)key;

  	if (events && !(events & slot->key))
  		return 0;

  	return __pollwake(wait, mode, sync, key);
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
193
194
195
196
  /*
   * Queueing callback installed by poll_initwait(): register the
   * poll_wqueues that embeds @p on @wait_address.
   *
   * Takes a reference on @filp for the lifetime of the entry.  If no
   * entry can be obtained the function returns without queueing;
   * poll_get_entry() has recorded the error in that case.
   */
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  				poll_table *p)
  {
  	struct poll_wqueues *queues = container_of(p, struct poll_wqueues, pt);
  	struct poll_table_entry *slot;

  	slot = poll_get_entry(queues);
  	if (!slot)
  		return;

  	slot->key = p->_key;
  	slot->wait_address = wait_address;
  	slot->filp = get_file(filp);

  	init_waitqueue_func_entry(&slot->wait, pollwake);
  	slot->wait.private = queues;
  	add_wait_queue(wait_address, &slot->wait);
  }
5f820f648   Tejun Heo   poll: allow f_op-...
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
  /*
   * Sleep in @state until a wakeup is recorded in @pwq or the timeout
   * described by @expires and @slack elapses.
   *
   * Returns -EINTR when a wakeup had already been recorded and no sleep
   * took place; otherwise the result of schedule_hrtimeout_range().
   */
  int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
  			  ktime_t *expires, unsigned long slack)
  {
  	int result = -EINTR;

  	set_current_state(state);
  	if (!pwq->triggered)
  		result = schedule_hrtimeout_range(expires, slack,
  						  HRTIMER_MODE_ABS);
  	__set_current_state(TASK_RUNNING);

  	/*
  	 * Get ready for the next iteration.
  	 *
  	 * This set_mb() has two jobs.  It is the read-side counterpart
  	 * of the wmb in pollwake(), so data written before the wakeup
  	 * is visible after it.  And, being a full barrier, it keeps
  	 * the clearing of ->triggered from passing the event check of
  	 * the next iteration.  The first iteration does not have this
  	 * problem because add_wait_queue() has full barrier semantics.
  	 */
  	set_mb(pwq->triggered, 0);

  	return result;
  }
  EXPORT_SYMBOL(poll_schedule_timeout);
b773ad40a   Thomas Gleixner   select: add poll_...
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
  /**
   * poll_select_set_timeout - helper function to setup the timeout value
   * @to:		pointer to timespec variable for the final timeout
   * @sec:	seconds (from user space)
   * @nsec:	nanoseconds (from user space)
   *
   * Note, we do not use a timespec for the user space value here, That
   * way we can use the function for timeval and compat interfaces as well.
   *
   * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
   */
  int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
  {
  	struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
  
  	if (!timespec_valid(&ts))
  		return -EINVAL;
  
  	/* Optimize for the zero timeout value here */
  	if (!sec && !nsec) {
  		to->tv_sec = to->tv_nsec = 0;
  	} else {
  		ktime_get_ts(to);
  		*to = timespec_add_safe(*to, ts);
  	}
  	return 0;
  }
  
  /*
   * Write the time remaining until @end_time back to the user buffer @p
   * and return the syscall result.
   *
   * @timeval selects the user format: non-zero for struct timeval, zero
   * for struct timespec.  Nothing is written when @p is NULL, when
   * @end_time is all zero, or when the task has STICKY_TIMEOUTS set in
   * its personality.
   *
   * Returns @ret unchanged, except that -ERESTARTNOHAND becomes -EINTR
   * when the remaining time was not written (sticky timeouts or a
   * failed copy), because the call cannot be restarted in that case.
   */
  static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
  				      int timeval, int ret)
  {
  	struct timespec left;
  	struct timeval left_tv;
  	unsigned long uncopied;

  	if (!p)
  		return ret;

  	if (!(current->personality & STICKY_TIMEOUTS)) {
  		/* No update for zero timeout */
  		if (!end_time->tv_sec && !end_time->tv_nsec)
  			return ret;

  		ktime_get_ts(&left);
  		left = timespec_sub(*end_time, left);
  		if (left.tv_sec < 0)
  			left.tv_sec = left.tv_nsec = 0;

  		if (timeval) {
  			/* Clear padding, if any, before it reaches user space. */
  			if (sizeof(left_tv) > sizeof(left_tv.tv_sec) + sizeof(left_tv.tv_usec))
  				memset(&left_tv, 0, sizeof(left_tv));
  			left_tv.tv_sec = left.tv_sec;
  			left_tv.tv_usec = left.tv_nsec / NSEC_PER_USEC;
  			uncopied = copy_to_user(p, &left_tv, sizeof(left_tv));
  		} else {
  			uncopied = copy_to_user(p, &left, sizeof(left));
  		}

  		if (!uncopied)
  			return ret;
  	}

  	/*
  	 * An application may keep its timeval in read-only memory; the
  	 * Linux-specific update must not turn a select that already
  	 * completed successfully into a fault.  But since the timeval
  	 * was not updated, the system call cannot be restarted.
  	 */
  	if (ret == -ERESTARTNOHAND)
  		ret = -EINTR;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
308
309
310
311
312
313
314
315
316
317
318
  /*
   * Address of word @n of the in/out/ex bitmap of @fds.  Both macro
   * arguments are parenthesized so that expressions such as "&set" or
   * "a ? b : c" can be passed safely.
   */
  #define FDS_IN(fds, n)		((fds)->in + (n))
  #define FDS_OUT(fds, n)		((fds)->out + (n))
  #define FDS_EX(fds, n)		((fds)->ex + (n))

  /* Union of word @n of the three requested bitmaps of @fds. */
  #define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
  
  /*
   * Validate the first @n descriptors requested in @fds against the
   * open-descriptor bitmap of the current task.
   *
   * Returns -EBADF if any requested descriptor is not marked open.
   * Otherwise returns the highest requested descriptor plus one, or 0
   * if none is requested.
   */
  static int max_select_fd(unsigned long n, fd_set_bits *fds)
  {
  	/* Mask of the valid bits in the last, incomplete long word. */
  	unsigned long tail_mask = ~(~0UL << (n & (BITS_PER_LONG-1)));
  	unsigned long word = n / BITS_PER_LONG;
  	struct fdtable *fdt = files_fdtable(current->files);
  	unsigned long *open_fds = fdt->open_fds;
  	unsigned long set;
  	int max = 0;

  	/* handle last in-complete long-word first */
  	if (tail_mask) {
  		set = tail_mask & BITS(fds, word);
  		if (set) {
  			if (set & ~open_fds[word])
  				return -EBADF;
  			for (; set; set >>= 1)
  				max++;
  			max += word * BITS_PER_LONG;
  		}
  	}

  	/* Then the complete words, from the highest down to word 0. */
  	while (word) {
  		word--;
  		set = BITS(fds, word);
  		if (!set)
  			continue;
  		if (set & ~open_fds[word])
  			return -EBADF;
  		/* The maximum is already known; only validation remains. */
  		if (max)
  			continue;
  		for (; set; set >>= 1)
  			max++;
  		max += word * BITS_PER_LONG;
  	}

  	return max;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
355
356
357
  /*
   * Poll event masks used by do_select(): a descriptor is reported in
   * the in, out or ex result set when its poll mask intersects
   * POLLIN_SET, POLLOUT_SET or POLLEX_SET respectively and the
   * descriptor was requested in that set.
   */
  #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
  #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
  #define POLLEX_SET (POLLPRI)
4938d7e02   Eric Dumazet   poll: avoid extra...
358
359
360
  /*
   * Set the event key of @wait for the descriptor identified by @bit:
   * always POLLEX_SET, plus POLLIN_SET if @bit is set in @in and
   * POLLOUT_SET if @bit is set in @out.
   */
  static inline void wait_key_set(poll_table *wait, unsigned long in,
  				unsigned long out, unsigned long bit)
  {
  	unsigned long key = POLLEX_SET;

  	if (in & bit)
  		key |= POLLIN_SET;
  	if (out & bit)
  		key |= POLLOUT_SET;

  	wait->_key = key;
  }
8ff3e8e85   Arjan van de Ven   select: switch se...
367
  /*
   * Core of select(): poll every descriptor requested in @fds among the
   * first @n, record ready ones in the res_* bitmaps and sleep between
   * scans until something is ready, the timeout expires or a signal is
   * pending.
   *
   * @end_time is the expiry time, or NULL for no timeout; an all-zero
   * value means "do not wait".
   *
   * Returns the number of ready bits recorded (0 on timeout or signal),
   * -EBADF from max_select_fd(), or the error stored in the poll table.
   */
  int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
  {
  	struct poll_wqueues table;
  	poll_table *wait;
  	ktime_t expire, *to = NULL;
  	unsigned long slack = 0;
  	int timed_out = 0;
  	int retval, i;

  	rcu_read_lock();
  	retval = max_select_fd(n, fds);
  	rcu_read_unlock();

  	if (retval < 0)
  		return retval;
  	n = retval;

  	poll_initwait(&table);
  	wait = &table.pt;

  	if (end_time) {
  		if (!end_time->tv_sec && !end_time->tv_nsec) {
  			/* Zero timeout: one scan only, no wait queue entries. */
  			wait->_qproc = NULL;
  			timed_out = 1;
  		} else {
  			slack = select_estimate_accuracy(end_time);
  		}
  	}

  	retval = 0;
  	for (;;) {
  		unsigned long word;

  		/* One outer step per long word of the bitmaps. */
  		for (i = 0, word = 0; i < n; word++) {
  			unsigned long in = fds->in[word];
  			unsigned long out = fds->out[word];
  			unsigned long ex = fds->ex[word];
  			unsigned long all_bits = in | out | ex;
  			unsigned long res_in = 0, res_out = 0, res_ex = 0;
  			unsigned long bit, mask, j;

  			if (all_bits == 0) {
  				i += BITS_PER_LONG;
  				continue;
  			}

  			for (j = 0, bit = 1; j < BITS_PER_LONG && i < n;
  			     ++j, ++i, bit <<= 1) {
  				const struct file_operations *f_op;
  				struct fd f;

  				if (!(all_bits & bit))
  					continue;

  				f = fdget(i);
  				if (!f.file)
  					continue;

  				f_op = f.file->f_op;
  				mask = DEFAULT_POLLMASK;
  				if (f_op && f_op->poll) {
  					wait_key_set(wait, in, out, bit);
  					mask = (*f_op->poll)(f.file, wait);
  				}
  				fdput(f);

  				/*
  				 * Once anything is ready there will be
  				 * no sleep, so stop queueing.
  				 */
  				if ((in & bit) && (mask & POLLIN_SET)) {
  					res_in |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}
  				if ((out & bit) && (mask & POLLOUT_SET)) {
  					res_out |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}
  				if ((ex & bit) && (mask & POLLEX_SET)) {
  					res_ex |= bit;
  					retval++;
  					wait->_qproc = NULL;
  				}
  			}

  			if (res_in)
  				fds->res_in[word] = res_in;
  			if (res_out)
  				fds->res_out[word] = res_out;
  			if (res_ex)
  				fds->res_ex[word] = res_ex;
  			cond_resched();
  		}

  		/* Later scans must not queue again. */
  		wait->_qproc = NULL;

  		if (retval || timed_out || signal_pending(current))
  			break;
  		if (table.error) {
  			retval = table.error;
  			break;
  		}

  		/*
  		 * On the first pass with a timeout, convert it to
  		 * ktime_t and point @to at the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec_to_ktime(*end_time);
  			to = &expire;
  		}

  		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
  					   to, slack))
  			timed_out = 1;
  	}

  	poll_freewait(&table);

  	return retval;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475
476
477
478
479
480
481
482
  /*
   * We can actually return ERESTARTSYS instead of EINTR, but I'd
   * like to be certain this leads to no problems. So I return
   * EINTR just for safety.
   *
   * Update: ERESTARTSYS breaks at least the xview clock binary, so
   * I'm trying ERESTARTNOHAND which restart only when you want to.
   */
a2dcb44c3   Al Viro   [PATCH] make osf_...
483
  /*
   * Copy the three user fd sets in, run do_select() and copy the three
   * result sets back.
   *
   * @n is clamped to the max_fds of the current fd table.  Returns the
   * do_select() result, -EINVAL for negative @n, -ENOMEM if the bitmap
   * buffer cannot be allocated, the get_fd_set() error, -EFAULT if a
   * result set cannot be written, or -ERESTARTNOHAND when nothing is
   * ready and a signal is pending.
   */
  int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
  			   fd_set __user *exp, struct timespec *end_time)
  {
  	/* Allocate small arguments on the stack to save memory and be faster */
  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
  	void *bits = stack_fds;
  	fd_set_bits fds;
  	unsigned int size;
  	int max_fds;
  	int ret;

  	if (n < 0)
  		return -EINVAL;

  	/* max_fds can increase, so grab it once to avoid race */
  	rcu_read_lock();
  	max_fds = files_fdtable(current->files)->max_fds;
  	rcu_read_unlock();
  	if (n > max_fds)
  		n = max_fds;

  	/*
  	 * Six bitmaps are needed (in/out/ex, incoming and outgoing).
  	 * They are fd sets, so memory is sized in whole long words.
  	 */
  	size = FDS_BYTES(n);
  	if (size > sizeof(stack_fds) / 6) {
  		/* Not enough space in on-stack array; must use kmalloc */
  		bits = kmalloc(6 * size, GFP_KERNEL);
  		if (!bits)
  			return -ENOMEM;
  	}

  	fds.in      = bits;
  	fds.out     = bits +   size;
  	fds.ex      = bits + 2*size;
  	fds.res_in  = bits + 3*size;
  	fds.res_out = bits + 4*size;
  	fds.res_ex  = bits + 5*size;

  	ret = get_fd_set(n, inp, fds.in);
  	if (!ret)
  		ret = get_fd_set(n, outp, fds.out);
  	if (!ret)
  		ret = get_fd_set(n, exp, fds.ex);
  	if (ret)
  		goto out;

  	zero_fd_set(n, fds.res_in);
  	zero_fd_set(n, fds.res_out);
  	zero_fd_set(n, fds.res_ex);

  	ret = do_select(n, &fds, end_time);
  	if (ret < 0)
  		goto out;

  	/* Nothing ready and a signal pending: ask for a restart. */
  	if (!ret && signal_pending(current)) {
  		ret = -ERESTARTNOHAND;
  		goto out;
  	}

  	if (set_fd_set(n, inp, fds.res_in) ||
  	    set_fd_set(n, outp, fds.res_out) ||
  	    set_fd_set(n, exp, fds.res_ex))
  		ret = -EFAULT;

  out:
  	if (bits != stack_fds)
  		kfree(bits);
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
555
556
  /*
   * select() system call.  Converts the optional user timeval @tvp into
   * an expiry time, runs core_sys_select() and then writes the
   * remaining time back to @tvp.
   *
   * Returns -EFAULT if @tvp cannot be read and -EINVAL if the timeout
   * is rejected by poll_select_set_timeout().
   */
  SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timeval __user *, tvp)
  {
  	struct timespec end_time, *to = NULL;
  	struct timeval tv;
  	long sec, nsec;
  	int ret;

  	if (tvp) {
  		if (copy_from_user(&tv, tvp, sizeof(tv)))
  			return -EFAULT;

  		/* Fold a microsecond part of a second or more into seconds. */
  		sec = tv.tv_sec + (tv.tv_usec / USEC_PER_SEC);
  		nsec = (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC;

  		to = &end_time;
  		if (poll_select_set_timeout(to, sec, nsec))
  			return -EINVAL;
  	}

  	ret = core_sys_select(n, inp, outp, exp, to);

  	return poll_select_copy_remaining(&end_time, tvp, 1, ret);
  }
c9da9f212   Heiko Carstens   [CVE-2009-0029] M...
576
577
578
  /*
   * Implementation of pselect: like select, but with a timespec timeout
   * @tsp and an optional signal mask @sigmask of @sigsetsize bytes that
   * is installed for the duration of the call.
   *
   * Returns -EFAULT if @tsp or @sigmask cannot be read, -EINVAL for a
   * rejected timeout or a @sigsetsize other than sizeof(sigset_t), and
   * otherwise the result of core_sys_select() as adjusted by
   * poll_select_copy_remaining().
   */
  static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
  		       fd_set __user *exp, struct timespec __user *tsp,
  		       const sigset_t __user *sigmask, size_t sigsetsize)
  {
  	struct timespec ts, end_time, *to = NULL;
  	sigset_t ksigmask, sigsaved;
  	int ret;

  	if (tsp) {
  		if (copy_from_user(&ts, tsp, sizeof(ts)))
  			return -EFAULT;

  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  	}

  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
  			return -EFAULT;

  		/* SIGKILL and SIGSTOP are removed from the requested mask. */
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}

  	ret = core_sys_select(n, inp, outp, exp, to);
  	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);

  	if (!sigmask)
  		return ret;

  	if (ret == -ERESTARTNOHAND) {
  		/*
  		 * Don't restore the signal mask yet. Let do_signal()
  		 * deliver the signal on the way back to userspace,
  		 * before the signal mask is restored.
  		 */
  		memcpy(&current->saved_sigmask, &sigsaved,
  				sizeof(sigsaved));
  		set_restore_sigmask();
  	} else {
  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  	}

  	return ret;
  }
  
  /*
   * Most architectures can't handle 7-argument syscalls. So we provide a
   * 6-argument version where the sixth argument is a pointer to a structure
   * which has a pointer to the sigset_t itself followed by a size_t containing
   * the sigset size.
   */
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
628
629
630
  /*
   * pselect6() system call.  @sig, when non-NULL, points to a user
   * structure holding a pointer to the sigset_t followed by a size_t
   * with the sigset size; both are fetched here and passed on to
   * do_pselect().  Returns -EFAULT if that structure cannot be read.
   */
  SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timespec __user *, tsp,
  		void __user *, sig)
  {
  	sigset_t __user *up = NULL;
  	size_t sigsetsize = 0;

  	if (sig) {
  		if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t)))
  			return -EFAULT;
  		if (__get_user(up, (sigset_t __user * __user *)sig))
  			return -EFAULT;
  		if (__get_user(sigsetsize,
  				(size_t __user *)(sig+sizeof(void *))))
  			return -EFAULT;
  	}

  	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
644

5d0e52830   Christoph Hellwig   Add generic sys_o...
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
  #ifdef __ARCH_WANT_SYS_OLD_SELECT
  /* Argument block for old_select(): the five select() arguments. */
  struct sel_arg_struct {
  	unsigned long n;
  	fd_set __user *inp, *outp, *exp;
  	struct timeval __user *tvp;
  };

  /*
   * Old-style select entry point: the arguments arrive packed in a
   * single user structure.  Copies it in and forwards to sys_select();
   * returns -EFAULT if the structure cannot be read.
   */
  SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
  {
  	struct sel_arg_struct args;

  	if (copy_from_user(&args, arg, sizeof(args)))
  		return -EFAULT;

  	return sys_select(args.n, args.inp, args.outp, args.exp, args.tvp);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
661
662
663
664
665
666
667
  /*
   * Header of one chunk of pollfd entries; chunks are linked via @next.
   *
   * @next:    next chunk in the list
   * @len:     NOTE(review): presumably the number of valid elements in
   *           @entries - confirm against the users of this struct
   * @entries: trailing pollfd storage, declared as a C99 flexible array
   *           member rather than the GNU zero-length array extension
   */
  struct poll_list {
  	struct poll_list *next;
  	int len;
  	struct pollfd entries[];
  };

  /* Number of struct pollfd that fit in one page after the list header. */
  #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
668
669
670
671
672
  /*
   * Poll the descriptor in pollfd->fd for events.
   *
   * Only events in pollfd->events, plus POLLERR and POLLHUP, are kept.
   * The resulting mask is stored in pollfd->revents and also returned.
   * A negative fd yields 0; an fd with no open file yields POLLNVAL.
   * The fd's own poll handler uses @pwait for waiting if
   * pwait->_qproc is non-NULL.
   */
  static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
  {
  	unsigned int mask = 0;
  	int fd = pollfd->fd;

  	if (fd >= 0) {
  		struct fd f = fdget(fd);

  		if (!f.file) {
  			mask = POLLNVAL;
  		} else {
  			mask = DEFAULT_POLLMASK;
  			if (f.file->f_op && f.file->f_op->poll) {
  				pwait->_key = pollfd->events|POLLERR|POLLHUP;
  				mask = f.file->f_op->poll(f.file, pwait);
  			}
  			/* Mask out unneeded events. */
  			mask &= pollfd->events | POLLERR | POLLHUP;
  			fdput(f);
  		}
  	}

  	pollfd->revents = mask;
  	return mask;
  }
  
  static int do_poll(unsigned int nfds,  struct poll_list *list,
8ff3e8e85   Arjan van de Ven   select: switch se...
702
  		   struct poll_wqueues *wait, struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
704
  	poll_table* pt = &wait->pt;
8ff3e8e85   Arjan van de Ven   select: switch se...
705
706
  	ktime_t expire, *to = NULL;
  	int timed_out = 0, count = 0;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
707
  	unsigned long slack = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708

9f72949f6   David Woodhouse   [PATCH] Add psele...
709
  	/* Optimise the no-wait case */
8ff3e8e85   Arjan van de Ven   select: switch se...
710
  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
626cf2366   Hans Verkuil   poll: add poll_re...
711
  		pt->_qproc = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
712
713
  		timed_out = 1;
  	}
9bf084f70   Oleg Nesterov   do_poll: return -...
714

96d2ab484   Arjan van de Ven   hrtimer: fix sign...
715
  	if (end_time && !timed_out)
231f3d393   Andrew Morton   select: rename es...
716
  		slack = select_estimate_accuracy(end_time);
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
717

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718
719
  	for (;;) {
  		struct poll_list *walk;
9f72949f6   David Woodhouse   [PATCH] Add psele...
720

4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
721
722
723
724
725
726
727
728
  		for (walk = list; walk != NULL; walk = walk->next) {
  			struct pollfd * pfd, * pfd_end;
  
  			pfd = walk->entries;
  			pfd_end = pfd + walk->len;
  			for (; pfd != pfd_end; pfd++) {
  				/*
  				 * Fish for events. If we found one, record it
626cf2366   Hans Verkuil   poll: add poll_re...
729
  				 * and kill poll_table->_qproc, so we don't
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
730
731
732
733
734
735
  				 * needlessly register any other waiters after
  				 * this. They'll get immediately deregistered
  				 * when we break out and return.
  				 */
  				if (do_pollfd(pfd, pt)) {
  					count++;
626cf2366   Hans Verkuil   poll: add poll_re...
736
  					pt->_qproc = NULL;
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
737
738
  				}
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
739
  		}
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
740
741
  		/*
  		 * All waiters have already been registered, so don't provide
626cf2366   Hans Verkuil   poll: add poll_re...
742
  		 * a poll_table->_qproc to them on the next loop iteration.
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
743
  		 */
626cf2366   Hans Verkuil   poll: add poll_re...
744
  		pt->_qproc = NULL;
9bf084f70   Oleg Nesterov   do_poll: return -...
745
746
747
748
749
  		if (!count) {
  			count = wait->error;
  			if (signal_pending(current))
  				count = -EINTR;
  		}
8ff3e8e85   Arjan van de Ven   select: switch se...
750
  		if (count || timed_out)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
751
  			break;
9f72949f6   David Woodhouse   [PATCH] Add psele...
752

8ff3e8e85   Arjan van de Ven   select: switch se...
753
754
755
756
757
758
759
760
  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec_to_ktime(*end_time);
  			to = &expire;
9f72949f6   David Woodhouse   [PATCH] Add psele...
761
  		}
5f820f648   Tejun Heo   poll: allow f_op-...
762
  		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
8ff3e8e85   Arjan van de Ven   select: switch se...
763
  			timed_out = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
764
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
765
766
  	return count;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
767
768
  /*
   * Number of struct pollfd entries that fit in the array named stack_pps
   * after a struct poll_list header. Expands sizeof(stack_pps), so it is
   * only meaningful where an array of that name is in scope.
   */
  #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
  			sizeof(struct pollfd))
8ff3e8e85   Arjan van de Ven   select: switch se...
769
770
  /*
   * Copy @nfds pollfd records in from user space, poll them, and write
   * the resulting revents fields back.
   *
   * The first N_STACK_PPS records live in an on-stack buffer; the rest go
   * into kmalloc()ed chunks of at most POLLFD_PER_PAGE entries, linked
   * behind the stack chunk.
   *
   * Returns do_poll()'s result on success, -EINVAL if @nfds exceeds
   * RLIMIT_NOFILE, -EFAULT if a user copy fails, or -ENOMEM if a chunk
   * cannot be allocated. revents are written back whatever do_poll()
   * returned.
   */
  int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
  		struct timespec *end_time)
  {
  	struct poll_wqueues table;
  	/*
  	 * Small requests are served from the stack to save memory and
  	 * time; the buffer is an array of long so that it is suitably
  	 * aligned on 64 bit archs.
  	 */
  	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
  	struct poll_list *const head = (struct poll_list *)stack_pps;
  	struct poll_list *chunk = head;
  	struct poll_list *next;
  	unsigned long remaining = nfds;
  	int err = -EFAULT;
  	int fdcount, len, size;

  	if (nfds > rlimit(RLIMIT_NOFILE))
  		return -EINVAL;

  	chunk->next = NULL;
  	chunk->len = min_t(unsigned int, nfds, N_STACK_PPS);
  	while (chunk->len) {
  		if (copy_from_user(chunk->entries, ufds + (nfds - remaining),
  					sizeof(struct pollfd) * chunk->len))
  			goto out_fds;

  		remaining -= chunk->len;
  		if (!remaining)
  			break;

  		len = min(remaining, POLLFD_PER_PAGE);
  		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
  		next = kmalloc(size, GFP_KERNEL);
  		/* Link before checking so the chain stays NULL-terminated. */
  		chunk->next = next;
  		if (!next) {
  			err = -ENOMEM;
  			goto out_fds;
  		}
  		next->next = NULL;
  		next->len = len;
  		chunk = next;
  	}

  	poll_initwait(&table);
  	fdcount = do_poll(nfds, head, &table, end_time);
  	poll_freewait(&table);

  	/* Hand the per-descriptor results back, chunk by chunk. */
  	for (chunk = head; chunk; chunk = chunk->next) {
  		struct pollfd *src = chunk->entries;
  		struct pollfd *stop = src + chunk->len;

  		for (; src != stop; src++, ufds++) {
  			if (__put_user(src->revents, &ufds->revents))
  				goto out_fds;
  		}
  	}

  	err = fdcount;
  out_fds:
  	/* The head chunk is on the stack; free only what follows it. */
  	for (chunk = head->next; chunk; chunk = next) {
  		next = chunk->next;
  		kfree(chunk);
  	}

  	return err;
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
831

3075d9da0   Chris Wright   Use ERESTART_REST...
832
833
  static long do_restart_poll(struct restart_block *restart_block)
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
834
835
836
  	struct pollfd __user *ufds = restart_block->poll.ufds;
  	int nfds = restart_block->poll.nfds;
  	struct timespec *to = NULL, end_time;
3075d9da0   Chris Wright   Use ERESTART_REST...
837
  	int ret;
8ff3e8e85   Arjan van de Ven   select: switch se...
838
839
840
841
842
843
844
  	if (restart_block->poll.has_timeout) {
  		end_time.tv_sec = restart_block->poll.tv_sec;
  		end_time.tv_nsec = restart_block->poll.tv_nsec;
  		to = &end_time;
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
3075d9da0   Chris Wright   Use ERESTART_REST...
845
846
  	if (ret == -EINTR) {
  		restart_block->fn = do_restart_poll;
3075d9da0   Chris Wright   Use ERESTART_REST...
847
848
849
850
  		ret = -ERESTART_RESTARTBLOCK;
  	}
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
851
  /*
   * poll(2) entry point.
   *
   * A non-negative @timeout_msecs is split into seconds and nanoseconds
   * and passed to poll_select_set_timeout() to fill in the end time; a
   * negative value means no timeout is passed to do_sys_poll(). When the
   * call is interrupted (-EINTR) the arguments and the end time are
   * saved in the thread's restart block and -ERESTART_RESTARTBLOCK is
   * returned so that do_restart_poll() can resume it.
   */
  SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
  		int, timeout_msecs)
  {
  	const int has_timeout = (timeout_msecs >= 0);
  	struct timespec end_time;
  	struct timespec *deadline = NULL;
  	struct restart_block *rb;
  	int rc;

  	if (has_timeout) {
  		deadline = &end_time;
  		poll_select_set_timeout(deadline, timeout_msecs / MSEC_PER_SEC,
  			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
  	}

  	rc = do_sys_poll(ufds, nfds, deadline);
  	if (rc != -EINTR)
  		return rc;

  	/* Interrupted: stash everything needed to restart the call. */
  	rb = &current_thread_info()->restart_block;
  	rb->fn = do_restart_poll;
  	rb->poll.ufds = ufds;
  	rb->poll.nfds = nfds;
  	rb->poll.has_timeout = has_timeout;
  	if (has_timeout) {
  		rb->poll.tv_sec = end_time.tv_sec;
  		rb->poll.tv_nsec = end_time.tv_nsec;
  	}

  	return -ERESTART_RESTARTBLOCK;
  }
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
881
882
883
  /*
   * ppoll(2) entry point: poll with a timespec timeout and an optional
   * temporary signal mask.
   *
   * Returns -EFAULT if @tsp or @sigmask cannot be read, -EINVAL if the
   * timeout is rejected by poll_select_set_timeout() or @sigsetsize is
   * not sizeof(sigset_t). Otherwise the result of do_sys_poll() is
   * passed through poll_select_copy_remaining(), with -EINTR first
   * turned into -ERESTARTNOHAND.
   */
  SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
  		struct timespec __user *, tsp, const sigset_t __user *, sigmask,
  		size_t, sigsetsize)
  {
  	struct timespec ts, end_time;
  	struct timespec *deadline = NULL;
  	sigset_t ksigmask, sigsaved;
  	int rc;

  	if (tsp) {
  		if (copy_from_user(&ts, tsp, sizeof(ts)))
  			return -EFAULT;
  		if (poll_select_set_timeout(&end_time, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  		deadline = &end_time;
  	}

  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
  			return -EFAULT;

  		/* SIGKILL and SIGSTOP are removed from the requested mask. */
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}

  	rc = do_sys_poll(ufds, nfds, deadline);

  	if (rc != -EINTR) {
  		if (sigmask)
  			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  	} else {
  		/*
  		 * We can restart this syscall, usually. The old mask is not
  		 * restored here: it is parked in current->saved_sigmask so
  		 * that do_signal() can deliver the signal on the way back
  		 * to userspace before the mask is put back.
  		 */
  		if (sigmask) {
  			memcpy(&current->saved_sigmask, &sigsaved,
  					sizeof(sigsaved));
  			set_restore_sigmask();
  		}
  		rc = -ERESTARTNOHAND;
  	}

  	return poll_select_copy_remaining(&end_time, tsp, 0, rc);
  }