Blame view

fs/select.c 24.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
  /*
   * This file contains the procedures for the handling of select and poll
   *
   * Created for Linux based loosely upon Mathius Lattner's minix
   * patches by Peter MacDonald. Heavily edited by Linus.
   *
   *  4 February 1994
   *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
   *     flag set in its personality we do *not* modify the given timeout
   *     parameter to reflect time remaining.
   *
   *  24 January 2000
   *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
   *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
   */
022a16924   Milind Arun Choudhary   ROUND_UP macro cl...
16
  #include <linux/kernel.h>
a99bbaf5e   Alexey Dobriyan   headers: remove s...
17
  #include <linux/sched.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
  #include <linux/syscalls.h>
  #include <linux/module.h>
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
22
23
  #include <linux/poll.h>
  #include <linux/personality.h> /* for STICKY_TIMEOUTS */
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
24
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
  #include <linux/fs.h>
b835996f6   Dipankar Sarma   [PATCH] files: lo...
26
  #include <linux/rcupdate.h>
8ff3e8e85   Arjan van de Ven   select: switch se...
27
  #include <linux/hrtimer.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
29
  
  #include <asm/uaccess.h>
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
30
31
32
33
34
35
36
37
38
39
40
41
  
  /*
   * Estimate expected accuracy in ns from a timeval.
   *
   * After quite a bit of churning around, we've settled on
   * a simple thing of taking 0.1% of the timeout as the
   * slack, with a cap of 100 msec.
   * "nice" tasks get a 0.5% slack instead.
   *
   * Consider this comment an open invitation to come up with even
   * better solutions..
   */
5ae87e79e   Guillaume Knispel   poll/select: avoi...
42
  #define MAX_SLACK	(100 * NSEC_PER_MSEC)
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
43
  /*
   * Compute the timer slack, in nanoseconds, for the relative timeout *tv.
   *
   * Returns 0 when tv->tv_sec is negative.  Otherwise the timeout is divided
   * by divfactor (1000, or 200 when task_nice(current) is positive) and the
   * result is capped at MAX_SLACK.
   */
  static long __estimate_accuracy(struct timespec *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
44
  {
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
45
  	long slack;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
46
  	int divfactor = 1000;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
47
48
  	if (tv->tv_sec < 0)
  		return 0;
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
49
  	if (task_nice(current) > 0)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
50
  		divfactor = divfactor / 5;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
51
52
  	/*
  	 * The seconds part alone would already exceed the cap, so return
  	 * before doing the multiplication below.
  	 */
  	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
  		return MAX_SLACK;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
53
54
  	slack = tv->tv_nsec / divfactor;
  	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
5ae87e79e   Guillaume Knispel   poll/select: avoi...
55
56
  	if (slack > MAX_SLACK)
  		return MAX_SLACK;
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
57

90d6e24a3   Arjan van de Ven   hrtimer: make sel...
58
59
  	return slack;
  }
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
60
  /*
   * Compute the timer slack for a wait that ends at the absolute time *tv.
   *
   * Realtime tasks get 0.  For everyone else the time remaining until *tv
   * (measured against ktime_get_ts()) is fed to __estimate_accuracy(), and
   * the result is raised to current->timer_slack_ns if it is smaller.
   */
  static long estimate_accuracy(struct timespec *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
61
62
63
64
65
66
67
  {
  	unsigned long ret;
  	struct timespec now;
  
  	/*
  	 * Realtime tasks get a slack of 0 for obvious reasons.
  	 */
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
68
  	if (rt_task(current))
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
69
70
71
72
73
74
75
76
77
  		return 0;
  
  	ktime_get_ts(&now);
  	/* from here on "now" holds the time remaining, not the current time */
  	now = timespec_sub(*tv, now);
  	ret = __estimate_accuracy(&now);
  	if (ret < current->timer_slack_ns)
  		return current->timer_slack_ns;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
  /*
   * Overflow storage for wait-queue entries, handed out by poll_get_entry()
   * once the inline entries of a struct poll_wqueues are used up.  Each
   * instance occupies one page obtained with __get_free_page(); the entry
   * array takes whatever room is left after the two header pointers.
   */
  struct poll_table_page {
  	struct poll_table_page *next;		/* page that was the list head before this one */
  	struct poll_table_entry *entry;		/* next unused slot in entries[] */
  	struct poll_table_entry entries[];	/* C99 flexible array member */
  };
  
  /*
   * True when one more entry placed at (table)->entry would end beyond
   * PAGE_SIZE bytes from the start of the page that holds the table.
   */
  #define POLL_TABLE_FULL(table) \
  	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
  
  /*
   * Ok, Peter made a complicated, but straightforward multiple_wait() function.
   * I have rewritten this, taking some shortcuts: This code may not be easy to
   * follow, but it should be free of race-conditions, and it's practical. If you
   * understand what I'm doing here, then you understand how the linux
   * sleep/wakeup mechanism works.
   *
   * Two very simple procedures, poll_wait() and poll_freewait() make all the
   * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
   * as all select/poll functions have to call it to add an entry to the
   * poll table.
   */
75c96f858   Adrian Bunk   [PATCH] make some...
99
100
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  		       poll_table *p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
101
102
103
104
  
  /*
   * Prepare *pwq for a poll/select run on behalf of the current task:
   * install __pollwait() as the poll-table callback, record current as the
   * task to wake, and reset the triggered flag, the error code, the page
   * list and the inline entry counter.
   */
  void poll_initwait(struct poll_wqueues *pwq)
  {
  	init_poll_funcptr(&pwq->pt, __pollwait);
5f820f648   Tejun Heo   poll: allow f_op-...
105
  	pwq->polling_task = current;
b2add73db   Guillaume Knispel   poll/select: init...
106
  	pwq->triggered = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
107
108
  	pwq->error = 0;
  	pwq->table = NULL;
70674f95c   Andi Kleen   [PATCH] Optimize ...
109
  	pwq->inline_index = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
110
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111
  EXPORT_SYMBOL(poll_initwait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
112
113
  /*
   * Undo what __pollwait() did for one entry: take it off the wait queue
   * it was added to and drop the file reference taken with get_file().
   */
  static void free_poll_entry(struct poll_table_entry *entry)
  {
ccf6780dc   WANG Cong   Style fix in fs/s...
114
  	remove_wait_queue(entry->wait_address, &entry->wait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
115
116
  	fput(entry->filp);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
118
119
  /*
   * Release everything registered through *pwq: first the inline entries,
   * then every entry on every overflow page, freeing each page once its
   * entries are released.
   */
  void poll_freewait(struct poll_wqueues *pwq)
  {
  	struct poll_table_page * p = pwq->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
120
121
122
  	int i;
  	for (i = 0; i < pwq->inline_index; i++)
  		free_poll_entry(pwq->inline_entries + i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
123
124
125
126
127
128
129
  	while (p) {
  		struct poll_table_entry * entry;
  		struct poll_table_page *old;
  
  		/*
  		 * Walk backwards from the first unused slot.  The do/while
  		 * frees at least one entry per page; poll_get_entry() hands
  		 * out a slot whenever it allocates a page, so a page on the
  		 * list is expected to hold at least one.
  		 */
  		entry = p->entry;
  		do {
  			entry--;
70674f95c   Andi Kleen   [PATCH] Optimize ...
130
  			free_poll_entry(entry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
131
132
133
134
135
136
  		} while (entry > p->entries);
  		old = p;
  		p = p->next;
  		free_page((unsigned long) old);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
  EXPORT_SYMBOL(poll_freewait);
5f820f648   Tejun Heo   poll: allow f_op-...
138
  /*
   * Hand out the next unused poll_table_entry of *p.
   *
   * Inline entries are used first.  After that, entries come from the page
   * at the head of p->table; a new page is allocated and pushed onto the
   * list when there is none yet or the head page is full.
   *
   * Returns NULL, with p->error set to -ENOMEM, if a page cannot be
   * allocated.
   */
  static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
  	struct poll_table_page *table = p->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
141
142
  	if (p->inline_index < N_INLINE_POLL_ENTRIES)
  		return p->inline_entries + p->inline_index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
143
144
145
146
147
148
  	if (!table || POLL_TABLE_FULL(table)) {
  		struct poll_table_page *new_table;
  
  		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
  		if (!new_table) {
  			p->error = -ENOMEM;
70674f95c   Andi Kleen   [PATCH] Optimize ...
149
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
151
152
153
154
155
  		}
  		/* fresh page: first free slot is the start of the array */
  		new_table->entry = new_table->entries;
  		new_table->next = table;
  		p->table = new_table;
  		table = new_table;
  	}
70674f95c   Andi Kleen   [PATCH] Optimize ...
156
157
  	return table->entry++;
  }
4938d7e02   Eric Dumazet   poll: avoid extra...
158
  /*
   * Wake the task that owns the poll_wqueues stored in wait->private.
   *
   * Sets pwq->triggered (after a write barrier) and then runs
   * default_wake_function() on a temporary wait-queue entry built for
   * pwq->polling_task.  Returns whatever default_wake_function() returns.
   */
  static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
5f820f648   Tejun Heo   poll: allow f_op-...
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
  {
  	struct poll_wqueues *pwq = wait->private;
  	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
  
  	/*
  	 * Although this function is called under waitqueue lock, LOCK
  	 * doesn't imply write barrier and the users expect write
  	 * barrier semantics on wakeup functions.  The following
  	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
  	 * and is paired with set_mb() in poll_schedule_timeout.
  	 */
  	smp_wmb();
  	pwq->triggered = 1;
  
  	/*
  	 * Perform the default wake up operation using a dummy
  	 * waitqueue.
  	 *
  	 * TODO: This is hacky but there currently is no interface to
  	 * pass in @sync.  @sync is scheduled to be removed and once
  	 * that happens, wake_up_process() can be used directly.
  	 */
  	return default_wake_function(&dummy_wait, mode, sync, key);
  }
4938d7e02   Eric Dumazet   poll: avoid extra...
183
184
185
186
187
188
189
190
191
  /*
   * Wait-queue callback installed by __pollwait().
   *
   * A non-NULL key is treated as a bitmask of events; if none of those
   * bits is in the mask recorded for this entry the wakeup is ignored and
   * 0 is returned.  In every other case the wakeup is forwarded to
   * __pollwake() and its result is returned.
   */
  static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_table_entry *pte =
  		container_of(wait, struct poll_table_entry, wait);
  	unsigned long events = (unsigned long)key;

  	/* a NULL key carries no event information, so always wake */
  	if (!key || (events & pte->key))
  		return __pollwake(wait, mode, sync, key);

  	return 0;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
192
193
194
195
  /*
   * Poll-table callback: register the polling task on wait_address.
   *
   * Takes an entry from the poll_wqueues that embeds *p, pins filp with
   * get_file(), records the wait queue and the event key currently set in
   * p->key, and adds the entry to wait_address with pollwake() as its wake
   * function.  If no entry can be obtained the call does nothing;
   * poll_get_entry() has recorded the error in the poll_wqueues.
   */
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  				poll_table *p)
  {
5f820f648   Tejun Heo   poll: allow f_op-...
196
197
  	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
  	struct poll_table_entry *entry = poll_get_entry(pwq);
70674f95c   Andi Kleen   [PATCH] Optimize ...
198
199
200
201
202
  	if (!entry)
  		return;
  	get_file(filp);
  	entry->filp = filp;
  	entry->wait_address = wait_address;
4938d7e02   Eric Dumazet   poll: avoid extra...
203
  	entry->key = p->key;
5f820f648   Tejun Heo   poll: allow f_op-...
204
205
  	init_waitqueue_func_entry(&entry->wait, pollwake);
  	/* __pollwake() reads the owning poll_wqueues back from here */
  	entry->wait.private = pwq;
ccf6780dc   WANG Cong   Style fix in fs/s...
206
  	add_wait_queue(wait_address, &entry->wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207
  }
5f820f648   Tejun Heo   poll: allow f_op-...
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
  /*
   * Put the current task to sleep in @state until it is woken or the
   * timer expires, unless a wakeup has already been recorded.
   *
   * @pwq:     poll_wqueues whose triggered flag is checked and then cleared
   * @state:   task state to sleep in
   * @expires: expiry time handed to schedule_hrtimeout_range() with
   *           HRTIMER_MODE_ABS
   * @slack:   slack handed to schedule_hrtimeout_range()
   *
   * Returns -EINTR when pwq->triggered was already set and no sleep took
   * place, otherwise the return value of schedule_hrtimeout_range().
   * do_select() treats a return of 0 as "timed out".
   */
  int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
  			  ktime_t *expires, unsigned long slack)
  {
  	int rc = -EINTR;
  
  	/* the state must be set before triggered is tested */
  	set_current_state(state);
  	if (!pwq->triggered)
  		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
  	__set_current_state(TASK_RUNNING);
  
  	/*
  	 * Prepare for the next iteration.
  	 *
  	 * The following set_mb() serves two purposes.  First, it's
  	 * the counterpart rmb of the wmb in pollwake() such that data
  	 * written before wake up is always visible after wake up.
  	 * Second, the full barrier guarantees that triggered clearing
  	 * doesn't pass event check of the next iteration.  Note that
  	 * this problem doesn't exist for the first iteration as
  	 * add_wait_queue() has full barrier semantics.
  	 */
  	set_mb(pwq->triggered, 0);
  
  	return rc;
  }
  EXPORT_SYMBOL(poll_schedule_timeout);
b773ad40a   Thomas Gleixner   select: add poll_...
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
  /**
   * poll_select_set_timeout - helper function to setup the timeout value
   * @to:		pointer to timespec variable for the final timeout
   * @sec:	seconds (from user space)
   * @nsec:	nanoseconds (from user space)
   *
   * Note, we do not use a timespec for the user space value here, That
   * way we can use the function for timeval and compat interfaces as well.
   *
   * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
   */
  int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
  {
  	struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
  
  	if (!timespec_valid(&ts))
  		return -EINVAL;
  
  	/* Optimize for the zero timeout value here */
  	if (!sec && !nsec) {
  		to->tv_sec = to->tv_nsec = 0;
  	} else {
  		ktime_get_ts(to);
  		*to = timespec_add_safe(*to, ts);
  	}
  	return 0;
  }
  
  /*
   * Write the time left until *end_time back to the user-supplied timeout.
   *
   * @end_time: end time computed by poll_select_set_timeout()
   * @p:        user pointer to the timeout, or NULL if none was given
   * @timeval:  non-zero if @p points to a struct timeval, zero for a
   *            struct timespec
   * @ret:      result of the select call so far
   *
   * Returns @ret unchanged when there is nothing to write back (no
   * pointer, or an all-zero end time) or when the write-back succeeds.
   * When the task has STICKY_TIMEOUTS set, or the copy to user space
   * fails, the timeout is left alone and -ERESTARTNOHAND is turned into
   * -EINTR, because the call cannot be restarted with a stale timeout.
   */
  static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
  				      int timeval, int ret)
  {
  	struct timespec rts;
  	struct timeval rtv;
  
  	if (!p)
  		return ret;
  
  	if (current->personality & STICKY_TIMEOUTS)
  		goto sticky;
  
  	/* No update for zero timeout */
  	if (!end_time->tv_sec && !end_time->tv_nsec)
  		return ret;
  
  	ktime_get_ts(&rts);
  	rts = timespec_sub(*end_time, rts);
  	/* the end time has already passed: report zero remaining */
  	if (rts.tv_sec < 0)
  		rts.tv_sec = rts.tv_nsec = 0;
  
  	if (timeval) {
  		/*
  		 * Where struct timeval is larger than its two members it
  		 * contains padding.  Clear it so that no uninitialised
  		 * stack bytes are copied out to user space.
  		 */
  		if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
  			memset(&rtv, 0, sizeof(rtv));
  		rtv.tv_sec = rts.tv_sec;
  		rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
  
  		if (!copy_to_user(p, &rtv, sizeof(rtv)))
  			return ret;
  
  	} else if (!copy_to_user(p, &rts, sizeof(rts)))
  		return ret;
  
  	/*
  	 * If an application puts its timeval in read-only memory, we
  	 * don't want the Linux-specific update to the timeval to
  	 * cause a fault after the select has completed
  	 * successfully. However, because we're not updating the
  	 * timeval, we can't restart the system call.
  	 */
  
  sticky:
  	if (ret == -ERESTARTNOHAND)
  		ret = -EINTR;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
306
307
308
309
310
311
312
313
314
315
316
  /*
   * Address of the n-th long word of the in/out/ex bitmap of an
   * fd_set_bits.  Both arguments are parenthesised so that expressions
   * such as &set or i + 1 expand correctly.
   */
  #define FDS_IN(fds, n)		((fds)->in + (n))
  #define FDS_OUT(fds, n)		((fds)->out + (n))
  #define FDS_EX(fds, n)		((fds)->ex + (n))
  
  /* Union of the n-th long word of the in, out and ex bitmaps. */
  #define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
  
  /*
   * Check the requested descriptors against the open-fd bitmap of the
   * current task's file table and find the highest one requested.
   *
   * Only the first n descriptors are considered.  Returns -EBADF if any
   * bit set in the in/out/ex bitmaps has no matching bit in open_fds.
   * Otherwise returns one more than the highest requested descriptor, or
   * 0 if no descriptor is requested.  do_select() calls this between
   * rcu_read_lock() and rcu_read_unlock().
   */
  static int max_select_fd(unsigned long n, fd_set_bits *fds)
  {
  	unsigned long *open_fds;
  	unsigned long set;
  	int max;
badf16621   Dipankar Sarma   [PATCH] files: br...
317
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
318
319
320
321
  
  	/* handle last in-complete long-word first */
  	set = ~(~0UL << (n & (__NFDBITS-1)));
  	n /= __NFDBITS;
badf16621   Dipankar Sarma   [PATCH] files: br...
322
323
  	fdt = files_fdtable(current->files);
  	open_fds = fdt->open_fds->fds_bits+n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
  	max = 0;
  	if (set) {
  		set &= BITS(fds, n);
  		if (set) {
  			if (!(set & ~*open_fds))
  				goto get_max;
  			return -EBADF;
  		}
  	}
  	/* walk the complete long words from the top down */
  	while (n) {
  		open_fds--;
  		n--;
  		set = BITS(fds, n);
  		if (!set)
  			continue;
  		if (set & ~*open_fds)
  			return -EBADF;
  		/* max is already known; keep looping only to validate */
  		if (max)
  			continue;
  get_max:
  		/* count bit positions up to and including the highest set bit */
  		do {
  			max++;
  			set >>= 1;
  		} while (set);
  		max += n * __NFDBITS;
  	}
  
  	return max;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
353
354
355
  /*
   * Poll event masks that do_select() maps onto the three select() sets:
   * a descriptor is reported in the read, write or exception result set
   * when its poll mask intersects POLLIN_SET, POLLOUT_SET or POLLEX_SET
   * respectively.
   */
  #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
  #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
  #define POLLEX_SET (POLLPRI)
4938d7e02   Eric Dumazet   poll: avoid extra...
356
357
358
359
360
361
362
363
364
365
366
  /*
   * Store in wait->key the events the caller is interested in for the
   * descriptor selected by @bit: POLLEX_SET always, plus POLLIN_SET when
   * @bit is set in @in and POLLOUT_SET when @bit is set in @out.
   * Does nothing when @wait is NULL.
   */
  static inline void wait_key_set(poll_table *wait, unsigned long in,
  				unsigned long out, unsigned long bit)
  {
  	unsigned long events = POLLEX_SET;

  	if (!wait)
  		return;

  	if (in & bit)
  		events |= POLLIN_SET;
  	if (out & bit)
  		events |= POLLOUT_SET;

  	wait->key = events;
  }
8ff3e8e85   Arjan van de Ven   select: switch se...
367
  /*
   * Core of select(): poll every descriptor named in fds->in/out/ex and
   * record the ready ones in fds->res_in/res_out/res_ex.
   *
   * n is first replaced by the result of max_select_fd().  The scan is
   * repeated, sleeping in poll_schedule_timeout() between passes, until a
   * descriptor is ready, the wait has timed out, a signal is pending or
   * the poll table reports an error.  An all-zero *end_time means a single
   * pass with no sleeping.  When end_time is NULL no expiry is passed to
   * poll_schedule_timeout().
   *
   * Returns the number of bits set in the result sets, 0 on timeout or
   * pending signal with nothing ready, or a negative error from
   * max_select_fd() or from the poll table.
   */
  int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
368
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
369
  	ktime_t expire, *to = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
370
371
  	struct poll_wqueues table;
  	poll_table *wait;
8ff3e8e85   Arjan van de Ven   select: switch se...
372
  	int retval, i, timed_out = 0;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
373
  	unsigned long slack = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
374

b835996f6   Dipankar Sarma   [PATCH] files: lo...
375
  	rcu_read_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
376
  	retval = max_select_fd(n, fds);
b835996f6   Dipankar Sarma   [PATCH] files: lo...
377
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
378
379
380
381
382
383
384
  
  	if (retval < 0)
  		return retval;
  	n = retval;
  
  	poll_initwait(&table);
  	wait = &table.pt;
8ff3e8e85   Arjan van de Ven   select: switch se...
385
  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
386
  		wait = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
387
388
  		timed_out = 1;
  	}
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
389
  	if (end_time && !timed_out)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
390
  		slack = estimate_accuracy(end_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
391
392
393
  	retval = 0;
  	for (;;) {
  		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
394
395
396
397
398
399
  		inp = fds->in; outp = fds->out; exp = fds->ex;
  		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
  
  		/* one iteration per long word of the bitmaps */
  		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
  			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
  			unsigned long res_in = 0, res_out = 0, res_ex = 0;
99ac48f54   Arjan van de Ven   [PATCH] mark f_op...
400
  			const struct file_operations *f_op = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
401
402
403
404
405
406
407
408
409
410
  			struct file *file = NULL;
  
  			in = *inp++; out = *outp++; ex = *exp++;
  			all_bits = in | out | ex;
  			/* nothing requested in this word: skip all its bits */
  			if (all_bits == 0) {
  				i += __NFDBITS;
  				continue;
  			}
  
  			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
e4a1f129f   Eric Dumazet   [PATCH] use fget_...
411
  				int fput_needed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
412
413
414
415
  				if (i >= n)
  					break;
  				if (!(bit & all_bits))
  					continue;
e4a1f129f   Eric Dumazet   [PATCH] use fget_...
416
  				file = fget_light(i, &fput_needed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
417
418
419
  				if (file) {
  					f_op = file->f_op;
  					mask = DEFAULT_POLLMASK;
4938d7e02   Eric Dumazet   poll: avoid extra...
420
421
422
423
  					if (f_op && f_op->poll) {
  						wait_key_set(wait, in, out, bit);
  						mask = (*f_op->poll)(file, wait);
  					}
e4a1f129f   Eric Dumazet   [PATCH] use fget_...
424
  					fput_light(file, fput_needed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
426
427
  					/*
  					 * Once something is ready, wait is
  					 * cleared so later ->poll() calls get
  					 * a NULL poll table.
  					 */
  					if ((mask & POLLIN_SET) && (in & bit)) {
  						res_in |= bit;
  						retval++;
4938d7e02   Eric Dumazet   poll: avoid extra...
428
  						wait = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
429
430
431
432
  					}
  					if ((mask & POLLOUT_SET) && (out & bit)) {
  						res_out |= bit;
  						retval++;
4938d7e02   Eric Dumazet   poll: avoid extra...
433
  						wait = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
434
435
436
437
  					}
  					if ((mask & POLLEX_SET) && (ex & bit)) {
  						res_ex |= bit;
  						retval++;
4938d7e02   Eric Dumazet   poll: avoid extra...
438
  						wait = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
439
440
  					}
  				}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
441
442
443
444
445
446
447
  			}
  			if (res_in)
  				*rinp = res_in;
  			if (res_out)
  				*routp = res_out;
  			if (res_ex)
  				*rexp = res_ex;
55d853849   Linus Torvalds   Fix performance r...
448
  			cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
449
450
  		}
  		/* passes after the first one poll with a NULL poll table */
  		wait = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
451
  		if (retval || timed_out || signal_pending(current))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
  			break;
f5264481c   Pavel Machek   trivial: small cl...
453
  		if (table.error) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
454
455
456
  			retval = table.error;
  			break;
  		}
9f72949f6   David Woodhouse   [PATCH] Add psele...
457

8ff3e8e85   Arjan van de Ven   select: switch se...
458
459
460
461
462
463
464
465
  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec_to_ktime(*end_time);
  			to = &expire;
9f72949f6   David Woodhouse   [PATCH] Add psele...
466
  		}
8ff3e8e85   Arjan van de Ven   select: switch se...
467

5f820f648   Tejun Heo   poll: allow f_op-...
468
469
  		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
  					   to, slack))
8ff3e8e85   Arjan van de Ven   select: switch se...
470
  			timed_out = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
472
473
  
  	poll_freewait(&table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
474
475
  	return retval;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
476
477
478
479
480
481
482
483
484
485
  /*
   * We can actually return ERESTARTSYS instead of EINTR, but I'd
   * like to be certain this leads to no problems. So I return
   * EINTR just for safety.
   *
   * Update: ERESTARTSYS breaks at least the xview clock binary, so
   * I'm trying ERESTARTNOHAND which restart only when you want to.
   */
  #define MAX_SELECT_SECONDS \
  	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
a2dcb44c3   Al Viro   [PATCH] make osf_...
486
  /*
   * Copy the three user fd sets in, run do_select() on them and copy the
   * result sets back out.
   *
   * n is clamped to the max_fds of the current file table.  The six
   * bitmaps (three inputs, three results) are carved out of stack_fds
   * when they fit, otherwise out of a kmalloc() buffer.
   *
   * Returns -EINVAL for a negative n, -ENOMEM if the buffer cannot be
   * allocated, the error from get_fd_set(), a negative do_select()
   * result, -ERESTARTNOHAND when nothing is ready and a signal is
   * pending, -EFAULT if a result set cannot be written back, and
   * otherwise the count returned by do_select().
   */
  int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
8ff3e8e85   Arjan van de Ven   select: switch se...
487
  			   fd_set __user *exp, struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
488
489
  {
  	fd_set_bits fds;
29ff2db55   Andrew Morton   [PATCH] select() ...
490
  	void *bits;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
491
  	int ret, max_fds;
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
492
  	unsigned int size;
badf16621   Dipankar Sarma   [PATCH] files: br...
493
  	struct fdtable *fdt;
70674f95c   Andi Kleen   [PATCH] Optimize ...
494
  	/* Allocate small arguments on the stack to save memory and be faster */
30c14e40e   Jes Sorensen   [PATCH] avoid una...
495
  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
496

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
497
498
499
  	ret = -EINVAL;
  	if (n < 0)
  		goto out_nofds;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
500
  	/* max_fds can increase, so grab it once to avoid race */
b835996f6   Dipankar Sarma   [PATCH] files: lo...
501
  	rcu_read_lock();
badf16621   Dipankar Sarma   [PATCH] files: br...
502
  	fdt = files_fdtable(current->files);
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
503
  	max_fds = fdt->max_fds;
b835996f6   Dipankar Sarma   [PATCH] files: lo...
504
  	rcu_read_unlock();
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
505
506
  	if (n > max_fds)
  		n = max_fds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
507
508
509
510
511
512
  
  	/*
  	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
  	 * since we used fdset we need to allocate memory in units of
  	 * long-words. 
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
513
  	size = FDS_BYTES(n);
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
514
515
516
517
  	bits = stack_fds;
  	if (size > sizeof(stack_fds) / 6) {
  		/* Not enough space in on-stack array; must use kmalloc */
  		ret = -ENOMEM;
70674f95c   Andi Kleen   [PATCH] Optimize ...
518
  		bits = kmalloc(6 * size, GFP_KERNEL);
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
519
520
521
  		if (!bits)
  			goto out_nofds;
  	}
29ff2db55   Andrew Morton   [PATCH] select() ...
522
523
524
525
526
527
  	/* six consecutive bitmaps of size bytes each */
  	fds.in      = bits;
  	fds.out     = bits +   size;
  	fds.ex      = bits + 2*size;
  	fds.res_in  = bits + 3*size;
  	fds.res_out = bits + 4*size;
  	fds.res_ex  = bits + 5*size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
528
529
530
531
532
533
534
535
  
  	if ((ret = get_fd_set(n, inp, fds.in)) ||
  	    (ret = get_fd_set(n, outp, fds.out)) ||
  	    (ret = get_fd_set(n, exp, fds.ex)))
  		goto out;
  	zero_fd_set(n, fds.res_in);
  	zero_fd_set(n, fds.res_out);
  	zero_fd_set(n, fds.res_ex);
8ff3e8e85   Arjan van de Ven   select: switch se...
536
  	ret = do_select(n, &fds, end_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
  
  	if (ret < 0)
  		goto out;
  	/* nothing ready: report a pending signal, else fall through with 0 */
  	if (!ret) {
  		ret = -ERESTARTNOHAND;
  		if (signal_pending(current))
  			goto out;
  		ret = 0;
  	}
  
  	if (set_fd_set(n, inp, fds.res_in) ||
  	    set_fd_set(n, outp, fds.res_out) ||
  	    set_fd_set(n, exp, fds.res_ex))
  		ret = -EFAULT;
  
  out:
70674f95c   Andi Kleen   [PATCH] Optimize ...
553
554
  	if (bits != stack_fds)
  		kfree(bits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
555
556
557
  out_nofds:
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
558
559
  /*
   * select() system call.
   *
   * When tvp is non-NULL the timeval is copied in and converted to an end
   * time; whole seconds contained in tv_usec are carried into the seconds
   * argument before poll_select_set_timeout() validates the value.  After
   * core_sys_select() has run, poll_select_copy_remaining() handles the
   * write-back to tvp.
   *
   * Returns -EFAULT if the timeval cannot be read, -EINVAL if it is
   * rejected, otherwise the result of poll_select_copy_remaining().
   */
  SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timeval __user *, tvp)
9f72949f6   David Woodhouse   [PATCH] Add psele...
560
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
561
  	struct timespec end_time, *to = NULL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
562
563
564
565
566
567
  	struct timeval tv;
  	int ret;
  
  	if (tvp) {
  		if (copy_from_user(&tv, tvp, sizeof(tv)))
  			return -EFAULT;
8ff3e8e85   Arjan van de Ven   select: switch se...
568
  		to = &end_time;
4d36a9e65   Arjan van de Ven   select: deal with...
569
570
571
  		if (poll_select_set_timeout(to,
  				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
  				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
9f72949f6   David Woodhouse   [PATCH] Add psele...
572
  			return -EINVAL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
573
  	}
8ff3e8e85   Arjan van de Ven   select: switch se...
574
575
  	ret = core_sys_select(n, inp, outp, exp, to);
  	/* end_time is only read by the callee when tvp is non-NULL */
  	ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
9f72949f6   David Woodhouse   [PATCH] Add psele...
576
577
578
  
  	return ret;
  }
f3de272b8   Roland McGrath   signals: use HAVE...
579
  #ifdef HAVE_SET_RESTORE_SIGMASK
c9da9f212   Heiko Carstens   [CVE-2009-0029] M...
580
581
582
  /*
   * Common implementation behind pselect6(): select with a timespec
   * timeout and an optional temporary signal mask.
   *
   * When sigmask is non-NULL, sigsetsize must equal sizeof(sigset_t).
   * The mask is copied in, SIGKILL and SIGSTOP are removed from it, and it
   * is installed for the duration of the call.  If the call ends with
   * -ERESTARTNOHAND the previous mask is stored in current->saved_sigmask
   * and set_restore_sigmask() is called instead of restoring it here.
   *
   * Returns -EFAULT if the timeout or mask cannot be read, -EINVAL for a
   * rejected timeout or a wrong sigsetsize, otherwise the result of
   * poll_select_copy_remaining().
   */
  static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
  		       fd_set __user *exp, struct timespec __user *tsp,
  		       const sigset_t __user *sigmask, size_t sigsetsize)
9f72949f6   David Woodhouse   [PATCH] Add psele...
583
  {
9f72949f6   David Woodhouse   [PATCH] Add psele...
584
  	sigset_t ksigmask, sigsaved;
8ff3e8e85   Arjan van de Ven   select: switch se...
585
  	struct timespec ts, end_time, *to = NULL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
586
587
588
589
590
  	int ret;
  
  	if (tsp) {
  		if (copy_from_user(&ts, tsp, sizeof(ts)))
  			return -EFAULT;
8ff3e8e85   Arjan van de Ven   select: switch se...
591
592
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
9f72949f6   David Woodhouse   [PATCH] Add psele...
593
  			return -EINVAL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
594
595
596
597
598
599
600
601
602
603
604
605
  	}
  
  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
  			return -EFAULT;
  
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}
62568510b   Bernd Schmidt   Fix timeouts in s...
606
  	ret = core_sys_select(n, inp, outp, exp, to);
8ff3e8e85   Arjan van de Ven   select: switch se...
607
  	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
9f72949f6   David Woodhouse   [PATCH] Add psele...
608
609
610
611
612
613
614
615
616
617
  
  	if (ret == -ERESTARTNOHAND) {
  		/*
  		 * Don't restore the signal mask yet. Let do_signal() deliver
  		 * the signal on the way back to userspace, before the signal
  		 * mask is restored.
  		 */
  		if (sigmask) {
  			memcpy(&current->saved_sigmask, &sigsaved,
  					sizeof(sigsaved));
4e4c22c71   Roland McGrath   signals: add set_...
618
  			set_restore_sigmask();
9f72949f6   David Woodhouse   [PATCH] Add psele...
619
620
621
622
623
624
625
626
627
628
629
630
631
  		}
  	} else if (sigmask)
  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  
  	return ret;
  }
  
  /*
   * Most architectures can't handle 7-argument syscalls. So we provide a
   * 6-argument version where the sixth argument is a pointer to a structure
   * which has a pointer to the sigset_t itself followed by a size_t containing
   * the sigset size.
   */
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
632
633
634
  /*
   * pselect6() system call.
   *
   * sig, when non-NULL, points to user memory holding a sigset_t pointer
   * immediately followed by a size_t with the size of that set.  Both are
   * fetched here and handed to do_pselect(); with a NULL sig the mask
   * pointer is NULL and the size is 0.
   *
   * Returns -EFAULT if the pair cannot be read, otherwise the result of
   * do_pselect().
   */
  SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timespec __user *, tsp,
  		void __user *, sig)
9f72949f6   David Woodhouse   [PATCH] Add psele...
635
636
637
638
639
640
  {
  	size_t sigsetsize = 0;
  	sigset_t __user *up = NULL;
  
  	if (sig) {
  		if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
e110ab94e   Al Viro   [PATCH] fix __use...
641
  		    || __get_user(up, (sigset_t __user * __user *)sig)
9f72949f6   David Woodhouse   [PATCH] Add psele...
642
  		    || __get_user(sigsetsize,
e110ab94e   Al Viro   [PATCH] fix __use...
643
  				(size_t __user *)(sig+sizeof(void *))))
9f72949f6   David Woodhouse   [PATCH] Add psele...
644
645
  			return -EFAULT;
  	}
c9da9f212   Heiko Carstens   [CVE-2009-0029] M...
646
  	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
9f72949f6   David Woodhouse   [PATCH] Add psele...
647
  }
f3de272b8   Roland McGrath   signals: use HAVE...
648
  #endif /* HAVE_SET_RESTORE_SIGMASK */
9f72949f6   David Woodhouse   [PATCH] Add psele...
649

5d0e52830   Christoph Hellwig   Add generic sys_o...
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
  #ifdef __ARCH_WANT_SYS_OLD_SELECT
  /*
   * Argument block read from user space by old_select(): the five
   * select() arguments packed into one structure.
   */
  struct sel_arg_struct {
  	unsigned long n;
  	fd_set __user *inp;
  	fd_set __user *outp;
  	fd_set __user *exp;
  	struct timeval __user *tvp;
  };
  
  /*
   * old_select() system call: copy the packed argument block in from user
   * space and forward its members to sys_select().
   *
   * Returns -EFAULT if the block cannot be read, otherwise whatever
   * sys_select() returns.
   */
  SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
  {
  	struct sel_arg_struct args;

  	if (copy_from_user(&args, arg, sizeof(args)) != 0)
  		return -EFAULT;

  	return sys_select(args.n, args.inp, args.outp, args.exp, args.tvp);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
667
668
669
670
671
672
  /*
   * One chunk of pollfd entries; chunks are chained through next so that
   * a poll request can span several allocations.
   */
  struct poll_list {
  	struct poll_list *next;		/* following chunk, or NULL */
  	int len;			/* number of valid entries in this chunk */
  	struct pollfd entries[];	/* C99 flexible array member */
  };
  
  /* Number of struct pollfd that fit in one page after the poll_list header. */
  #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
673
674
675
676
677
678
679
680
  /*
   * Fish for pollable events on the pollfd->fd file descriptor. We're only
   * interested in events matching the pollfd->events mask, and the result
   * matching that mask is both recorded in pollfd->revents and returned. The
   * pwait poll_table will be used by the fd-provided poll handler for waiting,
   * if non-NULL.
   */
  /*
   * Poll a single pollfd and store the outcome in pollfd->revents.
   *
   * Returns the same mask that is stored: 0 for a negative fd, POLLNVAL
   * when fget_light() finds no file for the fd, and otherwise the file's
   * poll mask (DEFAULT_POLLMASK if it has no poll method) restricted to
   * the requested events plus POLLERR and POLLHUP.
   */
  static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
681
  {
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
682
683
684
685
686
687
688
689
690
691
692
693
694
  	unsigned int mask;
  	int fd;
  
  	mask = 0;
  	fd = pollfd->fd;
  	if (fd >= 0) {
  		int fput_needed;
  		struct file * file;
  
  		file = fget_light(fd, &fput_needed);
  		/* stays POLLNVAL unless a file was found */
  		mask = POLLNVAL;
  		if (file != NULL) {
  			mask = DEFAULT_POLLMASK;
4938d7e02   Eric Dumazet   poll: avoid extra...
695
696
697
698
  			if (file->f_op && file->f_op->poll) {
  				/* tell the wake-up filter which events matter */
  				if (pwait)
  					pwait->key = pollfd->events |
  							POLLERR | POLLHUP;
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
699
  				mask = file->f_op->poll(file, pwait);
4938d7e02   Eric Dumazet   poll: avoid extra...
700
  			}
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
701
702
703
  			/* Mask out unneeded events. */
  			mask &= pollfd->events | POLLERR | POLLHUP;
  			fput_light(file, fput_needed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
704
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
705
  	}
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
706
707
708
  	pollfd->revents = mask;
  
  	return mask;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
709
710
711
  }
  
  static int do_poll(unsigned int nfds,  struct poll_list *list,
8ff3e8e85   Arjan van de Ven   select: switch se...
712
  		   struct poll_wqueues *wait, struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
713
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
714
  	poll_table* pt = &wait->pt;
8ff3e8e85   Arjan van de Ven   select: switch se...
715
716
  	ktime_t expire, *to = NULL;
  	int timed_out = 0, count = 0;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
717
  	unsigned long slack = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718

9f72949f6   David Woodhouse   [PATCH] Add psele...
719
  	/* Optimise the no-wait case */
8ff3e8e85   Arjan van de Ven   select: switch se...
720
  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
721
  		pt = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
722
723
  		timed_out = 1;
  	}
9bf084f70   Oleg Nesterov   do_poll: return -...
724

96d2ab484   Arjan van de Ven   hrtimer: fix sign...
725
  	if (end_time && !timed_out)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
726
  		slack = estimate_accuracy(end_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
728
  	for (;;) {
  		struct poll_list *walk;
9f72949f6   David Woodhouse   [PATCH] Add psele...
729

4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
  		for (walk = list; walk != NULL; walk = walk->next) {
  			struct pollfd * pfd, * pfd_end;
  
  			pfd = walk->entries;
  			pfd_end = pfd + walk->len;
  			for (; pfd != pfd_end; pfd++) {
  				/*
  				 * Fish for events. If we found one, record it
  				 * and kill the poll_table, so we don't
  				 * needlessly register any other waiters after
  				 * this. They'll get immediately deregistered
  				 * when we break out and return.
  				 */
  				if (do_pollfd(pfd, pt)) {
  					count++;
  					pt = NULL;
  				}
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
748
  		}
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
749
750
751
752
  		/*
  		 * All waiters have already been registered, so don't provide
  		 * a poll_table to them on the next loop iteration.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
753
  		pt = NULL;
9bf084f70   Oleg Nesterov   do_poll: return -...
754
755
756
757
758
  		if (!count) {
  			count = wait->error;
  			if (signal_pending(current))
  				count = -EINTR;
  		}
8ff3e8e85   Arjan van de Ven   select: switch se...
759
  		if (count || timed_out)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
760
  			break;
9f72949f6   David Woodhouse   [PATCH] Add psele...
761

8ff3e8e85   Arjan van de Ven   select: switch se...
762
763
764
765
766
767
768
769
  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec_to_ktime(*end_time);
  			to = &expire;
9f72949f6   David Woodhouse   [PATCH] Add psele...
770
  		}
5f820f648   Tejun Heo   poll: allow f_op-...
771
  		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
8ff3e8e85   Arjan van de Ven   select: switch se...
772
  			timed_out = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
773
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
774
775
  	return count;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
776
777
  #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
  			sizeof(struct pollfd))
8ff3e8e85   Arjan van de Ven   select: switch se...
778
779
  int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
  		struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
780
781
  {
  	struct poll_wqueues table;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
782
   	int err = -EFAULT, fdcount, len, size;
30c14e40e   Jes Sorensen   [PATCH] avoid una...
783
784
785
786
  	/* Allocate small arguments on the stack to save memory and be
  	   faster - use long to make sure the buffer is aligned properly
  	   on 64 bit archs to avoid unaligned access */
  	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
252e5725c   Oleg Nesterov   do_sys_poll: simp...
787
788
789
  	struct poll_list *const head = (struct poll_list *)stack_pps;
   	struct poll_list *walk = head;
   	unsigned long todo = nfds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
790

d554ed895   Jiri Slaby   fs: use rlimit he...
791
  	if (nfds > rlimit(RLIMIT_NOFILE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
792
  		return -EINVAL;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
793
794
795
796
797
798
  	len = min_t(unsigned int, nfds, N_STACK_PPS);
  	for (;;) {
  		walk->next = NULL;
  		walk->len = len;
  		if (!len)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
799

252e5725c   Oleg Nesterov   do_sys_poll: simp...
800
801
802
803
804
805
806
  		if (copy_from_user(walk->entries, ufds + nfds-todo,
  					sizeof(struct pollfd) * walk->len))
  			goto out_fds;
  
  		todo -= walk->len;
  		if (!todo)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807

252e5725c   Oleg Nesterov   do_sys_poll: simp...
808
809
810
811
812
  		len = min(todo, POLLFD_PER_PAGE);
  		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
  		walk = walk->next = kmalloc(size, GFP_KERNEL);
  		if (!walk) {
  			err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
813
814
  			goto out_fds;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
815
  	}
9f72949f6   David Woodhouse   [PATCH] Add psele...
816

252e5725c   Oleg Nesterov   do_sys_poll: simp...
817
  	poll_initwait(&table);
8ff3e8e85   Arjan van de Ven   select: switch se...
818
  	fdcount = do_poll(nfds, head, &table, end_time);
252e5725c   Oleg Nesterov   do_sys_poll: simp...
819
  	poll_freewait(&table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
820

252e5725c   Oleg Nesterov   do_sys_poll: simp...
821
  	for (walk = head; walk; walk = walk->next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
822
823
  		struct pollfd *fds = walk->entries;
  		int j;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
824
825
  		for (j = 0; j < walk->len; j++, ufds++)
  			if (__put_user(fds[j].revents, &ufds->revents))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
826
  				goto out_fds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
827
    	}
252e5725c   Oleg Nesterov   do_sys_poll: simp...
828

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829
  	err = fdcount;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
830
  out_fds:
252e5725c   Oleg Nesterov   do_sys_poll: simp...
831
832
833
834
835
  	walk = head->next;
  	while (walk) {
  		struct poll_list *pos = walk;
  		walk = walk->next;
  		kfree(pos);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
836
  	}
252e5725c   Oleg Nesterov   do_sys_poll: simp...
837

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
838
839
  	return err;
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
840

3075d9da0   Chris Wright   Use ERESTART_REST...
841
842
  static long do_restart_poll(struct restart_block *restart_block)
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
843
844
845
  	struct pollfd __user *ufds = restart_block->poll.ufds;
  	int nfds = restart_block->poll.nfds;
  	struct timespec *to = NULL, end_time;
3075d9da0   Chris Wright   Use ERESTART_REST...
846
  	int ret;
8ff3e8e85   Arjan van de Ven   select: switch se...
847
848
849
850
851
852
853
  	if (restart_block->poll.has_timeout) {
  		end_time.tv_sec = restart_block->poll.tv_sec;
  		end_time.tv_nsec = restart_block->poll.tv_nsec;
  		to = &end_time;
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
3075d9da0   Chris Wright   Use ERESTART_REST...
854
855
  	if (ret == -EINTR) {
  		restart_block->fn = do_restart_poll;
3075d9da0   Chris Wright   Use ERESTART_REST...
856
857
858
859
  		ret = -ERESTART_RESTARTBLOCK;
  	}
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
860
861
  /*
   * poll system call.
   *
   * @ufds:          user-space array of pollfd entries
   * @nfds:          number of entries in @ufds
   * @timeout_msecs: timeout in milliseconds; a negative value means no
   *                 timeout is passed to do_sys_poll()
   *
   * Returns the do_sys_poll() result. When that is -EINTR, the arguments
   * and the computed timeout are stored in the thread's restart block,
   * do_restart_poll is installed as its handler, and
   * -ERESTART_RESTARTBLOCK is returned instead.
   */
  SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
  		long, timeout_msecs)
  {
  	struct restart_block *restart_block;
  	struct timespec end_time;
  	struct timespec *to = NULL;
  	int ret;
  
  	if (timeout_msecs >= 0) {
  		long secs = timeout_msecs / MSEC_PER_SEC;
  		long nsecs = NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC);
  
  		to = &end_time;
  		poll_select_set_timeout(to, secs, nsecs);
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
  	if (ret != -EINTR)
  		return ret;
  
  	/* Interrupted: save what do_restart_poll() needs to run again. */
  	restart_block = &current_thread_info()->restart_block;
  	restart_block->fn = do_restart_poll;
  	restart_block->poll.ufds = ufds;
  	restart_block->poll.nfds = nfds;
  
  	/* to is non-NULL exactly when timeout_msecs >= 0. */
  	if (to != NULL) {
  		restart_block->poll.tv_sec = end_time.tv_sec;
  		restart_block->poll.tv_nsec = end_time.tv_nsec;
  		restart_block->poll.has_timeout = 1;
  	} else {
  		restart_block->poll.has_timeout = 0;
  	}
  
  	return -ERESTART_RESTARTBLOCK;
  }
f3de272b8   Roland McGrath   signals: use HAVE...
890
  #ifdef HAVE_SET_RESTORE_SIGMASK
  /*
   * ppoll system call: poll with a timespec timeout and an optional signal
   * mask that is installed for the duration of the call.
   *
   * @ufds:       user-space array of pollfd entries
   * @nfds:       number of entries in @ufds
   * @tsp:        user-space timeout, or NULL for none
   * @sigmask:    user-space signal mask to install, or NULL to leave the
   *              current mask alone
   * @sigsetsize: size of the object @sigmask points to; must equal
   *              sizeof(sigset_t)
   *
   * Returns -EFAULT if @tsp or @sigmask cannot be copied in, -EINVAL if
   * poll_select_set_timeout() rejects the timeout or @sigsetsize is wrong,
   * and otherwise the value produced by poll_select_copy_remaining() from
   * the do_sys_poll() result (with -EINTR first turned into
   * -ERESTARTNOHAND).
   */
  SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
  		struct timespec __user *, tsp, const sigset_t __user *, sigmask,
  		size_t, sigsetsize)
  {
  	struct timespec user_ts, end_time;
  	struct timespec *to = NULL;
  	sigset_t new_mask, old_mask;
  	int ret;
  
  	if (tsp) {
  		if (copy_from_user(&user_ts, tsp, sizeof(user_ts)))
  			return -EFAULT;
  
  		to = &end_time;
  		if (poll_select_set_timeout(to, user_ts.tv_sec,
  					    user_ts.tv_nsec))
  			return -EINVAL;
  	}
  
  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&new_mask, sigmask, sizeof(new_mask)))
  			return -EFAULT;
  
  		/* SIGKILL and SIGSTOP are removed from the requested mask. */
  		sigdelsetmask(&new_mask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &new_mask, &old_mask);
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
  
  	if (ret != -EINTR) {
  		/* Not interrupted: put the caller's mask back right away. */
  		if (sigmask)
  			sigprocmask(SIG_SETMASK, &old_mask, NULL);
  	} else {
  		/* We can restart this syscall, usually */
  		if (sigmask) {
  			/*
  			 * Don't restore the signal mask yet. Let do_signal()
  			 * deliver the signal on the way back to userspace,
  			 * before the signal mask is restored.
  			 */
  			memcpy(&current->saved_sigmask, &old_mask,
  					sizeof(old_mask));
  			set_restore_sigmask();
  		}
  		ret = -ERESTARTNOHAND;
  	}
  
  	return poll_select_copy_remaining(&end_time, tsp, 0, ret);
  }
  #endif /* HAVE_SET_RESTORE_SIGMASK */