Blame view

fs/select.c 24.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
  /*
   * This file contains the procedures for the handling of select and poll
   *
   * Created for Linux based loosely upon Mathius Lattner's minix
   * patches by Peter MacDonald. Heavily edited by Linus.
   *
   *  4 February 1994
   *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
   *     flag set in its personality we do *not* modify the given timeout
   *     parameter to reflect time remaining.
   *
   *  24 January 2000
   *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
   *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
   */
022a16924   Milind Arun Choudhary   ROUND_UP macro cl...
16
  #include <linux/kernel.h>
a99bbaf5e   Alexey Dobriyan   headers: remove s...
17
  #include <linux/sched.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
19
20
  #include <linux/syscalls.h>
  #include <linux/module.h>
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
22
23
  #include <linux/poll.h>
  #include <linux/personality.h> /* for STICKY_TIMEOUTS */
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
24
  #include <linux/fdtable.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
  #include <linux/fs.h>
b835996f6   Dipankar Sarma   [PATCH] files: lo...
26
  #include <linux/rcupdate.h>
8ff3e8e85   Arjan van de Ven   select: switch se...
27
  #include <linux/hrtimer.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
29
  
  #include <asm/uaccess.h>
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
30
31
32
33
34
35
36
37
38
39
40
41
  
  /*
   * Estimate expected accuracy in ns from a timespec.
   *
   * After quite a bit of churning around, we've settled on
   * a simple thing of taking 0.1% of the timeout as the
   * slack, with a cap of 100 msec.
   * "nice" tasks get a 0.5% slack instead.
   *
   * Consider this comment an open invitation to come up with even
   * better solutions..
   */
5ae87e79e   Guillaume Knispel   poll/select: avoi...
42
  #define MAX_SLACK	(100 * NSEC_PER_MSEC)
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
43
  /*
   * Compute the slack, in nanoseconds, for the relative timeout @tv.
   *
   * The slack is tv / divfactor, where divfactor is 1000, or 200 when
   * task_nice(current) is positive.  The result is capped at MAX_SLACK.
   * A negative tv->tv_sec yields 0.
   */
  static long __estimate_accuracy(struct timespec *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
44
  {
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
45
  	long slack;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
46
  	int divfactor = 1000;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
47
48
  	if (tv->tv_sec < 0)
  		return 0;
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
49
  	if (task_nice(current) > 0)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
50
  		divfactor = divfactor / 5;
5ae87e79e   Guillaume Knispel   poll/select: avoi...
51
52
  	/*
  	 * The seconds term alone would already exceed the cap, so return
  	 * before the multiplication below is attempted on a large tv_sec.
  	 */
  	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
  		return MAX_SLACK;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
53
54
  	slack = tv->tv_nsec / divfactor;
  	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
5ae87e79e   Guillaume Knispel   poll/select: avoi...
55
56
  	if (slack > MAX_SLACK)
  		return MAX_SLACK;
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
57

90d6e24a3   Arjan van de Ven   hrtimer: make sel...
58
59
  	return slack;
  }
95aac7b1c   Shawn Bohrer   epoll: make epoll...
60
  /*
   * Return the timer slack, in nanoseconds, to use for a wait that ends at
   * the absolute time @tv (measured against ktime_get_ts()).
   *
   * Realtime tasks get 0.  For every other task the result is the value
   * computed by __estimate_accuracy() for the time remaining, but never less
   * than current->timer_slack_ns.
   */
  long select_estimate_accuracy(struct timespec *tv)
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
61
62
63
64
65
66
67
  {
  	unsigned long ret;
  	struct timespec now;
  
  	/*
  	 * Realtime tasks get a slack of 0 for obvious reasons.
  	 */
4ce105d30   Arjan van de Ven   hrtimer: incorpor...
68
  	if (rt_task(current))
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
69
70
71
72
73
74
75
76
77
  		return 0;
  
  	ktime_get_ts(&now);
  	/* From here on "now" holds the time remaining until *tv. */
  	now = timespec_sub(*tv, now);
  	ret = __estimate_accuracy(&now);
  	/* The task's own timer slack acts as a lower bound. */
  	if (ret < current->timer_slack_ns)
  		return current->timer_slack_ns;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
  /*
   * Header of one page of poll_table_entry slots.  Pages are obtained with
   * __get_free_page() in poll_get_entry() and chained through @next.
   */
  struct poll_table_page {
  	struct poll_table_page *next;	/* page that was in use before this one, or NULL */
  	struct poll_table_entry *entry;	/* next unused slot in entries[] */
  	/* C99 flexible array member; the slots fill the rest of the page */
  	struct poll_table_entry entries[];
  };
  
  /*
   * True when the slot after (table)->entry would end beyond the page that
   * starts at @table, i.e. the page has no room for another entry.
   * Note that @table is evaluated twice.
   */
  #define POLL_TABLE_FULL(table) \
  	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
  
  /*
   * Ok, Peter made a complicated, but straightforward multiple_wait() function.
   * I have rewritten this, taking some shortcuts: This code may not be easy to
   * follow, but it should be free of race-conditions, and it's practical. If you
   * understand what I'm doing here, then you understand how the linux
   * sleep/wakeup mechanism works.
   *
   * Two very simple procedures, poll_wait() and poll_freewait() make all the
   * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
   * as all select/poll functions have to call it to add an entry to the
   * poll table.
   */
75c96f858   Adrian Bunk   [PATCH] make some...
99
100
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  		       poll_table *p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
101
102
103
104
  
  /*
   * Prepare @pwq for use: install __pollwait() as the poll_table callback,
   * record the current task as the polling task, and reset the triggered
   * flag, the error code, the page list and the inline entry index.
   */
  void poll_initwait(struct poll_wqueues *pwq)
  {
  	init_poll_funcptr(&pwq->pt, __pollwait);
5f820f648   Tejun Heo   poll: allow f_op-...
105
  	pwq->polling_task = current;
b2add73db   Guillaume Knispel   poll/select: init...
106
  	pwq->triggered = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
107
108
  	pwq->error = 0;
  	pwq->table = NULL;
70674f95c   Andi Kleen   [PATCH] Optimize ...
109
  	pwq->inline_index = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
110
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111
  EXPORT_SYMBOL(poll_initwait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
112
113
  /*
   * Undo one registration made by __pollwait(): take the entry off its wait
   * queue, then drop the file reference that __pollwait() took with
   * get_file().
   */
  static void free_poll_entry(struct poll_table_entry *entry)
  {
ccf6780dc   WANG Cong   Style fix in fs/s...
114
  	remove_wait_queue(entry->wait_address, &entry->wait);
70674f95c   Andi Kleen   [PATCH] Optimize ...
115
116
  	fput(entry->filp);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
118
119
  /*
   * Release everything that was registered through @pwq: first the entries
   * stored inline in @pwq, then every entry of every allocated page, freeing
   * each page once its entries are released.
   */
  void poll_freewait(struct poll_wqueues *pwq)
  {
  	struct poll_table_page * p = pwq->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
120
121
122
  	int i;
  	for (i = 0; i < pwq->inline_index; i++)
  		free_poll_entry(pwq->inline_entries + i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
123
124
125
126
127
128
129
  	while (p) {
  		struct poll_table_entry * entry;
  		struct poll_table_page *old;
  
  		/*
  		 * p->entry points at the next unused slot; walk backwards
  		 * from there down to entries[0].  The do/while relies on each
  		 * page holding at least one entry, which poll_get_entry()
  		 * ensures by handing out a slot as soon as it adds a page.
  		 */
  		entry = p->entry;
  		do {
  			entry--;
70674f95c   Andi Kleen   [PATCH] Optimize ...
130
  			free_poll_entry(entry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
131
132
133
134
135
136
  		} while (entry > p->entries);
  		/* Fetch the link before the page holding it is freed. */
  		old = p;
  		p = p->next;
  		free_page((unsigned long) old);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
  EXPORT_SYMBOL(poll_freewait);
5f820f648   Tejun Heo   poll: allow f_op-...
138
  /*
   * Hand out the next unused poll_table_entry of @p.
   *
   * The N_INLINE_POLL_ENTRIES slots embedded in @p are used first.  After
   * that, slots come from pages allocated on demand and pushed onto the
   * front of p->table.
   *
   * Returns NULL and sets p->error to -ENOMEM if a page cannot be allocated.
   */
  static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
  	struct poll_table_page *table = p->table;
70674f95c   Andi Kleen   [PATCH] Optimize ...
141
142
  	if (p->inline_index < N_INLINE_POLL_ENTRIES)
  		return p->inline_entries + p->inline_index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
143
144
145
146
147
148
  	/* No page yet, or the newest page has no room: add a fresh page. */
  	if (!table || POLL_TABLE_FULL(table)) {
  		struct poll_table_page *new_table;
  
  		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
  		if (!new_table) {
  			p->error = -ENOMEM;
70674f95c   Andi Kleen   [PATCH] Optimize ...
149
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
151
152
153
154
155
  		}
  		new_table->entry = new_table->entries;
  		new_table->next = table;
  		p->table = new_table;
  		table = new_table;
  	}
70674f95c   Andi Kleen   [PATCH] Optimize ...
156
157
  	return table->entry++;
  }
4938d7e02   Eric Dumazet   poll: avoid extra...
158
  static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
5f820f648   Tejun Heo   poll: allow f_op-...
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
  {
  	struct poll_wqueues *pwq = wait->private;
  	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
  
  	/*
  	 * Although this function is called under waitqueue lock, LOCK
  	 * doesn't imply write barrier and the users expect write
  	 * barrier semantics on wakeup functions.  The following
  	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
  	 * and is paired with set_mb() in poll_schedule_timeout.
  	 */
  	smp_wmb();
  	pwq->triggered = 1;
  
  	/*
  	 * Perform the default wake up operation using a dummy
  	 * waitqueue.
  	 *
  	 * TODO: This is hacky but there currently is no interface to
  	 * pass in @sync.  @sync is scheduled to be removed and once
  	 * that happens, wake_up_process() can be used directly.
  	 */
  	return default_wake_function(&dummy_wait, mode, sync, key);
  }
4938d7e02   Eric Dumazet   poll: avoid extra...
183
184
185
186
187
188
189
190
191
  /*
   * Wake function installed on every poll_table_entry by __pollwait().
   *
   * A wakeup that carries a non-NULL @key is ignored (0 is returned) unless
   * @key shares at least one bit with the key recorded in the entry.  All
   * other wakeups are forwarded to __pollwake().
   */
  static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
  {
  	struct poll_table_entry *pte =
  		container_of(wait, struct poll_table_entry, wait);
  	unsigned long events = (unsigned long)key;

  	if (!key || (events & pte->key))
  		return __pollwake(wait, mode, sync, key);

  	return 0;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
192
193
194
195
  /*
   * Add a new entry.
   *
   * This is the poll_table callback installed by poll_initwait().  It takes a
   * slot from poll_get_entry(), takes a reference on @filp, records p->key
   * and @wait_address in the slot, and queues the slot on @wait_address with
   * pollwake() as its wake function.
   *
   * If no slot is available the function returns without registering
   * anything; poll_get_entry() has then stored the error in pwq->error.
   */
  static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
  				poll_table *p)
  {
5f820f648   Tejun Heo   poll: allow f_op-...
196
197
  	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
  	struct poll_table_entry *entry = poll_get_entry(pwq);
70674f95c   Andi Kleen   [PATCH] Optimize ...
198
199
200
201
202
  	if (!entry)
  		return;
  	/* Reference is dropped again by free_poll_entry(). */
  	get_file(filp);
  	entry->filp = filp;
  	entry->wait_address = wait_address;
4938d7e02   Eric Dumazet   poll: avoid extra...
203
  	entry->key = p->key;
5f820f648   Tejun Heo   poll: allow f_op-...
204
205
  	init_waitqueue_func_entry(&entry->wait, pollwake);
  	/* __pollwake() reads the poll_wqueues back from wait->private. */
  	entry->wait.private = pwq;
ccf6780dc   WANG Cong   Style fix in fs/s...
206
  	add_wait_queue(wait_address, &entry->wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207
  }
5f820f648   Tejun Heo   poll: allow f_op-...
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
  /*
   * Put the current task into @state and, unless pwq->triggered is already
   * set, sleep in schedule_hrtimeout_range() until @expires (passed with
   * HRTIMER_MODE_ABS) using @slack.  The task is set back to TASK_RUNNING and
   * pwq->triggered is cleared before returning.
   *
   * Returns the result of schedule_hrtimeout_range(), or -EINTR when the
   * sleep was skipped because pwq->triggered was set.  do_select() treats a
   * return value of 0 as "timed out".
   */
  int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
  			  ktime_t *expires, unsigned long slack)
  {
  	int rc = -EINTR;
  
  	set_current_state(state);
  	if (!pwq->triggered)
  		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
  	__set_current_state(TASK_RUNNING);
  
  	/*
  	 * Prepare for the next iteration.
  	 *
  	 * The following set_mb() serves two purposes.  First, it's
  	 * the counterpart rmb of the wmb in pollwake() such that data
  	 * written before wake up is always visible after wake up.
  	 * Second, the full barrier guarantees that triggered clearing
  	 * doesn't pass event check of the next iteration.  Note that
  	 * this problem doesn't exist for the first iteration as
  	 * add_wait_queue() has full barrier semantics.
  	 */
  	set_mb(pwq->triggered, 0);
  
  	return rc;
  }
  EXPORT_SYMBOL(poll_schedule_timeout);
b773ad40a   Thomas Gleixner   select: add poll_...
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
  /**
   * poll_select_set_timeout - helper function to setup the timeout value
   * @to:		pointer to timespec variable for the final timeout
   * @sec:	seconds (from user space)
   * @nsec:	nanoseconds (from user space)
   *
   * Note, we do not use a timespec for the user space value here, That
   * way we can use the function for timeval and compat interfaces as well.
   *
   * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
   */
  int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
  {
  	struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
  
  	if (!timespec_valid(&ts))
  		return -EINVAL;
  
  	/* Optimize for the zero timeout value here */
  	if (!sec && !nsec) {
  		to->tv_sec = to->tv_nsec = 0;
  	} else {
  		ktime_get_ts(to);
  		*to = timespec_add_safe(*to, ts);
  	}
  	return 0;
  }
  
  static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
  				      int timeval, int ret)
  {
  	struct timespec rts;
  	struct timeval rtv;
  
  	if (!p)
  		return ret;
  
  	if (current->personality & STICKY_TIMEOUTS)
  		goto sticky;
  
  	/* No update for zero timeout */
  	if (!end_time->tv_sec && !end_time->tv_nsec)
  		return ret;
  
  	ktime_get_ts(&rts);
  	rts = timespec_sub(*end_time, rts);
  	if (rts.tv_sec < 0)
  		rts.tv_sec = rts.tv_nsec = 0;
  
  	if (timeval) {
65329bf46   Vasiliy Kulikov   fs/select.c: fix ...
284
285
  		if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
  			memset(&rtv, 0, sizeof(rtv));
b773ad40a   Thomas Gleixner   select: add poll_...
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
  		rtv.tv_sec = rts.tv_sec;
  		rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
  
  		if (!copy_to_user(p, &rtv, sizeof(rtv)))
  			return ret;
  
  	} else if (!copy_to_user(p, &rts, sizeof(rts)))
  		return ret;
  
  	/*
  	 * If an application puts its timeval in read-only memory, we
  	 * don't want the Linux-specific update to the timeval to
  	 * cause a fault after the select has completed
  	 * successfully. However, because we're not updating the
  	 * timeval, we can't restart the system call.
  	 */
  
  sticky:
  	if (ret == -ERESTARTNOHAND)
  		ret = -EINTR;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
308
309
310
311
312
313
314
315
316
317
318
  /*
   * Pointer to word @n of the in / out / ex request bitmap of @fds.
   * Both arguments are parenthesized so that expressions such as "&set"
   * or "i + 1" expand with the intended precedence.
   */
  #define FDS_IN(fds, n)		((fds)->in + (n))
  #define FDS_OUT(fds, n)		((fds)->out + (n))
  #define FDS_EX(fds, n)		((fds)->ex + (n))
  
  /* Union of the in, out and ex request bits held in word @n of @fds. */
  #define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
  
  static int max_select_fd(unsigned long n, fd_set_bits *fds)
  {
  	unsigned long *open_fds;
  	unsigned long set;
  	int max;
badf16621   Dipankar Sarma   [PATCH] files: br...
319
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
320
321
322
323
  
  	/* handle last in-complete long-word first */
  	set = ~(~0UL << (n & (__NFDBITS-1)));
  	n /= __NFDBITS;
badf16621   Dipankar Sarma   [PATCH] files: br...
324
325
  	fdt = files_fdtable(current->files);
  	open_fds = fdt->open_fds->fds_bits+n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
  	max = 0;
  	if (set) {
  		set &= BITS(fds, n);
  		if (set) {
  			if (!(set & ~*open_fds))
  				goto get_max;
  			return -EBADF;
  		}
  	}
  	while (n) {
  		open_fds--;
  		n--;
  		set = BITS(fds, n);
  		if (!set)
  			continue;
  		if (set & ~*open_fds)
  			return -EBADF;
  		if (max)
  			continue;
  get_max:
  		do {
  			max++;
  			set >>= 1;
  		} while (set);
  		max += n * __NFDBITS;
  	}
  
  	return max;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
355
356
357
  /*
   * Poll event masks used by do_select(): a file counts as ready in the
   * read, write or exception set when its poll mask intersects POLLIN_SET,
   * POLLOUT_SET or POLLEX_SET respectively.
   */
  #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
  #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
  #define POLLEX_SET (POLLPRI)
4938d7e02   Eric Dumazet   poll: avoid extra...
358
359
360
361
362
363
364
365
366
367
368
  /*
   * Store in wait->key the events the caller is interested in for the fd
   * selected by @bit: always POLLEX_SET, plus POLLIN_SET when @bit is set in
   * @in and POLLOUT_SET when @bit is set in @out.  Does nothing when @wait
   * is NULL.
   */
  static inline void wait_key_set(poll_table *wait, unsigned long in,
  				unsigned long out, unsigned long bit)
  {
  	unsigned long events = POLLEX_SET;

  	if (!wait)
  		return;

  	if (in & bit)
  		events |= POLLIN_SET;
  	if (out & bit)
  		events |= POLLOUT_SET;

  	wait->key = events;
  }
8ff3e8e85   Arjan van de Ven   select: switch se...
369
  int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
370
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
371
  	ktime_t expire, *to = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
372
373
  	struct poll_wqueues table;
  	poll_table *wait;
8ff3e8e85   Arjan van de Ven   select: switch se...
374
  	int retval, i, timed_out = 0;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
375
  	unsigned long slack = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
376

b835996f6   Dipankar Sarma   [PATCH] files: lo...
377
  	rcu_read_lock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
378
  	retval = max_select_fd(n, fds);
b835996f6   Dipankar Sarma   [PATCH] files: lo...
379
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
380
381
382
383
384
385
386
  
  	if (retval < 0)
  		return retval;
  	n = retval;
  
  	poll_initwait(&table);
  	wait = &table.pt;
8ff3e8e85   Arjan van de Ven   select: switch se...
387
  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
388
  		wait = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
389
390
  		timed_out = 1;
  	}
96d2ab484   Arjan van de Ven   hrtimer: fix sign...
391
  	if (end_time && !timed_out)
231f3d393   Andrew Morton   select: rename es...
392
  		slack = select_estimate_accuracy(end_time);
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
393

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
394
395
396
  	retval = 0;
  	for (;;) {
  		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
397
398
399
400
401
402
  		inp = fds->in; outp = fds->out; exp = fds->ex;
  		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
  
  		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
  			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
  			unsigned long res_in = 0, res_out = 0, res_ex = 0;
99ac48f54   Arjan van de Ven   [PATCH] mark f_op...
403
  			const struct file_operations *f_op = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
404
405
406
407
408
409
410
411
412
413
  			struct file *file = NULL;
  
  			in = *inp++; out = *outp++; ex = *exp++;
  			all_bits = in | out | ex;
  			if (all_bits == 0) {
  				i += __NFDBITS;
  				continue;
  			}
  
  			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
e4a1f129f   Eric Dumazet   [PATCH] use fget_...
414
  				int fput_needed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415
416
417
418
  				if (i >= n)
  					break;
  				if (!(bit & all_bits))
  					continue;
e4a1f129f   Eric Dumazet   [PATCH] use fget_...
419
  				file = fget_light(i, &fput_needed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
420
421
422
  				if (file) {
  					f_op = file->f_op;
  					mask = DEFAULT_POLLMASK;
4938d7e02   Eric Dumazet   poll: avoid extra...
423
424
425
426
  					if (f_op && f_op->poll) {
  						wait_key_set(wait, in, out, bit);
  						mask = (*f_op->poll)(file, wait);
  					}
e4a1f129f   Eric Dumazet   [PATCH] use fget_...
427
  					fput_light(file, fput_needed);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
428
429
430
  					if ((mask & POLLIN_SET) && (in & bit)) {
  						res_in |= bit;
  						retval++;
4938d7e02   Eric Dumazet   poll: avoid extra...
431
  						wait = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
432
433
434
435
  					}
  					if ((mask & POLLOUT_SET) && (out & bit)) {
  						res_out |= bit;
  						retval++;
4938d7e02   Eric Dumazet   poll: avoid extra...
436
  						wait = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
437
438
439
440
  					}
  					if ((mask & POLLEX_SET) && (ex & bit)) {
  						res_ex |= bit;
  						retval++;
4938d7e02   Eric Dumazet   poll: avoid extra...
441
  						wait = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
443
  					}
  				}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
445
446
447
448
449
450
  			}
  			if (res_in)
  				*rinp = res_in;
  			if (res_out)
  				*routp = res_out;
  			if (res_ex)
  				*rexp = res_ex;
55d853849   Linus Torvalds   Fix performance r...
451
  			cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
453
  		}
  		wait = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
454
  		if (retval || timed_out || signal_pending(current))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455
  			break;
f5264481c   Pavel Machek   trivial: small cl...
456
  		if (table.error) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
457
458
459
  			retval = table.error;
  			break;
  		}
9f72949f6   David Woodhouse   [PATCH] Add psele...
460

8ff3e8e85   Arjan van de Ven   select: switch se...
461
462
463
464
465
466
467
468
  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec_to_ktime(*end_time);
  			to = &expire;
9f72949f6   David Woodhouse   [PATCH] Add psele...
469
  		}
8ff3e8e85   Arjan van de Ven   select: switch se...
470

5f820f648   Tejun Heo   poll: allow f_op-...
471
472
  		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
  					   to, slack))
8ff3e8e85   Arjan van de Ven   select: switch se...
473
  			timed_out = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
474
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475
476
  
  	poll_freewait(&table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477
478
  	return retval;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
479
480
481
482
483
484
485
486
  /*
   * We can actually return ERESTARTSYS instead of EINTR, but I'd
   * like to be certain this leads to no problems. So I return
   * EINTR just for safety.
   *
   * Update: ERESTARTSYS breaks at least the xview clock binary, so
   * I'm trying ERESTARTNOHAND which restart only when you want to.
   */
a2dcb44c3   Al Viro   [PATCH] make osf_...
487
  int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
8ff3e8e85   Arjan van de Ven   select: switch se...
488
  			   fd_set __user *exp, struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
489
490
  {
  	fd_set_bits fds;
29ff2db55   Andrew Morton   [PATCH] select() ...
491
  	void *bits;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
492
  	int ret, max_fds;
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
493
  	unsigned int size;
badf16621   Dipankar Sarma   [PATCH] files: br...
494
  	struct fdtable *fdt;
70674f95c   Andi Kleen   [PATCH] Optimize ...
495
  	/* Allocate small arguments on the stack to save memory and be faster */
30c14e40e   Jes Sorensen   [PATCH] avoid una...
496
  	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
497

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
498
499
500
  	ret = -EINVAL;
  	if (n < 0)
  		goto out_nofds;
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
501
  	/* max_fds can increase, so grab it once to avoid race */
b835996f6   Dipankar Sarma   [PATCH] files: lo...
502
  	rcu_read_lock();
badf16621   Dipankar Sarma   [PATCH] files: br...
503
  	fdt = files_fdtable(current->files);
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
504
  	max_fds = fdt->max_fds;
b835996f6   Dipankar Sarma   [PATCH] files: lo...
505
  	rcu_read_unlock();
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
506
507
  	if (n > max_fds)
  		n = max_fds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
508
509
510
511
512
513
  
  	/*
  	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
  	 * since we used fdset we need to allocate memory in units of
  	 * long-words. 
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
514
  	size = FDS_BYTES(n);
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
515
516
517
518
  	bits = stack_fds;
  	if (size > sizeof(stack_fds) / 6) {
  		/* Not enough space in on-stack array; must use kmalloc */
  		ret = -ENOMEM;
70674f95c   Andi Kleen   [PATCH] Optimize ...
519
  		bits = kmalloc(6 * size, GFP_KERNEL);
b04eb6aa0   Mitchell Blank Jr   [PATCH] select: d...
520
521
522
  		if (!bits)
  			goto out_nofds;
  	}
29ff2db55   Andrew Morton   [PATCH] select() ...
523
524
525
526
527
528
  	fds.in      = bits;
  	fds.out     = bits +   size;
  	fds.ex      = bits + 2*size;
  	fds.res_in  = bits + 3*size;
  	fds.res_out = bits + 4*size;
  	fds.res_ex  = bits + 5*size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
529
530
531
532
533
534
535
536
  
  	if ((ret = get_fd_set(n, inp, fds.in)) ||
  	    (ret = get_fd_set(n, outp, fds.out)) ||
  	    (ret = get_fd_set(n, exp, fds.ex)))
  		goto out;
  	zero_fd_set(n, fds.res_in);
  	zero_fd_set(n, fds.res_out);
  	zero_fd_set(n, fds.res_ex);
8ff3e8e85   Arjan van de Ven   select: switch se...
537
  	ret = do_select(n, &fds, end_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
  
  	if (ret < 0)
  		goto out;
  	if (!ret) {
  		ret = -ERESTARTNOHAND;
  		if (signal_pending(current))
  			goto out;
  		ret = 0;
  	}
  
  	if (set_fd_set(n, inp, fds.res_in) ||
  	    set_fd_set(n, outp, fds.res_out) ||
  	    set_fd_set(n, exp, fds.res_ex))
  		ret = -EFAULT;
  
  out:
70674f95c   Andi Kleen   [PATCH] Optimize ...
554
555
  	if (bits != stack_fds)
  		kfree(bits);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
556
557
558
  out_nofds:
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
559
560
  SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timeval __user *, tvp)
9f72949f6   David Woodhouse   [PATCH] Add psele...
561
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
562
  	struct timespec end_time, *to = NULL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
563
564
565
566
567
568
  	struct timeval tv;
  	int ret;
  
  	if (tvp) {
  		if (copy_from_user(&tv, tvp, sizeof(tv)))
  			return -EFAULT;
8ff3e8e85   Arjan van de Ven   select: switch se...
569
  		to = &end_time;
4d36a9e65   Arjan van de Ven   select: deal with...
570
571
572
  		if (poll_select_set_timeout(to,
  				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
  				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
9f72949f6   David Woodhouse   [PATCH] Add psele...
573
  			return -EINVAL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
574
  	}
8ff3e8e85   Arjan van de Ven   select: switch se...
575
576
  	ret = core_sys_select(n, inp, outp, exp, to);
  	ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
9f72949f6   David Woodhouse   [PATCH] Add psele...
577
578
579
  
  	return ret;
  }
f3de272b8   Roland McGrath   signals: use HAVE...
580
  #ifdef HAVE_SET_RESTORE_SIGMASK
c9da9f212   Heiko Carstens   [CVE-2009-0029] M...
581
582
583
  static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
  		       fd_set __user *exp, struct timespec __user *tsp,
  		       const sigset_t __user *sigmask, size_t sigsetsize)
9f72949f6   David Woodhouse   [PATCH] Add psele...
584
  {
9f72949f6   David Woodhouse   [PATCH] Add psele...
585
  	sigset_t ksigmask, sigsaved;
8ff3e8e85   Arjan van de Ven   select: switch se...
586
  	struct timespec ts, end_time, *to = NULL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
587
588
589
590
591
  	int ret;
  
  	if (tsp) {
  		if (copy_from_user(&ts, tsp, sizeof(ts)))
  			return -EFAULT;
8ff3e8e85   Arjan van de Ven   select: switch se...
592
593
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
9f72949f6   David Woodhouse   [PATCH] Add psele...
594
  			return -EINVAL;
9f72949f6   David Woodhouse   [PATCH] Add psele...
595
596
597
598
599
600
601
602
603
604
605
606
  	}
  
  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
  			return -EFAULT;
  
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}
62568510b   Bernd Schmidt   Fix timeouts in s...
607
  	ret = core_sys_select(n, inp, outp, exp, to);
8ff3e8e85   Arjan van de Ven   select: switch se...
608
  	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
9f72949f6   David Woodhouse   [PATCH] Add psele...
609
610
611
612
613
614
615
616
617
618
  
  	if (ret == -ERESTARTNOHAND) {
  		/*
  		 * Don't restore the signal mask yet. Let do_signal() deliver
  		 * the signal on the way back to userspace, before the signal
  		 * mask is restored.
  		 */
  		if (sigmask) {
  			memcpy(&current->saved_sigmask, &sigsaved,
  					sizeof(sigsaved));
4e4c22c71   Roland McGrath   signals: add set_...
619
  			set_restore_sigmask();
9f72949f6   David Woodhouse   [PATCH] Add psele...
620
621
622
623
624
625
626
627
628
629
630
631
632
  		}
  	} else if (sigmask)
  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  
  	return ret;
  }
  
  /*
   * Most architectures can't handle 7-argument syscalls. So we provide a
   * 6-argument version where the sixth argument is a pointer to a structure
   * which has a pointer to the sigset_t itself followed by a size_t containing
   * the sigset size.
   */
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
633
634
635
  SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
  		fd_set __user *, exp, struct timespec __user *, tsp,
  		void __user *, sig)
9f72949f6   David Woodhouse   [PATCH] Add psele...
636
637
638
639
640
641
  {
  	size_t sigsetsize = 0;
  	sigset_t __user *up = NULL;
  
  	if (sig) {
  		if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
e110ab94e   Al Viro   [PATCH] fix __use...
642
  		    || __get_user(up, (sigset_t __user * __user *)sig)
9f72949f6   David Woodhouse   [PATCH] Add psele...
643
  		    || __get_user(sigsetsize,
e110ab94e   Al Viro   [PATCH] fix __use...
644
  				(size_t __user *)(sig+sizeof(void *))))
9f72949f6   David Woodhouse   [PATCH] Add psele...
645
646
  			return -EFAULT;
  	}
c9da9f212   Heiko Carstens   [CVE-2009-0029] M...
647
  	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
9f72949f6   David Woodhouse   [PATCH] Add psele...
648
  }
f3de272b8   Roland McGrath   signals: use HAVE...
649
  #endif /* HAVE_SET_RESTORE_SIGMASK */
9f72949f6   David Woodhouse   [PATCH] Add psele...
650

5d0e52830   Christoph Hellwig   Add generic sys_o...
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
  #ifdef __ARCH_WANT_SYS_OLD_SELECT
  /*
   * Argument block of old_select(): the five select() arguments packed into
   * one structure that is read from user space in a single copy.
   */
  struct sel_arg_struct {
  	unsigned long n;
  	fd_set __user *inp, *outp, *exp;
  	struct timeval __user *tvp;
  };
  
  /*
   * Variant of select() that receives its arguments through one user-space
   * struct sel_arg_struct.  Returns -EFAULT if that structure cannot be
   * copied in; otherwise returns whatever sys_select() returns.
   */
  SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
  {
  	struct sel_arg_struct args;

  	if (copy_from_user(&args, arg, sizeof(args)) != 0)
  		return -EFAULT;

  	return sys_select(args.n, args.inp, args.outp, args.exp, args.tvp);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
667
668
669
670
671
672
673
  /*
   * One chunk of pollfd entries; chunks are linked through @next.
   * POLLFD_PER_PAGE gives the number of entries that fit when a chunk
   * occupies one page.
   */
  struct poll_list {
  	struct poll_list *next;
  	/* NOTE(review): presumably the number of entries in this chunk; users are outside this excerpt */
  	int len;
  	/* C99 flexible array member; the storage follows the header */
  	struct pollfd entries[];
  };
  
  /* Number of struct pollfd that fit in one page after the struct poll_list header. */
  #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
674
675
676
677
678
679
680
681
  /*
   * Poll a single descriptor.
   *
   * Looks up pollfd->fd, asks the file's poll handler (if it has one) which
   * events are pending, and keeps only those requested in pollfd->events
   * plus POLLERR and POLLHUP. The result is stored in pollfd->revents and
   * also returned.
   *
   * A negative fd yields 0; an fd for which fget_light() returns no file
   * yields POLLNVAL; a file without a poll handler starts from
   * DEFAULT_POLLMASK. When pwait is non-NULL its key is set to the mask of
   * interest before the handler is called with it.
   */
  static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
  {
  	unsigned int mask = 0;
  	int fd = pollfd->fd;
  	int fput_needed;
  	struct file *file;
  
  	if (fd < 0)
  		goto out;
  
  	file = fget_light(fd, &fput_needed);
  	if (file == NULL) {
  		mask = POLLNVAL;
  		goto out;
  	}
  
  	mask = DEFAULT_POLLMASK;
  	if (file->f_op && file->f_op->poll) {
  		if (pwait)
  			pwait->key = pollfd->events | POLLERR | POLLHUP;
  		mask = file->f_op->poll(file, pwait);
  	}
  
  	/* Drop events the caller did not ask for. */
  	mask &= pollfd->events | POLLERR | POLLHUP;
  	fput_light(file, fput_needed);
  out:
  	pollfd->revents = mask;
  	return mask;
  }
  
  static int do_poll(unsigned int nfds,  struct poll_list *list,
8ff3e8e85   Arjan van de Ven   select: switch se...
713
  		   struct poll_wqueues *wait, struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
714
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
715
  	poll_table* pt = &wait->pt;
8ff3e8e85   Arjan van de Ven   select: switch se...
716
717
  	ktime_t expire, *to = NULL;
  	int timed_out = 0, count = 0;
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
718
  	unsigned long slack = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
719

9f72949f6   David Woodhouse   [PATCH] Add psele...
720
  	/* Optimise the no-wait case */
8ff3e8e85   Arjan van de Ven   select: switch se...
721
  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
722
  		pt = NULL;
8ff3e8e85   Arjan van de Ven   select: switch se...
723
724
  		timed_out = 1;
  	}
9bf084f70   Oleg Nesterov   do_poll: return -...
725

96d2ab484   Arjan van de Ven   hrtimer: fix sign...
726
  	if (end_time && !timed_out)
231f3d393   Andrew Morton   select: rename es...
727
  		slack = select_estimate_accuracy(end_time);
90d6e24a3   Arjan van de Ven   hrtimer: make sel...
728

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
729
730
  	for (;;) {
  		struct poll_list *walk;
9f72949f6   David Woodhouse   [PATCH] Add psele...
731

4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
  		for (walk = list; walk != NULL; walk = walk->next) {
  			struct pollfd * pfd, * pfd_end;
  
  			pfd = walk->entries;
  			pfd_end = pfd + walk->len;
  			for (; pfd != pfd_end; pfd++) {
  				/*
  				 * Fish for events. If we found one, record it
  				 * and kill the poll_table, so we don't
  				 * needlessly register any other waiters after
  				 * this. They'll get immediately deregistered
  				 * when we break out and return.
  				 */
  				if (do_pollfd(pfd, pt)) {
  					count++;
  					pt = NULL;
  				}
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
750
  		}
4a4b69f79   Vadim Lobanov   [PATCH] Poll clea...
751
752
753
754
  		/*
  		 * All waiters have already been registered, so don't provide
  		 * a poll_table to them on the next loop iteration.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
755
  		pt = NULL;
9bf084f70   Oleg Nesterov   do_poll: return -...
756
757
758
759
760
  		if (!count) {
  			count = wait->error;
  			if (signal_pending(current))
  				count = -EINTR;
  		}
8ff3e8e85   Arjan van de Ven   select: switch se...
761
  		if (count || timed_out)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
762
  			break;
9f72949f6   David Woodhouse   [PATCH] Add psele...
763

8ff3e8e85   Arjan van de Ven   select: switch se...
764
765
766
767
768
769
770
771
  		/*
  		 * If this is the first loop and we have a timeout
  		 * given, then we convert to ktime_t and set the to
  		 * pointer to the expiry value.
  		 */
  		if (end_time && !to) {
  			expire = timespec_to_ktime(*end_time);
  			to = &expire;
9f72949f6   David Woodhouse   [PATCH] Add psele...
772
  		}
5f820f648   Tejun Heo   poll: allow f_op-...
773
  		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
8ff3e8e85   Arjan van de Ven   select: switch se...
774
  			timed_out = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
775
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
776
777
  	return count;
  }
70674f95c   Andi Kleen   [PATCH] Optimize ...
778
779
  #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
  			sizeof(struct pollfd))
8ff3e8e85   Arjan van de Ven   select: switch se...
780
781
  int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
  		struct timespec *end_time)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
782
783
  {
  	struct poll_wqueues table;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
784
   	int err = -EFAULT, fdcount, len, size;
30c14e40e   Jes Sorensen   [PATCH] avoid una...
785
786
787
788
  	/* Allocate small arguments on the stack to save memory and be
  	   faster - use long to make sure the buffer is aligned properly
  	   on 64 bit archs to avoid unaligned access */
  	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
252e5725c   Oleg Nesterov   do_sys_poll: simp...
789
790
791
  	struct poll_list *const head = (struct poll_list *)stack_pps;
   	struct poll_list *walk = head;
   	unsigned long todo = nfds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
792

d554ed895   Jiri Slaby   fs: use rlimit he...
793
  	if (nfds > rlimit(RLIMIT_NOFILE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
794
  		return -EINVAL;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
795
796
797
798
799
800
  	len = min_t(unsigned int, nfds, N_STACK_PPS);
  	for (;;) {
  		walk->next = NULL;
  		walk->len = len;
  		if (!len)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
801

252e5725c   Oleg Nesterov   do_sys_poll: simp...
802
803
804
805
806
807
808
  		if (copy_from_user(walk->entries, ufds + nfds-todo,
  					sizeof(struct pollfd) * walk->len))
  			goto out_fds;
  
  		todo -= walk->len;
  		if (!todo)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
809

252e5725c   Oleg Nesterov   do_sys_poll: simp...
810
811
812
813
814
  		len = min(todo, POLLFD_PER_PAGE);
  		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
  		walk = walk->next = kmalloc(size, GFP_KERNEL);
  		if (!walk) {
  			err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
815
816
  			goto out_fds;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
817
  	}
9f72949f6   David Woodhouse   [PATCH] Add psele...
818

252e5725c   Oleg Nesterov   do_sys_poll: simp...
819
  	poll_initwait(&table);
8ff3e8e85   Arjan van de Ven   select: switch se...
820
  	fdcount = do_poll(nfds, head, &table, end_time);
252e5725c   Oleg Nesterov   do_sys_poll: simp...
821
  	poll_freewait(&table);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
822

252e5725c   Oleg Nesterov   do_sys_poll: simp...
823
  	for (walk = head; walk; walk = walk->next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
824
825
  		struct pollfd *fds = walk->entries;
  		int j;
252e5725c   Oleg Nesterov   do_sys_poll: simp...
826
827
  		for (j = 0; j < walk->len; j++, ufds++)
  			if (__put_user(fds[j].revents, &ufds->revents))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
828
  				goto out_fds;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829
    	}
252e5725c   Oleg Nesterov   do_sys_poll: simp...
830

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
831
  	err = fdcount;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
832
  out_fds:
252e5725c   Oleg Nesterov   do_sys_poll: simp...
833
834
835
836
837
  	walk = head->next;
  	while (walk) {
  		struct poll_list *pos = walk;
  		walk = walk->next;
  		kfree(pos);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
838
  	}
252e5725c   Oleg Nesterov   do_sys_poll: simp...
839

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
840
841
  	return err;
  }
9f72949f6   David Woodhouse   [PATCH] Add psele...
842

3075d9da0   Chris Wright   Use ERESTART_REST...
843
844
  static long do_restart_poll(struct restart_block *restart_block)
  {
8ff3e8e85   Arjan van de Ven   select: switch se...
845
846
847
  	struct pollfd __user *ufds = restart_block->poll.ufds;
  	int nfds = restart_block->poll.nfds;
  	struct timespec *to = NULL, end_time;
3075d9da0   Chris Wright   Use ERESTART_REST...
848
  	int ret;
8ff3e8e85   Arjan van de Ven   select: switch se...
849
850
851
852
853
854
855
  	if (restart_block->poll.has_timeout) {
  		end_time.tv_sec = restart_block->poll.tv_sec;
  		end_time.tv_nsec = restart_block->poll.tv_nsec;
  		to = &end_time;
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
3075d9da0   Chris Wright   Use ERESTART_REST...
856
857
  	if (ret == -EINTR) {
  		restart_block->fn = do_restart_poll;
3075d9da0   Chris Wright   Use ERESTART_REST...
858
859
860
861
  		ret = -ERESTART_RESTARTBLOCK;
  	}
  	return ret;
  }
5a8a82b1d   Heiko Carstens   [CVE-2009-0029] S...
862
863
  /*
   * poll - wait for events on a set of descriptors.
   *
   * @ufds:          user array of pollfd entries.
   * @nfds:          number of entries in @ufds.
   * @timeout_msecs: timeout in milliseconds; a negative value means no
   *                 timeout is passed to do_sys_poll().
   *
   * Returns the result of do_sys_poll(). If that is -EINTR, the arguments
   * (and the computed end time, when there is one) are saved in the
   * thread's restart block with do_restart_poll() as the handler, and
   * -ERESTART_RESTARTBLOCK is returned instead.
   */
  SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
  		long, timeout_msecs)
  {
  	struct timespec end_time;
  	struct timespec *to = NULL;
  	struct restart_block *restart_block;
  	int ret;
  
  	if (timeout_msecs >= 0) {
  		to = &end_time;
  		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
  			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
  	if (ret != -EINTR)
  		return ret;
  
  	/* Interrupted: record what is needed to restart the call. */
  	restart_block = &current_thread_info()->restart_block;
  	restart_block->fn = do_restart_poll;
  	restart_block->poll.ufds = ufds;
  	restart_block->poll.nfds = nfds;
  
  	if (timeout_msecs < 0) {
  		restart_block->poll.has_timeout = 0;
  	} else {
  		restart_block->poll.tv_sec = end_time.tv_sec;
  		restart_block->poll.tv_nsec = end_time.tv_nsec;
  		restart_block->poll.has_timeout = 1;
  	}
  
  	return -ERESTART_RESTARTBLOCK;
  }
f3de272b8   Roland McGrath   signals: use HAVE...
892
  #ifdef HAVE_SET_RESTORE_SIGMASK
d4e82042c   Heiko Carstens   [CVE-2009-0029] S...
893
894
895
  /*
   * ppoll - poll with a timespec timeout and an optional temporary signal
   * mask.
   *
   * @ufds:       user array of pollfd entries.
   * @nfds:       number of entries in @ufds.
   * @tsp:        user timespec timeout, or NULL for none.
   * @sigmask:    user signal mask to install for the duration of the call,
   *              or NULL to leave the mask alone.
   * @sigsetsize: must equal sizeof(sigset_t) when @sigmask is given.
   *
   * Returns -EFAULT if @tsp or @sigmask cannot be copied in, -EINVAL if
   * poll_select_set_timeout() rejects the timeout or @sigsetsize is wrong;
   * otherwise the result of do_sys_poll() (with -EINTR turned into
   * -ERESTARTNOHAND) as passed through poll_select_copy_remaining().
   */
  SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
  		struct timespec __user *, tsp, const sigset_t __user *, sigmask,
  		size_t, sigsetsize)
  {
  	struct timespec ts;
  	struct timespec end_time;
  	struct timespec *to = NULL;
  	sigset_t ksigmask;
  	sigset_t sigsaved;
  	int ret;
  
  	if (tsp) {
  		if (copy_from_user(&ts, tsp, sizeof(ts)))
  			return -EFAULT;
  
  		to = &end_time;
  		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
  			return -EINVAL;
  	}
  
  	if (sigmask) {
  		/* XXX: Don't preclude handling different sized sigset_t's.  */
  		if (sigsetsize != sizeof(sigset_t))
  			return -EINVAL;
  		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
  			return -EFAULT;
  
  		/* SIGKILL and SIGSTOP are removed from the requested mask. */
  		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
  		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
  	}
  
  	ret = do_sys_poll(ufds, nfds, to);
  
  	if (ret != -EINTR) {
  		if (sigmask)
  			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  	} else {
  		/*
  		 * We can restart this syscall, usually. Do not restore the
  		 * signal mask yet: stash it so that do_signal() can deliver
  		 * the signal on the way back to userspace before the mask is
  		 * put back.
  		 */
  		if (sigmask) {
  			memcpy(&current->saved_sigmask, &sigsaved,
  			       sizeof(sigsaved));
  			set_restore_sigmask();
  		}
  		ret = -ERESTARTNOHAND;
  	}
  
  	return poll_select_copy_remaining(&end_time, tsp, 0, ret);
  }
f3de272b8   Roland McGrath   signals: use HAVE...
940
  #endif /* HAVE_SET_RESTORE_SIGMASK */