Commit 90d6e24a3686325edea7748b966e138c9923017d

Authored by Arjan van de Ven
1 parent 6976675d94

hrtimer: make select() and poll() use the hrtimer range feature

This patch makes the select() and poll() hrtimers use the new range
feature and settings from the task struct.

In addition, this includes the estimate_accuracy() function that Linus
posted to lkml, but changed entirely based on other people's LKML feedback.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>

Showing 1 changed file with 62 additions and 2 deletions Inline Diff

1 /* 1 /*
2 * This file contains the procedures for the handling of select and poll 2 * This file contains the procedures for the handling of select and poll
3 * 3 *
4 * Created for Linux based loosely upon Mathius Lattner's minix 4 * Created for Linux based loosely upon Mathius Lattner's minix
5 * patches by Peter MacDonald. Heavily edited by Linus. 5 * patches by Peter MacDonald. Heavily edited by Linus.
6 * 6 *
7 * 4 February 1994 7 * 4 February 1994
8 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS 8 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
9 * flag set in its personality we do *not* modify the given timeout 9 * flag set in its personality we do *not* modify the given timeout
10 * parameter to reflect time remaining. 10 * parameter to reflect time remaining.
11 * 11 *
12 * 24 January 2000 12 * 24 January 2000
13 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 13 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
14 * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). 14 * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
15 */ 15 */
16 16
17 #include <linux/kernel.h> 17 #include <linux/kernel.h>
18 #include <linux/syscalls.h> 18 #include <linux/syscalls.h>
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/slab.h> 20 #include <linux/slab.h>
21 #include <linux/poll.h> 21 #include <linux/poll.h>
22 #include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22 #include <linux/personality.h> /* for STICKY_TIMEOUTS */
23 #include <linux/file.h> 23 #include <linux/file.h>
24 #include <linux/fdtable.h> 24 #include <linux/fdtable.h>
25 #include <linux/fs.h> 25 #include <linux/fs.h>
26 #include <linux/rcupdate.h> 26 #include <linux/rcupdate.h>
27 #include <linux/hrtimer.h> 27 #include <linux/hrtimer.h>
28 28
29 #include <asm/uaccess.h> 29 #include <asm/uaccess.h>
30 30
31
32 /*
33 * Estimate expected accuracy in ns from a timeval.
34 *
35 * After quite a bit of churning around, we've settled on
36 * a simple thing of taking 0.1% of the timeout as the
37 * slack, with a cap of 100 msec.
38 * "nice" tasks get a 0.5% slack instead.
39 *
40 * Consider this comment an open invitation to come up with even
41 * better solutions..
42 */
43
44 static unsigned long __estimate_accuracy(struct timespec *tv)
45 {
46 unsigned long slack;
47 int divfactor = 1000;
48
49 if (task_nice(current))
50 divfactor = divfactor / 5;
51
52 slack = tv->tv_nsec / divfactor;
53 slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
54
55 if (slack > 100 * NSEC_PER_MSEC)
56 slack = 100 * NSEC_PER_MSEC;
57 return slack;
58 }
59
60 static unsigned long estimate_accuracy(struct timespec *tv)
61 {
62 unsigned long ret;
63 struct timespec now;
64
65 /*
66 * Realtime tasks get a slack of 0 for obvious reasons.
67 */
68
69 if (current->policy == SCHED_FIFO ||
70 current->policy == SCHED_RR)
71 return 0;
72
73 ktime_get_ts(&now);
74 now = timespec_sub(*tv, now);
75 ret = __estimate_accuracy(&now);
76 if (ret < current->timer_slack_ns)
77 return current->timer_slack_ns;
78 return ret;
79 }
80
81
82
31 struct poll_table_page { 83 struct poll_table_page {
32 struct poll_table_page * next; 84 struct poll_table_page * next;
33 struct poll_table_entry * entry; 85 struct poll_table_entry * entry;
34 struct poll_table_entry entries[0]; 86 struct poll_table_entry entries[0];
35 }; 87 };
36 88
37 #define POLL_TABLE_FULL(table) \ 89 #define POLL_TABLE_FULL(table) \
38 ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) 90 ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
39 91
40 /* 92 /*
41 * Ok, Peter made a complicated, but straightforward multiple_wait() function. 93 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
42 * I have rewritten this, taking some shortcuts: This code may not be easy to 94 * I have rewritten this, taking some shortcuts: This code may not be easy to
43 * follow, but it should be free of race-conditions, and it's practical. If you 95 * follow, but it should be free of race-conditions, and it's practical. If you
44 * understand what I'm doing here, then you understand how the linux 96 * understand what I'm doing here, then you understand how the linux
45 * sleep/wakeup mechanism works. 97 * sleep/wakeup mechanism works.
46 * 98 *
47 * Two very simple procedures, poll_wait() and poll_freewait() make all the 99 * Two very simple procedures, poll_wait() and poll_freewait() make all the
48 * work. poll_wait() is an inline-function defined in <linux/poll.h>, 100 * work. poll_wait() is an inline-function defined in <linux/poll.h>,
49 * as all select/poll functions have to call it to add an entry to the 101 * as all select/poll functions have to call it to add an entry to the
50 * poll table. 102 * poll table.
51 */ 103 */
52 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, 104 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
53 poll_table *p); 105 poll_table *p);
54 106
55 void poll_initwait(struct poll_wqueues *pwq) 107 void poll_initwait(struct poll_wqueues *pwq)
56 { 108 {
57 init_poll_funcptr(&pwq->pt, __pollwait); 109 init_poll_funcptr(&pwq->pt, __pollwait);
58 pwq->error = 0; 110 pwq->error = 0;
59 pwq->table = NULL; 111 pwq->table = NULL;
60 pwq->inline_index = 0; 112 pwq->inline_index = 0;
61 } 113 }
62 114
63 EXPORT_SYMBOL(poll_initwait); 115 EXPORT_SYMBOL(poll_initwait);
64 116
65 static void free_poll_entry(struct poll_table_entry *entry) 117 static void free_poll_entry(struct poll_table_entry *entry)
66 { 118 {
67 remove_wait_queue(entry->wait_address, &entry->wait); 119 remove_wait_queue(entry->wait_address, &entry->wait);
68 fput(entry->filp); 120 fput(entry->filp);
69 } 121 }
70 122
71 void poll_freewait(struct poll_wqueues *pwq) 123 void poll_freewait(struct poll_wqueues *pwq)
72 { 124 {
73 struct poll_table_page * p = pwq->table; 125 struct poll_table_page * p = pwq->table;
74 int i; 126 int i;
75 for (i = 0; i < pwq->inline_index; i++) 127 for (i = 0; i < pwq->inline_index; i++)
76 free_poll_entry(pwq->inline_entries + i); 128 free_poll_entry(pwq->inline_entries + i);
77 while (p) { 129 while (p) {
78 struct poll_table_entry * entry; 130 struct poll_table_entry * entry;
79 struct poll_table_page *old; 131 struct poll_table_page *old;
80 132
81 entry = p->entry; 133 entry = p->entry;
82 do { 134 do {
83 entry--; 135 entry--;
84 free_poll_entry(entry); 136 free_poll_entry(entry);
85 } while (entry > p->entries); 137 } while (entry > p->entries);
86 old = p; 138 old = p;
87 p = p->next; 139 p = p->next;
88 free_page((unsigned long) old); 140 free_page((unsigned long) old);
89 } 141 }
90 } 142 }
91 143
92 EXPORT_SYMBOL(poll_freewait); 144 EXPORT_SYMBOL(poll_freewait);
93 145
94 static struct poll_table_entry *poll_get_entry(poll_table *_p) 146 static struct poll_table_entry *poll_get_entry(poll_table *_p)
95 { 147 {
96 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); 148 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
97 struct poll_table_page *table = p->table; 149 struct poll_table_page *table = p->table;
98 150
99 if (p->inline_index < N_INLINE_POLL_ENTRIES) 151 if (p->inline_index < N_INLINE_POLL_ENTRIES)
100 return p->inline_entries + p->inline_index++; 152 return p->inline_entries + p->inline_index++;
101 153
102 if (!table || POLL_TABLE_FULL(table)) { 154 if (!table || POLL_TABLE_FULL(table)) {
103 struct poll_table_page *new_table; 155 struct poll_table_page *new_table;
104 156
105 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); 157 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
106 if (!new_table) { 158 if (!new_table) {
107 p->error = -ENOMEM; 159 p->error = -ENOMEM;
108 __set_current_state(TASK_RUNNING); 160 __set_current_state(TASK_RUNNING);
109 return NULL; 161 return NULL;
110 } 162 }
111 new_table->entry = new_table->entries; 163 new_table->entry = new_table->entries;
112 new_table->next = table; 164 new_table->next = table;
113 p->table = new_table; 165 p->table = new_table;
114 table = new_table; 166 table = new_table;
115 } 167 }
116 168
117 return table->entry++; 169 return table->entry++;
118 } 170 }
119 171
120 /* Add a new entry */ 172 /* Add a new entry */
121 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, 173 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
122 poll_table *p) 174 poll_table *p)
123 { 175 {
124 struct poll_table_entry *entry = poll_get_entry(p); 176 struct poll_table_entry *entry = poll_get_entry(p);
125 if (!entry) 177 if (!entry)
126 return; 178 return;
127 get_file(filp); 179 get_file(filp);
128 entry->filp = filp; 180 entry->filp = filp;
129 entry->wait_address = wait_address; 181 entry->wait_address = wait_address;
130 init_waitqueue_entry(&entry->wait, current); 182 init_waitqueue_entry(&entry->wait, current);
131 add_wait_queue(wait_address, &entry->wait); 183 add_wait_queue(wait_address, &entry->wait);
132 } 184 }
133 185
134 /** 186 /**
135 * poll_select_set_timeout - helper function to setup the timeout value 187 * poll_select_set_timeout - helper function to setup the timeout value
136 * @to: pointer to timespec variable for the final timeout 188 * @to: pointer to timespec variable for the final timeout
137 * @sec: seconds (from user space) 189 * @sec: seconds (from user space)
138 * @nsec: nanoseconds (from user space) 190 * @nsec: nanoseconds (from user space)
139 * 191 *
140 * Note, we do not use a timespec for the user space value here, That 192 * Note, we do not use a timespec for the user space value here, That
141 * way we can use the function for timeval and compat interfaces as well. 193 * way we can use the function for timeval and compat interfaces as well.
142 * 194 *
143 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. 195 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
144 */ 196 */
145 int poll_select_set_timeout(struct timespec *to, long sec, long nsec) 197 int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
146 { 198 {
147 struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec}; 199 struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
148 200
149 if (!timespec_valid(&ts)) 201 if (!timespec_valid(&ts))
150 return -EINVAL; 202 return -EINVAL;
151 203
152 /* Optimize for the zero timeout value here */ 204 /* Optimize for the zero timeout value here */
153 if (!sec && !nsec) { 205 if (!sec && !nsec) {
154 to->tv_sec = to->tv_nsec = 0; 206 to->tv_sec = to->tv_nsec = 0;
155 } else { 207 } else {
156 ktime_get_ts(to); 208 ktime_get_ts(to);
157 *to = timespec_add_safe(*to, ts); 209 *to = timespec_add_safe(*to, ts);
158 } 210 }
159 return 0; 211 return 0;
160 } 212 }
161 213
162 static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, 214 static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
163 int timeval, int ret) 215 int timeval, int ret)
164 { 216 {
165 struct timespec rts; 217 struct timespec rts;
166 struct timeval rtv; 218 struct timeval rtv;
167 219
168 if (!p) 220 if (!p)
169 return ret; 221 return ret;
170 222
171 if (current->personality & STICKY_TIMEOUTS) 223 if (current->personality & STICKY_TIMEOUTS)
172 goto sticky; 224 goto sticky;
173 225
174 /* No update for zero timeout */ 226 /* No update for zero timeout */
175 if (!end_time->tv_sec && !end_time->tv_nsec) 227 if (!end_time->tv_sec && !end_time->tv_nsec)
176 return ret; 228 return ret;
177 229
178 ktime_get_ts(&rts); 230 ktime_get_ts(&rts);
179 rts = timespec_sub(*end_time, rts); 231 rts = timespec_sub(*end_time, rts);
180 if (rts.tv_sec < 0) 232 if (rts.tv_sec < 0)
181 rts.tv_sec = rts.tv_nsec = 0; 233 rts.tv_sec = rts.tv_nsec = 0;
182 234
183 if (timeval) { 235 if (timeval) {
184 rtv.tv_sec = rts.tv_sec; 236 rtv.tv_sec = rts.tv_sec;
185 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; 237 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
186 238
187 if (!copy_to_user(p, &rtv, sizeof(rtv))) 239 if (!copy_to_user(p, &rtv, sizeof(rtv)))
188 return ret; 240 return ret;
189 241
190 } else if (!copy_to_user(p, &rts, sizeof(rts))) 242 } else if (!copy_to_user(p, &rts, sizeof(rts)))
191 return ret; 243 return ret;
192 244
193 /* 245 /*
194 * If an application puts its timeval in read-only memory, we 246 * If an application puts its timeval in read-only memory, we
195 * don't want the Linux-specific update to the timeval to 247 * don't want the Linux-specific update to the timeval to
196 * cause a fault after the select has completed 248 * cause a fault after the select has completed
197 * successfully. However, because we're not updating the 249 * successfully. However, because we're not updating the
198 * timeval, we can't restart the system call. 250 * timeval, we can't restart the system call.
199 */ 251 */
200 252
201 sticky: 253 sticky:
202 if (ret == -ERESTARTNOHAND) 254 if (ret == -ERESTARTNOHAND)
203 ret = -EINTR; 255 ret = -EINTR;
204 return ret; 256 return ret;
205 } 257 }
206 258
207 #define FDS_IN(fds, n) (fds->in + n) 259 #define FDS_IN(fds, n) (fds->in + n)
208 #define FDS_OUT(fds, n) (fds->out + n) 260 #define FDS_OUT(fds, n) (fds->out + n)
209 #define FDS_EX(fds, n) (fds->ex + n) 261 #define FDS_EX(fds, n) (fds->ex + n)
210 262
211 #define BITS(fds, n) (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n)) 263 #define BITS(fds, n) (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
212 264
213 static int max_select_fd(unsigned long n, fd_set_bits *fds) 265 static int max_select_fd(unsigned long n, fd_set_bits *fds)
214 { 266 {
215 unsigned long *open_fds; 267 unsigned long *open_fds;
216 unsigned long set; 268 unsigned long set;
217 int max; 269 int max;
218 struct fdtable *fdt; 270 struct fdtable *fdt;
219 271
220 /* handle last in-complete long-word first */ 272 /* handle last in-complete long-word first */
221 set = ~(~0UL << (n & (__NFDBITS-1))); 273 set = ~(~0UL << (n & (__NFDBITS-1)));
222 n /= __NFDBITS; 274 n /= __NFDBITS;
223 fdt = files_fdtable(current->files); 275 fdt = files_fdtable(current->files);
224 open_fds = fdt->open_fds->fds_bits+n; 276 open_fds = fdt->open_fds->fds_bits+n;
225 max = 0; 277 max = 0;
226 if (set) { 278 if (set) {
227 set &= BITS(fds, n); 279 set &= BITS(fds, n);
228 if (set) { 280 if (set) {
229 if (!(set & ~*open_fds)) 281 if (!(set & ~*open_fds))
230 goto get_max; 282 goto get_max;
231 return -EBADF; 283 return -EBADF;
232 } 284 }
233 } 285 }
234 while (n) { 286 while (n) {
235 open_fds--; 287 open_fds--;
236 n--; 288 n--;
237 set = BITS(fds, n); 289 set = BITS(fds, n);
238 if (!set) 290 if (!set)
239 continue; 291 continue;
240 if (set & ~*open_fds) 292 if (set & ~*open_fds)
241 return -EBADF; 293 return -EBADF;
242 if (max) 294 if (max)
243 continue; 295 continue;
244 get_max: 296 get_max:
245 do { 297 do {
246 max++; 298 max++;
247 set >>= 1; 299 set >>= 1;
248 } while (set); 300 } while (set);
249 max += n * __NFDBITS; 301 max += n * __NFDBITS;
250 } 302 }
251 303
252 return max; 304 return max;
253 } 305 }
254 306
255 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) 307 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
256 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 308 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
257 #define POLLEX_SET (POLLPRI) 309 #define POLLEX_SET (POLLPRI)
258 310
259 int do_select(int n, fd_set_bits *fds, struct timespec *end_time) 311 int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
260 { 312 {
261 ktime_t expire, *to = NULL; 313 ktime_t expire, *to = NULL;
262 struct poll_wqueues table; 314 struct poll_wqueues table;
263 poll_table *wait; 315 poll_table *wait;
264 int retval, i, timed_out = 0; 316 int retval, i, timed_out = 0;
317 unsigned long slack = 0;
265 318
266 rcu_read_lock(); 319 rcu_read_lock();
267 retval = max_select_fd(n, fds); 320 retval = max_select_fd(n, fds);
268 rcu_read_unlock(); 321 rcu_read_unlock();
269 322
270 if (retval < 0) 323 if (retval < 0)
271 return retval; 324 return retval;
272 n = retval; 325 n = retval;
273 326
274 poll_initwait(&table); 327 poll_initwait(&table);
275 wait = &table.pt; 328 wait = &table.pt;
276 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { 329 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
277 wait = NULL; 330 wait = NULL;
278 timed_out = 1; 331 timed_out = 1;
279 } 332 }
280 333
334 if (end_time)
335 slack = estimate_accuracy(end_time);
336
281 retval = 0; 337 retval = 0;
282 for (;;) { 338 for (;;) {
283 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 339 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
284 340
285 set_current_state(TASK_INTERRUPTIBLE); 341 set_current_state(TASK_INTERRUPTIBLE);
286 342
287 inp = fds->in; outp = fds->out; exp = fds->ex; 343 inp = fds->in; outp = fds->out; exp = fds->ex;
288 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; 344 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
289 345
290 for (i = 0; i < n; ++rinp, ++routp, ++rexp) { 346 for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
291 unsigned long in, out, ex, all_bits, bit = 1, mask, j; 347 unsigned long in, out, ex, all_bits, bit = 1, mask, j;
292 unsigned long res_in = 0, res_out = 0, res_ex = 0; 348 unsigned long res_in = 0, res_out = 0, res_ex = 0;
293 const struct file_operations *f_op = NULL; 349 const struct file_operations *f_op = NULL;
294 struct file *file = NULL; 350 struct file *file = NULL;
295 351
296 in = *inp++; out = *outp++; ex = *exp++; 352 in = *inp++; out = *outp++; ex = *exp++;
297 all_bits = in | out | ex; 353 all_bits = in | out | ex;
298 if (all_bits == 0) { 354 if (all_bits == 0) {
299 i += __NFDBITS; 355 i += __NFDBITS;
300 continue; 356 continue;
301 } 357 }
302 358
303 for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { 359 for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
304 int fput_needed; 360 int fput_needed;
305 if (i >= n) 361 if (i >= n)
306 break; 362 break;
307 if (!(bit & all_bits)) 363 if (!(bit & all_bits))
308 continue; 364 continue;
309 file = fget_light(i, &fput_needed); 365 file = fget_light(i, &fput_needed);
310 if (file) { 366 if (file) {
311 f_op = file->f_op; 367 f_op = file->f_op;
312 mask = DEFAULT_POLLMASK; 368 mask = DEFAULT_POLLMASK;
313 if (f_op && f_op->poll) 369 if (f_op && f_op->poll)
314 mask = (*f_op->poll)(file, retval ? NULL : wait); 370 mask = (*f_op->poll)(file, retval ? NULL : wait);
315 fput_light(file, fput_needed); 371 fput_light(file, fput_needed);
316 if ((mask & POLLIN_SET) && (in & bit)) { 372 if ((mask & POLLIN_SET) && (in & bit)) {
317 res_in |= bit; 373 res_in |= bit;
318 retval++; 374 retval++;
319 } 375 }
320 if ((mask & POLLOUT_SET) && (out & bit)) { 376 if ((mask & POLLOUT_SET) && (out & bit)) {
321 res_out |= bit; 377 res_out |= bit;
322 retval++; 378 retval++;
323 } 379 }
324 if ((mask & POLLEX_SET) && (ex & bit)) { 380 if ((mask & POLLEX_SET) && (ex & bit)) {
325 res_ex |= bit; 381 res_ex |= bit;
326 retval++; 382 retval++;
327 } 383 }
328 } 384 }
329 } 385 }
330 if (res_in) 386 if (res_in)
331 *rinp = res_in; 387 *rinp = res_in;
332 if (res_out) 388 if (res_out)
333 *routp = res_out; 389 *routp = res_out;
334 if (res_ex) 390 if (res_ex)
335 *rexp = res_ex; 391 *rexp = res_ex;
336 cond_resched(); 392 cond_resched();
337 } 393 }
338 wait = NULL; 394 wait = NULL;
339 if (retval || timed_out || signal_pending(current)) 395 if (retval || timed_out || signal_pending(current))
340 break; 396 break;
341 if (table.error) { 397 if (table.error) {
342 retval = table.error; 398 retval = table.error;
343 break; 399 break;
344 } 400 }
345 401
346 /* 402 /*
347 * If this is the first loop and we have a timeout 403 * If this is the first loop and we have a timeout
348 * given, then we convert to ktime_t and set the to 404 * given, then we convert to ktime_t and set the to
349 * pointer to the expiry value. 405 * pointer to the expiry value.
350 */ 406 */
351 if (end_time && !to) { 407 if (end_time && !to) {
352 expire = timespec_to_ktime(*end_time); 408 expire = timespec_to_ktime(*end_time);
353 to = &expire; 409 to = &expire;
354 } 410 }
355 411
356 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS)) 412 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
357 timed_out = 1; 413 timed_out = 1;
358 } 414 }
359 __set_current_state(TASK_RUNNING); 415 __set_current_state(TASK_RUNNING);
360 416
361 poll_freewait(&table); 417 poll_freewait(&table);
362 418
363 return retval; 419 return retval;
364 } 420 }
365 421
366 /* 422 /*
367 * We can actually return ERESTARTSYS instead of EINTR, but I'd 423 * We can actually return ERESTARTSYS instead of EINTR, but I'd
368 * like to be certain this leads to no problems. So I return 424 * like to be certain this leads to no problems. So I return
369 * EINTR just for safety. 425 * EINTR just for safety.
370 * 426 *
371 * Update: ERESTARTSYS breaks at least the xview clock binary, so 427 * Update: ERESTARTSYS breaks at least the xview clock binary, so
372 * I'm trying ERESTARTNOHAND which restart only when you want to. 428 * I'm trying ERESTARTNOHAND which restart only when you want to.
373 */ 429 */
374 #define MAX_SELECT_SECONDS \ 430 #define MAX_SELECT_SECONDS \
375 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 431 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
376 432
377 int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 433 int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, struct timespec *end_time) 434 fd_set __user *exp, struct timespec *end_time)
379 { 435 {
380 fd_set_bits fds; 436 fd_set_bits fds;
381 void *bits; 437 void *bits;
382 int ret, max_fds; 438 int ret, max_fds;
383 unsigned int size; 439 unsigned int size;
384 struct fdtable *fdt; 440 struct fdtable *fdt;
385 /* Allocate small arguments on the stack to save memory and be faster */ 441 /* Allocate small arguments on the stack to save memory and be faster */
386 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; 442 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
387 443
388 ret = -EINVAL; 444 ret = -EINVAL;
389 if (n < 0) 445 if (n < 0)
390 goto out_nofds; 446 goto out_nofds;
391 447
392 /* max_fds can increase, so grab it once to avoid race */ 448 /* max_fds can increase, so grab it once to avoid race */
393 rcu_read_lock(); 449 rcu_read_lock();
394 fdt = files_fdtable(current->files); 450 fdt = files_fdtable(current->files);
395 max_fds = fdt->max_fds; 451 max_fds = fdt->max_fds;
396 rcu_read_unlock(); 452 rcu_read_unlock();
397 if (n > max_fds) 453 if (n > max_fds)
398 n = max_fds; 454 n = max_fds;
399 455
400 /* 456 /*
401 * We need 6 bitmaps (in/out/ex for both incoming and outgoing), 457 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
402 * since we used fdset we need to allocate memory in units of 458 * since we used fdset we need to allocate memory in units of
403 * long-words. 459 * long-words.
404 */ 460 */
405 size = FDS_BYTES(n); 461 size = FDS_BYTES(n);
406 bits = stack_fds; 462 bits = stack_fds;
407 if (size > sizeof(stack_fds) / 6) { 463 if (size > sizeof(stack_fds) / 6) {
408 /* Not enough space in on-stack array; must use kmalloc */ 464 /* Not enough space in on-stack array; must use kmalloc */
409 ret = -ENOMEM; 465 ret = -ENOMEM;
410 bits = kmalloc(6 * size, GFP_KERNEL); 466 bits = kmalloc(6 * size, GFP_KERNEL);
411 if (!bits) 467 if (!bits)
412 goto out_nofds; 468 goto out_nofds;
413 } 469 }
414 fds.in = bits; 470 fds.in = bits;
415 fds.out = bits + size; 471 fds.out = bits + size;
416 fds.ex = bits + 2*size; 472 fds.ex = bits + 2*size;
417 fds.res_in = bits + 3*size; 473 fds.res_in = bits + 3*size;
418 fds.res_out = bits + 4*size; 474 fds.res_out = bits + 4*size;
419 fds.res_ex = bits + 5*size; 475 fds.res_ex = bits + 5*size;
420 476
421 if ((ret = get_fd_set(n, inp, fds.in)) || 477 if ((ret = get_fd_set(n, inp, fds.in)) ||
422 (ret = get_fd_set(n, outp, fds.out)) || 478 (ret = get_fd_set(n, outp, fds.out)) ||
423 (ret = get_fd_set(n, exp, fds.ex))) 479 (ret = get_fd_set(n, exp, fds.ex)))
424 goto out; 480 goto out;
425 zero_fd_set(n, fds.res_in); 481 zero_fd_set(n, fds.res_in);
426 zero_fd_set(n, fds.res_out); 482 zero_fd_set(n, fds.res_out);
427 zero_fd_set(n, fds.res_ex); 483 zero_fd_set(n, fds.res_ex);
428 484
429 ret = do_select(n, &fds, end_time); 485 ret = do_select(n, &fds, end_time);
430 486
431 if (ret < 0) 487 if (ret < 0)
432 goto out; 488 goto out;
433 if (!ret) { 489 if (!ret) {
434 ret = -ERESTARTNOHAND; 490 ret = -ERESTARTNOHAND;
435 if (signal_pending(current)) 491 if (signal_pending(current))
436 goto out; 492 goto out;
437 ret = 0; 493 ret = 0;
438 } 494 }
439 495
440 if (set_fd_set(n, inp, fds.res_in) || 496 if (set_fd_set(n, inp, fds.res_in) ||
441 set_fd_set(n, outp, fds.res_out) || 497 set_fd_set(n, outp, fds.res_out) ||
442 set_fd_set(n, exp, fds.res_ex)) 498 set_fd_set(n, exp, fds.res_ex))
443 ret = -EFAULT; 499 ret = -EFAULT;
444 500
445 out: 501 out:
446 if (bits != stack_fds) 502 if (bits != stack_fds)
447 kfree(bits); 503 kfree(bits);
448 out_nofds: 504 out_nofds:
449 return ret; 505 return ret;
450 } 506 }
451 507
452 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 508 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
453 fd_set __user *exp, struct timeval __user *tvp) 509 fd_set __user *exp, struct timeval __user *tvp)
454 { 510 {
455 struct timespec end_time, *to = NULL; 511 struct timespec end_time, *to = NULL;
456 struct timeval tv; 512 struct timeval tv;
457 int ret; 513 int ret;
458 514
459 if (tvp) { 515 if (tvp) {
460 if (copy_from_user(&tv, tvp, sizeof(tv))) 516 if (copy_from_user(&tv, tvp, sizeof(tv)))
461 return -EFAULT; 517 return -EFAULT;
462 518
463 to = &end_time; 519 to = &end_time;
464 if (poll_select_set_timeout(to, tv.tv_sec, 520 if (poll_select_set_timeout(to, tv.tv_sec,
465 tv.tv_usec * NSEC_PER_USEC)) 521 tv.tv_usec * NSEC_PER_USEC))
466 return -EINVAL; 522 return -EINVAL;
467 } 523 }
468 524
469 ret = core_sys_select(n, inp, outp, exp, to); 525 ret = core_sys_select(n, inp, outp, exp, to);
470 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); 526 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
471 527
472 return ret; 528 return ret;
473 } 529 }
474 530
475 #ifdef HAVE_SET_RESTORE_SIGMASK 531 #ifdef HAVE_SET_RESTORE_SIGMASK
476 asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, 532 asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
477 fd_set __user *exp, struct timespec __user *tsp, 533 fd_set __user *exp, struct timespec __user *tsp,
478 const sigset_t __user *sigmask, size_t sigsetsize) 534 const sigset_t __user *sigmask, size_t sigsetsize)
479 { 535 {
480 sigset_t ksigmask, sigsaved; 536 sigset_t ksigmask, sigsaved;
481 struct timespec ts, end_time, *to = NULL; 537 struct timespec ts, end_time, *to = NULL;
482 int ret; 538 int ret;
483 539
484 if (tsp) { 540 if (tsp) {
485 if (copy_from_user(&ts, tsp, sizeof(ts))) 541 if (copy_from_user(&ts, tsp, sizeof(ts)))
486 return -EFAULT; 542 return -EFAULT;
487 543
488 to = &end_time; 544 to = &end_time;
489 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) 545 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
490 return -EINVAL; 546 return -EINVAL;
491 } 547 }
492 548
493 if (sigmask) { 549 if (sigmask) {
494 /* XXX: Don't preclude handling different sized sigset_t's. */ 550 /* XXX: Don't preclude handling different sized sigset_t's. */
495 if (sigsetsize != sizeof(sigset_t)) 551 if (sigsetsize != sizeof(sigset_t))
496 return -EINVAL; 552 return -EINVAL;
497 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) 553 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
498 return -EFAULT; 554 return -EFAULT;
499 555
500 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 556 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
501 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 557 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
502 } 558 }
503 559
504 ret = core_sys_select(n, inp, outp, exp, &end_time); 560 ret = core_sys_select(n, inp, outp, exp, &end_time);
505 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); 561 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
506 562
507 if (ret == -ERESTARTNOHAND) { 563 if (ret == -ERESTARTNOHAND) {
508 /* 564 /*
509 * Don't restore the signal mask yet. Let do_signal() deliver 565 * Don't restore the signal mask yet. Let do_signal() deliver
510 * the signal on the way back to userspace, before the signal 566 * the signal on the way back to userspace, before the signal
511 * mask is restored. 567 * mask is restored.
512 */ 568 */
513 if (sigmask) { 569 if (sigmask) {
514 memcpy(&current->saved_sigmask, &sigsaved, 570 memcpy(&current->saved_sigmask, &sigsaved,
515 sizeof(sigsaved)); 571 sizeof(sigsaved));
516 set_restore_sigmask(); 572 set_restore_sigmask();
517 } 573 }
518 } else if (sigmask) 574 } else if (sigmask)
519 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 575 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
520 576
521 return ret; 577 return ret;
522 } 578 }
523 579
524 /* 580 /*
525 * Most architectures can't handle 7-argument syscalls. So we provide a 581 * Most architectures can't handle 7-argument syscalls. So we provide a
526 * 6-argument version where the sixth argument is a pointer to a structure 582 * 6-argument version where the sixth argument is a pointer to a structure
527 * which has a pointer to the sigset_t itself followed by a size_t containing 583 * which has a pointer to the sigset_t itself followed by a size_t containing
528 * the sigset size. 584 * the sigset size.
529 */ 585 */
530 asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, 586 asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
531 fd_set __user *exp, struct timespec __user *tsp, void __user *sig) 587 fd_set __user *exp, struct timespec __user *tsp, void __user *sig)
532 { 588 {
533 size_t sigsetsize = 0; 589 size_t sigsetsize = 0;
534 sigset_t __user *up = NULL; 590 sigset_t __user *up = NULL;
535 591
536 if (sig) { 592 if (sig) {
537 if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t)) 593 if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
538 || __get_user(up, (sigset_t __user * __user *)sig) 594 || __get_user(up, (sigset_t __user * __user *)sig)
539 || __get_user(sigsetsize, 595 || __get_user(sigsetsize,
540 (size_t __user *)(sig+sizeof(void *)))) 596 (size_t __user *)(sig+sizeof(void *))))
541 return -EFAULT; 597 return -EFAULT;
542 } 598 }
543 599
544 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); 600 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
545 } 601 }
546 #endif /* HAVE_SET_RESTORE_SIGMASK */ 602 #endif /* HAVE_SET_RESTORE_SIGMASK */
547 603
548 struct poll_list { 604 struct poll_list {
549 struct poll_list *next; 605 struct poll_list *next;
550 int len; 606 int len;
551 struct pollfd entries[0]; 607 struct pollfd entries[0];
552 }; 608 };
553 609
554 #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) 610 #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
555 611
556 /* 612 /*
557 * Fish for pollable events on the pollfd->fd file descriptor. We're only 613 * Fish for pollable events on the pollfd->fd file descriptor. We're only
558 * interested in events matching the pollfd->events mask, and the result 614 * interested in events matching the pollfd->events mask, and the result
559 * matching that mask is both recorded in pollfd->revents and returned. The 615 * matching that mask is both recorded in pollfd->revents and returned. The
560 * pwait poll_table will be used by the fd-provided poll handler for waiting, 616 * pwait poll_table will be used by the fd-provided poll handler for waiting,
561 * if non-NULL. 617 * if non-NULL.
562 */ 618 */
563 static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) 619 static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
564 { 620 {
565 unsigned int mask; 621 unsigned int mask;
566 int fd; 622 int fd;
567 623
568 mask = 0; 624 mask = 0;
569 fd = pollfd->fd; 625 fd = pollfd->fd;
570 if (fd >= 0) { 626 if (fd >= 0) {
571 int fput_needed; 627 int fput_needed;
572 struct file * file; 628 struct file * file;
573 629
574 file = fget_light(fd, &fput_needed); 630 file = fget_light(fd, &fput_needed);
575 mask = POLLNVAL; 631 mask = POLLNVAL;
576 if (file != NULL) { 632 if (file != NULL) {
577 mask = DEFAULT_POLLMASK; 633 mask = DEFAULT_POLLMASK;
578 if (file->f_op && file->f_op->poll) 634 if (file->f_op && file->f_op->poll)
579 mask = file->f_op->poll(file, pwait); 635 mask = file->f_op->poll(file, pwait);
580 /* Mask out unneeded events. */ 636 /* Mask out unneeded events. */
581 mask &= pollfd->events | POLLERR | POLLHUP; 637 mask &= pollfd->events | POLLERR | POLLHUP;
582 fput_light(file, fput_needed); 638 fput_light(file, fput_needed);
583 } 639 }
584 } 640 }
585 pollfd->revents = mask; 641 pollfd->revents = mask;
586 642
587 return mask; 643 return mask;
588 } 644 }
589 645
590 static int do_poll(unsigned int nfds, struct poll_list *list, 646 static int do_poll(unsigned int nfds, struct poll_list *list,
591 struct poll_wqueues *wait, struct timespec *end_time) 647 struct poll_wqueues *wait, struct timespec *end_time)
592 { 648 {
593 poll_table* pt = &wait->pt; 649 poll_table* pt = &wait->pt;
594 ktime_t expire, *to = NULL; 650 ktime_t expire, *to = NULL;
595 int timed_out = 0, count = 0; 651 int timed_out = 0, count = 0;
652 unsigned long slack = 0;
596 653
597 /* Optimise the no-wait case */ 654 /* Optimise the no-wait case */
598 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { 655 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
599 pt = NULL; 656 pt = NULL;
600 timed_out = 1; 657 timed_out = 1;
601 } 658 }
602 659
660 if (end_time)
661 slack = estimate_accuracy(end_time);
662
603 for (;;) { 663 for (;;) {
604 struct poll_list *walk; 664 struct poll_list *walk;
605 665
606 set_current_state(TASK_INTERRUPTIBLE); 666 set_current_state(TASK_INTERRUPTIBLE);
607 for (walk = list; walk != NULL; walk = walk->next) { 667 for (walk = list; walk != NULL; walk = walk->next) {
608 struct pollfd * pfd, * pfd_end; 668 struct pollfd * pfd, * pfd_end;
609 669
610 pfd = walk->entries; 670 pfd = walk->entries;
611 pfd_end = pfd + walk->len; 671 pfd_end = pfd + walk->len;
612 for (; pfd != pfd_end; pfd++) { 672 for (; pfd != pfd_end; pfd++) {
613 /* 673 /*
614 * Fish for events. If we found one, record it 674 * Fish for events. If we found one, record it
615 * and kill the poll_table, so we don't 675 * and kill the poll_table, so we don't
616 * needlessly register any other waiters after 676 * needlessly register any other waiters after
617 * this. They'll get immediately deregistered 677 * this. They'll get immediately deregistered
618 * when we break out and return. 678 * when we break out and return.
619 */ 679 */
620 if (do_pollfd(pfd, pt)) { 680 if (do_pollfd(pfd, pt)) {
621 count++; 681 count++;
622 pt = NULL; 682 pt = NULL;
623 } 683 }
624 } 684 }
625 } 685 }
626 /* 686 /*
627 * All waiters have already been registered, so don't provide 687 * All waiters have already been registered, so don't provide
628 * a poll_table to them on the next loop iteration. 688 * a poll_table to them on the next loop iteration.
629 */ 689 */
630 pt = NULL; 690 pt = NULL;
631 if (!count) { 691 if (!count) {
632 count = wait->error; 692 count = wait->error;
633 if (signal_pending(current)) 693 if (signal_pending(current))
634 count = -EINTR; 694 count = -EINTR;
635 } 695 }
636 if (count || timed_out) 696 if (count || timed_out)
637 break; 697 break;
638 698
639 /* 699 /*
640 * If this is the first loop and we have a timeout 700 * If this is the first loop and we have a timeout
641 * given, then we convert to ktime_t and set the to 701 * given, then we convert to ktime_t and set the to
642 * pointer to the expiry value. 702 * pointer to the expiry value.
643 */ 703 */
644 if (end_time && !to) { 704 if (end_time && !to) {
645 expire = timespec_to_ktime(*end_time); 705 expire = timespec_to_ktime(*end_time);
646 to = &expire; 706 to = &expire;
647 } 707 }
648 708
649 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS)) 709 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
650 timed_out = 1; 710 timed_out = 1;
651 } 711 }
652 __set_current_state(TASK_RUNNING); 712 __set_current_state(TASK_RUNNING);
653 return count; 713 return count;
654 } 714 }
655 715
/* How many pollfd entries fit in do_sys_poll()'s on-stack buffer
 * (stack_pps) after the struct poll_list header.  Only usable inside
 * do_sys_poll(), where stack_pps is in scope. */
#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
			sizeof(struct pollfd))
658 718
/*
 * Copy the user's pollfd array into kernel space (first chunk on the
 * stack, further chunks in page-sized kmalloc allocations chained via
 * struct poll_list), run the poll loop, and copy each entry's revents
 * back to userspace.
 *
 * Returns the number of ready descriptors or a -errno value.  @end_time
 * is an absolute expiry; NULL means wait indefinitely.
 */
int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
		struct timespec *end_time)
{
	struct poll_wqueues table;
	int err = -EFAULT, fdcount, len, size;
	/* Allocate small arguments on the stack to save memory and be
	   faster - use long to make sure the buffer is aligned properly
	   on 64 bit archs to avoid unaligned access */
	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
	struct poll_list *const head = (struct poll_list *)stack_pps;
	struct poll_list *walk = head;
	unsigned long todo = nfds;

	if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		return -EINVAL;

	/* First chunk lives in stack_pps; later chunks are kmalloc'ed. */
	len = min_t(unsigned int, nfds, N_STACK_PPS);
	for (;;) {
		walk->next = NULL;
		walk->len = len;
		if (!len)
			break;

		/* nfds - todo entries have already been consumed. */
		if (copy_from_user(walk->entries, ufds + nfds-todo,
					sizeof(struct pollfd) * walk->len))
			goto out_fds;

		todo -= walk->len;
		if (!todo)
			break;

		len = min(todo, POLLFD_PER_PAGE);
		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
		walk = walk->next = kmalloc(size, GFP_KERNEL);
		if (!walk) {
			err = -ENOMEM;
			goto out_fds;
		}
	}

	poll_initwait(&table);
	fdcount = do_poll(nfds, head, &table, end_time);
	poll_freewait(&table);

	/* Write each entry's revents back to the user's array. */
	for (walk = head; walk; walk = walk->next) {
		struct pollfd *fds = walk->entries;
		int j;

		for (j = 0; j < walk->len; j++, ufds++)
			if (__put_user(fds[j].revents, &ufds->revents))
				goto out_fds;
	}

	err = fdcount;
out_fds:
	/* Free only the kmalloc'ed chunks; head itself is on the stack. */
	walk = head->next;
	while (walk) {
		struct poll_list *pos = walk;
		walk = walk->next;
		kfree(pos);
	}

	return err;
}
723 783
724 static long do_restart_poll(struct restart_block *restart_block) 784 static long do_restart_poll(struct restart_block *restart_block)
725 { 785 {
726 struct pollfd __user *ufds = restart_block->poll.ufds; 786 struct pollfd __user *ufds = restart_block->poll.ufds;
727 int nfds = restart_block->poll.nfds; 787 int nfds = restart_block->poll.nfds;
728 struct timespec *to = NULL, end_time; 788 struct timespec *to = NULL, end_time;
729 int ret; 789 int ret;
730 790
731 if (restart_block->poll.has_timeout) { 791 if (restart_block->poll.has_timeout) {
732 end_time.tv_sec = restart_block->poll.tv_sec; 792 end_time.tv_sec = restart_block->poll.tv_sec;
733 end_time.tv_nsec = restart_block->poll.tv_nsec; 793 end_time.tv_nsec = restart_block->poll.tv_nsec;
734 to = &end_time; 794 to = &end_time;
735 } 795 }
736 796
737 ret = do_sys_poll(ufds, nfds, to); 797 ret = do_sys_poll(ufds, nfds, to);
738 798
739 if (ret == -EINTR) { 799 if (ret == -EINTR) {
740 restart_block->fn = do_restart_poll; 800 restart_block->fn = do_restart_poll;
741 ret = -ERESTART_RESTARTBLOCK; 801 ret = -ERESTART_RESTARTBLOCK;
742 } 802 }
743 return ret; 803 return ret;
744 } 804 }
745 805
746 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 806 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
747 long timeout_msecs) 807 long timeout_msecs)
748 { 808 {
749 struct timespec end_time, *to = NULL; 809 struct timespec end_time, *to = NULL;
750 int ret; 810 int ret;
751 811
752 if (timeout_msecs >= 0) { 812 if (timeout_msecs >= 0) {
753 to = &end_time; 813 to = &end_time;
754 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC, 814 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
755 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); 815 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
756 } 816 }
757 817
758 ret = do_sys_poll(ufds, nfds, to); 818 ret = do_sys_poll(ufds, nfds, to);
759 819
760 if (ret == -EINTR) { 820 if (ret == -EINTR) {
761 struct restart_block *restart_block; 821 struct restart_block *restart_block;
762 822
763 restart_block = &current_thread_info()->restart_block; 823 restart_block = &current_thread_info()->restart_block;
764 restart_block->fn = do_restart_poll; 824 restart_block->fn = do_restart_poll;
765 restart_block->poll.ufds = ufds; 825 restart_block->poll.ufds = ufds;
766 restart_block->poll.nfds = nfds; 826 restart_block->poll.nfds = nfds;
767 827
768 if (timeout_msecs >= 0) { 828 if (timeout_msecs >= 0) {
769 restart_block->poll.tv_sec = end_time.tv_sec; 829 restart_block->poll.tv_sec = end_time.tv_sec;
770 restart_block->poll.tv_nsec = end_time.tv_nsec; 830 restart_block->poll.tv_nsec = end_time.tv_nsec;
771 restart_block->poll.has_timeout = 1; 831 restart_block->poll.has_timeout = 1;
772 } else 832 } else
773 restart_block->poll.has_timeout = 0; 833 restart_block->poll.has_timeout = 0;
774 834
775 ret = -ERESTART_RESTARTBLOCK; 835 ret = -ERESTART_RESTARTBLOCK;
776 } 836 }
777 return ret; 837 return ret;
778 } 838 }
779 839
780 #ifdef HAVE_SET_RESTORE_SIGMASK 840 #ifdef HAVE_SET_RESTORE_SIGMASK
/*
 * ppoll(): poll() with a timespec timeout and a signal mask that is
 * installed for the duration of the call.  Remaining time is written
 * back to @tsp via poll_select_copy_remaining().
 */
asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
	struct timespec __user *tsp, const sigset_t __user *sigmask,
	size_t sigsetsize)
{
	sigset_t ksigmask, sigsaved;
	struct timespec ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		if (copy_from_user(&ts, tsp, sizeof(ts)))
			return -EFAULT;

		/* Convert the relative timeout into an absolute deadline. */
		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	if (sigmask) {
		/* XXX: Don't preclude handling different sized sigset_t's. */
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
			return -EFAULT;

		/* SIGKILL/SIGSTOP can never be blocked. */
		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	ret = do_sys_poll(ufds, nfds, to);

	/* We can restart this syscall, usually */
	if (ret == -EINTR) {
		/*
		 * Don't restore the signal mask yet. Let do_signal() deliver
		 * the signal on the way back to userspace, before the signal
		 * mask is restored.
		 */
		if (sigmask) {
			memcpy(&current->saved_sigmask, &sigsaved,
					sizeof(sigsaved));
			set_restore_sigmask();
		}
		ret = -ERESTARTNOHAND;
	} else if (sigmask)
		/* Not interrupted: restore the caller's mask immediately. */
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);

	return ret;
}
831 #endif /* HAVE_SET_RESTORE_SIGMASK */ 891 #endif /* HAVE_SET_RESTORE_SIGMASK */
832 892