Blame view
fs/timerfd.c
13 KB
b215e2839
|
1 2 3 4 5 6 7 8 9 |
/*
 *  fs/timerfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 *
 *  Thanks to Thomas Gleixner for code reviews and useful comments.
 *
 */
11ffa9d60
|
10 |
#include <linux/alarmtimer.h> |
b215e2839
|
11 12 13 14 15 16 |
#include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> |
5a0e3ad6a
|
17 |
#include <linux/slab.h> |
b215e2839
|
18 19 20 21 22 23 |
#include <linux/list.h> #include <linux/spinlock.h> #include <linux/time.h> #include <linux/hrtimer.h> #include <linux/anon_inodes.h> #include <linux/timerfd.h> |
45cc2b96f
|
24 |
#include <linux/syscalls.h> |
9d94b9e2f
|
25 |
#include <linux/compat.h> |
9ec269075
|
26 |
#include <linux/rcupdate.h> |
b215e2839
|
27 28 |
struct timerfd_ctx { |
11ffa9d60
|
29 30 31 32 |
union { struct hrtimer tmr; struct alarm alarm; } t; |
b215e2839
|
33 |
ktime_t tintv; |
99ee5315d
|
34 |
ktime_t moffs; |
b215e2839
|
35 |
wait_queue_head_t wqh; |
4d672e7ac
|
36 |
u64 ticks; |
4d672e7ac
|
37 |
int clockid; |
af9c4957c
|
38 39 |
short unsigned expired; short unsigned settime_flags; /* to show in fdinfo */ |
9ec269075
|
40 41 |
struct rcu_head rcu; struct list_head clist; |
99ee5315d
|
42 |
bool might_cancel; |
b215e2839
|
43 |
}; |
9ec269075
|
44 45 |
/* Serializes additions to and removals from cancel_list. */
static DEFINE_SPINLOCK(cancel_lock);
/*
 * Contexts that asked to be canceled when the clock is set. Walked under
 * RCU by timerfd_clock_was_set().
 */
static LIST_HEAD(cancel_list);
11ffa9d60
|
46 47 48 49 50 |
/*
 * Tell whether @ctx is backed by an alarm timer (t.alarm) rather than a
 * plain hrtimer (t.tmr), judging by its clock id.
 */
static inline bool isalarm(struct timerfd_ctx *ctx)
{
	switch (ctx->clockid) {
	case CLOCK_REALTIME_ALARM:
	case CLOCK_BOOTTIME_ALARM:
		return true;
	default:
		return false;
	}
}
b215e2839
|
51 52 53 |
/* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, |
4d672e7ac
|
54 |
* tintv.tv64 != 0) until the timer is accessed. |
b215e2839
|
55 |
*/ |
11ffa9d60
|
56 |
static void timerfd_triggered(struct timerfd_ctx *ctx) |
b215e2839
|
57 |
{ |
b215e2839
|
58 |
unsigned long flags; |
18963c01b
|
59 |
spin_lock_irqsave(&ctx->wqh.lock, flags); |
b215e2839
|
60 |
ctx->expired = 1; |
4d672e7ac
|
61 |
ctx->ticks++; |
b215e2839
|
62 |
wake_up_locked(&ctx->wqh); |
18963c01b
|
63 |
spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
11ffa9d60
|
64 |
} |
b215e2839
|
65 |
|
11ffa9d60
|
66 67 68 69 70 |
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) { struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, t.tmr); timerfd_triggered(ctx); |
b215e2839
|
71 72 |
return HRTIMER_NORESTART; } |
11ffa9d60
|
73 74 75 76 77 78 79 80 |
/*
 * Alarm timer callback: map the alarm back to its owning context and
 * report the expiry. @now is unused; the alarm is never restarted here.
 */
static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
						 ktime_t now)
{
	timerfd_triggered(container_of(alarm, struct timerfd_ctx, t.alarm));

	return ALARMTIMER_NORESTART;
}
9ec269075
|
81 82 |
/* * Called when the clock was set to cancel the timers in the cancel |
1123d9396
|
83 84 85 |
* list. This will wake up processes waiting on these timers. The * wake-up requires ctx->ticks to be non zero, therefore we increment * it before calling wake_up_locked(). |
9ec269075
|
86 87 |
*/ void timerfd_clock_was_set(void) |
4d672e7ac
|
88 |
{ |
53cc7bad3
|
89 |
ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
9ec269075
|
90 91 |
struct timerfd_ctx *ctx; unsigned long flags; |
4d672e7ac
|
92 |
|
9ec269075
|
93 94 95 96 97 98 99 |
rcu_read_lock(); list_for_each_entry_rcu(ctx, &cancel_list, clist) { if (!ctx->might_cancel) continue; spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->moffs.tv64 != moffs.tv64) { ctx->moffs.tv64 = KTIME_MAX; |
1123d9396
|
100 |
ctx->ticks++; |
9ec269075
|
101 102 103 104 105 |
wake_up_locked(&ctx->wqh); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); } rcu_read_unlock(); |
4d672e7ac
|
106 |
} |
9ec269075
|
107 |
static void timerfd_remove_cancel(struct timerfd_ctx *ctx) |
99ee5315d
|
108 |
{ |
9ec269075
|
109 110 111 112 113 114 115 |
if (ctx->might_cancel) { ctx->might_cancel = false; spin_lock(&cancel_lock); list_del_rcu(&ctx->clist); spin_unlock(&cancel_lock); } } |
99ee5315d
|
116 |
|
9ec269075
|
117 118 119 |
static bool timerfd_canceled(struct timerfd_ctx *ctx) { if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) |
99ee5315d
|
120 |
return false; |
53cc7bad3
|
121 |
ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
9ec269075
|
122 123 |
return true; } |
99ee5315d
|
124 |
|
9ec269075
|
125 126 |
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { |
11ffa9d60
|
127 128 129 |
if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { |
9ec269075
|
130 131 132 133 134 135 136 137 138 139 |
if (!ctx->might_cancel) { ctx->might_cancel = true; spin_lock(&cancel_lock); list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } } else if (ctx->might_cancel) { timerfd_remove_cancel(ctx); } } |
99ee5315d
|
140 |
|
9ec269075
|
141 142 143 |
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) { ktime_t remaining; |
99ee5315d
|
144 |
|
11ffa9d60
|
145 146 147 148 |
if (isalarm(ctx)) remaining = alarm_expires_remaining(&ctx->t.alarm); else remaining = hrtimer_expires_remaining(&ctx->t.tmr); |
9ec269075
|
149 |
return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
99ee5315d
|
150 151 152 153 |
} static int timerfd_setup(struct timerfd_ctx *ctx, int flags, const struct itimerspec *ktmr) |
b215e2839
|
154 155 156 |
{ enum hrtimer_mode htmode; ktime_t texp; |
99ee5315d
|
157 |
int clockid = ctx->clockid; |
b215e2839
|
158 159 160 161 162 163 |
htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; |
4d672e7ac
|
164 |
ctx->ticks = 0; |
b215e2839
|
165 |
ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
11ffa9d60
|
166 167 168 169 170 171 172 173 174 175 176 |
if (isalarm(ctx)) { alarm_init(&ctx->t.alarm, ctx->clockid == CLOCK_REALTIME_ALARM ? ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); } else { hrtimer_init(&ctx->t.tmr, clockid, htmode); hrtimer_set_expires(&ctx->t.tmr, texp); ctx->t.tmr.function = timerfd_tmrproc; } |
99ee5315d
|
177 |
if (texp.tv64 != 0) { |
11ffa9d60
|
178 179 180 181 182 183 184 185 |
if (isalarm(ctx)) { if (flags & TFD_TIMER_ABSTIME) alarm_start(&ctx->t.alarm, texp); else alarm_start_relative(&ctx->t.alarm, texp); } else { hrtimer_start(&ctx->t.tmr, texp, htmode); } |
99ee5315d
|
186 187 188 |
if (timerfd_canceled(ctx)) return -ECANCELED; } |
af9c4957c
|
189 190 |
ctx->settime_flags = flags & TFD_SETTIME_FLAGS; |
99ee5315d
|
191 |
return 0; |
b215e2839
|
192 193 194 195 196 |
} static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; |
9ec269075
|
197 |
timerfd_remove_cancel(ctx); |
11ffa9d60
|
198 199 200 201 202 |
if (isalarm(ctx)) alarm_cancel(&ctx->t.alarm); else hrtimer_cancel(&ctx->t.tmr); |
9ec269075
|
203 |
kfree_rcu(ctx, rcu); |
b215e2839
|
204 205 206 207 208 209 210 211 212 213 |
return 0; } static unsigned int timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; unsigned int events = 0; unsigned long flags; poll_wait(file, &ctx->wqh, wait); |
18963c01b
|
214 |
spin_lock_irqsave(&ctx->wqh.lock, flags); |
4d672e7ac
|
215 |
if (ctx->ticks) |
b215e2839
|
216 |
events |= POLLIN; |
18963c01b
|
217 |
spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
b215e2839
|
218 219 220 221 222 223 224 225 226 |
return events; } static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct timerfd_ctx *ctx = file->private_data; ssize_t res; |
098284020
|
227 |
u64 ticks = 0; |
b215e2839
|
228 229 230 |
if (count < sizeof(ticks)) return -EINVAL; |
18963c01b
|
231 |
spin_lock_irq(&ctx->wqh.lock); |
8120a8aad
|
232 233 234 235 |
if (file->f_flags & O_NONBLOCK) res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); |
99ee5315d
|
236 |
|
9ec269075
|
237 238 239 240 241 242 243 244 245 246 |
/* * If clock has changed, we do not care about the * ticks and we do not rearm the timer. Userspace must * reevaluate anyway. */ if (timerfd_canceled(ctx)) { ctx->ticks = 0; ctx->expired = 0; res = -ECANCELED; } |
4d672e7ac
|
247 248 |
if (ctx->ticks) { ticks = ctx->ticks; |
99ee5315d
|
249 |
|
4d672e7ac
|
250 |
if (ctx->expired && ctx->tintv.tv64) { |
b215e2839
|
251 252 253 254 255 256 |
/* * If tintv.tv64 != 0, this is a periodic timer that * needs to be re-armed. We avoid doing it in the timer * callback to avoid DoS attacks specifying a very * short timer period. */ |
11ffa9d60
|
257 258 259 260 261 262 263 264 265 |
if (isalarm(ctx)) { ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } |
4d672e7ac
|
266 267 268 |
} ctx->expired = 0; ctx->ticks = 0; |
b215e2839
|
269 |
} |
18963c01b
|
270 |
spin_unlock_irq(&ctx->wqh.lock); |
b215e2839
|
271 |
if (ticks) |
098284020
|
272 |
res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); |
b215e2839
|
273 274 |
return res; } |
af9c4957c
|
275 |
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo() handler: print the timer state, one "key: value" entry
 * per line.
 *
 * Each entry is terminated with a newline so that it sits on its own
 * line. The remaining time, the interval and the tick count are all
 * sampled under wqh.lock so that they form one consistent snapshot and
 * the 64-bit tick counter is not read while it is being updated.
 */
static void timerfd_show(struct seq_file *m, struct file *file)
{
	struct timerfd_ctx *ctx = file->private_data;
	struct itimerspec t;
	unsigned long long ticks;

	spin_lock_irq(&ctx->wqh.lock);
	t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	t.it_interval = ktime_to_timespec(ctx->tintv);
	ticks = ctx->ticks;
	spin_unlock_irq(&ctx->wqh.lock);

	seq_printf(m,
		   "clockid: %d\n"
		   "ticks: %llu\n"
		   "settime flags: 0%o\n"
		   "it_value: (%llu, %llu)\n"
		   "it_interval: (%llu, %llu)\n",
		   ctx->clockid,
		   ticks,
		   ctx->settime_flags,
		   (unsigned long long)t.it_value.tv_sec,
		   (unsigned long long)t.it_value.tv_nsec,
		   (unsigned long long)t.it_interval.tv_sec,
		   (unsigned long long)t.it_interval.tv_nsec);
}
#else
#define timerfd_show NULL
#endif
5442e9fbd
|
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 |
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * ->unlocked_ioctl() handler. The only command is TFD_IOC_SET_TICKS,
 * which overwrites the pending tick count with the u64 that @arg points
 * to and wakes up waiters.
 *
 * Returns 0 on success, -ENOTTY for any other command, -EFAULT when the
 * value cannot be copied in, -EINVAL for a zero tick count, or
 * -ECANCELED when the timer was canceled by a clock change.
 */
static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct timerfd_ctx *ctx = file->private_data;
	u64 ticks;
	int ret = 0;

	if (cmd != TFD_IOC_SET_TICKS)
		return -ENOTTY;

	if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
		return -EFAULT;
	if (!ticks)
		return -EINVAL;

	spin_lock_irq(&ctx->wqh.lock);
	if (timerfd_canceled(ctx)) {
		ret = -ECANCELED;
	} else {
		ctx->ticks = ticks;
		wake_up_locked(&ctx->wqh);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return ret;
}
#else
#define timerfd_ioctl NULL
#endif
b215e2839
|
341 342 343 344 |
static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, .read = timerfd_read, |
6038f373a
|
345 |
.llseek = noop_llseek, |
af9c4957c
|
346 |
.show_fdinfo = timerfd_show, |
5442e9fbd
|
347 |
.unlocked_ioctl = timerfd_ioctl, |
b215e2839
|
348 |
}; |
2903ff019
|
349 |
static int timerfd_fget(int fd, struct fd *p) |
4d672e7ac
|
350 |
{ |
2903ff019
|
351 352 353 354 355 356 |
struct fd f = fdget(fd); if (!f.file) return -EBADF; if (f.file->f_op != &timerfd_fops) { fdput(f); return -EINVAL; |
4d672e7ac
|
357 |
} |
2903ff019
|
358 359 |
*p = f; return 0; |
4d672e7ac
|
360 |
} |
836f92adf
|
361 |
/*
 * timerfd_create(2): allocate a timer context for @clockid and return a
 * new file descriptor that refers to it.
 *
 * Returns the descriptor on success, -EINVAL for unknown @flags bits or
 * an unsupported @clockid, -ENOMEM when the context cannot be allocated,
 * or the error reported by anon_inode_getfd().
 */
SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
	struct timerfd_ctx *ctx;
	int ufd;

	/* Check the TFD_* constants for consistency.  */
	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~TFD_CREATE_FLAGS)
		return -EINVAL;

	switch (clockid) {
	case CLOCK_MONOTONIC:
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_ALARM:
	case CLOCK_BOOTTIME:
	case CLOCK_BOOTTIME_ALARM:
		break;
	default:
		return -EINVAL;
	}

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	init_waitqueue_head(&ctx->wqh);
	ctx->clockid = clockid;

	if (!isalarm(ctx))
		hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
	else
		alarm_init(&ctx->t.alarm,
			   ctx->clockid == CLOCK_REALTIME_ALARM ?
			   ALARM_REALTIME : ALARM_BOOTTIME,
			   timerfd_alarmproc);

	/* Remember the current offset so that a later clock change shows. */
	ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });

	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
			       O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
	/* No file took ownership of the context, so free it here. */
	if (ufd < 0)
		kfree(ctx);

	return ufd;
}
9d94b9e2f
|
400 401 402 |
static int do_timerfd_settime(int ufd, int flags, const struct itimerspec *new, struct itimerspec *old) |
4d672e7ac
|
403 |
{ |
2903ff019
|
404 |
struct fd f; |
4d672e7ac
|
405 |
struct timerfd_ctx *ctx; |
2903ff019
|
406 |
int ret; |
4d672e7ac
|
407 |
|
610d18f41
|
408 |
if ((flags & ~TFD_SETTIME_FLAGS) || |
9d94b9e2f
|
409 410 |
!timespec_valid(&new->it_value) || !timespec_valid(&new->it_interval)) |
b215e2839
|
411 |
return -EINVAL; |
2903ff019
|
412 413 414 415 |
ret = timerfd_fget(ufd, &f); if (ret) return ret; ctx = f.file->private_data; |
b215e2839
|
416 |
|
9ec269075
|
417 |
timerfd_setup_cancel(ctx, flags); |
4d672e7ac
|
418 419 420 421 422 423 |
/* * We need to stop the existing timer before reprogramming * it to the new values. */ for (;;) { spin_lock_irq(&ctx->wqh.lock); |
11ffa9d60
|
424 425 426 427 428 429 430 431 |
if (isalarm(ctx)) { if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) break; } else { if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) break; } |
18963c01b
|
432 |
spin_unlock_irq(&ctx->wqh.lock); |
4d672e7ac
|
433 |
cpu_relax(); |
b215e2839
|
434 |
} |
4d672e7ac
|
435 436 437 438 439 440 |
/* * If the timer is expired and it's periodic, we need to advance it * because the caller may want to know the previous expiration time. * We do not update "ticks" and "expired" since the timer will be * re-programmed again in the following timerfd_setup() call. */ |
11ffa9d60
|
441 442 443 444 445 446 |
if (ctx->expired && ctx->tintv.tv64) { if (isalarm(ctx)) alarm_forward_now(&ctx->t.alarm, ctx->tintv); else hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); } |
b215e2839
|
447 |
|
9d94b9e2f
|
448 449 |
old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); old->it_interval = ktime_to_timespec(ctx->tintv); |
4d672e7ac
|
450 451 452 453 |
/* * Re-program the timer to the new value ... */ |
9d94b9e2f
|
454 |
ret = timerfd_setup(ctx, flags, new); |
4d672e7ac
|
455 456 |
spin_unlock_irq(&ctx->wqh.lock); |
2903ff019
|
457 |
fdput(f); |
99ee5315d
|
458 |
return ret; |
4d672e7ac
|
459 |
} |
9d94b9e2f
|
460 |
static int do_timerfd_gettime(int ufd, struct itimerspec *t) |
4d672e7ac
|
461 |
{ |
2903ff019
|
462 |
struct fd f; |
4d672e7ac
|
463 |
struct timerfd_ctx *ctx; |
2903ff019
|
464 465 466 467 |
int ret = timerfd_fget(ufd, &f); if (ret) return ret; ctx = f.file->private_data; |
4d672e7ac
|
468 469 470 471 |
spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv.tv64) { ctx->expired = 0; |
11ffa9d60
|
472 473 474 475 476 477 478 479 480 481 482 483 |
if (isalarm(ctx)) { ctx->ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ctx->ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } |
4d672e7ac
|
484 |
} |
9d94b9e2f
|
485 486 |
t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); t->it_interval = ktime_to_timespec(ctx->tintv); |
4d672e7ac
|
487 |
spin_unlock_irq(&ctx->wqh.lock); |
2903ff019
|
488 |
fdput(f); |
9d94b9e2f
|
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 |
return 0; } SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct itimerspec __user *, utmr, struct itimerspec __user *, otmr) { struct itimerspec new, old; int ret; if (copy_from_user(&new, utmr, sizeof(new))) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; if (otmr && copy_to_user(otmr, &old, sizeof(old))) return -EFAULT; return ret; } |
4d672e7ac
|
509 |
|
9d94b9e2f
|
510 511 512 513 514 515 |
/*
 * timerfd_gettime(2): fetch the current setting of the timer behind @ufd
 * and copy it out to @otmr.
 *
 * Returns 0 on success, -EFAULT when the copy to user space fails, or
 * the error from do_timerfd_gettime().
 */
SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
{
	struct itimerspec kotmr;
	int ret;

	ret = do_timerfd_gettime(ufd, &kotmr);
	if (!ret && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
		ret = -EFAULT;

	return ret;
}
0e803bafb
|
518 |
#ifdef CONFIG_COMPAT
/*
 * Compat flavour of timerfd_settime(2): same as the native call, except
 * that the timer specifications are converted with the compat helpers.
 */
COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
		const struct compat_itimerspec __user *, utmr,
		struct compat_itimerspec __user *, otmr)
{
	struct itimerspec new, old;
	int ret;

	if (get_compat_itimerspec(&new, utmr))
		return -EFAULT;

	ret = do_timerfd_settime(ufd, flags, &new, &old);
	if (!ret && otmr && put_compat_itimerspec(otmr, &old))
		ret = -EFAULT;

	return ret;
}

/*
 * Compat flavour of timerfd_gettime(2): same as the native call, except
 * that the result is written out with the compat helper.
 */
COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
		struct compat_itimerspec __user *, otmr)
{
	struct itimerspec kotmr;
	int ret;

	ret = do_timerfd_gettime(ufd, &kotmr);
	if (!ret && put_compat_itimerspec(otmr, &kotmr))
		ret = -EFAULT;

	return ret;
}
#endif