Blame view
fs/timerfd.c
11.4 KB
b215e2839
|
1 2 3 4 5 6 7 8 9 |
/* * fs/timerfd.c * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * * * Thanks to Thomas Gleixner for code reviews and useful comments. * */ |
11ffa9d60
|
10 |
#include <linux/alarmtimer.h> |
b215e2839
|
11 12 13 14 15 16 |
#include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> |
5a0e3ad6a
|
17 |
#include <linux/slab.h> |
b215e2839
|
18 19 20 21 22 23 |
#include <linux/list.h> #include <linux/spinlock.h> #include <linux/time.h> #include <linux/hrtimer.h> #include <linux/anon_inodes.h> #include <linux/timerfd.h> |
45cc2b96f
|
24 |
#include <linux/syscalls.h> |
9d94b9e2f
|
25 |
#include <linux/compat.h> |
9ec269075
|
26 |
#include <linux/rcupdate.h> |
b215e2839
|
27 28 |
struct timerfd_ctx { |
11ffa9d60
|
29 30 31 32 |
union { struct hrtimer tmr; struct alarm alarm; } t; |
b215e2839
|
33 |
ktime_t tintv; |
99ee5315d
|
34 |
ktime_t moffs; |
b215e2839
|
35 |
wait_queue_head_t wqh; |
4d672e7ac
|
36 |
u64 ticks; |
b215e2839
|
37 |
int expired; |
4d672e7ac
|
38 |
int clockid; |
9ec269075
|
39 40 |
struct rcu_head rcu; struct list_head clist; |
99ee5315d
|
41 |
bool might_cancel; |
b215e2839
|
42 |
}; |
9ec269075
|
43 44 |
/*
 * Contexts that asked to be cancelled when the clock is set. Additions
 * and removals are done under cancel_lock; timerfd_clock_was_set() walks
 * the list under rcu_read_lock().
 */
static DEFINE_SPINLOCK(cancel_lock);
static LIST_HEAD(cancel_list);
11ffa9d60
|
45 46 47 48 49 |
static inline bool isalarm(struct timerfd_ctx *ctx) { return ctx->clockid == CLOCK_REALTIME_ALARM || ctx->clockid == CLOCK_BOOTTIME_ALARM; } |
b215e2839
|
50 51 52 |
/* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, |
4d672e7ac
|
53 |
* tintv.tv64 != 0) until the timer is accessed. |
b215e2839
|
54 |
*/ |
11ffa9d60
|
55 |
static void timerfd_triggered(struct timerfd_ctx *ctx) |
b215e2839
|
56 |
{ |
b215e2839
|
57 |
unsigned long flags; |
18963c01b
|
58 |
spin_lock_irqsave(&ctx->wqh.lock, flags); |
b215e2839
|
59 |
ctx->expired = 1; |
4d672e7ac
|
60 |
ctx->ticks++; |
b215e2839
|
61 |
wake_up_locked(&ctx->wqh); |
18963c01b
|
62 |
spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
11ffa9d60
|
63 |
} |
b215e2839
|
64 |
|
11ffa9d60
|
65 66 67 68 69 |
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) { struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, t.tmr); timerfd_triggered(ctx); |
b215e2839
|
70 71 |
return HRTIMER_NORESTART; } |
11ffa9d60
|
72 73 74 75 76 77 78 79 |
/*
 * Alarm timer callback: map the alarm back to the context that embeds it
 * and record the expiration. @now is not used. The alarm is never
 * restarted from here.
 */
static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
						 ktime_t now)
{
	timerfd_triggered(container_of(alarm, struct timerfd_ctx, t.alarm));

	return ALARMTIMER_NORESTART;
}
9ec269075
|
80 81 |
/* * Called when the clock was set to cancel the timers in the cancel |
1123d9396
|
82 83 84 |
* list. This will wake up processes waiting on these timers. The * wake-up requires ctx->ticks to be non zero, therefore we increment * it before calling wake_up_locked(). |
9ec269075
|
85 86 |
*/ void timerfd_clock_was_set(void) |
4d672e7ac
|
87 |
{ |
9ec269075
|
88 89 90 |
ktime_t moffs = ktime_get_monotonic_offset(); struct timerfd_ctx *ctx; unsigned long flags; |
4d672e7ac
|
91 |
|
9ec269075
|
92 93 94 95 96 97 98 |
rcu_read_lock(); list_for_each_entry_rcu(ctx, &cancel_list, clist) { if (!ctx->might_cancel) continue; spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->moffs.tv64 != moffs.tv64) { ctx->moffs.tv64 = KTIME_MAX; |
1123d9396
|
99 |
ctx->ticks++; |
9ec269075
|
100 101 102 103 104 |
wake_up_locked(&ctx->wqh); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); } rcu_read_unlock(); |
4d672e7ac
|
105 |
} |
9ec269075
|
106 |
static void timerfd_remove_cancel(struct timerfd_ctx *ctx) |
99ee5315d
|
107 |
{ |
9ec269075
|
108 109 110 111 112 113 114 |
if (ctx->might_cancel) { ctx->might_cancel = false; spin_lock(&cancel_lock); list_del_rcu(&ctx->clist); spin_unlock(&cancel_lock); } } |
99ee5315d
|
115 |
|
9ec269075
|
116 117 118 |
static bool timerfd_canceled(struct timerfd_ctx *ctx) { if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) |
99ee5315d
|
119 |
return false; |
9ec269075
|
120 121 122 |
ctx->moffs = ktime_get_monotonic_offset(); return true; } |
99ee5315d
|
123 |
|
9ec269075
|
124 125 |
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { |
11ffa9d60
|
126 127 128 |
if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { |
9ec269075
|
129 130 131 132 133 134 135 136 137 138 |
if (!ctx->might_cancel) { ctx->might_cancel = true; spin_lock(&cancel_lock); list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } } else if (ctx->might_cancel) { timerfd_remove_cancel(ctx); } } |
99ee5315d
|
139 |
|
9ec269075
|
140 141 142 |
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) { ktime_t remaining; |
99ee5315d
|
143 |
|
11ffa9d60
|
144 145 146 147 |
if (isalarm(ctx)) remaining = alarm_expires_remaining(&ctx->t.alarm); else remaining = hrtimer_expires_remaining(&ctx->t.tmr); |
9ec269075
|
148 |
return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
99ee5315d
|
149 150 151 152 |
} static int timerfd_setup(struct timerfd_ctx *ctx, int flags, const struct itimerspec *ktmr) |
b215e2839
|
153 154 155 |
{ enum hrtimer_mode htmode; ktime_t texp; |
99ee5315d
|
156 |
int clockid = ctx->clockid; |
b215e2839
|
157 158 159 160 161 162 |
htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; |
4d672e7ac
|
163 |
ctx->ticks = 0; |
b215e2839
|
164 |
ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
11ffa9d60
|
165 166 167 168 169 170 171 172 173 174 175 |
if (isalarm(ctx)) { alarm_init(&ctx->t.alarm, ctx->clockid == CLOCK_REALTIME_ALARM ? ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); } else { hrtimer_init(&ctx->t.tmr, clockid, htmode); hrtimer_set_expires(&ctx->t.tmr, texp); ctx->t.tmr.function = timerfd_tmrproc; } |
99ee5315d
|
176 |
if (texp.tv64 != 0) { |
11ffa9d60
|
177 178 179 180 181 182 183 184 |
if (isalarm(ctx)) { if (flags & TFD_TIMER_ABSTIME) alarm_start(&ctx->t.alarm, texp); else alarm_start_relative(&ctx->t.alarm, texp); } else { hrtimer_start(&ctx->t.tmr, texp, htmode); } |
99ee5315d
|
185 186 187 188 |
if (timerfd_canceled(ctx)) return -ECANCELED; } return 0; |
b215e2839
|
189 190 191 192 193 |
} static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; |
9ec269075
|
194 |
timerfd_remove_cancel(ctx); |
11ffa9d60
|
195 196 197 198 199 |
if (isalarm(ctx)) alarm_cancel(&ctx->t.alarm); else hrtimer_cancel(&ctx->t.tmr); |
9ec269075
|
200 |
kfree_rcu(ctx, rcu); |
b215e2839
|
201 202 203 204 205 206 207 208 209 210 |
return 0; } static unsigned int timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; unsigned int events = 0; unsigned long flags; poll_wait(file, &ctx->wqh, wait); |
18963c01b
|
211 |
spin_lock_irqsave(&ctx->wqh.lock, flags); |
4d672e7ac
|
212 |
if (ctx->ticks) |
b215e2839
|
213 |
events |= POLLIN; |
18963c01b
|
214 |
spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
b215e2839
|
215 216 217 218 219 220 221 222 223 |
return events; } static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct timerfd_ctx *ctx = file->private_data; ssize_t res; |
098284020
|
224 |
u64 ticks = 0; |
b215e2839
|
225 226 227 |
if (count < sizeof(ticks)) return -EINVAL; |
18963c01b
|
228 |
spin_lock_irq(&ctx->wqh.lock); |
8120a8aad
|
229 230 231 232 |
if (file->f_flags & O_NONBLOCK) res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); |
99ee5315d
|
233 |
|
9ec269075
|
234 235 236 237 238 239 240 241 242 243 |
/* * If clock has changed, we do not care about the * ticks and we do not rearm the timer. Userspace must * reevaluate anyway. */ if (timerfd_canceled(ctx)) { ctx->ticks = 0; ctx->expired = 0; res = -ECANCELED; } |
4d672e7ac
|
244 245 |
if (ctx->ticks) { ticks = ctx->ticks; |
99ee5315d
|
246 |
|
4d672e7ac
|
247 |
if (ctx->expired && ctx->tintv.tv64) { |
b215e2839
|
248 249 250 251 252 253 |
/* * If tintv.tv64 != 0, this is a periodic timer that * needs to be re-armed. We avoid doing it in the timer * callback to avoid DoS attacks specifying a very * short timer period. */ |
11ffa9d60
|
254 255 256 257 258 259 260 261 262 |
if (isalarm(ctx)) { ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } |
4d672e7ac
|
263 264 265 |
} ctx->expired = 0; ctx->ticks = 0; |
b215e2839
|
266 |
} |
18963c01b
|
267 |
spin_unlock_irq(&ctx->wqh.lock); |
b215e2839
|
268 |
if (ticks) |
098284020
|
269 |
res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); |
b215e2839
|
270 271 272 273 274 275 276 |
return res; } static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, .read = timerfd_read, |
6038f373a
|
277 |
.llseek = noop_llseek, |
b215e2839
|
278 |
}; |
2903ff019
|
279 |
static int timerfd_fget(int fd, struct fd *p) |
4d672e7ac
|
280 |
{ |
2903ff019
|
281 282 283 284 285 286 |
struct fd f = fdget(fd); if (!f.file) return -EBADF; if (f.file->f_op != &timerfd_fops) { fdput(f); return -EINVAL; |
4d672e7ac
|
287 |
} |
2903ff019
|
288 289 |
*p = f; return 0; |
4d672e7ac
|
290 |
} |
836f92adf
|
291 |
/*
 * timerfd_create(2): allocate a timer context on @clockid and return a
 * new file descriptor for it.
 *
 * Returns the descriptor, -EINVAL for unknown @flags bits or an
 * unsupported @clockid, -ENOMEM when the context cannot be allocated, or
 * the error from anon_inode_getfd().
 */
SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
	struct timerfd_ctx *ctx;
	int ufd;

	/* Check the TFD_* constants for consistency. */
	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~TFD_CREATE_FLAGS)
		return -EINVAL;

	switch (clockid) {
	case CLOCK_MONOTONIC:
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_ALARM:
	case CLOCK_BOOTTIME:
	case CLOCK_BOOTTIME_ALARM:
		break;
	default:
		return -EINVAL;
	}

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	init_waitqueue_head(&ctx->wqh);
	ctx->clockid = clockid;

	if (!isalarm(ctx))
		hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
	else
		alarm_init(&ctx->t.alarm,
			   ctx->clockid == CLOCK_REALTIME_ALARM ?
			   ALARM_REALTIME : ALARM_BOOTTIME,
			   timerfd_alarmproc);

	ctx->moffs = ktime_get_monotonic_offset();

	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
			       O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
	/* No file was created, so the context is still ours to free. */
	if (ufd < 0)
		kfree(ctx);

	return ufd;
}
9d94b9e2f
|
330 331 332 |
static int do_timerfd_settime(int ufd, int flags, const struct itimerspec *new, struct itimerspec *old) |
4d672e7ac
|
333 |
{ |
2903ff019
|
334 |
struct fd f; |
4d672e7ac
|
335 |
struct timerfd_ctx *ctx; |
2903ff019
|
336 |
int ret; |
4d672e7ac
|
337 |
|
610d18f41
|
338 |
if ((flags & ~TFD_SETTIME_FLAGS) || |
9d94b9e2f
|
339 340 |
!timespec_valid(&new->it_value) || !timespec_valid(&new->it_interval)) |
b215e2839
|
341 |
return -EINVAL; |
2903ff019
|
342 343 344 345 |
ret = timerfd_fget(ufd, &f); if (ret) return ret; ctx = f.file->private_data; |
b215e2839
|
346 |
|
9ec269075
|
347 |
timerfd_setup_cancel(ctx, flags); |
4d672e7ac
|
348 349 350 351 352 353 |
/* * We need to stop the existing timer before reprogramming * it to the new values. */ for (;;) { spin_lock_irq(&ctx->wqh.lock); |
11ffa9d60
|
354 355 356 357 358 359 360 361 |
if (isalarm(ctx)) { if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) break; } else { if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) break; } |
18963c01b
|
362 |
spin_unlock_irq(&ctx->wqh.lock); |
4d672e7ac
|
363 |
cpu_relax(); |
b215e2839
|
364 |
} |
4d672e7ac
|
365 366 367 368 369 370 |
/* * If the timer is expired and it's periodic, we need to advance it * because the caller may want to know the previous expiration time. * We do not update "ticks" and "expired" since the timer will be * re-programmed again in the following timerfd_setup() call. */ |
11ffa9d60
|
371 372 373 374 375 376 |
if (ctx->expired && ctx->tintv.tv64) { if (isalarm(ctx)) alarm_forward_now(&ctx->t.alarm, ctx->tintv); else hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); } |
b215e2839
|
377 |
|
9d94b9e2f
|
378 379 |
old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); old->it_interval = ktime_to_timespec(ctx->tintv); |
4d672e7ac
|
380 381 382 383 |
/* * Re-program the timer to the new value ... */ |
9d94b9e2f
|
384 |
ret = timerfd_setup(ctx, flags, new); |
4d672e7ac
|
385 386 |
spin_unlock_irq(&ctx->wqh.lock); |
2903ff019
|
387 |
fdput(f); |
99ee5315d
|
388 |
return ret; |
4d672e7ac
|
389 |
} |
9d94b9e2f
|
390 |
static int do_timerfd_gettime(int ufd, struct itimerspec *t) |
4d672e7ac
|
391 |
{ |
2903ff019
|
392 |
struct fd f; |
4d672e7ac
|
393 |
struct timerfd_ctx *ctx; |
2903ff019
|
394 395 396 397 |
int ret = timerfd_fget(ufd, &f); if (ret) return ret; ctx = f.file->private_data; |
4d672e7ac
|
398 399 400 401 |
spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv.tv64) { ctx->expired = 0; |
11ffa9d60
|
402 403 404 405 406 407 408 409 410 411 412 413 |
if (isalarm(ctx)) { ctx->ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ctx->ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } |
4d672e7ac
|
414 |
} |
9d94b9e2f
|
415 416 |
t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); t->it_interval = ktime_to_timespec(ctx->tintv); |
4d672e7ac
|
417 |
spin_unlock_irq(&ctx->wqh.lock); |
2903ff019
|
418 |
fdput(f); |
9d94b9e2f
|
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 |
return 0; } SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct itimerspec __user *, utmr, struct itimerspec __user *, otmr) { struct itimerspec new, old; int ret; if (copy_from_user(&new, utmr, sizeof(new))) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; if (otmr && copy_to_user(otmr, &old, sizeof(old))) return -EFAULT; return ret; } |
4d672e7ac
|
439 |
|
9d94b9e2f
|
440 441 442 443 444 445 |
/*
 * timerfd_gettime(2): fetch the current setting of the timer behind @ufd
 * and copy it to @otmr.
 *
 * Returns 0 on success, -EFAULT when the copy to user space fails, or the
 * error from do_timerfd_gettime().
 */
SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
{
	struct itimerspec kotmr;
	int ret;

	ret = do_timerfd_gettime(ufd, &kotmr);
	if (!ret && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
		ret = -EFAULT;

	return ret;
}
0e803bafb
|
448 |
#ifdef CONFIG_COMPAT
/*
 * Compat timerfd_settime(2): same as the native entry point, except that
 * the itimerspec is converted from/to its compat layout on the way in
 * and out.
 */
COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
		const struct compat_itimerspec __user *, utmr,
		struct compat_itimerspec __user *, otmr)
{
	struct itimerspec new, old;
	int ret;

	if (get_compat_itimerspec(&new, utmr))
		return -EFAULT;

	ret = do_timerfd_settime(ufd, flags, &new, &old);
	if (!ret && otmr && put_compat_itimerspec(otmr, &old))
		ret = -EFAULT;

	return ret;
}

/*
 * Compat timerfd_gettime(2): same as the native entry point, except that
 * the result is written to @otmr in the compat layout.
 */
COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
		struct compat_itimerspec __user *, otmr)
{
	struct itimerspec kotmr;
	int ret;

	ret = do_timerfd_gettime(ufd, &kotmr);
	if (!ret && put_compat_itimerspec(otmr, &kotmr))
		ret = -EFAULT;

	return ret;
}
#endif