Commit 54cdfdb47f73b5af3d1ebb0f1e383efbe70fde9e

Authored by Thomas Gleixner
Committed by Linus Torvalds
1 parent d40891e75f

[PATCH] hrtimers: add high resolution timer support

Implement high resolution timers on top of the hrtimers infrastructure and the
clockevents / tick-management framework.  This provides accurate timers for
all hrtimer subsystem users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 8 changed files with 650 additions and 58 deletions
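
Before the per-file changes, a point of orientation: the patch extends the existing hrtimer API rather than replacing it. Below is a minimal sketch — not part of the patch — of how a hypothetical module could use the new pieces (cb_mode, hrtimer_cb_get_time()); the names demo_timer and demo_expired are invented, and the cb_mode field exists only when CONFIG_HIGH_RES_TIMERS is set:

#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;

static enum hrtimer_restart demo_expired(struct hrtimer *timer)
{
	/* hrtimer_cb_get_time() returns the accurate clock in highres
	 * mode and the cached softirq time otherwise. */
	printk(KERN_INFO "demo timer expired at %lld ns\n",
	       (long long)ktime_to_ns(hrtimer_cb_get_time(timer)));
	return HRTIMER_NORESTART;
}

static int __init demo_init(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_expired;
#ifdef CONFIG_HIGH_RES_TIMERS
	/* Run the callback from HRTIMER_SOFTIRQ, not in hardirq context */
	demo_timer.cb_mode = HRTIMER_CB_SOFTIRQ;
#endif
	/* 100 us timeout: honored only once highres mode is active */
	hrtimer_start(&demo_timer, ktime_set(0, 100000), HRTIMER_MODE_REL);
	return 0;
}

static void __exit demo_exit(void)
{
	hrtimer_cancel(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

On a high resolution kernel the 100 us timeout fires after roughly 100 us; on a low resolution kernel it is rounded up to the next tick.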

Documentation/kernel-parameters.txt
... ... @@ -609,6 +609,10 @@
609 609 highmem otherwise. This also works to reduce highmem
610 610 size on bigger boxes.
611 611  
  612 + highres= [KNL] Enable/disable high resolution timer mode.
  613 + Valid parameters: "on", "off"
  614 + Default: "on"
  615 +
612 616 hisax= [HW,ISDN]
613 617 See Documentation/isdn/README.HiSax.
614 618  
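
For example, booting with "highres=off" keeps the hrtimer subsystem in low resolution tick mode even on capable hardware, while the default "highres=on" lets the kernel switch to high resolution mode as soon as a suitable clocksource and clock event device are available (see setup_hrtimer_hres() and hrtimer_switch_to_hres() in kernel/hrtimer.c below).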
include/linux/hrtimer.h
... ... @@ -41,16 +41,35 @@
41 41 };
42 42  
43 43 /*
44   - * Bit values to track state of the timer
  44 + * hrtimer callback modes:
45 45 *
  46 + * HRTIMER_CB_SOFTIRQ: Callback must run in softirq context
  47 + * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context
  48 + * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and
  49 + * does not restart the timer
  50 + * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context
  51 + * Special mode for tick emulation
  52 + */
  53 +enum hrtimer_cb_mode {
  54 + HRTIMER_CB_SOFTIRQ,
  55 + HRTIMER_CB_IRQSAFE,
  56 + HRTIMER_CB_IRQSAFE_NO_RESTART,
  57 + HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
  58 +};
  59 +
  60 +/*
  61 + * Values to track state of the timer
  62 + *
46 63 * Possible states:
47 64 *
48 65 * 0x00 inactive
49 66 * 0x01 enqueued into rbtree
50 67 * 0x02 callback function running
  68 + * 0x04 callback pending (high resolution mode)
  69 + *
  70 + * Special case:
51 71 * 0x03 callback function running and enqueued
52 72 * (was requeued on another CPU)
53   - *
54 73 * The "callback function running and enqueued" status is only possible on
55 74 * SMP. It happens for example when a posix timer expired and the callback
56 75 * queued a signal. Between dropping the lock which protects the posix timer
... ... @@ -67,6 +86,7 @@
67 86 #define HRTIMER_STATE_INACTIVE 0x00
68 87 #define HRTIMER_STATE_ENQUEUED 0x01
69 88 #define HRTIMER_STATE_CALLBACK 0x02
  89 +#define HRTIMER_STATE_PENDING 0x04
70 90  
71 91 /**
72 92 * struct hrtimer - the basic hrtimer structure
73 93  
... ... @@ -77,8 +97,17 @@
77 97 * @function: timer expiry callback function
78 98 * @base: pointer to the timer base (per cpu and per clock)
79 99 * @state: state information (See bit values above)
  100 + * @cb_mode: high resolution timer feature to select the callback execution
  101 + * mode
  102 + * @cb_entry: list head to enqueue an expired timer into the callback list
  103 + * @start_site: timer statistics field to store the site where the timer
  104 + * was started
  105 + * @start_comm: timer statistics field to store the name of the process which
  106 + * started the timer
  107 + * @start_pid: timer statistics field to store the pid of the task which
  108 + * started the timer
80 109 *
81   - * The hrtimer structure must be initialized by init_hrtimer_#CLOCKTYPE()
  110 + * The hrtimer structure must be initialized by hrtimer_init()
82 111 */
83 112 struct hrtimer {
84 113 struct rb_node node;
... ... @@ -86,6 +115,10 @@
86 115 enum hrtimer_restart (*function)(struct hrtimer *);
87 116 struct hrtimer_clock_base *base;
88 117 unsigned long state;
  118 +#ifdef CONFIG_HIGH_RES_TIMERS
  119 + enum hrtimer_cb_mode cb_mode;
  120 + struct list_head cb_entry;
  121 +#endif
89 122 };
90 123  
91 124 /**
... ... @@ -110,6 +143,9 @@
110 143 * @get_time: function to retrieve the current time of the clock
111 144 * @get_softirq_time: function to retrieve the current time from the softirq
112 145 * @softirq_time: the time when running the hrtimer queue in the softirq
  146 + * @cb_pending: list of timers where the callback is pending
  147 + * @offset: offset of this clock to the monotonic base
  148 + * @reprogram: function to reprogram the timer event
113 149 */
114 150 struct hrtimer_clock_base {
115 151 struct hrtimer_cpu_base *cpu_base;
... ... @@ -120,6 +156,12 @@
120 156 ktime_t (*get_time)(void);
121 157 ktime_t (*get_softirq_time)(void);
122 158 ktime_t softirq_time;
  159 +#ifdef CONFIG_HIGH_RES_TIMERS
  160 + ktime_t offset;
  161 + int (*reprogram)(struct hrtimer *t,
  162 + struct hrtimer_clock_base *b,
  163 + ktime_t n);
  164 +#endif
123 165 };
124 166  
125 167 #define HRTIMER_MAX_CLOCK_BASES 2
126 168  
127 169  
128 170  
129 171  
... ... @@ -131,19 +173,74 @@
131 173 * @lock_key: the lock_class_key for use with lockdep
132 174 * @clock_base: array of clock bases for this cpu
133 175 * @curr_timer: the timer which is executing a callback right now
  176 + * @expires_next: absolute time of the next event which was scheduled
  177 + * via clock_set_next_event()
  178 + * @hres_active: State of high resolution mode
  179 + * @check_clocks: Indicator; when set, evaluate the time source and
  180 + * clock event devices to determine whether high
  181 + * resolution mode can be activated.
  182 + * @cb_pending: Expired timers are moved from the rbtree to this
  183 + * list in the timer interrupt. The list is processed
  184 + * in the softirq.
  185 + * @nr_events: Total number of timer interrupt events
134 186 */
135 187 struct hrtimer_cpu_base {
136 188 spinlock_t lock;
137 189 struct lock_class_key lock_key;
138 190 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  191 +#ifdef CONFIG_HIGH_RES_TIMERS
  192 + ktime_t expires_next;
  193 + int hres_active;
  194 + struct list_head cb_pending;
  195 + unsigned long nr_events;
  196 +#endif
139 197 };
140 198  
  199 +#ifdef CONFIG_HIGH_RES_TIMERS
  200 +struct clock_event_device;
  201 +
  202 +extern void clock_was_set(void);
  203 +extern void hrtimer_interrupt(struct clock_event_device *dev);
  204 +
141 205 /*
  206 + * In high resolution mode the time reference must be read accurately
  207 + */
  208 +static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
  209 +{
  210 + return timer->base->get_time();
  211 +}
  212 +
  213 +/*
  214 + * The resolution of the clocks. The resolution value is returned in
  215 + * the clock_getres() system call to give application programmers an
  216 + * idea of the (in)accuracy of timers. Timer values are rounded up to
  217 + * this resolution value.
  218 + */
  219 +# define KTIME_HIGH_RES (ktime_t) { .tv64 = 1 }
  220 +# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
  221 +
  222 +#else
  223 +
  224 +# define KTIME_MONOTONIC_RES KTIME_LOW_RES
  225 +
  226 +/*
142 227 * clock_was_set() is a NOP for non-high-resolution systems. The
143 228 * time-sorted order guarantees that a timer does not expire early and
144 229 * is expired in the next softirq when the clock was advanced.
145 230 */
146   -#define clock_was_set() do { } while (0)
  231 +static inline void clock_was_set(void) { }
  232 +
  233 +/*
  234 + * In non-high-resolution mode the time reference is taken from
  235 + * the base softirq time variable.
  236 + */
  237 +static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
  238 +{
  239 + return timer->base->softirq_time;
  240 +}
  241 +
  242 +#endif
  243 +
147 244 extern ktime_t ktime_get(void);
148 245 extern ktime_t ktime_get_real(void);
149 246  
150 247  
... ... @@ -168,9 +265,7 @@
168 265 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
169 266 extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
170 267  
171   -#ifdef CONFIG_NO_IDLE_HZ
172 268 extern ktime_t hrtimer_get_next_event(void);
173   -#endif
174 269  
175 270 /*
176 271 * A timer is active, when it is enqueued into the rbtree or the callback
... ... @@ -179,6 +274,15 @@
179 274 static inline int hrtimer_active(const struct hrtimer *timer)
180 275 {
181 276 return timer->state != HRTIMER_STATE_INACTIVE;
  277 +}
  278 +
  279 +/*
  280 + * Helper function to check whether the timer is on one of the queues
  281 + */
  282 +static inline int hrtimer_is_queued(struct hrtimer *timer)
  283 +{
  284 + return timer->state &
  285 + (HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING);
182 286 }
183 287  
184 288 /* Forward a hrtimer so it expires after now: */
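
The advertised resolution above (KTIME_HIGH_RES vs. KTIME_LOW_RES) is visible to applications through clock_getres(). A small userspace sketch, not part of the patch, to observe the difference — on a highres kernel CLOCK_MONOTONIC reports 1 ns, on a low resolution kernel one tick (TICK_NSEC):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec res;

	if (clock_getres(CLOCK_MONOTONIC, &res) != 0) {
		perror("clock_getres");
		return 1;
	}
	/* 1 ns in high resolution mode, TICK_NSEC otherwise */
	printf("CLOCK_MONOTONIC resolution: %ld ns\n", res.tv_nsec);
	return 0;
}

Older glibc versions need -lrt to link clock_getres().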
include/linux/interrupt.h
... ... @@ -242,6 +242,9 @@
242 242 BLOCK_SOFTIRQ,
243 243 TASKLET_SOFTIRQ,
244 244 SCHED_SOFTIRQ,
  245 +#ifdef CONFIG_HIGH_RES_TIMERS
  246 + HRTIMER_SOFTIRQ,
  247 +#endif
245 248 };
246 249  
247 250 /* softirq mask and active fields moved to irq_cpustat_t in
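
The new HRTIMER_SOFTIRQ slot is opened at the end of this patch, where hrtimers_init() registers run_hrtimer_softirq() via open_softirq(). It is raised from hrtimer_enqueue_reprogram() and hrtimer_interrupt() whenever expired timers are deferred to the per-CPU cb_pending list instead of running their callbacks in hard interrupt context.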
include/linux/ktime.h
... ... @@ -261,8 +261,7 @@
261 261 * idea of the (in)accuracy of timers. Timer values are rounded up to
262 262 * this resolution values.
263 263 */
264   -#define KTIME_REALTIME_RES (ktime_t){ .tv64 = TICK_NSEC }
265   -#define KTIME_MONOTONIC_RES (ktime_t){ .tv64 = TICK_NSEC }
  264 +#define KTIME_LOW_RES (ktime_t){ .tv64 = TICK_NSEC }
266 265  
267 266 /* Get the monotonic time in timespec format: */
268 267 extern void ktime_get_ts(struct timespec *ts);
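
The rename makes the one-tick constant usable for both clock bases. Concretely (assuming HZ=250 for illustration): TICK_NSEC is roughly 4,000,000 ns, so in low resolution mode clock_getres() reports about 4 ms and a 100 us timer is rounded up to roughly one tick; in high resolution mode the advertised resolution is KTIME_HIGH_RES, i.e. 1 ns, and timers are honored at clock event device precision.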
kernel/hrtimer.c
... ... @@ -3,7 +3,7 @@
3 3 *
4 4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
5 5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
6   - * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  6 + * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
7 7 *
8 8 * High-resolution kernel timers
9 9 *
10 10  
11 11  
... ... @@ -32,13 +32,17 @@
32 32 */
33 33  
34 34 #include <linux/cpu.h>
  35 +#include <linux/irq.h>
35 36 #include <linux/module.h>
36 37 #include <linux/percpu.h>
37 38 #include <linux/hrtimer.h>
38 39 #include <linux/notifier.h>
39 40 #include <linux/syscalls.h>
  41 +#include <linux/kallsyms.h>
40 42 #include <linux/interrupt.h>
41 43 #include <linux/tick.h>
  44 +#include <linux/seq_file.h>
  45 +#include <linux/err.h>
42 46  
43 47 #include <asm/uaccess.h>
44 48  
... ... @@ -81,7 +85,7 @@
81 85 * This ensures that we capture erroneous accesses to these clock ids
82 86 * rather than moving them into the range of valid clock id's.
83 87 */
84   -static DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
  88 +DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
85 89 {
86 90  
87 91 .clock_base =
88 92  
... ... @@ -89,12 +93,12 @@
89 93 {
90 94 .index = CLOCK_REALTIME,
91 95 .get_time = &ktime_get_real,
92   - .resolution = KTIME_REALTIME_RES,
  96 + .resolution = KTIME_LOW_RES,
93 97 },
94 98 {
95 99 .index = CLOCK_MONOTONIC,
96 100 .get_time = &ktime_get,
97   - .resolution = KTIME_MONOTONIC_RES,
  101 + .resolution = KTIME_LOW_RES,
98 102 },
99 103 }
100 104 };
... ... @@ -151,14 +155,6 @@
151 155 }
152 156  
153 157 /*
154   - * Helper function to check, whether the timer is on one of the queues
155   - */
156   -static inline int hrtimer_is_queued(struct hrtimer *timer)
157   -{
158   - return timer->state & HRTIMER_STATE_ENQUEUED;
159   -}
160   -
161   -/*
162 158 * Helper function to check, whether the timer is running the callback
163 159 * function
164 160 */
... ... @@ -226,7 +222,7 @@
226 222 * completed. There is no conflict as we hold the lock until
227 223 * the timer is enqueued.
228 224 */
229   - if (unlikely(timer->state & HRTIMER_STATE_CALLBACK))
  225 + if (unlikely(hrtimer_callback_running(timer)))
230 226 return base;
231 227  
232 228 /* See the comment in lock_timer_base() */
... ... @@ -250,7 +246,7 @@
250 246 return base;
251 247 }
252 248  
253   -#define switch_hrtimer_base(t, b) (b)
  249 +# define switch_hrtimer_base(t, b) (b)
254 250  
255 251 #endif /* !CONFIG_SMP */
256 252  
... ... @@ -281,9 +277,6 @@
281 277  
282 278 return ktime_add(kt, tmp);
283 279 }
284   -
285   -#else /* CONFIG_KTIME_SCALAR */
286   -
287 280 # endif /* !CONFIG_KTIME_SCALAR */
288 281  
289 282 /*
290 283  
... ... @@ -308,7 +301,291 @@
308 301 }
309 302 #endif /* BITS_PER_LONG >= 64 */
310 303  
  304 +/* High resolution timer related functions */
  305 +#ifdef CONFIG_HIGH_RES_TIMERS
  306 +
311 307 /*
  308 + * High resolution timer enabled ?
  309 + */
  310 +static int hrtimer_hres_enabled __read_mostly = 1;
  311 +
  312 +/*
  313 + * Enable / Disable high resolution mode
  314 + */
  315 +static int __init setup_hrtimer_hres(char *str)
  316 +{
  317 + if (!strcmp(str, "off"))
  318 + hrtimer_hres_enabled = 0;
  319 + else if (!strcmp(str, "on"))
  320 + hrtimer_hres_enabled = 1;
  321 + else
  322 + return 0;
  323 + return 1;
  324 +}
  325 +
  326 +__setup("highres=", setup_hrtimer_hres);
  327 +
  328 +/*
  329 + * hrtimer_is_hres_enabled - query whether the highres mode is enabled
  330 + */
  331 +static inline int hrtimer_is_hres_enabled(void)
  332 +{
  333 + return hrtimer_hres_enabled;
  334 +}
  335 +
  336 +/*
  337 + * Is the high resolution mode active ?
  338 + */
  339 +static inline int hrtimer_hres_active(void)
  340 +{
  341 + return __get_cpu_var(hrtimer_bases).hres_active;
  342 +}
  343 +
  344 +/*
  345 + * Reprogram the event source, checking both queues for the
  346 + * next event.
  347 + * Called with interrupts disabled and base->lock held
  348 + */
  349 +static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
  350 +{
  351 + int i;
  352 + struct hrtimer_clock_base *base = cpu_base->clock_base;
  353 + ktime_t expires;
  354 +
  355 + cpu_base->expires_next.tv64 = KTIME_MAX;
  356 +
  357 + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
  358 + struct hrtimer *timer;
  359 +
  360 + if (!base->first)
  361 + continue;
  362 + timer = rb_entry(base->first, struct hrtimer, node);
  363 + expires = ktime_sub(timer->expires, base->offset);
  364 + if (expires.tv64 < cpu_base->expires_next.tv64)
  365 + cpu_base->expires_next = expires;
  366 + }
  367 +
  368 + if (cpu_base->expires_next.tv64 != KTIME_MAX)
  369 + tick_program_event(cpu_base->expires_next, 1);
  370 +}
  371 +
  372 +/*
  373 + * Shared reprogramming for clock_realtime and clock_monotonic
  374 + *
  375 + * When a timer is enqueued and expires earlier than the already enqueued
  376 + * timers, we have to check whether it expires earlier than the timer for
  377 + * which the clock event device was armed.
  378 + *
  379 + * Called with interrupts disabled and base->cpu_base.lock held
  380 + */
  381 +static int hrtimer_reprogram(struct hrtimer *timer,
  382 + struct hrtimer_clock_base *base)
  383 +{
  384 + ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
  385 + ktime_t expires = ktime_sub(timer->expires, base->offset);
  386 + int res;
  387 +
  388 + /*
  389 + * When the callback is running, we do not reprogram the clock event
  390 + * device. The timer callback is either running on a different CPU or
  391 + * the callback is executed in the hrtimer_interrupt context. The
  392 + * reprogramming is handled either by the softirq, which called the
  393 + * callback or at the end of the hrtimer_interrupt.
  394 + */
  395 + if (hrtimer_callback_running(timer))
  396 + return 0;
  397 +
  398 + if (expires.tv64 >= expires_next->tv64)
  399 + return 0;
  400 +
  401 + /*
  402 + * Clockevents returns -ETIME when the event was in the past.
  403 + */
  404 + res = tick_program_event(expires, 0);
  405 + if (!IS_ERR_VALUE(res))
  406 + *expires_next = expires;
  407 + return res;
  408 +}
  409 +
  410 +
  411 +/*
  412 + * Retrigger next event is called after clock was set
  413 + *
  414 + * Called with interrupts disabled via on_each_cpu()
  415 + */
  416 +static void retrigger_next_event(void *arg)
  417 +{
  418 + struct hrtimer_cpu_base *base;
  419 + struct timespec realtime_offset;
  420 + unsigned long seq;
  421 +
  422 + if (!hrtimer_hres_active())
  423 + return;
  424 +
  425 + do {
  426 + seq = read_seqbegin(&xtime_lock);
  427 + set_normalized_timespec(&realtime_offset,
  428 + -wall_to_monotonic.tv_sec,
  429 + -wall_to_monotonic.tv_nsec);
  430 + } while (read_seqretry(&xtime_lock, seq));
  431 +
  432 + base = &__get_cpu_var(hrtimer_bases);
  433 +
  434 + /* Adjust CLOCK_REALTIME offset */
  435 + spin_lock(&base->lock);
  436 + base->clock_base[CLOCK_REALTIME].offset =
  437 + timespec_to_ktime(realtime_offset);
  438 +
  439 + hrtimer_force_reprogram(base);
  440 + spin_unlock(&base->lock);
  441 +}
  442 +
  443 +/*
  444 + * Clock realtime was set
  445 + *
  446 + * Change the offset of the realtime clock vs. the monotonic
  447 + * clock.
  448 + *
  449 + * We might have to reprogram the high resolution timer interrupt. On
  450 + * SMP we call the architecture specific code to retrigger _all_ high
  451 + * resolution timer interrupts. On UP we just disable interrupts and
  452 + * call the high resolution interrupt code.
  453 + */
  454 +void clock_was_set(void)
  455 +{
  456 + /* Retrigger the CPU local events everywhere */
  457 + on_each_cpu(retrigger_next_event, NULL, 0, 1);
  458 +}
  459 +
  460 +/*
  461 + * Check whether the timer is on the callback pending list
  462 + */
  463 +static inline int hrtimer_cb_pending(const struct hrtimer *timer)
  464 +{
  465 + return timer->state & HRTIMER_STATE_PENDING;
  466 +}
  467 +
  468 +/*
  469 + * Remove a timer from the callback pending list
  470 + */
  471 +static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
  472 +{
  473 + list_del_init(&timer->cb_entry);
  474 +}
  475 +
  476 +/*
  477 + * Initialize the high resolution related parts of cpu_base
  478 + */
  479 +static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
  480 +{
  481 + base->expires_next.tv64 = KTIME_MAX;
  482 + base->hres_active = 0;
  483 + INIT_LIST_HEAD(&base->cb_pending);
  484 +}
  485 +
  486 +/*
  487 + * Initialize the high resolution related parts of a hrtimer
  488 + */
  489 +static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
  490 +{
  491 + INIT_LIST_HEAD(&timer->cb_entry);
  492 +}
  493 +
  494 +/*
  495 + * When high resolution timers are active, try to reprogram. Note that in case
  496 + * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
  497 + * check happens. The timer gets enqueued into the rbtree. The reprogramming
  498 + * and expiry check is done in the hrtimer_interrupt or in the softirq.
  499 + */
  500 +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  501 + struct hrtimer_clock_base *base)
  502 +{
  503 + if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
  504 +
  505 + /* Timer is expired, act upon the callback mode */
  506 + switch (timer->cb_mode) {
  507 + case HRTIMER_CB_IRQSAFE_NO_RESTART:
  508 + /*
  509 + * We can call the callback from here. No restart
  510 + * happens, so no danger of recursion
  511 + */
  512 + BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
  513 + return 1;
  514 + case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
  515 + /*
  516 + * This is solely for the sched tick emulation with
  517 + * dynamic tick support to ensure that we do not
  518 + * restart the tick right on the edge and end up with
  519 + * the tick timer in the softirq ! The calling site
  520 + * takes care of this.
  521 + */
  522 + return 1;
  523 + case HRTIMER_CB_IRQSAFE:
  524 + case HRTIMER_CB_SOFTIRQ:
  525 + /*
  526 + * Move everything else into the softirq pending list !
  527 + */
  528 + list_add_tail(&timer->cb_entry,
  529 + &base->cpu_base->cb_pending);
  530 + timer->state = HRTIMER_STATE_PENDING;
  531 + raise_softirq(HRTIMER_SOFTIRQ);
  532 + return 1;
  533 + default:
  534 + BUG();
  535 + }
  536 + }
  537 + return 0;
  538 +}
  539 +
  540 +/*
  541 + * Switch to high resolution mode
  542 + */
  543 +static void hrtimer_switch_to_hres(void)
  544 +{
  545 + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
  546 + unsigned long flags;
  547 +
  548 + if (base->hres_active)
  549 + return;
  550 +
  551 + local_irq_save(flags);
  552 +
  553 + if (tick_init_highres()) {
  554 + local_irq_restore(flags);
  555 + return;
  556 + }
  557 + base->hres_active = 1;
  558 + base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
  559 + base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
  560 +
  561 + tick_setup_sched_timer();
  562 +
  563 + /* "Retrigger" the interrupt to get things going */
  564 + retrigger_next_event(NULL);
  565 + local_irq_restore(flags);
  566 + printk(KERN_INFO "Switched to high resolution mode on CPU %d\n",
  567 + smp_processor_id());
  568 +}
  569 +
  570 +#else
  571 +
  572 +static inline int hrtimer_hres_active(void) { return 0; }
  573 +static inline int hrtimer_is_hres_enabled(void) { return 0; }
  574 +static inline void hrtimer_switch_to_hres(void) { }
  575 +static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
  576 +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  577 + struct hrtimer_clock_base *base)
  578 +{
  579 + return 0;
  580 +}
  581 +static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
  582 +static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
  583 +static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  584 +static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
  585 +
  586 +#endif /* CONFIG_HIGH_RES_TIMERS */
  587 +
  588 +/*
312 589 * Counterpart to lock_timer_base above:
313 590 */
314 591 static inline
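
A note on the clock-setting path wired up above: when the wall clock is changed (e.g. via settimeofday()), clock_was_set() runs retrigger_next_event() on every CPU through on_each_cpu(). Each CPU re-reads wall_to_monotonic under the xtime_lock seqlock, updates the CLOCK_REALTIME base offset and force-reprograms its clock event device, so absolute CLOCK_REALTIME timers fire at the corrected wall time rather than the stale one.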
... ... @@ -365,7 +642,7 @@
365 642 * red black tree is O(log(n)). Must hold the base lock.
366 643 */
367 644 static void enqueue_hrtimer(struct hrtimer *timer,
368   - struct hrtimer_clock_base *base)
  645 + struct hrtimer_clock_base *base, int reprogram)
369 646 {
370 647 struct rb_node **link = &base->active.rb_node;
371 648 struct rb_node *parent = NULL;
... ... @@ -391,6 +668,22 @@
391 668 * Insert the timer to the rbtree and check whether it
392 669 * replaces the first pending timer
393 670 */
  671 + if (!base->first || timer->expires.tv64 <
  672 + rb_entry(base->first, struct hrtimer, node)->expires.tv64) {
  673 + /*
  674 + * Reprogram the clock event device. When the timer is already
  675 + * expired hrtimer_enqueue_reprogram has either called the
  676 + * callback or added it to the pending list and raised the
  677 + * softirq.
  678 + *
  679 + * This is a NOP for !HIGHRES
  680 + */
  681 + if (reprogram && hrtimer_enqueue_reprogram(timer, base))
  682 + return;
  683 +
  684 + base->first = &timer->node;
  685 + }
  686 +
394 687 rb_link_node(&timer->node, parent, link);
395 688 rb_insert_color(&timer->node, &base->active);
396 689 /*
397 690  
398 691  
399 692  
... ... @@ -398,28 +691,38 @@
398 691 * state of a possibly running callback.
399 692 */
400 693 timer->state |= HRTIMER_STATE_ENQUEUED;
401   -
402   - if (!base->first || timer->expires.tv64 <
403   - rb_entry(base->first, struct hrtimer, node)->expires.tv64)
404   - base->first = &timer->node;
405 694 }
406 695  
407 696 /*
408 697 * __remove_hrtimer - internal function to remove a timer
409 698 *
410 699 * Caller must hold the base lock.
  700 + *
  701 + * High resolution timer mode reprograms the clock event device when the
  702 + * timer is the one which expires next. The caller can disable this by setting
  703 + * reprogram to zero. This is useful when the context does a reprogramming
  704 + * anyway (e.g. timer interrupt)
411 705 */
412 706 static void __remove_hrtimer(struct hrtimer *timer,
413 707 struct hrtimer_clock_base *base,
414   - unsigned long newstate)
  708 + unsigned long newstate, int reprogram)
415 709 {
416   - /*
417   - * Remove the timer from the rbtree and replace the
418   - * first entry pointer if necessary.
419   - */
420   - if (base->first == &timer->node)
421   - base->first = rb_next(&timer->node);
422   - rb_erase(&timer->node, &base->active);
  710 + /* High res. callback list. NOP for !HIGHRES */
  711 + if (hrtimer_cb_pending(timer))
  712 + hrtimer_remove_cb_pending(timer);
  713 + else {
  714 + /*
  715 + * Remove the timer from the rbtree and replace the
  716 + * first entry pointer if necessary.
  717 + */
  718 + if (base->first == &timer->node) {
  719 + base->first = rb_next(&timer->node);
  720 + /* Reprogram the clock event device, if enabled */
  721 + if (reprogram && hrtimer_hres_active())
  722 + hrtimer_force_reprogram(base->cpu_base);
  723 + }
  724 + rb_erase(&timer->node, &base->active);
  725 + }
423 726 timer->state = newstate;
424 727 }
425 728  
... ... @@ -430,7 +733,19 @@
430 733 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
431 734 {
432 735 if (hrtimer_is_queued(timer)) {
433   - __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE);
  736 + int reprogram;
  737 +
  738 + /*
  739 + * Remove the timer and force reprogramming when high
  740 + * resolution mode is active and the timer is on the current
  741 + * CPU. If we remove a timer on another CPU, reprogramming is
  742 + * skipped. The interrupt event on this CPU is fired and
  743 + * reprogramming happens in the interrupt handler. This is a
  744 + * rare case and less expensive than an SMP call.
  745 + */
  746 + reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
  747 + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
  748 + reprogram);
434 749 return 1;
435 750 }
436 751 return 0;
... ... @@ -476,7 +791,7 @@
476 791 }
477 792 timer->expires = tim;
478 793  
479   - enqueue_hrtimer(timer, new_base);
  794 + enqueue_hrtimer(timer, new_base, base == new_base);
480 795  
481 796 unlock_hrtimer_base(timer, &flags);
482 797  
483 798  
484 799  
... ... @@ -567,17 +882,19 @@
567 882  
568 883 spin_lock_irqsave(&cpu_base->lock, flags);
569 884  
570   - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
571   - struct hrtimer *timer;
  885 + if (!hrtimer_hres_active()) {
  886 + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
  887 + struct hrtimer *timer;
572 888  
573   - if (!base->first)
574   - continue;
  889 + if (!base->first)
  890 + continue;
575 891  
576   - timer = rb_entry(base->first, struct hrtimer, node);
577   - delta.tv64 = timer->expires.tv64;
578   - delta = ktime_sub(delta, base->get_time());
579   - if (delta.tv64 < mindelta.tv64)
580   - mindelta.tv64 = delta.tv64;
  892 + timer = rb_entry(base->first, struct hrtimer, node);
  893 + delta.tv64 = timer->expires.tv64;
  894 + delta = ktime_sub(delta, base->get_time());
  895 + if (delta.tv64 < mindelta.tv64)
  896 + mindelta.tv64 = delta.tv64;
  897 + }
581 898 }
582 899  
583 900 spin_unlock_irqrestore(&cpu_base->lock, flags);
... ... @@ -607,6 +924,7 @@
607 924 clock_id = CLOCK_MONOTONIC;
608 925  
609 926 timer->base = &cpu_base->clock_base[clock_id];
  927 + hrtimer_init_timer_hres(timer);
610 928 }
611 929 EXPORT_SYMBOL_GPL(hrtimer_init);
612 930  
613 931  
... ... @@ -629,7 +947,140 @@
629 947 }
630 948 EXPORT_SYMBOL_GPL(hrtimer_get_res);
631 949  
  950 +#ifdef CONFIG_HIGH_RES_TIMERS
  951 +
632 952 /*
  953 + * High resolution timer interrupt
  954 + * Called with interrupts disabled
  955 + */
  956 +void hrtimer_interrupt(struct clock_event_device *dev)
  957 +{
  958 + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
  959 + struct hrtimer_clock_base *base;
  960 + ktime_t expires_next, now;
  961 + int i, raise = 0;
  962 +
  963 + BUG_ON(!cpu_base->hres_active);
  964 + cpu_base->nr_events++;
  965 + dev->next_event.tv64 = KTIME_MAX;
  966 +
  967 + retry:
  968 + now = ktime_get();
  969 +
  970 + expires_next.tv64 = KTIME_MAX;
  971 +
  972 + base = cpu_base->clock_base;
  973 +
  974 + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  975 + ktime_t basenow;
  976 + struct rb_node *node;
  977 +
  978 + spin_lock(&cpu_base->lock);
  979 +
  980 + basenow = ktime_add(now, base->offset);
  981 +
  982 + while ((node = base->first)) {
  983 + struct hrtimer *timer;
  984 +
  985 + timer = rb_entry(node, struct hrtimer, node);
  986 +
  987 + if (basenow.tv64 < timer->expires.tv64) {
  988 + ktime_t expires;
  989 +
  990 + expires = ktime_sub(timer->expires,
  991 + base->offset);
  992 + if (expires.tv64 < expires_next.tv64)
  993 + expires_next = expires;
  994 + break;
  995 + }
  996 +
  997 + /* Move softirq callbacks to the pending list */
  998 + if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
  999 + __remove_hrtimer(timer, base,
  1000 + HRTIMER_STATE_PENDING, 0);
  1001 + list_add_tail(&timer->cb_entry,
  1002 + &base->cpu_base->cb_pending);
  1003 + raise = 1;
  1004 + continue;
  1005 + }
  1006 +
  1007 + __remove_hrtimer(timer, base,
  1008 + HRTIMER_STATE_CALLBACK, 0);
  1009 +
  1010 + /*
  1011 + * Note: We clear the CALLBACK bit after
  1012 + * enqueue_hrtimer to avoid reprogramming of
  1013 + * the event hardware. This happens at the end
  1014 + * of this function anyway.
  1015 + */
  1016 + if (timer->function(timer) != HRTIMER_NORESTART) {
  1017 + BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
  1018 + enqueue_hrtimer(timer, base, 0);
  1019 + }
  1020 + timer->state &= ~HRTIMER_STATE_CALLBACK;
  1021 + }
  1022 + spin_unlock(&cpu_base->lock);
  1023 + base++;
  1024 + }
  1025 +
  1026 + cpu_base->expires_next = expires_next;
  1027 +
  1028 + /* Reprogramming necessary ? */
  1029 + if (expires_next.tv64 != KTIME_MAX) {
  1030 + if (tick_program_event(expires_next, 0))
  1031 + goto retry;
  1032 + }
  1033 +
  1034 + /* Raise softirq ? */
  1035 + if (raise)
  1036 + raise_softirq(HRTIMER_SOFTIRQ);
  1037 +}
  1038 +
  1039 +static void run_hrtimer_softirq(struct softirq_action *h)
  1040 +{
  1041 + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
  1042 +
  1043 + spin_lock_irq(&cpu_base->lock);
  1044 +
  1045 + while (!list_empty(&cpu_base->cb_pending)) {
  1046 + enum hrtimer_restart (*fn)(struct hrtimer *);
  1047 + struct hrtimer *timer;
  1048 + int restart;
  1049 +
  1050 + timer = list_entry(cpu_base->cb_pending.next,
  1051 + struct hrtimer, cb_entry);
  1052 +
  1053 + fn = timer->function;
  1054 + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
  1055 + spin_unlock_irq(&cpu_base->lock);
  1056 +
  1057 + restart = fn(timer);
  1058 +
  1059 + spin_lock_irq(&cpu_base->lock);
  1060 +
  1061 + timer->state &= ~HRTIMER_STATE_CALLBACK;
  1062 + if (restart == HRTIMER_RESTART) {
  1063 + BUG_ON(hrtimer_active(timer));
  1064 + /*
  1065 + * Enqueue the timer, allow reprogramming of the event
  1066 + * device
  1067 + */
  1068 + enqueue_hrtimer(timer, timer->base, 1);
  1069 + } else if (hrtimer_active(timer)) {
  1070 + /*
  1071 + * If the timer was rearmed on another CPU, reprogram
  1072 + * the event device.
  1073 + */
  1074 + if (timer->base->first == &timer->node)
  1075 + hrtimer_reprogram(timer, timer->base);
  1076 + }
  1077 + }
  1078 + spin_unlock_irq(&cpu_base->lock);
  1079 +}
  1080 +
  1081 +#endif /* CONFIG_HIGH_RES_TIMERS */
  1082 +
  1083 +/*
633 1084 * Expire the per base hrtimer-queue:
634 1085 */
635 1086 static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
... ... @@ -656,7 +1107,7 @@
656 1107 break;
657 1108  
658 1109 fn = timer->function;
659   - __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK);
  1110 + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
660 1111 spin_unlock_irq(&cpu_base->lock);
661 1112  
662 1113 restart = fn(timer);
... ... @@ -666,7 +1117,7 @@
666 1117 timer->state &= ~HRTIMER_STATE_CALLBACK;
667 1118 if (restart != HRTIMER_NORESTART) {
668 1119 BUG_ON(hrtimer_active(timer));
669   - enqueue_hrtimer(timer, base);
  1120 + enqueue_hrtimer(timer, base, 0);
670 1121 }
671 1122 }
672 1123 spin_unlock_irq(&cpu_base->lock);
673 1124  
... ... @@ -674,12 +1125,19 @@
674 1125  
675 1126 /*
676 1127 * Called from timer softirq every jiffy, expire hrtimers:
  1128 + *
  1129 + * For HRT this is the fallback code to run the softirq in the timer
  1130 + * softirq context in case the hrtimer initialization failed or has
  1131 + * not been done yet.
677 1132 */
678 1133 void hrtimer_run_queues(void)
679 1134 {
680 1135 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
681 1136 int i;
682 1137  
  1138 + if (hrtimer_hres_active())
  1139 + return;
  1140 +
683 1141 /*
684 1142 * This _is_ ugly: We have to check in the softirq context,
685 1143 * whether we can switch to highres and / or nohz mode. The
... ... @@ -688,7 +1146,8 @@
688 1146 * check bit in the tick_oneshot code, otherwise we might
689 1147 * deadlock vs. xtime_lock.
690 1148 */
691   - tick_check_oneshot_change(1);
  1149 + if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
  1150 + hrtimer_switch_to_hres();
692 1151  
693 1152 hrtimer_get_softirq_time(cpu_base);
694 1153  
... ... @@ -716,6 +1175,9 @@
716 1175 {
717 1176 sl->timer.function = hrtimer_wakeup;
718 1177 sl->task = task;
  1178 +#ifdef CONFIG_HIGH_RES_TIMERS
  1179 + sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART;
  1180 +#endif
719 1181 }
720 1182  
721 1183 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
... ... @@ -726,7 +1188,8 @@
726 1188 set_current_state(TASK_INTERRUPTIBLE);
727 1189 hrtimer_start(&t->timer, t->timer.expires, mode);
728 1190  
729   - schedule();
  1191 + if (likely(t->task))
  1192 + schedule();
730 1193  
731 1194 hrtimer_cancel(&t->timer);
732 1195 mode = HRTIMER_MODE_ABS;
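
The new t->task check guards a subtle case introduced by HRTIMER_CB_IRQSAFE_NO_RESTART: if the requested expiry is already in the past, hrtimer_enqueue_reprogram() runs hrtimer_wakeup() synchronously from hrtimer_start(), and hrtimer_wakeup() clears t->task. In that case the wakeup has already happened before the task ever slept, so do_nanosleep() only calls schedule() while t->task is still set.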
... ... @@ -831,6 +1294,7 @@
831 1294 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
832 1295 cpu_base->clock_base[i].cpu_base = cpu_base;
833 1296  
  1297 + hrtimer_init_hres(cpu_base);
834 1298 }
835 1299  
836 1300 #ifdef CONFIG_HOTPLUG_CPU
837 1301  
... ... @@ -843,10 +1307,13 @@
843 1307  
844 1308 while ((node = rb_first(&old_base->active))) {
845 1309 timer = rb_entry(node, struct hrtimer, node);
846   - BUG_ON(timer->state & HRTIMER_STATE_CALLBACK);
847   - __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE);
  1310 + BUG_ON(hrtimer_callback_running(timer));
  1311 + __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
848 1312 timer->base = new_base;
849   - enqueue_hrtimer(timer, new_base);
  1313 + /*
  1314 + * Enqueue the timer. Allow reprogramming of the event device
  1315 + */
  1316 + enqueue_hrtimer(timer, new_base, 1);
850 1317 }
851 1318 }
852 1319  
... ... @@ -859,6 +1326,8 @@
859 1326 old_base = &per_cpu(hrtimer_bases, cpu);
860 1327 new_base = &get_cpu_var(hrtimer_bases);
861 1328  
  1329 + tick_cancel_sched_timer(cpu);
  1330 +
862 1331 local_irq_disable();
863 1332  
864 1333 spin_lock(&new_base->lock);
... ... @@ -910,5 +1379,8 @@
910 1379 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
911 1380 (void *)(long)smp_processor_id());
912 1381 register_cpu_notifier(&hrtimers_nb);
  1382 +#ifdef CONFIG_HIGH_RES_TIMERS
  1383 + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL);
  1384 +#endif
913 1385 }
kernel/itimer.c
... ... @@ -136,7 +136,7 @@
136 136 send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk);
137 137  
138 138 if (sig->it_real_incr.tv64 != 0) {
139   - hrtimer_forward(timer, timer->base->softirq_time,
  139 + hrtimer_forward(timer, hrtimer_cb_get_time(timer),
140 140 sig->it_real_incr);
141 141 return HRTIMER_RESTART;
142 142 }
kernel/posix-timers.c
... ... @@ -356,7 +356,7 @@
356 356 if (timr->it.real.interval.tv64 != 0) {
357 357 timr->it_overrun +=
358 358 hrtimer_forward(timer,
359   - timer->base->softirq_time,
  359 + hrtimer_cb_get_time(timer),
360 360 timr->it.real.interval);
361 361 ret = HRTIMER_RESTART;
362 362 ++timr->it_requeue_pending;
kernel/time/Kconfig
... ... @@ -13,4 +13,13 @@
13 13 This option enables a tickless system: timer interrupts will
14 14 only trigger on an as-needed basis both when the system is
15 15 busy and when the system is idle.
  16 +
  17 +config HIGH_RES_TIMERS
  18 + bool "High Resolution Timer Support"
  19 + depends on GENERIC_TIME && GENERIC_CLOCKEVENTS
  20 + select TICK_ONESHOT
  21 + help
  22 + This option enables high resolution timer support. If your
  23 + hardware is not capable of high resolution timers, this
  24 + option only increases the size of the kernel image.
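
When the runtime switch succeeds, hrtimer_switch_to_hres() above logs "Switched to high resolution mode on CPU n", which is the quickest way to verify that a kernel built with CONFIG_HIGH_RES_TIMERS=y (and hence GENERIC_TIME, GENERIC_CLOCKEVENTS and the selected TICK_ONESHOT) actually entered high resolution mode on the running hardware.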