Commit c676329abb2b8359d9a5d734dec0c81779823fd6
Committed by Ingo Molnar
1 parent 95ae3c59fa
Exists in master and in 4 other branches
sched_clock: Add local_clock() API and improve documentation
For people who otherwise get to write: cpu_clock(smp_processor_id()), there is now: local_clock().

Also, as per suggestion from Andrew, provide some documentation on the various clock interfaces, and minimize the unsigned long long vs u64 mess.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jens Axboe <jaxboe@fusionio.com>
LKML-Reference: <1275052414.1645.52.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
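As a rough sketch of the call-site simplification this gives (the wrapper functions below are made up for illustration; cpu_clock(), raw_smp_processor_id() and the new local_clock() are the real interfaces):

    #include <linux/sched.h>
    #include <linux/smp.h>

    /* before this patch, timestamping the local cpu looked like this: */
    static u64 stamp_old(void)
    {
            return cpu_clock(raw_smp_processor_id());
    }

    /* with the new interface the same thing becomes: */
    static u64 stamp_new(void)
    {
            return local_clock();
    }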
Showing 8 changed files with 113 additions and 34 deletions
arch/parisc/kernel/ftrace.c
| ... | ... | @@ -82,7 +82,7 @@ |
| 82 | 82 | unsigned long ret; |
| 83 | 83 | |
| 84 | 84 | pop_return_trace(&trace, &ret); |
| 85 | - trace.rettime = cpu_clock(raw_smp_processor_id()); | |
| 85 | + trace.rettime = local_clock(); | |
| 86 | 86 | ftrace_graph_return(&trace); |
| 87 | 87 | |
| 88 | 88 | if (unlikely(!ret)) { |
| ... | ... | @@ -126,7 +126,7 @@ |
| 126 | 126 | return; |
| 127 | 127 | } |
| 128 | 128 | |
| 129 | - calltime = cpu_clock(raw_smp_processor_id()); | |
| 129 | + calltime = local_clock(); | |
| 130 | 130 | |
| 131 | 131 | if (push_return_trace(old, calltime, |
| 132 | 132 | self_addr, &trace.depth) == -EBUSY) { |
include/linux/sched.h
| ... | ... | @@ -1791,20 +1791,23 @@ |
| 1791 | 1791 | #endif |
| 1792 | 1792 | |
| 1793 | 1793 | /* |
| 1794 | - * Architectures can set this to 1 if they have specified | |
| 1795 | - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, | |
| 1796 | - * but then during bootup it turns out that sched_clock() | |
| 1797 | - * is reliable after all: | |
| 1794 | + * Do not use outside of architecture code which knows its limitations. | |
| 1795 | + * | |
| 1796 | + * sched_clock() has no promise of monotonicity or bounded drift between | |
| 1797 | + * CPUs, use (which you should not) requires disabling IRQs. | |
| 1798 | + * | |
| 1799 | + * Please use one of the three interfaces below. | |
| 1798 | 1800 | */ |
| 1799 | -#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | |
| 1800 | -extern int sched_clock_stable; | |
| 1801 | -#endif | |
| 1802 | - | |
| 1803 | -/* ftrace calls sched_clock() directly */ | |
| 1804 | 1801 | extern unsigned long long notrace sched_clock(void); |
| 1802 | +/* | |
| 1803 | + * See the comment in kernel/sched_clock.c | |
| 1804 | + */ | |
| 1805 | +extern u64 cpu_clock(int cpu); | |
| 1806 | +extern u64 local_clock(void); | |
| 1807 | +extern u64 sched_clock_cpu(int cpu); | |
| 1805 | 1808 | |
| 1809 | + | |
| 1806 | 1810 | extern void sched_clock_init(void); |
| 1807 | -extern u64 sched_clock_cpu(int cpu); | |
| 1808 | 1811 | |
| 1809 | 1812 | #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
| 1810 | 1813 | static inline void sched_clock_tick(void) |
| 1811 | 1814 | |
| ... | ... | @@ -1819,16 +1822,18 @@ |
| 1819 | 1822 | { |
| 1820 | 1823 | } |
| 1821 | 1824 | #else |
| 1825 | +/* | |
| 1826 | + * Architectures can set this to 1 if they have specified | |
| 1827 | + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, | |
| 1828 | + * but then during bootup it turns out that sched_clock() | |
| 1829 | + * is reliable after all: | |
| 1830 | + */ | |
| 1831 | +extern int sched_clock_stable; | |
| 1832 | + | |
| 1822 | 1833 | extern void sched_clock_tick(void); |
| 1823 | 1834 | extern void sched_clock_idle_sleep_event(void); |
| 1824 | 1835 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); |
| 1825 | 1836 | #endif |
| 1826 | - | |
| 1827 | -/* | |
| 1828 | - * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | |
| 1829 | - * clock constructed from sched_clock(): | |
| 1830 | - */ | |
| 1831 | -extern unsigned long long cpu_clock(int cpu); | |
| 1832 | 1837 | |
| 1833 | 1838 | extern unsigned long long |
| 1834 | 1839 | task_sched_runtime(struct task_struct *task); |
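Taken together, the declarations above leave callers with three interfaces. A minimal sketch of picking between them, following the constraints documented in kernel/sched_clock.c below (the sample_* helpers and the WARN_ON_ONCE check are illustrative assumptions, not part of the patch):

    #include <linux/sched.h>
    #include <linux/smp.h>
    #include <linux/irqflags.h>
    #include <linux/kernel.h>

    /* any context, including NMI: timestamp an arbitrary cpu */
    static u64 sample_cpu(int cpu)
    {
            return cpu_clock(cpu);
    }

    /* fast path that already runs with local IRQs disabled */
    static u64 sample_irqs_off(void)
    {
            WARN_ON_ONCE(!irqs_disabled());
            return sched_clock_cpu(smp_processor_id());
    }

    /* normal kernel context, current cpu only */
    static u64 sample_here(void)
    {
            return local_clock();
    }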
kernel/lockdep.c
kernel/perf_event.c
kernel/rcutorture.c
| ... | ... | @@ -239,8 +239,7 @@ |
| 239 | 239 | rcu_random(struct rcu_random_state *rrsp) |
| 240 | 240 | { |
| 241 | 241 | if (--rrsp->rrs_count < 0) { |
| 242 | - rrsp->rrs_state += | |
| 243 | - (unsigned long)cpu_clock(raw_smp_processor_id()); | |
| 242 | + rrsp->rrs_state += (unsigned long)local_clock(); | |
| 244 | 243 | rrsp->rrs_count = RCU_RANDOM_REFRESH; |
| 245 | 244 | } |
| 246 | 245 | rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD; |
kernel/sched.c
kernel/sched_clock.c
| ... | ... | @@ -10,19 +10,55 @@ |
| 10 | 10 | * Ingo Molnar <mingo@redhat.com> |
| 11 | 11 | * Guillaume Chazarain <guichaz@gmail.com> |
| 12 | 12 | * |
| 13 | - * Create a semi stable clock from a mixture of other events, including: | |
| 14 | - * - gtod | |
| 13 | + * | |
| 14 | + * What: | |
| 15 | + * | |
| 16 | + * cpu_clock(i) provides a fast (execution time) high resolution | |
| 17 | + * clock with bounded drift between CPUs. The value of cpu_clock(i) | |
| 18 | + * is monotonic for constant i. The timestamp returned is in nanoseconds. | |
| 19 | + * | |
| 20 | + * ######################### BIG FAT WARNING ########################## | |
| 21 | + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # | |
| 22 | + * # go backwards !! # | |
| 23 | + * #################################################################### | |
| 24 | + * | |
| 25 | + * There is no strict promise about the base, although it tends to start | |
| 26 | + * at 0 on boot (but people really shouldn't rely on that). | |
| 27 | + * | |
| 28 | + * cpu_clock(i) -- can be used from any context, including NMI. | |
| 29 | + * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI) | |
| 30 | + * local_clock() -- is cpu_clock() on the current cpu. | |
| 31 | + * | |
| 32 | + * How: | |
| 33 | + * | |
| 34 | + * The implementation either uses sched_clock() when | |
| 35 | + * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the | |
| 36 | + * sched_clock() is assumed to provide these properties (mostly it means | |
| 37 | + * the architecture provides a globally synchronized highres time source). | |
| 38 | + * | |
| 39 | + * Otherwise it tries to create a semi stable clock from a mixture of other | |
| 40 | + * clocks, including: | |
| 41 | + * | |
| 42 | + * - GTOD (clock monotonic) | |
| 15 | 43 | * - sched_clock() |
| 16 | 44 | * - explicit idle events |
| 17 | 45 | * |
| 18 | - * We use gtod as base and the unstable clock deltas. The deltas are filtered, | |
| 19 | - * making it monotonic and keeping it within an expected window. | |
| 46 | + * We use GTOD as base and use sched_clock() deltas to improve resolution. The | |
| 47 | + * deltas are filtered to provide monotonicity and keeping it within an | |
| 48 | + * expected window. | |
| 20 | 49 | * |
| 21 | 50 | * Furthermore, explicit sleep and wakeup hooks allow us to account for time |
| 22 | 51 | * that is otherwise invisible (TSC gets stopped). |
| 23 | 52 | * |
| 24 | - * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat | |
| 25 | - * consistent between cpus (never more than 2 jiffies difference). | |
| 53 | + * | |
| 54 | + * Notes: | |
| 55 | + * | |
| 56 | + * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things | |
| 57 | + * like cpufreq interrupts that can change the base clock (TSC) multiplier | |
| 58 | + * and cause funny jumps in time -- although the filtering provided by | |
| 59 | + * sched_clock_cpu() should mitigate serious artifacts we cannot rely on it | |
| 60 | + * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on | |
| 61 | + * sched_clock(). | |
| 26 | 62 | */ |
| 27 | 63 | #include <linux/spinlock.h> |
| 28 | 64 | #include <linux/hardirq.h> |
| ... | ... | @@ -170,6 +206,11 @@ |
| 170 | 206 | return val; |
| 171 | 207 | } |
| 172 | 208 | |
| 209 | +/* | |
| 210 | + * Similar to cpu_clock(), but requires local IRQs to be disabled. | |
| 211 | + * | |
| 212 | + * See cpu_clock(). | |
| 213 | + */ | |
| 173 | 214 | u64 sched_clock_cpu(int cpu) |
| 174 | 215 | { |
| 175 | 216 | struct sched_clock_data *scd; |
| 176 | 217 | |
| ... | ... | @@ -237,9 +278,19 @@ |
| 237 | 278 | } |
| 238 | 279 | EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); |
| 239 | 280 | |
| 240 | -unsigned long long cpu_clock(int cpu) | |
| 281 | +/* | |
| 282 | + * As outlined at the top, provides a fast, high resolution, nanosecond | |
| 283 | + * time source that is monotonic per cpu argument and has bounded drift | |
| 284 | + * between cpus. | |
| 285 | + * | |
| 286 | + * ######################### BIG FAT WARNING ########################## | |
| 287 | + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # | |
| 288 | + * # go backwards !! # | |
| 289 | + * #################################################################### | |
| 290 | + */ | |
| 291 | +u64 cpu_clock(int cpu) | |
| 241 | 292 | { |
| 242 | - unsigned long long clock; | |
| 293 | + u64 clock; | |
| 243 | 294 | unsigned long flags; |
| 244 | 295 | |
| 245 | 296 | local_irq_save(flags); |
| ... | ... | @@ -249,6 +300,25 @@ |
| 249 | 300 | return clock; |
| 250 | 301 | } |
| 251 | 302 | |
| 303 | +/* | |
| 304 | + * Similar to cpu_clock() for the current cpu. Time will only be observed | |
| 305 | + * to be monotonic if care is taken to only compare timestamps taken on the | |
| 306 | + * same CPU. | |
| 307 | + * | |
| 308 | + * See cpu_clock(). | |
| 309 | + */ | |
| 310 | +u64 local_clock(void) | |
| 311 | +{ | |
| 312 | + u64 clock; | |
| 313 | + unsigned long flags; | |
| 314 | + | |
| 315 | + local_irq_save(flags); | |
| 316 | + clock = sched_clock_cpu(smp_processor_id()); | |
| 317 | + local_irq_restore(flags); | |
| 318 | + | |
| 319 | + return clock; | |
| 320 | +} | |
| 321 | + | |
| 252 | 322 | #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ |
| 253 | 323 | |
| 254 | 324 | void sched_clock_init(void) |
| 255 | 325 | |
| 256 | 326 | |
| ... | ... | @@ -264,13 +334,18 @@ |
| 264 | 334 | return sched_clock(); |
| 265 | 335 | } |
| 266 | 336 | |
| 267 | - | |
| 268 | -unsigned long long cpu_clock(int cpu) | |
| 337 | +u64 cpu_clock(int cpu) | |
| 269 | 338 | { |
| 270 | 339 | return sched_clock_cpu(cpu); |
| 271 | 340 | } |
| 272 | 341 | |
| 342 | +u64 local_clock(void) | |
| 343 | +{ | |
| 344 | + return sched_clock_cpu(0); | |
| 345 | +} | |
| 346 | + | |
| 273 | 347 | #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ |
| 274 | 348 | |
| 275 | 349 | EXPORT_SYMBOL_GPL(cpu_clock); |
| 350 | +EXPORT_SYMBOL_GPL(local_clock); |
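To make the BIG FAT WARNING concrete: deltas are only meaningful between readings taken on the same cpu, so a caller that wants a duration from local_clock() should not migrate in between. A hedged sketch under that assumption (the timing helper and the preempt_disable() choice are illustrative, not part of this patch):

    #include <linux/sched.h>
    #include <linux/preempt.h>

    /* hypothetical: time a short section of code without migrating cpus */
    static u64 time_section(void (*fn)(void))
    {
            u64 t0, t1;

            preempt_disable();      /* stay on one cpu so t1 - t0 is monotonic */
            t0 = local_clock();
            fn();
            t1 = local_clock();
            preempt_enable();

            return t1 - t0;         /* nanoseconds, per the comment block above */
    }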