Commit c676329abb2b8359d9a5d734dec0c81779823fd6

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent 95ae3c59fa

sched_clock: Add local_clock() API and improve documentation

For people who otherwise get to write: cpu_clock(smp_processor_id()),
there is now: local_clock().

Also, as per suggestion from Andrew, provide some documentation on
the various clock interfaces, and minimize the unsigned long long vs
u64 mess.
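
To make the conversion concrete, here is a minimal sketch (the wrapper function name is hypothetical and not part of this patch):

    #include <linux/sched.h>	/* declares cpu_clock() and local_clock() */

    static u64 grab_local_timestamp(void)
    {
    	/* old spelling: name the current CPU explicitly */
    	/* return cpu_clock(raw_smp_processor_id()); */

    	/* new spelling introduced by this patch */
    	return local_clock();
    }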

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jens Axboe <jaxboe@fusionio.com>
LKML-Reference: <1275052414.1645.52.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 8 changed files with 113 additions and 34 deletions

arch/parisc/kernel/ftrace.c
... ... @@ -82,7 +82,7 @@
82 82 unsigned long ret;
83 83  
84 84 pop_return_trace(&trace, &ret);
85   - trace.rettime = cpu_clock(raw_smp_processor_id());
  85 + trace.rettime = local_clock();
86 86 ftrace_graph_return(&trace);
87 87  
88 88 if (unlikely(!ret)) {
... ... @@ -126,7 +126,7 @@
126 126 return;
127 127 }
128 128  
129   - calltime = cpu_clock(raw_smp_processor_id());
  129 + calltime = local_clock();
130 130  
131 131 if (push_return_trace(old, calltime,
132 132 self_addr, &trace.depth) == -EBUSY) {
include/linux/sched.h
... ... @@ -1791,20 +1791,23 @@
1791 1791 #endif
1792 1792  
1793 1793 /*
1794   - * Architectures can set this to 1 if they have specified
1795   - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
1796   - * but then during bootup it turns out that sched_clock()
1797   - * is reliable after all:
  1794 + * Do not use outside of architecture code which knows its limitations.
  1795 + *
  1796 + * sched_clock() has no promise of monotonicity or bounded drift between
  1796 + * CPUs; its use (which you should not attempt) requires disabling IRQs.
  1798 + *
  1799 + * Please use one of the three interfaces below.
1798 1800 */
1799   -#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1800   -extern int sched_clock_stable;
1801   -#endif
1802   -
1803   -/* ftrace calls sched_clock() directly */
1804 1801 extern unsigned long long notrace sched_clock(void);
  1802 +/*
  1803 + * See the comment in kernel/sched_clock.c
  1804 + */
  1805 +extern u64 cpu_clock(int cpu);
  1806 +extern u64 local_clock(void);
  1807 +extern u64 sched_clock_cpu(int cpu);
1805 1808  
  1809 +
1806 1810 extern void sched_clock_init(void);
1807   -extern u64 sched_clock_cpu(int cpu);
1808 1811  
1809 1812 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1810 1813 static inline void sched_clock_tick(void)
1811 1814  
... ... @@ -1819,16 +1822,18 @@
1819 1822 {
1820 1823 }
1821 1824 #else
  1825 +/*
  1826 + * Architectures can set this to 1 if they have specified
  1827 + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
  1828 + * but then during bootup it turns out that sched_clock()
  1829 + * is reliable after all:
  1830 + */
  1831 +extern int sched_clock_stable;
  1832 +
1822 1833 extern void sched_clock_tick(void);
1823 1834 extern void sched_clock_idle_sleep_event(void);
1824 1835 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
1825 1836 #endif
1826   -
1827   -/*
1828   - * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
1829   - * clock constructed from sched_clock():
1830   - */
1831   -extern unsigned long long cpu_clock(int cpu);
1832 1837  
1833 1838 extern unsigned long long
1834 1839 task_sched_runtime(struct task_struct *task);
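
As a usage sketch of the three interfaces declared above (the demo function and variable names are hypothetical; see the comments in kernel/sched_clock.c for the authoritative description):

    #include <linux/sched.h>	/* cpu_clock(), local_clock(), sched_clock_cpu() */
    #include <linux/smp.h>	/* smp_processor_id(), raw_smp_processor_id() */
    #include <linux/irqflags.h>	/* local_irq_save(), local_irq_restore() */

    static void clock_interface_demo(void)
    {
    	unsigned long flags;
    	u64 t;

    	t = cpu_clock(raw_smp_processor_id());	/* usable from any context, including NMI */
    	t = local_clock();			/* same thing, for the current CPU */

    	local_irq_save(flags);			/* sched_clock_cpu() requires local IRQs off */
    	t = sched_clock_cpu(smp_processor_id());
    	local_irq_restore(flags);

    	(void)t;
    }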
kernel/lockdep.c
... ... @@ -146,7 +146,7 @@
146 146  
147 147 static inline u64 lockstat_clock(void)
148 148 {
149   - return cpu_clock(smp_processor_id());
  149 + return local_clock();
150 150 }
151 151  
152 152 static int lock_point(unsigned long points[], unsigned long ip)
kernel/perf_event.c
... ... @@ -214,7 +214,7 @@
214 214  
215 215 static inline u64 perf_clock(void)
216 216 {
217   - return cpu_clock(raw_smp_processor_id());
  217 + return local_clock();
218 218 }
219 219  
220 220 /*
kernel/rcutorture.c
... ... @@ -239,8 +239,7 @@
239 239 rcu_random(struct rcu_random_state *rrsp)
240 240 {
241 241 if (--rrsp->rrs_count < 0) {
242   - rrsp->rrs_state +=
243   - (unsigned long)cpu_clock(raw_smp_processor_id());
  242 + rrsp->rrs_state += (unsigned long)local_clock();
244 243 rrsp->rrs_count = RCU_RANDOM_REFRESH;
245 244 }
246 245 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
kernel/sched.c
... ... @@ -1647,7 +1647,7 @@
1647 1647 if (root_task_group_empty())
1648 1648 return;
1649 1649  
1650   - now = cpu_clock(raw_smp_processor_id());
  1650 + now = local_clock();
1651 1651 elapsed = now - sd->last_update;
1652 1652  
1653 1653 if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
kernel/sched_clock.c
... ... @@ -10,19 +10,55 @@
10 10 * Ingo Molnar <mingo@redhat.com>
11 11 * Guillaume Chazarain <guichaz@gmail.com>
12 12 *
13   - * Create a semi stable clock from a mixture of other events, including:
14   - * - gtod
  13 + *
  14 + * What:
  15 + *
  16 + * cpu_clock(i) provides a fast (execution time) high resolution
  17 + * clock with bounded drift between CPUs. The value of cpu_clock(i)
  18 + * is monotonic for constant i. The timestamp returned is in nanoseconds.
  19 + *
  20 + * ######################### BIG FAT WARNING ##########################
  21 + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
  22 + * # go backwards !! #
  23 + * ####################################################################
  24 + *
  25 + * There is no strict promise about the base, although it tends to start
  26 + * at 0 on boot (but people really shouldn't rely on that).
  27 + *
  28 + * cpu_clock(i) -- can be used from any context, including NMI.
  29 + * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
  30 + * local_clock() -- is cpu_clock() on the current cpu.
  31 + *
  32 + * How:
  33 + *
  34 + * The implementation either uses sched_clock() directly when
  35 + * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, in which case sched_clock() is
  36 + * assumed to provide these properties (which mostly means the
  37 + * architecture provides a globally synchronized highres time source).
  38 + *
  39 + * Otherwise it tries to create a semi stable clock from a mixture of other
  40 + * clocks, including:
  41 + *
  42 + * - GTOD (clock monotonic)
15 43 * - sched_clock()
16 44 * - explicit idle events
17 45 *
18   - * We use gtod as base and the unstable clock deltas. The deltas are filtered,
19   - * making it monotonic and keeping it within an expected window.
  46 + * We use GTOD as base and use sched_clock() deltas to improve resolution. The
  47 + * deltas are filtered to provide monotonicity and to keep the clock within
  48 + * an expected window.
20 49 *
21 50 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
22 51 * that is otherwise invisible (TSC gets stopped).
23 52 *
24   - * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
25   - * consistent between cpus (never more than 2 jiffies difference).
  53 + *
  54 + * Notes:
  55 + *
  56 + * The lack of IRQ-safety of sched_clock() and sched_clock_cpu() comes from
  57 + * things like cpufreq interrupts that can change the base clock (TSC)
  58 + * multiplier and cause funny jumps in time. Although the filtering provided
  59 + * by sched_clock_cpu() should mitigate serious artifacts, we cannot rely on
  60 + * it in general, since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we rely fully
  61 + * on sched_clock().
26 62 */
27 63 #include <linux/spinlock.h>
28 64 #include <linux/hardirq.h>
... ... @@ -170,6 +206,11 @@
170 206 return val;
171 207 }
172 208  
  209 +/*
  210 + * Similar to cpu_clock(), but requires local IRQs to be disabled.
  211 + *
  212 + * See cpu_clock().
  213 + */
173 214 u64 sched_clock_cpu(int cpu)
174 215 {
175 216 struct sched_clock_data *scd;
176 217  
... ... @@ -237,9 +278,19 @@
237 278 }
238 279 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
239 280  
240   -unsigned long long cpu_clock(int cpu)
  281 +/*
  282 + * As outlined at the top, provides a fast, high resolution, nanosecond
  283 + * time source that is monotonic per cpu argument and has bounded drift
  284 + * between cpus.
  285 + *
  286 + * ######################### BIG FAT WARNING ##########################
  287 + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
  288 + * # go backwards !! #
  289 + * ####################################################################
  290 + */
  291 +u64 cpu_clock(int cpu)
241 292 {
242   - unsigned long long clock;
  293 + u64 clock;
243 294 unsigned long flags;
244 295  
245 296 local_irq_save(flags);
... ... @@ -249,6 +300,25 @@
249 300 return clock;
250 301 }
251 302  
  303 +/*
  304 + * Similar to cpu_clock() for the current cpu. Time will only be observed
  305 + * to be monotonic if care is taken to only compare timestamps taken on the
  306 + * same CPU.
  307 + *
  308 + * See cpu_clock().
  309 + */
  310 +u64 local_clock(void)
  311 +{
  312 + u64 clock;
  313 + unsigned long flags;
  314 +
  315 + local_irq_save(flags);
  316 + clock = sched_clock_cpu(smp_processor_id());
  317 + local_irq_restore(flags);
  318 +
  319 + return clock;
  320 +}
  321 +
252 322 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
253 323  
254 324 void sched_clock_init(void)
255 325  
256 326  
... ... @@ -264,13 +334,18 @@
264 334 return sched_clock();
265 335 }
266 336  
267   -
268   -unsigned long long cpu_clock(int cpu)
  337 +u64 cpu_clock(int cpu)
269 338 {
270 339 return sched_clock_cpu(cpu);
271 340 }
272 341  
  342 +u64 local_clock(void)
  343 +{
  344 + return sched_clock_cpu(0);
  345 +}
  346 +
273 347 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
274 348  
275 349 EXPORT_SYMBOL_GPL(cpu_clock);
  350 +EXPORT_SYMBOL_GPL(local_clock);
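
As a usage note on the warning above, a sketch (do_work() is a hypothetical workload, not code from this patch) of measuring a duration so that both timestamps come from the same per-CPU clock:

    u64 start, delta;

    preempt_disable();			/* keep both reads on one CPU */
    start = local_clock();
    do_work();				/* hypothetical workload */
    delta = local_clock() - start;	/* monotonic: same CPU, same clock */
    preempt_enable();

    /*
     * By contrast, cpu_clock(i) - cpu_clock(j) for i != j may go
     * backwards: the per-CPU clocks only promise bounded drift.
     */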
kernel/trace/trace_clock.c
... ... @@ -56,7 +56,7 @@
56 56 */
57 57 u64 notrace trace_clock(void)
58 58 {
59   - return cpu_clock(raw_smp_processor_id());
  59 + return local_clock();
60 60 }
61 61  
62 62