Commit 25e41933b58777f2d020c3b0186b430ea004ec28

Authored by Thomas Renninger
Committed by Ingo Molnar
1 parent 61a0d49c33

perf: Clean up power events by introducing new, more generic ones

Add these new power trace events:

 power:cpu_idle
 power:cpu_frequency
 power:machine_suspend

The old C-state/idle accounting events:
  power:power_start
  power:power_end

now have a replacement (but we are still keeping the old
tracepoints for compatibility):

  power:cpu_idle

and
  power:power_frequency

is replaced with:
  power:cpu_frequency

power:machine_suspend is newly introduced.

Jean Pihet has a patch integrated into the generic layer
(kernel/power/suspend.c) which will make use of it.

The type= field got removed from both; it was never
used, and the type is distinguished by the event type itself.

perf timechart userspace tool gets adjusted in a separate patch.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Jean Pihet <jean.pihet@newoldbits.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: rjw@sisk.pl
LKML-Reference: <1294073445-14812-3-git-send-email-trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <1290072314-31155-2-git-send-email-trenn@suse.de>

Showing 9 changed files with 119 additions and 11 deletions Side-by-side Diff

arch/x86/kernel/process.c
... ... @@ -373,6 +373,7 @@
373 373 {
374 374 if (hlt_use_halt()) {
375 375 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
  376 + trace_cpu_idle(1, smp_processor_id());
376 377 current_thread_info()->status &= ~TS_POLLING;
377 378 /*
378 379 * TS_POLLING-cleared state must be visible before we
... ... @@ -443,6 +444,7 @@
443 444 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
444 445 {
445 446 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
  447 + trace_cpu_idle((ax>>4)+1, smp_processor_id());
446 448 if (!need_resched()) {
447 449 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
448 450 clflush((void *)&current_thread_info()->flags);
... ... @@ -459,6 +461,7 @@
459 461 {
460 462 if (!need_resched()) {
461 463 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
  464 + trace_cpu_idle(1, smp_processor_id());
462 465 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
463 466 clflush((void *)&current_thread_info()->flags);
464 467  
465 468  
... ... @@ -480,10 +483,12 @@
480 483 static void poll_idle(void)
481 484 {
482 485 trace_power_start(POWER_CSTATE, 0, smp_processor_id());
  486 + trace_cpu_idle(0, smp_processor_id());
483 487 local_irq_enable();
484 488 while (!need_resched())
485 489 cpu_relax();
486   - trace_power_end(0);
  490 + trace_power_end(smp_processor_id());
  491 + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
487 492 }
488 493  
489 494 /*
arch/x86/kernel/process_32.c
... ... @@ -113,8 +113,8 @@
113 113 stop_critical_timings();
114 114 pm_idle();
115 115 start_critical_timings();
116   -
117 116 trace_power_end(smp_processor_id());
  117 + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
118 118 }
119 119 tick_nohz_restart_sched_tick();
120 120 preempt_enable_no_resched();
arch/x86/kernel/process_64.c
... ... @@ -142,6 +142,8 @@
142 142 start_critical_timings();
143 143  
144 144 trace_power_end(smp_processor_id());
  145 + trace_cpu_idle(PWR_EVENT_EXIT,
  146 + smp_processor_id());
145 147  
146 148 /* In many cases the interrupt that ended idle
147 149 has already called exit_idle. But some idle
drivers/cpufreq/cpufreq.c
... ... @@ -355,6 +355,7 @@
355 355 dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
356 356 (unsigned long)freqs->cpu);
357 357 trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
  358 + trace_cpu_frequency(freqs->new, freqs->cpu);
358 359 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
359 360 CPUFREQ_POSTCHANGE, freqs);
360 361 if (likely(policy) && likely(policy->cpu == freqs->cpu))
drivers/cpuidle/cpuidle.c
... ... @@ -107,6 +107,7 @@
107 107 if (cpuidle_curr_governor->reflect)
108 108 cpuidle_curr_governor->reflect(dev);
109 109 trace_power_end(smp_processor_id());
  110 + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
110 111 }
111 112  
112 113 /**
drivers/idle/intel_idle.c
... ... @@ -221,6 +221,7 @@
221 221  
222 222 stop_critical_timings();
223 223 trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
  224 + trace_cpu_idle((eax >> 4) + 1, cpu);
224 225 if (!need_resched()) {
225 226  
226 227 __monitor((void *)&current_thread_info()->flags, 0, 0);
include/trace/events/power.h
... ... @@ -7,16 +7,67 @@
7 7 #include <linux/ktime.h>
8 8 #include <linux/tracepoint.h>
9 9  
10   -#ifndef _TRACE_POWER_ENUM_
11   -#define _TRACE_POWER_ENUM_
12   -enum {
13   - POWER_NONE = 0,
14   - POWER_CSTATE = 1, /* C-State */
15   - POWER_PSTATE = 2, /* Fequency change or DVFS */
16   - POWER_SSTATE = 3, /* Suspend */
17   -};
  10 +DECLARE_EVENT_CLASS(cpu, /* event class "cpu": record layout shared by cpu_idle and cpu_frequency */
  11 +
  12 + TP_PROTO(unsigned int state, unsigned int cpu_id),
  13 +
  14 + TP_ARGS(state, cpu_id),
  15 +
  16 + TP_STRUCT__entry( /* two u32 fields are recorded per event */
  17 + __field( u32, state )
  18 + __field( u32, cpu_id )
  19 + ),
  20 +
  21 + TP_fast_assign(
  22 + __entry->state = state;
  23 + __entry->cpu_id = cpu_id;
  24 + ),
  25 +
  26 + TP_printk("state=%lu cpu_id=%lu", (unsigned long)__entry->state, /* casts make the u32 fields match %lu */
  27 + (unsigned long)__entry->cpu_id)
  28 +);
  29 +
  30 +DEFINE_EVENT(cpu, cpu_idle, /* instance of the cpu class; call sites pass an idle state or PWR_EVENT_EXIT */
  31 +
  32 + TP_PROTO(unsigned int state, unsigned int cpu_id),
  33 +
  34 + TP_ARGS(state, cpu_id)
  35 +);
  36 +
  37 +/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
  38 +#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
  39 +#define _PWR_EVENT_AVOID_DOUBLE_DEFINING
  40 +
  41 +#define PWR_EVENT_EXIT -1
18 42 #endif
19 43  
  44 +DEFINE_EVENT(cpu, cpu_frequency, /* instance of the cpu class; the class's state field carries the frequency */
  45 +
  46 + TP_PROTO(unsigned int frequency, unsigned int cpu_id),
  47 +
  48 + TP_ARGS(frequency, cpu_id)
  49 +);
  50 +
  51 +TRACE_EVENT(machine_suspend, /* standalone event: records a single state value, no cpu_id */
  52 +
  53 + TP_PROTO(unsigned int state),
  54 +
  55 + TP_ARGS(state),
  56 +
  57 + TP_STRUCT__entry(
  58 + __field( u32, state )
  59 + ),
  60 +
  61 + TP_fast_assign(
  62 + __entry->state = state;
  63 + ),
  64 +
  65 + TP_printk("state=%lu", (unsigned long)__entry->state) /* cast makes the u32 field match %lu */
  66 +);
  67 +
  68 +/* This code will be removed once the deprecation period has passed (2.6.41) */
  69 +#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED
  70 +
20 71 /*
21 72 * The power events are used for cpuidle & suspend (power_start, power_end)
22 73 * and for cpufreq (power_frequency)
... ... @@ -75,6 +126,36 @@
75 126  
76 127 );
77 128  
  129 +/* Deprecated dummy functions must be protected against multi-declaration */
  130 +#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
  131 +#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
  132 +
  133 +enum { /* 'type' argument of the deprecated power_start/power_frequency events */
  134 + POWER_NONE = 0,
  135 + POWER_CSTATE = 1, /* C-State */
  136 + POWER_PSTATE = 2, /* Frequency change or DVFS */
  137 +};
  138 +#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
  139 +
  140 +#else /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
  141 +
  142 +#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
  143 +#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
  144 +enum { /* 'type' argument of the deprecated power_start/power_frequency events */
  145 + POWER_NONE = 0,
  146 + POWER_CSTATE = 1, /* C-State */
  147 + POWER_PSTATE = 2, /* Frequency change or DVFS */
  148 +};
  149 +
  150 +/* These dummy declaration have to be ripped out when the deprecated
  151 + events get removed */
  152 +static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {};
  153 +static inline void trace_power_end(u64 cpuid) {};
  154 +static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {};
  155 +#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
  156 +
  157 +#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
  158 +
78 159 /*
79 160 * The clock events are used for clock enable/disable and for
80 161 * clock rate change
... ... @@ -153,7 +234,6 @@
153 234  
154 235 TP_ARGS(name, state, cpu_id)
155 236 );
156   -
157 237 #endif /* _TRACE_POWER_H */
158 238  
159 239 /* This part must be outside protection */
kernel/trace/Kconfig
... ... @@ -69,6 +69,21 @@
69 69 select CONTEXT_SWITCH_TRACER
70 70 bool
71 71  
  72 +config EVENT_POWER_TRACING_DEPRECATED
  73 + depends on EVENT_TRACING
  74 + bool "Deprecated power event trace API, to be removed"
  75 + default y
  76 + help
  77 + Provides old power event types:
  78 + C-state/idle accounting events:
  79 + power:power_start
  80 + power:power_end
  81 + and old cpufreq accounting event:
  82 + power:power_frequency
  83 + This is for userspace compatibility
  84 + and will vanish after 5 kernel iterations,
  85 + namely 2.6.41.
  86 +
72 87 config CONTEXT_SWITCH_TRACER
73 88 bool
74 89  
kernel/trace/power-traces.c
... ... @@ -13,5 +13,8 @@
13 13 #define CREATE_TRACE_POINTS
14 14 #include <trace/events/power.h>
15 15  
  16 +#ifdef EVENT_POWER_TRACING_DEPRECATED
16 17 EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
  18 +#endif
  19 +EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);