Commit f91298709790b9a483752ca3c967845537df2af3

Authored by Cyrill Gorcunov
Committed by Steven Rostedt
1 parent 4a9bd3f134

perf, x86: P4 PMU - Introduce event alias feature

Instead of the hw_nmi_watchdog_set_attr() weak function
and the corresponding x86_pmu::hw_watchdog_set_attr() call,
we introduce an event alias mechanism which allows us
to drop these routines completely and isolate the quirks
of the Netburst architecture inside the P4 PMU code only.

The main idea remains the same though -- to allow
nmi-watchdog and perf top run simultaneously.

Note the aliasing mechanism applies to the generic
PERF_COUNT_HW_CPU_CYCLES event only, because an arbitrary
event (say, one passed as RAW initially) might have some
additional bits set inside the ESCR register, changing
the behaviour of the event, so we can't guarantee anymore
that the alias event will give the same result.

P.S. A huge thanks to Don and Steven for testing
     and early review.

Acked-by: Don Zickus <dzickus@redhat.com>
Tested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Ingo Molnar <mingo@elte.hu>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Stephane Eranian <eranian@google.com>
CC: Lin Ming <ming.m.lin@intel.com>
CC: Arnaldo Carvalho de Melo <acme@redhat.com>
CC: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20110708201712.GS23657@sun
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

Showing 4 changed files with 139 additions and 38 deletions Side-by-side Diff

arch/x86/include/asm/perf_event_p4.h
... ... @@ -102,6 +102,14 @@
102 102 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
103 103  
104 104 /*
  105 + * If an event has alias it should be marked
  106 + * with a special bit. (Don't forget to check
  107 + * P4_PEBS_CONFIG_MASK and related bits on
  108 + * modification.)
  109 + */
  110 +#define P4_CONFIG_ALIASABLE (1 << 9)
  111 +
  112 +/*
105 113 * The bits we allow to pass for RAW events
106 114 */
107 115 #define P4_CONFIG_MASK_ESCR \
... ... @@ -122,6 +130,31 @@
122 130 #define P4_CONFIG_MASK \
123 131 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
124 132 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
  133 +
  134 +/*
  135 + * In case of event aliasing we need to preserve some
  136 + * caller bits otherwise the mapping won't be complete.
  137 + */
  138 +#define P4_CONFIG_EVENT_ALIAS_MASK \
  139 + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR) | \
  140 + p4_config_pack_cccr(P4_CCCR_EDGE | \
  141 + P4_CCCR_THRESHOLD_MASK | \
  142 + P4_CCCR_COMPLEMENT | \
  143 + P4_CCCR_COMPARE))
  144 +
  145 +#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS \
  146 + ((P4_CONFIG_HT) | \
  147 + p4_config_pack_escr(P4_ESCR_T0_OS | \
  148 + P4_ESCR_T0_USR | \
  149 + P4_ESCR_T1_OS | \
  150 + P4_ESCR_T1_USR) | \
  151 + p4_config_pack_cccr(P4_CCCR_OVF | \
  152 + P4_CCCR_CASCADE | \
  153 + P4_CCCR_FORCE_OVF | \
  154 + P4_CCCR_THREAD_ANY | \
  155 + P4_CCCR_OVF_PMI_T0 | \
  156 + P4_CCCR_OVF_PMI_T1 | \
  157 + P4_CONFIG_ALIASABLE))
125 158  
126 159 static inline bool p4_is_event_cascaded(u64 config)
127 160 {
arch/x86/kernel/cpu/perf_event.c
... ... @@ -274,7 +274,6 @@
274 274 void (*enable_all)(int added);
275 275 void (*enable)(struct perf_event *);
276 276 void (*disable)(struct perf_event *);
277   - void (*hw_watchdog_set_attr)(struct perf_event_attr *attr);
278 277 int (*hw_config)(struct perf_event *event);
279 278 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
280 279 unsigned eventsel;
... ... @@ -359,12 +358,6 @@
359 358 [PERF_COUNT_HW_CACHE_MAX]
360 359 [PERF_COUNT_HW_CACHE_OP_MAX]
361 360 [PERF_COUNT_HW_CACHE_RESULT_MAX];
362   -
363   -void hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr)
364   -{
365   - if (x86_pmu.hw_watchdog_set_attr)
366   - x86_pmu.hw_watchdog_set_attr(wd_attr);
367   -}
368 361  
369 362 /*
370 363 * Propagate event elapsed time into the generic event.
arch/x86/kernel/cpu/perf_event_p4.c
... ... @@ -570,11 +570,92 @@
570 570 },
571 571 };
572 572  
  573 +/*
  574 + * Because Netburst is quite restricted in how
  575 + * many of the same events can run simultaneously,
  576 + * we use event aliases, i.e. different events which
  577 + * have the same functionality but use non-intersecting
  578 + * resources (ESCR/CCCR/counter registers). This allows
  579 + * us to run two or more semi-same events together. It
  580 + * is done transparently to user space.
  581 + *
  582 + * Never set any custom internal bits such as P4_CONFIG_HT,
  583 + * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC; they are
  584 + * either updated automatically or not applicable
  585 + * at all.
  586 + *
  587 + * And be really careful choosing aliases!
  588 + */
  589 +struct p4_event_alias {
  590 + u64 orig;
  591 + u64 alter;
  592 +} p4_event_aliases[] = {
  593 + {
  594 + /*
  595 + * Non-halted cycles can be substituted with
  596 + * non-sleeping cycles (see Intel SDM Vol3b for
  597 + * details).
  598 + */
  599 + .orig =
  600 + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
  601 + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
  602 + .alter =
  603 + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
  604 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
  605 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
  606 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
  607 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
  608 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
  609 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
  610 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
  611 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
  612 + p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
  613 + P4_CCCR_COMPARE),
  614 + },
  615 +};
  616 +
  617 +static u64 p4_get_alias_event(u64 config)
  618 +{
  619 + u64 config_match;
  620 + int i;
  621 +
  622 + /*
  623 + * Probably we're lucky and don't have to do
  624 + * matching over all config bits.
  625 + */
  626 + if (!(config & P4_CONFIG_ALIASABLE))
  627 + return 0;
  628 +
  629 + config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
  630 +
  631 +	/*
  632 +	 * If an event was previously swapped to the alter config
  633 +	 * we should swap it back, otherwise contention on registers
  634 +	 * will come back.
  635 +	 */
  636 + for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
  637 + if (config_match == p4_event_aliases[i].orig) {
  638 + config_match = p4_event_aliases[i].alter;
  639 + break;
  640 + } else if (config_match == p4_event_aliases[i].alter) {
  641 + config_match = p4_event_aliases[i].orig;
  642 + break;
  643 + }
  644 + }
  645 +
  646 + if (i >= ARRAY_SIZE(p4_event_aliases))
  647 + return 0;
  648 +
  649 + return config_match |
  650 + (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
  651 +}
  652 +
573 653 static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
574 654 /* non-halted CPU clocks */
575 655 [PERF_COUNT_HW_CPU_CYCLES] =
576 656 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
577   - P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
  657 + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
  658 + P4_CONFIG_ALIASABLE,
578 659  
579 660 /*
580 661 * retired instructions
... ... @@ -719,31 +800,6 @@
719 800 return 0;
720 801 }
721 802  
722   -static void p4_hw_watchdog_set_attr(struct perf_event_attr *wd_attr)
723   -{
724   - /*
725   - * Watchdog ticks are special on Netburst, we use
726   - * that named "non-sleeping" ticks as recommended
727   - * by Intel SDM Vol3b.
728   - */
729   - WARN_ON_ONCE(wd_attr->type != PERF_TYPE_HARDWARE ||
730   - wd_attr->config != PERF_COUNT_HW_CPU_CYCLES);
731   -
732   - wd_attr->type = PERF_TYPE_RAW;
733   - wd_attr->config =
734   - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
735   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
736   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
737   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
738   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
739   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
740   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
741   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
742   - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3)) |
743   - p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
744   - P4_CCCR_COMPARE);
745   -}
746   -
747 803 static int p4_hw_config(struct perf_event *event)
748 804 {
749 805 int cpu = get_cpu();
... ... @@ -1159,6 +1215,8 @@
1159 1215 struct p4_event_bind *bind;
1160 1216 unsigned int i, thread, num;
1161 1217 int cntr_idx, escr_idx;
  1218 + u64 config_alias;
  1219 + int pass;
1162 1220  
1163 1221 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1164 1222 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
... ... @@ -1167,6 +1225,17 @@
1167 1225  
1168 1226 hwc = &cpuc->event_list[i]->hw;
1169 1227 thread = p4_ht_thread(cpu);
  1228 + pass = 0;
  1229 +
  1230 +again:
  1231 +	/*
  1232 +	 * Aliases are swappable so we may hit a circular
  1233 +	 * lock if both the original config and the alias need
  1234 +	 * resources (MSR registers) which are already busy.
  1235 +	 */
  1236 + if (pass > 2)
  1237 + goto done;
  1238 +
1170 1239 bind = p4_config_get_bind(hwc->config);
1171 1240 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
1172 1241 if (unlikely(escr_idx == -1))
... ... @@ -1180,8 +1249,17 @@
1180 1249 }
1181 1250  
1182 1251 cntr_idx = p4_next_cntr(thread, used_mask, bind);
1183   - if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
1184   - goto done;
  1252 + if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
  1253 + /*
  1254 + * Probably an event alias is still available.
  1255 + */
  1256 + config_alias = p4_get_alias_event(hwc->config);
  1257 + if (!config_alias)
  1258 + goto done;
  1259 + hwc->config = config_alias;
  1260 + pass++;
  1261 + goto again;
  1262 + }
1185 1263  
1186 1264 p4_pmu_swap_config_ts(hwc, cpu);
1187 1265 if (assign)
... ... @@ -1218,7 +1296,6 @@
1218 1296 .cntval_bits = ARCH_P4_CNTRVAL_BITS,
1219 1297 .cntval_mask = ARCH_P4_CNTRVAL_MASK,
1220 1298 .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
1221   - .hw_watchdog_set_attr = p4_hw_watchdog_set_attr,
1222 1299 .hw_config = p4_hw_config,
1223 1300 .schedule_events = p4_pmu_schedule_events,
1224 1301 /*
... ... @@ -200,7 +200,6 @@
200 200 }
201 201  
202 202 #ifdef CONFIG_HARDLOCKUP_DETECTOR
203   -void __weak hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr) { }
204 203  
205 204 static struct perf_event_attr wd_hw_attr = {
206 205 .type = PERF_TYPE_HARDWARE,
... ... @@ -372,7 +371,6 @@
372 371  
373 372 wd_attr = &wd_hw_attr;
374 373 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
375   - hw_nmi_watchdog_set_attr(wd_attr);
376 374  
377 375 /* Try to register using hardware perf events */
378 376 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);