Commit 89a1e18731959e9953fae15ddc1a983eb15a4f19

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent 8dc85d5472

perf: Provide a separate task context for swevents

Since software events are always schedulable, mixing them up with
hardware events (which are not) can lead to funny scheduling oddities.

Giving them their own context solves this.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 4 changed files with 33 additions and 19 deletions (side-by-side diff)

include/linux/perf_event.h
... ... @@ -952,14 +952,7 @@
952 952 */
953 953 static inline int is_software_event(struct perf_event *event)
954 954 {
955   - switch (event->attr.type) {
956   - case PERF_TYPE_SOFTWARE:
957   - case PERF_TYPE_TRACEPOINT:
958   - /* for now the breakpoint stuff also works as software event */
959   - case PERF_TYPE_BREAKPOINT:
960   - return 1;
961   - }
962   - return 0;
  955 + return event->pmu->task_ctx_nr == perf_sw_context;
963 956 }
964 957  
965 958 extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
include/linux/sched.h
... ... @@ -1163,6 +1163,7 @@
1163 1163 enum perf_event_task_context {
1164 1164 perf_invalid_context = -1,
1165 1165 perf_hw_context = 0,
  1166 + perf_sw_context,
1166 1167 perf_nr_task_contexts,
1167 1168 };
1168 1169  
kernel/hw_breakpoint.c
... ... @@ -610,6 +610,8 @@
610 610 }
611 611  
612 612 static struct pmu perf_breakpoint = {
  613 + .task_ctx_nr = perf_sw_context, /* could eventually get its own */
  614 +
613 615 .event_init = hw_breakpoint_event_init,
614 616 .add = hw_breakpoint_add,
615 617 .del = hw_breakpoint_del,
kernel/perf_event.c
... ... @@ -4709,6 +4709,8 @@
4709 4709 }
4710 4710  
4711 4711 static struct pmu perf_swevent = {
  4712 + .task_ctx_nr = perf_sw_context,
  4713 +
4712 4714 .event_init = perf_swevent_init,
4713 4715 .add = perf_swevent_add,
4714 4716 .del = perf_swevent_del,
... ... @@ -4800,6 +4802,8 @@
4800 4802 }
4801 4803  
4802 4804 static struct pmu perf_tracepoint = {
  4805 + .task_ctx_nr = perf_sw_context,
  4806 +
4803 4807 .event_init = perf_tp_event_init,
4804 4808 .add = perf_trace_add,
4805 4809 .del = perf_trace_del,
... ... @@ -4988,6 +4992,8 @@
4988 4992 }
4989 4993  
4990 4994 static struct pmu perf_cpu_clock = {
  4995 + .task_ctx_nr = perf_sw_context,
  4996 +
4991 4997 .event_init = cpu_clock_event_init,
4992 4998 .add = cpu_clock_event_add,
4993 4999 .del = cpu_clock_event_del,
... ... @@ -5063,6 +5069,8 @@
5063 5069 }
5064 5070  
5065 5071 static struct pmu perf_task_clock = {
  5072 + .task_ctx_nr = perf_sw_context,
  5073 +
5066 5074 .event_init = task_clock_event_init,
5067 5075 .add = task_clock_event_add,
5068 5076 .del = task_clock_event_del,
... ... @@ -5490,6 +5498,7 @@
5490 5498 struct perf_event_context *ctx;
5491 5499 struct file *event_file = NULL;
5492 5500 struct file *group_file = NULL;
  5501 + struct pmu *pmu;
5493 5502 int event_fd;
5494 5503 int fput_needed = 0;
5495 5504 int err;
5496 5505  
... ... @@ -5522,20 +5531,11 @@
5522 5531 goto err_fd;
5523 5532 }
5524 5533  
5525   - /*
5526   - * Get the target context (task or percpu):
5527   - */
5528   - ctx = find_get_context(event->pmu, pid, cpu);
5529   - if (IS_ERR(ctx)) {
5530   - err = PTR_ERR(ctx);
5531   - goto err_alloc;
5532   - }
5533   -
5534 5534 if (group_fd != -1) {
5535 5535 group_leader = perf_fget_light(group_fd, &fput_needed);
5536 5536 if (IS_ERR(group_leader)) {
5537 5537 err = PTR_ERR(group_leader);
5538   - goto err_context;
  5538 + goto err_alloc;
5539 5539 }
5540 5540 group_file = group_leader->filp;
5541 5541 if (flags & PERF_FLAG_FD_OUTPUT)
... ... @@ -5545,6 +5545,23 @@
5545 5545 }
5546 5546  
5547 5547 /*
  5548 + * Special case software events and allow them to be part of
  5549 + * any hardware group.
  5550 + */
  5551 + pmu = event->pmu;
  5552 + if ((pmu->task_ctx_nr == perf_sw_context) && group_leader)
  5553 + pmu = group_leader->pmu;
  5554 +
  5555 + /*
  5556 + * Get the target context (task or percpu):
  5557 + */
  5558 + ctx = find_get_context(pmu, pid, cpu);
  5559 + if (IS_ERR(ctx)) {
  5560 + err = PTR_ERR(ctx);
  5561 + goto err_group_fd;
  5562 + }
  5563 +
  5564 + /*
5548 5565 * Look up the group leader (we will attach this event to it):
5549 5566 */
5550 5567 if (group_leader) {
5551 5568  
... ... @@ -5605,8 +5622,9 @@
5605 5622 return event_fd;
5606 5623  
5607 5624 err_context:
5608   - fput_light(group_file, fput_needed);
5609 5625 put_ctx(ctx);
  5626 +err_group_fd:
  5627 + fput_light(group_file, fput_needed);
5610 5628 err_alloc:
5611 5629 free_event(event);
5612 5630 err_fd: