kernel/perf_event.c
/*
 * Performance events core code:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *  Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 *  For licensing details see kernel-base/COPYING
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/vmstat.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>

#include <asm/irq_regs.h>

/*
 * Each CPU has a list of per CPU events:
 */
DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);

int perf_max_events __read_mostly = 1;
static int perf_reserved_percpu __read_mostly;
static int perf_overcommit __read_mostly = 1;

static atomic_t nr_events __read_mostly;
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_task_events __read_mostly;

/*
 * perf event paranoia level:
 *  -1 - not paranoid at all
 *   0 - disallow raw tracepoint access for unpriv
 *   1 - disallow cpu events for unpriv
 *   2 - disallow kernel profiling for unpriv
 */
int sysctl_perf_event_paranoid __read_mostly = 1;

static inline bool perf_paranoid_tracepoint_raw(void)
{
        return sysctl_perf_event_paranoid > -1;
}

static inline bool perf_paranoid_cpu(void)
{
        return sysctl_perf_event_paranoid > 0;
}

static inline bool perf_paranoid_kernel(void)
{
        return sysctl_perf_event_paranoid > 1;
}

int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */

/*
 * max perf event sample rate
 */
int sysctl_perf_event_sample_rate __read_mostly = 100000;

static atomic64_t perf_event_id;

/*
 * Lock for (sysadmin-configurable) event reservations:
 */
static DEFINE_SPINLOCK(perf_resource_lock);
/*
 * Architecture provided APIs - weak aliases:
 */
extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
{
        return NULL;
}

void __weak hw_perf_disable(void)               { barrier(); }
void __weak hw_perf_enable(void)                { barrier(); }

void __weak hw_perf_event_setup(int cpu)        { barrier(); }
void __weak hw_perf_event_setup_online(int cpu) { barrier(); }

int __weak
hw_perf_group_sched_in(struct perf_event *group_leader,
               struct perf_cpu_context *cpuctx,
               struct perf_event_context *ctx, int cpu)
{
        return 0;
}

void __weak perf_event_print_debug(void)        { }

static DEFINE_PER_CPU(int, perf_disable_count);

void __perf_disable(void)
{
        __get_cpu_var(perf_disable_count)++;
}

bool __perf_enable(void)
{
        return !--__get_cpu_var(perf_disable_count);
}

void perf_disable(void)
{
        __perf_disable();
        hw_perf_disable();
}

void perf_enable(void)
{
        if (__perf_enable())
                hw_perf_enable();
}

static void get_ctx(struct perf_event_context *ctx)
{
        WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
}

static void free_ctx(struct rcu_head *head)
{
        struct perf_event_context *ctx;

        ctx = container_of(head, struct perf_event_context, rcu_head);
        kfree(ctx);
}

static void put_ctx(struct perf_event_context *ctx)
{
        if (atomic_dec_and_test(&ctx->refcount)) {
                if (ctx->parent_ctx)
                        put_ctx(ctx->parent_ctx);
                if (ctx->task)
                        put_task_struct(ctx->task);
                call_rcu(&ctx->rcu_head, free_ctx);
        }
}

static void unclone_ctx(struct perf_event_context *ctx)
{
        if (ctx->parent_ctx) {
                put_ctx(ctx->parent_ctx);
                ctx->parent_ctx = NULL;
        }
}

/*
 * If we inherit events we want to return the parent event id
 * to userspace.
 */
static u64 primary_event_id(struct perf_event *event)
{
        u64 id = event->id;

        if (event->parent)
                id = event->parent->id;

        return id;
}

/*
 * Get the perf_event_context for a task and lock it.
 * This has to cope with the fact that until it is locked,
 * the context could get moved to another task.
 */
static struct perf_event_context *
perf_lock_task_context(struct task_struct *task, unsigned long *flags)
{
        struct perf_event_context *ctx;

        rcu_read_lock();
retry:
        ctx = rcu_dereference(task->perf_event_ctxp);
        if (ctx) {
                /*
                 * If this context is a clone of another, it might
                 * get swapped for another underneath us by
                 * perf_event_task_sched_out, though the
                 * rcu_read_lock() protects us from any context
                 * getting freed.  Lock the context and check if it
                 * got swapped before we could get the lock, and retry
                 * if so.  If we locked the right context, then it
                 * can't get swapped on us any more.
                 */
                spin_lock_irqsave(&ctx->lock, *flags);
                if (ctx != rcu_dereference(task->perf_event_ctxp)) {
                        spin_unlock_irqrestore(&ctx->lock, *flags);
                        goto retry;
                }

                if (!atomic_inc_not_zero(&ctx->refcount)) {
                        spin_unlock_irqrestore(&ctx->lock, *flags);
                        ctx = NULL;
                }
        }
        rcu_read_unlock();
        return ctx;
}

/*
 * Get the context for a task and increment its pin_count so it
 * can't get swapped to another task.  This also increments its
 * reference count so that the context can't get freed.
 */
static struct perf_event_context *perf_pin_task_context(struct task_struct *task)
{
        struct perf_event_context *ctx;
        unsigned long flags;

        ctx = perf_lock_task_context(task, &flags);
        if (ctx) {
                ++ctx->pin_count;
                spin_unlock_irqrestore(&ctx->lock, flags);
        }
        return ctx;
}

static void perf_unpin_context(struct perf_event_context *ctx)
{
        unsigned long flags;

        spin_lock_irqsave(&ctx->lock, flags);
        --ctx->pin_count;
        spin_unlock_irqrestore(&ctx->lock, flags);
        put_ctx(ctx);
}

/*
 * Add an event to the lists for its context.
 * Must be called with ctx->mutex and ctx->lock held.
 */
static void
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
{
        struct perf_event *group_leader = event->group_leader;

        /*
         * Depending on whether it is a standalone or sibling event,
         * add it straight to the context's event list, or to the group
         * leader's sibling list:
         */
        if (group_leader == event)
                list_add_tail(&event->group_entry, &ctx->group_list);
        else {
                list_add_tail(&event->group_entry, &group_leader->sibling_list);
                group_leader->nr_siblings++;
        }

        list_add_rcu(&event->event_entry, &ctx->event_list);
        ctx->nr_events++;
        if (event->attr.inherit_stat)
                ctx->nr_stat++;
}
/*
 * Remove an event from the lists for its context.
 * Must be called with ctx->mutex and ctx->lock held.
 */
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
        struct perf_event *sibling, *tmp;

        if (list_empty(&event->group_entry))
                return;
        ctx->nr_events--;
        if (event->attr.inherit_stat)
                ctx->nr_stat--;

        list_del_init(&event->group_entry);
        list_del_rcu(&event->event_entry);

        if (event->group_leader != event)
                event->group_leader->nr_siblings--;

        /*
         * If this was a group event with sibling events then
         * upgrade the siblings to singleton events by adding them
         * to the context list directly:
         */
        list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {

                list_move_tail(&sibling->group_entry, &ctx->group_list);
                sibling->group_leader = sibling;
        }
}

static void
event_sched_out(struct perf_event *event,
                  struct perf_cpu_context *cpuctx,
                  struct perf_event_context *ctx)
{
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                return;

        event->state = PERF_EVENT_STATE_INACTIVE;
        if (event->pending_disable) {
                event->pending_disable = 0;
                event->state = PERF_EVENT_STATE_OFF;
        }
        event->tstamp_stopped = ctx->time;
        event->pmu->disable(event);
        event->oncpu = -1;

        if (!is_software_event(event))
                cpuctx->active_oncpu--;
        ctx->nr_active--;
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
}

static void
group_sched_out(struct perf_event *group_event,
                struct perf_cpu_context *cpuctx,
                struct perf_event_context *ctx)
{
        struct perf_event *event;

        if (group_event->state != PERF_EVENT_STATE_ACTIVE)
                return;

        event_sched_out(group_event, cpuctx, ctx);

        /*
         * Schedule out siblings (if any):
         */
        list_for_each_entry(event, &group_event->sibling_list, group_entry)
                event_sched_out(event, cpuctx, ctx);

        if (group_event->attr.exclusive)
                cpuctx->exclusive = 0;
}

/*
 * Cross CPU call to remove a performance event
 *
 * We disable the event on the hardware level first. After that we
 * remove it from the context list.
 */
static void __perf_event_remove_from_context(void *info)
{
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        struct perf_event *event = info;
        struct perf_event_context *ctx = event->ctx;

        /*
         * If this is a task context, we need to check whether it is
         * the current task context of this cpu. If not it has been
         * scheduled out before the smp call arrived.
         */
        if (ctx->task && cpuctx->task_ctx != ctx)
                return;

        spin_lock(&ctx->lock);
        /*
         * Protect the list operation against NMI by disabling the
         * events on a global level.
         */
        perf_disable();

        event_sched_out(event, cpuctx, ctx);

        list_del_event(event, ctx);

        if (!ctx->task) {
                /*
                 * Allow more per task events with respect to the
                 * reservation:
                 */
                cpuctx->max_pertask =
                        min(perf_max_events - ctx->nr_events,
                            perf_max_events - perf_reserved_percpu);
        }

        perf_enable();
        spin_unlock(&ctx->lock);
}

/*
 * Remove the event from a task's (or a CPU's) list of events.
 *
 * Must be called with ctx->mutex held.
 *
 * CPU events are removed with a smp call. For task events we only
 * call when the task is on a CPU.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid.  This is OK when called from perf_release since
 * that only calls us on the top-level context, which can't be a clone.
 * When called from perf_event_exit_task, it's OK because the
 * context has been detached from its task.
 */
static void perf_event_remove_from_context(struct perf_event *event)
{
        struct perf_event_context *ctx = event->ctx;
        struct task_struct *task = ctx->task;

        if (!task) {
                /*
                 * Per cpu events are removed via an smp call and
                 * the removal is always successful.
                 */
                smp_call_function_single(event->cpu,
                                         __perf_event_remove_from_context,
                                         event, 1);
                return;
        }

retry:
        task_oncpu_function_call(task, __perf_event_remove_from_context,
                                 event);

        spin_lock_irq(&ctx->lock);
        /*
         * If the context is active we need to retry the smp call.
         */
        if (ctx->nr_active && !list_empty(&event->group_entry)) {
                spin_unlock_irq(&ctx->lock);
                goto retry;
        }

        /*
         * The lock prevents that this context is scheduled in so we
         * can remove the event safely, if the call above did not
         * succeed.
         */
        if (!list_empty(&event->group_entry)) {
                list_del_event(event, ctx);
        }
        spin_unlock_irq(&ctx->lock);
}
static inline u64 perf_clock(void)
{
        return cpu_clock(smp_processor_id());
}

/*
 * Update the record of the current time in a context.
 */
static void update_context_time(struct perf_event_context *ctx)
{
        u64 now = perf_clock();

        ctx->time += now - ctx->timestamp;
        ctx->timestamp = now;
}

/*
 * Update the total_time_enabled and total_time_running fields for an event.
 */
static void update_event_times(struct perf_event *event)
{
        struct perf_event_context *ctx = event->ctx;
        u64 run_end;

        if (event->state < PERF_EVENT_STATE_INACTIVE ||
            event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
                return;

        event->total_time_enabled = ctx->time - event->tstamp_enabled;

        if (event->state == PERF_EVENT_STATE_INACTIVE)
                run_end = event->tstamp_stopped;
        else
                run_end = ctx->time;

        event->total_time_running = run_end - event->tstamp_running;
}

/*
 * Update total_time_enabled and total_time_running for all events in a group.
 */
static void update_group_times(struct perf_event *leader)
{
        struct perf_event *event;

        update_event_times(leader);
        list_for_each_entry(event, &leader->sibling_list, group_entry)
                update_event_times(event);
}

/*
 * Cross CPU call to disable a performance event
 */
static void __perf_event_disable(void *info)
{
        struct perf_event *event = info;
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        struct perf_event_context *ctx = event->ctx;

        /*
         * If this is a per-task event, need to check whether this
         * event's task is the current task on this cpu.
         */
        if (ctx->task && cpuctx->task_ctx != ctx)
                return;

        spin_lock(&ctx->lock);

        /*
         * If the event is on, turn it off.
         * If it is in error state, leave it in error state.
         */
        if (event->state >= PERF_EVENT_STATE_INACTIVE) {
                update_context_time(ctx);
                update_group_times(event);
                if (event == event->group_leader)
                        group_sched_out(event, cpuctx, ctx);
                else
                        event_sched_out(event, cpuctx, ctx);
                event->state = PERF_EVENT_STATE_OFF;
        }

        spin_unlock(&ctx->lock);
}

/*
 * Disable an event.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid.  This condition is satisfied when called through
 * perf_event_for_each_child or perf_event_for_each because they
 * hold the top-level event's child_mutex, so any descendant that
 * goes to exit will block in sync_child_event.
 * When called from perf_pending_event it's OK because event->ctx
 * is the current context on this CPU and preemption is disabled,
 * hence we can't get into perf_event_task_sched_out for this context.
 */
static void perf_event_disable(struct perf_event *event)
{
        struct perf_event_context *ctx = event->ctx;
        struct task_struct *task = ctx->task;

        if (!task) {
                /*
                 * Disable the event on the cpu that it's on
                 */
                smp_call_function_single(event->cpu, __perf_event_disable,
                                         event, 1);
                return;
        }

retry:
        task_oncpu_function_call(task, __perf_event_disable, event);

        spin_lock_irq(&ctx->lock);
        /*
         * If the event is still active, we need to retry the cross-call.
         */
        if (event->state == PERF_EVENT_STATE_ACTIVE) {
                spin_unlock_irq(&ctx->lock);
                goto retry;
        }

        /*
         * Since we have the lock this context can't be scheduled
         * in, so we can change the state safely.
         */
        if (event->state == PERF_EVENT_STATE_INACTIVE) {
                update_group_times(event);
                event->state = PERF_EVENT_STATE_OFF;
        }

        spin_unlock_irq(&ctx->lock);
}
static int
event_sched_in(struct perf_event *event,
                 struct perf_cpu_context *cpuctx,
                 struct perf_event_context *ctx,
                 int cpu)
{
        if (event->state <= PERF_EVENT_STATE_OFF)
                return 0;

        event->state = PERF_EVENT_STATE_ACTIVE;
        event->oncpu = cpu;     /* TODO: put 'cpu' into cpuctx->cpu */
        /*
         * The new state must be visible before we turn it on in the hardware:
         */
        smp_wmb();

        if (event->pmu->enable(event)) {
                event->state = PERF_EVENT_STATE_INACTIVE;
                event->oncpu = -1;
                return -EAGAIN;
        }

        event->tstamp_running += ctx->time - event->tstamp_stopped;

        if (!is_software_event(event))
                cpuctx->active_oncpu++;
        ctx->nr_active++;

        if (event->attr.exclusive)
                cpuctx->exclusive = 1;

        return 0;
}

static int
group_sched_in(struct perf_event *group_event,
               struct perf_cpu_context *cpuctx,
               struct perf_event_context *ctx,
               int cpu)
{
        struct perf_event *event, *partial_group;
        int ret;

        if (group_event->state == PERF_EVENT_STATE_OFF)
                return 0;

        ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu);
        if (ret)
                return ret < 0 ? ret : 0;

        if (event_sched_in(group_event, cpuctx, ctx, cpu))
                return -EAGAIN;

        /*
         * Schedule in siblings as one group (if any):
         */
        list_for_each_entry(event, &group_event->sibling_list, group_entry) {
                if (event_sched_in(event, cpuctx, ctx, cpu)) {
                        partial_group = event;
                        goto group_error;
                }
        }

        return 0;

group_error:
        /*
         * Groups can be scheduled in as one unit only, so undo any
         * partial group before returning:
         */
        list_for_each_entry(event, &group_event->sibling_list, group_entry) {
                if (event == partial_group)
                        break;
                event_sched_out(event, cpuctx, ctx);
        }
        event_sched_out(group_event, cpuctx, ctx);

        return -EAGAIN;
}

/*
 * Return 1 for a group consisting entirely of software events,
 * 0 if the group contains any hardware events.
 */
static int is_software_only_group(struct perf_event *leader)
{
        struct perf_event *event;

        if (!is_software_event(leader))
                return 0;

        list_for_each_entry(event, &leader->sibling_list, group_entry)
                if (!is_software_event(event))
                        return 0;

        return 1;
}

/*
 * Work out whether we can put this event group on the CPU now.
 */
static int group_can_go_on(struct perf_event *event,
                           struct perf_cpu_context *cpuctx,
                           int can_add_hw)
{
        /*
         * Groups consisting entirely of software events can always go on.
         */
        if (is_software_only_group(event))
                return 1;
        /*
         * If an exclusive group is already on, no other hardware
         * events can go on.
         */
        if (cpuctx->exclusive)
                return 0;
        /*
         * If this group is exclusive and there are already
         * events on the CPU, it can't go on.
         */
        if (event->attr.exclusive && cpuctx->active_oncpu)
                return 0;
        /*
         * Otherwise, try to add it if all previous groups were able
         * to go on.
         */
        return can_add_hw;
}

static void add_event_to_ctx(struct perf_event *event,
                               struct perf_event_context *ctx)
{
        list_add_event(event, ctx);
        event->tstamp_enabled = ctx->time;
        event->tstamp_running = ctx->time;
        event->tstamp_stopped = ctx->time;
}
/*
 * Cross CPU call to install and enable a performance event
 *
 * Must be called with ctx->mutex held
 */
static void __perf_install_in_context(void *info)
{
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        struct perf_event *event = info;
        struct perf_event_context *ctx = event->ctx;
        struct perf_event *leader = event->group_leader;
        int cpu = smp_processor_id();
        int err;

        /*
         * If this is a task context, we need to check whether it is
         * the current task context of this cpu. If not it has been
         * scheduled out before the smp call arrived.
         * Or possibly this is the right context but it isn't
         * on this cpu because it had no events.
         */
        if (ctx->task && cpuctx->task_ctx != ctx) {
                if (cpuctx->task_ctx || ctx->task != current)
                        return;
                cpuctx->task_ctx = ctx;
        }

        spin_lock(&ctx->lock);
        ctx->is_active = 1;
        update_context_time(ctx);

        /*
         * Protect the list operation against NMI by disabling the
         * events on a global level. NOP for non NMI based events.
         */
        perf_disable();

        add_event_to_ctx(event, ctx);

        /*
         * Don't put the event on if it is disabled or if
         * it is in a group and the group isn't on.
         */
        if (event->state != PERF_EVENT_STATE_INACTIVE ||
            (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE))
                goto unlock;

        /*
         * An exclusive event can't go on if there are already active
         * hardware events, and no hardware event can go on if there
         * is already an exclusive event on.
         */
        if (!group_can_go_on(event, cpuctx, 1))
                err = -EEXIST;
        else
                err = event_sched_in(event, cpuctx, ctx, cpu);

        if (err) {
                /*
                 * This event couldn't go on.  If it is in a group
                 * then we have to pull the whole group off.
                 * If the event group is pinned then put it in error state.
                 */
                if (leader != event)
                        group_sched_out(leader, cpuctx, ctx);
                if (leader->attr.pinned) {
                        update_group_times(leader);
                        leader->state = PERF_EVENT_STATE_ERROR;
                }
        }

        if (!err && !ctx->task && cpuctx->max_pertask)
                cpuctx->max_pertask--;

unlock:
        perf_enable();

        spin_unlock(&ctx->lock);
}

/*
 * Attach a performance event to a context
 *
 * First we add the event to the list with the hardware enable bit
 * in event->hw_config cleared.
 *
 * If the event is attached to a task which is on a CPU we use a smp
 * call to enable it in the task context. The task might have been
 * scheduled away, but we check this in the smp call again.
 *
 * Must be called with ctx->mutex held.
 */
static void
perf_install_in_context(struct perf_event_context *ctx,
                        struct perf_event *event,
                        int cpu)
{
        struct task_struct *task = ctx->task;

        if (!task) {
                /*
                 * Per cpu events are installed via an smp call and
                 * the install is always successful.
                 */
                smp_call_function_single(cpu, __perf_install_in_context,
                                         event, 1);
                return;
        }

retry:
        task_oncpu_function_call(task, __perf_install_in_context,
                                 event);

        spin_lock_irq(&ctx->lock);
        /*
         * we need to retry the smp call.
         */
        if (ctx->is_active && list_empty(&event->group_entry)) {
                spin_unlock_irq(&ctx->lock);
                goto retry;
        }

        /*
         * The lock prevents that this context is scheduled in so we
         * can add the event safely, if the call above did not
         * succeed.
         */
        if (list_empty(&event->group_entry))
                add_event_to_ctx(event, ctx);
        spin_unlock_irq(&ctx->lock);
}
/*
 * Put an event into inactive state and update time fields.
 * Enabling the leader of a group effectively enables all
 * the group members that aren't explicitly disabled, so we
 * have to update their ->tstamp_enabled also.
 * Note: this works for group members as well as group leaders
 * since the non-leader members' sibling_lists will be empty.
 */
static void __perf_event_mark_enabled(struct perf_event *event,
                                        struct perf_event_context *ctx)
{
        struct perf_event *sub;

        event->state = PERF_EVENT_STATE_INACTIVE;
        event->tstamp_enabled = ctx->time - event->total_time_enabled;
        list_for_each_entry(sub, &event->sibling_list, group_entry)
                if (sub->state >= PERF_EVENT_STATE_INACTIVE)
                        sub->tstamp_enabled =
                                ctx->time - sub->total_time_enabled;
}

/*
 * Cross CPU call to enable a performance event
 */
static void __perf_event_enable(void *info)
{
        struct perf_event *event = info;
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        struct perf_event_context *ctx = event->ctx;
        struct perf_event *leader = event->group_leader;
        int err;

        /*
         * If this is a per-task event, need to check whether this
         * event's task is the current task on this cpu.
         */
        if (ctx->task && cpuctx->task_ctx != ctx) {
                if (cpuctx->task_ctx || ctx->task != current)
                        return;
                cpuctx->task_ctx = ctx;
        }

        spin_lock(&ctx->lock);
        ctx->is_active = 1;
        update_context_time(ctx);

        if (event->state >= PERF_EVENT_STATE_INACTIVE)
                goto unlock;
        __perf_event_mark_enabled(event, ctx);

        /*
         * If the event is in a group and isn't the group leader,
         * then don't put it on unless the group is on.
         */
        if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
                goto unlock;

        if (!group_can_go_on(event, cpuctx, 1)) {
                err = -EEXIST;
        } else {
                perf_disable();
                if (event == leader)
                        err = group_sched_in(event, cpuctx, ctx,
                                             smp_processor_id());
                else
                        err = event_sched_in(event, cpuctx, ctx,
                                             smp_processor_id());
                perf_enable();
        }

        if (err) {
                /*
                 * If this event can't go on and it's part of a
                 * group, then the whole group has to come off.
                 */
                if (leader != event)
                        group_sched_out(leader, cpuctx, ctx);
                if (leader->attr.pinned) {
                        update_group_times(leader);
                        leader->state = PERF_EVENT_STATE_ERROR;
                }
        }

unlock:
        spin_unlock(&ctx->lock);
}

/*
 * Enable an event.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid.  This condition is satisfied when called through
 * perf_event_for_each_child or perf_event_for_each as described
 * for perf_event_disable.
 */
static void perf_event_enable(struct perf_event *event)
{
        struct perf_event_context *ctx = event->ctx;
        struct task_struct *task = ctx->task;

        if (!task) {
                /*
                 * Enable the event on the cpu that it's on
                 */
                smp_call_function_single(event->cpu, __perf_event_enable,
                                         event, 1);
                return;
        }

        spin_lock_irq(&ctx->lock);
        if (event->state >= PERF_EVENT_STATE_INACTIVE)
                goto out;

        /*
         * If the event is in error state, clear that first.
         * That way, if we see the event in error state below, we
         * know that it has gone back into error state, as distinct
         * from the task having been scheduled away before the
         * cross-call arrived.
         */
        if (event->state == PERF_EVENT_STATE_ERROR)
                event->state = PERF_EVENT_STATE_OFF;

retry:
        spin_unlock_irq(&ctx->lock);
        task_oncpu_function_call(task, __perf_event_enable, event);

        spin_lock_irq(&ctx->lock);

        /*
         * If the context is active and the event is still off,
         * we need to retry the cross-call.
         */
        if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF)
                goto retry;

        /*
         * Since we have the lock this context can't be scheduled
         * in, so we can change the state safely.
         */
        if (event->state == PERF_EVENT_STATE_OFF)
                __perf_event_mark_enabled(event, ctx);

out:
        spin_unlock_irq(&ctx->lock);
}

static int perf_event_refresh(struct perf_event *event, int refresh)
{
        /*
         * not supported on inherited events
         */
        if (event->attr.inherit)
                return -EINVAL;

        atomic_add(refresh, &event->event_limit);
        perf_event_enable(event);

        return 0;
}
void __perf_event_sched_out(struct perf_event_context *ctx,
                            struct perf_cpu_context *cpuctx)
{
        struct perf_event *event;

        spin_lock(&ctx->lock);
        ctx->is_active = 0;
        if (likely(!ctx->nr_events))
                goto out;
        update_context_time(ctx);

        perf_disable();
        if (ctx->nr_active)
                list_for_each_entry(event, &ctx->group_list, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        perf_enable();
out:
        spin_unlock(&ctx->lock);
}

/*
 * Test whether two contexts are equivalent, i.e. whether they
 * have both been cloned from the same version of the same context
 * and they both have the same number of enabled events.
 * If the number of enabled events is the same, then the set
 * of enabled events should be the same, because these are both
 * inherited contexts, therefore we can't access individual events
 * in them directly with an fd; we can only enable/disable all
 * events via prctl, or enable/disable all events in a family
 * via ioctl, which will have the same effect on both contexts.
 */
static int context_equiv(struct perf_event_context *ctx1,
                         struct perf_event_context *ctx2)
{
        return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
                && ctx1->parent_gen == ctx2->parent_gen
                && !ctx1->pin_count && !ctx2->pin_count;
}

static void __perf_event_read(void *event);

static void __perf_event_sync_stat(struct perf_event *event,
                                     struct perf_event *next_event)
{
        u64 value;

        if (!event->attr.inherit_stat)
                return;

        /*
         * Update the event value, we cannot use perf_event_read()
         * because we're in the middle of a context switch and have IRQs
         * disabled, which upsets smp_call_function_single(), however
         * we know the event must be on the current CPU, therefore we
         * don't need to use it.
         */
        switch (event->state) {
        case PERF_EVENT_STATE_ACTIVE:
                __perf_event_read(event);
                break;

        case PERF_EVENT_STATE_INACTIVE:
                update_event_times(event);
                break;

        default:
                break;
        }

        /*
         * In order to keep per-task stats reliable we need to flip the event
         * values when we flip the contexts.
         */
        value = atomic64_read(&next_event->count);
        value = atomic64_xchg(&event->count, value);
        atomic64_set(&next_event->count, value);

        swap(event->total_time_enabled, next_event->total_time_enabled);
        swap(event->total_time_running, next_event->total_time_running);

        /*
         * Since we swizzled the values, update the user visible data too.
         */
        perf_event_update_userpage(event);
        perf_event_update_userpage(next_event);
}

#define list_next_entry(pos, member) \
        list_entry(pos->member.next, typeof(*pos), member)

static void perf_event_sync_stat(struct perf_event_context *ctx,
                                   struct perf_event_context *next_ctx)
{
        struct perf_event *event, *next_event;

        if (!ctx->nr_stat)
                return;

        event = list_first_entry(&ctx->event_list,
                                   struct perf_event, event_entry);

        next_event = list_first_entry(&next_ctx->event_list,
                                        struct perf_event, event_entry);

        while (&event->event_entry != &ctx->event_list &&
               &next_event->event_entry != &next_ctx->event_list) {

                __perf_event_sync_stat(event, next_event);

                event = list_next_entry(event, event_entry);
                next_event = list_next_entry(next_event, event_entry);
        }
}

/*
 * Called from scheduler to remove the events of the current task,
 * with interrupts disabled.
 *
 * We stop each event and update the event value in event->count.
 *
 * This does not protect us against NMI, but disable()
 * sets the disabled bit in the control field of event _before_
 * accessing the event control register. If an NMI hits, then it will
 * not restart the event.
 */
void perf_event_task_sched_out(struct task_struct *task,
                                 struct task_struct *next, int cpu)
{
        struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
        struct perf_event_context *ctx = task->perf_event_ctxp;
        struct perf_event_context *next_ctx;
        struct perf_event_context *parent;
        struct pt_regs *regs;
        int do_switch = 1;

        regs = task_pt_regs(task);
        perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);

        if (likely(!ctx || !cpuctx->task_ctx))
                return;

        update_context_time(ctx);

        rcu_read_lock();
        parent = rcu_dereference(ctx->parent_ctx);
        next_ctx = next->perf_event_ctxp;
        if (parent && next_ctx &&
            rcu_dereference(next_ctx->parent_ctx) == parent) {
                /*
                 * Looks like the two contexts are clones, so we might be
                 * able to optimize the context switch.  We lock both
                 * contexts and check that they are clones under the
                 * lock (including re-checking that neither has been
                 * uncloned in the meantime).  It doesn't matter which
                 * order we take the locks because no other cpu could
                 * be trying to lock both of these tasks.
                 */
                spin_lock(&ctx->lock);
                spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
                if (context_equiv(ctx, next_ctx)) {
                        /*
                         * XXX do we need a memory barrier of sorts
                         * wrt to rcu_dereference() of perf_event_ctxp
                         */
                        task->perf_event_ctxp = next_ctx;
                        next->perf_event_ctxp = ctx;
                        ctx->task = next;
                        next_ctx->task = task;
                        do_switch = 0;

                        perf_event_sync_stat(ctx, next_ctx);
                }
                spin_unlock(&next_ctx->lock);
                spin_unlock(&ctx->lock);
        }
        rcu_read_unlock();

        if (do_switch) {
                __perf_event_sched_out(ctx, cpuctx);
                cpuctx->task_ctx = NULL;
        }
}
/*
 * Called with IRQs disabled
 */
static void __perf_event_task_sched_out(struct perf_event_context *ctx)
{
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);

        if (!cpuctx->task_ctx)
                return;

        if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
                return;

        __perf_event_sched_out(ctx, cpuctx);
        cpuctx->task_ctx = NULL;
}

/*
 * Called with IRQs disabled
 */
static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
{
        __perf_event_sched_out(&cpuctx->ctx, cpuctx);
}

static void
__perf_event_sched_in(struct perf_event_context *ctx,
                        struct perf_cpu_context *cpuctx, int cpu)
{
        struct perf_event *event;
        int can_add_hw = 1;

        spin_lock(&ctx->lock);
        ctx->is_active = 1;
        if (likely(!ctx->nr_events))
                goto out;

        ctx->timestamp = perf_clock();

        perf_disable();

        /*
         * First go through the list and put on any pinned groups
         * in order to give them the best chance of going on.
         */
        list_for_each_entry(event, &ctx->group_list, group_entry) {
                if (event->state <= PERF_EVENT_STATE_OFF ||
                    !event->attr.pinned)
                        continue;
                if (event->cpu != -1 && event->cpu != cpu)
                        continue;

                if (group_can_go_on(event, cpuctx, 1))
                        group_sched_in(event, cpuctx, ctx, cpu);

                /*
                 * If this pinned group hasn't been scheduled,
                 * put it in error state.
                 */
                if (event->state == PERF_EVENT_STATE_INACTIVE) {
                        update_group_times(event);
                        event->state = PERF_EVENT_STATE_ERROR;
                }
        }

        list_for_each_entry(event, &ctx->group_list, group_entry) {
                /*
                 * Ignore events in OFF or ERROR state, and
                 * ignore pinned events since we did them already.
                 */
                if (event->state <= PERF_EVENT_STATE_OFF ||
                    event->attr.pinned)
                        continue;

                /*
                 * Listen to the 'cpu' scheduling filter constraint
                 * of events:
                 */
                if (event->cpu != -1 && event->cpu != cpu)
                        continue;

                if (group_can_go_on(event, cpuctx, can_add_hw))
                        if (group_sched_in(event, cpuctx, ctx, cpu))
                                can_add_hw = 0;
        }
        perf_enable();
out:
        spin_unlock(&ctx->lock);
}

/*
 * Called from scheduler to add the events of the current task
 * with interrupts disabled.
 *
 * We restore the event value and then enable it.
 *
 * This does not protect us against NMI, but enable()
 * sets the enabled bit in the control field of event _before_
 * accessing the event control register. If an NMI hits, then it will
 * keep the event running.
 */
void perf_event_task_sched_in(struct task_struct *task, int cpu)
{
        struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
        struct perf_event_context *ctx = task->perf_event_ctxp;

        if (likely(!ctx))
                return;
        if (cpuctx->task_ctx == ctx)
                return;
        __perf_event_sched_in(ctx, cpuctx, cpu);
        cpuctx->task_ctx = ctx;
}

static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
{
        struct perf_event_context *ctx = &cpuctx->ctx;

        __perf_event_sched_in(ctx, cpuctx, cpu);
}
#define MAX_INTERRUPTS (~0ULL)

static void perf_log_throttle(struct perf_event *event, int enable);

static void perf_adjust_period(struct perf_event *event, u64 events)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 period, sample_period;
        s64 delta;

        events *= hwc->sample_period;
        period = div64_u64(events, event->attr.sample_freq);

        delta = (s64)(period - hwc->sample_period);
        delta = (delta + 7) / 8; /* low pass filter */

        sample_period = hwc->sample_period + delta;

        if (!sample_period)
                sample_period = 1;

        hwc->sample_period = sample_period;
}

static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
{
        struct perf_event *event;
        struct hw_perf_event *hwc;
        u64 interrupts, freq;

        spin_lock(&ctx->lock);
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (event->state != PERF_EVENT_STATE_ACTIVE)
                        continue;

                hwc = &event->hw;

                interrupts = hwc->interrupts;
                hwc->interrupts = 0;

                /*
                 * unthrottle events on the tick
                 */
                if (interrupts == MAX_INTERRUPTS) {
                        perf_log_throttle(event, 1);
                        event->pmu->unthrottle(event);
                        interrupts = 2*sysctl_perf_event_sample_rate/HZ;
                }

                if (!event->attr.freq || !event->attr.sample_freq)
                        continue;

                /*
                 * if the specified freq < HZ then we need to skip ticks
                 */
                if (event->attr.sample_freq < HZ) {
                        freq = event->attr.sample_freq;

                        hwc->freq_count += freq;
                        hwc->freq_interrupts += interrupts;

                        if (hwc->freq_count < HZ)
                                continue;

                        interrupts = hwc->freq_interrupts;
                        hwc->freq_interrupts = 0;
                        hwc->freq_count -= HZ;
                } else
                        freq = HZ;

                perf_adjust_period(event, freq * interrupts);

                /*
                 * In order to avoid being stalled by an (accidental) huge
                 * sample period, force reset the sample period if we didn't
                 * get any events in this freq period.
                 */
                if (!interrupts) {
                        perf_disable();
                        event->pmu->disable(event);
                        atomic64_set(&hwc->period_left, 0);
                        event->pmu->enable(event);
                        perf_enable();
                }
        }
        spin_unlock(&ctx->lock);
}

/*
 * Round-robin a context's events:
 */
static void rotate_ctx(struct perf_event_context *ctx)
{
        struct perf_event *event;

        if (!ctx->nr_events)
                return;

        spin_lock(&ctx->lock);
        /*
         * Rotate the first entry last (works just fine for group events too):
         */
        perf_disable();
        list_for_each_entry(event, &ctx->group_list, group_entry) {
                list_move_tail(&event->group_entry, &ctx->group_list);
                break;
        }
        perf_enable();

        spin_unlock(&ctx->lock);
}

void perf_event_task_tick(struct task_struct *curr, int cpu)
{
        struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx;

        if (!atomic_read(&nr_events))
                return;

        cpuctx = &per_cpu(perf_cpu_context, cpu);
        ctx = curr->perf_event_ctxp;

        perf_ctx_adjust_freq(&cpuctx->ctx);
        if (ctx)
                perf_ctx_adjust_freq(ctx);

        perf_event_cpu_sched_out(cpuctx);
        if (ctx)
                __perf_event_task_sched_out(ctx);

        rotate_ctx(&cpuctx->ctx);
        if (ctx)
                rotate_ctx(ctx);

        perf_event_cpu_sched_in(cpuctx, cpu);
        if (ctx)
                perf_event_task_sched_in(curr, cpu);
}

/*
cdd6c482c
|
1392 |
* Enable all of a task's events that have been marked enable-on-exec. |
57e7986ed
|
1393 1394 |
* This expects task == current. */ |
cdd6c482c
|
1395 |
static void perf_event_enable_on_exec(struct task_struct *task) |
57e7986ed
|
1396 |
{ |
cdd6c482c
|
1397 1398 |
struct perf_event_context *ctx; struct perf_event *event; |
57e7986ed
|
1399 1400 1401 1402 |
unsigned long flags; int enabled = 0; local_irq_save(flags); |
cdd6c482c
|
1403 1404 |
ctx = task->perf_event_ctxp; if (!ctx || !ctx->nr_events) |
57e7986ed
|
1405 |
goto out; |
cdd6c482c
|
1406 |
__perf_event_task_sched_out(ctx); |
57e7986ed
|
1407 1408 |
spin_lock(&ctx->lock); |
cdd6c482c
|
1409 1410 |
list_for_each_entry(event, &ctx->group_list, group_entry) { if (!event->attr.enable_on_exec) |
57e7986ed
|
1411 |
continue; |
cdd6c482c
|
1412 1413 |
event->attr.enable_on_exec = 0; if (event->state >= PERF_EVENT_STATE_INACTIVE) |
57e7986ed
|
1414 |
continue; |
cdd6c482c
|
1415 |
__perf_event_mark_enabled(event, ctx); |
57e7986ed
|
1416 1417 1418 1419 |
enabled = 1; } /* |
cdd6c482c
|
1420 |
* Unclone this context if we enabled any event. |
57e7986ed
|
1421 |
*/ |
71a851b4d
|
1422 1423 |
if (enabled) unclone_ctx(ctx); |
57e7986ed
|
1424 1425 |
spin_unlock(&ctx->lock); |
cdd6c482c
|
1426 |
perf_event_task_sched_in(task, smp_processor_id()); |
57e7986ed
|
1427 1428 1429 1430 1431 |
out: local_irq_restore(flags); } /* |
cdd6c482c
|
1432 |
* Cross CPU call to read the hardware event |
0793a61d4
|
1433 |
*/ |
cdd6c482c
|
1434 |
static void __perf_event_read(void *info) |
0793a61d4
|
1435 |
{ |
e1ac3614f
|
1436 |
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
cdd6c482c
|
1437 1438 |
struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; |
aa9c4c0f9
|
1439 |
unsigned long flags; |
621a01eac
|
1440 |
|
e1ac3614f
|
1441 1442 1443 1444 |
/* * If this is a task context, we need to check whether it is * the current task context of this cpu. If not it has been * scheduled out before the smp call arrived. In that case |
cdd6c482c
|
1445 1446 |
* event->count would have been updated to a recent sample * when the event was scheduled out. |
e1ac3614f
|
1447 1448 1449 |
*/ if (ctx->task && cpuctx->task_ctx != ctx) return; |
849691a6c
|
1450 |
local_irq_save(flags); |
53cfbf593
|
1451 |
if (ctx->is_active) |
4af4998b8
|
1452 |
update_context_time(ctx); |
cdd6c482c
|
1453 1454 |
event->pmu->read(event); update_event_times(event); |
849691a6c
|
1455 |
local_irq_restore(flags); |
0793a61d4
|
1456 |
} |
cdd6c482c
|
1457 |
static u64 perf_event_read(struct perf_event *event) |
0793a61d4
|
1458 1459 |
{ /* |
cdd6c482c
|
1460 1461 |
* If event is enabled and currently active on a CPU, update the * value in the event structure: |
0793a61d4
|
1462 |
*/ |
cdd6c482c
|
1463 1464 1465 1466 1467 |
if (event->state == PERF_EVENT_STATE_ACTIVE) { smp_call_function_single(event->oncpu, __perf_event_read, event, 1); } else if (event->state == PERF_EVENT_STATE_INACTIVE) { update_event_times(event); |
0793a61d4
|
1468 |
} |
cdd6c482c
|
1469 |
return atomic64_read(&event->count); |
0793a61d4
|
1470 |
} |
a63eaf34a
|
1471 |
/* |
cdd6c482c
|
1472 |
* Initialize the perf_event context in a task_struct: |
a63eaf34a
|
1473 1474 |
*/ static void |
cdd6c482c
|
1475 |
__perf_event_init_context(struct perf_event_context *ctx, |
a63eaf34a
|
1476 1477 1478 1479 1480 |
struct task_struct *task) { memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); |
65abc8653
|
1481 |
INIT_LIST_HEAD(&ctx->group_list); |
a63eaf34a
|
1482 1483 1484 1485 |
INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); ctx->task = task; } |
cdd6c482c
|
1486 |
static struct perf_event_context *find_get_context(pid_t pid, int cpu) |
0793a61d4
|
1487 |
{ |
cdd6c482c
|
1488 |
struct perf_event_context *ctx; |
22a4f650d
|
1489 |
struct perf_cpu_context *cpuctx; |
0793a61d4
|
1490 |
struct task_struct *task; |
25346b93c
|
1491 |
unsigned long flags; |
c93f76690
|
1492 |
int err; |
0793a61d4
|
1493 1494 |
/* |
cdd6c482c
|
1495 |
* If cpu is not a wildcard then this is a percpu event: |
0793a61d4
|
1496 1497 |
*/ if (cpu != -1) { |
cdd6c482c
|
1498 |
/* Must be root to operate on a CPU event: */ |
0764771da
|
1499 |
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
0793a61d4
|
1500 1501 1502 1503 1504 1505 |
return ERR_PTR(-EACCES); if (cpu < 0 || cpu > num_possible_cpus()) return ERR_PTR(-EINVAL); /* |
cdd6c482c
|
1506 |
* We could be clever and allow attaching an event to an
0793a61d4
|
1507 1508 1509 1510 1511 1512 1513 1514 |
* offline CPU and activate it when the CPU comes up, but * that's for later. */ if (!cpu_isset(cpu, cpu_online_map)) return ERR_PTR(-ENODEV); cpuctx = &per_cpu(perf_cpu_context, cpu); ctx = &cpuctx->ctx; |
c93f76690
|
1515 |
get_ctx(ctx); |
0793a61d4
|
1516 |
|
0793a61d4
|
1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 |
return ctx; } rcu_read_lock(); if (!pid) task = current; else task = find_task_by_vpid(pid); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) return ERR_PTR(-ESRCH); |
c93f76690
|
1531 |
/* |
cdd6c482c
|
1532 |
* Can't attach events to a dying task. |
c93f76690
|
1533 1534 1535 1536 |
*/ err = -ESRCH; if (task->flags & PF_EXITING) goto errout; |
0793a61d4
|
1537 |
/* Reuse ptrace permission checks for now. */ |
c93f76690
|
1538 1539 1540 |
err = -EACCES; if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto errout; |
c93f76690
|
1541 |
retry: |
25346b93c
|
1542 |
ctx = perf_lock_task_context(task, &flags); |
c93f76690
|
1543 |
if (ctx) { |
71a851b4d
|
1544 |
unclone_ctx(ctx); |
25346b93c
|
1545 |
spin_unlock_irqrestore(&ctx->lock, flags); |
0793a61d4
|
1546 |
} |
a63eaf34a
|
1547 |
if (!ctx) { |
cdd6c482c
|
1548 |
ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
c93f76690
|
1549 1550 1551 |
err = -ENOMEM; if (!ctx) goto errout; |
cdd6c482c
|
1552 |
__perf_event_init_context(ctx, task); |
c93f76690
|
1553 |
get_ctx(ctx); |
cdd6c482c
|
1554 |
if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { |
a63eaf34a
|
1555 1556 1557 1558 1559 |
/* * We raced with some other task; use * the context they set. */ kfree(ctx); |
25346b93c
|
1560 |
goto retry; |
a63eaf34a
|
1561 |
} |
c93f76690
|
1562 |
get_task_struct(task); |
a63eaf34a
|
1563 |
} |
c93f76690
|
1564 |
put_task_struct(task); |
0793a61d4
|
1565 |
return ctx; |
c93f76690
|
1566 1567 1568 1569 |
errout: put_task_struct(task); return ERR_PTR(err); |
0793a61d4
|
1570 |
} |
cdd6c482c
|
1571 |
static void free_event_rcu(struct rcu_head *head) |
592903cdc
|
1572 |
{ |
cdd6c482c
|
1573 |
struct perf_event *event; |
592903cdc
|
1574 |
|
cdd6c482c
|
1575 1576 1577 1578 |
event = container_of(head, struct perf_event, rcu_head); if (event->ns) put_pid_ns(event->ns); kfree(event); |
592903cdc
|
1579 |
} |
cdd6c482c
|
1580 |
static void perf_pending_sync(struct perf_event *event); |
925d519ab
|
1581 |
|
cdd6c482c
|
1582 |
static void free_event(struct perf_event *event) |
f16009527
|
1583 |
{ |
cdd6c482c
|
1584 |
perf_pending_sync(event); |
925d519ab
|
1585 |
|
cdd6c482c
|
1586 1587 1588 1589 1590 1591 1592 1593 |
if (!event->parent) { atomic_dec(&nr_events); if (event->attr.mmap) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); |
f344011cc
|
1594 |
} |
9ee318a78
|
1595 |
|
cdd6c482c
|
1596 1597 1598 |
if (event->output) { fput(event->output->filp); event->output = NULL; |
a4be7c277
|
1599 |
} |
cdd6c482c
|
1600 1601 |
if (event->destroy) event->destroy(event); |
e077df4f4
|
1602 |
|
cdd6c482c
|
1603 1604 |
put_ctx(event->ctx); call_rcu(&event->rcu_head, free_event_rcu); |
f16009527
|
1605 |
} |
0793a61d4
|
1606 1607 1608 1609 1610 |
/* * Called when the last reference to the file is gone. */ static int perf_release(struct inode *inode, struct file *file) { |
cdd6c482c
|
1611 1612 |
struct perf_event *event = file->private_data; struct perf_event_context *ctx = event->ctx; |
0793a61d4
|
1613 1614 |
file->private_data = NULL; |
ad3a37de8
|
1615 |
WARN_ON_ONCE(ctx->parent_ctx); |
d859e29fe
|
1616 |
mutex_lock(&ctx->mutex); |
cdd6c482c
|
1617 |
perf_event_remove_from_context(event); |
d859e29fe
|
1618 |
mutex_unlock(&ctx->mutex); |
0793a61d4
|
1619 |
|
cdd6c482c
|
1620 1621 1622 1623 |
mutex_lock(&event->owner->perf_event_mutex); list_del_init(&event->owner_entry); mutex_unlock(&event->owner->perf_event_mutex); put_task_struct(event->owner); |
082ff5a27
|
1624 |
|
cdd6c482c
|
1625 |
free_event(event); |
0793a61d4
|
1626 1627 1628 |
return 0; } |
cdd6c482c
|
1629 |
static int perf_event_read_size(struct perf_event *event) |
3dab77fb1
|
1630 1631 1632 1633 |
{ int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; |
cdd6c482c
|
1634 |
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
3dab77fb1
|
1635 |
size += sizeof(u64); |
cdd6c482c
|
1636 |
if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
3dab77fb1
|
1637 |
size += sizeof(u64); |
cdd6c482c
|
1638 |
if (event->attr.read_format & PERF_FORMAT_ID) |
3dab77fb1
|
1639 |
entry += sizeof(u64); |
cdd6c482c
|
1640 1641 |
if (event->attr.read_format & PERF_FORMAT_GROUP) { nr += event->group_leader->nr_siblings; |
3dab77fb1
|
1642 1643 1644 1645 1646 1647 1648 |
size += sizeof(u64); } size += entry * nr; return size; } |
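/*
 * Worked example of the arithmetic above (illustrative, not from the
 * original file): for a group leader with two siblings and
 * read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID, each entry is
 * value + id = 16 bytes and nr = 1 + 2 = 3, plus the leading u64 holding
 * nr itself, so perf_event_read_size() returns 8 + 3 * 16 = 56 bytes.
 * A read() with a smaller buffer is rejected with -ENOSPC below.
 */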
cdd6c482c
|
1649 |
static u64 perf_event_read_value(struct perf_event *event) |
e53c09947
|
1650 |
{ |
cdd6c482c
|
1651 |
struct perf_event *child; |
e53c09947
|
1652 |
u64 total = 0; |
cdd6c482c
|
1653 1654 1655 |
total += perf_event_read(event); list_for_each_entry(child, &event->child_list, child_list) total += perf_event_read(child); |
e53c09947
|
1656 1657 1658 |
return total; } |
cdd6c482c
|
1659 |
static int perf_event_read_entry(struct perf_event *event, |
3dab77fb1
|
1660 1661 1662 1663 |
u64 read_format, char __user *buf) { int n = 0, count = 0; u64 values[2]; |
cdd6c482c
|
1664 |
values[n++] = perf_event_read_value(event); |
3dab77fb1
|
1665 |
if (read_format & PERF_FORMAT_ID) |
cdd6c482c
|
1666 |
values[n++] = primary_event_id(event); |
3dab77fb1
|
1667 1668 1669 1670 1671 1672 1673 1674 |
count = n * sizeof(u64); if (copy_to_user(buf, values, count)) return -EFAULT; return count; } |
cdd6c482c
|
1675 |
static int perf_event_read_group(struct perf_event *event, |
3dab77fb1
|
1676 1677 |
u64 read_format, char __user *buf) { |
cdd6c482c
|
1678 |
struct perf_event *leader = event->group_leader, *sub; |
3dab77fb1
|
1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 |
int n = 0, size = 0, err = -EFAULT; u64 values[3]; values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { values[n++] = leader->total_time_enabled + atomic64_read(&leader->child_total_time_enabled); } if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { values[n++] = leader->total_time_running + atomic64_read(&leader->child_total_time_running); } size = n * sizeof(u64); if (copy_to_user(buf, values, size)) return -EFAULT; |
cdd6c482c
|
1696 |
err = perf_event_read_entry(leader, read_format, buf + size); |
3dab77fb1
|
1697 1698 1699 1700 |
if (err < 0) return err; size += err; |
65abc8653
|
1701 |
list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
cdd6c482c
|
1702 |
err = perf_event_read_entry(sub, read_format, |
3dab77fb1
|
1703 1704 1705 1706 1707 1708 1709 1710 1711 |
buf + size); if (err < 0) return err; size += err; } return size; } |
cdd6c482c
|
1712 |
static int perf_event_read_one(struct perf_event *event, |
3dab77fb1
|
1713 1714 1715 1716 |
u64 read_format, char __user *buf) { u64 values[4]; int n = 0; |
cdd6c482c
|
1717 |
values[n++] = perf_event_read_value(event); |
3dab77fb1
|
1718 |
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { |
cdd6c482c
|
1719 1720 |
values[n++] = event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); |
3dab77fb1
|
1721 1722 |
} if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { |
cdd6c482c
|
1723 1724 |
values[n++] = event->total_time_running + atomic64_read(&event->child_total_time_running); |
3dab77fb1
|
1725 1726 |
} if (read_format & PERF_FORMAT_ID) |
cdd6c482c
|
1727 |
values[n++] = primary_event_id(event); |
3dab77fb1
|
1728 1729 1730 1731 1732 1733 |
if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; return n * sizeof(u64); } |
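/*
 * Illustrative layout of the u64 words returned by read(), as emitted by
 * the two helpers above: without PERF_FORMAT_GROUP the buffer is
 *
 *	{ value, time_enabled?, time_running?, id? }
 *
 * and with PERF_FORMAT_GROUP it is
 *
 *	{ nr, time_enabled?, time_running?, { value, id? } * nr }
 *
 * where a '?' field is present only when the corresponding read_format
 * bit is set.
 */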
0793a61d4
|
1734 |
/* |
cdd6c482c
|
1735 |
* Read the performance event - simple non-blocking version for now
0793a61d4
|
1736 1737 |
*/ static ssize_t |
cdd6c482c
|
1738 |
perf_read_hw(struct perf_event *event, char __user *buf, size_t count) |
0793a61d4
|
1739 |
{ |
cdd6c482c
|
1740 |
u64 read_format = event->attr.read_format; |
3dab77fb1
|
1741 |
int ret; |
0793a61d4
|
1742 |
|
3b6f9e5cb
|
1743 |
/* |
cdd6c482c
|
1744 |
* Return end-of-file for a read on an event that is in
3b6f9e5cb
|
1745 1746 1747 |
* error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ |
cdd6c482c
|
1748 |
if (event->state == PERF_EVENT_STATE_ERROR) |
3b6f9e5cb
|
1749 |
return 0; |
cdd6c482c
|
1750 |
if (count < perf_event_read_size(event)) |
3dab77fb1
|
1751 |
return -ENOSPC; |
cdd6c482c
|
1752 1753 |
WARN_ON_ONCE(event->ctx->parent_ctx); mutex_lock(&event->child_mutex); |
3dab77fb1
|
1754 |
if (read_format & PERF_FORMAT_GROUP) |
cdd6c482c
|
1755 |
ret = perf_event_read_group(event, read_format, buf); |
3dab77fb1
|
1756 |
else |
cdd6c482c
|
1757 1758 |
ret = perf_event_read_one(event, read_format, buf); mutex_unlock(&event->child_mutex); |
0793a61d4
|
1759 |
|
3dab77fb1
|
1760 |
return ret; |
0793a61d4
|
1761 1762 1763 |
} static ssize_t |
0793a61d4
|
1764 1765 |
perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { |
cdd6c482c
|
1766 |
struct perf_event *event = file->private_data; |
0793a61d4
|
1767 |
|
cdd6c482c
|
1768 |
return perf_read_hw(event, buf, count); |
0793a61d4
|
1769 1770 1771 1772 |
} static unsigned int perf_poll(struct file *file, poll_table *wait) { |
cdd6c482c
|
1773 |
struct perf_event *event = file->private_data; |
c7138f37f
|
1774 |
struct perf_mmap_data *data; |
c33a0bc4e
|
1775 |
unsigned int events = POLL_HUP; |
c7138f37f
|
1776 1777 |
rcu_read_lock(); |
cdd6c482c
|
1778 |
data = rcu_dereference(event->data); |
c7138f37f
|
1779 |
if (data) |
c33a0bc4e
|
1780 |
events = atomic_xchg(&data->poll, 0); |
c7138f37f
|
1781 |
rcu_read_unlock(); |
0793a61d4
|
1782 |
|
cdd6c482c
|
1783 |
poll_wait(file, &event->waitq, wait); |
0793a61d4
|
1784 |
|
0793a61d4
|
1785 1786 |
return events; } |
cdd6c482c
|
1787 |
static void perf_event_reset(struct perf_event *event) |
6de6a7b95
|
1788 |
{ |
cdd6c482c
|
1789 1790 1791 |
(void)perf_event_read(event); atomic64_set(&event->count, 0); perf_event_update_userpage(event); |
3df5edad8
|
1792 |
} |
c93f76690
|
1793 |
/* |
cdd6c482c
|
1794 1795 1796 1797 |
* Holding the top-level event's child_mutex means that any * descendant process that has inherited this event will block * in sync_child_event if it goes to exit, thus satisfying the * task existence requirements of perf_event_enable/disable. |
c93f76690
|
1798 |
*/ |
cdd6c482c
|
1799 1800 |
static void perf_event_for_each_child(struct perf_event *event, void (*func)(struct perf_event *)) |
3df5edad8
|
1801 |
{ |
cdd6c482c
|
1802 |
struct perf_event *child; |
3df5edad8
|
1803 |
|
cdd6c482c
|
1804 1805 1806 1807 |
WARN_ON_ONCE(event->ctx->parent_ctx); mutex_lock(&event->child_mutex); func(event); list_for_each_entry(child, &event->child_list, child_list) |
3df5edad8
|
1808 |
func(child); |
cdd6c482c
|
1809 |
mutex_unlock(&event->child_mutex); |
3df5edad8
|
1810 |
} |
cdd6c482c
|
1811 1812 |
static void perf_event_for_each(struct perf_event *event, void (*func)(struct perf_event *)) |
3df5edad8
|
1813 |
{ |
cdd6c482c
|
1814 1815 |
struct perf_event_context *ctx = event->ctx; struct perf_event *sibling; |
3df5edad8
|
1816 |
|
75f937f24
|
1817 1818 |
WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); |
cdd6c482c
|
1819 |
event = event->group_leader; |
75f937f24
|
1820 |
|
cdd6c482c
|
1821 1822 1823 1824 |
perf_event_for_each_child(event, func); func(event); list_for_each_entry(sibling, &event->sibling_list, group_entry) perf_event_for_each_child(event, func); |
75f937f24
|
1825 |
mutex_unlock(&ctx->mutex); |
6de6a7b95
|
1826 |
} |
cdd6c482c
|
1827 |
static int perf_event_period(struct perf_event *event, u64 __user *arg) |
08247e31c
|
1828 |
{ |
cdd6c482c
|
1829 |
struct perf_event_context *ctx = event->ctx; |
08247e31c
|
1830 1831 1832 |
unsigned long size; int ret = 0; u64 value; |
cdd6c482c
|
1833 |
if (!event->attr.sample_period) |
08247e31c
|
1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 |
return -EINVAL; size = copy_from_user(&value, arg, sizeof(value)); if (size != sizeof(value)) return -EFAULT; if (!value) return -EINVAL; spin_lock_irq(&ctx->lock); |
cdd6c482c
|
1844 1845 |
if (event->attr.freq) { if (value > sysctl_perf_event_sample_rate) { |
08247e31c
|
1846 1847 1848 |
ret = -EINVAL; goto unlock; } |
cdd6c482c
|
1849 |
event->attr.sample_freq = value; |
08247e31c
|
1850 |
} else { |
cdd6c482c
|
1851 1852 |
event->attr.sample_period = value; event->hw.sample_period = value; |
08247e31c
|
1853 1854 1855 1856 1857 1858 |
} unlock: spin_unlock_irq(&ctx->lock); return ret; } |
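/*
 * Minimal user-space sketch of driving this path (an illustration; it
 * assumes an event fd obtained from the perf syscall elsewhere):
 *
 *	u64 period = 200000;
 *	if (ioctl(fd, PERF_EVENT_IOC_PERIOD, &period))
 *		err(1, "PERF_EVENT_IOC_PERIOD");
 *
 * Note that only attr.sample_period/attr.sample_freq and hw.sample_period
 * are updated here; whatever period is currently programmed into the
 * hardware is left untouched by this function.
 */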
cdd6c482c
|
1859 |
int perf_event_set_output(struct perf_event *event, int output_fd); |
a4be7c277
|
1860 |
|
d859e29fe
|
1861 1862 |
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { |
cdd6c482c
|
1863 1864 |
struct perf_event *event = file->private_data; void (*func)(struct perf_event *); |
3df5edad8
|
1865 |
u32 flags = arg; |
d859e29fe
|
1866 1867 |
switch (cmd) { |
cdd6c482c
|
1868 1869 |
case PERF_EVENT_IOC_ENABLE: func = perf_event_enable; |
d859e29fe
|
1870 |
break; |
cdd6c482c
|
1871 1872 |
case PERF_EVENT_IOC_DISABLE: func = perf_event_disable; |
79f146415
|
1873 |
break; |
cdd6c482c
|
1874 1875 |
case PERF_EVENT_IOC_RESET: func = perf_event_reset; |
6de6a7b95
|
1876 |
break; |
3df5edad8
|
1877 |
|
cdd6c482c
|
1878 1879 |
case PERF_EVENT_IOC_REFRESH: return perf_event_refresh(event, arg); |
08247e31c
|
1880 |
|
cdd6c482c
|
1881 1882 |
case PERF_EVENT_IOC_PERIOD: return perf_event_period(event, (u64 __user *)arg); |
08247e31c
|
1883 |
|
cdd6c482c
|
1884 1885 |
case PERF_EVENT_IOC_SET_OUTPUT: return perf_event_set_output(event, arg); |
a4be7c277
|
1886 |
|
d859e29fe
|
1887 |
default: |
3df5edad8
|
1888 |
return -ENOTTY; |
d859e29fe
|
1889 |
} |
3df5edad8
|
1890 1891 |
if (flags & PERF_IOC_FLAG_GROUP) |
cdd6c482c
|
1892 |
perf_event_for_each(event, func); |
3df5edad8
|
1893 |
else |
cdd6c482c
|
1894 |
perf_event_for_each_child(event, func); |
3df5edad8
|
1895 1896 |
return 0; |
d859e29fe
|
1897 |
} |
cdd6c482c
|
1898 |
int perf_event_task_enable(void) |
771d7cde1
|
1899 |
{ |
cdd6c482c
|
1900 |
struct perf_event *event; |
771d7cde1
|
1901 |
|
cdd6c482c
|
1902 1903 1904 1905 |
mutex_lock(&current->perf_event_mutex);
list_for_each_entry(event, &current->perf_event_list, owner_entry)
	perf_event_for_each_child(event, perf_event_enable);
mutex_unlock(&current->perf_event_mutex);
771d7cde1
|
1906 1907 1908 |
return 0; } |
cdd6c482c
|
1909 |
int perf_event_task_disable(void) |
771d7cde1
|
1910 |
{ |
cdd6c482c
|
1911 |
struct perf_event *event; |
771d7cde1
|
1912 |
|
cdd6c482c
|
1913 1914 1915 1916 |
mutex_lock(&current->perf_event_mutex);
list_for_each_entry(event, &current->perf_event_list, owner_entry)
	perf_event_for_each_child(event, perf_event_disable);
mutex_unlock(&current->perf_event_mutex);
771d7cde1
|
1917 1918 1919 |
return 0; } |
cdd6c482c
|
1920 1921 |
#ifndef PERF_EVENT_INDEX_OFFSET # define PERF_EVENT_INDEX_OFFSET 0 |
f738eb1b6
|
1922 |
#endif |
cdd6c482c
|
1923 |
static int perf_event_index(struct perf_event *event) |
194002b27
|
1924 |
{ |
cdd6c482c
|
1925 |
if (event->state != PERF_EVENT_STATE_ACTIVE) |
194002b27
|
1926 |
return 0; |
cdd6c482c
|
1927 |
return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; |
194002b27
|
1928 |
} |
38ff667b3
|
1929 1930 1931 1932 1933 |
/* * Callers need to ensure there can be no nesting of this function, otherwise * the seqlock logic goes bad. We can not serialize this because the arch * code calls this from NMI context. */ |
cdd6c482c
|
1934 |
void perf_event_update_userpage(struct perf_event *event) |
37d818283
|
1935 |
{ |
cdd6c482c
|
1936 |
struct perf_event_mmap_page *userpg; |
22a4f650d
|
1937 |
struct perf_mmap_data *data; |
38ff667b3
|
1938 1939 |
rcu_read_lock(); |
cdd6c482c
|
1940 |
data = rcu_dereference(event->data); |
38ff667b3
|
1941 1942 1943 1944 |
if (!data) goto unlock; userpg = data->user_page; |
37d818283
|
1945 |
|
7b732a750
|
1946 1947 1948 1949 1950 |
/* * Disable preemption so as to not let the corresponding user-space * spin too long if we get preempted. */ preempt_disable(); |
37d818283
|
1951 |
++userpg->lock; |
92f22a386
|
1952 |
barrier(); |
cdd6c482c
|
1953 1954 1955 1956 |
userpg->index = perf_event_index(event); userpg->offset = atomic64_read(&event->count); if (event->state == PERF_EVENT_STATE_ACTIVE) userpg->offset -= atomic64_read(&event->hw.prev_count); |
7b732a750
|
1957 |
|
cdd6c482c
|
1958 1959 |
userpg->time_enabled = event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); |
7f8b4e4e0
|
1960 |
|
cdd6c482c
|
1961 1962 |
userpg->time_running = event->total_time_running + atomic64_read(&event->child_total_time_running); |
7f8b4e4e0
|
1963 |
|
92f22a386
|
1964 |
barrier(); |
37d818283
|
1965 |
++userpg->lock; |
7b732a750
|
1966 |
preempt_enable(); |
38ff667b3
|
1967 |
unlock: |
7b732a750
|
1968 |
rcu_read_unlock(); |
37d818283
|
1969 |
} |
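/*
 * Sketch of the matching user-space reader for the lock field above (an
 * illustration, not a definitive ABI description): the reader re-reads
 * until it observes an unchanged ->lock value around its copies, e.g.
 *
 *	do {
 *		seq = pc->lock;
 *		barrier();
 *		idx = pc->index;
 *		off = pc->offset;
 *		barrier();
 *	} while (pc->lock != seq);
 *
 * which is why the kernel-side update above must never nest.
 */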
906010b21
|
1970 |
static unsigned long perf_data_size(struct perf_mmap_data *data) |
37d818283
|
1971 |
{ |
906010b21
|
1972 1973 |
return data->nr_pages << (PAGE_SHIFT + data->data_order); } |
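/*
 * Quick sanity check of this size calculation (illustrative): with the
 * page-based allocator below, data_order is 0 and the data area is simply
 * nr_pages << PAGE_SHIFT; with the vmalloc-based allocator, nr_pages is 1
 * and data_order = ilog2(requested pages), giving the same total for a
 * power-of-two request. The control page is not counted in either case.
 */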
37d818283
|
1974 |
|
906010b21
|
1975 |
#ifndef CONFIG_PERF_USE_VMALLOC |
43a21ea81
|
1976 |
|
906010b21
|
1977 1978 1979 |
/* * Back perf_mmap() with regular GFP_KERNEL-0 pages. */ |
43a21ea81
|
1980 |
|
906010b21
|
1981 1982 1983 1984 1985 |
static struct page * perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) { if (pgoff > data->nr_pages) return NULL; |
43a21ea81
|
1986 |
|
906010b21
|
1987 1988 |
if (pgoff == 0) return virt_to_page(data->user_page); |
7b732a750
|
1989 |
|
906010b21
|
1990 |
return virt_to_page(data->data_pages[pgoff - 1]); |
7b732a750
|
1991 |
} |
906010b21
|
1992 1993 |
static struct perf_mmap_data * perf_mmap_data_alloc(struct perf_event *event, int nr_pages) |
7b732a750
|
1994 1995 1996 1997 |
{ struct perf_mmap_data *data; unsigned long size; int i; |
cdd6c482c
|
1998 |
WARN_ON(atomic_read(&event->mmap_count)); |
7b732a750
|
1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 |
size = sizeof(struct perf_mmap_data); size += nr_pages * sizeof(void *); data = kzalloc(size, GFP_KERNEL); if (!data) goto fail; data->user_page = (void *)get_zeroed_page(GFP_KERNEL); if (!data->user_page) goto fail_user_page; for (i = 0; i < nr_pages; i++) { data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); if (!data->data_pages[i]) goto fail_data_pages; } |
906010b21
|
2016 |
data->data_order = 0; |
7b732a750
|
2017 |
data->nr_pages = nr_pages; |
906010b21
|
2018 |
return data; |
7b732a750
|
2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 |
fail_data_pages: for (i--; i >= 0; i--) free_page((unsigned long)data->data_pages[i]); free_page((unsigned long)data->user_page); fail_user_page: kfree(data); fail: |
906010b21
|
2030 |
return NULL; |
7b732a750
|
2031 |
} |
43a21ea81
|
2032 2033 |
static void perf_mmap_free_page(unsigned long addr) { |
5bfd75609
|
2034 |
struct page *page = virt_to_page((void *)addr); |
43a21ea81
|
2035 2036 2037 2038 |
page->mapping = NULL; __free_page(page); } |
906010b21
|
2039 |
static void perf_mmap_data_free(struct perf_mmap_data *data) |
7b732a750
|
2040 |
{ |
7b732a750
|
2041 |
int i; |
43a21ea81
|
2042 |
perf_mmap_free_page((unsigned long)data->user_page); |
7b732a750
|
2043 |
for (i = 0; i < data->nr_pages; i++) |
43a21ea81
|
2044 |
perf_mmap_free_page((unsigned long)data->data_pages[i]); |
906010b21
|
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 |
} #else /* * Back perf_mmap() with vmalloc memory. * * Required for architectures that have d-cache aliasing issues. */ static struct page * perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) { if (pgoff > (1UL << data->data_order)) return NULL; return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); } static void perf_mmap_unmark_page(void *addr) { struct page *page = vmalloc_to_page(addr); page->mapping = NULL; } static void perf_mmap_data_free_work(struct work_struct *work) { struct perf_mmap_data *data; void *base; int i, nr; data = container_of(work, struct perf_mmap_data, work); nr = 1 << data->data_order; base = data->user_page; for (i = 0; i < nr + 1; i++) perf_mmap_unmark_page(base + (i * PAGE_SIZE)); vfree(base); } static void perf_mmap_data_free(struct perf_mmap_data *data) { schedule_work(&data->work); } static struct perf_mmap_data * perf_mmap_data_alloc(struct perf_event *event, int nr_pages) { struct perf_mmap_data *data; unsigned long size; void *all_buf; WARN_ON(atomic_read(&event->mmap_count)); |
43a21ea81
|
2100 |
|
906010b21
|
2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 |
size = sizeof(struct perf_mmap_data); size += sizeof(void *); data = kzalloc(size, GFP_KERNEL); if (!data) goto fail; INIT_WORK(&data->work, perf_mmap_data_free_work); all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); if (!all_buf) goto fail_all_buf; data->user_page = all_buf; data->data_pages[0] = all_buf + PAGE_SIZE; data->data_order = ilog2(nr_pages); data->nr_pages = 1; return data; fail_all_buf: kfree(data); fail: return NULL; } #endif static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct perf_event *event = vma->vm_file->private_data; struct perf_mmap_data *data; int ret = VM_FAULT_SIGBUS; if (vmf->flags & FAULT_FLAG_MKWRITE) { if (vmf->pgoff == 0) ret = 0; return ret; } rcu_read_lock(); data = rcu_dereference(event->data); if (!data) goto unlock; if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) goto unlock; vmf->page = perf_mmap_to_page(data, vmf->pgoff); if (!vmf->page) goto unlock; get_page(vmf->page); vmf->page->mapping = vma->vm_file->f_mapping; vmf->page->index = vmf->pgoff; ret = 0; unlock: rcu_read_unlock(); return ret; } static void perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) { long max_size = perf_data_size(data); atomic_set(&data->lock, -1); if (event->attr.watermark) { data->watermark = min_t(long, max_size, event->attr.wakeup_watermark); } if (!data->watermark) data->watermark = max_t(long, PAGE_SIZE, max_size / 2); rcu_assign_pointer(event->data, data); } static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) { struct perf_mmap_data *data; data = container_of(rcu_head, struct perf_mmap_data, rcu_head); perf_mmap_data_free(data); |
7b732a750
|
2190 2191 |
kfree(data); } |
906010b21
|
2192 |
static void perf_mmap_data_release(struct perf_event *event) |
7b732a750
|
2193 |
{ |
cdd6c482c
|
2194 |
struct perf_mmap_data *data = event->data; |
7b732a750
|
2195 |
|
cdd6c482c
|
2196 |
WARN_ON(atomic_read(&event->mmap_count)); |
7b732a750
|
2197 |
|
cdd6c482c
|
2198 |
rcu_assign_pointer(event->data, NULL); |
906010b21
|
2199 |
call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); |
7b732a750
|
2200 2201 2202 2203 |
} static void perf_mmap_open(struct vm_area_struct *vma) { |
cdd6c482c
|
2204 |
struct perf_event *event = vma->vm_file->private_data; |
7b732a750
|
2205 |
|
cdd6c482c
|
2206 |
atomic_inc(&event->mmap_count); |
7b732a750
|
2207 2208 2209 2210 |
} static void perf_mmap_close(struct vm_area_struct *vma) { |
cdd6c482c
|
2211 |
struct perf_event *event = vma->vm_file->private_data; |
7b732a750
|
2212 |
|
cdd6c482c
|
2213 2214 |
WARN_ON_ONCE(event->ctx->parent_ctx); if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { |
906010b21
|
2215 |
unsigned long size = perf_data_size(event->data); |
789f90fcf
|
2216 |
struct user_struct *user = current_user(); |
906010b21
|
2217 |
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
cdd6c482c
|
2218 |
vma->vm_mm->locked_vm -= event->data->nr_locked; |
906010b21
|
2219 |
perf_mmap_data_release(event); |
cdd6c482c
|
2220 |
mutex_unlock(&event->mmap_mutex); |
7b732a750
|
2221 |
} |
37d818283
|
2222 |
} |
f0f37e2f7
|
2223 |
static const struct vm_operations_struct perf_mmap_vmops = { |
43a21ea81
|
2224 2225 2226 2227 |
.open = perf_mmap_open, .close = perf_mmap_close, .fault = perf_mmap_fault, .page_mkwrite = perf_mmap_fault, |
37d818283
|
2228 2229 2230 2231 |
}; static int perf_mmap(struct file *file, struct vm_area_struct *vma) { |
cdd6c482c
|
2232 |
struct perf_event *event = file->private_data; |
22a4f650d
|
2233 |
unsigned long user_locked, user_lock_limit; |
789f90fcf
|
2234 |
struct user_struct *user = current_user(); |
22a4f650d
|
2235 |
unsigned long locked, lock_limit; |
906010b21
|
2236 |
struct perf_mmap_data *data; |
7b732a750
|
2237 2238 |
unsigned long vma_size; unsigned long nr_pages; |
789f90fcf
|
2239 |
long user_extra, extra; |
7b732a750
|
2240 |
int ret = 0; |
37d818283
|
2241 |
|
43a21ea81
|
2242 |
if (!(vma->vm_flags & VM_SHARED)) |
37d818283
|
2243 |
return -EINVAL; |
7b732a750
|
2244 2245 2246 |
vma_size = vma->vm_end - vma->vm_start; nr_pages = (vma_size / PAGE_SIZE) - 1; |
7730d8655
|
2247 2248 2249 2250 2251 |
/* * If we have data pages ensure they're a power-of-two number, so we * can do bitmasks instead of modulo. */ if (nr_pages != 0 && !is_power_of_2(nr_pages)) |
37d818283
|
2252 |
return -EINVAL; |
7b732a750
|
2253 |
if (vma_size != PAGE_SIZE * (1 + nr_pages)) |
37d818283
|
2254 |
return -EINVAL; |
7b732a750
|
2255 2256 |
if (vma->vm_pgoff != 0) return -EINVAL; |
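/*
 * Example of the size constraints just enforced (illustrative numbers):
 * asking for 8 data pages means mmap()ing 9 pages in total, i.e.
 * vma_size = (1 + 8) * PAGE_SIZE; page 0 is the perf_event_mmap_page
 * control page and the remaining power-of-two pages form the ring buffer.
 */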
37d818283
|
2257 |
|
cdd6c482c
|
2258 2259 2260 |
WARN_ON_ONCE(event->ctx->parent_ctx); mutex_lock(&event->mmap_mutex); if (event->output) { |
a4be7c277
|
2261 2262 2263 |
ret = -EINVAL; goto unlock; } |
cdd6c482c
|
2264 2265 |
if (atomic_inc_not_zero(&event->mmap_count)) { if (nr_pages != event->data->nr_pages) |
ebb3c4c4c
|
2266 2267 2268 |
ret = -EINVAL; goto unlock; } |
789f90fcf
|
2269 |
user_extra = nr_pages + 1; |
cdd6c482c
|
2270 |
user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); |
a3862d3f8
|
2271 2272 2273 2274 2275 |
/* * Increase the limit linearly with more CPUs: */ user_lock_limit *= num_online_cpus(); |
789f90fcf
|
2276 |
user_locked = atomic_long_read(&user->locked_vm) + user_extra; |
c5078f78b
|
2277 |
|
789f90fcf
|
2278 2279 2280 |
extra = 0; if (user_locked > user_lock_limit) extra = user_locked - user_lock_limit; |
7b732a750
|
2281 2282 2283 |
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; |
789f90fcf
|
2284 |
locked = vma->vm_mm->locked_vm + extra; |
7b732a750
|
2285 |
|
459ec28ab
|
2286 2287 |
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && !capable(CAP_IPC_LOCK)) { |
ebb3c4c4c
|
2288 2289 2290 |
ret = -EPERM; goto unlock; } |
7b732a750
|
2291 |
|
cdd6c482c
|
2292 |
WARN_ON(event->data); |
906010b21
|
2293 2294 2295 2296 |
data = perf_mmap_data_alloc(event, nr_pages); ret = -ENOMEM; if (!data) |
ebb3c4c4c
|
2297 |
goto unlock; |
906010b21
|
2298 2299 |
ret = 0; perf_mmap_data_init(event, data); |
cdd6c482c
|
2300 |
atomic_set(&event->mmap_count, 1); |
789f90fcf
|
2301 |
atomic_long_add(user_extra, &user->locked_vm); |
c5078f78b
|
2302 |
vma->vm_mm->locked_vm += extra; |
cdd6c482c
|
2303 |
event->data->nr_locked = extra; |
43a21ea81
|
2304 |
if (vma->vm_flags & VM_WRITE) |
cdd6c482c
|
2305 |
event->data->writable = 1; |
43a21ea81
|
2306 |
|
ebb3c4c4c
|
2307 |
unlock: |
cdd6c482c
|
2308 |
mutex_unlock(&event->mmap_mutex); |
37d818283
|
2309 |
|
37d818283
|
2310 2311 |
vma->vm_flags |= VM_RESERVED; vma->vm_ops = &perf_mmap_vmops; |
7b732a750
|
2312 2313 |
return ret; |
37d818283
|
2314 |
} |
3c446b3d3
|
2315 2316 |
static int perf_fasync(int fd, struct file *filp, int on) { |
3c446b3d3
|
2317 |
struct inode *inode = filp->f_path.dentry->d_inode; |
cdd6c482c
|
2318 |
struct perf_event *event = filp->private_data; |
3c446b3d3
|
2319 2320 2321 |
int retval; mutex_lock(&inode->i_mutex); |
cdd6c482c
|
2322 |
retval = fasync_helper(fd, filp, on, &event->fasync); |
3c446b3d3
|
2323 2324 2325 2326 2327 2328 2329 |
mutex_unlock(&inode->i_mutex); if (retval < 0) return retval; return 0; } |
0793a61d4
|
2330 2331 2332 2333 |
static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, .poll = perf_poll, |
d859e29fe
|
2334 2335 |
.unlocked_ioctl = perf_ioctl, .compat_ioctl = perf_ioctl, |
37d818283
|
2336 |
.mmap = perf_mmap, |
3c446b3d3
|
2337 |
.fasync = perf_fasync, |
0793a61d4
|
2338 |
}; |
15dbf27cc
|
2339 |
/* |
cdd6c482c
|
2340 |
* Perf event wakeup |
925d519ab
|
2341 2342 2343 2344 |
* * If there's data, ensure we set the poll() state and publish everything * to user-space before waking everybody up. */ |
cdd6c482c
|
2345 |
void perf_event_wakeup(struct perf_event *event) |
925d519ab
|
2346 |
{ |
cdd6c482c
|
2347 |
wake_up_all(&event->waitq); |
4c9e25428
|
2348 |
|
cdd6c482c
|
2349 2350 2351 |
if (event->pending_kill) { kill_fasync(&event->fasync, SIGIO, event->pending_kill); event->pending_kill = 0; |
4c9e25428
|
2352 |
} |
925d519ab
|
2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 |
}

/*
 * Pending wakeups
 *
 * Handle the case where we need to wake up from NMI (or rq->lock) context.
 *
 * The NMI bit means we cannot possibly take locks. Therefore, maintain a
 * single linked list and use cmpxchg() to add entries locklessly.
 */
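/*
 * Concretely (see perf_pending_queue() and __perf_pending_run() below):
 * entries are pushed LIFO onto a per-CPU list head with cmpxchg(), in the
 * style of a Treiber stack, and the consumer detaches the whole list with
 * a single xchg() against PENDING_TAIL, so producers and consumer never
 * need a lock.
 */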
cdd6c482c
|
2363 |
static void perf_pending_event(struct perf_pending_entry *entry) |
79f146415
|
2364 |
{ |
cdd6c482c
|
2365 2366 |
struct perf_event *event = container_of(entry, struct perf_event, pending); |
79f146415
|
2367 |
|
cdd6c482c
|
2368 2369 2370 |
if (event->pending_disable) { event->pending_disable = 0; __perf_event_disable(event); |
79f146415
|
2371 |
} |
cdd6c482c
|
2372 2373 2374 |
if (event->pending_wakeup) { event->pending_wakeup = 0; perf_event_wakeup(event); |
79f146415
|
2375 2376 |
} } |
671dec5da
|
2377 |
#define PENDING_TAIL ((struct perf_pending_entry *)-1UL) |
925d519ab
|
2378 |
|
671dec5da
|
2379 |
static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { |
925d519ab
|
2380 2381 |
PENDING_TAIL, }; |
671dec5da
|
2382 2383 |
static void perf_pending_queue(struct perf_pending_entry *entry, void (*func)(struct perf_pending_entry *)) |
925d519ab
|
2384 |
{ |
671dec5da
|
2385 |
struct perf_pending_entry **head; |
925d519ab
|
2386 |
|
671dec5da
|
2387 |
if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) |
925d519ab
|
2388 |
return; |
671dec5da
|
2389 2390 2391 |
entry->func = func; head = &get_cpu_var(perf_pending_head); |
925d519ab
|
2392 2393 |
do { |
671dec5da
|
2394 2395 |
entry->next = *head; } while (cmpxchg(head, entry->next, entry) != entry->next); |
925d519ab
|
2396 |
|
cdd6c482c
|
2397 |
set_perf_event_pending(); |
925d519ab
|
2398 |
|
671dec5da
|
2399 |
put_cpu_var(perf_pending_head); |
925d519ab
|
2400 2401 2402 2403 |
} static int __perf_pending_run(void) { |
671dec5da
|
2404 |
struct perf_pending_entry *list; |
925d519ab
|
2405 |
int nr = 0; |
671dec5da
|
2406 |
list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); |
925d519ab
|
2407 |
while (list != PENDING_TAIL) { |
671dec5da
|
2408 2409 |
void (*func)(struct perf_pending_entry *); struct perf_pending_entry *entry = list; |
925d519ab
|
2410 2411 |
list = list->next; |
671dec5da
|
2412 2413 |
func = entry->func; entry->next = NULL; |
925d519ab
|
2414 2415 2416 2417 2418 2419 |
/* * Ensure we observe the unqueue before we issue the wakeup, * so that we won't be waiting forever. * -- see perf_not_pending(). */ smp_wmb(); |
671dec5da
|
2420 |
func(entry); |
925d519ab
|
2421 2422 2423 2424 2425 |
nr++; } return nr; } |
cdd6c482c
|
2426 |
static inline int perf_not_pending(struct perf_event *event) |
925d519ab
|
2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 |
{ /* * If we flush on whatever cpu we run, there is a chance we don't * need to wait. */ get_cpu(); __perf_pending_run(); put_cpu(); /* * Ensure we see the proper queue state before going to sleep * so that we do not miss the wakeup. -- see perf_pending_handle() */ smp_rmb(); |
cdd6c482c
|
2441 |
return event->pending.next == NULL; |
925d519ab
|
2442 |
} |
cdd6c482c
|
2443 |
static void perf_pending_sync(struct perf_event *event) |
925d519ab
|
2444 |
{ |
cdd6c482c
|
2445 |
wait_event(event->waitq, perf_not_pending(event)); |
925d519ab
|
2446 |
} |
cdd6c482c
|
2447 |
void perf_event_do_pending(void) |
925d519ab
|
2448 2449 2450 2451 2452 |
{ __perf_pending_run(); } /* |
394ee0762
|
2453 2454 |
* Callchain support -- arch specific */ |
9c03d88e3
|
2455 |
__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) |
394ee0762
|
2456 2457 2458 2459 2460 |
{ return NULL; } /* |
0322cd6ec
|
2461 2462 |
* Output */ |
2667de81f
|
2463 2464 |
static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, unsigned long offset, unsigned long head) |
43a21ea81
|
2465 |
{ |
43a21ea81
|
2466 2467 2468 2469 |
unsigned long mask; if (!data->writable) return true; |
906010b21
|
2470 |
mask = perf_data_size(data) - 1; |
43a21ea81
|
2471 2472 2473 2474 2475 2476 2477 2478 2479 |
offset = (offset - tail) & mask; head = (head - tail) & mask; if ((int)(head - offset) < 0) return false; return true; } |
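/*
 * Worked example of the wrap-around arithmetic above (illustrative,
 * assuming 4KiB pages): with 16 data pages the mask is 0xffff; for
 * tail = 60000, offset = 65000 and head = 70000 the masked distances from
 * the tail are 5000 and 10000 respectively, so the new head has not
 * overtaken the reader and the function returns true.
 */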
c33a0bc4e
|
2480 |
static void perf_output_wakeup(struct perf_output_handle *handle) |
78d613eb1
|
2481 |
{ |
c33a0bc4e
|
2482 |
atomic_set(&handle->data->poll, POLL_IN); |
671dec5da
|
2483 |
if (handle->nmi) { |
cdd6c482c
|
2484 2485 2486 |
handle->event->pending_wakeup = 1; perf_pending_queue(&handle->event->pending, perf_pending_event); |
671dec5da
|
2487 |
} else |
cdd6c482c
|
2488 |
perf_event_wakeup(handle->event); |
78d613eb1
|
2489 |
} |
c33a0bc4e
|
2490 2491 2492 |
/* * Curious locking construct. * |
cdd6c482c
|
2493 2494 |
 * We need to ensure a later event_id doesn't publish a head when a former
 * event_id isn't done writing. However, since we need to deal with NMIs we
c33a0bc4e
|
2495 2496 2497 2498 2499 2500 |
 * cannot fully serialize things.
 *
 * What we do is serialize between CPUs so we only have to deal with NMI
 * nesting on a single CPU.
 *
 * We only publish the head (and generate a wakeup) when the outer-most
cdd6c482c
|
2501 |
* event_id completes. |
c33a0bc4e
|
2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 |
*/ static void perf_output_lock(struct perf_output_handle *handle) { struct perf_mmap_data *data = handle->data; int cpu; handle->locked = 0; local_irq_save(handle->flags); cpu = smp_processor_id(); if (in_nmi() && atomic_read(&data->lock) == cpu) return; |
22c1558e5
|
2515 |
while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) |
c33a0bc4e
|
2516 2517 2518 2519 2520 2521 2522 2523 |
cpu_relax(); handle->locked = 1; } static void perf_output_unlock(struct perf_output_handle *handle) { struct perf_mmap_data *data = handle->data; |
8e3747c13
|
2524 2525 |
unsigned long head; int cpu; |
c33a0bc4e
|
2526 |
|
c66de4a5b
|
2527 |
data->done_head = data->head; |
c33a0bc4e
|
2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 |
if (!handle->locked) goto out; again: /* * The xchg implies a full barrier that ensures all writes are done * before we publish the new head, matched by a rmb() in userspace when * reading this position. */ |
8e3747c13
|
2538 |
while ((head = atomic_long_xchg(&data->done_head, 0))) |
c33a0bc4e
|
2539 |
data->user_page->data_head = head; |
c33a0bc4e
|
2540 2541 |
/* |
c66de4a5b
|
2542 |
* NMI can happen here, which means we can miss a done_head update. |
c33a0bc4e
|
2543 |
*/ |
22c1558e5
|
2544 |
cpu = atomic_xchg(&data->lock, -1); |
c33a0bc4e
|
2545 2546 2547 2548 2549 |
WARN_ON_ONCE(cpu != smp_processor_id()); /* * Therefore we have to validate we did not indeed do so. */ |
8e3747c13
|
2550 |
if (unlikely(atomic_long_read(&data->done_head))) { |
c33a0bc4e
|
2551 2552 2553 |
/* * Since we had it locked, we can lock it again. */ |
22c1558e5
|
2554 |
while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) |
c33a0bc4e
|
2555 2556 2557 2558 |
cpu_relax(); goto again; } |
c66de4a5b
|
2559 |
if (atomic_xchg(&data->wakeup, 0)) |
c33a0bc4e
|
2560 2561 2562 2563 |
perf_output_wakeup(handle); out: local_irq_restore(handle->flags); } |
5622f295b
|
2564 2565 |
void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len) |
43a21ea81
|
2566 2567 |
{ unsigned int pages_mask; |
906010b21
|
2568 |
unsigned long offset; |
43a21ea81
|
2569 2570 2571 2572 2573 2574 2575 2576 |
unsigned int size; void **pages; offset = handle->offset; pages_mask = handle->data->nr_pages - 1; pages = handle->data->data_pages; do { |
906010b21
|
2577 2578 |
unsigned long page_offset; unsigned long page_size; |
43a21ea81
|
2579 2580 2581 |
int nr; nr = (offset >> PAGE_SHIFT) & pages_mask; |
906010b21
|
2582 2583 2584 |
page_size = 1UL << (handle->data->data_order + PAGE_SHIFT); page_offset = offset & (page_size - 1); size = min_t(unsigned int, page_size - page_offset, len); |
43a21ea81
|
2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 |
memcpy(pages[nr] + page_offset, buf, size); len -= size; buf += size; offset += size; } while (len); handle->offset = offset; /* * Check we didn't copy past our reservation window, taking the * possible unsigned int wrap into account. */ WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); } |
5622f295b
|
2601 |
int perf_output_begin(struct perf_output_handle *handle, |
cdd6c482c
|
2602 |
struct perf_event *event, unsigned int size, |
5622f295b
|
2603 |
int nmi, int sample) |
0322cd6ec
|
2604 |
{ |
cdd6c482c
|
2605 |
struct perf_event *output_event; |
7b732a750
|
2606 |
struct perf_mmap_data *data; |
2667de81f
|
2607 |
unsigned long tail, offset, head; |
43a21ea81
|
2608 2609 2610 2611 2612 2613 |
int have_lost; struct { struct perf_event_header header; u64 id; u64 lost; } lost_event; |
0322cd6ec
|
2614 |
|
a4be7c277
|
2615 |
rcu_read_lock(); |
2023b3592
|
2616 |
/* |
cdd6c482c
|
2617 |
* For inherited events we send all the output towards the parent. |
2023b3592
|
2618 |
*/ |
cdd6c482c
|
2619 2620 |
if (event->parent) event = event->parent; |
2023b3592
|
2621 |
|
cdd6c482c
|
2622 2623 2624 |
output_event = rcu_dereference(event->output); if (output_event) event = output_event; |
a4be7c277
|
2625 |
|
cdd6c482c
|
2626 |
data = rcu_dereference(event->data); |
7b732a750
|
2627 2628 |
if (!data) goto out; |
43a21ea81
|
2629 |
handle->data = data; |
cdd6c482c
|
2630 |
handle->event = event; |
43a21ea81
|
2631 2632 |
handle->nmi = nmi; handle->sample = sample; |
78d613eb1
|
2633 |
|
7b732a750
|
2634 |
if (!data->nr_pages) |
78d613eb1
|
2635 |
goto fail; |
7b732a750
|
2636 |
|
43a21ea81
|
2637 2638 2639 |
have_lost = atomic_read(&data->lost); if (have_lost) size += sizeof(lost_event); |
c33a0bc4e
|
2640 |
perf_output_lock(handle); |
7b732a750
|
2641 |
do { |
2667de81f
|
2642 2643 2644 2645 2646 2647 2648 |
/*
 * Userspace could choose to issue a mb() before updating the
 * tail pointer, so that all reads will be completed before the
 * write is issued.
 */
tail = ACCESS_ONCE(data->user_page->data_tail);
smp_rmb();
6dc5f2a41
|
2649 |
offset = head = atomic_long_read(&data->head); |
c7138f37f
|
2650 |
head += size; |
2667de81f
|
2651 |
if (unlikely(!perf_output_space(data, tail, offset, head))) |
43a21ea81
|
2652 |
goto fail; |
8e3747c13
|
2653 |
} while (atomic_long_cmpxchg(&data->head, offset, head) != offset); |
7b732a750
|
2654 |
|
b9cacc7bf
|
2655 |
handle->offset = offset; |
63e35b25d
|
2656 |
handle->head = head; |
c66de4a5b
|
2657 |
|
2667de81f
|
2658 |
if (head - tail > data->watermark) |
c66de4a5b
|
2659 |
atomic_set(&data->wakeup, 1); |
0322cd6ec
|
2660 |
|
43a21ea81
|
2661 |
if (have_lost) { |
cdd6c482c
|
2662 |
lost_event.header.type = PERF_RECORD_LOST; |
43a21ea81
|
2663 2664 |
lost_event.header.misc = 0; lost_event.header.size = sizeof(lost_event); |
cdd6c482c
|
2665 |
lost_event.id = event->id; |
43a21ea81
|
2666 2667 2668 2669 |
lost_event.lost = atomic_xchg(&data->lost, 0); perf_output_put(handle, lost_event); } |
b9cacc7bf
|
2670 |
return 0; |
7b732a750
|
2671 |
|
78d613eb1
|
2672 |
fail: |
43a21ea81
|
2673 2674 |
atomic_inc(&data->lost); perf_output_unlock(handle); |
b9cacc7bf
|
2675 2676 |
out: rcu_read_unlock(); |
7b732a750
|
2677 |
|
b9cacc7bf
|
2678 2679 |
return -ENOSPC; } |
7b732a750
|
2680 |
|
5622f295b
|
2681 |
void perf_output_end(struct perf_output_handle *handle) |
b9cacc7bf
|
2682 |
{ |
cdd6c482c
|
2683 |
struct perf_event *event = handle->event; |
c33a0bc4e
|
2684 |
struct perf_mmap_data *data = handle->data; |
cdd6c482c
|
2685 |
int wakeup_events = event->attr.wakeup_events; |
c457810ab
|
2686 |
|
43a21ea81
|
2687 |
if (handle->sample && wakeup_events) { |
c33a0bc4e
|
2688 |
int events = atomic_inc_return(&data->events); |
c457810ab
|
2689 |
if (events >= wakeup_events) { |
c33a0bc4e
|
2690 |
atomic_sub(wakeup_events, &data->events); |
c66de4a5b
|
2691 |
atomic_set(&data->wakeup, 1); |
c457810ab
|
2692 |
} |
c33a0bc4e
|
2693 2694 2695 |
} perf_output_unlock(handle); |
7b732a750
|
2696 |
rcu_read_unlock(); |
b9cacc7bf
|
2697 |
} |
cdd6c482c
|
2698 |
static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) |
709e50cf8
|
2699 2700 |
{ /* |
cdd6c482c
|
2701 |
* only top level events have the pid namespace they were created in |
709e50cf8
|
2702 |
*/ |
cdd6c482c
|
2703 2704 |
if (event->parent) event = event->parent; |
709e50cf8
|
2705 |
|
cdd6c482c
|
2706 |
return task_tgid_nr_ns(p, event->ns); |
709e50cf8
|
2707 |
} |
cdd6c482c
|
2708 |
static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) |
709e50cf8
|
2709 2710 |
{ /* |
cdd6c482c
|
2711 |
* only top level events have the pid namespace they were created in |
709e50cf8
|
2712 |
*/ |
cdd6c482c
|
2713 2714 |
if (event->parent) event = event->parent; |
709e50cf8
|
2715 |
|
cdd6c482c
|
2716 |
return task_pid_nr_ns(p, event->ns); |
709e50cf8
|
2717 |
} |
3dab77fb1
|
2718 |
static void perf_output_read_one(struct perf_output_handle *handle, |
cdd6c482c
|
2719 |
struct perf_event *event) |
3dab77fb1
|
2720 |
{ |
cdd6c482c
|
2721 |
u64 read_format = event->attr.read_format; |
3dab77fb1
|
2722 2723 |
u64 values[4]; int n = 0; |
cdd6c482c
|
2724 |
values[n++] = atomic64_read(&event->count); |
3dab77fb1
|
2725 |
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { |
cdd6c482c
|
2726 2727 |
values[n++] = event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); |
3dab77fb1
|
2728 2729 |
} if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { |
cdd6c482c
|
2730 2731 |
values[n++] = event->total_time_running + atomic64_read(&event->child_total_time_running); |
3dab77fb1
|
2732 2733 |
} if (read_format & PERF_FORMAT_ID) |
cdd6c482c
|
2734 |
values[n++] = primary_event_id(event); |
3dab77fb1
|
2735 2736 2737 2738 2739 |
perf_output_copy(handle, values, n * sizeof(u64)); } /* |
cdd6c482c
|
2740 |
* XXX PERF_FORMAT_GROUP vs inherited events seems difficult. |
3dab77fb1
|
2741 2742 |
*/ static void perf_output_read_group(struct perf_output_handle *handle, |
cdd6c482c
|
2743 |
struct perf_event *event) |
3dab77fb1
|
2744 |
{ |
cdd6c482c
|
2745 2746 |
struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; |
3dab77fb1
|
2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 |
u64 values[5]; int n = 0; values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) values[n++] = leader->total_time_enabled; if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) values[n++] = leader->total_time_running; |
cdd6c482c
|
2757 |
if (leader != event) |
3dab77fb1
|
2758 2759 2760 2761 |
leader->pmu->read(leader); values[n++] = atomic64_read(&leader->count); if (read_format & PERF_FORMAT_ID) |
cdd6c482c
|
2762 |
values[n++] = primary_event_id(leader); |
3dab77fb1
|
2763 2764 |
perf_output_copy(handle, values, n * sizeof(u64)); |
65abc8653
|
2765 |
list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
3dab77fb1
|
2766 |
n = 0; |
cdd6c482c
|
2767 |
if (sub != event) |
3dab77fb1
|
2768 2769 2770 2771 |
sub->pmu->read(sub); values[n++] = atomic64_read(&sub->count); if (read_format & PERF_FORMAT_ID) |
cdd6c482c
|
2772 |
values[n++] = primary_event_id(sub); |
3dab77fb1
|
2773 2774 2775 2776 2777 2778 |
perf_output_copy(handle, values, n * sizeof(u64)); } } static void perf_output_read(struct perf_output_handle *handle, |
cdd6c482c
|
2779 |
struct perf_event *event) |
3dab77fb1
|
2780 |
{ |
cdd6c482c
|
2781 2782 |
if (event->attr.read_format & PERF_FORMAT_GROUP) perf_output_read_group(handle, event); |
3dab77fb1
|
2783 |
else |
cdd6c482c
|
2784 |
perf_output_read_one(handle, event); |
3dab77fb1
|
2785 |
} |
5622f295b
|
2786 2787 2788 |
void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, |
cdd6c482c
|
2789 |
struct perf_event *event) |
5622f295b
|
2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 |
{ u64 sample_type = data->type; perf_output_put(handle, *header); if (sample_type & PERF_SAMPLE_IP) perf_output_put(handle, data->ip); if (sample_type & PERF_SAMPLE_TID) perf_output_put(handle, data->tid_entry); if (sample_type & PERF_SAMPLE_TIME) perf_output_put(handle, data->time); if (sample_type & PERF_SAMPLE_ADDR) perf_output_put(handle, data->addr); if (sample_type & PERF_SAMPLE_ID) perf_output_put(handle, data->id); if (sample_type & PERF_SAMPLE_STREAM_ID) perf_output_put(handle, data->stream_id); if (sample_type & PERF_SAMPLE_CPU) perf_output_put(handle, data->cpu_entry); if (sample_type & PERF_SAMPLE_PERIOD) perf_output_put(handle, data->period); if (sample_type & PERF_SAMPLE_READ) |
cdd6c482c
|
2820 |
perf_output_read(handle, event); |
5622f295b
|
2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 |
if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (data->callchain) { int size = 1; if (data->callchain) size += data->callchain->nr; size *= sizeof(u64); perf_output_copy(handle, data->callchain, size); } else { u64 nr = 0; perf_output_put(handle, nr); } } if (sample_type & PERF_SAMPLE_RAW) { if (data->raw) { perf_output_put(handle, data->raw->size); perf_output_copy(handle, data->raw->data, data->raw->size); } else { struct { u32 size; u32 data; } raw = { .size = sizeof(u32), .data = 0, }; perf_output_put(handle, raw); } } } void perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, |
cdd6c482c
|
2858 |
struct perf_event *event, |
5622f295b
|
2859 |
struct pt_regs *regs) |
7b732a750
|
2860 |
{ |
cdd6c482c
|
2861 |
u64 sample_type = event->attr.sample_type; |
7b732a750
|
2862 |
|
5622f295b
|
2863 |
data->type = sample_type; |
7b732a750
|
2864 |
|
cdd6c482c
|
2865 |
header->type = PERF_RECORD_SAMPLE; |
5622f295b
|
2866 2867 2868 2869 |
header->size = sizeof(*header); header->misc = 0; header->misc |= perf_misc_flags(regs); |
6fab01927
|
2870 |
|
b23f3325e
|
2871 |
if (sample_type & PERF_SAMPLE_IP) { |
5622f295b
|
2872 2873 2874 |
data->ip = perf_instruction_pointer(regs); header->size += sizeof(data->ip); |
8a057d849
|
2875 |
} |
ea5d20cf9
|
2876 |
|
b23f3325e
|
2877 |
if (sample_type & PERF_SAMPLE_TID) { |
ea5d20cf9
|
2878 |
/* namespace issues */ |
cdd6c482c
|
2879 2880 |
data->tid_entry.pid = perf_event_pid(event, current); data->tid_entry.tid = perf_event_tid(event, current); |
5ed00415e
|
2881 |
|
5622f295b
|
2882 |
header->size += sizeof(data->tid_entry); |
5ed00415e
|
2883 |
} |
b23f3325e
|
2884 |
if (sample_type & PERF_SAMPLE_TIME) { |
def0a9b25
|
2885 |
data->time = perf_clock(); |
4d855457d
|
2886 |
|
5622f295b
|
2887 |
header->size += sizeof(data->time); |
4d855457d
|
2888 |
} |
e6e18ec79
|
2889 |
if (sample_type & PERF_SAMPLE_ADDR) |
5622f295b
|
2890 |
header->size += sizeof(data->addr); |
78f13e952
|
2891 |
|
5622f295b
|
2892 |
if (sample_type & PERF_SAMPLE_ID) { |
cdd6c482c
|
2893 |
data->id = primary_event_id(event); |
a85f61abe
|
2894 |
|
5622f295b
|
2895 2896 2897 2898 |
header->size += sizeof(data->id); } if (sample_type & PERF_SAMPLE_STREAM_ID) { |
cdd6c482c
|
2899 |
data->stream_id = event->id; |
5622f295b
|
2900 2901 2902 |
header->size += sizeof(data->stream_id); } |
7f453c24b
|
2903 |
|
b23f3325e
|
2904 |
if (sample_type & PERF_SAMPLE_CPU) { |
5622f295b
|
2905 2906 |
data->cpu_entry.cpu = raw_smp_processor_id(); data->cpu_entry.reserved = 0; |
f370e1e2f
|
2907 |
|
5622f295b
|
2908 |
header->size += sizeof(data->cpu_entry); |
f370e1e2f
|
2909 |
} |
e6e18ec79
|
2910 |
if (sample_type & PERF_SAMPLE_PERIOD) |
5622f295b
|
2911 |
header->size += sizeof(data->period); |
689802b2d
|
2912 |
|
3dab77fb1
|
2913 |
if (sample_type & PERF_SAMPLE_READ) |
cdd6c482c
|
2914 |
header->size += perf_event_read_size(event); |
8a057d849
|
2915 |
|
b23f3325e
|
2916 |
if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
5622f295b
|
2917 |
int size = 1; |
394ee0762
|
2918 |
|
5622f295b
|
2919 2920 2921 2922 2923 2924 |
data->callchain = perf_callchain(regs); if (data->callchain) size += data->callchain->nr; header->size += size * sizeof(u64); |
394ee0762
|
2925 |
} |
3a43ce68a
|
2926 |
if (sample_type & PERF_SAMPLE_RAW) { |
a044560c3
|
2927 2928 2929 2930 2931 2932 2933 2934 |
int size = sizeof(u32); if (data->raw) size += data->raw->size; else size += sizeof(u32); WARN_ON_ONCE(size & (sizeof(u64)-1)); |
5622f295b
|
2935 |
header->size += size; |
7f453c24b
|
2936 |
} |
5622f295b
|
2937 |
} |
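/*
 * Illustrative record layout (not an exhaustive ABI description): with
 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD,
 * the pair of functions above produces
 *
 *	struct perf_event_header header;	(PERF_RECORD_SAMPLE)
 *	u64 ip;
 *	u32 pid, tid;
 *	u64 period;
 *
 * with header.size covering every optional field, since
 * perf_prepare_sample() and perf_output_sample() walk the sample_type
 * bits in the same order.
 */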
7f453c24b
|
2938 |
|
cdd6c482c
|
2939 |
static void perf_event_output(struct perf_event *event, int nmi, |
5622f295b
|
2940 2941 2942 2943 2944 |
struct perf_sample_data *data, struct pt_regs *regs) { struct perf_output_handle handle; struct perf_event_header header; |
689802b2d
|
2945 |
|
cdd6c482c
|
2946 |
perf_prepare_sample(&header, data, event, regs); |
5c1481943
|
2947 |
|
cdd6c482c
|
2948 |
if (perf_output_begin(&handle, event, header.size, nmi, 1)) |
5622f295b
|
2949 |
return; |
0322cd6ec
|
2950 |
|
cdd6c482c
|
2951 |
perf_output_sample(&handle, &header, data, event); |
f413cdb80
|
2952 |
|
8a057d849
|
2953 |
perf_output_end(&handle); |
0322cd6ec
|
2954 2955 2956 |
} /* |
cdd6c482c
|
2957 |
* read event_id |
38b200d67
|
2958 2959 2960 2961 2962 2963 2964 |
*/ struct perf_read_event { struct perf_event_header header; u32 pid; u32 tid; |
38b200d67
|
2965 2966 2967 |
}; static void |
cdd6c482c
|
2968 |
perf_event_read_event(struct perf_event *event, |
38b200d67
|
2969 2970 2971 |
struct task_struct *task) { struct perf_output_handle handle; |
dfc65094d
|
2972 |
struct perf_read_event read_event = { |
38b200d67
|
2973 |
.header = { |
cdd6c482c
|
2974 |
.type = PERF_RECORD_READ, |
38b200d67
|
2975 |
.misc = 0, |
cdd6c482c
|
2976 |
.size = sizeof(read_event) + perf_event_read_size(event), |
38b200d67
|
2977 |
}, |
cdd6c482c
|
2978 2979 |
.pid = perf_event_pid(event, task), .tid = perf_event_tid(event, task), |
38b200d67
|
2980 |
}; |
3dab77fb1
|
2981 |
int ret; |
38b200d67
|
2982 |
|
cdd6c482c
|
2983 |
ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); |
38b200d67
|
2984 2985 |
if (ret) return; |
dfc65094d
|
2986 |
perf_output_put(&handle, read_event); |
cdd6c482c
|
2987 |
perf_output_read(&handle, event); |
3dab77fb1
|
2988 |
|
38b200d67
|
2989 2990 2991 2992 |
perf_output_end(&handle); } /* |
9f498cc5b
|
2993 2994 2995 |
* task tracking -- fork/exit * * enabled by: attr.comm | attr.mmap | attr.task |
60313ebed
|
2996 |
*/ |
9f498cc5b
|
2997 |
struct perf_task_event { |
3a80b4a35
|
2998 |
struct task_struct *task; |
cdd6c482c
|
2999 |
struct perf_event_context *task_ctx; |
60313ebed
|
3000 3001 3002 3003 3004 3005 |
struct { struct perf_event_header header; u32 pid; u32 ppid; |
9f498cc5b
|
3006 3007 |
u32 tid; u32 ptid; |
393b2ad8c
|
3008 |
u64 time; |
cdd6c482c
|
3009 |
} event_id; |
60313ebed
|
3010 |
}; |
cdd6c482c
|
3011 |
static void perf_event_task_output(struct perf_event *event, |
9f498cc5b
|
3012 |
struct perf_task_event *task_event) |
60313ebed
|
3013 3014 |
{ struct perf_output_handle handle; |
393b2ad8c
|
3015 |
int size; |
9f498cc5b
|
3016 |
struct task_struct *task = task_event->task; |
393b2ad8c
|
3017 |
int ret; |
cdd6c482c
|
3018 3019 |
size = task_event->event_id.header.size; ret = perf_output_begin(&handle, event, size, 0, 0); |
60313ebed
|
3020 3021 3022 |
if (ret) return; |
cdd6c482c
|
3023 3024 |
task_event->event_id.pid = perf_event_pid(event, task); task_event->event_id.ppid = perf_event_pid(event, current); |
60313ebed
|
3025 |
|
cdd6c482c
|
3026 3027 |
task_event->event_id.tid = perf_event_tid(event, task); task_event->event_id.ptid = perf_event_tid(event, current); |
9f498cc5b
|
3028 |
|
cdd6c482c
|
3029 |
task_event->event_id.time = perf_clock(); |
393b2ad8c
|
3030 |
|
cdd6c482c
|
3031 |
perf_output_put(&handle, task_event->event_id); |
393b2ad8c
|
3032 |
|
60313ebed
|
3033 3034 |
perf_output_end(&handle); } |
cdd6c482c
|
3035 |
static int perf_event_task_match(struct perf_event *event) |
60313ebed
|
3036 |
{ |
cdd6c482c
|
3037 |
if (event->attr.comm || event->attr.mmap || event->attr.task) |
60313ebed
|
3038 3039 3040 3041 |
return 1; return 0; } |
cdd6c482c
|
3042 |
static void perf_event_task_ctx(struct perf_event_context *ctx, |
9f498cc5b
|
3043 |
struct perf_task_event *task_event) |
60313ebed
|
3044 |
{ |
cdd6c482c
|
3045 |
struct perf_event *event; |
60313ebed
|
3046 3047 3048 3049 3050 |
if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); |
cdd6c482c
|
3051 3052 3053 |
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_task_match(event)) perf_event_task_output(event, task_event); |
60313ebed
|
3054 3055 3056 |
} rcu_read_unlock(); } |
cdd6c482c
|
3057 |
static void perf_event_task_event(struct perf_task_event *task_event) |
60313ebed
|
3058 3059 |
{ struct perf_cpu_context *cpuctx; |
cdd6c482c
|
3060 |
struct perf_event_context *ctx = task_event->task_ctx; |
60313ebed
|
3061 3062 |
cpuctx = &get_cpu_var(perf_cpu_context); |
cdd6c482c
|
3063 |
perf_event_task_ctx(&cpuctx->ctx, task_event); |
60313ebed
|
3064 3065 3066 |
put_cpu_var(perf_cpu_context); rcu_read_lock(); |
3a80b4a35
|
3067 |
if (!ctx) |
cdd6c482c
|
3068 |
ctx = rcu_dereference(task_event->task->perf_event_ctxp); |
60313ebed
|
3069 |
if (ctx) |
cdd6c482c
|
3070 |
perf_event_task_ctx(ctx, task_event); |
60313ebed
|
3071 3072 |
rcu_read_unlock(); } |
cdd6c482c
|
3073 3074 |
static void perf_event_task(struct task_struct *task, struct perf_event_context *task_ctx, |
3a80b4a35
|
3075 |
int new) |
60313ebed
|
3076 |
{ |
9f498cc5b
|
3077 |
struct perf_task_event task_event; |
60313ebed
|
3078 |
|
cdd6c482c
|
3079 3080 3081 |
if (!atomic_read(&nr_comm_events) && !atomic_read(&nr_mmap_events) && !atomic_read(&nr_task_events)) |
60313ebed
|
3082 |
return; |
9f498cc5b
|
3083 |
task_event = (struct perf_task_event){ |
3a80b4a35
|
3084 3085 |
.task = task, .task_ctx = task_ctx, |
cdd6c482c
|
3086 |
.event_id = { |
60313ebed
|
3087 |
.header = { |
cdd6c482c
|
3088 |
.type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT, |
573402db0
|
3089 |
.misc = 0, |
cdd6c482c
|
3090 |
.size = sizeof(task_event.event_id), |
60313ebed
|
3091 |
}, |
573402db0
|
3092 3093 |
/* .pid */ /* .ppid */ |
9f498cc5b
|
3094 3095 |
/* .tid */ /* .ptid */ |
60313ebed
|
3096 3097 |
}, }; |
cdd6c482c
|
3098 |
perf_event_task_event(&task_event); |
9f498cc5b
|
3099 |
} |
cdd6c482c
|
3100 |
void perf_event_fork(struct task_struct *task) |
9f498cc5b
|
3101 |
{ |
cdd6c482c
|
3102 |
perf_event_task(task, NULL, 1); |
60313ebed
|
3103 3104 3105 |
} /* |
8d1b2d936
|
3106 3107 3108 3109 |
* comm tracking */ struct perf_comm_event { |
22a4f650d
|
3110 3111 |
struct task_struct *task; char *comm; |
8d1b2d936
|
3112 3113 3114 3115 3116 3117 3118 |
int comm_size; struct { struct perf_event_header header; u32 pid; u32 tid; |
cdd6c482c
|
3119 |
} event_id; |
8d1b2d936
|
3120 |
}; |
cdd6c482c
|
3121 |
static void perf_event_comm_output(struct perf_event *event, |
8d1b2d936
|
3122 3123 3124 |
struct perf_comm_event *comm_event) { struct perf_output_handle handle; |
cdd6c482c
|
3125 3126 |
int size = comm_event->event_id.header.size; int ret = perf_output_begin(&handle, event, size, 0, 0); |
8d1b2d936
|
3127 3128 3129 |
if (ret) return; |
cdd6c482c
|
3130 3131 |
comm_event->event_id.pid = perf_event_pid(event, comm_event->task); comm_event->event_id.tid = perf_event_tid(event, comm_event->task); |
709e50cf8
|
3132 |
|
cdd6c482c
|
3133 |
perf_output_put(&handle, comm_event->event_id); |
8d1b2d936
|
3134 3135 3136 3137 |
perf_output_copy(&handle, comm_event->comm, comm_event->comm_size); perf_output_end(&handle); } |
cdd6c482c
|
3138 |
static int perf_event_comm_match(struct perf_event *event) |
8d1b2d936
|
3139 |
{ |
cdd6c482c
|
3140 |
if (event->attr.comm) |
8d1b2d936
|
3141 3142 3143 3144 |
return 1; return 0; } |
cdd6c482c
|
3145 |
static void perf_event_comm_ctx(struct perf_event_context *ctx, |
8d1b2d936
|
3146 3147 |
struct perf_comm_event *comm_event) { |
cdd6c482c
|
3148 |
struct perf_event *event; |
8d1b2d936
|
3149 3150 3151 3152 3153 |
if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); |
cdd6c482c
|
3154 3155 3156 |
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_comm_match(event)) perf_event_comm_output(event, comm_event); |
8d1b2d936
|
3157 3158 3159 |
} rcu_read_unlock(); } |
cdd6c482c
|
3160 |
static void perf_event_comm_event(struct perf_comm_event *comm_event) |
8d1b2d936
|
3161 3162 |
{ struct perf_cpu_context *cpuctx; |
cdd6c482c
|
3163 |
struct perf_event_context *ctx; |
8d1b2d936
|
3164 |
unsigned int size; |
413ee3b48
|
3165 |
char comm[TASK_COMM_LEN]; |
8d1b2d936
|
3166 |
|
413ee3b48
|
3167 3168 |
memset(comm, 0, sizeof(comm)); strncpy(comm, comm_event->task->comm, sizeof(comm)); |
888fcee06
|
3169 |
size = ALIGN(strlen(comm)+1, sizeof(u64)); |
8d1b2d936
|
3170 3171 3172 |
comm_event->comm = comm; comm_event->comm_size = size; |
cdd6c482c
|
3173 |
comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
8d1b2d936
|
3174 3175 |
cpuctx = &get_cpu_var(perf_cpu_context); |
cdd6c482c
|
3176 |
perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
8d1b2d936
|
3177 |
put_cpu_var(perf_cpu_context); |
665c2142a
|
3178 3179 3180 3181 3182 3183 |
rcu_read_lock(); /* * doesn't really matter which of the child contexts the * event ends up in. */ |
cdd6c482c
|
3184 |
ctx = rcu_dereference(current->perf_event_ctxp); |
665c2142a
|
3185 |
if (ctx) |
cdd6c482c
|
3186 |
perf_event_comm_ctx(ctx, comm_event); |
665c2142a
|
3187 |
rcu_read_unlock(); |
8d1b2d936
|
3188 |
} |
cdd6c482c
|
3189 |
void perf_event_comm(struct task_struct *task) |
8d1b2d936
|
3190 |
{ |
9ee318a78
|
3191 |
struct perf_comm_event comm_event; |
cdd6c482c
|
3192 3193 |
if (task->perf_event_ctxp) perf_event_enable_on_exec(task); |
57e7986ed
|
3194 |
|
cdd6c482c
|
3195 |
if (!atomic_read(&nr_comm_events)) |
9ee318a78
|
3196 |
return; |
a63eaf34a
|
3197 |
|
9ee318a78
|
3198 |
comm_event = (struct perf_comm_event){ |
8d1b2d936
|
3199 |
.task = task, |
573402db0
|
3200 3201 |
/* .comm */ /* .comm_size */ |
cdd6c482c
|
3202 |
.event_id = { |
573402db0
|
3203 |
.header = { |
cdd6c482c
|
3204 |
.type = PERF_RECORD_COMM, |
573402db0
|
3205 3206 3207 3208 3209 |
.misc = 0, /* .size */ }, /* .pid */ /* .tid */ |
8d1b2d936
|
3210 3211 |
}, }; |
cdd6c482c
|
3212 |
perf_event_comm_event(&comm_event); |
8d1b2d936
|
3213 3214 3215 |
} /* |
0a4a93919
|
3216 3217 3218 3219 |
* mmap tracking */ struct perf_mmap_event { |
089dd79db
|
3220 3221 3222 3223 |
struct vm_area_struct *vma; const char *file_name; int file_size; |
0a4a93919
|
3224 3225 3226 3227 3228 3229 3230 3231 3232 |
struct { struct perf_event_header header; u32 pid; u32 tid; u64 start; u64 len; u64 pgoff; |
cdd6c482c
|
3233 |
} event_id; |
0a4a93919
|
3234 |
}; |
cdd6c482c
|
3235 |
static void perf_event_mmap_output(struct perf_event *event, |
0a4a93919
|
3236 3237 3238 |
struct perf_mmap_event *mmap_event) { struct perf_output_handle handle; |
cdd6c482c
|
3239 3240 |
int size = mmap_event->event_id.header.size; int ret = perf_output_begin(&handle, event, size, 0, 0); |
0a4a93919
|
3241 3242 3243 |
if (ret) return; |
cdd6c482c
|
3244 3245 |
mmap_event->event_id.pid = perf_event_pid(event, current); mmap_event->event_id.tid = perf_event_tid(event, current); |
709e50cf8
|
3246 |
|
cdd6c482c
|
3247 |
perf_output_put(&handle, mmap_event->event_id); |
0a4a93919
|
3248 3249 |
perf_output_copy(&handle, mmap_event->file_name, mmap_event->file_size); |
78d613eb1
|
3250 |
perf_output_end(&handle); |
0a4a93919
|
3251 |
} |
cdd6c482c
|
3252 |
static int perf_event_mmap_match(struct perf_event *event, |
0a4a93919
|
3253 3254 |
struct perf_mmap_event *mmap_event) { |
cdd6c482c
|
3255 |
if (event->attr.mmap) |
0a4a93919
|
3256 3257 3258 3259 |
return 1; return 0; } |
cdd6c482c
|
3260 |
static void perf_event_mmap_ctx(struct perf_event_context *ctx, |
0a4a93919
|
3261 3262 |
struct perf_mmap_event *mmap_event) { |
cdd6c482c
|
3263 |
struct perf_event *event; |
0a4a93919
|
3264 3265 3266 3267 3268 |
if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); |
cdd6c482c
|
3269 3270 3271 |
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_mmap_match(event, mmap_event)) perf_event_mmap_output(event, mmap_event); |
0a4a93919
|
3272 3273 3274 |
} rcu_read_unlock(); } |
cdd6c482c
|
3275 |
static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) |
0a4a93919
|
3276 3277 |
{ struct perf_cpu_context *cpuctx; |
cdd6c482c
|
3278 |
struct perf_event_context *ctx; |
089dd79db
|
3279 3280 |
struct vm_area_struct *vma = mmap_event->vma; struct file *file = vma->vm_file; |
0a4a93919
|
3281 3282 3283 |
unsigned int size; char tmp[16]; char *buf = NULL; |
089dd79db
|
3284 |
const char *name; |
0a4a93919
|
3285 |
|
413ee3b48
|
3286 |
memset(tmp, 0, sizeof(tmp)); |
0a4a93919
|
3287 |
if (file) { |
413ee3b48
|
3288 3289 3290 3291 3292 3293 |
/* * d_path works from the end of the buffer backwards, so we * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL); |
0a4a93919
|
3294 3295 3296 3297 |
if (!buf) { name = strncpy(tmp, "//enomem", sizeof(tmp)); goto got_name; } |
d3d21c412
|
3298 |
name = d_path(&file->f_path, buf, PATH_MAX); |
0a4a93919
|
3299 3300 3301 3302 3303 |
if (IS_ERR(name)) { name = strncpy(tmp, "//toolong", sizeof(tmp)); goto got_name; } } else { |
413ee3b48
|
3304 3305 3306 |
if (arch_vma_name(mmap_event->vma)) { name = strncpy(tmp, arch_vma_name(mmap_event->vma), sizeof(tmp)); |
089dd79db
|
3307 |
goto got_name; |
413ee3b48
|
3308 |
} |
089dd79db
|
3309 3310 3311 3312 3313 |
if (!vma->vm_mm) { name = strncpy(tmp, "[vdso]", sizeof(tmp)); goto got_name; } |
0a4a93919
|
3314 3315 3316 3317 3318 |
name = strncpy(tmp, "//anon", sizeof(tmp)); goto got_name; } got_name: |
888fcee06
|
3319 |
size = ALIGN(strlen(name)+1, sizeof(u64)); |
0a4a93919
|
3320 3321 3322 |
mmap_event->file_name = name; mmap_event->file_size = size; |
cdd6c482c
|
3323 |
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
0a4a93919
|
3324 3325 |
cpuctx = &get_cpu_var(perf_cpu_context); |
cdd6c482c
|
3326 |
perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); |
0a4a93919
|
3327 |
put_cpu_var(perf_cpu_context); |
665c2142a
|
3328 3329 3330 3331 3332 |
rcu_read_lock(); /* * doesn't really matter which of the child contexts the * event ends up in. */ |
cdd6c482c
|
3333 |
ctx = rcu_dereference(current->perf_event_ctxp); |
665c2142a
|
3334 |
if (ctx) |
cdd6c482c
|
3335 |
perf_event_mmap_ctx(ctx, mmap_event); |
665c2142a
|
3336 |
rcu_read_unlock(); |
0a4a93919
|
3337 3338 |
kfree(buf); } |
cdd6c482c
|
3339 |
void __perf_event_mmap(struct vm_area_struct *vma) |
0a4a93919
|
3340 |
{ |
9ee318a78
|
3341 |
struct perf_mmap_event mmap_event; |
cdd6c482c
|
3342 |
if (!atomic_read(&nr_mmap_events)) |
9ee318a78
|
3343 3344 3345 |
return; mmap_event = (struct perf_mmap_event){ |
089dd79db
|
3346 |
.vma = vma, |
573402db0
|
3347 3348 |
/* .file_name */ /* .file_size */ |
cdd6c482c
|
3349 |
.event_id = { |
573402db0
|
3350 |
.header = { |
cdd6c482c
|
3351 |
.type = PERF_RECORD_MMAP, |
573402db0
|
3352 3353 3354 3355 3356 |
.misc = 0, /* .size */ }, /* .pid */ /* .tid */ |
089dd79db
|
3357 3358 3359 |
.start = vma->vm_start, .len = vma->vm_end - vma->vm_start, .pgoff = vma->vm_pgoff, |
0a4a93919
|
3360 3361 |
}, }; |
cdd6c482c
|
3362 |
perf_event_mmap_event(&mmap_event); |
0a4a93919
|
3363 |
} |
0a4a93919
|
3364 |
/* |
a78ac3258
|
3365 3366 |
* IRQ throttle logging */ |
cdd6c482c
|
3367 |
static void perf_log_throttle(struct perf_event *event, int enable) |
a78ac3258
|
3368 3369 3370 3371 3372 3373 3374 |
{ struct perf_output_handle handle; int ret; struct { struct perf_event_header header; u64 time; |
cca3f454a
|
3375 |
u64 id; |
7f453c24b
|
3376 |
u64 stream_id; |
a78ac3258
|
3377 3378 |
} throttle_event = { .header = { |
cdd6c482c
|
3379 |
.type = PERF_RECORD_THROTTLE, |
a78ac3258
|
3380 3381 3382 |
.misc = 0, .size = sizeof(throttle_event), }, |
def0a9b25
|
3383 |
.time = perf_clock(), |
cdd6c482c
|
3384 3385 |
.id = primary_event_id(event), .stream_id = event->id, |
a78ac3258
|
3386 |
}; |
966ee4d6b
|
3387 |
if (enable) |
cdd6c482c
|
3388 |
throttle_event.header.type = PERF_RECORD_UNTHROTTLE; |
966ee4d6b
|
3389 |
|
cdd6c482c
|
3390 |
ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); |
a78ac3258
|
3391 3392 3393 3394 3395 3396 3397 3398 |
if (ret) return; perf_output_put(&handle, throttle_event); perf_output_end(&handle); } /* |
cdd6c482c
|
3399 |
* Generic event overflow handling, sampling. |
f6c7d5fe5
|
3400 |
*/ |
cdd6c482c
|
3401 |
static int __perf_event_overflow(struct perf_event *event, int nmi, |
5622f295b
|
3402 3403 |
int throttle, struct perf_sample_data *data, struct pt_regs *regs) |
f6c7d5fe5
|
3404 |
{ |
cdd6c482c
|
3405 3406 |
int events = atomic_read(&event->event_limit); struct hw_perf_event *hwc = &event->hw; |
79f146415
|
3407 |
int ret = 0; |
cdd6c482c
|
3408 |
throttle = (throttle && event->pmu->unthrottle != NULL); |
850bc73ff
|
3409 |
|
a78ac3258
|
3410 |
if (!throttle) { |
bd2b5b128
|
3411 |
hwc->interrupts++; |
128f048f0
|
3412 |
} else { |
bd2b5b128
|
3413 3414 |
if (hwc->interrupts != MAX_INTERRUPTS) { hwc->interrupts++; |
df58ab24b
|
3415 |
if (HZ * hwc->interrupts > |
cdd6c482c
|
3416 |
(u64)sysctl_perf_event_sample_rate) { |
bd2b5b128
|
3417 |
hwc->interrupts = MAX_INTERRUPTS; |
cdd6c482c
|
3418 |
perf_log_throttle(event, 0); |
128f048f0
|
3419 3420 3421 3422 |
ret = 1; } } else { /* |
cdd6c482c
|
3423 |
* Keep re-disabling events even though on the previous |
128f048f0
|
3424 |
* pass we disabled it - just in case we raced with a |
cdd6c482c
|
3425 |
* sched-in and the event got enabled again: |
128f048f0
|
3426 |
*/ |
a78ac3258
|
3427 3428 3429 |
ret = 1; } } |
60db5e09c
|
3430 |
|
cdd6c482c
|
3431 |
if (event->attr.freq) { |
def0a9b25
|
3432 |
u64 now = perf_clock(); |
bd2b5b128
|
3433 3434 3435 3436 3437 |
s64 delta = now - hwc->freq_stamp; hwc->freq_stamp = now; if (delta > 0 && delta < TICK_NSEC) |
cdd6c482c
|
3438 |
perf_adjust_period(event, NSEC_PER_SEC / (int)delta); |
bd2b5b128
|
3439 |
} |
2023b3592
|
3440 3441 |
/* * XXX event_limit might not quite work as expected on inherited |
cdd6c482c
|
3442 |
* events |
2023b3592
|
3443 |
*/ |
cdd6c482c
|
3444 3445 |
event->pending_kill = POLL_IN; if (events && atomic_dec_and_test(&event->event_limit)) { |
79f146415
|
3446 |
ret = 1; |
cdd6c482c
|
3447 |
event->pending_kill = POLL_HUP; |
79f146415
|
3448 |
if (nmi) { |
cdd6c482c
|
3449 3450 3451 |
event->pending_disable = 1; perf_pending_queue(&event->pending, perf_pending_event); |
79f146415
|
3452 |
} else |
cdd6c482c
|
3453 |
perf_event_disable(event); |
79f146415
|
3454 |
} |
cdd6c482c
|
3455 |
perf_event_output(event, nmi, data, regs); |
79f146415
|
3456 |
return ret; |
f6c7d5fe5
|
3457 |
} |
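The throttle test above extrapolates the interrupt count seen since the last tick to a per-second rate via HZ and compares it against sysctl_perf_event_sample_rate. A standalone sketch of that decision (illustrative names, not kernel code):

/* Illustrative only: mirrors the HZ * hwc->interrupts > sample_rate test. */
#include <stdint.h>

static int should_throttle(unsigned int interrupts_since_tick,
                           unsigned int hz, uint64_t sample_rate)
{
        /* interrupts in the current tick, scaled up to a one-second window */
        return (uint64_t)hz * interrupts_since_tick > sample_rate;
}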
cdd6c482c
|
3458 |
int perf_event_overflow(struct perf_event *event, int nmi, |
5622f295b
|
3459 3460 |
struct perf_sample_data *data, struct pt_regs *regs) |
850bc73ff
|
3461 |
{ |
cdd6c482c
|
3462 |
return __perf_event_overflow(event, nmi, 1, data, regs); |
850bc73ff
|
3463 |
} |
f6c7d5fe5
|
3464 |
/* |
cdd6c482c
|
3465 |
* Generic software event infrastructure |
15dbf27cc
|
3466 |
*/ |
7b4b6658e
|
3467 |
/* |
cdd6c482c
|
3468 3469 |
* We directly increment event->count and keep a second value in * event->hw.period_left to count intervals. This period event |
7b4b6658e
|
3470 3471 3472 |
* is kept in the range [-sample_period, 0] so that we can use the * sign as trigger. */ |
cdd6c482c
|
3473 |
static u64 perf_swevent_set_period(struct perf_event *event) |
15dbf27cc
|
3474 |
{ |
cdd6c482c
|
3475 |
struct hw_perf_event *hwc = &event->hw; |
7b4b6658e
|
3476 3477 3478 3479 3480 |
u64 period = hwc->last_period; u64 nr, offset; s64 old, val; hwc->last_period = hwc->sample_period; |
15dbf27cc
|
3481 3482 |
again: |
7b4b6658e
|
3483 3484 3485 |
old = val = atomic64_read(&hwc->period_left); if (val < 0) return 0; |
15dbf27cc
|
3486 |
|
7b4b6658e
|
3487 3488 3489 3490 3491 |
nr = div64_u64(period + val, period); offset = nr * period; val -= offset; if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) goto again; |
15dbf27cc
|
3492 |
|
7b4b6658e
|
3493 |
return nr; |
15dbf27cc
|
3494 |
} |
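perf_swevent_set_period() keeps hw.period_left in [-sample_period, 0] so the sign change alone marks an overflow; once the counter has crossed zero it works out how many whole periods the overshoot covers and folds the remainder back into that range. A minimal single-threaded sketch of the same arithmetic (the kernel version additionally loops on atomic64_cmpxchg to cope with concurrent updates):

/* Sketch, not kernel code: count elapsed periods once period_left >= 0. */
#include <stdint.h>

static uint64_t periods_elapsed(int64_t *period_left, uint64_t period)
{
        int64_t val = *period_left;
        uint64_t nr;

        if (val < 0)                            /* still inside the current period */
                return 0;

        nr = (period + (uint64_t)val) / period; /* whole periods covered by the overshoot */
        *period_left = val - (int64_t)(nr * period); /* back into [-period, 0] */
        return nr;
}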
cdd6c482c
|
3495 |
static void perf_swevent_overflow(struct perf_event *event, |
5622f295b
|
3496 3497 |
int nmi, struct perf_sample_data *data, struct pt_regs *regs) |
15dbf27cc
|
3498 |
{ |
cdd6c482c
|
3499 |
struct hw_perf_event *hwc = &event->hw; |
850bc73ff
|
3500 |
int throttle = 0; |
7b4b6658e
|
3501 |
u64 overflow; |
15dbf27cc
|
3502 |
|
cdd6c482c
|
3503 3504 |
data->period = event->hw.last_period; overflow = perf_swevent_set_period(event); |
15dbf27cc
|
3505 |
|
7b4b6658e
|
3506 3507 |
if (hwc->interrupts == MAX_INTERRUPTS) return; |
15dbf27cc
|
3508 |
|
7b4b6658e
|
3509 |
for (; overflow; overflow--) { |
cdd6c482c
|
3510 |
if (__perf_event_overflow(event, nmi, throttle, |
5622f295b
|
3511 |
data, regs)) { |
7b4b6658e
|
3512 3513 3514 3515 3516 3517 |
/* * We inhibit the overflow from happening when * hwc->interrupts == MAX_INTERRUPTS. */ break; } |
cf450a735
|
3518 |
throttle = 1; |
7b4b6658e
|
3519 |
} |
15dbf27cc
|
3520 |
} |
cdd6c482c
|
3521 |
static void perf_swevent_unthrottle(struct perf_event *event) |
d6d020e99
|
3522 |
{ |
d6d020e99
|
3523 |
/* |
7b4b6658e
|
3524 |
* Nothing to do, we already reset hwc->interrupts. |
d6d020e99
|
3525 |
*/ |
7b4b6658e
|
3526 |
} |
d6d020e99
|
3527 |
|
cdd6c482c
|
3528 |
static void perf_swevent_add(struct perf_event *event, u64 nr, |
5622f295b
|
3529 3530 |
int nmi, struct perf_sample_data *data, struct pt_regs *regs) |
7b4b6658e
|
3531 |
{ |
cdd6c482c
|
3532 |
struct hw_perf_event *hwc = &event->hw; |
d6d020e99
|
3533 |
|
cdd6c482c
|
3534 |
atomic64_add(nr, &event->count); |
d6d020e99
|
3535 |
|
7b4b6658e
|
3536 3537 |
if (!hwc->sample_period) return; |
d6d020e99
|
3538 |
|
5622f295b
|
3539 |
if (!regs) |
7b4b6658e
|
3540 |
return; |
df1a132bf
|
3541 |
|
7b4b6658e
|
3542 |
if (!atomic64_add_negative(nr, &hwc->period_left)) |
cdd6c482c
|
3543 |
perf_swevent_overflow(event, nmi, data, regs); |
d6d020e99
|
3544 |
} |
cdd6c482c
|
3545 |
static int perf_swevent_is_counting(struct perf_event *event) |
880ca15ad
|
3546 |
{ |
bcfc2602e
|
3547 |
/* |
cdd6c482c
|
3548 |
* The event is active, we're good! |
bcfc2602e
|
3549 |
*/ |
cdd6c482c
|
3550 |
if (event->state == PERF_EVENT_STATE_ACTIVE) |
880ca15ad
|
3551 |
return 1; |
bcfc2602e
|
3552 |
/* |
cdd6c482c
|
3553 |
* The event is off/error, not counting. |
bcfc2602e
|
3554 |
*/ |
cdd6c482c
|
3555 |
if (event->state != PERF_EVENT_STATE_INACTIVE) |
880ca15ad
|
3556 3557 3558 |
return 0; /* |
cdd6c482c
|
3559 |
* The event is inactive, if the context is active |
bcfc2602e
|
3560 3561 |
* we're part of a group that didn't make it on the 'pmu', * not counting. |
880ca15ad
|
3562 |
*/ |
cdd6c482c
|
3563 |
if (event->ctx->is_active) |
bcfc2602e
|
3564 3565 3566 3567 3568 3569 3570 3571 |
return 0; /* * We're inactive and the context is too, this means the * task is scheduled out, we're counting events that happen * to us, like migration events. */ return 1; |
880ca15ad
|
3572 |
} |
cdd6c482c
|
3573 |
static int perf_swevent_match(struct perf_event *event, |
1c432d899
|
3574 |
enum perf_type_id type, |
dfc65094d
|
3575 |
u32 event_id, struct pt_regs *regs) |
15dbf27cc
|
3576 |
{ |
cdd6c482c
|
3577 |
if (!perf_swevent_is_counting(event)) |
15dbf27cc
|
3578 |
return 0; |
cdd6c482c
|
3579 |
if (event->attr.type != type) |
a21ca2cac
|
3580 |
return 0; |
cdd6c482c
|
3581 |
if (event->attr.config != event_id) |
15dbf27cc
|
3582 |
return 0; |
3f731ca60
|
3583 |
if (regs) { |
cdd6c482c
|
3584 |
if (event->attr.exclude_user && user_mode(regs)) |
3f731ca60
|
3585 |
return 0; |
15dbf27cc
|
3586 |
|
cdd6c482c
|
3587 |
if (event->attr.exclude_kernel && !user_mode(regs)) |
3f731ca60
|
3588 3589 |
return 0; } |
15dbf27cc
|
3590 3591 3592 |
return 1; } |
cdd6c482c
|
3593 |
static void perf_swevent_ctx_event(struct perf_event_context *ctx, |
92bf309a9
|
3594 |
enum perf_type_id type, |
dfc65094d
|
3595 |
u32 event_id, u64 nr, int nmi, |
5622f295b
|
3596 3597 |
struct perf_sample_data *data, struct pt_regs *regs) |
15dbf27cc
|
3598 |
{ |
cdd6c482c
|
3599 |
struct perf_event *event; |
15dbf27cc
|
3600 |
|
01ef09d9f
|
3601 |
if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) |
15dbf27cc
|
3602 |
return; |
592903cdc
|
3603 |
rcu_read_lock(); |
cdd6c482c
|
3604 3605 3606 |
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_swevent_match(event, type, event_id, regs)) perf_swevent_add(event, nr, nmi, data, regs); |
15dbf27cc
|
3607 |
} |
592903cdc
|
3608 |
rcu_read_unlock(); |
15dbf27cc
|
3609 |
} |
cdd6c482c
|
3610 |
static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) |
96f6d4444
|
3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 |
{ if (in_nmi()) return &cpuctx->recursion[3]; if (in_irq()) return &cpuctx->recursion[2]; if (in_softirq()) return &cpuctx->recursion[1]; return &cpuctx->recursion[0]; } |
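The four recursion slots map to the execution context (task, softirq, hardirq, NMI) so that a software event raised from inside the swevent path itself is simply dropped instead of recursing; do_perf_sw_event() below bumps the slot, does its work, and decrements it. A stripped-down, non-per-CPU sketch of that guard (names are illustrative):

/* Illustrative recursion guard: one counter per execution context. */
enum ctx_level { CTX_TASK, CTX_SOFTIRQ, CTX_HARDIRQ, CTX_NMI, CTX_LEVELS };

static int recursion[CTX_LEVELS];

static int swevent_enter(enum ctx_level level)
{
        if (recursion[level])
                return 0;               /* already inside, drop the nested event */
        recursion[level]++;
        return 1;
}

static void swevent_exit(enum ctx_level level)
{
        recursion[level]--;
}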
cdd6c482c
|
3623 |
static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
92bf309a9
|
3624 |
u64 nr, int nmi, |
5622f295b
|
3625 3626 |
struct perf_sample_data *data, struct pt_regs *regs) |
15dbf27cc
|
3627 3628 |
{ struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); |
cdd6c482c
|
3629 3630 |
int *recursion = perf_swevent_recursion_context(cpuctx); struct perf_event_context *ctx; |
96f6d4444
|
3631 3632 3633 3634 3635 3636 |
if (*recursion) goto out; (*recursion)++; barrier(); |
15dbf27cc
|
3637 |
|
cdd6c482c
|
3638 |
perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, |
5622f295b
|
3639 |
nr, nmi, data, regs); |
665c2142a
|
3640 3641 3642 3643 3644 |
rcu_read_lock(); /* * doesn't really matter which of the child contexts the * event ends up in. */ |
cdd6c482c
|
3645 |
ctx = rcu_dereference(current->perf_event_ctxp); |
665c2142a
|
3646 |
if (ctx) |
cdd6c482c
|
3647 |
perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); |
665c2142a
|
3648 |
rcu_read_unlock(); |
15dbf27cc
|
3649 |
|
96f6d4444
|
3650 3651 3652 3653 |
barrier(); (*recursion)--; out: |
15dbf27cc
|
3654 3655 |
put_cpu_var(perf_cpu_context); } |
cdd6c482c
|
3656 |
void __perf_sw_event(u32 event_id, u64 nr, int nmi, |
f29ac756a
|
3657 |
struct pt_regs *regs, u64 addr) |
b8e83514b
|
3658 |
{ |
92bf309a9
|
3659 |
struct perf_sample_data data = { |
92bf309a9
|
3660 3661 |
.addr = addr, }; |
cdd6c482c
|
3662 |
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, |
5622f295b
|
3663 |
&data, regs); |
b8e83514b
|
3664 |
} |
cdd6c482c
|
3665 |
static void perf_swevent_read(struct perf_event *event) |
15dbf27cc
|
3666 |
{ |
15dbf27cc
|
3667 |
} |
cdd6c482c
|
3668 |
static int perf_swevent_enable(struct perf_event *event) |
15dbf27cc
|
3669 |
{ |
cdd6c482c
|
3670 |
struct hw_perf_event *hwc = &event->hw; |
7b4b6658e
|
3671 3672 3673 |
if (hwc->sample_period) { hwc->last_period = hwc->sample_period; |
cdd6c482c
|
3674 |
perf_swevent_set_period(event); |
7b4b6658e
|
3675 |
} |
15dbf27cc
|
3676 3677 |
return 0; } |
cdd6c482c
|
3678 |
static void perf_swevent_disable(struct perf_event *event) |
15dbf27cc
|
3679 |
{ |
15dbf27cc
|
3680 |
} |
4aeb0b423
|
3681 |
static const struct pmu perf_ops_generic = { |
cdd6c482c
|
3682 3683 3684 3685 |
.enable = perf_swevent_enable, .disable = perf_swevent_disable, .read = perf_swevent_read, .unthrottle = perf_swevent_unthrottle, |
ac17dc8e5
|
3686 |
}; |
15dbf27cc
|
3687 |
/* |
cdd6c482c
|
3688 |
* hrtimer based swevent callback |
7b4b6658e
|
3689 |
*/ |
cdd6c482c
|
3690 |
static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) |
7b4b6658e
|
3691 3692 3693 |
{ enum hrtimer_restart ret = HRTIMER_RESTART; struct perf_sample_data data; |
5622f295b
|
3694 |
struct pt_regs *regs; |
cdd6c482c
|
3695 |
struct perf_event *event; |
7b4b6658e
|
3696 |
u64 period; |
cdd6c482c
|
3697 3698 |
event = container_of(hrtimer, struct perf_event, hw.hrtimer); event->pmu->read(event); |
7b4b6658e
|
3699 3700 |
data.addr = 0; |
5622f295b
|
3701 |
regs = get_irq_regs(); |
7b4b6658e
|
3702 3703 3704 3705 |
/* * In case we exclude kernel IPs or are somehow not in interrupt * context, provide the next best thing, the user IP. */ |
cdd6c482c
|
3706 3707 |
if ((event->attr.exclude_kernel || !regs) && !event->attr.exclude_user) |
5622f295b
|
3708 |
regs = task_pt_regs(current); |
7b4b6658e
|
3709 |
|
5622f295b
|
3710 |
if (regs) { |
54f440760
|
3711 3712 3713 |
if (!(event->attr.exclude_idle && current->pid == 0)) if (perf_event_overflow(event, 0, &data, regs)) ret = HRTIMER_NORESTART; |
7b4b6658e
|
3714 |
} |
cdd6c482c
|
3715 |
period = max_t(u64, 10000, event->hw.sample_period); |
7b4b6658e
|
3716 3717 3718 3719 |
hrtimer_forward_now(hrtimer, ns_to_ktime(period)); return ret; } |
721a669b7
|
3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 |
static void perf_swevent_start_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; if (hwc->sample_period) { u64 period; if (hwc->remaining) { if (hwc->remaining < 0) period = 10000; else period = hwc->remaining; hwc->remaining = 0; } else { period = max_t(u64, 10000, hwc->sample_period); } __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0, HRTIMER_MODE_REL, 0); } } static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; if (hwc->sample_period) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); hwc->remaining = ktime_to_ns(remaining); hrtimer_cancel(&hwc->hrtimer); } } |
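perf_swevent_cancel_hrtimer() stashes the unexpired part of the sampling period in hwc->remaining so a later perf_swevent_start_hrtimer() resumes mid-period rather than restarting from a full period. A small sketch of that save/restore, with hypothetical names and the same 10 us floor used above:

/* Illustrative: preserve the unexpired portion of a periodic sampling timer. */
#include <stdint.h>

struct sw_timer {
        uint64_t period_ns;     /* nominal sampling period */
        uint64_t remaining_ns;  /* saved on cancel, consumed on restart */
};

static uint64_t next_expiry_ns(struct sw_timer *t)
{
        uint64_t ns;

        if (t->remaining_ns) {
                ns = t->remaining_ns;   /* resume where the cancelled timer left off */
                t->remaining_ns = 0;
        } else {
                ns = t->period_ns > 10000 ? t->period_ns : 10000; /* 10 us floor */
        }
        return ns;
}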
7b4b6658e
|
3755 |
/* |
cdd6c482c
|
3756 |
* Software event: cpu wall time clock |
15dbf27cc
|
3757 |
*/ |
cdd6c482c
|
3758 |
static void cpu_clock_perf_event_update(struct perf_event *event) |
9abf8a08b
|
3759 3760 3761 3762 3763 3764 |
{ int cpu = raw_smp_processor_id(); s64 prev; u64 now; now = cpu_clock(cpu); |
cdd6c482c
|
3765 3766 3767 |
prev = atomic64_read(&event->hw.prev_count); atomic64_set(&event->hw.prev_count, now); atomic64_add(now - prev, &event->count); |
9abf8a08b
|
3768 |
} |
cdd6c482c
|
3769 |
static int cpu_clock_perf_event_enable(struct perf_event *event) |
d6d020e99
|
3770 |
{ |
cdd6c482c
|
3771 |
struct hw_perf_event *hwc = &event->hw; |
d6d020e99
|
3772 3773 3774 |
int cpu = raw_smp_processor_id(); atomic64_set(&hwc->prev_count, cpu_clock(cpu)); |
721a669b7
|
3775 |
perf_swevent_start_hrtimer(event); |
d6d020e99
|
3776 3777 3778 |
return 0; } |
cdd6c482c
|
3779 |
static void cpu_clock_perf_event_disable(struct perf_event *event) |
5c92d1241
|
3780 |
{ |
721a669b7
|
3781 |
perf_swevent_cancel_hrtimer(event); |
cdd6c482c
|
3782 |
cpu_clock_perf_event_update(event); |
5c92d1241
|
3783 |
} |
cdd6c482c
|
3784 |
static void cpu_clock_perf_event_read(struct perf_event *event) |
5c92d1241
|
3785 |
{ |
cdd6c482c
|
3786 |
cpu_clock_perf_event_update(event); |
5c92d1241
|
3787 |
} |
4aeb0b423
|
3788 |
static const struct pmu perf_ops_cpu_clock = { |
cdd6c482c
|
3789 3790 3791 |
.enable = cpu_clock_perf_event_enable, .disable = cpu_clock_perf_event_disable, .read = cpu_clock_perf_event_read, |
5c92d1241
|
3792 |
}; |
aa9c4c0f9
|
3793 |
/* |
cdd6c482c
|
3794 |
* Software event: task time clock |
15dbf27cc
|
3795 |
*/ |
cdd6c482c
|
3796 |
static void task_clock_perf_event_update(struct perf_event *event, u64 now) |
aa9c4c0f9
|
3797 |
{ |
e30e08f65
|
3798 |
u64 prev; |
8cb391e87
|
3799 |
s64 delta; |
cdd6c482c
|
3800 |
prev = atomic64_xchg(&event->hw.prev_count, now); |
8cb391e87
|
3801 |
delta = now - prev; |
cdd6c482c
|
3802 |
atomic64_add(delta, &event->count); |
bae43c994
|
3803 |
} |
cdd6c482c
|
3804 |
static int task_clock_perf_event_enable(struct perf_event *event) |
8cb391e87
|
3805 |
{ |
cdd6c482c
|
3806 |
struct hw_perf_event *hwc = &event->hw; |
a39d6f255
|
3807 |
u64 now; |
cdd6c482c
|
3808 |
now = event->ctx->time; |
d6d020e99
|
3809 |
|
a39d6f255
|
3810 |
atomic64_set(&hwc->prev_count, now); |
721a669b7
|
3811 3812 |
perf_swevent_start_hrtimer(event); |
95cdd2e78
|
3813 3814 |
return 0; |
8cb391e87
|
3815 |
} |
cdd6c482c
|
3816 |
static void task_clock_perf_event_disable(struct perf_event *event) |
bae43c994
|
3817 |
{ |
721a669b7
|
3818 |
perf_swevent_cancel_hrtimer(event); |
cdd6c482c
|
3819 |
task_clock_perf_event_update(event, event->ctx->time); |
e30e08f65
|
3820 |
|
d6d020e99
|
3821 |
} |
aa9c4c0f9
|
3822 |
|
cdd6c482c
|
3823 |
static void task_clock_perf_event_read(struct perf_event *event) |
d6d020e99
|
3824 |
{ |
e30e08f65
|
3825 3826 3827 |
u64 time; if (!in_nmi()) { |
cdd6c482c
|
3828 3829 |
update_context_time(event->ctx); time = event->ctx->time; |
e30e08f65
|
3830 3831 |
} else { u64 now = perf_clock(); |
cdd6c482c
|
3832 3833 |
u64 delta = now - event->ctx->timestamp; time = event->ctx->time + delta; |
e30e08f65
|
3834 |
} |
cdd6c482c
|
3835 |
task_clock_perf_event_update(event, time); |
bae43c994
|
3836 |
} |
4aeb0b423
|
3837 |
static const struct pmu perf_ops_task_clock = { |
cdd6c482c
|
3838 3839 3840 |
.enable = task_clock_perf_event_enable, .disable = task_clock_perf_event_disable, .read = task_clock_perf_event_read, |
bae43c994
|
3841 |
}; |
e077df4f4
|
3842 |
#ifdef CONFIG_EVENT_PROFILE |
cdd6c482c
|
3843 |
void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
f413cdb80
|
3844 |
int entry_size) |
e077df4f4
|
3845 |
{ |
3a43ce68a
|
3846 |
struct perf_raw_record raw = { |
f413cdb80
|
3847 |
.size = entry_size, |
3a43ce68a
|
3848 |
.data = record, |
f413cdb80
|
3849 |
}; |
92bf309a9
|
3850 |
struct perf_sample_data data = { |
3a6593050
|
3851 |
.addr = addr, |
3a43ce68a
|
3852 |
.raw = &raw, |
92bf309a9
|
3853 |
}; |
b8e83514b
|
3854 |
|
5622f295b
|
3855 3856 3857 3858 |
struct pt_regs *regs = get_irq_regs(); if (!regs) regs = task_pt_regs(current); |
b8e83514b
|
3859 |
|
cdd6c482c
|
3860 |
do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, |
5622f295b
|
3861 |
&data, regs); |
e077df4f4
|
3862 |
} |
cdd6c482c
|
3863 |
EXPORT_SYMBOL_GPL(perf_tp_event); |
e077df4f4
|
3864 3865 3866 |
extern int ftrace_profile_enable(int); extern void ftrace_profile_disable(int); |
cdd6c482c
|
3867 |
static void tp_perf_event_destroy(struct perf_event *event) |
e077df4f4
|
3868 |
{ |
cdd6c482c
|
3869 |
ftrace_profile_disable(event->attr.config); |
e077df4f4
|
3870 |
} |
cdd6c482c
|
3871 |
static const struct pmu *tp_perf_event_init(struct perf_event *event) |
e077df4f4
|
3872 |
{ |
a4e95fc2c
|
3873 3874 3875 3876 |
/* * Raw tracepoint data is a severe data leak, only allow root to * have these. */ |
cdd6c482c
|
3877 |
if ((event->attr.sample_type & PERF_SAMPLE_RAW) && |
0fbdea19e
|
3878 |
perf_paranoid_tracepoint_raw() && |
a4e95fc2c
|
3879 3880 |
!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); |
cdd6c482c
|
3881 |
if (ftrace_profile_enable(event->attr.config)) |
e077df4f4
|
3882 |
return NULL; |
cdd6c482c
|
3883 |
event->destroy = tp_perf_event_destroy; |
e077df4f4
|
3884 3885 3886 3887 |
return &perf_ops_generic; } #else |
cdd6c482c
|
3888 |
static const struct pmu *tp_perf_event_init(struct perf_event *event) |
e077df4f4
|
3889 3890 3891 3892 |
{ return NULL; } #endif |
cdd6c482c
|
3893 |
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
f29ac756a
|
3894 |
|
cdd6c482c
|
3895 |
static void sw_perf_event_destroy(struct perf_event *event) |
f29ac756a
|
3896 |
{ |
cdd6c482c
|
3897 |
u64 event_id = event->attr.config; |
f29ac756a
|
3898 |
|
cdd6c482c
|
3899 |
WARN_ON(event->parent); |
f344011cc
|
3900 |
|
cdd6c482c
|
3901 |
atomic_dec(&perf_swevent_enabled[event_id]); |
f29ac756a
|
3902 |
} |
cdd6c482c
|
3903 |
static const struct pmu *sw_perf_event_init(struct perf_event *event) |
5c92d1241
|
3904 |
{ |
4aeb0b423
|
3905 |
const struct pmu *pmu = NULL; |
cdd6c482c
|
3906 |
u64 event_id = event->attr.config; |
5c92d1241
|
3907 |
|
0475f9ea8
|
3908 |
/* |
cdd6c482c
|
3909 |
* Software events (currently) can't in general distinguish |
0475f9ea8
|
3910 3911 3912 3913 3914 |
* between user, kernel and hypervisor events. * However, context switches and cpu migrations are considered * to be kernel events, and page faults are never hypervisor * events. */ |
dfc65094d
|
3915 |
switch (event_id) { |
f4dbfa8f3
|
3916 |
case PERF_COUNT_SW_CPU_CLOCK: |
4aeb0b423
|
3917 |
pmu = &perf_ops_cpu_clock; |
d6d020e99
|
3918 |
|
5c92d1241
|
3919 |
break; |
f4dbfa8f3
|
3920 |
case PERF_COUNT_SW_TASK_CLOCK: |
23a185ca8
|
3921 |
/* |
cdd6c482c
|
3922 3923 |
* If the user instantiates this as a per-cpu event, * use the cpu_clock event instead. |
23a185ca8
|
3924 |
*/ |
cdd6c482c
|
3925 |
if (event->ctx->task) |
4aeb0b423
|
3926 |
pmu = &perf_ops_task_clock; |
23a185ca8
|
3927 |
else |
4aeb0b423
|
3928 |
pmu = &perf_ops_cpu_clock; |
d6d020e99
|
3929 |
|
bae43c994
|
3930 |
break; |
f4dbfa8f3
|
3931 3932 3933 3934 3935 |
case PERF_COUNT_SW_PAGE_FAULTS: case PERF_COUNT_SW_PAGE_FAULTS_MIN: case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: |
cdd6c482c
|
3936 3937 3938 |
if (!event->parent) { atomic_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; |
f344011cc
|
3939 |
} |
3f731ca60
|
3940 |
pmu = &perf_ops_generic; |
6c594c21f
|
3941 |
break; |
5c92d1241
|
3942 |
} |
15dbf27cc
|
3943 |
|
4aeb0b423
|
3944 |
return pmu; |
5c92d1241
|
3945 |
} |
0793a61d4
|
3946 |
/* |
cdd6c482c
|
3947 |
* Allocate and initialize an event structure |
0793a61d4
|
3948 |
*/ |
cdd6c482c
|
3949 3950 |
static struct perf_event * perf_event_alloc(struct perf_event_attr *attr, |
04289bb98
|
3951 |
int cpu, |
cdd6c482c
|
3952 3953 3954 |
struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, |
9b51f66dc
|
3955 |
gfp_t gfpflags) |
0793a61d4
|
3956 |
{ |
4aeb0b423
|
3957 |
const struct pmu *pmu; |
cdd6c482c
|
3958 3959 |
struct perf_event *event; struct hw_perf_event *hwc; |
d5d2bc0dd
|
3960 |
long err; |
0793a61d4
|
3961 |
|
cdd6c482c
|
3962 3963 |
event = kzalloc(sizeof(*event), gfpflags); if (!event) |
d5d2bc0dd
|
3964 |
return ERR_PTR(-ENOMEM); |
0793a61d4
|
3965 |
|
04289bb98
|
3966 |
/* |
cdd6c482c
|
3967 |
* Single events are their own group leaders, with an |
04289bb98
|
3968 3969 3970 |
* empty sibling list: */ if (!group_leader) |
cdd6c482c
|
3971 |
group_leader = event; |
04289bb98
|
3972 |
|
cdd6c482c
|
3973 3974 |
mutex_init(&event->child_mutex); INIT_LIST_HEAD(&event->child_list); |
fccc714b3
|
3975 |
|
cdd6c482c
|
3976 3977 3978 3979 |
INIT_LIST_HEAD(&event->group_entry); INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); init_waitqueue_head(&event->waitq); |
0793a61d4
|
3980 |
|
cdd6c482c
|
3981 |
mutex_init(&event->mmap_mutex); |
7b732a750
|
3982 |
|
cdd6c482c
|
3983 3984 3985 3986 3987 3988 |
event->cpu = cpu; event->attr = *attr; event->group_leader = group_leader; event->pmu = NULL; event->ctx = ctx; event->oncpu = -1; |
a96bbc164
|
3989 |
|
cdd6c482c
|
3990 |
event->parent = parent_event; |
b84fbc9fb
|
3991 |
|
cdd6c482c
|
3992 3993 |
event->ns = get_pid_ns(current->nsproxy->pid_ns); event->id = atomic64_inc_return(&perf_event_id); |
a96bbc164
|
3994 |
|
cdd6c482c
|
3995 |
event->state = PERF_EVENT_STATE_INACTIVE; |
329d876d6
|
3996 |
|
0d48696f8
|
3997 |
if (attr->disabled) |
cdd6c482c
|
3998 |
event->state = PERF_EVENT_STATE_OFF; |
a86ed5085
|
3999 |
|
4aeb0b423
|
4000 |
pmu = NULL; |
b8e83514b
|
4001 |
|
cdd6c482c
|
4002 |
hwc = &event->hw; |
bd2b5b128
|
4003 |
hwc->sample_period = attr->sample_period; |
0d48696f8
|
4004 |
if (attr->freq && attr->sample_freq) |
bd2b5b128
|
4005 |
hwc->sample_period = 1; |
eced1dfcf
|
4006 |
hwc->last_period = hwc->sample_period; |
bd2b5b128
|
4007 4008 |
atomic64_set(&hwc->period_left, hwc->sample_period); |
60db5e09c
|
4009 |
|
2023b3592
|
4010 |
/* |
cdd6c482c
|
4011 |
* we currently do not support PERF_FORMAT_GROUP on inherited events |
2023b3592
|
4012 |
*/ |
3dab77fb1
|
4013 |
if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) |
2023b3592
|
4014 |
goto done; |
a21ca2cac
|
4015 |
switch (attr->type) { |
081fad861
|
4016 |
case PERF_TYPE_RAW: |
b8e83514b
|
4017 |
case PERF_TYPE_HARDWARE: |
8326f44da
|
4018 |
case PERF_TYPE_HW_CACHE: |
cdd6c482c
|
4019 |
pmu = hw_perf_event_init(event); |
b8e83514b
|
4020 4021 4022 |
break; case PERF_TYPE_SOFTWARE: |
cdd6c482c
|
4023 |
pmu = sw_perf_event_init(event); |
b8e83514b
|
4024 4025 4026 |
break; case PERF_TYPE_TRACEPOINT: |
cdd6c482c
|
4027 |
pmu = tp_perf_event_init(event); |
b8e83514b
|
4028 |
break; |
974802eaa
|
4029 4030 4031 |
default: break; |
b8e83514b
|
4032 |
} |
d5d2bc0dd
|
4033 4034 |
done: err = 0; |
4aeb0b423
|
4035 |
if (!pmu) |
d5d2bc0dd
|
4036 |
err = -EINVAL; |
4aeb0b423
|
4037 4038 |
else if (IS_ERR(pmu)) err = PTR_ERR(pmu); |
5c92d1241
|
4039 |
|
d5d2bc0dd
|
4040 |
if (err) { |
cdd6c482c
|
4041 4042 4043 |
if (event->ns) put_pid_ns(event->ns); kfree(event); |
d5d2bc0dd
|
4044 |
return ERR_PTR(err); |
621a01eac
|
4045 |
} |
d5d2bc0dd
|
4046 |
|
cdd6c482c
|
4047 |
event->pmu = pmu; |
0793a61d4
|
4048 |
|
cdd6c482c
|
4049 4050 4051 4052 4053 4054 4055 4056 |
if (!event->parent) { atomic_inc(&nr_events); if (event->attr.mmap) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); |
f344011cc
|
4057 |
} |
9ee318a78
|
4058 |
|
cdd6c482c
|
4059 |
return event; |
0793a61d4
|
4060 |
} |
cdd6c482c
|
4061 4062 |
static int perf_copy_attr(struct perf_event_attr __user *uattr, struct perf_event_attr *attr) |
974802eaa
|
4063 |
{ |
974802eaa
|
4064 |
u32 size; |
cdf8073d6
|
4065 |
int ret; |
974802eaa
|
4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 |
if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) return -EFAULT; /* * zero the full structure, so that a short copy will be nice. */ memset(attr, 0, sizeof(*attr)); ret = get_user(size, &uattr->size); if (ret) return ret; if (size > PAGE_SIZE) /* silly large */ goto err_size; if (!size) /* abi compat */ size = PERF_ATTR_SIZE_VER0; if (size < PERF_ATTR_SIZE_VER0) goto err_size; /* * If we're handed a bigger struct than we know of, |
cdf8073d6
|
4090 4091 4092 |
* ensure all the unknown bits are 0 - i.e. new * user-space does not rely on any kernel feature * extensions we don't know about yet. |
974802eaa
|
4093 4094 |
*/ if (size > sizeof(*attr)) { |
cdf8073d6
|
4095 4096 4097 |
unsigned char __user *addr; unsigned char __user *end; unsigned char val; |
974802eaa
|
4098 |
|
cdf8073d6
|
4099 4100 |
addr = (void __user *)uattr + sizeof(*attr); end = (void __user *)uattr + size; |
974802eaa
|
4101 |
|
cdf8073d6
|
4102 |
for (; addr < end; addr++) { |
974802eaa
|
4103 4104 4105 4106 4107 4108 |
ret = get_user(val, addr); if (ret) return ret; if (val) goto err_size; } |
b3e62e350
|
4109 |
size = sizeof(*attr); |
974802eaa
|
4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 |
} ret = copy_from_user(attr, uattr, size); if (ret) return -EFAULT; /* * If the type exists, the corresponding creation will verify * the attr->config. */ if (attr->type >= PERF_TYPE_MAX) return -EINVAL; if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) return -EINVAL; if (attr->read_format & ~(PERF_FORMAT_MAX-1)) return -EINVAL; out: return ret; err_size: put_user(sizeof(*attr), &uattr->size); ret = -E2BIG; goto out; } |
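perf_copy_attr() gives the ABI forward compatibility: newer user-space may hand in a larger perf_event_attr, but every byte past what this kernel understands must be zero, otherwise the call fails with E2BIG. A user-space-flavoured sketch of that rule (hypothetical helper, not part of the kernel):

/* Illustrative: accept a larger struct only if the unknown tail is all zero. */
#include <stddef.h>

static int tail_is_zero(const unsigned char *buf, size_t known, size_t given)
{
        size_t i;

        for (i = known; i < given; i++)
                if (buf[i])
                        return 0;       /* caller should reject, e.g. with E2BIG */
        return 1;
}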
cdd6c482c
|
4140 |
int perf_event_set_output(struct perf_event *event, int output_fd) |
a4be7c277
|
4141 |
{ |
cdd6c482c
|
4142 |
struct perf_event *output_event = NULL; |
a4be7c277
|
4143 |
struct file *output_file = NULL; |
cdd6c482c
|
4144 |
struct perf_event *old_output; |
a4be7c277
|
4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 |
int fput_needed = 0; int ret = -EINVAL; if (!output_fd) goto set; output_file = fget_light(output_fd, &fput_needed); if (!output_file) return -EBADF; if (output_file->f_op != &perf_fops) goto out; |
cdd6c482c
|
4157 |
output_event = output_file->private_data; |
a4be7c277
|
4158 4159 |
/* Don't chain output fds */ |
cdd6c482c
|
4160 |
if (output_event->output) |
a4be7c277
|
4161 4162 4163 |
goto out; /* Don't set an output fd when we already have an output channel */ |
cdd6c482c
|
4164 |
if (event->data) |
a4be7c277
|
4165 4166 4167 4168 4169 |
goto out; atomic_long_inc(&output_file->f_count); set: |
cdd6c482c
|
4170 4171 4172 4173 |
mutex_lock(&event->mmap_mutex); old_output = event->output; rcu_assign_pointer(event->output, output_event); mutex_unlock(&event->mmap_mutex); |
a4be7c277
|
4174 4175 4176 4177 |
if (old_output) { /* * we need to make sure no existing perf_output_*() |
cdd6c482c
|
4178 |
* is still referencing this event. |
a4be7c277
|
4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 |
*/ synchronize_rcu(); fput(old_output->filp); } ret = 0; out: fput_light(output_file, fput_needed); return ret; } |
0793a61d4
|
4189 |
/** |
cdd6c482c
|
4190 |
* sys_perf_event_open - open a performance event, associate it to a task/cpu |
9f66a3810
|
4191 |
* |
cdd6c482c
|
4192 |
* @attr_uptr: event_id type attributes for monitoring/sampling |
0793a61d4
|
4193 |
* @pid: target pid |
9f66a3810
|
4194 |
* @cpu: target cpu |
cdd6c482c
|
4195 |
* @group_fd: group leader event fd |
0793a61d4
|
4196 |
*/ |
cdd6c482c
|
4197 4198 |
SYSCALL_DEFINE5(perf_event_open, struct perf_event_attr __user *, attr_uptr, |
2743a5b0f
|
4199 |
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) |
0793a61d4
|
4200 |
{ |
cdd6c482c
|
4201 4202 4203 4204 |
struct perf_event *event, *group_leader; struct perf_event_attr attr; struct perf_event_context *ctx; struct file *event_file = NULL; |
04289bb98
|
4205 4206 |
struct file *group_file = NULL; int fput_needed = 0; |
9b51f66dc
|
4207 |
int fput_needed2 = 0; |
dc86cabe4
|
4208 |
int err; |
0793a61d4
|
4209 |
|
2743a5b0f
|
4210 |
/* for future expandability... */ |
a4be7c277
|
4211 |
if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) |
2743a5b0f
|
4212 |
return -EINVAL; |
dc86cabe4
|
4213 4214 4215 |
err = perf_copy_attr(attr_uptr, &attr); if (err) return err; |
eab656ae0
|
4216 |
|
0764771da
|
4217 4218 4219 4220 |
if (!attr.exclude_kernel) { if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) return -EACCES; } |
df58ab24b
|
4221 |
if (attr.freq) { |
cdd6c482c
|
4222 |
if (attr.sample_freq > sysctl_perf_event_sample_rate) |
df58ab24b
|
4223 4224 |
return -EINVAL; } |
04289bb98
|
4225 |
/* |
ccff286d8
|
4226 4227 4228 4229 4230 4231 4232 |
* Get the target context (task or percpu): */ ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) return PTR_ERR(ctx); /* |
cdd6c482c
|
4233 |
* Look up the group leader (we will attach this event to it): |
04289bb98
|
4234 4235 |
*/ group_leader = NULL; |
a4be7c277
|
4236 |
if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { |
dc86cabe4
|
4237 |
err = -EINVAL; |
04289bb98
|
4238 4239 |
group_file = fget_light(group_fd, &fput_needed); if (!group_file) |
ccff286d8
|
4240 |
goto err_put_context; |
04289bb98
|
4241 |
if (group_file->f_op != &perf_fops) |
ccff286d8
|
4242 |
goto err_put_context; |
04289bb98
|
4243 4244 4245 |
group_leader = group_file->private_data; /* |
ccff286d8
|
4246 4247 4248 4249 4250 4251 4252 4253 |
* Do not allow a recursive hierarchy (this new sibling * becoming part of another group-sibling): */ if (group_leader->group_leader != group_leader) goto err_put_context; /* * Do not allow to attach to a group in a different * task or CPU context: |
04289bb98
|
4254 |
*/ |
ccff286d8
|
4255 4256 |
if (group_leader->ctx != ctx) goto err_put_context; |
3b6f9e5cb
|
4257 4258 4259 |
/* * Only a group leader can be exclusive or pinned */ |
0d48696f8
|
4260 |
if (attr.exclusive || attr.pinned) |
3b6f9e5cb
|
4261 |
goto err_put_context; |
04289bb98
|
4262 |
} |
cdd6c482c
|
4263 |
event = perf_event_alloc(&attr, cpu, ctx, group_leader, |
b84fbc9fb
|
4264 |
NULL, GFP_KERNEL); |
cdd6c482c
|
4265 4266 |
err = PTR_ERR(event); if (IS_ERR(event)) |
0793a61d4
|
4267 |
goto err_put_context; |
cdd6c482c
|
4268 |
err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); |
dc86cabe4
|
4269 |
if (err < 0) |
9b51f66dc
|
4270 |
goto err_free_put_context; |
cdd6c482c
|
4271 4272 |
event_file = fget_light(err, &fput_needed2); if (!event_file) |
9b51f66dc
|
4273 |
goto err_free_put_context; |
a4be7c277
|
4274 |
if (flags & PERF_FLAG_FD_OUTPUT) { |
cdd6c482c
|
4275 |
err = perf_event_set_output(event, group_fd); |
dc86cabe4
|
4276 4277 |
if (err) goto err_fput_free_put_context; |
a4be7c277
|
4278 |
} |
cdd6c482c
|
4279 |
event->filp = event_file; |
ad3a37de8
|
4280 |
WARN_ON_ONCE(ctx->parent_ctx); |
d859e29fe
|
4281 |
mutex_lock(&ctx->mutex); |
cdd6c482c
|
4282 |
perf_install_in_context(ctx, event, cpu); |
ad3a37de8
|
4283 |
++ctx->generation; |
d859e29fe
|
4284 |
mutex_unlock(&ctx->mutex); |
9b51f66dc
|
4285 |
|
cdd6c482c
|
4286 |
event->owner = current; |
082ff5a27
|
4287 |
get_task_struct(current); |
cdd6c482c
|
4288 4289 4290 |
mutex_lock(¤t->perf_event_mutex); list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); |
082ff5a27
|
4291 |
|
dc86cabe4
|
4292 |
err_fput_free_put_context: |
cdd6c482c
|
4293 |
fput_light(event_file, fput_needed2); |
0793a61d4
|
4294 |
|
9b51f66dc
|
4295 |
err_free_put_context: |
dc86cabe4
|
4296 |
if (err < 0) |
cdd6c482c
|
4297 |
kfree(event); |
0793a61d4
|
4298 4299 |
err_put_context: |
dc86cabe4
|
4300 4301 4302 4303 |
if (err < 0) put_ctx(ctx); fput_light(group_file, fput_needed); |
0793a61d4
|
4304 |
|
dc86cabe4
|
4305 |
return err; |
0793a61d4
|
4306 |
} |
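For reference, a minimal user-space sketch of calling this syscall, counting task clock for the calling thread; it assumes a raw syscall(2) invocation (no libc wrapper) and trims error handling:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_SOFTWARE;
        attr.size = sizeof(attr);
        attr.config = PERF_COUNT_SW_TASK_CLOCK;
        attr.disabled = 1;

        /* pid = 0 (this task), cpu = -1 (any cpu), group_fd = -1, flags = 0 */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;

        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload being measured ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("task clock: %llu ns\n", (unsigned long long)count);
        close(fd);
        return 0;
}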
9b51f66dc
|
4307 |
/* |
cdd6c482c
|
4308 |
* inherit an event from parent task to child task: |
9b51f66dc
|
4309 |
*/ |
cdd6c482c
|
4310 4311 |
static struct perf_event * inherit_event(struct perf_event *parent_event, |
9b51f66dc
|
4312 |
struct task_struct *parent, |
cdd6c482c
|
4313 |
struct perf_event_context *parent_ctx, |
9b51f66dc
|
4314 |
struct task_struct *child, |
cdd6c482c
|
4315 4316 |
struct perf_event *group_leader, struct perf_event_context *child_ctx) |
9b51f66dc
|
4317 |
{ |
cdd6c482c
|
4318 |
struct perf_event *child_event; |
9b51f66dc
|
4319 |
|
d859e29fe
|
4320 |
/* |
cdd6c482c
|
4321 4322 |
* Instead of creating recursive hierarchies of events, * we link inherited events back to the original parent, |
d859e29fe
|
4323 4324 4325 |
* which has a filp for sure, which we use as the reference * count: */ |
cdd6c482c
|
4326 4327 |
if (parent_event->parent) parent_event = parent_event->parent; |
d859e29fe
|
4328 |
|
cdd6c482c
|
4329 4330 4331 |
child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, child_ctx, group_leader, parent_event, |
b84fbc9fb
|
4332 |
GFP_KERNEL); |
cdd6c482c
|
4333 4334 |
if (IS_ERR(child_event)) return child_event; |
c93f76690
|
4335 |
get_ctx(child_ctx); |
9b51f66dc
|
4336 4337 |
/* |
cdd6c482c
|
4338 |
* Make the child state follow the state of the parent event, |
0d48696f8
|
4339 |
* not its attr.disabled bit. We hold the parent's mutex, |
cdd6c482c
|
4340 |
* so we won't race with perf_event_{en, dis}able_family. |
564c2b210
|
4341 |
*/ |
cdd6c482c
|
4342 4343 |
if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) child_event->state = PERF_EVENT_STATE_INACTIVE; |
564c2b210
|
4344 |
else |
cdd6c482c
|
4345 |
child_event->state = PERF_EVENT_STATE_OFF; |
564c2b210
|
4346 |
|
cdd6c482c
|
4347 4348 |
if (parent_event->attr.freq) child_event->hw.sample_period = parent_event->hw.sample_period; |
bd2b5b128
|
4349 |
|
564c2b210
|
4350 |
/* |
9b51f66dc
|
4351 4352 |
* Link it up in the child's context: */ |
cdd6c482c
|
4353 |
add_event_to_ctx(child_event, child_ctx); |
9b51f66dc
|
4354 |
|
9b51f66dc
|
4355 4356 |
/* * Get a reference to the parent filp - we will fput it |
cdd6c482c
|
4357 |
* when the child event exits. This is safe to do because |
9b51f66dc
|
4358 4359 4360 |
* we are in the parent and we know that the filp still * exists and has a nonzero count: */ |
cdd6c482c
|
4361 |
atomic_long_inc(&parent_event->filp->f_count); |
9b51f66dc
|
4362 |
|
d859e29fe
|
4363 |
/* |
cdd6c482c
|
4364 |
* Link this into the parent event's child list |
d859e29fe
|
4365 |
*/ |
cdd6c482c
|
4366 4367 4368 4369 |
WARN_ON_ONCE(parent_event->ctx->parent_ctx); mutex_lock(&parent_event->child_mutex); list_add_tail(&child_event->child_list, &parent_event->child_list); mutex_unlock(&parent_event->child_mutex); |
d859e29fe
|
4370 |
|
cdd6c482c
|
4371 |
return child_event; |
d859e29fe
|
4372 |
} |
cdd6c482c
|
4373 |
static int inherit_group(struct perf_event *parent_event, |
d859e29fe
|
4374 |
struct task_struct *parent, |
cdd6c482c
|
4375 |
struct perf_event_context *parent_ctx, |
d859e29fe
|
4376 |
struct task_struct *child, |
cdd6c482c
|
4377 |
struct perf_event_context *child_ctx) |
d859e29fe
|
4378 |
{ |
cdd6c482c
|
4379 4380 4381 |
struct perf_event *leader; struct perf_event *sub; struct perf_event *child_ctr; |
d859e29fe
|
4382 |
|
cdd6c482c
|
4383 |
leader = inherit_event(parent_event, parent, parent_ctx, |
d859e29fe
|
4384 |
child, NULL, child_ctx); |
d5d2bc0dd
|
4385 4386 |
if (IS_ERR(leader)) return PTR_ERR(leader); |
cdd6c482c
|
4387 4388 |
list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { child_ctr = inherit_event(sub, parent, parent_ctx, |
d5d2bc0dd
|
4389 4390 4391 |
child, leader, child_ctx); if (IS_ERR(child_ctr)) return PTR_ERR(child_ctr); |
d859e29fe
|
4392 |
} |
9b51f66dc
|
4393 4394 |
return 0; } |
cdd6c482c
|
4395 |
static void sync_child_event(struct perf_event *child_event, |
38b200d67
|
4396 |
struct task_struct *child) |
d859e29fe
|
4397 |
{ |
cdd6c482c
|
4398 |
struct perf_event *parent_event = child_event->parent; |
8bc209595
|
4399 |
u64 child_val; |
d859e29fe
|
4400 |
|
cdd6c482c
|
4401 4402 |
if (child_event->attr.inherit_stat) perf_event_read_event(child_event, child); |
38b200d67
|
4403 |
|
cdd6c482c
|
4404 |
child_val = atomic64_read(&child_event->count); |
d859e29fe
|
4405 4406 4407 4408 |
/* * Add back the child's count to the parent's count: */ |
cdd6c482c
|
4409 4410 4411 4412 4413 |
atomic64_add(child_val, &parent_event->count); atomic64_add(child_event->total_time_enabled, &parent_event->child_total_time_enabled); atomic64_add(child_event->total_time_running, &parent_event->child_total_time_running); |
d859e29fe
|
4414 4415 |
/* |
cdd6c482c
|
4416 |
* Remove this event from the parent's list |
d859e29fe
|
4417 |
*/ |
cdd6c482c
|
4418 4419 4420 4421 |
WARN_ON_ONCE(parent_event->ctx->parent_ctx); mutex_lock(&parent_event->child_mutex); list_del_init(&child_event->child_list); mutex_unlock(&parent_event->child_mutex); |
d859e29fe
|
4422 4423 |
/* |
cdd6c482c
|
4424 |
* Release the parent event, if this was the last |
d859e29fe
|
4425 4426 |
* reference to it. */ |
cdd6c482c
|
4427 |
fput(parent_event->filp); |
d859e29fe
|
4428 |
} |
9b51f66dc
|
4429 |
static void |
cdd6c482c
|
4430 4431 |
__perf_event_exit_task(struct perf_event *child_event, struct perf_event_context *child_ctx, |
38b200d67
|
4432 |
struct task_struct *child) |
9b51f66dc
|
4433 |
{ |
cdd6c482c
|
4434 |
struct perf_event *parent_event; |
9b51f66dc
|
4435 |
|
cdd6c482c
|
4436 4437 |
update_event_times(child_event); perf_event_remove_from_context(child_event); |
0cc0c027d
|
4438 |
|
cdd6c482c
|
4439 |
parent_event = child_event->parent; |
9b51f66dc
|
4440 |
/* |
cdd6c482c
|
4441 |
* It can happen that parent exits first, and has events |
9b51f66dc
|
4442 |
* that are still around due to the child reference. These |
cdd6c482c
|
4443 |
* events need to be zapped - but otherwise linger. |
9b51f66dc
|
4444 |
*/ |
cdd6c482c
|
4445 4446 4447 |
if (parent_event) { sync_child_event(child_event, child); free_event(child_event); |
4bcf349a0
|
4448 |
} |
9b51f66dc
|
4449 4450 4451 |
} /* |
cdd6c482c
|
4452 |
* When a child task exits, feed back event values to parent events. |
9b51f66dc
|
4453 |
*/ |
cdd6c482c
|
4454 |
void perf_event_exit_task(struct task_struct *child) |
9b51f66dc
|
4455 |
{ |
cdd6c482c
|
4456 4457 |
struct perf_event *child_event, *tmp; struct perf_event_context *child_ctx; |
a63eaf34a
|
4458 |
unsigned long flags; |
9b51f66dc
|
4459 |
|
cdd6c482c
|
4460 4461 |
if (likely(!child->perf_event_ctxp)) { perf_event_task(child, NULL, 0); |
9b51f66dc
|
4462 |
return; |
9f498cc5b
|
4463 |
} |
9b51f66dc
|
4464 |
|
a63eaf34a
|
4465 |
local_irq_save(flags); |
ad3a37de8
|
4466 4467 4468 4469 4470 4471 |
/* * We can't reschedule here because interrupts are disabled, * and either child is current or it is a task that can't be * scheduled, so we are now safe from rescheduling changing * our context. */ |
cdd6c482c
|
4472 4473 |
child_ctx = child->perf_event_ctxp; __perf_event_task_sched_out(child_ctx); |
c93f76690
|
4474 4475 4476 |
/* * Take the context lock here so that if find_get_context is |
cdd6c482c
|
4477 |
* reading child->perf_event_ctxp, we wait until it has |
c93f76690
|
4478 4479 4480 |
* incremented the context's refcount before we do put_ctx below. */ spin_lock(&child_ctx->lock); |
cdd6c482c
|
4481 |
child->perf_event_ctxp = NULL; |
71a851b4d
|
4482 4483 4484 |
/* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all |
cdd6c482c
|
4485 |
* the events from it. |
71a851b4d
|
4486 4487 |
*/ unclone_ctx(child_ctx); |
9f498cc5b
|
4488 4489 4490 |
spin_unlock_irqrestore(&child_ctx->lock, flags); /* |
cdd6c482c
|
4491 4492 4493 |
* Report the task dead after unscheduling the events so that we * won't get any samples after PERF_RECORD_EXIT. We can however still * get a few PERF_RECORD_READ events. |
9f498cc5b
|
4494 |
*/ |
cdd6c482c
|
4495 |
perf_event_task(child, child_ctx, 0); |
a63eaf34a
|
4496 |
|
66fff2248
|
4497 4498 4499 |
/* * We can recurse on the same lock type through: * |
cdd6c482c
|
4500 4501 4502 |
* __perf_event_exit_task() * sync_child_event() * fput(parent_event->filp) |
66fff2248
|
4503 4504 4505 4506 4507 4508 |
* perf_release() * mutex_lock(&ctx->mutex) * * But since it's the parent context it won't be the same instance. |
a63eaf34a
|
4509 |
|
8bc209595
|
4510 |
again: |
cdd6c482c
|
4511 |
list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, |
65abc8653
|
4512 |
group_entry) |
cdd6c482c
|
4513 |
__perf_event_exit_task(child_event, child_ctx, child); |
8bc209595
|
4514 4515 |
/* |
cdd6c482c
|
4516 |
* If the last event was a group event, it will have appended all |
8bc209595
|
4517 4518 4519 |
* its siblings to the list, but we obtained 'tmp' before that which * will still point to the list head terminating the iteration. */ |
65abc8653
|
4520 |
if (!list_empty(&child_ctx->group_list)) |
8bc209595
|
4521 |
goto again; |
a63eaf34a
|
4522 4523 4524 4525 |
mutex_unlock(&child_ctx->mutex); put_ctx(child_ctx); |
9b51f66dc
|
4526 4527 4528 |
} /* |
bbbee9082
|
4529 4530 4531 |
* free an unexposed, unused context as created by inheritance by * init_task below, used by fork() in case of fail. */ |
cdd6c482c
|
4532 |
void perf_event_free_task(struct task_struct *task) |
bbbee9082
|
4533 |
{ |
cdd6c482c
|
4534 4535 |
struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event *event, *tmp; |
bbbee9082
|
4536 4537 4538 4539 4540 4541 |
if (!ctx) return; mutex_lock(&ctx->mutex); again: |
cdd6c482c
|
4542 4543 |
list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { struct perf_event *parent = event->parent; |
bbbee9082
|
4544 4545 4546 4547 4548 |
if (WARN_ON_ONCE(!parent)) continue; mutex_lock(&parent->child_mutex); |
cdd6c482c
|
4549 |
list_del_init(&event->child_list); |
bbbee9082
|
4550 4551 4552 |
mutex_unlock(&parent->child_mutex); fput(parent->filp); |
cdd6c482c
|
4553 4554 |
list_del_event(event, ctx); free_event(event); |
bbbee9082
|
4555 |
} |
65abc8653
|
4556 |
if (!list_empty(&ctx->group_list)) |
bbbee9082
|
4557 4558 4559 4560 4561 4562 4563 4564 |
goto again; mutex_unlock(&ctx->mutex); put_ctx(ctx); } /* |
cdd6c482c
|
4565 |
* Initialize the perf_event context in task_struct |
9b51f66dc
|
4566 |
*/ |
cdd6c482c
|
4567 |
int perf_event_init_task(struct task_struct *child) |
9b51f66dc
|
4568 |
{ |
cdd6c482c
|
4569 4570 4571 |
struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; struct perf_event *event; |
9b51f66dc
|
4572 |
struct task_struct *parent = current; |
564c2b210
|
4573 |
int inherited_all = 1; |
6ab423e0e
|
4574 |
int ret = 0; |
9b51f66dc
|
4575 |
|
cdd6c482c
|
4576 |
child->perf_event_ctxp = NULL; |
9b51f66dc
|
4577 |
|
cdd6c482c
|
4578 4579 |
mutex_init(&child->perf_event_mutex); INIT_LIST_HEAD(&child->perf_event_list); |
082ff5a27
|
4580 |
|
cdd6c482c
|
4581 |
if (likely(!parent->perf_event_ctxp)) |
6ab423e0e
|
4582 |
return 0; |
9b51f66dc
|
4583 4584 |
/* * This is executed from the parent task context, so inherit |
cdd6c482c
|
4585 |
* events that have been marked for cloning. |
a63eaf34a
|
4586 |
* First allocate and initialize a context for the child. |
9b51f66dc
|
4587 |
*/ |
cdd6c482c
|
4588 |
child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
a63eaf34a
|
4589 |
if (!child_ctx) |
6ab423e0e
|
4590 |
return -ENOMEM; |
9b51f66dc
|
4591 |
|
cdd6c482c
|
4592 4593 |
__perf_event_init_context(child_ctx, child); child->perf_event_ctxp = child_ctx; |
c93f76690
|
4594 |
get_task_struct(child); |
a63eaf34a
|
4595 |
|
9b51f66dc
|
4596 |
/* |
25346b93c
|
4597 4598 |
* If the parent's context is a clone, pin it so it won't get * swapped under us. |
ad3a37de8
|
4599 |
*/ |
25346b93c
|
4600 |
parent_ctx = perf_pin_task_context(parent); |
ad3a37de8
|
4601 4602 4603 4604 4605 4606 |
/* * No need to check if parent_ctx != NULL here; since we saw * it non-NULL earlier, the only reason for it to become NULL * is if we exit, and since we're currently in the middle of * a fork we can't be exiting at the same time. */ |
ad3a37de8
|
4607 4608 |
/* |
9b51f66dc
|
4609 4610 4611 |
* Lock the parent list. No need to lock the child - not PID * hashed yet and not running, so nobody can access it. */ |
d859e29fe
|
4612 |
mutex_lock(&parent_ctx->mutex); |
9b51f66dc
|
4613 4614 4615 4616 4617 |
/* * We don't have to disable NMIs - we are only looking at * the list, not manipulating it: */ |
27f9994c5
|
4618 |
list_for_each_entry(event, &parent_ctx->group_list, group_entry) { |
d7b629a34
|
4619 |
|
cdd6c482c
|
4620 |
if (!event->attr.inherit) { |
564c2b210
|
4621 |
inherited_all = 0; |
9b51f66dc
|
4622 |
continue; |
564c2b210
|
4623 |
} |
9b51f66dc
|
4624 |
|
cdd6c482c
|
4625 |
ret = inherit_group(event, parent, parent_ctx, |
6ab423e0e
|
4626 4627 |
child, child_ctx); if (ret) { |
564c2b210
|
4628 |
inherited_all = 0; |
9b51f66dc
|
4629 |
break; |
564c2b210
|
4630 4631 4632 4633 4634 4635 4636 |
} } if (inherited_all) { /* * Mark the child context as a clone of the parent * context, or of whatever the parent is a clone of. |
ad3a37de8
|
4637 4638 |
* Note that if the parent is a clone, it could get * uncloned at any point, but that doesn't matter |
cdd6c482c
|
4639 |
* because the list of events and the generation |
ad3a37de8
|
4640 |
* count can't have changed since we took the mutex. |
564c2b210
|
4641 |
*/ |
ad3a37de8
|
4642 4643 4644 |
cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); if (cloned_ctx) { child_ctx->parent_ctx = cloned_ctx; |
25346b93c
|
4645 |
child_ctx->parent_gen = parent_ctx->parent_gen; |
564c2b210
|
4646 4647 4648 4649 4650 |
} else { child_ctx->parent_ctx = parent_ctx; child_ctx->parent_gen = parent_ctx->generation; } get_ctx(child_ctx->parent_ctx); |
9b51f66dc
|
4651 |
} |
d859e29fe
|
4652 |
mutex_unlock(&parent_ctx->mutex); |
6ab423e0e
|
4653 |
|
25346b93c
|
4654 |
perf_unpin_context(parent_ctx); |
ad3a37de8
|
4655 |
|
6ab423e0e
|
4656 |
return ret; |
9b51f66dc
|
4657 |
} |
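/*
 * Editorial aside (not part of perf_event.c): the (parent_ctx, parent_gen)
 * pair recorded above can later be used to decide whether two task contexts
 * are still clones of a common parent.  The helper below is a reconstruction
 * for illustration only, under that assumption; the file's real equivalence
 * check may differ in detail (for instance it may also refuse pinned
 * contexts).
 */
static int ctx_clones_match(struct perf_event_context *ctx1,
			    struct perf_event_context *ctx2)
{
	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
	       ctx1->parent_gen == ctx2->parent_gen;
}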
static void __cpuinit perf_event_init_cpu(int cpu)
{
	struct perf_cpu_context *cpuctx;

	cpuctx = &per_cpu(perf_cpu_context, cpu);
	__perf_event_init_context(&cpuctx->ctx, NULL);

	spin_lock(&perf_resource_lock);
	cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
	spin_unlock(&perf_resource_lock);

	hw_perf_event_setup(cpu);
}
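/*
 * Editorial aside (not part of perf_event.c): perf_event_init_cpu() above
 * uses the per_cpu() accessor to reach a particular CPU's copy of a
 * DEFINE_PER_CPU variable.  A minimal sketch of that accessor pattern, with
 * a hypothetical counter, follows.
 */
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(int, foo_count);

static void foo_count_bump_local(void)
{
	int cpu = get_cpu();	/* pins us to this CPU (disables preemption) */

	per_cpu(foo_count, cpu)++;
	put_cpu();
}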
#ifdef CONFIG_HOTPLUG_CPU
static void __perf_event_exit_cpu(void *info)
{
	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
	struct perf_event_context *ctx = &cpuctx->ctx;
	struct perf_event *event, *tmp;

	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
		__perf_event_remove_from_context(event);
}

static void perf_event_exit_cpu(int cpu)
{
	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
	struct perf_event_context *ctx = &cpuctx->ctx;

	mutex_lock(&ctx->mutex);
	smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
	mutex_unlock(&ctx->mutex);
}
#else
static inline void perf_event_exit_cpu(int cpu) { }
#endif

static int __cpuinit perf_cpu_notify(struct notifier_block *self,
				     unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	switch (action) {

	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		perf_event_init_cpu(cpu);
		break;

	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		hw_perf_event_setup_online(cpu);
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		perf_event_exit_cpu(cpu);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}
/*
 * This has to have a higher priority than migration_notifier in sched.c.
 */
static struct notifier_block __cpuinitdata perf_cpu_nb = {
	.notifier_call		= perf_cpu_notify,
	.priority		= 20,
};

void __init perf_event_init(void)
{
	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
			(void *)(long)smp_processor_id());
	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
			(void *)(long)smp_processor_id());
	register_cpu_notifier(&perf_cpu_nb);
}
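/*
 * Editorial aside (not part of perf_event.c): perf_event_init() above first
 * replays CPU_UP_PREPARE and CPU_ONLINE for the boot CPU by calling the
 * notifier directly, then registers it for future hotplug transitions.  The
 * skeleton below shows the same register_cpu_notifier() pattern as another
 * subsystem might use it; foo_cpu_notify(), foo_cpu_nb and foo_init() are
 * hypothetical names used only for illustration.
 */
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kernel.h>

static int __cpuinit foo_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
		printk(KERN_INFO "foo: cpu %u is online\n", cpu);
		break;
	case CPU_DOWN_PREPARE:
		printk(KERN_INFO "foo: cpu %u is going down\n", cpu);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata foo_cpu_nb = {
	.notifier_call	= foo_cpu_notify,
};

static int __init foo_init(void)
{
	register_cpu_notifier(&foo_cpu_nb);
	return 0;
}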
static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
{
	return sprintf(buf, "%d\n", perf_reserved_percpu);
}

static ssize_t
perf_set_reserve_percpu(struct sysdev_class *class,
			const char *buf,
			size_t count)
{
	struct perf_cpu_context *cpuctx;
	unsigned long val;
	int err, cpu, mpt;

	err = strict_strtoul(buf, 10, &val);
	if (err)
		return err;
	if (val > perf_max_events)
		return -EINVAL;

	spin_lock(&perf_resource_lock);
	perf_reserved_percpu = val;
	for_each_online_cpu(cpu) {
		cpuctx = &per_cpu(perf_cpu_context, cpu);
		spin_lock_irq(&cpuctx->ctx.lock);
		mpt = min(perf_max_events - cpuctx->ctx.nr_events,
			  perf_max_events - perf_reserved_percpu);
		cpuctx->max_pertask = mpt;
		spin_unlock_irq(&cpuctx->ctx.lock);
	}
	spin_unlock(&perf_resource_lock);

	return count;
}

static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
{
	return sprintf(buf, "%d\n", perf_overcommit);
}

static ssize_t
perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
{
	unsigned long val;
	int err;

	err = strict_strtoul(buf, 10, &val);
	if (err)
		return err;
	if (val > 1)
		return -EINVAL;

	spin_lock(&perf_resource_lock);
	perf_overcommit = val;
	spin_unlock(&perf_resource_lock);

	return count;
}

static SYSDEV_CLASS_ATTR(
				reserve_percpu,
				0644,
				perf_show_reserve_percpu,
				perf_set_reserve_percpu
			);

static SYSDEV_CLASS_ATTR(
				overcommit,
				0644,
				perf_show_overcommit,
				perf_set_overcommit
			);

static struct attribute *perfclass_attrs[] = {
	&attr_reserve_percpu.attr,
	&attr_overcommit.attr,
	NULL
};

static struct attribute_group perfclass_attr_group = {
	.attrs			= perfclass_attrs,
	.name			= "perf_events",
};

static int __init perf_event_sysfs_init(void)
{
	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
				  &perfclass_attr_group);
}
device_initcall(perf_event_sysfs_init);
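/*
 * Editorial aside (not part of perf_event.c): the attribute group created
 * above is named "perf_events" and hangs off the cpu sysdev class, so the
 * two knobs should normally appear as
 * /sys/devices/system/cpu/perf_events/{reserve_percpu,overcommit}; the exact
 * path is an assumption inferred from the group name, not stated in this
 * file.  A small userspace sketch that adjusts reserve_percpu, equivalent to
 * "echo 4 > .../reserve_percpu":
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/perf_events/reserve_percpu", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "4\n");
	return fclose(f) == 0 ? 0 : 1;
}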