Blame view
kernel/trace/bpf_trace.c
13.3 KB
2541517c3 tracing, perf: Im...
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
9c959c863 tracing: Allow BP... |
#include <linux/ctype.h> |
2541517c3 tracing, perf: Im... |
#include "trace.h" |
2541517c3 tracing, perf: Im... |
/**
 * trace_call_bpf - invoke BPF program
 * @prog: BPF program
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	unsigned int ret;

	if (in_nmi()) /* not supported yet */
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		ret = 0;
		goto out;
	}

	rcu_read_lock();
	ret = BPF_PROG_RUN(prog, ctx);
	rcu_read_unlock();

 out:
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *dst = (void *) (long) r1;
074f528ee bpf: convert rele... |
	int ret, size = (int) r2;
2541517c3 tracing, perf: Im... |
	void *unsafe_ptr = (void *) (long) r3;
074f528ee bpf: convert rele... |
	ret = probe_kernel_read(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
2541517c3 tracing, perf: Im... |
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
074f528ee bpf: convert rele... |
	.arg1_type	= ARG_PTR_TO_RAW_STACK,
2541517c3 tracing, perf: Im... |
	.arg2_type	= ARG_CONST_STACK_SIZE,
	.arg3_type	= ARG_ANYTHING,
};
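/*
 * Illustrative BPF-program-side use of the helper above (a sketch, not
 * part of this file; the pt_regs field is hypothetical): copy a pointer
 * from an unsafe kernel address into program stack memory; on failure
 * the destination is zeroed, as implemented above.
 *
 *	struct task_struct *task;
 *
 *	if (bpf_probe_read(&task, sizeof(task), (void *) ctx->di) < 0)
 *		return 0;	(read failed, task was zeroed)
 */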
96ae52279 bpf: Add bpf_prob... |

static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *unsafe_ptr = (void *) (long) r1;
	void *src = (void *) (long) r2;
	int size = (int) r3;

	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
		return -EPERM;
	if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
		return -EPERM;

	return probe_kernel_write(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_STACK,
	.arg3_type	= ARG_CONST_STACK_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}
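/*
 * Illustrative use from a program (a sketch; 'uptr' is a hypothetical
 * user-space pointer captured from the probed context): overwrite a
 * value in the traced task's memory, subject to the context checks
 * above. Loading any program that requests this helper triggers the
 * ratelimited warning emitted by bpf_get_probe_write_proto().
 *
 *	u64 val = 42;
 *
 *	bpf_probe_write_user(uptr, &val, sizeof(val));
 */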
9c959c863 tracing: Allow BP... |
/*
 * limited trace_printk()
8d3b7dce8 bpf: add support ... |
 * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
9c959c863 tracing: Allow BP... |
 */
static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
{
	char *fmt = (char *) (long) r1;
8d3b7dce8 bpf: add support ... |
	bool str_seen = false;
9c959c863 tracing: Allow BP... |
	int mod[3] = {};
	int fmt_cnt = 0;
8d3b7dce8 bpf: add support ... |
	u64 unsafe_addr;
	char buf[64];
9c959c863 tracing: Allow BP... |
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
8d3b7dce8 bpf: add support ... |
		} else if (fmt[i] == 'p' || fmt[i] == 's') {
9c959c863 tracing: Allow BP... |
			mod[fmt_cnt]++;
			i++;
			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
				return -EINVAL;
			fmt_cnt++;
8d3b7dce8 bpf: add support ... |
			if (fmt[i - 1] == 's') {
				if (str_seen)
					/* allow only one '%s' per fmt string */
					return -EINVAL;
				str_seen = true;

				switch (fmt_cnt) {
				case 1:
					unsafe_addr = r3;
					r3 = (long) buf;
					break;
				case 2:
					unsafe_addr = r4;
					r4 = (long) buf;
					break;
				case 3:
					unsafe_addr = r5;
					r5 = (long) buf;
					break;
				}
				buf[0] = 0;
				strncpy_from_unsafe(buf,
						    (void *) (long) unsafe_addr,
						    sizeof(buf));
			}
9c959c863 tracing: Allow BP... |
			continue;
		}

		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

		if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
			return -EINVAL;
		fmt_cnt++;
	}

	return __trace_printk(1/* fake ip will not be printed */, fmt,
			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_STACK,
	.arg2_type	= ARG_CONST_STACK_SIZE,
};
0756ea3e8 bpf: allow networ... |

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	/*
	 * this program might be calling bpf_trace_printk,
	 * so allocate per-cpu printk buffers
	 */
	trace_printk_init_buffers();

	return &bpf_trace_printk_proto;
}
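/*
 * Examples against the specifier checks in bpf_trace_printk() above
 * (illustrative only):
 *
 *	"pid %d comm %s\n"	accepted: at most three specifiers, one %s
 *	"%s %s\n"		rejected: only one %s per format string
 *	"%f\n"			rejected: not in the allowed specifier set
 *	"%d %d %d %d\n"		rejected: more than three specifiers
 */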
6816a7ffc bpf, trace: add B... |
static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5) |
35578d798 bpf: Implement fu... |
{
	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
6816a7ffc bpf, trace: add B... |
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
3b1efb196 bpf, maps: flush ... |
	struct bpf_event_entry *ee;
35578d798 bpf: Implement fu... |
	struct perf_event *event;
6816a7ffc bpf, trace: add B... |

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
35578d798 bpf: Implement fu... |
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;
3b1efb196 bpf, maps: flush ... |
	ee = READ_ONCE(array->ptrs[index]);
1ca1cc98b bpf: minor cleanu... |
	if (!ee)
35578d798 bpf: Implement fu... |
		return -ENOENT;
3b1efb196 bpf, maps: flush ... |
	event = ee->event;
1ca1cc98b bpf: minor cleanu... |
	if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
		     event->attr.type != PERF_TYPE_RAW))
		return -EINVAL;
62544ce8e bpf: fix bpf_perf... |
	/* make sure event is local and doesn't have pmu::count */
6816a7ffc bpf, trace: add B... |
	if (unlikely(event->oncpu != cpu || event->pmu->count))
62544ce8e bpf: fix bpf_perf... |
		return -EINVAL;
35578d798 bpf: Implement fu... |

	/*
	 * we don't know if the function is run successfully by the
	 * return value. It can be judged in other places, such as
	 * eBPF programs.
	 */
	return perf_event_read_local(event);
}
62544ce8e bpf: fix bpf_perf... |
static const struct bpf_func_proto bpf_perf_event_read_proto = { |
35578d798 bpf: Implement fu... |
	.func		= bpf_perf_event_read,
1075ef595 bpf: make tracing... |
	.gpl_only	= true,
35578d798 bpf: Implement fu... |
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
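/*
 * Illustrative BPF-program-side use (a sketch; the 'counters' map of
 * type BPF_MAP_TYPE_PERF_EVENT_ARRAY is hypothetical):
 *
 *	u64 cnt = bpf_perf_event_read(&counters, BPF_F_CURRENT_CPU);
 *
 * A negative (-errno) value means one of the checks above failed, e.g.
 * the slot is empty or the event is not a hardware/raw counter local to
 * this CPU.
 */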
8e7a3920a bpf, perf: split ... |

static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_raw_record *raw)
a43eec304 bpf: introduce bp... |
{ |
a43eec304 bpf: introduce bp... |
	struct bpf_array *array = container_of(map, struct bpf_array, map);
d79313303 bpf, trace: fetch... |
	unsigned int cpu = smp_processor_id();
1e33759c7 bpf, trace: add B... |
	u64 index = flags & BPF_F_INDEX_MASK;
a43eec304 bpf: introduce bp... |
	struct perf_sample_data sample_data;
3b1efb196 bpf, maps: flush ... |
	struct bpf_event_entry *ee;
a43eec304 bpf: introduce bp... |
	struct perf_event *event;
a43eec304 bpf: introduce bp... |
|
1e33759c7 bpf, trace: add B... |
	if (index == BPF_F_CURRENT_CPU)
d79313303 bpf, trace: fetch... |
		index = cpu;
a43eec304 bpf: introduce bp... |
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;
3b1efb196 bpf, maps: flush ... |
	ee = READ_ONCE(array->ptrs[index]);
1ca1cc98b bpf: minor cleanu... |
	if (!ee)
a43eec304 bpf: introduce bp... |
		return -ENOENT;
3b1efb196 bpf, maps: flush ... |
	event = ee->event;
a43eec304 bpf: introduce bp... |
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;
d79313303 bpf, trace: fetch... |
	if (unlikely(event->oncpu != cpu))
a43eec304 bpf: introduce bp... |
		return -EOPNOTSUPP;

	perf_sample_data_init(&sample_data, 0, 0);
8e7a3920a bpf, perf: split ... |
	sample_data.raw = raw;
a43eec304 bpf: introduce bp... |
	perf_event_output(event, &sample_data, regs);
	return 0;
}
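/*
 * __bpf_perf_event_output() above is the common tail for the two entry
 * points that follow: bpf_perf_event_output() takes pt_regs straight from
 * the program's context, while bpf_event_output() further below has no
 * pt_regs at hand and reconstructs them via perf_fetch_caller_regs().
 */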
8e7a3920a bpf, perf: split ... |

static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
{
	struct pt_regs *regs = (struct pt_regs *)(long) r1;
	struct bpf_map *map = (struct bpf_map *)(long) r2;
	void *data = (void *)(long) r4;
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return __bpf_perf_event_output(regs, map, flags, &raw);
}
a43eec304 bpf: introduce bp... |

static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
1075ef595 bpf: make tracing... |
	.gpl_only	= true,
a43eec304 bpf: introduce bp... |
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_STACK,
	.arg5_type	= ARG_CONST_STACK_SIZE,
};
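/*
 * Illustrative BPF-program-side use (a sketch; the 'events' map of type
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY and struct event_t are hypothetical):
 * push a sample to user space through the current CPU's slot.
 *
 *	struct event_t e = { .pid = bpf_get_current_pid_tgid() };
 *
 *	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &e, sizeof(e));
 */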
bd570ff97 bpf: add event ou... |

static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
555c8a862 bpf: avoid stack ... |

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
bd570ff97 bpf: add event ou... |
{
	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
555c8a862 bpf: avoid stack ... |
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
183fc1537 kernel/trace/bpf_... |
			{
				.next	= ctx_size ? &frag : NULL,
			},
555c8a862 bpf: avoid stack ... |
			.size	= meta_size,
			.data	= meta,
		},
	};
bd570ff97 bpf: add event ou... |

	perf_fetch_caller_regs(regs);
555c8a862 bpf: avoid stack ... |
	return __bpf_perf_event_output(regs, map, flags, &raw);
bd570ff97 bpf: add event ou... |
} |
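/*
 * bpf_event_output() is not exposed through a func_proto here; it is a
 * kernel-internal entry point for program types that reuse the same
 * output path but have no pt_regs of their own (e.g. event output from
 * networking programs), hence the perf_fetch_caller_regs() call on the
 * per-cpu bpf_pt_regs scratch area above.
 */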
606274c5a bpf: introduce bp... |

static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return (long) current;
}

static const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};
9fd82b610 bpf: register BPF... |

static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
2541517c3 tracing, perf: Im... |
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_probe_read:
		return &bpf_probe_read_proto;
d9847d310 tracing: Allow BP... |
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
04fd61ab3 bpf: allow bpf pr... |
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
ffeedafbf bpf: introduce cu... |
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
606274c5a bpf: introduce bp... |
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
ffeedafbf bpf: introduce cu... |
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
9c959c863 tracing: Allow BP... |
	case BPF_FUNC_trace_printk:
0756ea3e8 bpf: allow networ... |
		return bpf_get_trace_printk_proto();
ab1973d32 bpf: let kprobe p... |
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
35578d798 bpf: Implement fu... |
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
96ae52279 bpf: Add bpf_prob... |
	case BPF_FUNC_probe_write_user:
		return bpf_get_probe_write_proto();
9fd82b610 bpf: register BPF... |
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
a43eec304 bpf: introduce bp... |
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
d5a3b1f69 bpf: introduce BP... |
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
2541517c3 tracing, perf: Im... |
	default:
9fd82b610 bpf: register BPF... |
		return tracing_func_proto(func_id);
2541517c3 tracing, perf: Im... |
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
19de99f70 bpf: fix matching... |
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					enum bpf_reg_type *reg_type)
2541517c3 tracing, perf: Im... |
{ |
2541517c3 tracing, perf: Im... |
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
2541517c3 tracing, perf: Im... |
	if (type != BPF_READ)
		return false;
2541517c3 tracing, perf: Im... |
	if (off % size != 0)
		return false;
2541517c3 tracing, perf: Im... |
	return true;
}
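/*
 * Example of what the checks above permit (illustrative): an aligned
 * 8-byte BPF_READ of a pt_regs field such as regs->ip. Writes to the
 * context, out-of-range offsets and accesses whose offset is not a
 * multiple of the access size are all rejected.
 */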
27dff4e04 bpf: Constify bpf... |

static const struct bpf_verifier_ops kprobe_prog_ops = {
2541517c3 tracing, perf: Im... |
	.get_func_proto	 = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

static struct bpf_prog_type_list kprobe_tl = {
	.ops	= &kprobe_prog_ops,
	.type	= BPF_PROG_TYPE_KPROBE,
};
9940d67c9 bpf: support bpf_... |

static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
{
	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
	 * from there and call the same bpf_perf_event_output() helper
	 */
266a0a790 bpf: avoid warnin... |
	u64 ctx = *(long *)(uintptr_t)r1;
9940d67c9 bpf: support bpf_... |

	return bpf_perf_event_output(ctx, r2, index, r4, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_STACK,
	.arg5_type	= ARG_CONST_STACK_SIZE,
};

static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
266a0a790 bpf: avoid warnin... |
	u64 ctx = *(long *)(uintptr_t)r1;
9940d67c9 bpf: support bpf_... |

	return bpf_get_stackid(ctx, r2, r3, r4, r5);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
9fd82b610 bpf: register BPF... |

static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
9940d67c9 bpf: support bpf_... |
		return &bpf_perf_event_output_proto_tp;
9fd82b610 bpf: register BPF... |
	case BPF_FUNC_get_stackid:
9940d67c9 bpf: support bpf_... |
		return &bpf_get_stackid_proto_tp;
9fd82b610 bpf: register BPF... |
	default:
		return tracing_func_proto(func_id);
	}
}
19de99f70 bpf: fix matching... |

static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    enum bpf_reg_type *reg_type)
9fd82b610 bpf: register BPF... |
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

static const struct bpf_verifier_ops tracepoint_prog_ops = {
	.get_func_proto	 = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

static struct bpf_prog_type_list tracepoint_tl = {
	.ops	= &tracepoint_prog_ops,
	.type	= BPF_PROG_TYPE_TRACEPOINT,
};
2541517c3 tracing, perf: Im... |

static int __init register_kprobe_prog_ops(void)
{
	bpf_register_prog_type(&kprobe_tl);
9fd82b610 bpf: register BPF... |
	bpf_register_prog_type(&tracepoint_tl);
2541517c3 tracing, perf: Im... |
	return 0;
}
late_initcall(register_kprobe_prog_ops);