offwaketime_kern.c 3.78 KB
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#define _(P)                                                                   \
	({                                                                     \
		typeof(P) val;                                                 \
		bpf_probe_read_kernel(&val, sizeof(val), &(P));                \
		val;                                                           \
	})

#define MINBLOCK_US	1

struct key_t {
	char waker[TASK_COMM_LEN];
	char target[TASK_COMM_LEN];
	u32 wret;
	u32 tret;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct key_t);
	__type(value, u64);
	__uint(max_entries, 10000);
} counts SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, 10000);
} start SEC(".maps");

struct wokeby_t {
	char name[TASK_COMM_LEN];
	u32 ret;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, struct wokeby_t);
	__uint(max_entries, 10000);
} wokeby SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, 10000);
} stackmap SEC(".maps");

#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)

SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	struct wokeby_t woke;
	u32 pid;

	pid = _(p->pid);

	bpf_get_current_comm(&woke.name, sizeof(woke.name));
	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
	return 0;
}

static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}
	(*val) += delta;
	return 0;
}

#if 1
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
	unsigned long long pad;
	char prev_comm[16];
	int prev_pid;
	int prev_prio;
	long long prev_state;
	char next_comm[16];
	int next_pid;
	int next_prio;
};
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
	/* record previous thread sleep time */
	u32 pid = ctx->prev_pid;
#else
SEC("kprobe/finish_task_switch")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	/* record previous thread sleep time */
	u32 pid = _(p->pid);
#endif
	u64 delta, ts, *tsp;

	ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);

	/* calculate current thread's delta time */
	pid = bpf_get_current_pid_tgid();
	tsp = bpf_map_lookup_elem(&start, &pid);
	if (!tsp)
		/* missed start or filtered */
		return 0;

	delta = bpf_ktime_get_ns() - *tsp;
	bpf_map_delete_elem(&start, &pid);
	delta = delta / 1000;
	if (delta < MINBLOCK_US)
		return 0;

	return update_counts(ctx, pid, delta);
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;