kernel/bpf/stackmap.c

  // SPDX-License-Identifier: GPL-2.0-only
  /* Copyright (c) 2016 Facebook
   */
  #include <linux/bpf.h>
  #include <linux/jhash.h>
  #include <linux/filter.h>
  #include <linux/kernel.h>
  #include <linux/stacktrace.h>
  #include <linux/perf_event.h>
  #include <linux/elf.h>
  #include <linux/pagemap.h>
  #include <linux/irq_work.h>
  #include <linux/btf_ids.h>
  #include "percpu_freelist.h"

  #define STACK_CREATE_FLAG_MASK					\
  	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |	\
  	 BPF_F_STACK_BUILD_ID)
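  
  /*
   * A stack trace map keeps one deduplicated stack trace per bucket.
   * buckets[] is indexed by the jhash of the trace masked to n_buckets
   * (a power of two).  Bucket storage is preallocated in 'elems' and
   * recycled through a per-cpu freelist rather than allocated at
   * update time.
   */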

  struct stack_map_bucket {
  	struct pcpu_freelist_node fnode;
  	u32 hash;
  	u32 nr;
  	u64 data[];
  };
  
  struct bpf_stack_map {
  	struct bpf_map map;
  	void *elems;
  	struct pcpu_freelist freelist;
  	u32 n_buckets;
  	struct stack_map_bucket *buckets[];
  };
  /* irq_work to run up_read() for build_id lookup in nmi context */
  struct stack_map_irq_work {
  	struct irq_work irq_work;
  	struct mm_struct *mm;
  };
  
  static void do_up_read(struct irq_work *entry)
  {
  	struct stack_map_irq_work *work;
  	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
  		return;
  	work = container_of(entry, struct stack_map_irq_work, irq_work);
  	mmap_read_unlock_non_owner(work->mm);
  }
  
  static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
  static inline bool stack_map_use_build_id(struct bpf_map *map)
  {
  	return (map->map_flags & BPF_F_STACK_BUILD_ID);
  }
  
  static inline int stack_map_data_size(struct bpf_map *map)
  {
  	return stack_map_use_build_id(map) ?
  		sizeof(struct bpf_stack_build_id) : sizeof(u64);
  }
  static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
  {
  	u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
  	int err;
  	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
  					 smap->map.numa_node);
  	if (!smap->elems)
  		return -ENOMEM;
  
  	err = pcpu_freelist_init(&smap->freelist);
  	if (err)
  		goto free_elems;
  
  	pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
  			       smap->map.max_entries);
  	return 0;
  
  free_elems:
  	bpf_map_area_free(smap->elems);
  	return err;
  }
  /* Called from syscall */
  static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
  {
  	u32 value_size = attr->value_size;
  	struct bpf_stack_map *smap;
  	struct bpf_map_memory mem;
  	u64 cost, n_buckets;
  	int err;
  	if (!bpf_capable())
  		return ERR_PTR(-EPERM);
  	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
  		return ERR_PTR(-EINVAL);
  	/* check sanity of attributes */
  	if (attr->max_entries == 0 || attr->key_size != 4 ||
  	    value_size < 8 || value_size % 8)
  		return ERR_PTR(-EINVAL);
  
  	BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
  	if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
  		if (value_size % sizeof(struct bpf_stack_build_id) ||
  		    value_size / sizeof(struct bpf_stack_build_id)
  		    > sysctl_perf_event_max_stack)
  			return ERR_PTR(-EINVAL);
  	} else if (value_size / 8 > sysctl_perf_event_max_stack)
  		return ERR_PTR(-EINVAL);
  
  	/* hash table size must be power of 2 */
  	n_buckets = roundup_pow_of_two(attr->max_entries);
  
  	cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
  	cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
  	err = bpf_map_charge_init(&mem, cost);
  	if (err)
  		return ERR_PTR(err);

  	smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
  	if (!smap) {
  		bpf_map_charge_finish(&mem);
  		return ERR_PTR(-ENOMEM);
  	}

  	bpf_map_init_from_attr(&smap->map, attr);
  	smap->map.value_size = value_size;
  	smap->n_buckets = n_buckets;

  	err = get_callchain_buffers(sysctl_perf_event_max_stack);
  	if (err)
  		goto free_charge;

  	err = prealloc_elems_and_freelist(smap);
  	if (err)
  		goto put_buffers;
  	bpf_map_charge_move(&smap->map.memory, &mem);
  	return &smap->map;
  put_buffers:
  	put_callchain_buffers();
  free_charge:
  	bpf_map_charge_finish(&mem);
  	bpf_map_area_free(smap);
  	return ERR_PTR(err);
  }
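  
  /*
   * Illustrative user-space sketch (values are arbitrary, not requirements):
   * creating such a map with the raw bpf(2) syscall.
   *
   *	union bpf_attr attr = {
   *		.map_type    = BPF_MAP_TYPE_STACK_TRACE,
   *		.key_size    = sizeof(__u32),
   *		.value_size  = 127 * sizeof(__u64),
   *		.max_entries = 16384,
   *	};
   *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
   *
   * value_size must be a multiple of 8 (or of sizeof(struct
   * bpf_stack_build_id) when BPF_F_STACK_BUILD_ID is set) and small enough
   * that the implied depth fits sysctl_perf_event_max_stack, as checked
   * above.
   */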
  #define BPF_BUILD_ID 3
  /*
   * Parse build id from the note segment. This logic can be shared between
   * 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
   * identical.
   */
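  /*
   * For reference, each note record is an Elf32_Nhdr (n_namesz, n_descsz,
   * n_type) followed by the name padded to a 4-byte boundary and then the
   * descriptor, also 4-byte padded.  For the GNU build ID note the name is
   * "GNU" (n_namesz == 4), n_type is NT_GNU_BUILD_ID (3, BPF_BUILD_ID
   * above) and the descriptor holds the build ID bytes (20 bytes for the
   * common SHA-1 style IDs).
   */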
  static inline int stack_map_parse_build_id(void *page_addr,
  					   unsigned char *build_id,
  					   void *note_start,
  					   Elf32_Word note_size)
  {
  	Elf32_Word note_offs = 0, new_offs;
  
  	/* check for overflow */
  	if (note_start < page_addr || note_start + note_size < note_start)
  		return -EINVAL;
  
  	/* only supports note that fits in the first page */
  	if (note_start + note_size > page_addr + PAGE_SIZE)
  		return -EINVAL;
  
  	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
  		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
  
  		if (nhdr->n_type == BPF_BUILD_ID &&
  		    nhdr->n_namesz == sizeof("GNU") &&
  		    nhdr->n_descsz > 0 &&
  		    nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
  			memcpy(build_id,
  			       note_start + note_offs +
  			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
  			       nhdr->n_descsz);
  			memset(build_id + nhdr->n_descsz, 0,
  			       BPF_BUILD_ID_SIZE - nhdr->n_descsz);
  			return 0;
  		}
  		new_offs = note_offs + sizeof(Elf32_Nhdr) +
  			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
  		if (new_offs <= note_offs)  /* overflow */
  			break;
  		note_offs = new_offs;
  	}
  	return -EINVAL;
  }
  
  /* Parse build ID from 32-bit ELF */
  static int stack_map_get_build_id_32(void *page_addr,
  				     unsigned char *build_id)
  {
  	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
  	Elf32_Phdr *phdr;
  	int i;
  
  	/* only supports phdr that fits in one page */
  	if (ehdr->e_phnum >
  	    (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
  		return -EINVAL;
  
  	phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
  	for (i = 0; i < ehdr->e_phnum; ++i) {
  		if (phdr[i].p_type == PT_NOTE &&
  		    !stack_map_parse_build_id(page_addr, build_id,
  					      page_addr + phdr[i].p_offset,
  					      phdr[i].p_filesz))
  			return 0;
  	}
  	return -EINVAL;
  }
  
  /* Parse build ID from 64-bit ELF */
  static int stack_map_get_build_id_64(void *page_addr,
  				     unsigned char *build_id)
  {
  	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
  	Elf64_Phdr *phdr;
  	int i;
  
  	/* only supports phdr that fits in one page */
  	if (ehdr->e_phnum >
  	    (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
  		return -EINVAL;
  
  	phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
  	for (i = 0; i < ehdr->e_phnum; ++i) {
  		if (phdr[i].p_type == PT_NOTE &&
  		    !stack_map_parse_build_id(page_addr, build_id,
  					      page_addr + phdr[i].p_offset,
  					      phdr[i].p_filesz))
  			return 0;
  	}
  	return -EINVAL;
  }
  
  /* Parse build ID of ELF file mapped to vma */
  static int stack_map_get_build_id(struct vm_area_struct *vma,
  				  unsigned char *build_id)
  {
  	Elf32_Ehdr *ehdr;
  	struct page *page;
  	void *page_addr;
  	int ret;
  
  	/* only works for page-backed storage */
  	if (!vma->vm_file)
  		return -EINVAL;
  
  	page = find_get_page(vma->vm_file->f_mapping, 0);
  	if (!page)
  		return -EFAULT;	/* page not mapped */
  
  	ret = -EINVAL;
  	page_addr = kmap_atomic(page);
  	ehdr = (Elf32_Ehdr *)page_addr;
  
  	/* compare magic x7f "ELF" */
  	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
  		goto out;
  
  	/* only support executable file and shared object file */
  	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
  		goto out;
  
  	if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
  		ret = stack_map_get_build_id_32(page_addr, build_id);
  	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
  		ret = stack_map_get_build_id_64(page_addr, build_id);
  out:
  	kunmap_atomic(page_addr);
  	put_page(page);
  	return ret;
  }
  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
  					  u64 *ips, u32 trace_nr, bool user)
  {
  	int i;
  	struct vm_area_struct *vma;
  	bool irq_work_busy = false;
  	struct stack_map_irq_work *work = NULL;

  	if (irqs_disabled()) {
  		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
  			work = this_cpu_ptr(&up_read_work);
  			if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) {
  				/* cannot queue more up_read, fallback */
  				irq_work_busy = true;
  			}
  		} else {
  			/*
  			 * PREEMPT_RT does not allow trylocking the mmap sem in
  			 * interrupt-disabled context. Force the fallback code.
  			 */
  			irq_work_busy = true;
  		}
  	}
  
  	/*
  	 * We cannot do up_read() when irqs are disabled, because of the
  	 * risk of deadlocking on rq_lock. To do build_id lookups while
  	 * irqs are disabled, we need to run up_read() in irq_work. We use
  	 * a percpu variable for the irq_work. If the irq_work is already
  	 * in use by another lookup, we fall back to reporting ips.
  	 *
  	 * The same fallback is used for kernel stacks (!user) on a stackmap
  	 * with build_id.
  	 */
  	if (!user || !current || !current->mm || irq_work_busy ||
  	    !mmap_read_trylock_non_owner(current->mm)) {
  		/* cannot access current->mm, fall back to ips */
  		for (i = 0; i < trace_nr; i++) {
  			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
  			id_offs[i].ip = ips[i];
  			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
  		}
  		return;
  	}
  
  	for (i = 0; i < trace_nr; i++) {
  		vma = find_vma(current->mm, ips[i]);
  		if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
  			/* per entry fall back to ips */
  			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
  			id_offs[i].ip = ips[i];
  			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
  			continue;
  		}
  		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
  			- vma->vm_start;
  		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
  	}

  	if (!work) {
  		mmap_read_unlock_non_owner(current->mm);
  	} else {
  		work->mm = current->mm;
  		irq_work_queue(&work->irq_work);
  	}
  }
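  
  /*
   * Callchain handling below follows the init_nr convention of
   * get_perf_callchain(): entries are stored starting at index init_nr =
   * sysctl_perf_event_max_stack - max_depth, so at most max_depth entries
   * are collected and the interesting ips start at trace->ip + init_nr
   * (plus any skipped frames).
   */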
  static struct perf_callchain_entry *
  get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
  {
  #ifdef CONFIG_STACKTRACE
  	struct perf_callchain_entry *entry;
  	int rctx;
  
  	entry = get_callchain_entry(&rctx);
  
  	if (!entry)
  		return NULL;
  
  	entry->nr = init_nr +
  		stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
  				     sysctl_perf_event_max_stack - init_nr, 0);
  
  	/* stack_trace_save_tsk() works on unsigned long array, while
  	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
  	 * necessary to fix this mismatch.
  	 */
  	if (__BITS_PER_LONG != 64) {
  		unsigned long *from = (unsigned long *) entry->ip;
  		u64 *to = entry->ip;
  		int i;
  
  		/* copy data from the end to avoid using extra buffer */
  		for (i = entry->nr - 1; i >= (int)init_nr; i--)
  			to[i] = (u64)(from[i]);
  	}
  
  	put_callchain_entry(rctx);
  
  	return entry;
  #else /* CONFIG_STACKTRACE */
  	return NULL;
  #endif
  }
  static long __bpf_get_stackid(struct bpf_map *map,
  			      struct perf_callchain_entry *trace, u64 flags)
  {
  	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
  	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
  	u32 max_depth = map->value_size / stack_map_data_size(map);
  	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
  	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
  	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
  	u32 hash, id, trace_nr, trace_len;
  	bool user = flags & BPF_F_USER_STACK;
  	u64 *ips;
  	bool hash_matches;

  	/* get_perf_callchain() guarantees that trace->nr >= init_nr
  	 * and trace->nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
  	 */
  	trace_nr = trace->nr - init_nr;
  
  	if (trace_nr <= skip)
  		/* skipping more than usable stack trace */
  		return -EFAULT;
  
  	trace_nr -= skip;
  	trace_len = trace_nr * sizeof(u64);
  	ips = trace->ip + skip + init_nr;
  	hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
  	id = hash & (smap->n_buckets - 1);
  	bucket = READ_ONCE(smap->buckets[id]);

  	hash_matches = bucket && bucket->hash == hash;
  	/* fast cmp */
  	if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
  		return id;
  
  	if (stack_map_use_build_id(map)) {
  		/* for build_id+offset, pop a bucket before slow cmp */
  		new_bucket = (struct stack_map_bucket *)
  			pcpu_freelist_pop(&smap->freelist);
  		if (unlikely(!new_bucket))
  			return -ENOMEM;
  		new_bucket->nr = trace_nr;
  		stack_map_get_build_id_offset(
  			(struct bpf_stack_build_id *)new_bucket->data,
  			ips, trace_nr, user);
  		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
  		if (hash_matches && bucket->nr == trace_nr &&
  		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
  			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
  			return id;
  		}
  		if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
  			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
  			return -EEXIST;
  		}
  	} else {
  		if (hash_matches && bucket->nr == trace_nr &&
  		    memcmp(bucket->data, ips, trace_len) == 0)
  			return id;
  		if (bucket && !(flags & BPF_F_REUSE_STACKID))
  			return -EEXIST;
  
  		new_bucket = (struct stack_map_bucket *)
  			pcpu_freelist_pop(&smap->freelist);
  		if (unlikely(!new_bucket))
  			return -ENOMEM;
  		memcpy(new_bucket->data, ips, trace_len);
  	}
  	new_bucket->hash = hash;
  	new_bucket->nr = trace_nr;
  
  	old_bucket = xchg(&smap->buckets[id], new_bucket);
  	if (old_bucket)
  		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
  	return id;
  }
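  
  /*
   * The id returned by __bpf_get_stackid() is simply hash & (n_buckets - 1),
   * so two different traces can collide on one bucket.  Without
   * BPF_F_REUSE_STACKID a collision with a non-matching resident trace
   * fails with -EEXIST; with it, the old trace is evicted and its bucket
   * is recycled through the freelist.
   */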
  BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
  	   u64, flags)
  {
  	u32 max_depth = map->value_size / stack_map_data_size(map);
  	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
  	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
  	bool user = flags & BPF_F_USER_STACK;
  	struct perf_callchain_entry *trace;
  	bool kernel = !user;
  
  	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
  			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
  		return -EINVAL;
  
  	trace = get_perf_callchain(regs, init_nr, kernel, user,
  				   sysctl_perf_event_max_stack, false, false);
  
  	if (unlikely(!trace))
  		/* couldn't fetch the stack trace */
  		return -EFAULT;
  
  	return __bpf_get_stackid(map, trace, flags);
  }
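  
  /*
   * Rough sketch of how a BPF program would use this helper (the map and
   * function names below are made up for the example):
   *
   *	struct {
   *		__uint(type, BPF_MAP_TYPE_STACK_TRACE);
   *		__uint(key_size, sizeof(__u32));
   *		__uint(value_size, 127 * sizeof(__u64));
   *		__uint(max_entries, 16384);
   *	} stack_traces SEC(".maps");
   *
   *	SEC("perf_event")
   *	int on_sample(struct bpf_perf_event_data *ctx)
   *	{
   *		long id = bpf_get_stackid(ctx, &stack_traces,
   *					  BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP);
   *		// id >= 0 is a key into stack_traces; negative is an error
   *		return 0;
   *	}
   */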
  const struct bpf_func_proto bpf_get_stackid_proto = {
  	.func		= bpf_get_stackid,
  	.gpl_only	= true,
  	.ret_type	= RET_INTEGER,
  	.arg1_type	= ARG_PTR_TO_CTX,
  	.arg2_type	= ARG_CONST_MAP_PTR,
  	.arg3_type	= ARG_ANYTHING,
  };
  static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
  {
  	__u64 nr_kernel = 0;
  
  	while (nr_kernel < trace->nr) {
  		if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
  			break;
  		nr_kernel++;
  	}
  	return nr_kernel;
  }
  
  BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
  	   struct bpf_map *, map, u64, flags)
  {
  	struct perf_event *event = ctx->event;
  	struct perf_callchain_entry *trace;
  	bool kernel, user;
  	__u64 nr_kernel;
  	int ret;
  
  	/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
  	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
  		return bpf_get_stackid((unsigned long)(ctx->regs),
  				       (unsigned long) map, flags, 0, 0);
  
  	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
  			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
  		return -EINVAL;
  
  	user = flags & BPF_F_USER_STACK;
  	kernel = !user;
  
  	trace = ctx->data->callchain;
  	if (unlikely(!trace))
  		return -EFAULT;
  
  	nr_kernel = count_kernel_ip(trace);
  
  	if (kernel) {
  		__u64 nr = trace->nr;
  
  		trace->nr = nr_kernel;
  		ret = __bpf_get_stackid(map, trace, flags);
  
  		/* restore nr */
  		trace->nr = nr;
  	} else { /* user */
  		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
  
  		skip += nr_kernel;
  		if (skip > BPF_F_SKIP_FIELD_MASK)
  			return -EFAULT;
  
  		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
  		ret = __bpf_get_stackid(map, trace, flags);
  	}
  	return ret;
  }
  
  const struct bpf_func_proto bpf_get_stackid_proto_pe = {
  	.func		= bpf_get_stackid_pe,
  	.gpl_only	= false,
  	.ret_type	= RET_INTEGER,
  	.arg1_type	= ARG_PTR_TO_CTX,
  	.arg2_type	= ARG_CONST_MAP_PTR,
  	.arg3_type	= ARG_ANYTHING,
  };
  static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
  			    struct perf_callchain_entry *trace_in,
  			    void *buf, u32 size, u64 flags)
  {
  	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
  	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
  	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
  	bool user = flags & BPF_F_USER_STACK;
  	struct perf_callchain_entry *trace;
  	bool kernel = !user;
  	int err = -EINVAL;
  	u64 *ips;
  
  	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
  			       BPF_F_USER_BUILD_ID)))
  		goto clear;
  	if (kernel && user_build_id)
  		goto clear;
  
  	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
  					    : sizeof(u64);
  	if (unlikely(size % elem_size))
  		goto clear;
  	/* cannot get valid user stack for task without user_mode regs */
  	if (task && user && !user_mode(regs))
  		goto err_fault;
  	num_elem = size / elem_size;
  	if (sysctl_perf_event_max_stack < num_elem)
  		init_nr = 0;
  	else
  		init_nr = sysctl_perf_event_max_stack - num_elem;

  	if (trace_in)
  		trace = trace_in;
  	else if (kernel && task)
  		trace = get_callchain_entry_for_task(task, init_nr);
  	else
  		trace = get_perf_callchain(regs, init_nr, kernel, user,
  					   sysctl_perf_event_max_stack,
  					   false, false);
  	if (unlikely(!trace))
  		goto err_fault;
  
  	trace_nr = trace->nr - init_nr;
  	if (trace_nr < skip)
  		goto err_fault;
  
  	trace_nr -= skip;
  	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
  	copy_len = trace_nr * elem_size;
  	ips = trace->ip + skip + init_nr;
  	if (user && user_build_id)
  		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
  	else
  		memcpy(buf, ips, copy_len);
  
  	if (size > copy_len)
  		memset(buf + copy_len, 0, size - copy_len);
  	return copy_len;
  
  err_fault:
  	err = -EFAULT;
  clear:
  	memset(buf, 0, size);
  	return err;
  }
  BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
  	   u64, flags)
  {
  	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
  }
  const struct bpf_func_proto bpf_get_stack_proto = {
  	.func		= bpf_get_stack,
  	.gpl_only	= true,
  	.ret_type	= RET_INTEGER,
  	.arg1_type	= ARG_PTR_TO_CTX,
  	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
  	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
  	.arg4_type	= ARG_ANYTHING,
  };
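  
  /*
   * Unlike bpf_get_stackid(), bpf_get_stack() copies the raw trace into a
   * caller-supplied buffer.  Illustrative BPF-side sketch (sizes are
   * arbitrary):
   *
   *	__u64 ips[32];	// buffer size must be a multiple of 8
   *	long len = bpf_get_stack(ctx, ips, sizeof(ips), BPF_F_USER_STACK);
   *	// len is the number of bytes written, or a negative error
   *
   * With BPF_F_USER_BUILD_ID the buffer must instead hold struct
   * bpf_stack_build_id records and its size be a multiple of that struct.
   */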
  BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
  	   u32, size, u64, flags)
  {
  	struct pt_regs *regs = task_pt_regs(task);
  	return __bpf_get_stack(regs, task, NULL, buf, size, flags);
  }
  BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)

  const struct bpf_func_proto bpf_get_task_stack_proto = {
  	.func		= bpf_get_task_stack,
  	.gpl_only	= false,
  	.ret_type	= RET_INTEGER,
  	.arg1_type	= ARG_PTR_TO_BTF_ID,
  	.arg1_btf_id	= &bpf_get_task_stack_btf_ids[0],
  	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
  	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
  	.arg4_type	= ARG_ANYTHING,
  };
  BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
  	   void *, buf, u32, size, u64, flags)
  {
  	struct pt_regs *regs = (struct pt_regs *)(ctx->regs);
  	struct perf_event *event = ctx->event;
  	struct perf_callchain_entry *trace;
  	bool kernel, user;
  	int err = -EINVAL;
  	__u64 nr_kernel;
  
  	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
  		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
  
  	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
  			       BPF_F_USER_BUILD_ID)))
  		goto clear;
  
  	user = flags & BPF_F_USER_STACK;
  	kernel = !user;
  
  	err = -EFAULT;
  	trace = ctx->data->callchain;
  	if (unlikely(!trace))
  		goto clear;
  
  	nr_kernel = count_kernel_ip(trace);
  
  	if (kernel) {
  		__u64 nr = trace->nr;
  
  		trace->nr = nr_kernel;
  		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
  
  		/* restore nr */
  		trace->nr = nr;
  	} else { /* user */
  		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
  
  		skip += nr_kernel;
  		if (skip > BPF_F_SKIP_FIELD_MASK)
  			goto clear;
  
  		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
  		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
  	}
  	return err;
  
  clear:
  	memset(buf, 0, size);
  	return err;
  
  }
  
  const struct bpf_func_proto bpf_get_stack_proto_pe = {
  	.func		= bpf_get_stack_pe,
  	.gpl_only	= true,
  	.ret_type	= RET_INTEGER,
  	.arg1_type	= ARG_PTR_TO_CTX,
  	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
  	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
  	.arg4_type	= ARG_ANYTHING,
  };
  /* Called from eBPF program */
  static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
  {
  	return ERR_PTR(-EOPNOTSUPP);
  }
  
  /* Called from syscall */
  int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
  {
  	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
  	struct stack_map_bucket *bucket, *old_bucket;
  	u32 id = *(u32 *)key, trace_len;
  
  	if (unlikely(id >= smap->n_buckets))
  		return -ENOENT;
  
  	bucket = xchg(&smap->buckets[id], NULL);
  	if (!bucket)
  		return -ENOENT;
  	trace_len = bucket->nr * stack_map_data_size(map);
  	memcpy(value, bucket->data, trace_len);
  	memset(value + trace_len, 0, map->value_size - trace_len);
  
  	old_bucket = xchg(&smap->buckets[id], bucket);
  	if (old_bucket)
  		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
  	return 0;
  }
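  
  /*
   * From user space, an id returned by bpf_get_stackid() can be turned back
   * into ips with an ordinary map lookup, which reaches bpf_stackmap_copy()
   * above.  Sketch (assuming value_size was 127 * sizeof(__u64)):
   *
   *	__u64 ips[127] = {};
   *	bpf_map_lookup_elem(map_fd, &stack_id, ips);
   *
   * Lookup from a BPF program goes through stack_map_lookup_elem() above
   * and is not supported (-EOPNOTSUPP).
   */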
  static int stack_map_get_next_key(struct bpf_map *map, void *key,
  				  void *next_key)
  {
  	struct bpf_stack_map *smap = container_of(map,
  						  struct bpf_stack_map, map);
  	u32 id;
  
  	WARN_ON_ONCE(!rcu_read_lock_held());
  
  	if (!key) {
  		id = 0;
  	} else {
  		id = *(u32 *)key;
  		if (id >= smap->n_buckets || !smap->buckets[id])
  			id = 0;
  		else
  			id++;
  	}
  
  	while (id < smap->n_buckets && !smap->buckets[id])
  		id++;
  
  	if (id >= smap->n_buckets)
  		return -ENOENT;
  
  	*(u32 *)next_key = id;
  	return 0;
  }
  
  static int stack_map_update_elem(struct bpf_map *map, void *key, void *value,
  				 u64 map_flags)
  {
  	return -EINVAL;
  }
  
  /* Called from syscall or from eBPF program */
  static int stack_map_delete_elem(struct bpf_map *map, void *key)
  {
  	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
  	struct stack_map_bucket *old_bucket;
  	u32 id = *(u32 *)key;
  
  	if (unlikely(id >= smap->n_buckets))
  		return -E2BIG;
  
  	old_bucket = xchg(&smap->buckets[id], NULL);
  	if (old_bucket) {
  		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
  		return 0;
  	} else {
  		return -ENOENT;
  	}
  }
  
  /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
  static void stack_map_free(struct bpf_map *map)
  {
  	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);

  	bpf_map_area_free(smap->elems);
  	pcpu_freelist_destroy(&smap->freelist);
  	bpf_map_area_free(smap);
  	put_callchain_buffers();
  }
  static int stack_trace_map_btf_id;
  const struct bpf_map_ops stack_trace_map_ops = {
  	.map_meta_equal = bpf_map_meta_equal,
  	.map_alloc = stack_map_alloc,
  	.map_free = stack_map_free,
  	.map_get_next_key = stack_map_get_next_key,
  	.map_lookup_elem = stack_map_lookup_elem,
  	.map_update_elem = stack_map_update_elem,
  	.map_delete_elem = stack_map_delete_elem,
  	.map_check_btf = map_check_no_btf,
  	.map_btf_name = "bpf_stack_map",
  	.map_btf_id = &stack_trace_map_btf_id,
  };
  
  static int __init stack_map_init(void)
  {
  	int cpu;
  	struct stack_map_irq_work *work;
  
  	for_each_possible_cpu(cpu) {
  		work = per_cpu_ptr(&up_read_work, cpu);
  		init_irq_work(&work->irq_work, do_up_read);
  	}
  	return 0;
  }
  subsys_initcall(stack_map_init);