kernel/bpf/hashtab.c

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch
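
/* Editor's note, not part of the original file: for illustration only, an
 * instantiation such as BATCH_OPS(htab) inside a bpf_map_ops initializer
 * expands to the designated initializers below; the first two names come
 * from the _name prefix supplied at the use site:
 *
 *	.map_lookup_batch = htab_map_lookup_batch,
 *	.map_lookup_and_delete_batch = htab_map_lookup_and_delete_batch,
 *	.map_update_batch = generic_map_update_batch,
 *	.map_delete_batch = generic_map_delete_batch
 */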

/*
 * The bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can execute in any context including perf, kprobes and
 * tracing. As there are almost no limits where perf, kprobes and tracing
 * can be invoked from, the lock operations need to be protected against
 * deadlocks. Deadlocks can be caused by recursion and by an invocation in
 * the lock held section when functions which acquire this lock are invoked
 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
 * variable bpf_prog_active, which prevents BPF programs attached to perf
 * events, kprobes and tracing from being invoked before the prior
 * invocation from one of these contexts has completed. sys_bpf() uses the
 * same mechanism by pinning the task to the current CPU and incrementing
 * the recursion protection across the map operation.
 *
 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
 * operations like memory allocations (even with GFP_ATOMIC) from atomic
 * contexts. This is required because even with GFP_ATOMIC the memory
 * allocator calls into code paths which acquire locks with long held lock
 * sections. To ensure the deterministic behaviour these locks are regular
 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
 * true atomic contexts on an RT kernel are the low level hardware
 * handling, scheduling, low level interrupt handling, NMIs etc. None of
 * these contexts should ever do memory allocations.
 *
 * As regular device interrupt handlers and soft interrupts are forced into
 * thread context, the existing code which does
 *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
 * just works.
 *
 * In theory the BPF locks could be converted to regular spinlocks as well,
 * but the bucket locks and percpu_freelist locks can be taken from
 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
 * atomic contexts even on RT. These mechanisms require preallocated maps,
 * so there is no need to invoke memory allocations within the lock held
 * sections.
 *
 * BPF maps which need dynamic allocation are only used from (forced)
 * thread context on RT and can therefore use regular spinlocks which in
 * turn allows invoking memory allocations from the lock held section.
 *
 * On a non RT kernel this distinction is neither possible nor required.
 * spinlock maps to raw_spinlock and the extra code is optimized out by the
 * compiler.
 */

struct bucket {
	struct hlist_nulls_head head;
	union {
		raw_spinlock_t raw_lock;
		spinlock_t     lock;
	};
};

struct bpf_htab {
	struct bpf_map map;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	atomic_t count;	/* number of elements in this hashtable */
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		struct rcu_head rcu;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};
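
/* Editor's note, not part of the original file: a sketch of the element
 * layout implied by the struct above and by the elem_size computation in
 * htab_map_alloc() below:
 *
 *	struct htab_elem | key, padded to round_up(key_size, 8) | value
 *
 * Regular maps store the value (or, for hash-of-maps, an inner map pointer)
 * at l->key + round_up(key_size, 8), which is what the lookup helpers
 * return. Per-cpu maps instead store a __percpu pointer to the value area,
 * written and read via htab_elem_set_ptr()/htab_elem_get_ptr() at
 * l->key + key_size.
 */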

static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
{
	return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
}

static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		if (htab_use_raw_lock(htab))
			raw_spin_lock_init(&htab->buckets[i].raw_lock);
		else
			spin_lock_init(&htab->buckets[i].lock);
	}
}

static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
					     struct bucket *b)
{
	unsigned long flags;

	if (htab_use_raw_lock(htab))
		raw_spin_lock_irqsave(&b->raw_lock, flags);
	else
		spin_lock_irqsave(&b->lock, flags);
	return flags;
}

static inline void htab_unlock_bucket(const struct bpf_htab *htab,
				      struct bucket *b,
				      unsigned long flags)
{
	if (htab_use_raw_lock(htab))
		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
	else
		spin_unlock_irqrestore(&b->lock, flags);
}
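
/* Editor's note, not part of the original file: the update and delete paths
 * later in this file all follow the same pattern around these helpers, e.g.
 *
 *	flags = htab_lock_bucket(htab, b);
 *	... lookup_elem_raw() / hlist_nulls_{add_head,del}_rcu() ...
 *	htab_unlock_bucket(htab, b, flags);
 *
 * On PREEMPT_RT only preallocated maps take the raw spinlock variant, so
 * maps that may allocate memory under the bucket lock end up on the
 * sleepable lock, matching the locking comment at the top of the file.
 */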

static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)(l->key + roundup(map->key_size, 8));
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
}

static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
		cond_resched();
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
 * (bucket_lock). If both locks need to be acquired together, the lock
 * order is always lru_lock -> bucket_lock and this only happens in
 * bpf_lru_list.c logic. For example, certain code paths of
 * bpf_lru_pop_free(), which is called by prealloc_lru_pop(), will
 * acquire lru_lock first followed by acquiring bucket_lock.
 *
 * In hashtab.c, to avoid deadlock, lock acquisition of
 * bucket_lock followed by lru_lock is not allowed. In such cases,
 * bucket_lock needs to be released first before acquiring lru_lock.
 */

static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
					  u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
	struct htab_elem *l;

	if (node) {
		l = container_of(node, struct htab_elem, lru_node);
		memcpy(l->key, key, htab->map.key_size);
		return l;
	}

	return NULL;
}

static int prealloc_init(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int err = -ENOMEM, i;

	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
		num_entries += num_possible_cpus();

	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
					 htab->map.numa_node);
	if (!htab->elems)
		return -ENOMEM;

	if (!htab_is_percpu(htab))
		goto skip_percpu_elems;

	for (i = 0; i < num_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
		cond_resched();
	}

skip_percpu_elems:
	if (htab_is_lru(htab))
		err = bpf_lru_init(&htab->lru,
				   htab->map.map_flags & BPF_F_NO_COMMON_LRU,
				   offsetof(struct htab_elem, hash) -
				   offsetof(struct htab_elem, lru_node),
				   htab_lru_map_delete_node,
				   htab);
	else
		err = pcpu_freelist_init(&htab->freelist);

	if (err)
		goto free_elems;

	if (htab_is_lru(htab))
		bpf_lru_populate(&htab->lru, htab->elems,
				 offsetof(struct htab_elem, lru_node),
				 htab->elem_size, num_entries);
	else
		pcpu_freelist_populate(&htab->freelist,
				       htab->elems + offsetof(struct htab_elem, fnode),
				       htab->elem_size, num_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}
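
/* Editor's note, not part of the original file: for plain (non-LRU,
 * non-percpu) preallocated maps, prealloc_init() sizes the element area as
 * max_entries + num_possible_cpus(). The extra elements back the per-CPU
 * htab->extra_elems pointers filled in by alloc_extra_elems() below, so an
 * update that replaces an existing key can reuse a spare element instead
 * of doing a freelist pop/push while the bucket lock is held.
 */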

static void prealloc_destroy(struct bpf_htab *htab)
{
	htab_free_elems(htab);

	if (htab_is_lru(htab))
		bpf_lru_destroy(&htab->lru);
	else
		pcpu_freelist_destroy(&htab->freelist);
}

static int alloc_extra_elems(struct bpf_htab *htab)
{
	struct htab_elem *__percpu *pptr, *l_new;
	struct pcpu_freelist_node *l;
	int cpu;

	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
				  GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
		l_new = container_of(l, struct htab_elem, fnode);
		*per_cpu_ptr(pptr, cpu) = l_new;
	}
	htab->extra_elems = pptr;
	return 0;
}

/* Called from syscall */
static int htab_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
	int numa_node = bpf_map_attr_numa_node(attr);

	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
		     offsetof(struct htab_elem, hash_node.pprev));
	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (lru && !bpf_capable())
		/* The LRU implementation is much more complicated than
		 * other maps.  Hence, limit it to CAP_BPF.
		 */
		return -EPERM;

	if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* Guard against local DoS, and discourage production use. */
		return -EPERM;

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (!lru && percpu_lru)
		return -EINVAL;

	if (lru && !prealloc)
		return -ENOTSUPP;

	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
		return -EINVAL;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	if (attr->max_entries == 0 || attr->key_size == 0 ||
	    attr->value_size == 0)
		return -EINVAL;

	if (attr->key_size > MAX_BPF_STACK)
		/* eBPF programs initialize keys on stack, so they cannot be
		 * larger than max stack size
		 */
		return -E2BIG;

	if (attr->value_size >= KMALLOC_MAX_SIZE -
	    MAX_BPF_STACK - sizeof(struct htab_elem))
		/* if value_size is bigger, the user space won't be able to
		 * access the elements via bpf syscall. This check also makes
		 * sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		return -E2BIG;

	return 0;
}
  
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	struct bpf_htab *htab;
	u64 cost;
	int err;

	htab = kzalloc(sizeof(*htab), GFP_USER);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&htab->map, attr);

	if (percpu_lru) {
		/* ensure each CPU's lru list has >= 1 element.
		 * since we are at it, make each lru list have the same
		 * number of elements.
		 */
		htab->map.max_entries = roundup(attr->max_entries,
						num_possible_cpus());
		if (htab->map.max_entries < attr->max_entries)
			htab->map.max_entries = rounddown(attr->max_entries,
							  num_possible_cpus());
	}

	/* hash table size must be power of 2 */
	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	err = -E2BIG;
	/* prevent zero size kmalloc and check for u32 overflow */
	if (htab->n_buckets == 0 ||
	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
	       (u64) htab->elem_size * htab->map.max_entries;

	if (percpu)
		cost += (u64) round_up(htab->map.value_size, 8) *
			num_possible_cpus() * htab->map.max_entries;
	else
		cost += (u64) htab->elem_size * num_possible_cpus();

	/* if map size is larger than memlock limit, reject it */
	err = bpf_map_charge_init(&htab->map.memory, cost);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket),
					   htab->map.numa_node);
	if (!htab->buckets)
		goto free_charge;

	if (htab->map.map_flags & BPF_F_ZERO_SEED)
		htab->hashrnd = 0;
	else
		htab->hashrnd = get_random_int();
	htab_init_buckets(htab);

	if (prealloc) {
		err = prealloc_init(htab);
		if (err)
			goto free_buckets;

		if (!percpu && !lru) {
			/* lru itself can remove the least used element, so
			 * there is no need for an extra elem during map_update.
			 */
			err = alloc_extra_elems(htab);
			if (err)
				goto free_prealloc;
		}
	}

	return &htab->map;
free_prealloc:
	prealloc_destroy(htab);
free_buckets:
	bpf_map_area_free(htab->buckets);
free_charge:
	bpf_map_charge_finish(&htab->map.memory);
free_htab:
	kfree(htab);
	return ERR_PTR(err);
}

static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
	return jhash(key, key_len, hashrnd);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}
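
/* Editor's note, not part of the original file: htab_map_alloc() rounds
 * n_buckets up to a power of two, so the mask above is equivalent to
 * hash % n_buckets without a division. The same bucket index is used as
 * the per-bucket "nulls" value (see htab_init_buckets()), which is what
 * lookup_nulls_elem_raw() below checks to detect a cross-bucket move.
 */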

/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* can be called without bucket lock. it will repeat the loop in
 * the unlikely event when elements moved from one bucket into another
 * while the linked list is being walked
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}

/* Called from syscall or from eBPF program directly, so
 * arguments have to match bpf_map_lookup_elem() exactly.
 * The return value is adjusted by BPF instructions
 * in htab_map_gen_lookup().
 */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}

/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_prog
 *   bpf_map_lookup_elem
 *     map->ops->map_lookup_elem
 *       htab_map_lookup_elem
 *         __htab_map_lookup_elem
 * do:
 * bpf_prog
 *   __htab_map_lookup_elem
 */
static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
							void *key, const bool mark)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		if (mark)
			bpf_lru_node_set_ref(&l->lru_node);
		return l->key + round_up(map->key_size, 8);
	}

	return NULL;
}

static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, true);
}

static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, false);
}

static int htab_lru_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int ref_reg = BPF_REG_1;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
	*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
			      offsetof(struct htab_elem, lru_node) +
			      offsetof(struct bpf_lru_node, ref));
	*insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
	*insn++ = BPF_ST_MEM(BPF_B, ret,
			     offsetof(struct htab_elem, lru_node) +
			     offsetof(struct bpf_lru_node, ref),
			     1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

/* It is called from the bpf_lru_list when the LRU needs to delete
 * older elements from the htab.
 */
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
	struct bpf_htab *htab = (struct bpf_htab *)arg;
	struct htab_elem *l = NULL, *tgt_l;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags;
	struct bucket *b;

	tgt_l = container_of(node, struct htab_elem, lru_node);
	b = __select_bucket(htab, tgt_l->hash);
	head = &b->head;
	flags = htab_lock_bucket(htab, b);

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l == tgt_l) {
			hlist_nulls_del_rcu(&l->hash_node);
			break;
		}

	htab_unlock_bucket(htab, b, flags);

	return l == tgt_l;
}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
				  struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
					  struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}

static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
	kfree(l);
}

static void htab_elem_free_rcu(struct rcu_head *head)
{
	struct htab_elem *l = container_of(head, struct htab_elem, rcu);
	struct bpf_htab *htab = l->htab;

	htab_elem_free(htab, l);
}

static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
{
	struct bpf_map *map = &htab->map;
	void *ptr;

	if (map->ops->map_fd_put_ptr) {
		ptr = fd_htab_map_get_ptr(map, l);
		map->ops->map_fd_put_ptr(ptr);
	}
}

static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	htab_put_fd_value(htab, l);

	if (htab_is_prealloc(htab)) {
		__pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		atomic_dec(&htab->count);
		l->htab = htab;
		call_rcu(&l->rcu, htab_elem_free_rcu);
	}
}

static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
			    void *value, bool onallcpus)
{
	if (!onallcpus) {
		/* copy true value_size bytes */
		memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
	} else {
		u32 size = round_up(htab->map.value_size, 8);
		int off = 0, cpu;

		for_each_possible_cpu(cpu) {
			bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
					value + off, size);
			off += size;
		}
	}
}

static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
			    void *value, bool onallcpus)
{
	/* When using prealloc and not setting the initial value on all cpus,
	 * zero-fill element values for other cpus (just as what happens when
	 * not using prealloc). Otherwise, bpf program has no way to ensure
	 * known initial values for cpus other than current one
	 * (onallcpus=false always when coming from bpf prog).
	 */
	if (htab_is_prealloc(htab) && !onallcpus) {
		u32 size = round_up(htab->map.value_size, 8);
		int current_cpu = raw_smp_processor_id();
		int cpu;

		for_each_possible_cpu(cpu) {
			if (cpu == current_cpu)
				bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
						size);
			else
				memset(per_cpu_ptr(pptr, cpu), 0, size);
		}
	} else {
		pcpu_copy_value(htab, pptr, value, onallcpus);
	}
}

static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
	       BITS_PER_LONG == 64;
}

static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					 void *value, u32 key_size, u32 hash,
					 bool percpu, bool onallcpus,
					 struct htab_elem *old_elem)
{
	u32 size = htab->map.value_size;
	bool prealloc = htab_is_prealloc(htab);
	struct htab_elem *l_new, **pl_new;
	void __percpu *pptr;

	if (prealloc) {
		if (old_elem) {
			/* if we're updating the existing element,
			 * use per-cpu extra elems to avoid freelist_pop/push
			 */
			pl_new = this_cpu_ptr(htab->extra_elems);
			l_new = *pl_new;
			htab_put_fd_value(htab, old_elem);
			*pl_new = old_elem;
		} else {
			struct pcpu_freelist_node *l;

			l = __pcpu_freelist_pop(&htab->freelist);
			if (!l)
				return ERR_PTR(-E2BIG);
			l_new = container_of(l, struct htab_elem, fnode);
		}
	} else {
		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
			if (!old_elem) {
				/* when map is full and update() is replacing
				 * old element, it's ok to allocate, since
				 * old element will be freed immediately.
				 * Otherwise return an error
				 */
				l_new = ERR_PTR(-E2BIG);
				goto dec_count;
			}
		l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
				     htab->map.numa_node);
		if (!l_new) {
			l_new = ERR_PTR(-ENOMEM);
			goto dec_count;
		}
		check_and_init_map_lock(&htab->map,
					l_new->key + round_up(key_size, 8));
	}

	memcpy(l_new->key, key, key_size);
	if (percpu) {
		size = round_up(size, 8);
		if (prealloc) {
			pptr = htab_elem_get_ptr(l_new, key_size);
		} else {
			/* alloc_percpu zero-fills */
			pptr = __alloc_percpu_gfp(size, 8,
						  GFP_ATOMIC | __GFP_NOWARN);
			if (!pptr) {
				kfree(l_new);
				l_new = ERR_PTR(-ENOMEM);
				goto dec_count;
			}
		}
		pcpu_init_value(htab, pptr, value, onallcpus);

		if (!prealloc)
			htab_elem_set_ptr(l_new, key_size, pptr);
	} else if (fd_htab_map_needs_adjust(htab)) {
		size = round_up(size, 8);
		memcpy(l_new->key + round_up(key_size, 8), value, size);
	} else {
		copy_map_value(&htab->map,
			       l_new->key + round_up(key_size, 8),
			       value);
	}

	l_new->hash = hash;
	return l_new;
dec_count:
	atomic_dec(&htab->count);
	return l_new;
}

static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
		       u64 map_flags)
{
	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}
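
/* Editor's note, not part of the original file: the checks above implement
 * the bpf_map_update_elem() flag semantics from the BPF uapi: BPF_ANY
 * creates or updates, BPF_NOEXIST only creates (-EEXIST if the key already
 * exists) and BPF_EXIST only updates (-ENOENT if it does not). BPF_F_LOCK
 * is masked off so the comparison only sees the create/update mode.
 */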

/* Called from syscall or from eBPF program */
static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
				u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

	key_size = map->key_size;
	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	if (unlikely(map_flags & BPF_F_LOCK)) {
		if (unlikely(!map_value_has_spin_lock(map)))
			return -EINVAL;
		/* find an element without taking the bucket lock */
		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
					      htab->n_buckets);
		ret = check_flags(htab, l_old, map_flags);
		if (ret)
			return ret;
		if (l_old) {
			/* grab the element lock and update value in place */
			copy_map_value_locked(map,
					      l_old->key + round_up(key_size, 8),
					      value, false);
			return 0;
		}
		/* fall through, grab the bucket lock and lookup again.
		 * 99.9% chance that the element won't be found,
		 * but second lookup under lock has to be done.
		 */
	}

	flags = htab_lock_bucket(htab, b);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
		/* first lookup without the bucket lock didn't find the element,
		 * but second lookup with the bucket lock found it.
		 * This case is highly unlikely, but has to be dealt with:
		 * grab the element lock in addition to the bucket lock
		 * and update element in place
		 */
		copy_map_value_locked(map,
				      l_old->key + round_up(key_size, 8),
				      value, false);
		ret = 0;
		goto err;
	}

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
				l_old);
	if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);
		goto err;
	}

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		hlist_nulls_del_rcu(&l_old->hash_node);
		if (!htab_is_prealloc(htab))
			free_htab_elem(htab, l_old);
	}
	ret = 0;
err:
	htab_unlock_bucket(htab, b, flags);
	return ret;
}

static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
				    u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old = NULL;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

	key_size = map->key_size;
	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because getting free nodes from LRU may need
	 * to remove older elements from htab and this removal
	 * operation will need a bucket lock.
	 */
	l_new = prealloc_lru_pop(htab, key, hash);
	if (!l_new)
		return -ENOMEM;
	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);

	flags = htab_lock_bucket(htab, b);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		bpf_lru_node_set_ref(&l_new->lru_node);
		hlist_nulls_del_rcu(&l_old->hash_node);
	}
	ret = 0;

err:
	htab_unlock_bucket(htab, b, flags);

	if (ret)
		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
	else if (l_old)
		bpf_lru_push_free(&htab->lru, &l_old->lru_node);

	return ret;
}
15a07b338   Alexei Starovoitov   bpf: add lookup/u...
1034
1035
1036
  static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
  					 void *value, u64 map_flags,
  					 bool onallcpus)
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1037
1038
1039
  {
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
  	struct htab_elem *l_new = NULL, *l_old;
4fe843590   Alexei Starovoitov   bpf: convert htab...
1040
  	struct hlist_nulls_head *head;
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
  	unsigned long flags;
  	struct bucket *b;
  	u32 key_size, hash;
  	int ret;
  
  	if (unlikely(map_flags > BPF_EXIST))
  		/* unknown flags */
  		return -EINVAL;
  
  	WARN_ON_ONCE(!rcu_read_lock_held());
  
  	key_size = map->key_size;
c02034757   Daniel Borkmann   bpf: use per htab...
1053
  	hash = htab_map_hash(key, key_size, htab->hashrnd);
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1054
1055
1056
  
  	b = __select_bucket(htab, hash);
  	head = &b->head;
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1057
  	flags = htab_lock_bucket(htab, b);
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1058
1059
1060
1061
1062
1063
1064
1065
1066
  
  	l_old = lookup_elem_raw(head, hash, key, key_size);
  
  	ret = check_flags(htab, l_old, map_flags);
  	if (ret)
  		goto err;
  
  	if (l_old) {
  		/* per-cpu hash map can update value in-place */
fd91de7b3   Martin KaFai Lau   bpf: Refactor cod...
1067
1068
  		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
  				value, onallcpus);
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1069
1070
  	} else {
  		l_new = alloc_htab_elem(htab, key, value, key_size,
8c290e60f   Alexei Starovoitov   bpf: fix hashmap ...
1071
  					hash, true, onallcpus, NULL);
6c9059817   Alexei Starovoitov   bpf: pre-allocate...
1072
1073
  		if (IS_ERR(l_new)) {
  			ret = PTR_ERR(l_new);
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1074
1075
  			goto err;
  		}
4fe843590   Alexei Starovoitov   bpf: convert htab...
1076
  		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1077
1078
1079
  	}
  	ret = 0;
  err:
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1080
  	htab_unlock_bucket(htab, b, flags);
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1081
1082
  	return ret;
  }
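  /* Per-cpu LRU update.  A replacement node is preallocated from the LRU
   * free list unless map_flags == BPF_EXIST (which can only update in
   * place).  An existing element is marked referenced and updated in
   * place; otherwise the preallocated node is initialised with
   * pcpu_init_value() and linked.  An unused preallocated node is
   * returned to the LRU free list after the bucket lock is dropped.
   */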
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1083
1084
1085
1086
1087
1088
  static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
  					     void *value, u64 map_flags,
  					     bool onallcpus)
  {
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
  	struct htab_elem *l_new = NULL, *l_old;
4fe843590   Alexei Starovoitov   bpf: convert htab...
1089
  	struct hlist_nulls_head *head;
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
  	unsigned long flags;
  	struct bucket *b;
  	u32 key_size, hash;
  	int ret;
  
  	if (unlikely(map_flags > BPF_EXIST))
  		/* unknown flags */
  		return -EINVAL;
  
  	WARN_ON_ONCE(!rcu_read_lock_held());
  
  	key_size = map->key_size;
c02034757   Daniel Borkmann   bpf: use per htab...
1102
  	hash = htab_map_hash(key, key_size, htab->hashrnd);
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
  
  	b = __select_bucket(htab, hash);
  	head = &b->head;
  
  	/* For LRU, we need to alloc before taking the bucket's
  	 * spinlock because the LRU's elem alloc may need
  	 * to remove an older elem from the htab and this removal
  	 * operation will need a bucket lock.
  	 */
  	if (map_flags != BPF_EXIST) {
  		l_new = prealloc_lru_pop(htab, key, hash);
  		if (!l_new)
  			return -ENOMEM;
  	}
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1117
  	flags = htab_lock_bucket(htab, b);
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
  
  	l_old = lookup_elem_raw(head, hash, key, key_size);
  
  	ret = check_flags(htab, l_old, map_flags);
  	if (ret)
  		goto err;
  
  	if (l_old) {
  		bpf_lru_node_set_ref(&l_old->lru_node);
  
  		/* per-cpu hash map can update value in-place */
  		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
  				value, onallcpus);
  	} else {
d3bec0138   David Verbeiren   bpf: Zero-fill re...
1132
  		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1133
  				value, onallcpus);
4fe843590   Alexei Starovoitov   bpf: convert htab...
1134
  		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1135
1136
1137
1138
  		l_new = NULL;
  	}
  	ret = 0;
  err:
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1139
  	htab_unlock_bucket(htab, b, flags);
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1140
1141
1142
1143
  	if (l_new)
  		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
  	return ret;
  }
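  /* The program-side entry points below pass onallcpus == false; the
   * syscall path (bpf_percpu_hash_update()) passes true instead so that
   * values for every possible CPU are written.
   */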
15a07b338   Alexei Starovoitov   bpf: add lookup/u...
1144
1145
1146
1147
1148
  static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
  				       void *value, u64 map_flags)
  {
  	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
  }
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1149
1150
1151
1152
1153
1154
  static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
  					   void *value, u64 map_flags)
  {
  	return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
  						 false);
  }
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1155
1156
1157
1158
  /* Called from syscall or from eBPF program */
  static int htab_map_delete_elem(struct bpf_map *map, void *key)
  {
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
4fe843590   Alexei Starovoitov   bpf: convert htab...
1159
  	struct hlist_nulls_head *head;
688ecfe60   tom.leiming@gmail.com   bpf: hash: use pe...
1160
  	struct bucket *b;
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1161
1162
1163
1164
  	struct htab_elem *l;
  	unsigned long flags;
  	u32 hash, key_size;
  	int ret = -ENOENT;
1e6c62a88   Alexei Starovoitov   bpf: Introduce sl...
1165
  	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1166
1167
  
  	key_size = map->key_size;
c02034757   Daniel Borkmann   bpf: use per htab...
1168
  	hash = htab_map_hash(key, key_size, htab->hashrnd);
688ecfe60   tom.leiming@gmail.com   bpf: hash: use pe...
1169
1170
  	b = __select_bucket(htab, hash);
  	head = &b->head;
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1171

d01f9b198   Thomas Gleixner   bpf: Factor out h...
1172
  	flags = htab_lock_bucket(htab, b);
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1173

0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1174
1175
1176
  	l = lookup_elem_raw(head, hash, key, key_size);
  
  	if (l) {
4fe843590   Alexei Starovoitov   bpf: convert htab...
1177
  		hlist_nulls_del_rcu(&l->hash_node);
6c9059817   Alexei Starovoitov   bpf: pre-allocate...
1178
  		free_htab_elem(htab, l);
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1179
1180
  		ret = 0;
  	}
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1181
  	htab_unlock_bucket(htab, b, flags);
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1182
1183
  	return ret;
  }
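  /* LRU delete: the element is unlinked under the bucket lock, but the
   * node is pushed back to the LRU free list only after the lock has
   * been dropped.
   */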
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1184
1185
1186
  static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
  {
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
4fe843590   Alexei Starovoitov   bpf: convert htab...
1187
  	struct hlist_nulls_head *head;
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1188
1189
1190
1191
1192
  	struct bucket *b;
  	struct htab_elem *l;
  	unsigned long flags;
  	u32 hash, key_size;
  	int ret = -ENOENT;
1e6c62a88   Alexei Starovoitov   bpf: Introduce sl...
1193
  	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1194
1195
  
  	key_size = map->key_size;
c02034757   Daniel Borkmann   bpf: use per htab...
1196
  	hash = htab_map_hash(key, key_size, htab->hashrnd);
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1197
1198
  	b = __select_bucket(htab, hash);
  	head = &b->head;
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1199
  	flags = htab_lock_bucket(htab, b);
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1200
1201
1202
1203
  
  	l = lookup_elem_raw(head, hash, key, key_size);
  
  	if (l) {
4fe843590   Alexei Starovoitov   bpf: convert htab...
1204
  		hlist_nulls_del_rcu(&l->hash_node);
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1205
1206
  		ret = 0;
  	}
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1207
  	htab_unlock_bucket(htab, b, flags);
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1208
1209
1210
1211
  	if (l)
  		bpf_lru_push_free(&htab->lru, &l->lru_node);
  	return ret;
  }
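  /* Walk every bucket and free any elements that are still linked.
   * Only used for non-preallocated maps (see htab_map_free()); for
   * preallocated maps prealloc_destroy() tears the storage down instead.
   */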
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1212
1213
1214
1215
1216
  static void delete_all_elements(struct bpf_htab *htab)
  {
  	int i;
  
  	for (i = 0; i < htab->n_buckets; i++) {
4fe843590   Alexei Starovoitov   bpf: convert htab...
1217
1218
  		struct hlist_nulls_head *head = select_bucket(htab, i);
  		struct hlist_nulls_node *n;
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1219
  		struct htab_elem *l;
4fe843590   Alexei Starovoitov   bpf: convert htab...
1220
1221
  		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
  			hlist_nulls_del_rcu(&l->hash_node);
8c290e60f   Alexei Starovoitov   bpf: fix hashmap ...
1222
  			htab_elem_free(htab, l);
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1223
1224
1225
  		}
  	}
  }
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
1226

0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1227
1228
1229
1230
  /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
  static void htab_map_free(struct bpf_map *map)
  {
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
bba1dc0b5   Alexei Starovoitov   bpf: Remove redun...
1231
1232
1233
  	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
  	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
  	 * There is no need to synchronize_rcu() here to protect map elements.
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1234
  	 */
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1235

6c9059817   Alexei Starovoitov   bpf: pre-allocate...
1236
1237
  	/* some of the free_htab_elem() callbacks for elements of this map may
  	 * not have executed. Wait for them.
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1238
  	 */
6c9059817   Alexei Starovoitov   bpf: pre-allocate...
1239
  	rcu_barrier();
8c290e60f   Alexei Starovoitov   bpf: fix hashmap ...
1240
  	if (!htab_is_prealloc(htab))
6c9059817   Alexei Starovoitov   bpf: pre-allocate...
1241
  		delete_all_elements(htab);
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1242
1243
  	else
  		prealloc_destroy(htab);
a6ed3ea65   Alexei Starovoitov   bpf: restore beha...
1244
  	free_percpu(htab->extra_elems);
d407bd25a   Daniel Borkmann   bpf: don't trigge...
1245
  	bpf_map_area_free(htab->buckets);
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1246
1247
  	kfree(htab);
  }
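  /* bpffs pretty-printer: look the element up under RCU and print
   * "key: value" through the map's BTF type ids.
   */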
699c86d6e   Yonghong Song   bpf: btf: add pre...
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
  static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
  				   struct seq_file *m)
  {
  	void *value;
  
  	rcu_read_lock();
  
  	value = htab_map_lookup_elem(map, key);
  	if (!value) {
  		rcu_read_unlock();
  		return;
  	}
  
  	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
  	seq_puts(m, ": ");
  	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
  	seq_puts(m, "
  ");
  
  	rcu_read_unlock();
  }
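  /* Core of the BPF_MAP_*_BATCH commands.  Elements are copied a whole
   * bucket at a time into kernel buffers sized by a bucket_size guess
   * (grown and retried when a bucket turns out to be larger), then
   * copied to user space outside the bucket lock; with do_delete the
   * copied elements are also removed.  The number of elements copied is
   * reported in uattr->batch.count and the next bucket index in
   * out_batch.  Returns -ENOSPC when even the first bucket does not fit
   * into the requested count, and -ENOENT once all buckets are done.
   */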
057996380   Yonghong Song   bpf: Add batch op...
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
  static int
  __htab_map_lookup_and_delete_batch(struct bpf_map *map,
  				   const union bpf_attr *attr,
  				   union bpf_attr __user *uattr,
  				   bool do_delete, bool is_lru_map,
  				   bool is_percpu)
  {
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
  	u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
  	void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
  	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
  	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
  	void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
  	u32 batch, max_count, size, bucket_size;
b9aff38de   Yonghong Song   bpf: Fix a potent...
1283
  	struct htab_elem *node_to_free = NULL;
057996380   Yonghong Song   bpf: Add batch op...
1284
1285
1286
  	u64 elem_map_flags, map_flags;
  	struct hlist_nulls_head *head;
  	struct hlist_nulls_node *n;
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1287
1288
  	unsigned long flags = 0;
  	bool locked = false;
057996380   Yonghong Song   bpf: Add batch op...
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
  	struct htab_elem *l;
  	struct bucket *b;
  	int ret = 0;
  
  	elem_map_flags = attr->batch.elem_flags;
  	if ((elem_map_flags & ~BPF_F_LOCK) ||
  	    ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
  		return -EINVAL;
  
  	map_flags = attr->batch.flags;
  	if (map_flags)
  		return -EINVAL;
  
  	max_count = attr->batch.count;
  	if (!max_count)
  		return 0;
  
  	if (put_user(0, &uattr->batch.count))
  		return -EFAULT;
  
  	batch = 0;
  	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
  		return -EFAULT;
  
  	if (batch >= htab->n_buckets)
  		return -ENOENT;
  
  	key_size = htab->map.key_size;
  	roundup_key_size = round_up(htab->map.key_size, 8);
  	value_size = htab->map.value_size;
  	size = round_up(value_size, 8);
  	if (is_percpu)
  		value_size = size * num_possible_cpus();
  	total = 0;
  	/* while experimenting with hash tables with sizes ranging from 10 to
  	 * 1000, it was observed that a bucket can have up to 5 entries.
  	 */
  	bucket_size = 5;
  
  alloc:
  	/* We cannot do copy_from_user or copy_to_user inside
  	 * the rcu_read_lock. Allocate enough space here.
  	 */
  	keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
  	values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
  	if (!keys || !values) {
  		ret = -ENOMEM;
  		goto after_loop;
  	}
  
  again:
085fee1a7   Thomas Gleixner   bpf: Use recursio...
1340
  	bpf_disable_instrumentation();
057996380   Yonghong Song   bpf: Add batch op...
1341
1342
1343
1344
1345
1346
  	rcu_read_lock();
  again_nocopy:
  	dst_key = keys;
  	dst_val = values;
  	b = &htab->buckets[batch];
  	head = &b->head;
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1347
1348
  	/* do not grab the lock unless we need it (bucket_cnt > 0). */
  	if (locked)
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1349
  		flags = htab_lock_bucket(htab, b);
057996380   Yonghong Song   bpf: Add batch op...
1350
1351
1352
1353
  
  	bucket_cnt = 0;
  	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
  		bucket_cnt++;
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1354
1355
1356
1357
  	if (bucket_cnt && !locked) {
  		locked = true;
  		goto again_nocopy;
  	}
057996380   Yonghong Song   bpf: Add batch op...
1358
1359
1360
  	if (bucket_cnt > (max_count - total)) {
  		if (total == 0)
  			ret = -ENOSPC;
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1361
1362
1363
  		/* Note that since bucket_cnt > 0 here, it is implicit
  		 * that the lock was grabbed, so release it.
  		 */
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1364
  		htab_unlock_bucket(htab, b, flags);
057996380   Yonghong Song   bpf: Add batch op...
1365
  		rcu_read_unlock();
085fee1a7   Thomas Gleixner   bpf: Use recursio...
1366
  		bpf_enable_instrumentation();
057996380   Yonghong Song   bpf: Add batch op...
1367
1368
1369
1370
1371
  		goto after_loop;
  	}
  
  	if (bucket_cnt > bucket_size) {
  		bucket_size = bucket_cnt;
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1372
1373
1374
  		/* Note that since bucket_cnt > 0 here, it is implicit
  		 * that the lock was grabbed, so release it.
  		 */
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1375
  		htab_unlock_bucket(htab, b, flags);
057996380   Yonghong Song   bpf: Add batch op...
1376
  		rcu_read_unlock();
085fee1a7   Thomas Gleixner   bpf: Use recursio...
1377
  		bpf_enable_instrumentation();
057996380   Yonghong Song   bpf: Add batch op...
1378
1379
1380
1381
  		kvfree(keys);
  		kvfree(values);
  		goto alloc;
  	}
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1382
1383
1384
  	/* Next block is only safe to run if you have grabbed the lock */
  	if (!locked)
  		goto next_batch;
057996380   Yonghong Song   bpf: Add batch op...
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
  	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
  		memcpy(dst_key, l->key, key_size);
  
  		if (is_percpu) {
  			int off = 0, cpu;
  			void __percpu *pptr;
  
  			pptr = htab_elem_get_ptr(l, map->key_size);
  			for_each_possible_cpu(cpu) {
  				bpf_long_memcpy(dst_val + off,
  						per_cpu_ptr(pptr, cpu), size);
  				off += size;
  			}
  		} else {
  			value = l->key + roundup_key_size;
  			if (elem_map_flags & BPF_F_LOCK)
  				copy_map_value_locked(map, dst_val, value,
  						      true);
  			else
  				copy_map_value(map, dst_val, value);
  			check_and_init_map_lock(map, dst_val);
  		}
  		if (do_delete) {
  			hlist_nulls_del_rcu(&l->hash_node);
b9aff38de   Yonghong Song   bpf: Fix a potent...
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
  
  			/* bpf_lru_push_free() will acquire lru_lock, which
  			 * may cause deadlock. See comments in function
  			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
  			 * after releasing the bucket lock.
  			 */
  			if (is_lru_map) {
  				l->batch_flink = node_to_free;
  				node_to_free = l;
  			} else {
057996380   Yonghong Song   bpf: Add batch op...
1419
  				free_htab_elem(htab, l);
b9aff38de   Yonghong Song   bpf: Fix a potent...
1420
  			}
057996380   Yonghong Song   bpf: Add batch op...
1421
1422
1423
1424
  		}
  		dst_key += key_size;
  		dst_val += value_size;
  	}
d01f9b198   Thomas Gleixner   bpf: Factor out h...
1425
  	htab_unlock_bucket(htab, b, flags);
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1426
  	locked = false;
b9aff38de   Yonghong Song   bpf: Fix a potent...
1427
1428
1429
1430
1431
1432
  
  	while (node_to_free) {
  		l = node_to_free;
  		node_to_free = node_to_free->batch_flink;
  		bpf_lru_push_free(&htab->lru, &l->lru_node);
  	}
492e0d0d6   Brian Vazquez   bpf: Do not grab ...
1433
  next_batch:
057996380   Yonghong Song   bpf: Add batch op...
1434
1435
1436
1437
1438
1439
1440
1441
1442
  	/* If we are not copying data, we can go to the next bucket and
  	 * avoid dropping the RCU read lock.
  	 */
  	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
  		batch++;
  		goto again_nocopy;
  	}
  
  	rcu_read_unlock();
085fee1a7   Thomas Gleixner   bpf: Use recursio...
1443
  	bpf_enable_instrumentation();
057996380   Yonghong Song   bpf: Add batch op...
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
  	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
  	    key_size * bucket_cnt) ||
  	    copy_to_user(uvalues + total * value_size, values,
  	    value_size * bucket_cnt))) {
  		ret = -EFAULT;
  		goto after_loop;
  	}
  
  	total += bucket_cnt;
  	batch++;
  	if (batch >= htab->n_buckets) {
  		ret = -ENOENT;
  		goto after_loop;
  	}
  	goto again;
  
  after_loop:
  	if (ret == -EFAULT)
  		goto out;
  
  	/* copy # of entries and next batch */
  	ubatch = u64_to_user_ptr(attr->batch.out_batch);
  	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
  	    put_user(total, &uattr->batch.count))
  		ret = -EFAULT;
  
  out:
  	kvfree(keys);
  	kvfree(values);
  	return ret;
  }
  
  static int
  htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
  			     union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
  						  false, true);
  }
  
  static int
  htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
  					const union bpf_attr *attr,
  					union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
  						  false, true);
  }
  
  static int
  htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
  		      union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
  						  false, false);
  }
  
  static int
  htab_map_lookup_and_delete_batch(struct bpf_map *map,
  				 const union bpf_attr *attr,
  				 union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
  						  false, false);
  }
  
  static int
  htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
  				 const union bpf_attr *attr,
  				 union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
  						  true, true);
  }
  
  static int
  htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
  					    const union bpf_attr *attr,
  					    union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
  						  true, true);
  }
  
  static int
  htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
  			  union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
  						  true, false);
  }
  
  static int
  htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
  				     const union bpf_attr *attr,
  				     union bpf_attr __user *uattr)
  {
  	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
  						  true, false);
  }
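  /* A rough user-space sketch of driving the batch interface above
   * through the raw bpf(2) syscall.  Field names such as batch.map_fd
   * come from the UAPI union bpf_attr and are not visible in this file,
   * so treat this as an illustration, not a reference:
   *
   *	union bpf_attr attr = {};
   *	__u32 next = 0;
   *	int err;
   *
   *	attr.batch.map_fd    = map_fd;
   *	attr.batch.keys      = (__u64)(unsigned long)keys;
   *	attr.batch.values    = (__u64)(unsigned long)values;
   *	attr.batch.out_batch = (__u64)(unsigned long)&next;
   *
   *	for (;;) {
   *		attr.batch.count = n;      max elements for this call
   *		err = syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH,
   *			      &attr, sizeof(attr));
   *		consume attr.batch.count copied elements here
   *		if (err)                   no more buckets (ENOENT) or error
   *			break;
   *		attr.batch.in_batch = (__u64)(unsigned long)&next;
   *	}
   */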
d6c4503cc   Yonghong Song   bpf: Implement bp...
1544
1545
1546
1547
  struct bpf_iter_seq_hash_map_info {
  	struct bpf_map *map;
  	struct bpf_htab *htab;
  	void *percpu_value_buf; /* non-NULL means percpu hash */
d6c4503cc   Yonghong Song   bpf: Implement bp...
1548
1549
1550
1551
1552
1553
1554
1555
1556
  	u32 bucket_id;
  	u32 skip_elems;
  };
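  /* bucket_id is the bucket currently being walked and skip_elems the
   * number of elements already emitted from it.  The walk drops the RCU
   * read lock between buckets, so these two fields are what allows
   * bpf_hash_map_seq_find_next() to resume.
   */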
  
  static struct htab_elem *
  bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
  			   struct htab_elem *prev_elem)
  {
  	const struct bpf_htab *htab = info->htab;
d6c4503cc   Yonghong Song   bpf: Implement bp...
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
  	u32 skip_elems = info->skip_elems;
  	u32 bucket_id = info->bucket_id;
  	struct hlist_nulls_head *head;
  	struct hlist_nulls_node *n;
  	struct htab_elem *elem;
  	struct bucket *b;
  	u32 i, count;
  
  	if (bucket_id >= htab->n_buckets)
  		return NULL;
  
  	/* try to find next elem in the same bucket */
  	if (prev_elem) {
  		/* no update/deletion on this bucket, prev_elem should still be valid
  		 * and we won't skip elements.
  		 */
  		n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
  		elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
  		if (elem)
  			return elem;
  
  		/* not found, unlock and go to the next bucket */
  		b = &htab->buckets[bucket_id++];
dc0988bbe   Yonghong Song   bpf: Do not use b...
1580
  		rcu_read_unlock();
d6c4503cc   Yonghong Song   bpf: Implement bp...
1581
1582
1583
1584
1585
  		skip_elems = 0;
  	}
  
  	for (i = bucket_id; i < htab->n_buckets; i++) {
  		b = &htab->buckets[i];
dc0988bbe   Yonghong Song   bpf: Do not use b...
1586
  		rcu_read_lock();
d6c4503cc   Yonghong Song   bpf: Implement bp...
1587
1588
1589
1590
1591
  
  		count = 0;
  		head = &b->head;
  		hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
  			if (count >= skip_elems) {
d6c4503cc   Yonghong Song   bpf: Implement bp...
1592
1593
1594
1595
1596
1597
  				info->bucket_id = i;
  				info->skip_elems = count;
  				return elem;
  			}
  			count++;
  		}
dc0988bbe   Yonghong Song   bpf: Do not use b...
1598
  		rcu_read_unlock();
d6c4503cc   Yonghong Song   bpf: Implement bp...
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
  		skip_elems = 0;
  	}
  
  	info->bucket_id = i;
  	info->skip_elems = 0;
  	return NULL;
  }
  
  static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
  {
  	struct bpf_iter_seq_hash_map_info *info = seq->private;
  	struct htab_elem *elem;
  
  	elem = bpf_hash_map_seq_find_next(info, NULL);
  	if (!elem)
  		return NULL;
  
  	if (*pos == 0)
  		++*pos;
  	return elem;
  }
  
  static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  {
  	struct bpf_iter_seq_hash_map_info *info = seq->private;
  
  	++*pos;
  	++info->skip_elems;
  	return bpf_hash_map_seq_find_next(info, v);
  }
  
  static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
  {
  	struct bpf_iter_seq_hash_map_info *info = seq->private;
  	u32 roundup_key_size, roundup_value_size;
  	struct bpf_iter__bpf_map_elem ctx = {};
  	struct bpf_map *map = info->map;
  	struct bpf_iter_meta meta;
  	int ret = 0, off = 0, cpu;
  	struct bpf_prog *prog;
  	void __percpu *pptr;
  
  	meta.seq = seq;
  	prog = bpf_iter_get_info(&meta, elem == NULL);
  	if (prog) {
  		ctx.meta = &meta;
  		ctx.map = info->map;
  		if (elem) {
  			roundup_key_size = round_up(map->key_size, 8);
  			ctx.key = elem->key;
  			if (!info->percpu_value_buf) {
  				ctx.value = elem->key + roundup_key_size;
  			} else {
  				roundup_value_size = round_up(map->value_size, 8);
  				pptr = htab_elem_get_ptr(elem, map->key_size);
  				for_each_possible_cpu(cpu) {
  					bpf_long_memcpy(info->percpu_value_buf + off,
  							per_cpu_ptr(pptr, cpu),
  							roundup_value_size);
  					off += roundup_value_size;
  				}
  				ctx.value = info->percpu_value_buf;
  			}
  		}
  		ret = bpf_iter_run_prog(prog, &ctx);
  	}
  
  	return ret;
  }
  
  static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
  {
  	return __bpf_hash_map_seq_show(seq, v);
  }
  
  static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
  {
d6c4503cc   Yonghong Song   bpf: Implement bp...
1676
1677
1678
  	if (!v)
  		(void)__bpf_hash_map_seq_show(seq, NULL);
  	else
dc0988bbe   Yonghong Song   bpf: Do not use b...
1679
  		rcu_read_unlock();
d6c4503cc   Yonghong Song   bpf: Implement bp...
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
  }
  
  static int bpf_iter_init_hash_map(void *priv_data,
  				  struct bpf_iter_aux_info *aux)
  {
  	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
  	struct bpf_map *map = aux->map;
  	void *value_buf;
  	u32 buf_size;
  
  	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
  	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
  		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
  		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
  		if (!value_buf)
  			return -ENOMEM;
  
  		seq_info->percpu_value_buf = value_buf;
  	}
  
  	seq_info->map = map;
  	seq_info->htab = container_of(map, struct bpf_htab, map);
  	return 0;
  }
  
  static void bpf_iter_fini_hash_map(void *priv_data)
  {
  	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
  
  	kfree(seq_info->percpu_value_buf);
  }
  
  static const struct seq_operations bpf_hash_map_seq_ops = {
  	.start	= bpf_hash_map_seq_start,
  	.next	= bpf_hash_map_seq_next,
  	.stop	= bpf_hash_map_seq_stop,
  	.show	= bpf_hash_map_seq_show,
  };
  
  static const struct bpf_iter_seq_info iter_seq_info = {
  	.seq_ops		= &bpf_hash_map_seq_ops,
  	.init_seq_private	= bpf_iter_init_hash_map,
  	.fini_seq_private	= bpf_iter_fini_hash_map,
  	.seq_priv_size		= sizeof(struct bpf_iter_seq_hash_map_info),
  };
41c48f3a9   Andrey Ignatov   bpf: Support acce...
1725
  static int htab_map_btf_id;
40077e0cf   Johannes Berg   bpf: remove struc...
1726
  const struct bpf_map_ops htab_map_ops = {
f4d052592   Martin KaFai Lau   bpf: Add map_meta...
1727
  	.map_meta_equal = bpf_map_meta_equal,
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
1728
  	.map_alloc_check = htab_map_alloc_check,
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1729
1730
1731
1732
1733
1734
  	.map_alloc = htab_map_alloc,
  	.map_free = htab_map_free,
  	.map_get_next_key = htab_map_get_next_key,
  	.map_lookup_elem = htab_map_lookup_elem,
  	.map_update_elem = htab_map_update_elem,
  	.map_delete_elem = htab_map_delete_elem,
9015d2f59   Alexei Starovoitov   bpf: inline htab_...
1735
  	.map_gen_lookup = htab_map_gen_lookup,
699c86d6e   Yonghong Song   bpf: btf: add pre...
1736
  	.map_seq_show_elem = htab_map_seq_show_elem,
057996380   Yonghong Song   bpf: Add batch op...
1737
  	BATCH_OPS(htab),
41c48f3a9   Andrey Ignatov   bpf: Support acce...
1738
1739
  	.map_btf_name = "bpf_htab",
  	.map_btf_id = &htab_map_btf_id,
d6c4503cc   Yonghong Song   bpf: Implement bp...
1740
  	.iter_seq_info = &iter_seq_info,
0f8e4bd8a   Alexei Starovoitov   bpf: add hashtabl...
1741
  };
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
1742
  static int htab_lru_map_btf_id;
40077e0cf   Johannes Berg   bpf: remove struc...
1743
  const struct bpf_map_ops htab_lru_map_ops = {
f4d052592   Martin KaFai Lau   bpf: Add map_meta...
1744
  	.map_meta_equal = bpf_map_meta_equal,
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
1745
  	.map_alloc_check = htab_map_alloc_check,
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1746
1747
1748
1749
  	.map_alloc = htab_map_alloc,
  	.map_free = htab_map_free,
  	.map_get_next_key = htab_map_get_next_key,
  	.map_lookup_elem = htab_lru_map_lookup_elem,
50b045a8c   Daniel Borkmann   bpf, lru: avoid m...
1750
  	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1751
1752
  	.map_update_elem = htab_lru_map_update_elem,
  	.map_delete_elem = htab_lru_map_delete_elem,
cc555421b   Martin KaFai Lau   bpf: Inline LRU m...
1753
  	.map_gen_lookup = htab_lru_map_gen_lookup,
699c86d6e   Yonghong Song   bpf: btf: add pre...
1754
  	.map_seq_show_elem = htab_map_seq_show_elem,
057996380   Yonghong Song   bpf: Add batch op...
1755
  	BATCH_OPS(htab_lru),
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
1756
1757
  	.map_btf_name = "bpf_htab",
  	.map_btf_id = &htab_lru_map_btf_id,
d6c4503cc   Yonghong Song   bpf: Implement bp...
1758
  	.iter_seq_info = &iter_seq_info,
29ba732ac   Martin KaFai Lau   bpf: Add BPF_MAP_...
1759
  };
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
  /* Called from eBPF program */
  static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
  {
  	struct htab_elem *l = __htab_map_lookup_elem(map, key);
  
  	if (l)
  		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
  	else
  		return NULL;
  }
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
  static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
  {
  	struct htab_elem *l = __htab_map_lookup_elem(map, key);
  
  	if (l) {
  		bpf_lru_node_set_ref(&l->lru_node);
  		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
  	}
  
  	return NULL;
  }
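  /* Syscall-side per-cpu lookup: concatenate every possible CPU's value,
   * each rounded up to 8 bytes, into the caller's flat buffer.
   */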
15a07b338   Alexei Starovoitov   bpf: add lookup/u...
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
  int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
  {
  	struct htab_elem *l;
  	void __percpu *pptr;
  	int ret = -ENOENT;
  	int cpu, off = 0;
  	u32 size;
  
  	/* per_cpu areas are zero-filled and bpf programs can only
  	 * access 'value_size' of them, so copying rounded areas
  	 * will not leak any kernel data
  	 */
  	size = round_up(map->value_size, 8);
  	rcu_read_lock();
  	l = __htab_map_lookup_elem(map, key);
  	if (!l)
  		goto out;
50b045a8c   Daniel Borkmann   bpf, lru: avoid m...
1798
1799
1800
  	/* We do not mark LRU map element here in order to not mess up
  	 * eviction heuristics when user space does a map walk.
  	 */
15a07b338   Alexei Starovoitov   bpf: add lookup/u...
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
  	pptr = htab_elem_get_ptr(l, map->key_size);
  	for_each_possible_cpu(cpu) {
  		bpf_long_memcpy(value + off,
  				per_cpu_ptr(pptr, cpu), size);
  		off += size;
  	}
  	ret = 0;
  out:
  	rcu_read_unlock();
  	return ret;
  }
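  /* Syscall-side per-cpu update: takes rcu_read_lock() itself and
   * dispatches to the LRU or plain per-cpu update path with
   * onallcpus == true, i.e. the caller supplies a value for every
   * possible CPU.
   */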
  
  int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
  			   u64 map_flags)
  {
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1816
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
6bbd9a05a   Sasha Levin   bpf: grab rcu rea...
1817
1818
1819
  	int ret;
  
  	rcu_read_lock();
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1820
1821
1822
1823
1824
1825
  	if (htab_is_lru(htab))
  		ret = __htab_lru_percpu_map_update_elem(map, key, value,
  							map_flags, true);
  	else
  		ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
  						    true);
6bbd9a05a   Sasha Levin   bpf: grab rcu rea...
1826
1827
1828
  	rcu_read_unlock();
  
  	return ret;
15a07b338   Alexei Starovoitov   bpf: add lookup/u...
1829
  }
c7b27c37a   Yonghong Song   bpf: add bpffs pr...
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
  static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
  					  struct seq_file *m)
  {
  	struct htab_elem *l;
  	void __percpu *pptr;
  	int cpu;
  
  	rcu_read_lock();
  
  	l = __htab_map_lookup_elem(map, key);
  	if (!l) {
  		rcu_read_unlock();
  		return;
  	}
  
  	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
  	seq_puts(m, ": {
  ");
  	pptr = htab_elem_get_ptr(l, map->key_size);
  	for_each_possible_cpu(cpu) {
  		seq_printf(m, "\tcpu%d: ", cpu);
  		btf_type_seq_show(map->btf, map->btf_value_type_id,
  				  per_cpu_ptr(pptr, cpu), m);
  		seq_puts(m, "
  ");
  	}
  	seq_puts(m, "}
  ");
  
  	rcu_read_unlock();
  }
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
1861
  static int htab_percpu_map_btf_id;
40077e0cf   Johannes Berg   bpf: remove struc...
1862
  const struct bpf_map_ops htab_percpu_map_ops = {
f4d052592   Martin KaFai Lau   bpf: Add map_meta...
1863
  	.map_meta_equal = bpf_map_meta_equal,
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
1864
  	.map_alloc_check = htab_map_alloc_check,
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1865
1866
1867
1868
1869
1870
  	.map_alloc = htab_map_alloc,
  	.map_free = htab_map_free,
  	.map_get_next_key = htab_map_get_next_key,
  	.map_lookup_elem = htab_percpu_map_lookup_elem,
  	.map_update_elem = htab_percpu_map_update_elem,
  	.map_delete_elem = htab_map_delete_elem,
c7b27c37a   Yonghong Song   bpf: add bpffs pr...
1871
  	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
057996380   Yonghong Song   bpf: Add batch op...
1872
  	BATCH_OPS(htab_percpu),
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
1873
1874
  	.map_btf_name = "bpf_htab",
  	.map_btf_id = &htab_percpu_map_btf_id,
d6c4503cc   Yonghong Song   bpf: Implement bp...
1875
  	.iter_seq_info = &iter_seq_info,
824bd0ce6   Alexei Starovoitov   bpf: introduce BP...
1876
  };
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
1877
  static int htab_lru_percpu_map_btf_id;
40077e0cf   Johannes Berg   bpf: remove struc...
1878
  const struct bpf_map_ops htab_lru_percpu_map_ops = {
f4d052592   Martin KaFai Lau   bpf: Add map_meta...
1879
  	.map_meta_equal = bpf_map_meta_equal,
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
1880
  	.map_alloc_check = htab_map_alloc_check,
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1881
1882
1883
1884
1885
1886
  	.map_alloc = htab_map_alloc,
  	.map_free = htab_map_free,
  	.map_get_next_key = htab_map_get_next_key,
  	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
  	.map_update_elem = htab_lru_percpu_map_update_elem,
  	.map_delete_elem = htab_lru_map_delete_elem,
c7b27c37a   Yonghong Song   bpf: add bpffs pr...
1887
  	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
057996380   Yonghong Song   bpf: Add batch op...
1888
  	BATCH_OPS(htab_lru_percpu),
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
1889
1890
  	.map_btf_name = "bpf_htab",
  	.map_btf_id = &htab_lru_percpu_map_btf_id,
d6c4503cc   Yonghong Song   bpf: Implement bp...
1891
  	.iter_seq_info = &iter_seq_info,
8f8449384   Martin KaFai Lau   bpf: Add BPF_MAP_...
1892
  };
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
1893
  static int fd_htab_map_alloc_check(union bpf_attr *attr)
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
1894
  {
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
1895
  	if (attr->value_size != sizeof(u32))
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
1896
1897
  		return -EINVAL;
  	return htab_map_alloc_check(attr);
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
  }
  
  static void fd_htab_map_free(struct bpf_map *map)
  {
  	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
  	struct hlist_nulls_node *n;
  	struct hlist_nulls_head *head;
  	struct htab_elem *l;
  	int i;
  
  	for (i = 0; i < htab->n_buckets; i++) {
  		head = select_bucket(htab, i);
  
  		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
  			void *ptr = fd_htab_map_get_ptr(map, l);
  
  			map->ops->map_fd_put_ptr(ptr);
  		}
  	}
  
  	htab_map_free(map);
  }
  
  /* only called from syscall */
14dc6f04f   Martin KaFai Lau   bpf: Add syscall ...
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
  int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
  {
  	void **ptr;
  	int ret = 0;
  
  	if (!map->ops->map_fd_sys_lookup_elem)
  		return -ENOTSUPP;
  
  	rcu_read_lock();
  	ptr = htab_map_lookup_elem(map, key);
  	if (ptr)
  		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
  	else
  		ret = -ENOENT;
  	rcu_read_unlock();
  
  	return ret;
  }
  
  /* only called from syscall */
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
  int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
  				void *key, void *value, u64 map_flags)
  {
  	void *ptr;
  	int ret;
  	u32 ufd = *(u32 *)value;
  
  	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
  	if (IS_ERR(ptr))
  		return PTR_ERR(ptr);
  
  	ret = htab_map_update_elem(map, key, &ptr, map_flags);
  	if (ret)
  		map->ops->map_fd_put_ptr(ptr);
  
  	return ret;
  }
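  /* Hash-of-maps allocation: snapshot the metadata of the inner map
   * referenced by inner_map_fd into map->inner_map_meta; it is freed
   * again in htab_of_map_free().
   */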
  
  static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
  {
  	struct bpf_map *map, *inner_map_meta;
  
  	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
  	if (IS_ERR(inner_map_meta))
  		return inner_map_meta;
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
1967
  	map = htab_map_alloc(attr);
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
  	if (IS_ERR(map)) {
  		bpf_map_meta_free(inner_map_meta);
  		return map;
  	}
  
  	map->inner_map_meta = inner_map_meta;
  
  	return map;
  }
  
  static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
  {
  	struct bpf_map **inner_map  = htab_map_lookup_elem(map, key);
  
  	if (!inner_map)
  		return NULL;
  
  	return READ_ONCE(*inner_map);
  }
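  /* Inline map-in-map lookup: emit BPF instructions that call
   * __htab_map_lookup_elem() and, on a hit, advance past the
   * 8-byte-rounded key and load the stored inner map pointer.
   */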
4a8f87e60   Daniel Borkmann   bpf: Allow for ma...
1987
  static int htab_of_map_gen_lookup(struct bpf_map *map,
7b0c2a050   Daniel Borkmann   bpf: inline map i...
1988
1989
1990
1991
  				  struct bpf_insn *insn_buf)
  {
  	struct bpf_insn *insn = insn_buf;
  	const int ret = BPF_REG_0;
09772d92c   Daniel Borkmann   bpf: avoid retpol...
1992
1993
1994
  	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
  		     (void *(*)(struct bpf_map *map, void *key))NULL));
  	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
7b0c2a050   Daniel Borkmann   bpf: inline map i...
1995
1996
1997
1998
1999
2000
2001
2002
  	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
  	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
  				offsetof(struct htab_elem, key) +
  				round_up(map->key_size, 8));
  	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
  
  	return insn - insn_buf;
  }
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
2003
2004
2005
2006
2007
  static void htab_of_map_free(struct bpf_map *map)
  {
  	bpf_map_meta_free(map->inner_map_meta);
  	fd_htab_map_free(map);
  }
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
2008
  static int htab_of_maps_map_btf_id;
40077e0cf   Johannes Berg   bpf: remove struc...
2009
  const struct bpf_map_ops htab_of_maps_map_ops = {
9328e0d1b   Jakub Kicinski   bpf: hashtab: mov...
2010
  	.map_alloc_check = fd_htab_map_alloc_check,
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
2011
2012
2013
2014
2015
2016
2017
  	.map_alloc = htab_of_map_alloc,
  	.map_free = htab_of_map_free,
  	.map_get_next_key = htab_map_get_next_key,
  	.map_lookup_elem = htab_of_map_lookup_elem,
  	.map_delete_elem = htab_map_delete_elem,
  	.map_fd_get_ptr = bpf_map_fd_get_ptr,
  	.map_fd_put_ptr = bpf_map_fd_put_ptr,
14dc6f04f   Martin KaFai Lau   bpf: Add syscall ...
2018
  	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
7b0c2a050   Daniel Borkmann   bpf: inline map i...
2019
  	.map_gen_lookup = htab_of_map_gen_lookup,
e8d2bec04   Daniel Borkmann   bpf: decouple btf...
2020
  	.map_check_btf = map_check_no_btf,
2872e9ac3   Andrey Ignatov   bpf: Set map_btf_...
2021
2022
  	.map_btf_name = "bpf_htab",
  	.map_btf_id = &htab_of_maps_map_btf_id,
bcc6b1b7e   Martin KaFai Lau   bpf: Add hash of ...
2023
  };