Commit db20fd2b01087bdfbe30bce314a198eefedcc42e
Committed by
David S. Miller
1 parent
749730ce42
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
bpf: add lookup/update/delete/iterate methods to BPF maps
'maps' is a generic storage of different types for sharing data between kernel and userspace. The maps are accessed from user space via BPF syscall, which has commands: - create a map with given type and attributes fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size) returns fd or negative error - lookup key in a given map referenced by fd err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero and stores found elem into value or negative error - create or update key/value pair in a given map err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero or negative error - find and delete element by key in a given map err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key - iterate map elements (based on input key return next_key) err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->next_key - close(fd) deletes the map Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 3 changed files with 281 additions and 0 deletions Side-by-side Diff
include/linux/bpf.h
... | ... | @@ -9,6 +9,7 @@ |
9 | 9 | |
10 | 10 | #include <uapi/linux/bpf.h> |
11 | 11 | #include <linux/workqueue.h> |
12 | +#include <linux/file.h> | |
12 | 13 | |
13 | 14 | struct bpf_map; |
14 | 15 | |
... | ... | @@ -17,6 +18,12 @@ |
17 | 18 | /* funcs callable from userspace (via syscall) */ |
18 | 19 | struct bpf_map *(*map_alloc)(union bpf_attr *attr); |
19 | 20 | void (*map_free)(struct bpf_map *); |
21 | + int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); | |
22 | + | |
23 | + /* funcs callable from userspace and from eBPF programs */ | |
24 | + void *(*map_lookup_elem)(struct bpf_map *map, void *key); | |
25 | + int (*map_update_elem)(struct bpf_map *map, void *key, void *value); | |
26 | + int (*map_delete_elem)(struct bpf_map *map, void *key); | |
20 | 27 | }; |
21 | 28 | |
22 | 29 | struct bpf_map { |
... | ... | @@ -37,6 +44,7 @@ |
37 | 44 | |
38 | 45 | void bpf_register_map_type(struct bpf_map_type_list *tl); |
39 | 46 | void bpf_map_put(struct bpf_map *map); |
47 | +struct bpf_map *bpf_map_get(struct fd f); | |
40 | 48 | |
41 | 49 | #endif /* _LINUX_BPF_H */ |
include/uapi/linux/bpf.h
... | ... | @@ -70,6 +70,35 @@ |
70 | 70 | * map is deleted when fd is closed |
71 | 71 | */ |
72 | 72 | BPF_MAP_CREATE, |
73 | + | |
74 | + /* lookup key in a given map | |
75 | + * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) | |
76 | + * Using attr->map_fd, attr->key, attr->value | |
77 | + * returns zero and stores found elem into value | |
78 | + * or negative error | |
79 | + */ | |
80 | + BPF_MAP_LOOKUP_ELEM, | |
81 | + | |
82 | + /* create or update key/value pair in a given map | |
83 | + * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) | |
84 | + * Using attr->map_fd, attr->key, attr->value | |
85 | + * returns zero or negative error | |
86 | + */ | |
87 | + BPF_MAP_UPDATE_ELEM, | |
88 | + | |
89 | + /* find and delete elem by key in a given map | |
90 | + * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) | |
91 | + * Using attr->map_fd, attr->key | |
92 | + * returns zero or negative error | |
93 | + */ | |
94 | + BPF_MAP_DELETE_ELEM, | |
95 | + | |
96 | + /* lookup key in a given map and return next key | |
97 | + * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) | |
98 | + * Using attr->map_fd, attr->key, attr->next_key | |
99 | + * returns zero and stores next key or negative error | |
100 | + */ | |
101 | + BPF_MAP_GET_NEXT_KEY, | |
73 | 102 | }; |
74 | 103 | |
75 | 104 | enum bpf_map_type { |
... | ... | @@ -82,6 +111,15 @@ |
82 | 111 | __u32 key_size; /* size of key in bytes */ |
83 | 112 | __u32 value_size; /* size of value in bytes */ |
84 | 113 | __u32 max_entries; /* max number of entries in a map */ |
114 | + }; | |
115 | + | |
116 | + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ | |
117 | + __u32 map_fd; | |
118 | + __aligned_u64 key; | |
119 | + union { | |
120 | + __aligned_u64 value; | |
121 | + __aligned_u64 next_key; | |
122 | + }; | |
85 | 123 | }; |
86 | 124 | } __attribute__((aligned(8))); |
87 | 125 |
kernel/bpf/syscall.c
... | ... | @@ -13,6 +13,7 @@ |
13 | 13 | #include <linux/syscalls.h> |
14 | 14 | #include <linux/slab.h> |
15 | 15 | #include <linux/anon_inodes.h> |
16 | +#include <linux/file.h> | |
16 | 17 | |
17 | 18 | static LIST_HEAD(bpf_map_types); |
18 | 19 | |
... | ... | @@ -111,6 +112,228 @@ |
111 | 112 | return err; |
112 | 113 | } |
113 | 114 | |
115 | +/* if error is returned, fd is released. | |
116 | + * On success caller should complete fd access with matching fdput() | |
117 | + */ | |
118 | +struct bpf_map *bpf_map_get(struct fd f) | |
119 | +{ | |
120 | + struct bpf_map *map; | |
121 | + | |
122 | + if (!f.file) | |
123 | + return ERR_PTR(-EBADF); | |
124 | + | |
125 | + if (f.file->f_op != &bpf_map_fops) { | |
126 | + fdput(f); | |
127 | + return ERR_PTR(-EINVAL); | |
128 | + } | |
129 | + | |
130 | + map = f.file->private_data; | |
131 | + | |
132 | + return map; | |
133 | +} | |
134 | + | |
135 | +/* helper to convert user pointers passed inside __aligned_u64 fields */ | |
136 | +static void __user *u64_to_ptr(__u64 val) | |
137 | +{ | |
138 | + return (void __user *) (unsigned long) val; | |
139 | +} | |
140 | + | |
141 | +/* last field in 'union bpf_attr' used by this command */ | |
142 | +#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value | |
143 | + | |
144 | +static int map_lookup_elem(union bpf_attr *attr) | |
145 | +{ | |
146 | + void __user *ukey = u64_to_ptr(attr->key); | |
147 | + void __user *uvalue = u64_to_ptr(attr->value); | |
148 | + int ufd = attr->map_fd; | |
149 | + struct fd f = fdget(ufd); | |
150 | + struct bpf_map *map; | |
151 | + void *key, *value; | |
152 | + int err; | |
153 | + | |
154 | + if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) | |
155 | + return -EINVAL; | |
156 | + | |
157 | + map = bpf_map_get(f); | |
158 | + if (IS_ERR(map)) | |
159 | + return PTR_ERR(map); | |
160 | + | |
161 | + err = -ENOMEM; | |
162 | + key = kmalloc(map->key_size, GFP_USER); | |
163 | + if (!key) | |
164 | + goto err_put; | |
165 | + | |
166 | + err = -EFAULT; | |
167 | + if (copy_from_user(key, ukey, map->key_size) != 0) | |
168 | + goto free_key; | |
169 | + | |
170 | + err = -ESRCH; | |
171 | + rcu_read_lock(); | |
172 | + value = map->ops->map_lookup_elem(map, key); | |
173 | + if (!value) | |
174 | + goto err_unlock; | |
175 | + | |
176 | + err = -EFAULT; | |
177 | + if (copy_to_user(uvalue, value, map->value_size) != 0) | |
178 | + goto err_unlock; | |
179 | + | |
180 | + err = 0; | |
181 | + | |
182 | +err_unlock: | |
183 | + rcu_read_unlock(); | |
184 | +free_key: | |
185 | + kfree(key); | |
186 | +err_put: | |
187 | + fdput(f); | |
188 | + return err; | |
189 | +} | |
190 | + | |
191 | +#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value | |
192 | + | |
193 | +static int map_update_elem(union bpf_attr *attr) | |
194 | +{ | |
195 | + void __user *ukey = u64_to_ptr(attr->key); | |
196 | + void __user *uvalue = u64_to_ptr(attr->value); | |
197 | + int ufd = attr->map_fd; | |
198 | + struct fd f = fdget(ufd); | |
199 | + struct bpf_map *map; | |
200 | + void *key, *value; | |
201 | + int err; | |
202 | + | |
203 | + if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) | |
204 | + return -EINVAL; | |
205 | + | |
206 | + map = bpf_map_get(f); | |
207 | + if (IS_ERR(map)) | |
208 | + return PTR_ERR(map); | |
209 | + | |
210 | + err = -ENOMEM; | |
211 | + key = kmalloc(map->key_size, GFP_USER); | |
212 | + if (!key) | |
213 | + goto err_put; | |
214 | + | |
215 | + err = -EFAULT; | |
216 | + if (copy_from_user(key, ukey, map->key_size) != 0) | |
217 | + goto free_key; | |
218 | + | |
219 | + err = -ENOMEM; | |
220 | + value = kmalloc(map->value_size, GFP_USER); | |
221 | + if (!value) | |
222 | + goto free_key; | |
223 | + | |
224 | + err = -EFAULT; | |
225 | + if (copy_from_user(value, uvalue, map->value_size) != 0) | |
226 | + goto free_value; | |
227 | + | |
228 | + /* eBPF program that use maps are running under rcu_read_lock(), | |
229 | + * therefore all map accessors rely on this fact, so do the same here | |
230 | + */ | |
231 | + rcu_read_lock(); | |
232 | + err = map->ops->map_update_elem(map, key, value); | |
233 | + rcu_read_unlock(); | |
234 | + | |
235 | +free_value: | |
236 | + kfree(value); | |
237 | +free_key: | |
238 | + kfree(key); | |
239 | +err_put: | |
240 | + fdput(f); | |
241 | + return err; | |
242 | +} | |
243 | + | |
244 | +#define BPF_MAP_DELETE_ELEM_LAST_FIELD key | |
245 | + | |
246 | +static int map_delete_elem(union bpf_attr *attr) | |
247 | +{ | |
248 | + void __user *ukey = u64_to_ptr(attr->key); | |
249 | + int ufd = attr->map_fd; | |
250 | + struct fd f = fdget(ufd); | |
251 | + struct bpf_map *map; | |
252 | + void *key; | |
253 | + int err; | |
254 | + | |
255 | + if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) | |
256 | + return -EINVAL; | |
257 | + | |
258 | + map = bpf_map_get(f); | |
259 | + if (IS_ERR(map)) | |
260 | + return PTR_ERR(map); | |
261 | + | |
262 | + err = -ENOMEM; | |
263 | + key = kmalloc(map->key_size, GFP_USER); | |
264 | + if (!key) | |
265 | + goto err_put; | |
266 | + | |
267 | + err = -EFAULT; | |
268 | + if (copy_from_user(key, ukey, map->key_size) != 0) | |
269 | + goto free_key; | |
270 | + | |
271 | + rcu_read_lock(); | |
272 | + err = map->ops->map_delete_elem(map, key); | |
273 | + rcu_read_unlock(); | |
274 | + | |
275 | +free_key: | |
276 | + kfree(key); | |
277 | +err_put: | |
278 | + fdput(f); | |
279 | + return err; | |
280 | +} | |
281 | + | |
282 | +/* last field in 'union bpf_attr' used by this command */ | |
283 | +#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key | |
284 | + | |
285 | +static int map_get_next_key(union bpf_attr *attr) | |
286 | +{ | |
287 | + void __user *ukey = u64_to_ptr(attr->key); | |
288 | + void __user *unext_key = u64_to_ptr(attr->next_key); | |
289 | + int ufd = attr->map_fd; | |
290 | + struct fd f = fdget(ufd); | |
291 | + struct bpf_map *map; | |
292 | + void *key, *next_key; | |
293 | + int err; | |
294 | + | |
295 | + if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) | |
296 | + return -EINVAL; | |
297 | + | |
298 | + map = bpf_map_get(f); | |
299 | + if (IS_ERR(map)) | |
300 | + return PTR_ERR(map); | |
301 | + | |
302 | + err = -ENOMEM; | |
303 | + key = kmalloc(map->key_size, GFP_USER); | |
304 | + if (!key) | |
305 | + goto err_put; | |
306 | + | |
307 | + err = -EFAULT; | |
308 | + if (copy_from_user(key, ukey, map->key_size) != 0) | |
309 | + goto free_key; | |
310 | + | |
311 | + err = -ENOMEM; | |
312 | + next_key = kmalloc(map->key_size, GFP_USER); | |
313 | + if (!next_key) | |
314 | + goto free_key; | |
315 | + | |
316 | + rcu_read_lock(); | |
317 | + err = map->ops->map_get_next_key(map, key, next_key); | |
318 | + rcu_read_unlock(); | |
319 | + if (err) | |
320 | + goto free_next_key; | |
321 | + | |
322 | + err = -EFAULT; | |
323 | + if (copy_to_user(unext_key, next_key, map->key_size) != 0) | |
324 | + goto free_next_key; | |
325 | + | |
326 | + err = 0; | |
327 | + | |
328 | +free_next_key: | |
329 | + kfree(next_key); | |
330 | +free_key: | |
331 | + kfree(key); | |
332 | +err_put: | |
333 | + fdput(f); | |
334 | + return err; | |
335 | +} | |
336 | + | |
114 | 337 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) |
115 | 338 | { |
116 | 339 | union bpf_attr attr = {}; |
... | ... | @@ -159,6 +382,18 @@ |
159 | 382 | switch (cmd) { |
160 | 383 | case BPF_MAP_CREATE: |
161 | 384 | err = map_create(&attr); |
385 | + break; | |
386 | + case BPF_MAP_LOOKUP_ELEM: | |
387 | + err = map_lookup_elem(&attr); | |
388 | + break; | |
389 | + case BPF_MAP_UPDATE_ELEM: | |
390 | + err = map_update_elem(&attr); | |
391 | + break; | |
392 | + case BPF_MAP_DELETE_ELEM: | |
393 | + err = map_delete_elem(&attr); | |
394 | + break; | |
395 | + case BPF_MAP_GET_NEXT_KEY: | |
396 | + err = map_get_next_key(&attr); | |
162 | 397 | break; |
163 | 398 | default: |
164 | 399 | err = -EINVAL; |
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 592867
-
mentioned in commit 8ebe66
-
mentioned in commit 592867
-
mentioned in commit 8ebe66
-
mentioned in commit 592867
-
mentioned in commit 8ebe66
-
mentioned in commit 8ebe66
-
mentioned in commit 592867
-
mentioned in commit 592867