Commit db20fd2b01087bdfbe30bce314a198eefedcc42e

Authored by Alexei Starovoitov
Committed by David S. Miller
1 parent 749730ce42

bpf: add lookup/update/delete/iterate methods to BPF maps

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key

- iterate map elements (based on input key return next_key)
  err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->next_key

- close(fd) deletes the map

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 3 changed files with 281 additions and 0 deletions Side-by-side Diff

... ... @@ -9,6 +9,7 @@
9 9  
10 10 #include <uapi/linux/bpf.h>
11 11 #include <linux/workqueue.h>
  12 +#include <linux/file.h>
12 13  
13 14 struct bpf_map;
14 15  
... ... @@ -17,6 +18,12 @@
17 18 /* funcs callable from userspace (via syscall) */
18 19 struct bpf_map *(*map_alloc)(union bpf_attr *attr);
19 20 void (*map_free)(struct bpf_map *);
  21 + int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
  22 +
  23 + /* funcs callable from userspace and from eBPF programs */
  24 + void *(*map_lookup_elem)(struct bpf_map *map, void *key);
  25 + int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
  26 + int (*map_delete_elem)(struct bpf_map *map, void *key);
20 27 };
21 28  
22 29 struct bpf_map {
... ... @@ -37,6 +44,7 @@
37 44  
38 45 void bpf_register_map_type(struct bpf_map_type_list *tl);
39 46 void bpf_map_put(struct bpf_map *map);
  47 +struct bpf_map *bpf_map_get(struct fd f);
40 48  
41 49 #endif /* _LINUX_BPF_H */
include/uapi/linux/bpf.h
... ... @@ -70,6 +70,35 @@
70 70 * map is deleted when fd is closed
71 71 */
72 72 BPF_MAP_CREATE,
  73 +
  74 + /* lookup key in a given map
  75 + * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
  76 + * Using attr->map_fd, attr->key, attr->value
  77 + * returns zero and stores found elem into value
  78 + * or negative error
  79 + */
  80 + BPF_MAP_LOOKUP_ELEM,
  81 +
  82 + /* create or update key/value pair in a given map
  83 + * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
  84 + * Using attr->map_fd, attr->key, attr->value
  85 + * returns zero or negative error
  86 + */
  87 + BPF_MAP_UPDATE_ELEM,
  88 +
  89 + /* find and delete elem by key in a given map
  90 + * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
  91 + * Using attr->map_fd, attr->key
  92 + * returns zero or negative error
  93 + */
  94 + BPF_MAP_DELETE_ELEM,
  95 +
  96 + /* lookup key in a given map and return next key
  97 + * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
  98 + * Using attr->map_fd, attr->key, attr->next_key
  99 + * returns zero and stores next key or negative error
  100 + */
  101 + BPF_MAP_GET_NEXT_KEY,
73 102 };
74 103  
75 104 enum bpf_map_type {
... ... @@ -82,6 +111,15 @@
82 111 __u32 key_size; /* size of key in bytes */
83 112 __u32 value_size; /* size of value in bytes */
84 113 __u32 max_entries; /* max number of entries in a map */
  114 + };
  115 +
  116 + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
  117 + __u32 map_fd;
  118 + __aligned_u64 key;
  119 + union {
  120 + __aligned_u64 value;
  121 + __aligned_u64 next_key;
  122 + };
85 123 };
86 124 } __attribute__((aligned(8)));
87 125  
kernel/bpf/syscall.c
... ... @@ -13,6 +13,7 @@
13 13 #include <linux/syscalls.h>
14 14 #include <linux/slab.h>
15 15 #include <linux/anon_inodes.h>
  16 +#include <linux/file.h>
16 17  
17 18 static LIST_HEAD(bpf_map_types);
18 19  
... ... @@ -111,6 +112,228 @@
111 112 return err;
112 113 }
113 114  
  115 +/* if error is returned, fd is released.
  116 + * On success caller should complete fd access with matching fdput()
  117 + */
  118 +struct bpf_map *bpf_map_get(struct fd f)
  119 +{
  120 + struct bpf_map *map;
  121 +
  122 + if (!f.file)
  123 + return ERR_PTR(-EBADF);
  124 +
  125 + if (f.file->f_op != &bpf_map_fops) {
  126 + fdput(f);
  127 + return ERR_PTR(-EINVAL);
  128 + }
  129 +
  130 + map = f.file->private_data;
  131 +
  132 + return map;
  133 +}
  134 +
  135 +/* helper to convert user pointers passed inside __aligned_u64 fields */
  136 +static void __user *u64_to_ptr(__u64 val)
  137 +{
  138 + return (void __user *) (unsigned long) val;
  139 +}
  140 +
  141 +/* last field in 'union bpf_attr' used by this command */
  142 +#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
  143 +
  144 +static int map_lookup_elem(union bpf_attr *attr)
  145 +{
  146 + void __user *ukey = u64_to_ptr(attr->key);
  147 + void __user *uvalue = u64_to_ptr(attr->value);
  148 + int ufd = attr->map_fd;
  149 + struct fd f = fdget(ufd);
  150 + struct bpf_map *map;
  151 + void *key, *value;
  152 + int err;
  153 +
  154 + if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
  155 + return -EINVAL;
  156 +
  157 + map = bpf_map_get(f);
  158 + if (IS_ERR(map))
  159 + return PTR_ERR(map);
  160 +
  161 + err = -ENOMEM;
  162 + key = kmalloc(map->key_size, GFP_USER);
  163 + if (!key)
  164 + goto err_put;
  165 +
  166 + err = -EFAULT;
  167 + if (copy_from_user(key, ukey, map->key_size) != 0)
  168 + goto free_key;
  169 +
  170 + err = -ESRCH;
  171 + rcu_read_lock();
  172 + value = map->ops->map_lookup_elem(map, key);
  173 + if (!value)
  174 + goto err_unlock;
  175 +
  176 + err = -EFAULT;
  177 + if (copy_to_user(uvalue, value, map->value_size) != 0)
  178 + goto err_unlock;
  179 +
  180 + err = 0;
  181 +
  182 +err_unlock:
  183 + rcu_read_unlock();
  184 +free_key:
  185 + kfree(key);
  186 +err_put:
  187 + fdput(f);
  188 + return err;
  189 +}
  190 +
  191 +#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
  192 +
  193 +static int map_update_elem(union bpf_attr *attr)
  194 +{
  195 + void __user *ukey = u64_to_ptr(attr->key);
  196 + void __user *uvalue = u64_to_ptr(attr->value);
  197 + int ufd = attr->map_fd;
  198 + struct fd f = fdget(ufd);
  199 + struct bpf_map *map;
  200 + void *key, *value;
  201 + int err;
  202 +
  203 + if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
  204 + return -EINVAL;
  205 +
  206 + map = bpf_map_get(f);
  207 + if (IS_ERR(map))
  208 + return PTR_ERR(map);
  209 +
  210 + err = -ENOMEM;
  211 + key = kmalloc(map->key_size, GFP_USER);
  212 + if (!key)
  213 + goto err_put;
  214 +
  215 + err = -EFAULT;
  216 + if (copy_from_user(key, ukey, map->key_size) != 0)
  217 + goto free_key;
  218 +
  219 + err = -ENOMEM;
  220 + value = kmalloc(map->value_size, GFP_USER);
  221 + if (!value)
  222 + goto free_key;
  223 +
  224 + err = -EFAULT;
  225 + if (copy_from_user(value, uvalue, map->value_size) != 0)
  226 + goto free_value;
  227 +
  228 + /* eBPF program that use maps are running under rcu_read_lock(),
  229 + * therefore all map accessors rely on this fact, so do the same here
  230 + */
  231 + rcu_read_lock();
  232 + err = map->ops->map_update_elem(map, key, value);
  233 + rcu_read_unlock();
  234 +
  235 +free_value:
  236 + kfree(value);
  237 +free_key:
  238 + kfree(key);
  239 +err_put:
  240 + fdput(f);
  241 + return err;
  242 +}
  243 +
  244 +#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
  245 +
  246 +static int map_delete_elem(union bpf_attr *attr)
  247 +{
  248 + void __user *ukey = u64_to_ptr(attr->key);
  249 + int ufd = attr->map_fd;
  250 + struct fd f = fdget(ufd);
  251 + struct bpf_map *map;
  252 + void *key;
  253 + int err;
  254 +
  255 + if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
  256 + return -EINVAL;
  257 +
  258 + map = bpf_map_get(f);
  259 + if (IS_ERR(map))
  260 + return PTR_ERR(map);
  261 +
  262 + err = -ENOMEM;
  263 + key = kmalloc(map->key_size, GFP_USER);
  264 + if (!key)
  265 + goto err_put;
  266 +
  267 + err = -EFAULT;
  268 + if (copy_from_user(key, ukey, map->key_size) != 0)
  269 + goto free_key;
  270 +
  271 + rcu_read_lock();
  272 + err = map->ops->map_delete_elem(map, key);
  273 + rcu_read_unlock();
  274 +
  275 +free_key:
  276 + kfree(key);
  277 +err_put:
  278 + fdput(f);
  279 + return err;
  280 +}
  281 +
  282 +/* last field in 'union bpf_attr' used by this command */
  283 +#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
  284 +
  285 +static int map_get_next_key(union bpf_attr *attr)
  286 +{
  287 + void __user *ukey = u64_to_ptr(attr->key);
  288 + void __user *unext_key = u64_to_ptr(attr->next_key);
  289 + int ufd = attr->map_fd;
  290 + struct fd f = fdget(ufd);
  291 + struct bpf_map *map;
  292 + void *key, *next_key;
  293 + int err;
  294 +
  295 + if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
  296 + return -EINVAL;
  297 +
  298 + map = bpf_map_get(f);
  299 + if (IS_ERR(map))
  300 + return PTR_ERR(map);
  301 +
  302 + err = -ENOMEM;
  303 + key = kmalloc(map->key_size, GFP_USER);
  304 + if (!key)
  305 + goto err_put;
  306 +
  307 + err = -EFAULT;
  308 + if (copy_from_user(key, ukey, map->key_size) != 0)
  309 + goto free_key;
  310 +
  311 + err = -ENOMEM;
  312 + next_key = kmalloc(map->key_size, GFP_USER);
  313 + if (!next_key)
  314 + goto free_key;
  315 +
  316 + rcu_read_lock();
  317 + err = map->ops->map_get_next_key(map, key, next_key);
  318 + rcu_read_unlock();
  319 + if (err)
  320 + goto free_next_key;
  321 +
  322 + err = -EFAULT;
  323 + if (copy_to_user(unext_key, next_key, map->key_size) != 0)
  324 + goto free_next_key;
  325 +
  326 + err = 0;
  327 +
  328 +free_next_key:
  329 + kfree(next_key);
  330 +free_key:
  331 + kfree(key);
  332 +err_put:
  333 + fdput(f);
  334 + return err;
  335 +}
  336 +
114 337 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
115 338 {
116 339 union bpf_attr attr = {};
... ... @@ -159,6 +382,18 @@
159 382 switch (cmd) {
160 383 case BPF_MAP_CREATE:
161 384 err = map_create(&attr);
  385 + break;
  386 + case BPF_MAP_LOOKUP_ELEM:
  387 + err = map_lookup_elem(&attr);
  388 + break;
  389 + case BPF_MAP_UPDATE_ELEM:
  390 + err = map_update_elem(&attr);
  391 + break;
  392 + case BPF_MAP_DELETE_ELEM:
  393 + err = map_delete_elem(&attr);
  394 + break;
  395 + case BPF_MAP_GET_NEXT_KEY:
  396 + err = map_get_next_key(&attr);
162 397 break;
163 398 default:
164 399 err = -EINVAL;