Commit 8ebe667c41e054384df19f2f382bc415badfaee1
Committed by
David S. Miller
1 parent
600ddd6825
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
bpf: rcu lock must not be held when calling copy_to_user()
BUG: sleeping function called from invalid context at mm/memory.c:3732
 in_atomic(): 0, irqs_disabled(): 0, pid: 671, name: test_maps
1 lock held by test_maps/671:
 #0: (rcu_read_lock){......}, at: [<0000000000264190>] map_lookup_elem+0xe8/0x260
Call Trace:
([<0000000000115b7e>] show_trace+0x12e/0x150)
 [<0000000000115c40>] show_stack+0xa0/0x100
 [<00000000009b163c>] dump_stack+0x74/0xc8
 [<000000000017424a>] ___might_sleep+0x23a/0x248
 [<00000000002b58e8>] might_fault+0x70/0xe8
 [<0000000000264230>] map_lookup_elem+0x188/0x260
 [<0000000000264716>] SyS_bpf+0x20e/0x840

Fix it by allocating a temporary buffer to store the map element value.

Fixes: db20fd2b0108 ("bpf: add lookup/update/delete/iterate methods to BPF maps")
Reported-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 1 changed file with 17 additions and 8 deletions Inline Diff
kernel/bpf/syscall.c
1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | 1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com |
2 | * | 2 | * |
3 | * This program is free software; you can redistribute it and/or | 3 | * This program is free software; you can redistribute it and/or |
4 | * modify it under the terms of version 2 of the GNU General Public | 4 | * modify it under the terms of version 2 of the GNU General Public |
5 | * License as published by the Free Software Foundation. | 5 | * License as published by the Free Software Foundation. |
6 | * | 6 | * |
7 | * This program is distributed in the hope that it will be useful, but | 7 | * This program is distributed in the hope that it will be useful, but |
8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 8 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
10 | * General Public License for more details. | 10 | * General Public License for more details. |
11 | */ | 11 | */ |
12 | #include <linux/bpf.h> | 12 | #include <linux/bpf.h> |
13 | #include <linux/syscalls.h> | 13 | #include <linux/syscalls.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/anon_inodes.h> | 15 | #include <linux/anon_inodes.h> |
16 | #include <linux/file.h> | 16 | #include <linux/file.h> |
17 | #include <linux/license.h> | 17 | #include <linux/license.h> |
18 | #include <linux/filter.h> | 18 | #include <linux/filter.h> |
19 | 19 | ||
20 | static LIST_HEAD(bpf_map_types); | 20 | static LIST_HEAD(bpf_map_types); |
21 | 21 | ||
22 | static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) | 22 | static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) |
23 | { | 23 | { |
24 | struct bpf_map_type_list *tl; | 24 | struct bpf_map_type_list *tl; |
25 | struct bpf_map *map; | 25 | struct bpf_map *map; |
26 | 26 | ||
27 | list_for_each_entry(tl, &bpf_map_types, list_node) { | 27 | list_for_each_entry(tl, &bpf_map_types, list_node) { |
28 | if (tl->type == attr->map_type) { | 28 | if (tl->type == attr->map_type) { |
29 | map = tl->ops->map_alloc(attr); | 29 | map = tl->ops->map_alloc(attr); |
30 | if (IS_ERR(map)) | 30 | if (IS_ERR(map)) |
31 | return map; | 31 | return map; |
32 | map->ops = tl->ops; | 32 | map->ops = tl->ops; |
33 | map->map_type = attr->map_type; | 33 | map->map_type = attr->map_type; |
34 | return map; | 34 | return map; |
35 | } | 35 | } |
36 | } | 36 | } |
37 | return ERR_PTR(-EINVAL); | 37 | return ERR_PTR(-EINVAL); |
38 | } | 38 | } |
39 | 39 | ||
40 | /* boot time registration of different map implementations */ | 40 | /* boot time registration of different map implementations */ |
41 | void bpf_register_map_type(struct bpf_map_type_list *tl) | 41 | void bpf_register_map_type(struct bpf_map_type_list *tl) |
42 | { | 42 | { |
43 | list_add(&tl->list_node, &bpf_map_types); | 43 | list_add(&tl->list_node, &bpf_map_types); |
44 | } | 44 | } |
45 | 45 | ||
46 | /* called from workqueue */ | 46 | /* called from workqueue */ |
47 | static void bpf_map_free_deferred(struct work_struct *work) | 47 | static void bpf_map_free_deferred(struct work_struct *work) |
48 | { | 48 | { |
49 | struct bpf_map *map = container_of(work, struct bpf_map, work); | 49 | struct bpf_map *map = container_of(work, struct bpf_map, work); |
50 | 50 | ||
51 | /* implementation dependent freeing */ | 51 | /* implementation dependent freeing */ |
52 | map->ops->map_free(map); | 52 | map->ops->map_free(map); |
53 | } | 53 | } |
54 | 54 | ||
55 | /* decrement map refcnt and schedule it for freeing via workqueue | 55 | /* decrement map refcnt and schedule it for freeing via workqueue |
56 | * (unrelying map implementation ops->map_free() might sleep) | 56 | * (unrelying map implementation ops->map_free() might sleep) |
57 | */ | 57 | */ |
58 | void bpf_map_put(struct bpf_map *map) | 58 | void bpf_map_put(struct bpf_map *map) |
59 | { | 59 | { |
60 | if (atomic_dec_and_test(&map->refcnt)) { | 60 | if (atomic_dec_and_test(&map->refcnt)) { |
61 | INIT_WORK(&map->work, bpf_map_free_deferred); | 61 | INIT_WORK(&map->work, bpf_map_free_deferred); |
62 | schedule_work(&map->work); | 62 | schedule_work(&map->work); |
63 | } | 63 | } |
64 | } | 64 | } |
65 | 65 | ||
66 | static int bpf_map_release(struct inode *inode, struct file *filp) | 66 | static int bpf_map_release(struct inode *inode, struct file *filp) |
67 | { | 67 | { |
68 | struct bpf_map *map = filp->private_data; | 68 | struct bpf_map *map = filp->private_data; |
69 | 69 | ||
70 | bpf_map_put(map); | 70 | bpf_map_put(map); |
71 | return 0; | 71 | return 0; |
72 | } | 72 | } |
73 | 73 | ||
74 | static const struct file_operations bpf_map_fops = { | 74 | static const struct file_operations bpf_map_fops = { |
75 | .release = bpf_map_release, | 75 | .release = bpf_map_release, |
76 | }; | 76 | }; |
77 | 77 | ||
/* helper macro to check that unused fields of 'union bpf_attr' are zero:
 * scans everything after CMD##_LAST_FIELD up to the end of the union and
 * evaluates to true (reject) if any non-zero byte is found
 */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
85 | 85 | ||
86 | #define BPF_MAP_CREATE_LAST_FIELD max_entries | 86 | #define BPF_MAP_CREATE_LAST_FIELD max_entries |
87 | /* called via syscall */ | 87 | /* called via syscall */ |
88 | static int map_create(union bpf_attr *attr) | 88 | static int map_create(union bpf_attr *attr) |
89 | { | 89 | { |
90 | struct bpf_map *map; | 90 | struct bpf_map *map; |
91 | int err; | 91 | int err; |
92 | 92 | ||
93 | err = CHECK_ATTR(BPF_MAP_CREATE); | 93 | err = CHECK_ATTR(BPF_MAP_CREATE); |
94 | if (err) | 94 | if (err) |
95 | return -EINVAL; | 95 | return -EINVAL; |
96 | 96 | ||
97 | /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ | 97 | /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ |
98 | map = find_and_alloc_map(attr); | 98 | map = find_and_alloc_map(attr); |
99 | if (IS_ERR(map)) | 99 | if (IS_ERR(map)) |
100 | return PTR_ERR(map); | 100 | return PTR_ERR(map); |
101 | 101 | ||
102 | atomic_set(&map->refcnt, 1); | 102 | atomic_set(&map->refcnt, 1); |
103 | 103 | ||
104 | err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); | 104 | err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); |
105 | 105 | ||
106 | if (err < 0) | 106 | if (err < 0) |
107 | /* failed to allocate fd */ | 107 | /* failed to allocate fd */ |
108 | goto free_map; | 108 | goto free_map; |
109 | 109 | ||
110 | return err; | 110 | return err; |
111 | 111 | ||
112 | free_map: | 112 | free_map: |
113 | map->ops->map_free(map); | 113 | map->ops->map_free(map); |
114 | return err; | 114 | return err; |
115 | } | 115 | } |
116 | 116 | ||
117 | /* if error is returned, fd is released. | 117 | /* if error is returned, fd is released. |
118 | * On success caller should complete fd access with matching fdput() | 118 | * On success caller should complete fd access with matching fdput() |
119 | */ | 119 | */ |
120 | struct bpf_map *bpf_map_get(struct fd f) | 120 | struct bpf_map *bpf_map_get(struct fd f) |
121 | { | 121 | { |
122 | struct bpf_map *map; | 122 | struct bpf_map *map; |
123 | 123 | ||
124 | if (!f.file) | 124 | if (!f.file) |
125 | return ERR_PTR(-EBADF); | 125 | return ERR_PTR(-EBADF); |
126 | 126 | ||
127 | if (f.file->f_op != &bpf_map_fops) { | 127 | if (f.file->f_op != &bpf_map_fops) { |
128 | fdput(f); | 128 | fdput(f); |
129 | return ERR_PTR(-EINVAL); | 129 | return ERR_PTR(-EINVAL); |
130 | } | 130 | } |
131 | 131 | ||
132 | map = f.file->private_data; | 132 | map = f.file->private_data; |
133 | 133 | ||
134 | return map; | 134 | return map; |
135 | } | 135 | } |
136 | 136 | ||
137 | /* helper to convert user pointers passed inside __aligned_u64 fields */ | 137 | /* helper to convert user pointers passed inside __aligned_u64 fields */ |
138 | static void __user *u64_to_ptr(__u64 val) | 138 | static void __user *u64_to_ptr(__u64 val) |
139 | { | 139 | { |
140 | return (void __user *) (unsigned long) val; | 140 | return (void __user *) (unsigned long) val; |
141 | } | 141 | } |
142 | 142 | ||
143 | /* last field in 'union bpf_attr' used by this command */ | 143 | /* last field in 'union bpf_attr' used by this command */ |
144 | #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value | 144 | #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value |
145 | 145 | ||
146 | static int map_lookup_elem(union bpf_attr *attr) | 146 | static int map_lookup_elem(union bpf_attr *attr) |
147 | { | 147 | { |
148 | void __user *ukey = u64_to_ptr(attr->key); | 148 | void __user *ukey = u64_to_ptr(attr->key); |
149 | void __user *uvalue = u64_to_ptr(attr->value); | 149 | void __user *uvalue = u64_to_ptr(attr->value); |
150 | int ufd = attr->map_fd; | 150 | int ufd = attr->map_fd; |
151 | struct fd f = fdget(ufd); | 151 | struct fd f = fdget(ufd); |
152 | struct bpf_map *map; | 152 | struct bpf_map *map; |
153 | void *key, *value; | 153 | void *key, *value, *ptr; |
154 | int err; | 154 | int err; |
155 | 155 | ||
156 | if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) | 156 | if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) |
157 | return -EINVAL; | 157 | return -EINVAL; |
158 | 158 | ||
159 | map = bpf_map_get(f); | 159 | map = bpf_map_get(f); |
160 | if (IS_ERR(map)) | 160 | if (IS_ERR(map)) |
161 | return PTR_ERR(map); | 161 | return PTR_ERR(map); |
162 | 162 | ||
163 | err = -ENOMEM; | 163 | err = -ENOMEM; |
164 | key = kmalloc(map->key_size, GFP_USER); | 164 | key = kmalloc(map->key_size, GFP_USER); |
165 | if (!key) | 165 | if (!key) |
166 | goto err_put; | 166 | goto err_put; |
167 | 167 | ||
168 | err = -EFAULT; | 168 | err = -EFAULT; |
169 | if (copy_from_user(key, ukey, map->key_size) != 0) | 169 | if (copy_from_user(key, ukey, map->key_size) != 0) |
170 | goto free_key; | 170 | goto free_key; |
171 | 171 | ||
172 | err = -ENOENT; | 172 | err = -ENOMEM; |
173 | rcu_read_lock(); | 173 | value = kmalloc(map->value_size, GFP_USER); |
174 | value = map->ops->map_lookup_elem(map, key); | ||
175 | if (!value) | 174 | if (!value) |
176 | goto err_unlock; | 175 | goto free_key; |
177 | 176 | ||
177 | rcu_read_lock(); | ||
178 | ptr = map->ops->map_lookup_elem(map, key); | ||
179 | if (ptr) | ||
180 | memcpy(value, ptr, map->value_size); | ||
181 | rcu_read_unlock(); | ||
182 | |||
183 | err = -ENOENT; | ||
184 | if (!ptr) | ||
185 | goto free_value; | ||
186 | |||
178 | err = -EFAULT; | 187 | err = -EFAULT; |
179 | if (copy_to_user(uvalue, value, map->value_size) != 0) | 188 | if (copy_to_user(uvalue, value, map->value_size) != 0) |
180 | goto err_unlock; | 189 | goto free_value; |
181 | 190 | ||
182 | err = 0; | 191 | err = 0; |
183 | 192 | ||
184 | err_unlock: | 193 | free_value: |
185 | rcu_read_unlock(); | 194 | kfree(value); |
186 | free_key: | 195 | free_key: |
187 | kfree(key); | 196 | kfree(key); |
188 | err_put: | 197 | err_put: |
189 | fdput(f); | 198 | fdput(f); |
190 | return err; | 199 | return err; |
191 | } | 200 | } |
192 | 201 | ||
193 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags | 202 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags |
194 | 203 | ||
195 | static int map_update_elem(union bpf_attr *attr) | 204 | static int map_update_elem(union bpf_attr *attr) |
196 | { | 205 | { |
197 | void __user *ukey = u64_to_ptr(attr->key); | 206 | void __user *ukey = u64_to_ptr(attr->key); |
198 | void __user *uvalue = u64_to_ptr(attr->value); | 207 | void __user *uvalue = u64_to_ptr(attr->value); |
199 | int ufd = attr->map_fd; | 208 | int ufd = attr->map_fd; |
200 | struct fd f = fdget(ufd); | 209 | struct fd f = fdget(ufd); |
201 | struct bpf_map *map; | 210 | struct bpf_map *map; |
202 | void *key, *value; | 211 | void *key, *value; |
203 | int err; | 212 | int err; |
204 | 213 | ||
205 | if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) | 214 | if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) |
206 | return -EINVAL; | 215 | return -EINVAL; |
207 | 216 | ||
208 | map = bpf_map_get(f); | 217 | map = bpf_map_get(f); |
209 | if (IS_ERR(map)) | 218 | if (IS_ERR(map)) |
210 | return PTR_ERR(map); | 219 | return PTR_ERR(map); |
211 | 220 | ||
212 | err = -ENOMEM; | 221 | err = -ENOMEM; |
213 | key = kmalloc(map->key_size, GFP_USER); | 222 | key = kmalloc(map->key_size, GFP_USER); |
214 | if (!key) | 223 | if (!key) |
215 | goto err_put; | 224 | goto err_put; |
216 | 225 | ||
217 | err = -EFAULT; | 226 | err = -EFAULT; |
218 | if (copy_from_user(key, ukey, map->key_size) != 0) | 227 | if (copy_from_user(key, ukey, map->key_size) != 0) |
219 | goto free_key; | 228 | goto free_key; |
220 | 229 | ||
221 | err = -ENOMEM; | 230 | err = -ENOMEM; |
222 | value = kmalloc(map->value_size, GFP_USER); | 231 | value = kmalloc(map->value_size, GFP_USER); |
223 | if (!value) | 232 | if (!value) |
224 | goto free_key; | 233 | goto free_key; |
225 | 234 | ||
226 | err = -EFAULT; | 235 | err = -EFAULT; |
227 | if (copy_from_user(value, uvalue, map->value_size) != 0) | 236 | if (copy_from_user(value, uvalue, map->value_size) != 0) |
228 | goto free_value; | 237 | goto free_value; |
229 | 238 | ||
230 | /* eBPF program that use maps are running under rcu_read_lock(), | 239 | /* eBPF program that use maps are running under rcu_read_lock(), |
231 | * therefore all map accessors rely on this fact, so do the same here | 240 | * therefore all map accessors rely on this fact, so do the same here |
232 | */ | 241 | */ |
233 | rcu_read_lock(); | 242 | rcu_read_lock(); |
234 | err = map->ops->map_update_elem(map, key, value, attr->flags); | 243 | err = map->ops->map_update_elem(map, key, value, attr->flags); |
235 | rcu_read_unlock(); | 244 | rcu_read_unlock(); |
236 | 245 | ||
237 | free_value: | 246 | free_value: |
238 | kfree(value); | 247 | kfree(value); |
239 | free_key: | 248 | free_key: |
240 | kfree(key); | 249 | kfree(key); |
241 | err_put: | 250 | err_put: |
242 | fdput(f); | 251 | fdput(f); |
243 | return err; | 252 | return err; |
244 | } | 253 | } |
245 | 254 | ||
246 | #define BPF_MAP_DELETE_ELEM_LAST_FIELD key | 255 | #define BPF_MAP_DELETE_ELEM_LAST_FIELD key |
247 | 256 | ||
248 | static int map_delete_elem(union bpf_attr *attr) | 257 | static int map_delete_elem(union bpf_attr *attr) |
249 | { | 258 | { |
250 | void __user *ukey = u64_to_ptr(attr->key); | 259 | void __user *ukey = u64_to_ptr(attr->key); |
251 | int ufd = attr->map_fd; | 260 | int ufd = attr->map_fd; |
252 | struct fd f = fdget(ufd); | 261 | struct fd f = fdget(ufd); |
253 | struct bpf_map *map; | 262 | struct bpf_map *map; |
254 | void *key; | 263 | void *key; |
255 | int err; | 264 | int err; |
256 | 265 | ||
257 | if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) | 266 | if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) |
258 | return -EINVAL; | 267 | return -EINVAL; |
259 | 268 | ||
260 | map = bpf_map_get(f); | 269 | map = bpf_map_get(f); |
261 | if (IS_ERR(map)) | 270 | if (IS_ERR(map)) |
262 | return PTR_ERR(map); | 271 | return PTR_ERR(map); |
263 | 272 | ||
264 | err = -ENOMEM; | 273 | err = -ENOMEM; |
265 | key = kmalloc(map->key_size, GFP_USER); | 274 | key = kmalloc(map->key_size, GFP_USER); |
266 | if (!key) | 275 | if (!key) |
267 | goto err_put; | 276 | goto err_put; |
268 | 277 | ||
269 | err = -EFAULT; | 278 | err = -EFAULT; |
270 | if (copy_from_user(key, ukey, map->key_size) != 0) | 279 | if (copy_from_user(key, ukey, map->key_size) != 0) |
271 | goto free_key; | 280 | goto free_key; |
272 | 281 | ||
273 | rcu_read_lock(); | 282 | rcu_read_lock(); |
274 | err = map->ops->map_delete_elem(map, key); | 283 | err = map->ops->map_delete_elem(map, key); |
275 | rcu_read_unlock(); | 284 | rcu_read_unlock(); |
276 | 285 | ||
277 | free_key: | 286 | free_key: |
278 | kfree(key); | 287 | kfree(key); |
279 | err_put: | 288 | err_put: |
280 | fdput(f); | 289 | fdput(f); |
281 | return err; | 290 | return err; |
282 | } | 291 | } |
283 | 292 | ||
284 | /* last field in 'union bpf_attr' used by this command */ | 293 | /* last field in 'union bpf_attr' used by this command */ |
285 | #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key | 294 | #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key |
286 | 295 | ||
287 | static int map_get_next_key(union bpf_attr *attr) | 296 | static int map_get_next_key(union bpf_attr *attr) |
288 | { | 297 | { |
289 | void __user *ukey = u64_to_ptr(attr->key); | 298 | void __user *ukey = u64_to_ptr(attr->key); |
290 | void __user *unext_key = u64_to_ptr(attr->next_key); | 299 | void __user *unext_key = u64_to_ptr(attr->next_key); |
291 | int ufd = attr->map_fd; | 300 | int ufd = attr->map_fd; |
292 | struct fd f = fdget(ufd); | 301 | struct fd f = fdget(ufd); |
293 | struct bpf_map *map; | 302 | struct bpf_map *map; |
294 | void *key, *next_key; | 303 | void *key, *next_key; |
295 | int err; | 304 | int err; |
296 | 305 | ||
297 | if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) | 306 | if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) |
298 | return -EINVAL; | 307 | return -EINVAL; |
299 | 308 | ||
300 | map = bpf_map_get(f); | 309 | map = bpf_map_get(f); |
301 | if (IS_ERR(map)) | 310 | if (IS_ERR(map)) |
302 | return PTR_ERR(map); | 311 | return PTR_ERR(map); |
303 | 312 | ||
304 | err = -ENOMEM; | 313 | err = -ENOMEM; |
305 | key = kmalloc(map->key_size, GFP_USER); | 314 | key = kmalloc(map->key_size, GFP_USER); |
306 | if (!key) | 315 | if (!key) |
307 | goto err_put; | 316 | goto err_put; |
308 | 317 | ||
309 | err = -EFAULT; | 318 | err = -EFAULT; |
310 | if (copy_from_user(key, ukey, map->key_size) != 0) | 319 | if (copy_from_user(key, ukey, map->key_size) != 0) |
311 | goto free_key; | 320 | goto free_key; |
312 | 321 | ||
313 | err = -ENOMEM; | 322 | err = -ENOMEM; |
314 | next_key = kmalloc(map->key_size, GFP_USER); | 323 | next_key = kmalloc(map->key_size, GFP_USER); |
315 | if (!next_key) | 324 | if (!next_key) |
316 | goto free_key; | 325 | goto free_key; |
317 | 326 | ||
318 | rcu_read_lock(); | 327 | rcu_read_lock(); |
319 | err = map->ops->map_get_next_key(map, key, next_key); | 328 | err = map->ops->map_get_next_key(map, key, next_key); |
320 | rcu_read_unlock(); | 329 | rcu_read_unlock(); |
321 | if (err) | 330 | if (err) |
322 | goto free_next_key; | 331 | goto free_next_key; |
323 | 332 | ||
324 | err = -EFAULT; | 333 | err = -EFAULT; |
325 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) | 334 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) |
326 | goto free_next_key; | 335 | goto free_next_key; |
327 | 336 | ||
328 | err = 0; | 337 | err = 0; |
329 | 338 | ||
330 | free_next_key: | 339 | free_next_key: |
331 | kfree(next_key); | 340 | kfree(next_key); |
332 | free_key: | 341 | free_key: |
333 | kfree(key); | 342 | kfree(key); |
334 | err_put: | 343 | err_put: |
335 | fdput(f); | 344 | fdput(f); |
336 | return err; | 345 | return err; |
337 | } | 346 | } |
338 | 347 | ||
339 | static LIST_HEAD(bpf_prog_types); | 348 | static LIST_HEAD(bpf_prog_types); |
340 | 349 | ||
341 | static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) | 350 | static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) |
342 | { | 351 | { |
343 | struct bpf_prog_type_list *tl; | 352 | struct bpf_prog_type_list *tl; |
344 | 353 | ||
345 | list_for_each_entry(tl, &bpf_prog_types, list_node) { | 354 | list_for_each_entry(tl, &bpf_prog_types, list_node) { |
346 | if (tl->type == type) { | 355 | if (tl->type == type) { |
347 | prog->aux->ops = tl->ops; | 356 | prog->aux->ops = tl->ops; |
348 | prog->aux->prog_type = type; | 357 | prog->aux->prog_type = type; |
349 | return 0; | 358 | return 0; |
350 | } | 359 | } |
351 | } | 360 | } |
352 | return -EINVAL; | 361 | return -EINVAL; |
353 | } | 362 | } |
354 | 363 | ||
355 | void bpf_register_prog_type(struct bpf_prog_type_list *tl) | 364 | void bpf_register_prog_type(struct bpf_prog_type_list *tl) |
356 | { | 365 | { |
357 | list_add(&tl->list_node, &bpf_prog_types); | 366 | list_add(&tl->list_node, &bpf_prog_types); |
358 | } | 367 | } |
359 | 368 | ||
360 | /* fixup insn->imm field of bpf_call instructions: | 369 | /* fixup insn->imm field of bpf_call instructions: |
361 | * if (insn->imm == BPF_FUNC_map_lookup_elem) | 370 | * if (insn->imm == BPF_FUNC_map_lookup_elem) |
362 | * insn->imm = bpf_map_lookup_elem - __bpf_call_base; | 371 | * insn->imm = bpf_map_lookup_elem - __bpf_call_base; |
363 | * else if (insn->imm == BPF_FUNC_map_update_elem) | 372 | * else if (insn->imm == BPF_FUNC_map_update_elem) |
364 | * insn->imm = bpf_map_update_elem - __bpf_call_base; | 373 | * insn->imm = bpf_map_update_elem - __bpf_call_base; |
365 | * else ... | 374 | * else ... |
366 | * | 375 | * |
367 | * this function is called after eBPF program passed verification | 376 | * this function is called after eBPF program passed verification |
368 | */ | 377 | */ |
369 | static void fixup_bpf_calls(struct bpf_prog *prog) | 378 | static void fixup_bpf_calls(struct bpf_prog *prog) |
370 | { | 379 | { |
371 | const struct bpf_func_proto *fn; | 380 | const struct bpf_func_proto *fn; |
372 | int i; | 381 | int i; |
373 | 382 | ||
374 | for (i = 0; i < prog->len; i++) { | 383 | for (i = 0; i < prog->len; i++) { |
375 | struct bpf_insn *insn = &prog->insnsi[i]; | 384 | struct bpf_insn *insn = &prog->insnsi[i]; |
376 | 385 | ||
377 | if (insn->code == (BPF_JMP | BPF_CALL)) { | 386 | if (insn->code == (BPF_JMP | BPF_CALL)) { |
378 | /* we reach here when program has bpf_call instructions | 387 | /* we reach here when program has bpf_call instructions |
379 | * and it passed bpf_check(), means that | 388 | * and it passed bpf_check(), means that |
380 | * ops->get_func_proto must have been supplied, check it | 389 | * ops->get_func_proto must have been supplied, check it |
381 | */ | 390 | */ |
382 | BUG_ON(!prog->aux->ops->get_func_proto); | 391 | BUG_ON(!prog->aux->ops->get_func_proto); |
383 | 392 | ||
384 | fn = prog->aux->ops->get_func_proto(insn->imm); | 393 | fn = prog->aux->ops->get_func_proto(insn->imm); |
385 | /* all functions that have prototype and verifier allowed | 394 | /* all functions that have prototype and verifier allowed |
386 | * programs to call them, must be real in-kernel functions | 395 | * programs to call them, must be real in-kernel functions |
387 | */ | 396 | */ |
388 | BUG_ON(!fn->func); | 397 | BUG_ON(!fn->func); |
389 | insn->imm = fn->func - __bpf_call_base; | 398 | insn->imm = fn->func - __bpf_call_base; |
390 | } | 399 | } |
391 | } | 400 | } |
392 | } | 401 | } |
393 | 402 | ||
394 | /* drop refcnt on maps used by eBPF program and free auxilary data */ | 403 | /* drop refcnt on maps used by eBPF program and free auxilary data */ |
395 | static void free_used_maps(struct bpf_prog_aux *aux) | 404 | static void free_used_maps(struct bpf_prog_aux *aux) |
396 | { | 405 | { |
397 | int i; | 406 | int i; |
398 | 407 | ||
399 | for (i = 0; i < aux->used_map_cnt; i++) | 408 | for (i = 0; i < aux->used_map_cnt; i++) |
400 | bpf_map_put(aux->used_maps[i]); | 409 | bpf_map_put(aux->used_maps[i]); |
401 | 410 | ||
402 | kfree(aux->used_maps); | 411 | kfree(aux->used_maps); |
403 | } | 412 | } |
404 | 413 | ||
405 | void bpf_prog_put(struct bpf_prog *prog) | 414 | void bpf_prog_put(struct bpf_prog *prog) |
406 | { | 415 | { |
407 | if (atomic_dec_and_test(&prog->aux->refcnt)) { | 416 | if (atomic_dec_and_test(&prog->aux->refcnt)) { |
408 | free_used_maps(prog->aux); | 417 | free_used_maps(prog->aux); |
409 | bpf_prog_free(prog); | 418 | bpf_prog_free(prog); |
410 | } | 419 | } |
411 | } | 420 | } |
412 | 421 | ||
413 | static int bpf_prog_release(struct inode *inode, struct file *filp) | 422 | static int bpf_prog_release(struct inode *inode, struct file *filp) |
414 | { | 423 | { |
415 | struct bpf_prog *prog = filp->private_data; | 424 | struct bpf_prog *prog = filp->private_data; |
416 | 425 | ||
417 | bpf_prog_put(prog); | 426 | bpf_prog_put(prog); |
418 | return 0; | 427 | return 0; |
419 | } | 428 | } |
420 | 429 | ||
421 | static const struct file_operations bpf_prog_fops = { | 430 | static const struct file_operations bpf_prog_fops = { |
422 | .release = bpf_prog_release, | 431 | .release = bpf_prog_release, |
423 | }; | 432 | }; |
424 | 433 | ||
425 | static struct bpf_prog *get_prog(struct fd f) | 434 | static struct bpf_prog *get_prog(struct fd f) |
426 | { | 435 | { |
427 | struct bpf_prog *prog; | 436 | struct bpf_prog *prog; |
428 | 437 | ||
429 | if (!f.file) | 438 | if (!f.file) |
430 | return ERR_PTR(-EBADF); | 439 | return ERR_PTR(-EBADF); |
431 | 440 | ||
432 | if (f.file->f_op != &bpf_prog_fops) { | 441 | if (f.file->f_op != &bpf_prog_fops) { |
433 | fdput(f); | 442 | fdput(f); |
434 | return ERR_PTR(-EINVAL); | 443 | return ERR_PTR(-EINVAL); |
435 | } | 444 | } |
436 | 445 | ||
437 | prog = f.file->private_data; | 446 | prog = f.file->private_data; |
438 | 447 | ||
439 | return prog; | 448 | return prog; |
440 | } | 449 | } |
441 | 450 | ||
442 | /* called by sockets/tracing/seccomp before attaching program to an event | 451 | /* called by sockets/tracing/seccomp before attaching program to an event |
443 | * pairs with bpf_prog_put() | 452 | * pairs with bpf_prog_put() |
444 | */ | 453 | */ |
445 | struct bpf_prog *bpf_prog_get(u32 ufd) | 454 | struct bpf_prog *bpf_prog_get(u32 ufd) |
446 | { | 455 | { |
447 | struct fd f = fdget(ufd); | 456 | struct fd f = fdget(ufd); |
448 | struct bpf_prog *prog; | 457 | struct bpf_prog *prog; |
449 | 458 | ||
450 | prog = get_prog(f); | 459 | prog = get_prog(f); |
451 | 460 | ||
452 | if (IS_ERR(prog)) | 461 | if (IS_ERR(prog)) |
453 | return prog; | 462 | return prog; |
454 | 463 | ||
455 | atomic_inc(&prog->aux->refcnt); | 464 | atomic_inc(&prog->aux->refcnt); |
456 | fdput(f); | 465 | fdput(f); |
457 | return prog; | 466 | return prog; |
458 | } | 467 | } |
459 | 468 | ||
460 | /* last field in 'union bpf_attr' used by this command */ | 469 | /* last field in 'union bpf_attr' used by this command */ |
461 | #define BPF_PROG_LOAD_LAST_FIELD log_buf | 470 | #define BPF_PROG_LOAD_LAST_FIELD log_buf |
462 | 471 | ||
463 | static int bpf_prog_load(union bpf_attr *attr) | 472 | static int bpf_prog_load(union bpf_attr *attr) |
464 | { | 473 | { |
465 | enum bpf_prog_type type = attr->prog_type; | 474 | enum bpf_prog_type type = attr->prog_type; |
466 | struct bpf_prog *prog; | 475 | struct bpf_prog *prog; |
467 | int err; | 476 | int err; |
468 | char license[128]; | 477 | char license[128]; |
469 | bool is_gpl; | 478 | bool is_gpl; |
470 | 479 | ||
471 | if (CHECK_ATTR(BPF_PROG_LOAD)) | 480 | if (CHECK_ATTR(BPF_PROG_LOAD)) |
472 | return -EINVAL; | 481 | return -EINVAL; |
473 | 482 | ||
474 | /* copy eBPF program license from user space */ | 483 | /* copy eBPF program license from user space */ |
475 | if (strncpy_from_user(license, u64_to_ptr(attr->license), | 484 | if (strncpy_from_user(license, u64_to_ptr(attr->license), |
476 | sizeof(license) - 1) < 0) | 485 | sizeof(license) - 1) < 0) |
477 | return -EFAULT; | 486 | return -EFAULT; |
478 | license[sizeof(license) - 1] = 0; | 487 | license[sizeof(license) - 1] = 0; |
479 | 488 | ||
480 | /* eBPF programs must be GPL compatible to use GPL-ed functions */ | 489 | /* eBPF programs must be GPL compatible to use GPL-ed functions */ |
481 | is_gpl = license_is_gpl_compatible(license); | 490 | is_gpl = license_is_gpl_compatible(license); |
482 | 491 | ||
483 | if (attr->insn_cnt >= BPF_MAXINSNS) | 492 | if (attr->insn_cnt >= BPF_MAXINSNS) |
484 | return -EINVAL; | 493 | return -EINVAL; |
485 | 494 | ||
486 | /* plain bpf_prog allocation */ | 495 | /* plain bpf_prog allocation */ |
487 | prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); | 496 | prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); |
488 | if (!prog) | 497 | if (!prog) |
489 | return -ENOMEM; | 498 | return -ENOMEM; |
490 | 499 | ||
491 | prog->len = attr->insn_cnt; | 500 | prog->len = attr->insn_cnt; |
492 | 501 | ||
493 | err = -EFAULT; | 502 | err = -EFAULT; |
494 | if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), | 503 | if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), |
495 | prog->len * sizeof(struct bpf_insn)) != 0) | 504 | prog->len * sizeof(struct bpf_insn)) != 0) |
496 | goto free_prog; | 505 | goto free_prog; |
497 | 506 | ||
498 | prog->orig_prog = NULL; | 507 | prog->orig_prog = NULL; |
499 | prog->jited = false; | 508 | prog->jited = false; |
500 | 509 | ||
501 | atomic_set(&prog->aux->refcnt, 1); | 510 | atomic_set(&prog->aux->refcnt, 1); |
502 | prog->aux->is_gpl_compatible = is_gpl; | 511 | prog->aux->is_gpl_compatible = is_gpl; |
503 | 512 | ||
504 | /* find program type: socket_filter vs tracing_filter */ | 513 | /* find program type: socket_filter vs tracing_filter */ |
505 | err = find_prog_type(type, prog); | 514 | err = find_prog_type(type, prog); |
506 | if (err < 0) | 515 | if (err < 0) |
507 | goto free_prog; | 516 | goto free_prog; |
508 | 517 | ||
509 | /* run eBPF verifier */ | 518 | /* run eBPF verifier */ |
510 | err = bpf_check(prog, attr); | 519 | err = bpf_check(prog, attr); |
511 | 520 | ||
512 | if (err < 0) | 521 | if (err < 0) |
513 | goto free_used_maps; | 522 | goto free_used_maps; |
514 | 523 | ||
515 | /* fixup BPF_CALL->imm field */ | 524 | /* fixup BPF_CALL->imm field */ |
516 | fixup_bpf_calls(prog); | 525 | fixup_bpf_calls(prog); |
517 | 526 | ||
518 | /* eBPF program is ready to be JITed */ | 527 | /* eBPF program is ready to be JITed */ |
519 | bpf_prog_select_runtime(prog); | 528 | bpf_prog_select_runtime(prog); |
520 | 529 | ||
521 | err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); | 530 | err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); |
522 | 531 | ||
523 | if (err < 0) | 532 | if (err < 0) |
524 | /* failed to allocate fd */ | 533 | /* failed to allocate fd */ |
525 | goto free_used_maps; | 534 | goto free_used_maps; |
526 | 535 | ||
527 | return err; | 536 | return err; |
528 | 537 | ||
529 | free_used_maps: | 538 | free_used_maps: |
530 | free_used_maps(prog->aux); | 539 | free_used_maps(prog->aux); |
531 | free_prog: | 540 | free_prog: |
532 | bpf_prog_free(prog); | 541 | bpf_prog_free(prog); |
533 | return err; | 542 | return err; |
534 | } | 543 | } |
535 | 544 | ||
536 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) | 545 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) |
537 | { | 546 | { |
538 | union bpf_attr attr = {}; | 547 | union bpf_attr attr = {}; |
539 | int err; | 548 | int err; |
540 | 549 | ||
541 | /* the syscall is limited to root temporarily. This restriction will be | 550 | /* the syscall is limited to root temporarily. This restriction will be |
542 | * lifted when security audit is clean. Note that eBPF+tracing must have | 551 | * lifted when security audit is clean. Note that eBPF+tracing must have |
543 | * this restriction, since it may pass kernel data to user space | 552 | * this restriction, since it may pass kernel data to user space |
544 | */ | 553 | */ |
545 | if (!capable(CAP_SYS_ADMIN)) | 554 | if (!capable(CAP_SYS_ADMIN)) |
546 | return -EPERM; | 555 | return -EPERM; |
547 | 556 | ||
548 | if (!access_ok(VERIFY_READ, uattr, 1)) | 557 | if (!access_ok(VERIFY_READ, uattr, 1)) |
549 | return -EFAULT; | 558 | return -EFAULT; |
550 | 559 | ||
551 | if (size > PAGE_SIZE) /* silly large */ | 560 | if (size > PAGE_SIZE) /* silly large */ |
552 | return -E2BIG; | 561 | return -E2BIG; |
553 | 562 | ||
554 | /* If we're handed a bigger struct than we know of, | 563 | /* If we're handed a bigger struct than we know of, |
555 | * ensure all the unknown bits are 0 - i.e. new | 564 | * ensure all the unknown bits are 0 - i.e. new |
556 | * user-space does not rely on any kernel feature | 565 | * user-space does not rely on any kernel feature |
557 | * extensions we dont know about yet. | 566 | * extensions we dont know about yet. |
558 | */ | 567 | */ |
559 | if (size > sizeof(attr)) { | 568 | if (size > sizeof(attr)) { |
560 | unsigned char __user *addr; | 569 | unsigned char __user *addr; |
561 | unsigned char __user *end; | 570 | unsigned char __user *end; |
562 | unsigned char val; | 571 | unsigned char val; |
563 | 572 | ||
564 | addr = (void __user *)uattr + sizeof(attr); | 573 | addr = (void __user *)uattr + sizeof(attr); |
565 | end = (void __user *)uattr + size; | 574 | end = (void __user *)uattr + size; |
566 | 575 | ||
567 | for (; addr < end; addr++) { | 576 | for (; addr < end; addr++) { |
568 | err = get_user(val, addr); | 577 | err = get_user(val, addr); |
569 | if (err) | 578 | if (err) |
570 | return err; | 579 | return err; |
571 | if (val) | 580 | if (val) |
572 | return -E2BIG; | 581 | return -E2BIG; |
573 | } | 582 | } |
574 | size = sizeof(attr); | 583 | size = sizeof(attr); |
575 | } | 584 | } |
576 | 585 | ||
577 | /* copy attributes from user space, may be less than sizeof(bpf_attr) */ | 586 | /* copy attributes from user space, may be less than sizeof(bpf_attr) */ |
578 | if (copy_from_user(&attr, uattr, size) != 0) | 587 | if (copy_from_user(&attr, uattr, size) != 0) |
579 | return -EFAULT; | 588 | return -EFAULT; |
580 | 589 | ||
581 | switch (cmd) { | 590 | switch (cmd) { |
582 | case BPF_MAP_CREATE: | 591 | case BPF_MAP_CREATE: |
583 | err = map_create(&attr); | 592 | err = map_create(&attr); |
584 | break; | 593 | break; |
585 | case BPF_MAP_LOOKUP_ELEM: | 594 | case BPF_MAP_LOOKUP_ELEM: |
586 | err = map_lookup_elem(&attr); | 595 | err = map_lookup_elem(&attr); |
587 | break; | 596 | break; |
588 | case BPF_MAP_UPDATE_ELEM: | 597 | case BPF_MAP_UPDATE_ELEM: |
589 | err = map_update_elem(&attr); | 598 | err = map_update_elem(&attr); |
590 | break; | 599 | break; |
591 | case BPF_MAP_DELETE_ELEM: | 600 | case BPF_MAP_DELETE_ELEM: |
592 | err = map_delete_elem(&attr); | 601 | err = map_delete_elem(&attr); |
593 | break; | 602 | break; |
594 | case BPF_MAP_GET_NEXT_KEY: | 603 | case BPF_MAP_GET_NEXT_KEY: |
595 | err = map_get_next_key(&attr); | 604 | err = map_get_next_key(&attr); |
596 | break; | 605 | break; |
597 | case BPF_PROG_LOAD: | 606 | case BPF_PROG_LOAD: |
598 | err = bpf_prog_load(&attr); | 607 | err = bpf_prog_load(&attr); |
599 | break; | 608 | break; |
600 | default: | 609 | default: |
601 | err = -EINVAL; | 610 | err = -EINVAL; |
602 | break; | 611 | break; |
603 | } | 612 | } |
604 | 613 | ||
605 | return err; | 614 | return err; |
606 | } | 615 | } |