Commit 8ebe667c41e054384df19f2f382bc415badfaee1

Authored by Alexei Starovoitov
Committed by David S. Miller
1 parent 600ddd6825

bpf: rcu lock must not be held when calling copy_to_user()

BUG: sleeping function called from invalid context at mm/memory.c:3732
in_atomic(): 0, irqs_disabled(): 0, pid: 671, name: test_maps
1 lock held by test_maps/671:
 #0:  (rcu_read_lock){......}, at: [<0000000000264190>] map_lookup_elem+0xe8/0x260
Call Trace:
([<0000000000115b7e>] show_trace+0x12e/0x150)
 [<0000000000115c40>] show_stack+0xa0/0x100
 [<00000000009b163c>] dump_stack+0x74/0xc8
 [<000000000017424a>] ___might_sleep+0x23a/0x248
 [<00000000002b58e8>] might_fault+0x70/0xe8
 [<0000000000264230>] map_lookup_elem+0x188/0x260
 [<0000000000264716>] SyS_bpf+0x20e/0x840

Fix it by allocating a temporary buffer to store the map element value.

Fixes: db20fd2b0108 ("bpf: add lookup/update/delete/iterate methods to BPF maps")
Reported-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 17 additions and 8 deletions (inline diff)

kernel/bpf/syscall.c
1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 * 2 *
3 * This program is free software; you can redistribute it and/or 3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public 4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation. 5 * License as published by the Free Software Foundation.
6 * 6 *
7 * This program is distributed in the hope that it will be useful, but 7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of 8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details. 10 * General Public License for more details.
11 */ 11 */
12 #include <linux/bpf.h> 12 #include <linux/bpf.h>
13 #include <linux/syscalls.h> 13 #include <linux/syscalls.h>
14 #include <linux/slab.h> 14 #include <linux/slab.h>
15 #include <linux/anon_inodes.h> 15 #include <linux/anon_inodes.h>
16 #include <linux/file.h> 16 #include <linux/file.h>
17 #include <linux/license.h> 17 #include <linux/license.h>
18 #include <linux/filter.h> 18 #include <linux/filter.h>
19 19
20 static LIST_HEAD(bpf_map_types); 20 static LIST_HEAD(bpf_map_types);
21 21
22 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 22 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
23 { 23 {
24 struct bpf_map_type_list *tl; 24 struct bpf_map_type_list *tl;
25 struct bpf_map *map; 25 struct bpf_map *map;
26 26
27 list_for_each_entry(tl, &bpf_map_types, list_node) { 27 list_for_each_entry(tl, &bpf_map_types, list_node) {
28 if (tl->type == attr->map_type) { 28 if (tl->type == attr->map_type) {
29 map = tl->ops->map_alloc(attr); 29 map = tl->ops->map_alloc(attr);
30 if (IS_ERR(map)) 30 if (IS_ERR(map))
31 return map; 31 return map;
32 map->ops = tl->ops; 32 map->ops = tl->ops;
33 map->map_type = attr->map_type; 33 map->map_type = attr->map_type;
34 return map; 34 return map;
35 } 35 }
36 } 36 }
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 } 38 }
39 39
40 /* boot time registration of different map implementations */ 40 /* boot time registration of different map implementations */
41 void bpf_register_map_type(struct bpf_map_type_list *tl) 41 void bpf_register_map_type(struct bpf_map_type_list *tl)
42 { 42 {
43 list_add(&tl->list_node, &bpf_map_types); 43 list_add(&tl->list_node, &bpf_map_types);
44 } 44 }
45 45
46 /* called from workqueue */ 46 /* called from workqueue */
47 static void bpf_map_free_deferred(struct work_struct *work) 47 static void bpf_map_free_deferred(struct work_struct *work)
48 { 48 {
49 struct bpf_map *map = container_of(work, struct bpf_map, work); 49 struct bpf_map *map = container_of(work, struct bpf_map, work);
50 50
51 /* implementation dependent freeing */ 51 /* implementation dependent freeing */
52 map->ops->map_free(map); 52 map->ops->map_free(map);
53 } 53 }
54 54
55 /* decrement map refcnt and schedule it for freeing via workqueue 55 /* decrement map refcnt and schedule it for freeing via workqueue
56 * (unrelying map implementation ops->map_free() might sleep) 56 * (unrelying map implementation ops->map_free() might sleep)
57 */ 57 */
58 void bpf_map_put(struct bpf_map *map) 58 void bpf_map_put(struct bpf_map *map)
59 { 59 {
60 if (atomic_dec_and_test(&map->refcnt)) { 60 if (atomic_dec_and_test(&map->refcnt)) {
61 INIT_WORK(&map->work, bpf_map_free_deferred); 61 INIT_WORK(&map->work, bpf_map_free_deferred);
62 schedule_work(&map->work); 62 schedule_work(&map->work);
63 } 63 }
64 } 64 }
65 65
66 static int bpf_map_release(struct inode *inode, struct file *filp) 66 static int bpf_map_release(struct inode *inode, struct file *filp)
67 { 67 {
68 struct bpf_map *map = filp->private_data; 68 struct bpf_map *map = filp->private_data;
69 69
70 bpf_map_put(map); 70 bpf_map_put(map);
71 return 0; 71 return 0;
72 } 72 }
73 73
74 static const struct file_operations bpf_map_fops = { 74 static const struct file_operations bpf_map_fops = {
75 .release = bpf_map_release, 75 .release = bpf_map_release,
76 }; 76 };
77 77
78 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 78 /* helper macro to check that unused fields 'union bpf_attr' are zero */
79 #define CHECK_ATTR(CMD) \ 79 #define CHECK_ATTR(CMD) \
80 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 80 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
81 sizeof(attr->CMD##_LAST_FIELD), 0, \ 81 sizeof(attr->CMD##_LAST_FIELD), 0, \
82 sizeof(*attr) - \ 82 sizeof(*attr) - \
83 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 83 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
84 sizeof(attr->CMD##_LAST_FIELD)) != NULL 84 sizeof(attr->CMD##_LAST_FIELD)) != NULL
85 85
86 #define BPF_MAP_CREATE_LAST_FIELD max_entries 86 #define BPF_MAP_CREATE_LAST_FIELD max_entries
87 /* called via syscall */ 87 /* called via syscall */
88 static int map_create(union bpf_attr *attr) 88 static int map_create(union bpf_attr *attr)
89 { 89 {
90 struct bpf_map *map; 90 struct bpf_map *map;
91 int err; 91 int err;
92 92
93 err = CHECK_ATTR(BPF_MAP_CREATE); 93 err = CHECK_ATTR(BPF_MAP_CREATE);
94 if (err) 94 if (err)
95 return -EINVAL; 95 return -EINVAL;
96 96
97 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 97 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
98 map = find_and_alloc_map(attr); 98 map = find_and_alloc_map(attr);
99 if (IS_ERR(map)) 99 if (IS_ERR(map))
100 return PTR_ERR(map); 100 return PTR_ERR(map);
101 101
102 atomic_set(&map->refcnt, 1); 102 atomic_set(&map->refcnt, 1);
103 103
104 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); 104 err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
105 105
106 if (err < 0) 106 if (err < 0)
107 /* failed to allocate fd */ 107 /* failed to allocate fd */
108 goto free_map; 108 goto free_map;
109 109
110 return err; 110 return err;
111 111
112 free_map: 112 free_map:
113 map->ops->map_free(map); 113 map->ops->map_free(map);
114 return err; 114 return err;
115 } 115 }
116 116
117 /* if error is returned, fd is released. 117 /* if error is returned, fd is released.
118 * On success caller should complete fd access with matching fdput() 118 * On success caller should complete fd access with matching fdput()
119 */ 119 */
120 struct bpf_map *bpf_map_get(struct fd f) 120 struct bpf_map *bpf_map_get(struct fd f)
121 { 121 {
122 struct bpf_map *map; 122 struct bpf_map *map;
123 123
124 if (!f.file) 124 if (!f.file)
125 return ERR_PTR(-EBADF); 125 return ERR_PTR(-EBADF);
126 126
127 if (f.file->f_op != &bpf_map_fops) { 127 if (f.file->f_op != &bpf_map_fops) {
128 fdput(f); 128 fdput(f);
129 return ERR_PTR(-EINVAL); 129 return ERR_PTR(-EINVAL);
130 } 130 }
131 131
132 map = f.file->private_data; 132 map = f.file->private_data;
133 133
134 return map; 134 return map;
135 } 135 }
136 136
137 /* helper to convert user pointers passed inside __aligned_u64 fields */ 137 /* helper to convert user pointers passed inside __aligned_u64 fields */
138 static void __user *u64_to_ptr(__u64 val) 138 static void __user *u64_to_ptr(__u64 val)
139 { 139 {
140 return (void __user *) (unsigned long) val; 140 return (void __user *) (unsigned long) val;
141 } 141 }
142 142
143 /* last field in 'union bpf_attr' used by this command */ 143 /* last field in 'union bpf_attr' used by this command */
144 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 144 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
145 145
146 static int map_lookup_elem(union bpf_attr *attr) 146 static int map_lookup_elem(union bpf_attr *attr)
147 { 147 {
148 void __user *ukey = u64_to_ptr(attr->key); 148 void __user *ukey = u64_to_ptr(attr->key);
149 void __user *uvalue = u64_to_ptr(attr->value); 149 void __user *uvalue = u64_to_ptr(attr->value);
150 int ufd = attr->map_fd; 150 int ufd = attr->map_fd;
151 struct fd f = fdget(ufd); 151 struct fd f = fdget(ufd);
152 struct bpf_map *map; 152 struct bpf_map *map;
153 void *key, *value; 153 void *key, *value, *ptr;
154 int err; 154 int err;
155 155
156 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 156 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
157 return -EINVAL; 157 return -EINVAL;
158 158
159 map = bpf_map_get(f); 159 map = bpf_map_get(f);
160 if (IS_ERR(map)) 160 if (IS_ERR(map))
161 return PTR_ERR(map); 161 return PTR_ERR(map);
162 162
163 err = -ENOMEM; 163 err = -ENOMEM;
164 key = kmalloc(map->key_size, GFP_USER); 164 key = kmalloc(map->key_size, GFP_USER);
165 if (!key) 165 if (!key)
166 goto err_put; 166 goto err_put;
167 167
168 err = -EFAULT; 168 err = -EFAULT;
169 if (copy_from_user(key, ukey, map->key_size) != 0) 169 if (copy_from_user(key, ukey, map->key_size) != 0)
170 goto free_key; 170 goto free_key;
171 171
172 err = -ENOENT; 172 err = -ENOMEM;
173 rcu_read_lock(); 173 value = kmalloc(map->value_size, GFP_USER);
174 value = map->ops->map_lookup_elem(map, key);
175 if (!value) 174 if (!value)
176 goto err_unlock; 175 goto free_key;
177 176
177 rcu_read_lock();
178 ptr = map->ops->map_lookup_elem(map, key);
179 if (ptr)
180 memcpy(value, ptr, map->value_size);
181 rcu_read_unlock();
182
183 err = -ENOENT;
184 if (!ptr)
185 goto free_value;
186
178 err = -EFAULT; 187 err = -EFAULT;
179 if (copy_to_user(uvalue, value, map->value_size) != 0) 188 if (copy_to_user(uvalue, value, map->value_size) != 0)
180 goto err_unlock; 189 goto free_value;
181 190
182 err = 0; 191 err = 0;
183 192
184 err_unlock: 193 free_value:
185 rcu_read_unlock(); 194 kfree(value);
186 free_key: 195 free_key:
187 kfree(key); 196 kfree(key);
188 err_put: 197 err_put:
189 fdput(f); 198 fdput(f);
190 return err; 199 return err;
191 } 200 }
192 201
193 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 202 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
194 203
195 static int map_update_elem(union bpf_attr *attr) 204 static int map_update_elem(union bpf_attr *attr)
196 { 205 {
197 void __user *ukey = u64_to_ptr(attr->key); 206 void __user *ukey = u64_to_ptr(attr->key);
198 void __user *uvalue = u64_to_ptr(attr->value); 207 void __user *uvalue = u64_to_ptr(attr->value);
199 int ufd = attr->map_fd; 208 int ufd = attr->map_fd;
200 struct fd f = fdget(ufd); 209 struct fd f = fdget(ufd);
201 struct bpf_map *map; 210 struct bpf_map *map;
202 void *key, *value; 211 void *key, *value;
203 int err; 212 int err;
204 213
205 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 214 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
206 return -EINVAL; 215 return -EINVAL;
207 216
208 map = bpf_map_get(f); 217 map = bpf_map_get(f);
209 if (IS_ERR(map)) 218 if (IS_ERR(map))
210 return PTR_ERR(map); 219 return PTR_ERR(map);
211 220
212 err = -ENOMEM; 221 err = -ENOMEM;
213 key = kmalloc(map->key_size, GFP_USER); 222 key = kmalloc(map->key_size, GFP_USER);
214 if (!key) 223 if (!key)
215 goto err_put; 224 goto err_put;
216 225
217 err = -EFAULT; 226 err = -EFAULT;
218 if (copy_from_user(key, ukey, map->key_size) != 0) 227 if (copy_from_user(key, ukey, map->key_size) != 0)
219 goto free_key; 228 goto free_key;
220 229
221 err = -ENOMEM; 230 err = -ENOMEM;
222 value = kmalloc(map->value_size, GFP_USER); 231 value = kmalloc(map->value_size, GFP_USER);
223 if (!value) 232 if (!value)
224 goto free_key; 233 goto free_key;
225 234
226 err = -EFAULT; 235 err = -EFAULT;
227 if (copy_from_user(value, uvalue, map->value_size) != 0) 236 if (copy_from_user(value, uvalue, map->value_size) != 0)
228 goto free_value; 237 goto free_value;
229 238
230 /* eBPF program that use maps are running under rcu_read_lock(), 239 /* eBPF program that use maps are running under rcu_read_lock(),
231 * therefore all map accessors rely on this fact, so do the same here 240 * therefore all map accessors rely on this fact, so do the same here
232 */ 241 */
233 rcu_read_lock(); 242 rcu_read_lock();
234 err = map->ops->map_update_elem(map, key, value, attr->flags); 243 err = map->ops->map_update_elem(map, key, value, attr->flags);
235 rcu_read_unlock(); 244 rcu_read_unlock();
236 245
237 free_value: 246 free_value:
238 kfree(value); 247 kfree(value);
239 free_key: 248 free_key:
240 kfree(key); 249 kfree(key);
241 err_put: 250 err_put:
242 fdput(f); 251 fdput(f);
243 return err; 252 return err;
244 } 253 }
245 254
246 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 255 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
247 256
248 static int map_delete_elem(union bpf_attr *attr) 257 static int map_delete_elem(union bpf_attr *attr)
249 { 258 {
250 void __user *ukey = u64_to_ptr(attr->key); 259 void __user *ukey = u64_to_ptr(attr->key);
251 int ufd = attr->map_fd; 260 int ufd = attr->map_fd;
252 struct fd f = fdget(ufd); 261 struct fd f = fdget(ufd);
253 struct bpf_map *map; 262 struct bpf_map *map;
254 void *key; 263 void *key;
255 int err; 264 int err;
256 265
257 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 266 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
258 return -EINVAL; 267 return -EINVAL;
259 268
260 map = bpf_map_get(f); 269 map = bpf_map_get(f);
261 if (IS_ERR(map)) 270 if (IS_ERR(map))
262 return PTR_ERR(map); 271 return PTR_ERR(map);
263 272
264 err = -ENOMEM; 273 err = -ENOMEM;
265 key = kmalloc(map->key_size, GFP_USER); 274 key = kmalloc(map->key_size, GFP_USER);
266 if (!key) 275 if (!key)
267 goto err_put; 276 goto err_put;
268 277
269 err = -EFAULT; 278 err = -EFAULT;
270 if (copy_from_user(key, ukey, map->key_size) != 0) 279 if (copy_from_user(key, ukey, map->key_size) != 0)
271 goto free_key; 280 goto free_key;
272 281
273 rcu_read_lock(); 282 rcu_read_lock();
274 err = map->ops->map_delete_elem(map, key); 283 err = map->ops->map_delete_elem(map, key);
275 rcu_read_unlock(); 284 rcu_read_unlock();
276 285
277 free_key: 286 free_key:
278 kfree(key); 287 kfree(key);
279 err_put: 288 err_put:
280 fdput(f); 289 fdput(f);
281 return err; 290 return err;
282 } 291 }
283 292
284 /* last field in 'union bpf_attr' used by this command */ 293 /* last field in 'union bpf_attr' used by this command */
285 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 294 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
286 295
287 static int map_get_next_key(union bpf_attr *attr) 296 static int map_get_next_key(union bpf_attr *attr)
288 { 297 {
289 void __user *ukey = u64_to_ptr(attr->key); 298 void __user *ukey = u64_to_ptr(attr->key);
290 void __user *unext_key = u64_to_ptr(attr->next_key); 299 void __user *unext_key = u64_to_ptr(attr->next_key);
291 int ufd = attr->map_fd; 300 int ufd = attr->map_fd;
292 struct fd f = fdget(ufd); 301 struct fd f = fdget(ufd);
293 struct bpf_map *map; 302 struct bpf_map *map;
294 void *key, *next_key; 303 void *key, *next_key;
295 int err; 304 int err;
296 305
297 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 306 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
298 return -EINVAL; 307 return -EINVAL;
299 308
300 map = bpf_map_get(f); 309 map = bpf_map_get(f);
301 if (IS_ERR(map)) 310 if (IS_ERR(map))
302 return PTR_ERR(map); 311 return PTR_ERR(map);
303 312
304 err = -ENOMEM; 313 err = -ENOMEM;
305 key = kmalloc(map->key_size, GFP_USER); 314 key = kmalloc(map->key_size, GFP_USER);
306 if (!key) 315 if (!key)
307 goto err_put; 316 goto err_put;
308 317
309 err = -EFAULT; 318 err = -EFAULT;
310 if (copy_from_user(key, ukey, map->key_size) != 0) 319 if (copy_from_user(key, ukey, map->key_size) != 0)
311 goto free_key; 320 goto free_key;
312 321
313 err = -ENOMEM; 322 err = -ENOMEM;
314 next_key = kmalloc(map->key_size, GFP_USER); 323 next_key = kmalloc(map->key_size, GFP_USER);
315 if (!next_key) 324 if (!next_key)
316 goto free_key; 325 goto free_key;
317 326
318 rcu_read_lock(); 327 rcu_read_lock();
319 err = map->ops->map_get_next_key(map, key, next_key); 328 err = map->ops->map_get_next_key(map, key, next_key);
320 rcu_read_unlock(); 329 rcu_read_unlock();
321 if (err) 330 if (err)
322 goto free_next_key; 331 goto free_next_key;
323 332
324 err = -EFAULT; 333 err = -EFAULT;
325 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 334 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
326 goto free_next_key; 335 goto free_next_key;
327 336
328 err = 0; 337 err = 0;
329 338
330 free_next_key: 339 free_next_key:
331 kfree(next_key); 340 kfree(next_key);
332 free_key: 341 free_key:
333 kfree(key); 342 kfree(key);
334 err_put: 343 err_put:
335 fdput(f); 344 fdput(f);
336 return err; 345 return err;
337 } 346 }
338 347
339 static LIST_HEAD(bpf_prog_types); 348 static LIST_HEAD(bpf_prog_types);
340 349
341 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 350 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
342 { 351 {
343 struct bpf_prog_type_list *tl; 352 struct bpf_prog_type_list *tl;
344 353
345 list_for_each_entry(tl, &bpf_prog_types, list_node) { 354 list_for_each_entry(tl, &bpf_prog_types, list_node) {
346 if (tl->type == type) { 355 if (tl->type == type) {
347 prog->aux->ops = tl->ops; 356 prog->aux->ops = tl->ops;
348 prog->aux->prog_type = type; 357 prog->aux->prog_type = type;
349 return 0; 358 return 0;
350 } 359 }
351 } 360 }
352 return -EINVAL; 361 return -EINVAL;
353 } 362 }
354 363
355 void bpf_register_prog_type(struct bpf_prog_type_list *tl) 364 void bpf_register_prog_type(struct bpf_prog_type_list *tl)
356 { 365 {
357 list_add(&tl->list_node, &bpf_prog_types); 366 list_add(&tl->list_node, &bpf_prog_types);
358 } 367 }
359 368
360 /* fixup insn->imm field of bpf_call instructions: 369 /* fixup insn->imm field of bpf_call instructions:
361 * if (insn->imm == BPF_FUNC_map_lookup_elem) 370 * if (insn->imm == BPF_FUNC_map_lookup_elem)
362 * insn->imm = bpf_map_lookup_elem - __bpf_call_base; 371 * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
363 * else if (insn->imm == BPF_FUNC_map_update_elem) 372 * else if (insn->imm == BPF_FUNC_map_update_elem)
364 * insn->imm = bpf_map_update_elem - __bpf_call_base; 373 * insn->imm = bpf_map_update_elem - __bpf_call_base;
365 * else ... 374 * else ...
366 * 375 *
367 * this function is called after eBPF program passed verification 376 * this function is called after eBPF program passed verification
368 */ 377 */
369 static void fixup_bpf_calls(struct bpf_prog *prog) 378 static void fixup_bpf_calls(struct bpf_prog *prog)
370 { 379 {
371 const struct bpf_func_proto *fn; 380 const struct bpf_func_proto *fn;
372 int i; 381 int i;
373 382
374 for (i = 0; i < prog->len; i++) { 383 for (i = 0; i < prog->len; i++) {
375 struct bpf_insn *insn = &prog->insnsi[i]; 384 struct bpf_insn *insn = &prog->insnsi[i];
376 385
377 if (insn->code == (BPF_JMP | BPF_CALL)) { 386 if (insn->code == (BPF_JMP | BPF_CALL)) {
378 /* we reach here when program has bpf_call instructions 387 /* we reach here when program has bpf_call instructions
379 * and it passed bpf_check(), means that 388 * and it passed bpf_check(), means that
380 * ops->get_func_proto must have been supplied, check it 389 * ops->get_func_proto must have been supplied, check it
381 */ 390 */
382 BUG_ON(!prog->aux->ops->get_func_proto); 391 BUG_ON(!prog->aux->ops->get_func_proto);
383 392
384 fn = prog->aux->ops->get_func_proto(insn->imm); 393 fn = prog->aux->ops->get_func_proto(insn->imm);
385 /* all functions that have prototype and verifier allowed 394 /* all functions that have prototype and verifier allowed
386 * programs to call them, must be real in-kernel functions 395 * programs to call them, must be real in-kernel functions
387 */ 396 */
388 BUG_ON(!fn->func); 397 BUG_ON(!fn->func);
389 insn->imm = fn->func - __bpf_call_base; 398 insn->imm = fn->func - __bpf_call_base;
390 } 399 }
391 } 400 }
392 } 401 }
393 402
394 /* drop refcnt on maps used by eBPF program and free auxilary data */ 403 /* drop refcnt on maps used by eBPF program and free auxilary data */
395 static void free_used_maps(struct bpf_prog_aux *aux) 404 static void free_used_maps(struct bpf_prog_aux *aux)
396 { 405 {
397 int i; 406 int i;
398 407
399 for (i = 0; i < aux->used_map_cnt; i++) 408 for (i = 0; i < aux->used_map_cnt; i++)
400 bpf_map_put(aux->used_maps[i]); 409 bpf_map_put(aux->used_maps[i]);
401 410
402 kfree(aux->used_maps); 411 kfree(aux->used_maps);
403 } 412 }
404 413
405 void bpf_prog_put(struct bpf_prog *prog) 414 void bpf_prog_put(struct bpf_prog *prog)
406 { 415 {
407 if (atomic_dec_and_test(&prog->aux->refcnt)) { 416 if (atomic_dec_and_test(&prog->aux->refcnt)) {
408 free_used_maps(prog->aux); 417 free_used_maps(prog->aux);
409 bpf_prog_free(prog); 418 bpf_prog_free(prog);
410 } 419 }
411 } 420 }
412 421
413 static int bpf_prog_release(struct inode *inode, struct file *filp) 422 static int bpf_prog_release(struct inode *inode, struct file *filp)
414 { 423 {
415 struct bpf_prog *prog = filp->private_data; 424 struct bpf_prog *prog = filp->private_data;
416 425
417 bpf_prog_put(prog); 426 bpf_prog_put(prog);
418 return 0; 427 return 0;
419 } 428 }
420 429
421 static const struct file_operations bpf_prog_fops = { 430 static const struct file_operations bpf_prog_fops = {
422 .release = bpf_prog_release, 431 .release = bpf_prog_release,
423 }; 432 };
424 433
425 static struct bpf_prog *get_prog(struct fd f) 434 static struct bpf_prog *get_prog(struct fd f)
426 { 435 {
427 struct bpf_prog *prog; 436 struct bpf_prog *prog;
428 437
429 if (!f.file) 438 if (!f.file)
430 return ERR_PTR(-EBADF); 439 return ERR_PTR(-EBADF);
431 440
432 if (f.file->f_op != &bpf_prog_fops) { 441 if (f.file->f_op != &bpf_prog_fops) {
433 fdput(f); 442 fdput(f);
434 return ERR_PTR(-EINVAL); 443 return ERR_PTR(-EINVAL);
435 } 444 }
436 445
437 prog = f.file->private_data; 446 prog = f.file->private_data;
438 447
439 return prog; 448 return prog;
440 } 449 }
441 450
442 /* called by sockets/tracing/seccomp before attaching program to an event 451 /* called by sockets/tracing/seccomp before attaching program to an event
443 * pairs with bpf_prog_put() 452 * pairs with bpf_prog_put()
444 */ 453 */
445 struct bpf_prog *bpf_prog_get(u32 ufd) 454 struct bpf_prog *bpf_prog_get(u32 ufd)
446 { 455 {
447 struct fd f = fdget(ufd); 456 struct fd f = fdget(ufd);
448 struct bpf_prog *prog; 457 struct bpf_prog *prog;
449 458
450 prog = get_prog(f); 459 prog = get_prog(f);
451 460
452 if (IS_ERR(prog)) 461 if (IS_ERR(prog))
453 return prog; 462 return prog;
454 463
455 atomic_inc(&prog->aux->refcnt); 464 atomic_inc(&prog->aux->refcnt);
456 fdput(f); 465 fdput(f);
457 return prog; 466 return prog;
458 } 467 }
459 468
460 /* last field in 'union bpf_attr' used by this command */ 469 /* last field in 'union bpf_attr' used by this command */
461 #define BPF_PROG_LOAD_LAST_FIELD log_buf 470 #define BPF_PROG_LOAD_LAST_FIELD log_buf
462 471
463 static int bpf_prog_load(union bpf_attr *attr) 472 static int bpf_prog_load(union bpf_attr *attr)
464 { 473 {
465 enum bpf_prog_type type = attr->prog_type; 474 enum bpf_prog_type type = attr->prog_type;
466 struct bpf_prog *prog; 475 struct bpf_prog *prog;
467 int err; 476 int err;
468 char license[128]; 477 char license[128];
469 bool is_gpl; 478 bool is_gpl;
470 479
471 if (CHECK_ATTR(BPF_PROG_LOAD)) 480 if (CHECK_ATTR(BPF_PROG_LOAD))
472 return -EINVAL; 481 return -EINVAL;
473 482
474 /* copy eBPF program license from user space */ 483 /* copy eBPF program license from user space */
475 if (strncpy_from_user(license, u64_to_ptr(attr->license), 484 if (strncpy_from_user(license, u64_to_ptr(attr->license),
476 sizeof(license) - 1) < 0) 485 sizeof(license) - 1) < 0)
477 return -EFAULT; 486 return -EFAULT;
478 license[sizeof(license) - 1] = 0; 487 license[sizeof(license) - 1] = 0;
479 488
480 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 489 /* eBPF programs must be GPL compatible to use GPL-ed functions */
481 is_gpl = license_is_gpl_compatible(license); 490 is_gpl = license_is_gpl_compatible(license);
482 491
483 if (attr->insn_cnt >= BPF_MAXINSNS) 492 if (attr->insn_cnt >= BPF_MAXINSNS)
484 return -EINVAL; 493 return -EINVAL;
485 494
486 /* plain bpf_prog allocation */ 495 /* plain bpf_prog allocation */
487 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 496 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
488 if (!prog) 497 if (!prog)
489 return -ENOMEM; 498 return -ENOMEM;
490 499
491 prog->len = attr->insn_cnt; 500 prog->len = attr->insn_cnt;
492 501
493 err = -EFAULT; 502 err = -EFAULT;
494 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), 503 if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
495 prog->len * sizeof(struct bpf_insn)) != 0) 504 prog->len * sizeof(struct bpf_insn)) != 0)
496 goto free_prog; 505 goto free_prog;
497 506
498 prog->orig_prog = NULL; 507 prog->orig_prog = NULL;
499 prog->jited = false; 508 prog->jited = false;
500 509
501 atomic_set(&prog->aux->refcnt, 1); 510 atomic_set(&prog->aux->refcnt, 1);
502 prog->aux->is_gpl_compatible = is_gpl; 511 prog->aux->is_gpl_compatible = is_gpl;
503 512
504 /* find program type: socket_filter vs tracing_filter */ 513 /* find program type: socket_filter vs tracing_filter */
505 err = find_prog_type(type, prog); 514 err = find_prog_type(type, prog);
506 if (err < 0) 515 if (err < 0)
507 goto free_prog; 516 goto free_prog;
508 517
509 /* run eBPF verifier */ 518 /* run eBPF verifier */
510 err = bpf_check(prog, attr); 519 err = bpf_check(prog, attr);
511 520
512 if (err < 0) 521 if (err < 0)
513 goto free_used_maps; 522 goto free_used_maps;
514 523
515 /* fixup BPF_CALL->imm field */ 524 /* fixup BPF_CALL->imm field */
516 fixup_bpf_calls(prog); 525 fixup_bpf_calls(prog);
517 526
518 /* eBPF program is ready to be JITed */ 527 /* eBPF program is ready to be JITed */
519 bpf_prog_select_runtime(prog); 528 bpf_prog_select_runtime(prog);
520 529
521 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); 530 err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
522 531
523 if (err < 0) 532 if (err < 0)
524 /* failed to allocate fd */ 533 /* failed to allocate fd */
525 goto free_used_maps; 534 goto free_used_maps;
526 535
527 return err; 536 return err;
528 537
529 free_used_maps: 538 free_used_maps:
530 free_used_maps(prog->aux); 539 free_used_maps(prog->aux);
531 free_prog: 540 free_prog:
532 bpf_prog_free(prog); 541 bpf_prog_free(prog);
533 return err; 542 return err;
534 } 543 }
535 544
536 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 545 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
537 { 546 {
538 union bpf_attr attr = {}; 547 union bpf_attr attr = {};
539 int err; 548 int err;
540 549
541 /* the syscall is limited to root temporarily. This restriction will be 550 /* the syscall is limited to root temporarily. This restriction will be
542 * lifted when security audit is clean. Note that eBPF+tracing must have 551 * lifted when security audit is clean. Note that eBPF+tracing must have
543 * this restriction, since it may pass kernel data to user space 552 * this restriction, since it may pass kernel data to user space
544 */ 553 */
545 if (!capable(CAP_SYS_ADMIN)) 554 if (!capable(CAP_SYS_ADMIN))
546 return -EPERM; 555 return -EPERM;
547 556
548 if (!access_ok(VERIFY_READ, uattr, 1)) 557 if (!access_ok(VERIFY_READ, uattr, 1))
549 return -EFAULT; 558 return -EFAULT;
550 559
551 if (size > PAGE_SIZE) /* silly large */ 560 if (size > PAGE_SIZE) /* silly large */
552 return -E2BIG; 561 return -E2BIG;
553 562
554 /* If we're handed a bigger struct than we know of, 563 /* If we're handed a bigger struct than we know of,
555 * ensure all the unknown bits are 0 - i.e. new 564 * ensure all the unknown bits are 0 - i.e. new
556 * user-space does not rely on any kernel feature 565 * user-space does not rely on any kernel feature
557 * extensions we dont know about yet. 566 * extensions we dont know about yet.
558 */ 567 */
559 if (size > sizeof(attr)) { 568 if (size > sizeof(attr)) {
560 unsigned char __user *addr; 569 unsigned char __user *addr;
561 unsigned char __user *end; 570 unsigned char __user *end;
562 unsigned char val; 571 unsigned char val;
563 572
564 addr = (void __user *)uattr + sizeof(attr); 573 addr = (void __user *)uattr + sizeof(attr);
565 end = (void __user *)uattr + size; 574 end = (void __user *)uattr + size;
566 575
567 for (; addr < end; addr++) { 576 for (; addr < end; addr++) {
568 err = get_user(val, addr); 577 err = get_user(val, addr);
569 if (err) 578 if (err)
570 return err; 579 return err;
571 if (val) 580 if (val)
572 return -E2BIG; 581 return -E2BIG;
573 } 582 }
574 size = sizeof(attr); 583 size = sizeof(attr);
575 } 584 }
576 585
577 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 586 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
578 if (copy_from_user(&attr, uattr, size) != 0) 587 if (copy_from_user(&attr, uattr, size) != 0)
579 return -EFAULT; 588 return -EFAULT;
580 589
581 switch (cmd) { 590 switch (cmd) {
582 case BPF_MAP_CREATE: 591 case BPF_MAP_CREATE:
583 err = map_create(&attr); 592 err = map_create(&attr);
584 break; 593 break;
585 case BPF_MAP_LOOKUP_ELEM: 594 case BPF_MAP_LOOKUP_ELEM:
586 err = map_lookup_elem(&attr); 595 err = map_lookup_elem(&attr);
587 break; 596 break;
588 case BPF_MAP_UPDATE_ELEM: 597 case BPF_MAP_UPDATE_ELEM:
589 err = map_update_elem(&attr); 598 err = map_update_elem(&attr);
590 break; 599 break;
591 case BPF_MAP_DELETE_ELEM: 600 case BPF_MAP_DELETE_ELEM:
592 err = map_delete_elem(&attr); 601 err = map_delete_elem(&attr);
593 break; 602 break;
594 case BPF_MAP_GET_NEXT_KEY: 603 case BPF_MAP_GET_NEXT_KEY:
595 err = map_get_next_key(&attr); 604 err = map_get_next_key(&attr);
596 break; 605 break;
597 case BPF_PROG_LOAD: 606 case BPF_PROG_LOAD:
598 err = bpf_prog_load(&attr); 607 err = bpf_prog_load(&attr);
599 break; 608 break;
600 default: 609 default:
601 err = -EINVAL; 610 err = -EINVAL;
602 break; 611 break;
603 } 612 }
604 613
605 return err; 614 return err;
606 } 615 }