Blame view
kernel/seccomp.c
23.9 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 |
/* * linux/kernel/seccomp.c * * Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com> * |
e2cfabdfd seccomp: add syst... |
6 7 8 9 10 11 12 13 |
* Copyright (C) 2012 Google, Inc. * Will Drewry <wad@chromium.org> * * This defines a simple but solid secure-computing facility. * * Mode 1 uses a fixed list of allowed system calls. * Mode 2 allows user-defined system call filters in the form * of Berkeley Packet Filters/Linux Socket Filters. |
1da177e4c Linux-2.6.12-rc2 |
14 |
*/ |
e2cfabdfd seccomp: add syst... |
15 |
#include <linux/atomic.h> |
85e7bac33 seccomp: audit ab... |
16 |
#include <linux/audit.h> |
5b1017404 x86-64: seccomp: ... |
17 |
#include <linux/compat.h> |
e2cfabdfd seccomp: add syst... |
18 19 |
#include <linux/sched.h> #include <linux/seccomp.h> |
c8bee430d seccomp: split fi... |
20 |
#include <linux/slab.h> |
48dc92b9f seccomp: add "sec... |
21 |
#include <linux/syscalls.h> |
1da177e4c Linux-2.6.12-rc2 |
22 |
|
a4412fc94 seccomp,x86,arm,m... |
23 |
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER |
e2cfabdfd seccomp: add syst... |
24 |
#include <asm/syscall.h> |
a4412fc94 seccomp,x86,arm,m... |
25 |
#endif |
e2cfabdfd seccomp: add syst... |
26 27 |
#ifdef CONFIG_SECCOMP_FILTER |
e2cfabdfd seccomp: add syst... |
28 |
#include <linux/filter.h> |
c2e1f2e30 seccomp: implemen... |
29 |
#include <linux/pid.h> |
fb0fadf9b ptrace,seccomp: A... |
30 |
#include <linux/ptrace.h> |
e2cfabdfd seccomp: add syst... |
31 |
#include <linux/security.h> |
e2cfabdfd seccomp: add syst... |
32 33 34 35 36 37 38 39 40 41 42 43 |
#include <linux/tracehook.h>
#include <linux/uaccess.h>

/**
 * struct seccomp_filter - container for seccomp BPF programs
 *
 * @usage: reference count to manage the object lifetime.
 *         get/put helpers should be used when accessing an instance
 *         outside of a lifetime-guarded section.  In general, this
 *         is only needed for handling filters shared across tasks.
 * @prev: points to a previously installed, or inherited, filter
 * @prog: the BPF program to evaluate.  The instruction count and the
 *        instructions themselves are reached through it (prog->len and
 *        prog->insnsi); they are not members of this structure.
 *
 * seccomp_filter objects are organized in a tree linked via the @prev
 * pointer.  For any task, it appears to be a singly-linked list starting
 * with current->seccomp.filter, the most recently attached or inherited filter.
 * However, multiple filters may share a @prev node, by way of fork(), which
 * results in a unidirectional tree existing in memory.  This is similar to
 * how namespaces work.
 *
 * seccomp_filter objects should never be modified after being attached
 * to a task_struct (other than @usage).
 */
struct seccomp_filter {
	atomic_t usage;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
};

/* Limit any path through the tree to 256KB worth of instructions. */
#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
bd4cf0ed3 net: filter: rewo... |
64 |
/* |
e2cfabdfd seccomp: add syst... |
65 66 67 |
* Endianness is explicitly ignored and left for BPF program authors to manage * as per the specific architecture. */ |
bd4cf0ed3 net: filter: rewo... |
68 |
static void populate_seccomp_data(struct seccomp_data *sd) |
e2cfabdfd seccomp: add syst... |
69 |
{ |
bd4cf0ed3 net: filter: rewo... |
70 71 |
struct task_struct *task = current; struct pt_regs *regs = task_pt_regs(task); |
2eac76483 seccomp: fix popu... |
72 |
unsigned long args[6]; |
e2cfabdfd seccomp: add syst... |
73 |
|
bd4cf0ed3 net: filter: rewo... |
74 |
sd->nr = syscall_get_nr(task, regs); |
0b747172d Merge git://git.i... |
75 |
sd->arch = syscall_get_arch(); |
2eac76483 seccomp: fix popu... |
76 77 78 79 80 81 82 |
syscall_get_arguments(task, regs, 0, 6, args); sd->args[0] = args[0]; sd->args[1] = args[1]; sd->args[2] = args[2]; sd->args[3] = args[3]; sd->args[4] = args[4]; sd->args[5] = args[5]; |
bd4cf0ed3 net: filter: rewo... |
83 |
sd->instruction_pointer = KSTK_EIP(task); |
e2cfabdfd seccomp: add syst... |
84 85 86 87 88 89 90 |
} /** * seccomp_check_filter - verify seccomp filter code * @filter: filter to verify * @flen: length of filter * |
4df95ff48 net: filter: rena... |
91 |
* Takes a previously checked filter (by bpf_check_classic) and |
e2cfabdfd seccomp: add syst... |
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
* redirects all filter code that loads struct sk_buff data * and related data through seccomp_bpf_load. It also * enforces length and alignment checking of those loads. * * Returns 0 if the rule set is legal or -EINVAL if not. */ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) { int pc; for (pc = 0; pc < flen; pc++) { struct sock_filter *ftest = &filter[pc]; u16 code = ftest->code; u32 k = ftest->k; switch (code) { |
348059313 net: filter: get ... |
107 |
case BPF_LD | BPF_W | BPF_ABS: |
bd4cf0ed3 net: filter: rewo... |
108 |
ftest->code = BPF_LDX | BPF_W | BPF_ABS; |
e2cfabdfd seccomp: add syst... |
109 110 111 112 |
/* 32-bit aligned and not out of bounds. */ if (k >= sizeof(struct seccomp_data) || k & 3) return -EINVAL; continue; |
348059313 net: filter: get ... |
113 |
case BPF_LD | BPF_W | BPF_LEN: |
bd4cf0ed3 net: filter: rewo... |
114 |
ftest->code = BPF_LD | BPF_IMM; |
e2cfabdfd seccomp: add syst... |
115 116 |
ftest->k = sizeof(struct seccomp_data); continue; |
348059313 net: filter: get ... |
117 |
case BPF_LDX | BPF_W | BPF_LEN: |
bd4cf0ed3 net: filter: rewo... |
118 |
ftest->code = BPF_LDX | BPF_IMM; |
e2cfabdfd seccomp: add syst... |
119 120 121 |
ftest->k = sizeof(struct seccomp_data); continue; /* Explicitly include allowed calls. */ |
348059313 net: filter: get ... |
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
case BPF_RET | BPF_K: case BPF_RET | BPF_A: case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU | BPF_NEG: case BPF_LD | BPF_IMM: case BPF_LDX | BPF_IMM: case BPF_MISC | BPF_TAX: case BPF_MISC | BPF_TXA: case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: case BPF_STX: case BPF_JMP | BPF_JA: case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: |
e2cfabdfd seccomp: add syst... |
160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
continue; default: return -EINVAL; } } return 0; } /** * seccomp_run_filters - evaluates all seccomp filters against @syscall * @syscall: number of the current system call * * Returns valid seccomp BPF response codes. */ |
d39bd00de seccomp: Allow ar... |
174 |
static u32 seccomp_run_filters(struct seccomp_data *sd) |
e2cfabdfd seccomp: add syst... |
175 |
{ |
3ba2530cc seccomp: allow mo... |
176 |
struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); |
d39bd00de seccomp: Allow ar... |
177 |
struct seccomp_data sd_local; |
acf3b2c71 seccomp: add SECC... |
178 179 180 |
u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ |
3ba2530cc seccomp: allow mo... |
181 |
if (unlikely(WARN_ON(f == NULL))) |
acf3b2c71 seccomp: add SECC... |
182 |
return SECCOMP_RET_KILL; |
3ba2530cc seccomp: allow mo... |
183 184 |
/* Make sure cross-thread synced filter points somewhere sane. */ smp_read_barrier_depends(); |
d39bd00de seccomp: Allow ar... |
185 186 187 188 |
if (!sd) { populate_seccomp_data(&sd_local); sd = &sd_local; } |
bd4cf0ed3 net: filter: rewo... |
189 |
|
e2cfabdfd seccomp: add syst... |
190 191 |
/* * All filters in the list are evaluated and the lowest BPF return |
acf3b2c71 seccomp: add SECC... |
192 |
* value always takes priority (ignoring the DATA). |
e2cfabdfd seccomp: add syst... |
193 |
*/ |
3ba2530cc seccomp: allow mo... |
194 |
for (; f; f = f->prev) { |
d39bd00de seccomp: Allow ar... |
195 |
u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd); |
8f577cadf seccomp: JIT comp... |
196 |
|
acf3b2c71 seccomp: add SECC... |
197 198 |
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; |
e2cfabdfd seccomp: add syst... |
199 200 201 |
} return ret; } |
1f41b4504 seccomp: extract ... |
202 |
#endif /* CONFIG_SECCOMP_FILTER */ |
e2cfabdfd seccomp: add syst... |
203 |
|
1f41b4504 seccomp: extract ... |
204 205 |
static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) { |
69f6a34bd seccomp: Replace ... |
206 |
assert_spin_locked(¤t->sighand->siglock); |
dbd952127 seccomp: introduc... |
207 |
|
1f41b4504 seccomp: extract ... |
208 209 210 211 212 |
if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) return false; return true; } |
3ba2530cc seccomp: allow mo... |
213 214 |
static inline void seccomp_assign_mode(struct task_struct *task, unsigned long seccomp_mode) |
1f41b4504 seccomp: extract ... |
215 |
{ |
69f6a34bd seccomp: Replace ... |
216 |
assert_spin_locked(&task->sighand->siglock); |
dbd952127 seccomp: introduc... |
217 |
|
3ba2530cc seccomp: allow mo... |
218 219 220 221 222 223 224 |
task->seccomp.mode = seccomp_mode; /* * Make sure TIF_SECCOMP cannot be set before the mode (and * filter) is set. */ smp_mb__before_atomic(); set_tsk_thread_flag(task, TIF_SECCOMP); |
1f41b4504 seccomp: extract ... |
225 226 227 |
} #ifdef CONFIG_SECCOMP_FILTER |
c2e1f2e30 seccomp: implemen... |
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
/* Returns 1 if the parent is an ancestor of the child. */ static int is_ancestor(struct seccomp_filter *parent, struct seccomp_filter *child) { /* NULL is the root ancestor. */ if (parent == NULL) return 1; for (; child; child = child->prev) if (child == parent) return 1; return 0; } /** * seccomp_can_sync_threads: checks if all threads can be synchronized * * Expects sighand and cred_guard_mutex locks to be held. * * Returns 0 on success, -ve on error, or the pid of a thread which was * either not in the correct seccomp mode or it did not have an ancestral * seccomp filter. */ static inline pid_t seccomp_can_sync_threads(void) { struct task_struct *thread, *caller; BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); |
69f6a34bd seccomp: Replace ... |
255 |
assert_spin_locked(¤t->sighand->siglock); |
c2e1f2e30 seccomp: implemen... |
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 |
/* Validate all threads being eligible for synchronization. */ caller = current; for_each_thread(caller, thread) { pid_t failed; /* Skip current, since it is initiating the sync. */ if (thread == caller) continue; if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || (thread->seccomp.mode == SECCOMP_MODE_FILTER && is_ancestor(thread->seccomp.filter, caller->seccomp.filter))) continue; /* Return the first thread that cannot be synchronized. */ failed = task_pid_vnr(thread); /* If the pid cannot be resolved, then return -ESRCH */ if (unlikely(WARN_ON(failed == 0))) failed = -ESRCH; return failed; } return 0; } /** * seccomp_sync_threads: sets all threads to use current's filter * * Expects sighand and cred_guard_mutex locks to be held, and for * seccomp_can_sync_threads() to have returned success already * without dropping the locks. * */ static inline void seccomp_sync_threads(void) { struct task_struct *thread, *caller; BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); |
69f6a34bd seccomp: Replace ... |
296 |
assert_spin_locked(¤t->sighand->siglock); |
c2e1f2e30 seccomp: implemen... |
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 |
/* Synchronize all threads. */ caller = current; for_each_thread(caller, thread) { /* Skip current, since it needs no changes. */ if (thread == caller) continue; /* Get a task reference for the new leaf node. */ get_seccomp_filter(caller); /* * Drop the task reference to the shared ancestor since * current's path will hold a reference. (This also * allows a put before the assignment.) */ put_seccomp_filter(thread); smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); /* * Opt the other thread into seccomp if needed. * As threads are considered to be trust-realm * equivalent (see ptrace_may_access), it is safe to * allow one thread to transition the other. */ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { /* * Don't let an unprivileged task work around * the no_new_privs restriction by creating * a thread that sets it up, enters seccomp, * then dies. */ if (task_no_new_privs(caller)) task_set_no_new_privs(thread); seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); } } } |
e2cfabdfd seccomp: add syst... |
335 |
/** |
c8bee430d seccomp: split fi... |
336 |
* seccomp_prepare_filter: Prepares a seccomp filter for use. |
e2cfabdfd seccomp: add syst... |
337 338 |
* @fprog: BPF program to install * |
c8bee430d seccomp: split fi... |
339 |
* Returns filter on success or an ERR_PTR on failure. |
e2cfabdfd seccomp: add syst... |
340 |
*/ |
c8bee430d seccomp: split fi... |
341 |
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) |
e2cfabdfd seccomp: add syst... |
342 343 |
{ struct seccomp_filter *filter; |
c8bee430d seccomp: split fi... |
344 |
unsigned long fp_size; |
bd4cf0ed3 net: filter: rewo... |
345 346 |
struct sock_filter *fp; int new_len; |
e2cfabdfd seccomp: add syst... |
347 348 349 |
long ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) |
c8bee430d seccomp: split fi... |
350 351 352 |
return ERR_PTR(-EINVAL); BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); fp_size = fprog->len * sizeof(struct sock_filter); |
e2cfabdfd seccomp: add syst... |
353 354 |
/* |
119ce5c8b kernel/seccomp.c:... |
355 |
* Installing a seccomp filter requires that the task has |
e2cfabdfd seccomp: add syst... |
356 357 358 359 |
* CAP_SYS_ADMIN in its namespace or be running with no_new_privs. * This avoids scenarios where unprivileged tasks can affect the * behavior of privileged children. */ |
1d4457f99 sched: move no_ne... |
360 |
if (!task_no_new_privs(current) && |
e2cfabdfd seccomp: add syst... |
361 362 |
security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) |
c8bee430d seccomp: split fi... |
363 |
return ERR_PTR(-EACCES); |
e2cfabdfd seccomp: add syst... |
364 |
|
bd4cf0ed3 net: filter: rewo... |
365 366 |
fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); if (!fp) |
c8bee430d seccomp: split fi... |
367 |
return ERR_PTR(-ENOMEM); |
e2cfabdfd seccomp: add syst... |
368 369 370 |
/* Copy the instructions from fprog. */ ret = -EFAULT; |
bd4cf0ed3 net: filter: rewo... |
371 372 |
if (copy_from_user(fp, fprog->filter, fp_size)) goto free_prog; |
e2cfabdfd seccomp: add syst... |
373 374 |
/* Check and rewrite the fprog via the skb checker */ |
4df95ff48 net: filter: rena... |
375 |
ret = bpf_check_classic(fp, fprog->len); |
e2cfabdfd seccomp: add syst... |
376 |
if (ret) |
bd4cf0ed3 net: filter: rewo... |
377 |
goto free_prog; |
e2cfabdfd seccomp: add syst... |
378 379 |
/* Check and rewrite the fprog for seccomp use */ |
bd4cf0ed3 net: filter: rewo... |
380 381 382 |
ret = seccomp_check_filter(fp, fprog->len); if (ret) goto free_prog; |
2695fb552 net: filter: rena... |
383 |
/* Convert 'sock_filter' insns to 'bpf_insn' insns */ |
8fb575ca3 net: filter: rena... |
384 |
ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); |
bd4cf0ed3 net: filter: rewo... |
385 386 387 388 |
if (ret) goto free_prog; /* Allocate a new seccomp_filter */ |
0acf07d24 seccomp: fix memo... |
389 |
ret = -ENOMEM; |
8f577cadf seccomp: JIT comp... |
390 |
filter = kzalloc(sizeof(struct seccomp_filter), |
bd4cf0ed3 net: filter: rewo... |
391 392 393 |
GFP_KERNEL|__GFP_NOWARN); if (!filter) goto free_prog; |
60a3b2253 net: bpf: make eB... |
394 |
filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); |
8f577cadf seccomp: JIT comp... |
395 |
if (!filter->prog) |
bd4cf0ed3 net: filter: rewo... |
396 |
goto free_filter; |
8f577cadf seccomp: JIT comp... |
397 |
|
8fb575ca3 net: filter: rena... |
398 |
ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); |
8f577cadf seccomp: JIT comp... |
399 400 |
if (ret) goto free_filter_prog; |
bd4cf0ed3 net: filter: rewo... |
401 |
|
60a3b2253 net: bpf: make eB... |
402 |
kfree(fp); |
bd4cf0ed3 net: filter: rewo... |
403 |
atomic_set(&filter->usage, 1); |
8f577cadf seccomp: JIT comp... |
404 |
filter->prog->len = new_len; |
8f577cadf seccomp: JIT comp... |
405 |
|
7ae457c1e net: filter: spli... |
406 |
bpf_prog_select_runtime(filter->prog); |
e2cfabdfd seccomp: add syst... |
407 |
|
c8bee430d seccomp: split fi... |
408 |
return filter; |
bd4cf0ed3 net: filter: rewo... |
409 |
|
8f577cadf seccomp: JIT comp... |
410 |
free_filter_prog: |
60a3b2253 net: bpf: make eB... |
411 |
__bpf_prog_free(filter->prog); |
bd4cf0ed3 net: filter: rewo... |
412 |
free_filter: |
e2cfabdfd seccomp: add syst... |
413 |
kfree(filter); |
bd4cf0ed3 net: filter: rewo... |
414 415 |
free_prog: kfree(fp); |
c8bee430d seccomp: split fi... |
416 |
return ERR_PTR(ret); |
e2cfabdfd seccomp: add syst... |
417 418 419 |
} /** |
c8bee430d seccomp: split fi... |
420 |
* seccomp_prepare_user_filter - prepares a user-supplied sock_fprog |
e2cfabdfd seccomp: add syst... |
421 422 423 424 |
* @user_filter: pointer to the user data containing a sock_fprog. * * Returns 0 on success and non-zero otherwise. */ |
c8bee430d seccomp: split fi... |
425 426 |
static struct seccomp_filter * seccomp_prepare_user_filter(const char __user *user_filter) |
e2cfabdfd seccomp: add syst... |
427 428 |
{ struct sock_fprog fprog; |
c8bee430d seccomp: split fi... |
429 |
struct seccomp_filter *filter = ERR_PTR(-EFAULT); |
e2cfabdfd seccomp: add syst... |
430 431 432 433 434 435 436 437 438 439 440 441 |
#ifdef CONFIG_COMPAT if (is_compat_task()) { struct compat_sock_fprog fprog32; if (copy_from_user(&fprog32, user_filter, sizeof(fprog32))) goto out; fprog.len = fprog32.len; fprog.filter = compat_ptr(fprog32.filter); } else /* falls through to the if below. */ #endif if (copy_from_user(&fprog, user_filter, sizeof(fprog))) goto out; |
c8bee430d seccomp: split fi... |
442 |
filter = seccomp_prepare_filter(&fprog); |
e2cfabdfd seccomp: add syst... |
443 |
out: |
c8bee430d seccomp: split fi... |
444 445 446 447 448 449 450 451 |
return filter; } /** * seccomp_attach_filter: validate and attach filter * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * |
dbd952127 seccomp: introduc... |
452 453 |
* Caller must be holding current->sighand->siglock lock. * |
c8bee430d seccomp: split fi... |
454 455 456 457 458 459 460 |
* Returns 0 on success, -ve on error. */ static long seccomp_attach_filter(unsigned int flags, struct seccomp_filter *filter) { unsigned long total_insns; struct seccomp_filter *walker; |
69f6a34bd seccomp: Replace ... |
461 |
assert_spin_locked(¤t->sighand->siglock); |
dbd952127 seccomp: introduc... |
462 |
|
c8bee430d seccomp: split fi... |
463 464 465 466 467 468 |
/* Validate resulting filter length. */ total_insns = filter->prog->len; for (walker = current->seccomp.filter; walker; walker = walker->prev) total_insns += walker->prog->len + 4; /* 4 instr penalty */ if (total_insns > MAX_INSNS_PER_PATH) return -ENOMEM; |
c2e1f2e30 seccomp: implemen... |
469 470 471 472 473 474 475 476 |
/* If thread sync has been requested, check that it is possible. */ if (flags & SECCOMP_FILTER_FLAG_TSYNC) { int ret; ret = seccomp_can_sync_threads(); if (ret) return ret; } |
c8bee430d seccomp: split fi... |
477 478 479 480 481 482 |
/* * If there is an existing filter, make it the prev and don't drop its * task reference. */ filter->prev = current->seccomp.filter; current->seccomp.filter = filter; |
c2e1f2e30 seccomp: implemen... |
483 484 485 |
/* Now that the new filter is in place, synchronize to all threads. */ if (flags & SECCOMP_FILTER_FLAG_TSYNC) seccomp_sync_threads(); |
c8bee430d seccomp: split fi... |
486 |
return 0; |
e2cfabdfd seccomp: add syst... |
487 488 489 490 491 492 493 494 495 496 497 |
} /* get_seccomp_filter - increments the reference count of the filter on @tsk */ void get_seccomp_filter(struct task_struct *tsk) { struct seccomp_filter *orig = tsk->seccomp.filter; if (!orig) return; /* Reference count is bounded by the number of total processes. */ atomic_inc(&orig->usage); } |
c8bee430d seccomp: split fi... |
498 499 500 |
static inline void seccomp_filter_free(struct seccomp_filter *filter) { if (filter) { |
ae045e245 Merge git://git.k... |
501 |
bpf_prog_free(filter->prog); |
c8bee430d seccomp: split fi... |
502 503 504 |
kfree(filter); } } |
e2cfabdfd seccomp: add syst... |
505 506 507 508 509 510 511 512 |
/*
 * put_seccomp_filter - drop the reference held through tsk->seccomp.filter
 *
 * Starting at the task's filter, each node whose count drops to zero is
 * freed and the walk moves on to its @prev; the walk stops at the first
 * node that still has other users (or at the end of the chain).
 */
void put_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *node = tsk->seccomp.filter;

	/* Iterative on purpose: single-reference branches are unwound in a loop. */
	while (node) {
		struct seccomp_filter *parent;

		if (!atomic_dec_and_test(&node->usage))
			break;

		/* Read the link before the node is released. */
		parent = node->prev;
		seccomp_filter_free(node);
		node = parent;
	}
}
bb6ea4301 seccomp: Add SECC... |
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 |
/** * seccomp_send_sigsys - signals the task to allow in-process syscall emulation * @syscall: syscall number to send to userland * @reason: filter-supplied reason code to send to userland (via si_errno) * * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info. */ static void seccomp_send_sigsys(int syscall, int reason) { struct siginfo info; memset(&info, 0, sizeof(info)); info.si_signo = SIGSYS; info.si_code = SYS_SECCOMP; info.si_call_addr = (void __user *)KSTK_EIP(current); info.si_errno = reason; |
5e937a9ae syscall_get_arch:... |
532 |
info.si_arch = syscall_get_arch(); |
bb6ea4301 seccomp: Add SECC... |
533 534 535 |
info.si_syscall = syscall; force_sig_info(SIGSYS, &info, current); } |
e2cfabdfd seccomp: add syst... |
536 |
#endif /* CONFIG_SECCOMP_FILTER */ |
1da177e4c Linux-2.6.12-rc2 |
537 538 539 540 541 542 543 544 545 546 |
/*
 * Syscalls permitted in secure computing mode 1: read, write, exit and
 * sigreturn.  To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 *
 * The table ends with a 0 entry.
 */
static int mode1_syscalls[] = {
	__NR_seccomp_read,
	__NR_seccomp_write,
	__NR_seccomp_exit,
	__NR_seccomp_sigreturn,
	0, /* null terminated */
};

#ifdef CONFIG_COMPAT
/* Same list, using the syscall numbers seen by compat tasks. */
static int mode1_syscalls_32[] = {
	__NR_seccomp_read_32,
	__NR_seccomp_write_32,
	__NR_seccomp_exit_32,
	__NR_seccomp_sigreturn_32,
	0, /* null terminated */
};
#endif
a4412fc94 seccomp,x86,arm,m... |
553 |
static void __secure_computing_strict(int this_syscall) |
1da177e4c Linux-2.6.12-rc2 |
554 |
{ |
a4412fc94 seccomp,x86,arm,m... |
555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 |
int *syscall_whitelist = mode1_syscalls; #ifdef CONFIG_COMPAT if (is_compat_task()) syscall_whitelist = mode1_syscalls_32; #endif do { if (*syscall_whitelist == this_syscall) return; } while (*++syscall_whitelist); #ifdef SECCOMP_DEBUG dump_stack(); #endif audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); do_exit(SIGKILL); } #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER void secure_computing_strict(int this_syscall) { int mode = current->seccomp.mode; if (mode == 0) return; else if (mode == SECCOMP_MODE_STRICT) __secure_computing_strict(this_syscall); else BUG(); } #else int __secure_computing(void) { |
d39bd00de seccomp: Allow ar... |
587 |
u32 phase1_result = seccomp_phase1(NULL); |
13aa72f0f seccomp: Refactor... |
588 589 590 591 592 593 594 595 596 597 |
if (likely(phase1_result == SECCOMP_PHASE1_OK)) return 0; else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) return -1; else return seccomp_phase2(phase1_result); } #ifdef CONFIG_SECCOMP_FILTER |
d39bd00de seccomp: Allow ar... |
598 |
static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) |
13aa72f0f seccomp: Refactor... |
599 600 601 |
{ u32 filter_ret, action; int data; |
1da177e4c Linux-2.6.12-rc2 |
602 |
|
3ba2530cc seccomp: allow mo... |
603 604 605 606 607 |
/* * Make sure that any changes to mode from another thread have * been seen after TIF_SECCOMP was seen. */ rmb(); |
d39bd00de seccomp: Allow ar... |
608 |
filter_ret = seccomp_run_filters(sd); |
13aa72f0f seccomp: Refactor... |
609 610 611 612 613 614 |
data = filter_ret & SECCOMP_RET_DATA; action = filter_ret & SECCOMP_RET_ACTION; switch (action) { case SECCOMP_RET_ERRNO: /* Set the low-order 16-bits as a errno. */ |
d39bd00de seccomp: Allow ar... |
615 |
syscall_set_return_value(current, task_pt_regs(current), |
13aa72f0f seccomp: Refactor... |
616 617 618 619 620 |
-data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers. */ |
d39bd00de seccomp: Allow ar... |
621 |
syscall_rollback(current, task_pt_regs(current)); |
13aa72f0f seccomp: Refactor... |
622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 |
/* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; case SECCOMP_RET_TRACE: return filter_ret; /* Save the rest for phase 2. */ case SECCOMP_RET_ALLOW: return SECCOMP_PHASE1_OK; case SECCOMP_RET_KILL: default: audit_seccomp(this_syscall, SIGSYS, action); do_exit(SIGSYS); } unreachable(); skip: audit_seccomp(this_syscall, 0, action); return SECCOMP_PHASE1_SKIP; } |
1da177e4c Linux-2.6.12-rc2 |
644 |
#endif |
13aa72f0f seccomp: Refactor... |
645 646 647 |
/** * seccomp_phase1() - run fast path seccomp checks on the current syscall |
d39bd00de seccomp: Allow ar... |
648 |
* @arg sd: The seccomp_data or NULL |
13aa72f0f seccomp: Refactor... |
649 650 651 652 653 |
* * This only reads pt_regs via the syscall_xyz helpers. The only change * it will make to pt_regs is via syscall_set_return_value, and it will * only do that if it returns SECCOMP_PHASE1_SKIP. * |
d39bd00de seccomp: Allow ar... |
654 655 |
* If sd is provided, it will not read pt_regs at all. * |
13aa72f0f seccomp: Refactor... |
656 657 658 659 660 661 662 663 664 665 666 667 668 |
* It may also call do_exit or force a signal; these actions must be * safe. * * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should * be processed normally. * * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be * invoked. In this case, seccomp_phase1 will have set the return value * using syscall_set_return_value. * * If it returns anything else, then the return value should be passed * to seccomp_phase2 from a context in which ptrace hooks are safe. */ |
d39bd00de seccomp: Allow ar... |
669 |
u32 seccomp_phase1(struct seccomp_data *sd) |
13aa72f0f seccomp: Refactor... |
670 671 |
{ int mode = current->seccomp.mode; |
d39bd00de seccomp: Allow ar... |
672 673 |
int this_syscall = sd ? sd->nr : syscall_get_nr(current, task_pt_regs(current)); |
13aa72f0f seccomp: Refactor... |
674 675 |
switch (mode) { |
e2cfabdfd seccomp: add syst... |
676 |
case SECCOMP_MODE_STRICT: |
13aa72f0f seccomp: Refactor... |
677 678 |
__secure_computing_strict(this_syscall); /* may call do_exit */ return SECCOMP_PHASE1_OK; |
e2cfabdfd seccomp: add syst... |
679 |
#ifdef CONFIG_SECCOMP_FILTER |
13aa72f0f seccomp: Refactor... |
680 |
case SECCOMP_MODE_FILTER: |
d39bd00de seccomp: Allow ar... |
681 |
return __seccomp_phase1_filter(this_syscall, sd); |
e2cfabdfd seccomp: add syst... |
682 |
#endif |
1da177e4c Linux-2.6.12-rc2 |
683 684 685 |
default: BUG(); } |
13aa72f0f seccomp: Refactor... |
686 |
} |
1da177e4c Linux-2.6.12-rc2 |
687 |
|
13aa72f0f seccomp: Refactor... |
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 |
/**
 * seccomp_phase2() - finish slow path seccomp work for the current syscall
 * @phase1_result: The return value from seccomp_phase1()
 *
 * This must be called from a context in which ptrace hooks can be used.
 * Only the SECCOMP_RET_TRACE action is handled here; anything else
 * triggers BUG_ON.
 *
 * Returns 0 if the syscall should be processed or -1 to skip the syscall.
 */
int seccomp_phase2(u32 phase1_result)
{
	struct pt_regs *regs = task_pt_regs(current);
	u32 trace_action = phase1_result & SECCOMP_RET_ACTION;
	int event_msg = phase1_result & SECCOMP_RET_DATA;

	BUG_ON(trace_action != SECCOMP_RET_TRACE);

	audit_seccomp(syscall_get_nr(current, regs), 0, trace_action);

	/* Skip these calls if there is no tracer. */
	if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
		syscall_set_return_value(current, regs, -ENOSYS, 0);
		return -1;
	}

	/* Allow the BPF to provide the event message */
	ptrace_event(PTRACE_EVENT_SECCOMP, event_msg);

	/*
	 * The delivery of a fatal signal during event
	 * notification may silently skip tracer notification.
	 * Terminating the task now avoids executing a system
	 * call that may not be intended.
	 */
	if (fatal_signal_pending(current))
		do_exit(SIGSYS);

	/* A negative syscall number at this point is an explicit request to skip. */
	return syscall_get_nr(current, regs) < 0 ? -1 : 0;
}
a4412fc94 seccomp,x86,arm,m... |
728 |
#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ |
1d9d02fee move seccomp from... |
729 730 731 732 733 |
/* prctl_get_seccomp - report the seccomp mode of the current task */
long prctl_get_seccomp(void)
{
	long mode = current->seccomp.mode;

	return mode;
}
e2cfabdfd seccomp: add syst... |
734 |
/** |
3b23dd128 seccomp: split mo... |
735 |
* seccomp_set_mode_strict: internal function for setting strict seccomp |
e2cfabdfd seccomp: add syst... |
736 737 738 739 740 |
* * Once current->seccomp.mode is non-zero, it may not be changed. * * Returns 0 on success or -EINVAL on failure. */ |
3b23dd128 seccomp: split mo... |
741 |
static long seccomp_set_mode_strict(void) |
1d9d02fee move seccomp from... |
742 |
{ |
3b23dd128 seccomp: split mo... |
743 |
const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; |
e2cfabdfd seccomp: add syst... |
744 |
long ret = -EINVAL; |
1d9d02fee move seccomp from... |
745 |
|
dbd952127 seccomp: introduc... |
746 |
spin_lock_irq(¤t->sighand->siglock); |
1f41b4504 seccomp: extract ... |
747 |
if (!seccomp_may_assign_mode(seccomp_mode)) |
1d9d02fee move seccomp from... |
748 |
goto out; |
cf99abace make seccomp zero... |
749 |
#ifdef TIF_NOTSC |
3b23dd128 seccomp: split mo... |
750 |
disable_TSC(); |
cf99abace make seccomp zero... |
751 |
#endif |
3ba2530cc seccomp: allow mo... |
752 |
seccomp_assign_mode(current, seccomp_mode); |
3b23dd128 seccomp: split mo... |
753 754 755 |
ret = 0; out: |
dbd952127 seccomp: introduc... |
756 |
spin_unlock_irq(¤t->sighand->siglock); |
3b23dd128 seccomp: split mo... |
757 758 759 |
return ret; } |
e2cfabdfd seccomp: add syst... |
760 |
#ifdef CONFIG_SECCOMP_FILTER |
3b23dd128 seccomp: split mo... |
761 762 |
/** * seccomp_set_mode_filter: internal function for setting seccomp filter |
48dc92b9f seccomp: add "sec... |
763 |
* @flags: flags to change filter behavior |
3b23dd128 seccomp: split mo... |
764 765 766 767 768 769 770 771 772 773 |
* @filter: struct sock_fprog containing filter * * This function may be called repeatedly to install additional filters. * Every filter successfully installed will be evaluated (in reverse order) * for each system call the task makes. * * Once current->seccomp.mode is non-zero, it may not be changed. * * Returns 0 on success or -EINVAL on failure. */ |
48dc92b9f seccomp: add "sec... |
774 775 |
static long seccomp_set_mode_filter(unsigned int flags, const char __user *filter) |
3b23dd128 seccomp: split mo... |
776 777 |
{ const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; |
c8bee430d seccomp: split fi... |
778 |
struct seccomp_filter *prepared = NULL; |
3b23dd128 seccomp: split mo... |
779 |
long ret = -EINVAL; |
48dc92b9f seccomp: add "sec... |
780 |
/* Validate flags. */ |
c2e1f2e30 seccomp: implemen... |
781 |
if (flags & ~SECCOMP_FILTER_FLAG_MASK) |
dbd952127 seccomp: introduc... |
782 |
return -EINVAL; |
48dc92b9f seccomp: add "sec... |
783 |
|
c8bee430d seccomp: split fi... |
784 785 786 787 |
/* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) return PTR_ERR(prepared); |
c2e1f2e30 seccomp: implemen... |
788 789 790 791 792 793 794 |
/* * Make sure we cannot change seccomp or nnp state via TSYNC * while another thread is in the middle of calling exec. */ if (flags & SECCOMP_FILTER_FLAG_TSYNC && mutex_lock_killable(¤t->signal->cred_guard_mutex)) goto out_free; |
dbd952127 seccomp: introduc... |
795 |
spin_lock_irq(¤t->sighand->siglock); |
3b23dd128 seccomp: split mo... |
796 797 |
if (!seccomp_may_assign_mode(seccomp_mode)) goto out; |
c8bee430d seccomp: split fi... |
798 |
ret = seccomp_attach_filter(flags, prepared); |
3b23dd128 seccomp: split mo... |
799 |
if (ret) |
e2cfabdfd seccomp: add syst... |
800 |
goto out; |
c8bee430d seccomp: split fi... |
801 802 |
/* Do not free the successfully attached filter. */ prepared = NULL; |
1d9d02fee move seccomp from... |
803 |
|
3ba2530cc seccomp: allow mo... |
804 |
seccomp_assign_mode(current, seccomp_mode); |
e2cfabdfd seccomp: add syst... |
805 |
out: |
dbd952127 seccomp: introduc... |
806 |
spin_unlock_irq(¤t->sighand->siglock); |
c2e1f2e30 seccomp: implemen... |
807 808 809 |
if (flags & SECCOMP_FILTER_FLAG_TSYNC) mutex_unlock(¤t->signal->cred_guard_mutex); out_free: |
c8bee430d seccomp: split fi... |
810 |
seccomp_filter_free(prepared); |
1d9d02fee move seccomp from... |
811 812 |
return ret; } |
3b23dd128 seccomp: split mo... |
813 |
#else |
48dc92b9f seccomp: add "sec... |
814 815 |
static inline long seccomp_set_mode_filter(unsigned int flags, const char __user *filter) |
3b23dd128 seccomp: split mo... |
816 817 818 819 |
{ return -EINVAL; } #endif |
d78ab02c2 seccomp: create i... |
820 |
|
48dc92b9f seccomp: add "sec... |
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 |
/*
 * Shared dispatcher behind both the prctl path and the seccomp syscall.
 * Routes @op to the matching mode setter; unknown operations get -EINVAL.
 */
static long do_seccomp(unsigned int op, unsigned int flags,
		       const char __user *uargs)
{
	if (op == SECCOMP_SET_MODE_FILTER)
		return seccomp_set_mode_filter(flags, uargs);

	if (op != SECCOMP_SET_MODE_STRICT)
		return -EINVAL;

	/* Strict mode accepts neither flags nor an argument pointer. */
	if (flags || uargs)
		return -EINVAL;

	return seccomp_set_mode_strict();
}

/* seccomp(2) entry point: hands its arguments straight to do_seccomp(). */
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
		const char __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}
d78ab02c2 seccomp: create i... |
842 843 844 845 846 847 848 849 850 |
/**
 * prctl_set_seccomp: configures current->seccomp.mode
 * @seccomp_mode: requested mode to use
 * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
 *
 * Translates the prctl mode into the matching do_seccomp() operation.
 * The prctl interface doesn't have flags, so they are always passed as zero.
 *
 * Returns -EINVAL for an unrecognized @seccomp_mode; otherwise returns
 * whatever do_seccomp() returns (0 on success).
 */
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
{
	if (seccomp_mode == SECCOMP_MODE_STRICT) {
		/*
		 * Setting strict mode through prctl always ignored filter,
		 * so pass NULL to satisfy the internal argument check in
		 * do_seccomp().
		 */
		return do_seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
	}

	if (seccomp_mode == SECCOMP_MODE_FILTER)
		return do_seccomp(SECCOMP_SET_MODE_FILTER, 0, filter);

	return -EINVAL;
}